swresample/resample: optimize exact_rational=on:linear_interp=on case

Split dsp.resample into dsp.resample_common and dsp.resample_linear,
and call the faster resample_common even when linear_interp=on,
provided that c->frac and c->dst_incr_mod are both zero.

This speeds up resampling when exact_rational and linear_interp are both
enabled, because exact_rational forces c->frac and c->dst_incr_mod to
be zero as long as soft compensation does not happen.

benchmark on exact_rational=on:linear_interp=on
        old     new
real    8.432s  5.097s
user    7.679s  4.989s
sys     0.125s  0.107s

Reviewed-by: Michael Niedermayer <michael@niedermayer.cc>
Signed-off-by: Muhammad Faiz <mfcc64@gmail.com>
This commit is contained in:
Muhammad Faiz 2016-11-24 15:02:07 +07:00
parent ebb4c783d0
commit 06f94149c6
5 changed files with 36 additions and 27 deletions

View File

@ -111,12 +111,10 @@ av_cold void swri_resample_dsp_arm_init(ResampleContext *c)
switch(c->format) { switch(c->format) {
case AV_SAMPLE_FMT_FLTP: case AV_SAMPLE_FMT_FLTP:
if (!c->linear) c->dsp.resample_common = ff_resample_common_float_neon;
c->dsp.resample = ff_resample_common_float_neon;
break; break;
case AV_SAMPLE_FMT_S16P: case AV_SAMPLE_FMT_S16P:
if (!c->linear) c->dsp.resample_common = ff_resample_common_s16_neon;
c->dsp.resample = ff_resample_common_s16_neon;
break; break;
} }
} }

View File

@ -496,7 +496,12 @@ static int swri_resample(ResampleContext *c,
dst_size = FFMIN(dst_size, delta_n); dst_size = FFMIN(dst_size, delta_n);
if (dst_size > 0) { if (dst_size > 0) {
*consumed = c->dsp.resample(c, dst, src, dst_size, update_ctx); /* resample_linear and resample_common should have same behavior
* when frac and dst_incr_mod are zero */
if (c->linear && (c->frac || c->dst_incr_mod))
*consumed = c->dsp.resample_linear(c, dst, src, dst_size, update_ctx);
else
*consumed = c->dsp.resample_common(c, dst, src, dst_size, update_ctx);
} else { } else {
*consumed = 0; *consumed = 0;
} }

View File

@ -53,8 +53,10 @@ typedef struct ResampleContext {
struct { struct {
void (*resample_one)(void *dst, const void *src, void (*resample_one)(void *dst, const void *src,
int n, int64_t index, int64_t incr); int n, int64_t index, int64_t incr);
int (*resample)(struct ResampleContext *c, void *dst, int (*resample_common)(struct ResampleContext *c, void *dst,
const void *src, int n, int update_ctx); const void *src, int n, int update_ctx);
int (*resample_linear)(struct ResampleContext *c, void *dst,
const void *src, int n, int update_ctx);
} dsp; } dsp;
} ResampleContext; } ResampleContext;

View File

@ -48,19 +48,23 @@ void swri_resample_dsp_init(ResampleContext *c)
switch(c->format){ switch(c->format){
case AV_SAMPLE_FMT_S16P: case AV_SAMPLE_FMT_S16P:
c->dsp.resample_one = resample_one_int16; c->dsp.resample_one = resample_one_int16;
c->dsp.resample = c->linear ? resample_linear_int16 : resample_common_int16; c->dsp.resample_common = resample_common_int16;
c->dsp.resample_linear = resample_linear_int16;
break; break;
case AV_SAMPLE_FMT_S32P: case AV_SAMPLE_FMT_S32P:
c->dsp.resample_one = resample_one_int32; c->dsp.resample_one = resample_one_int32;
c->dsp.resample = c->linear ? resample_linear_int32 : resample_common_int32; c->dsp.resample_common = resample_common_int32;
c->dsp.resample_linear = resample_linear_int32;
break; break;
case AV_SAMPLE_FMT_FLTP: case AV_SAMPLE_FMT_FLTP:
c->dsp.resample_one = resample_one_float; c->dsp.resample_one = resample_one_float;
c->dsp.resample = c->linear ? resample_linear_float : resample_common_float; c->dsp.resample_common = resample_common_float;
c->dsp.resample_linear = resample_linear_float;
break; break;
case AV_SAMPLE_FMT_DBLP: case AV_SAMPLE_FMT_DBLP:
c->dsp.resample_one = resample_one_double; c->dsp.resample_one = resample_one_double;
c->dsp.resample = c->linear ? resample_linear_double : resample_common_double; c->dsp.resample_common = resample_common_double;
c->dsp.resample_linear = resample_linear_double;
break; break;
} }

View File

@ -50,40 +50,40 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c)
switch(c->format){ switch(c->format){
case AV_SAMPLE_FMT_S16P: case AV_SAMPLE_FMT_S16P:
if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) { if (ARCH_X86_32 && EXTERNAL_MMXEXT(mm_flags)) {
c->dsp.resample = c->linear ? ff_resample_linear_int16_mmxext c->dsp.resample_linear = ff_resample_linear_int16_mmxext;
: ff_resample_common_int16_mmxext; c->dsp.resample_common = ff_resample_common_int16_mmxext;
} }
if (EXTERNAL_SSE2(mm_flags)) { if (EXTERNAL_SSE2(mm_flags)) {
c->dsp.resample = c->linear ? ff_resample_linear_int16_sse2 c->dsp.resample_linear = ff_resample_linear_int16_sse2;
: ff_resample_common_int16_sse2; c->dsp.resample_common = ff_resample_common_int16_sse2;
} }
if (EXTERNAL_XOP(mm_flags)) { if (EXTERNAL_XOP(mm_flags)) {
c->dsp.resample = c->linear ? ff_resample_linear_int16_xop c->dsp.resample_linear = ff_resample_linear_int16_xop;
: ff_resample_common_int16_xop; c->dsp.resample_common = ff_resample_common_int16_xop;
} }
break; break;
case AV_SAMPLE_FMT_FLTP: case AV_SAMPLE_FMT_FLTP:
if (EXTERNAL_SSE(mm_flags)) { if (EXTERNAL_SSE(mm_flags)) {
c->dsp.resample = c->linear ? ff_resample_linear_float_sse c->dsp.resample_linear = ff_resample_linear_float_sse;
: ff_resample_common_float_sse; c->dsp.resample_common = ff_resample_common_float_sse;
} }
if (EXTERNAL_AVX_FAST(mm_flags)) { if (EXTERNAL_AVX_FAST(mm_flags)) {
c->dsp.resample = c->linear ? ff_resample_linear_float_avx c->dsp.resample_linear = ff_resample_linear_float_avx;
: ff_resample_common_float_avx; c->dsp.resample_common = ff_resample_common_float_avx;
} }
if (EXTERNAL_FMA3_FAST(mm_flags)) { if (EXTERNAL_FMA3_FAST(mm_flags)) {
c->dsp.resample = c->linear ? ff_resample_linear_float_fma3 c->dsp.resample_linear = ff_resample_linear_float_fma3;
: ff_resample_common_float_fma3; c->dsp.resample_common = ff_resample_common_float_fma3;
} }
if (EXTERNAL_FMA4(mm_flags)) { if (EXTERNAL_FMA4(mm_flags)) {
c->dsp.resample = c->linear ? ff_resample_linear_float_fma4 c->dsp.resample_linear = ff_resample_linear_float_fma4;
: ff_resample_common_float_fma4; c->dsp.resample_common = ff_resample_common_float_fma4;
} }
break; break;
case AV_SAMPLE_FMT_DBLP: case AV_SAMPLE_FMT_DBLP:
if (EXTERNAL_SSE2(mm_flags)) { if (EXTERNAL_SSE2(mm_flags)) {
c->dsp.resample = c->linear ? ff_resample_linear_double_sse2 c->dsp.resample_linear = ff_resample_linear_double_sse2;
: ff_resample_common_double_sse2; c->dsp.resample_common = ff_resample_common_double_sse2;
} }
break; break;
} }