From 9b53853756f7e7535b4178054757fc2cb90408f3 Mon Sep 17 00:00:00 2001 From: "Ronald S. Bultje" Date: Mon, 26 May 2014 20:38:18 -0400 Subject: [PATCH] Rewrite main resampling loop (common and linear). This removes a branch at a performance-sensitive point (in the middle of the loop). In fate-swr-resample-s32p-8000-2626, this makes the code about 10% faster. It also simplifies the loops, allowing us to rewrite it in yasm at some later point. The compensation_distance != 0 code and index < 0 code are still kind of hairy. For compensation_distance != 0, this should likely be handled in the caller, so that it calls swri_resample twice (once until the dst_incr switch-point, and once with the remainder of the samples). For index < 0, the code should probably be rewritten to break out of the loop once sample_index >= 0, and then resume (e.g. as a tail-call) to the common or linear resampling loops. Signed-off-by: Michael Niedermayer --- libswresample/resample_template.c | 68 +++++++++++++++++++++++-------- 1 file changed, 50 insertions(+), 18 deletions(-) diff --git a/libswresample/resample_template.c b/libswresample/resample_template.c index becff1265a..3fc83158bc 100644 --- a/libswresample/resample_template.c +++ b/libswresample/resample_template.c @@ -134,37 +134,69 @@ int RENAME(swri_resample)(ResampleContext *c, DELEM *dst, const DELEM *src, int av_assert2(index >= 0); *consumed= index; index = 0; - }else if(compensation_distance == 0 && !c->linear && index >= 0){ - int sample_index = 0; - for(dst_index=0; dst_index < dst_size; dst_index++){ - FELEM *filter; - sample_index += index >> c->phase_shift; - index &= c->phase_mask; - filter= ((FELEM*)c->filter_bank) + c->filter_alloc*index; + } else if (compensation_distance == 0 && index >= 0) { + int64_t end_index = (1 + src_size - c->filter_length) << c->phase_shift; + int64_t delta_frac = (end_index - index) * c->src_incr - c->frac; + int delta_n = (delta_frac + c->dst_incr - 1) / c->dst_incr; + int n = FFMIN(dst_size, 
delta_n); + int sample_index; + + if (!c->linear) { + sample_index = index >> c->phase_shift; + index &= c->phase_mask; + for (dst_index = 0; dst_index < n; dst_index++) { + FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index; - if(sample_index + c->filter_length > src_size){ - break; - }else{ #ifdef COMMON_CORE COMMON_CORE #else FELEM2 val=0; - for(i=0; i<c->filter_length; i++){ + for (i = 0; i < c->filter_length; i++) { val += src[sample_index + i] * (FELEM2)filter[i]; } OUT(dst[dst_index], val); #endif - } - frac += dst_incr_frac; - index += dst_incr; - if(frac >= c->src_incr){ - frac -= c->src_incr; - index++; + frac += dst_incr_frac; + index += dst_incr; + if (frac >= c->src_incr) { + frac -= c->src_incr; + index++; + } + sample_index += index >> c->phase_shift; + index &= c->phase_mask; + } + } else { + sample_index = index >> c->phase_shift; + index &= c->phase_mask; + for (dst_index = 0; dst_index < n; dst_index++) { + FELEM *filter = ((FELEM *) c->filter_bank) + c->filter_alloc * index; + FELEM2 val=0, v2 = 0; + +#ifdef LINEAR_CORE + LINEAR_CORE +#else + for (i = 0; i < c->filter_length; i++) { + val += src[sample_index + i] * (FELEM2)filter[i]; + v2 += src[sample_index + i] * (FELEM2)filter[i + c->filter_alloc]; + } +#endif + val += (v2 - val) * (FELEML) frac / c->src_incr; + OUT(dst[dst_index], val); + + frac += dst_incr_frac; + index += dst_incr; + if (frac >= c->src_incr) { + frac -= c->src_incr; + index++; + } + sample_index += index >> c->phase_shift; + index &= c->phase_mask; } } + *consumed = sample_index; - }else{ + } else { int sample_index = 0; for(dst_index=0; dst_index < dst_size; dst_index++){ FELEM *filter;