Add a checkasm test for the av_tx FFT transforms and give the C transforms
their own reverse lookup table (AVTXContext.revtab_c), which the tx_priv.h
comment says is needed because checkasm reuses the same context.

Summary of the changes carried by this patch:
  * libavutil/tx.c: ff_tx_gen_ptwo_revtab() allocates and fills revtab_c
    next to revtab; ff_tx_gen_ptwo_inplace_revtab_idx() takes the table to
    walk as an explicit argument; av_tx_uninit() frees revtab_c.
  * libavutil/tx_template.c: the C transforms read revtab_c instead of revtab.
  * tests/checkasm: new av_tx test comparing reference and new
    implementations of the float and double FFTs for a fixed list of lengths.

Review notes (corrections relative to the submitted text, all line-count
neutral so the hunk headers are unchanged):
  * ff_tx_gen_ptwo_revtab(), non-inverted branch: write revtab[k], not
    revtab[i], so that revtab keeps the same contents as before the patch and
    stays identical to revtab_c, as it does in the invert_lookup branch.
  * CHECK_TEMPLATE: pass `scale` (which already is &SCALE) to av_tx_init()
    rather than `&scale`, the address of the local pointer variable.
  * scale_double is initialised with a double literal.
  * Whitespace of context lines was reconstructed; apply with whitespace
    tolerance if a hunk is rejected. The index hashes of edited files are
    left as submitted and do not describe the corrected content.

diff --git a/libavutil/tx.c b/libavutil/tx.c
index 6d0e854084..dcfb257899 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -106,22 +106,24 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup)
 {
     const int m = s->m, inv = s->inv;
 
-    if (!(s->revtab = av_malloc(m*sizeof(*s->revtab))))
+    if (!(s->revtab = av_malloc(s->m*sizeof(*s->revtab))))
+        return AVERROR(ENOMEM);
+    if (!(s->revtab_c = av_malloc(m*sizeof(*s->revtab_c))))
         return AVERROR(ENOMEM);
 
     /* Default */
     for (int i = 0; i < m; i++) {
         int k = -split_radix_permutation(i, m, inv) & (m - 1);
         if (invert_lookup)
-            s->revtab[i] = k;
+            s->revtab[i] = s->revtab_c[i] = k;
         else
-            s->revtab[k] = i;
+            s->revtab[k] = s->revtab_c[k] = i;
     }
 
     return 0;
 }
 
-int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
+int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab)
 {
     int nb_inplace_idx = 0;
 
@@ -130,7 +132,7 @@ int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
 
     /* The first coefficient is always already in-place */
     for (int src = 1; src < s->m; src++) {
-        int dst = s->revtab[src];
+        int dst = revtab[src];
         int found = 0;
 
         if (dst <= src)
@@ -146,7 +148,7 @@ int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s)
                     break;
                 }
             }
-            dst = s->revtab[dst];
+            dst = revtab[dst];
         } while (dst != src && !found);
 
         if (!found)
@@ -215,6 +217,7 @@ av_cold void av_tx_uninit(AVTXContext **ctx)
     av_free((*ctx)->pfatab);
     av_free((*ctx)->exptab);
     av_free((*ctx)->revtab);
+    av_free((*ctx)->revtab_c);
     av_free((*ctx)->inplace_idx);
     av_free((*ctx)->tmp);
 
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index b889f6d3b4..88589fcbb4 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -122,6 +122,9 @@ struct AVTXContext {
     int        *revtab;         /* Input mapping for power of two transforms */
     int   *inplace_idx;         /* Required indices to revtab for in-place transforms */
 
+    int      *revtab_c;         /* Revtab for only the C transforms, needed because
+                                 * checkasm makes us reuse the same context. */
+
     av_tx_fn    top_tx;         /* Used for computing transforms derived from other
                                  * transforms, like full-length iMDCTs and RDFTs.
                                  * NOTE: Do NOT use this to mix assembly with C code. */
@@ -147,7 +150,7 @@ int ff_tx_gen_ptwo_revtab(AVTXContext *s, int invert_lookup);
  * specific order, allows the revtab to be done in-place. AVTXContext->revtab
  * must already exist.
  */
-int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s);
+int ff_tx_gen_ptwo_inplace_revtab_idx(AVTXContext *s, int *revtab);
 
 /*
  * This generates a parity-based revtab of length len and direction inv.
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index a68a84dcd5..cad66a8bc0 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -593,7 +593,7 @@ static void compound_fft_##N##xM(AVTXContext *s, void *_out, \
     for (int i = 0; i < m; i++) {                                              \
         for (int j = 0; j < N; j++)                                            \
             fft##N##in[j] = in[in_map[i*N + j]];                               \
-        fft##N(s->tmp + s->revtab[i], fft##N##in, m);                          \
+        fft##N(s->tmp + s->revtab_c[i], fft##N##in, m);                        \
     }                                                                          \
                                                                                \
     for (int i = 0; i < N; i++)                                                \
@@ -624,16 +624,16 @@ static void split_radix_fft(AVTXContext *s, void *_out, void *_in,
 
         do {
             tmp = out[src];
-            dst = s->revtab[src];
+            dst = s->revtab_c[src];
             do {
                 FFSWAP(FFTComplex, tmp, out[dst]);
-                dst = s->revtab[dst];
+                dst = s->revtab_c[dst];
             } while (dst != src); /* Can be > as well, but is less predictable */
             out[dst] = tmp;
         } while ((src = *inplace_idx++));
     } else {
         for (int i = 0; i < m; i++)
-            out[i] = in[s->revtab[i]];
+            out[i] = in[s->revtab_c[i]];
     }
 
     fft_dispatch[mb](out);
@@ -685,7 +685,7 @@ static void compound_imdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
             FFTComplex tmp = { in2[-k*stride], in1[k*stride] };                \
             CMUL3(fft##N##in[j], tmp, exp[k >> 1]);                            \
         }                                                                      \
-        fft##N(s->tmp + s->revtab[i], fft##N##in, m);                          \
+        fft##N(s->tmp + s->revtab_c[i], fft##N##in, m);                        \
     }                                                                          \
                                                                                \
     for (int i = 0; i < N; i++)                                                \
@@ -733,7 +733,7 @@ static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
             CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im,           \
                  exp[k >> 1].re, exp[k >> 1].im);                              \
         }                                                                      \
-        fft##N(s->tmp + s->revtab[i], fft##N##in, m);                          \
+        fft##N(s->tmp + s->revtab_c[i], fft##N##in, m);                        \
     }                                                                          \
                                                                                \
     for (int i = 0; i < N; i++)                                                \
@@ -772,7 +772,7 @@ static void monolithic_imdct(AVTXContext *s, void *_dst, void *_src,
 
     for (int i = 0; i < m; i++) {
         FFTComplex tmp = { in2[-2*i*stride], in1[2*i*stride] };
-        CMUL3(z[s->revtab[i]], tmp, exp[i]);
+        CMUL3(z[s->revtab_c[i]], tmp, exp[i]);
     }
 
     fftp(z);
@@ -806,7 +806,7 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
             tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
             tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
         }
-        CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
+        CMUL(z[s->revtab_c[i]].im, z[s->revtab_c[i]].re, tmp.re, tmp.im,
              exp[i].re, exp[i].im);
     }
 
@@ -1005,7 +1005,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
         if (flags & AV_TX_INPLACE) {
             if (is_mdct) /* In-place MDCTs are not supported yet */
                 return AVERROR(ENOSYS);
-            if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s)))
+            if ((err = ff_tx_gen_ptwo_inplace_revtab_idx(s, s->revtab_c)))
                 return err;
         }
         for (int i = 4; i <= av_log2(m); i++)
diff --git a/tests/checkasm/Makefile b/tests/checkasm/Makefile
index 1827a4e134..4ef5fa87da 100644
--- a/tests/checkasm/Makefile
+++ b/tests/checkasm/Makefile
@@ -50,6 +50,7 @@ SWSCALEOBJS += sw_rgb.o sw_scale.o
 CHECKASMOBJS-$(CONFIG_SWSCALE)  += $(SWSCALEOBJS)
 
 # libavutil tests
+AVUTILOBJS                              += av_tx.o
 AVUTILOBJS                              += fixed_dsp.o
 AVUTILOBJS                              += float_dsp.o
 
diff --git a/tests/checkasm/av_tx.c b/tests/checkasm/av_tx.c
new file mode 100644
index 0000000000..178fb61972
--- /dev/null
+++ b/tests/checkasm/av_tx.c
@@ -0,0 +1,108 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License as published by
+ * the Free Software Foundation; either version 2 of the License, or
+ * (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+ * GNU General Public License for more details.
+ *
+ * You should have received a copy of the GNU General Public License along
+ * with FFmpeg; if not, write to the Free Software Foundation, Inc.,
+ * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
+ */
+
+#include "libavutil/mem_internal.h"
+#include "libavutil/tx.h"
+#include "libavutil/error.h"
+
+#include "checkasm.h"
+
+#define EPS 0.00005
+
+#define SCALE_NOOP(x) (x)
+#define SCALE_INT20(x) (av_clip64(lrintf((x) * 2147483648.0), INT32_MIN, INT32_MAX) >> 12)
+
+#define randomize_complex(BUF, LEN, TYPE, SCALE)                \
+    do {                                                        \
+        TYPE *buf = (TYPE *)BUF;                                \
+        for (int i = 0; i < LEN; i++) {                         \
+            double fre = (double)rnd() / UINT_MAX;              \
+            double fim = (double)rnd() / UINT_MAX;              \
+            buf[i] = (TYPE){ SCALE(fre), SCALE(fim) };          \
+        }                                                       \
+    } while (0)
+
+static const int check_lens[] = {
+    2, 4, 8, 16, 32, 64, 1024, 16384,
+};
+
+#define CHECK_TEMPLATE(PREFIX, TYPE, DATA_TYPE, SCALE, LENGTHS, CHECK_EXPRESSION) \
+    do {                                                                          \
+        int err;                                                                  \
+        AVTXContext *tx;                                                          \
+        av_tx_fn fn;                                                              \
+        int num_checks = 0;                                                       \
+        int last_check = 0;                                                       \
+        const void *scale = &SCALE;                                               \
+                                                                                  \
+        for (int i = 0; i < FF_ARRAY_ELEMS(LENGTHS); i++) {                       \
+            int len = LENGTHS[i];                                                 \
+                                                                                  \
+            if ((err = av_tx_init(&tx, &fn, TYPE, 0, len, scale, 0x0)) < 0) {     \
+                fprintf(stderr, "av_tx: %s\n", av_err2str(err));                  \
+                return;                                                           \
+            }                                                                     \
+                                                                                  \
+            if (check_func(fn, PREFIX "_%i", len)) {                              \
+                num_checks++;                                                     \
+                last_check = len;                                                 \
+                call_ref(tx, out_ref, in, sizeof(DATA_TYPE));                     \
+                call_new(tx, out_new, in, sizeof(DATA_TYPE));                     \
+                if (CHECK_EXPRESSION) {                                           \
+                    fail();                                                       \
+                    break;                                                        \
+                }                                                                 \
+                bench_new(tx, out_new, in, sizeof(DATA_TYPE));                    \
+            }                                                                     \
+                                                                                  \
+            av_tx_uninit(&tx);                                                    \
+            fn = NULL;                                                            \
+        }                                                                         \
+                                                                                  \
+        av_tx_uninit(&tx);                                                        \
+        fn = NULL;                                                                \
+                                                                                  \
+        if (num_checks == 1)                                                      \
+            report(PREFIX "_%i", last_check);                                     \
+        else if (num_checks)                                                      \
+            report(PREFIX);                                                       \
+    } while (0)
+
+void checkasm_check_av_tx(void)
+{
+    const float scale_float = 1.0f;
+    const double scale_double = 1.0;
+
+    declare_func(void, AVTXContext *tx, void *out, void *in, ptrdiff_t stride);
+
+    void *in      = av_malloc(16384*2*8);
+    void *out_ref = av_malloc(16384*2*8);
+    void *out_new = av_malloc(16384*2*8);
+
+    randomize_complex(in, 16384, AVComplexFloat, SCALE_NOOP);
+    CHECK_TEMPLATE("float_fft", AV_TX_FLOAT_FFT, AVComplexFloat, scale_float, check_lens,
+                   !float_near_abs_eps_array(out_ref, out_new, EPS, len*2));
+
+    randomize_complex(in, 16384, AVComplexDouble, SCALE_NOOP);
+    CHECK_TEMPLATE("double_fft", AV_TX_DOUBLE_FFT, AVComplexDouble, scale_double, check_lens,
+                   !double_near_abs_eps_array(out_ref, out_new, EPS, len*2));
+
+    av_free(in);
+    av_free(out_ref);
+    av_free(out_new);
+}
diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c
index 8338e8ff58..e2e17d2b11 100644
--- a/tests/checkasm/checkasm.c
+++ b/tests/checkasm/checkasm.c
@@ -198,6 +198,7 @@ static const struct {
     #if CONFIG_AVUTIL
         { "fixed_dsp", checkasm_check_fixed_dsp },
         { "float_dsp", checkasm_check_float_dsp },
+        { "av_tx",     checkasm_check_av_tx },
     #endif
     { NULL }
 };
diff --git a/tests/checkasm/checkasm.h b/tests/checkasm/checkasm.h
index ef6645e3a2..0593d0edac 100644
--- a/tests/checkasm/checkasm.h
+++ b/tests/checkasm/checkasm.h
@@ -43,6 +43,7 @@ void checkasm_check_aacpsdsp(void);
 void checkasm_check_afir(void);
 void checkasm_check_alacdsp(void);
 void checkasm_check_audiodsp(void);
+void checkasm_check_av_tx(void);
 void checkasm_check_blend(void);
 void checkasm_check_blockdsp(void);
 void checkasm_check_bswapdsp(void);
diff --git a/tests/fate/checkasm.mak b/tests/fate/checkasm.mak
index 07f1d8238e..3108fcd510 100644
--- a/tests/fate/checkasm.mak
+++ b/tests/fate/checkasm.mak
@@ -2,6 +2,7 @@ FATE_CHECKASM = fate-checkasm-aacpsdsp \
                 fate-checkasm-af_afir                                   \
                 fate-checkasm-alacdsp                                   \
                 fate-checkasm-audiodsp                                  \
+                fate-checkasm-av_tx                                     \
                 fate-checkasm-blockdsp                                  \
                 fate-checkasm-bswapdsp                                  \
                 fate-checkasm-exrdsp                                    \