diff --git a/doc/APIchanges b/doc/APIchanges
index 30f188d6aa..761f37f2d2 100644
--- a/doc/APIchanges
+++ b/doc/APIchanges
@@ -15,6 +15,9 @@ libavutil: 2017-10-21
 
 API changes, most recent first:
 
+2020-02-13 - xxxxxxxxxx - lavu 56.41.100 - tx.h
+  Add AV_TX_INT32_FFT and AV_TX_INT32_MDCT
+
 2020-02-12 - xxxxxxxxxx - lavu 56.40.100 - log.h
   Add av_log_once().
 
diff --git a/libavutil/Makefile b/libavutil/Makefile
index b189f9abea..a2dae8e89a 100644
--- a/libavutil/Makefile
+++ b/libavutil/Makefile
@@ -163,7 +163,8 @@ OBJS = adler32.o \
        tea.o \
        tx.o \
        tx_float.o \
-       tx_double.o
+       tx_double.o \
+       tx_int32.o
 
 OBJS-$(CONFIG_CUDA) += hwcontext_cuda.o
 OBJS-$(CONFIG_D3D11VA) += hwcontext_d3d11va.o
diff --git a/libavutil/tx.c b/libavutil/tx.c
index b8683b416b..3b0568a5e1 100644
--- a/libavutil/tx.c
+++ b/libavutil/tx.c
@@ -18,6 +18,18 @@
 
 #include "tx_priv.h"
 
+int ff_tx_type_is_mdct(enum AVTXType type)
+{
+    switch (type) {
+    case AV_TX_FLOAT_MDCT:
+    case AV_TX_DOUBLE_MDCT:
+    case AV_TX_INT32_MDCT:
+        return 1;
+    default:
+        return 0;
+    }
+}
+
 /* Calculates the modular multiplicative inverse, not fast, replace */
 static av_always_inline int mulinv(int n, int m)
 {
@@ -35,11 +47,10 @@ int ff_tx_gen_compound_mapping(AVTXContext *s)
     const int n     = s->n;
     const int m     = s->m;
     const int inv   = s->inv;
-    const int type  = s->type;
     const int len   = n*m;
     const int m_inv = mulinv(m, n);
     const int n_inv = mulinv(n, m);
-    const int mdct  = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
+    const int mdct  = ff_tx_type_is_mdct(s->type);
 
     if (!(s->pfatab = av_malloc(2*len*sizeof(*s->pfatab))))
         return AVERROR(ENOMEM);
@@ -128,6 +139,11 @@ av_cold int av_tx_init(AVTXContext **ctx, av_tx_fn *tx, enum AVTXType type,
         if ((err = ff_tx_init_mdct_fft_double(s, tx, type, inv, len, scale, flags)))
             goto fail;
         break;
+    case AV_TX_INT32_FFT:
+    case AV_TX_INT32_MDCT:
+        if ((err = ff_tx_init_mdct_fft_int32(s, tx, type, inv, len, scale, flags)))
+            goto fail;
+        break;
     default:
         err = AVERROR(EINVAL);
         goto fail;
diff --git a/libavutil/tx.h b/libavutil/tx.h
index 8b405c0021..53018c84e6 100644
--- a/libavutil/tx.h
+++ b/libavutil/tx.h
@@ -32,6 +32,10 @@ typedef struct AVComplexDouble {
     double re, im;
 } AVComplexDouble;
 
+typedef struct AVComplexInt32 {
+    int32_t re, im;
+} AVComplexInt32;
+
 enum AVTXType {
     /**
     * Standard complex to complex FFT with sample data type AVComplexFloat.
@@ -51,6 +55,15 @@ enum AVTXType {
     * Same as AV_TX_FLOAT_MDCT with data and scale type of double.
     */
     AV_TX_DOUBLE_MDCT = 3,
+    /**
+     * Same as AV_TX_FLOAT_FFT with a data type of AVComplexInt32.
+     */
+    AV_TX_INT32_FFT = 4,
+    /**
+     * Same as AV_TX_FLOAT_MDCT with data type of int32_t and scale type of float.
+     * Only scale values less than or equal to 1.0 are supported.
+     */
+    AV_TX_INT32_MDCT = 5,
 };
 
 /**
diff --git a/libavutil/tx_int32.c b/libavutil/tx_int32.c
new file mode 100644
index 0000000000..9261013bf6
--- /dev/null
+++ b/libavutil/tx_int32.c
@@ -0,0 +1,21 @@
+/*
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define TX_INT32
+#include "tx_priv.h"
+#include "tx_template.c"
diff --git a/libavutil/tx_priv.h b/libavutil/tx_priv.h
index 94517b4b47..6fabea2d4d 100644
--- a/libavutil/tx_priv.h
+++ b/libavutil/tx_priv.h
@@ -28,28 +28,77 @@
 
 #ifdef TX_FLOAT
 #define TX_NAME(x) x ## _float
+#define SCALE_TYPE float
 typedef float FFTSample;
 typedef AVComplexFloat FFTComplex;
 #elif defined(TX_DOUBLE)
 #define TX_NAME(x) x ## _double
+#define SCALE_TYPE double
 typedef double FFTSample;
 typedef AVComplexDouble FFTComplex;
+#elif defined(TX_INT32)
+#define TX_NAME(x) x ## _int32
+#define SCALE_TYPE float
+typedef int32_t FFTSample;
+typedef AVComplexInt32 FFTComplex;
 #else
 typedef void FFTComplex;
 #endif
 
 #if defined(TX_FLOAT) || defined(TX_DOUBLE)
-#define BF(x, y, a, b) do { \
-        x = (a) - (b); \
-        y = (a) + (b); \
-    } while (0)
+
+#define MUL(x, y) ((x)*(y))
 
 #define CMUL(dre, dim, are, aim, bre, bim) do { \
         (dre) = (are) * (bre) - (aim) * (bim); \
         (dim) = (are) * (bim) + (aim) * (bre); \
     } while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do { \
+        (dre) = (are) * (bre) - (aim) * (bim); \
+        (dim) = (are) * (bim) - (aim) * (bre); \
+    } while (0)
+
+#define RESCALE(x) (x)
+
+#define FOLD(a, b) ((a) + (b))
+
+#elif defined(TX_INT32)
+
+#define MUL(x, y) ((int32_t)(((int64_t)(x) * (int64_t)(y) + 0x40000000) >> 31))
+
+/* Properly rounds the result */
+#define CMUL(dre, dim, are, aim, bre, bim) do { \
+        int64_t accu; \
+        (accu)  = (int64_t)(bre) * (are); \
+        (accu) -= (int64_t)(bim) * (aim); \
+        (dre)   = (int)(((accu) + 0x40000000) >> 31); \
+        (accu)  = (int64_t)(bim) * (are); \
+        (accu) += (int64_t)(bre) * (aim); \
+        (dim)   = (int)(((accu) + 0x40000000) >> 31); \
+    } while (0)
+
+#define SMUL(dre, dim, are, aim, bre, bim) do { \
+        int64_t accu; \
+        (accu)  = (int64_t)(bre) * (are); \
+        (accu) -= (int64_t)(bim) * (aim); \
+        (dre)   = (int)(((accu) + 0x40000000) >> 31); \
+        (accu)  = (int64_t)(bim) * (are); \
+        (accu) -= (int64_t)(bre) * (aim); \
+        (dim)   = (int)(((accu) + 0x40000000) >> 31); \
+    } while (0)
+
+#define RESCALE(x) (lrintf((x) * 2147483648.0))
+
+#define FOLD(x, y) ((int)((x) + (unsigned)(y) + 32) >> 6)
+
 #endif
 
+#define BF(x, y, a, b) do { \
+        x = (a) - (b); \
+        y = (a) + (b); \
+    } while (0)
+
 #define CMUL3(c, a, b) \
     CMUL((c).re, (c).im, (a).re, (a).im, (b).re, (b).im)
 
@@ -70,6 +119,7 @@ struct AVTXContext {
 };
 
 /* Shared functions */
+int ff_tx_type_is_mdct(enum AVTXType type);
 int ff_tx_gen_compound_mapping(AVTXContext *s);
 int ff_tx_gen_ptwo_revtab(AVTXContext *s);
 
@@ -96,6 +146,9 @@ int ff_tx_init_mdct_fft_float(AVTXContext *s, av_tx_fn *tx,
 int ff_tx_init_mdct_fft_double(AVTXContext *s, av_tx_fn *tx,
                                enum AVTXType type, int inv, int len,
                                const void *scale, uint64_t flags);
+int ff_tx_init_mdct_fft_int32(AVTXContext *s, av_tx_fn *tx,
+                              enum AVTXType type, int inv, int len,
+                              const void *scale, uint64_t flags);
 
 typedef struct CosTabsInitOnce {
     void (*func)(void);
diff --git a/libavutil/tx_template.c b/libavutil/tx_template.c
index 9196ee383d..d33c9ce351 100644
--- a/libavutil/tx_template.c
+++ b/libavutil/tx_template.c
@@ -66,7 +66,7 @@ static av_always_inline void init_cos_tabs_idx(int index)
     double freq = 2*M_PI/m;
     FFTSample *tab = cos_tabs[index];
     for(int i = 0; i <= m/4; i++)
-        tab[i] = cos(i*freq);
+        tab[i] = RESCALE(cos(i*freq));
     for(int i = 1; i < m/4; i++)
         tab[m/2 - i] = tab[i];
 }
@@ -94,10 +94,10 @@ INIT_FF_COS_TABS_FUNC(17, 131072)
 
 static av_cold void ff_init_53_tabs(void)
 {
-    TX_NAME(ff_cos_53)[0] = (FFTComplex){ cos(2 * M_PI / 12), cos(2 * M_PI / 12) };
-    TX_NAME(ff_cos_53)[1] = (FFTComplex){ 0.5, 0.5 };
-    TX_NAME(ff_cos_53)[2] = (FFTComplex){ cos(2 * M_PI / 5), sin(2 * M_PI / 5) };
-    TX_NAME(ff_cos_53)[3] = (FFTComplex){ cos(2 * M_PI / 10), sin(2 * M_PI / 10) };
+    TX_NAME(ff_cos_53)[0] = (FFTComplex){ RESCALE(cos(2 * M_PI / 12)), RESCALE(cos(2 * M_PI / 12)) };
+    TX_NAME(ff_cos_53)[1] = (FFTComplex){ RESCALE(cos(2 * M_PI / 6)), RESCALE(cos(2 * M_PI / 6)) };
+    TX_NAME(ff_cos_53)[2] = (FFTComplex){ RESCALE(cos(2 * M_PI / 5)), RESCALE(sin(2 * M_PI / 5)) };
+    TX_NAME(ff_cos_53)[3] = (FFTComplex){ RESCALE(cos(2 * M_PI / 10)), RESCALE(sin(2 * M_PI / 10)) };
 }
 
 static CosTabsInitOnce cos_tabs_init_once[] = {
@@ -132,18 +132,16 @@ static av_always_inline void fft3(FFTComplex *out, FFTComplex *in,
 {
     FFTComplex tmp[2];
 
-    tmp[0].re = in[1].im - in[2].im;
-    tmp[0].im = in[1].re - in[2].re;
-    tmp[1].re = in[1].re + in[2].re;
-    tmp[1].im = in[1].im + in[2].im;
+    BF(tmp[0].re, tmp[1].im, in[1].im, in[2].im);
+    BF(tmp[0].im, tmp[1].re, in[1].re, in[2].re);
 
     out[0*stride].re = in[0].re + tmp[1].re;
     out[0*stride].im = in[0].im + tmp[1].im;
 
-    tmp[0].re *= TX_NAME(ff_cos_53)[0].re;
-    tmp[0].im *= TX_NAME(ff_cos_53)[0].im;
-    tmp[1].re *= TX_NAME(ff_cos_53)[1].re;
-    tmp[1].im *= TX_NAME(ff_cos_53)[1].re;
+    tmp[0].re = MUL(TX_NAME(ff_cos_53)[0].re, tmp[0].re);
+    tmp[0].im = MUL(TX_NAME(ff_cos_53)[0].im, tmp[0].im);
+    tmp[1].re = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].re);
+    tmp[1].im = MUL(TX_NAME(ff_cos_53)[1].re, tmp[1].im);
 
     out[1*stride].re = in[0].re - tmp[1].re + tmp[0].re;
     out[1*stride].im = in[0].im - tmp[1].im - tmp[0].im;
@@ -151,61 +149,38 @@ static av_always_inline void fft3(FFTComplex *out, FFTComplex *in,
     out[2*stride].im = in[0].im - tmp[1].im + tmp[0].im;
 }
 
-#define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
-static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
-                                   ptrdiff_t stride) \
-{ \
-    FFTComplex z0[4], t[6]; \
- \
-    t[0].re = in[1].re + in[4].re; \
-    t[0].im = in[1].im + in[4].im; \
-    t[1].im = in[1].re - in[4].re; \
-    t[1].re = in[1].im - in[4].im; \
-    t[2].re = in[2].re + in[3].re; \
-    t[2].im = in[2].im + in[3].im; \
-    t[3].im = in[2].re - in[3].re; \
-    t[3].re = in[2].im - in[3].im; \
- \
-    out[D0*stride].re = in[0].re + in[1].re + in[2].re + \
-                        in[3].re + in[4].re; \
-    out[D0*stride].im = in[0].im + in[1].im + in[2].im + \
-                        in[3].im + in[4].im; \
- \
-    t[4].re = TX_NAME(ff_cos_53)[2].re * t[2].re; \
-    t[4].im = TX_NAME(ff_cos_53)[2].re * t[2].im; \
-    t[4].re -= TX_NAME(ff_cos_53)[3].re * t[0].re; \
-    t[4].im -= TX_NAME(ff_cos_53)[3].re * t[0].im; \
-    t[0].re = TX_NAME(ff_cos_53)[2].re * t[0].re; \
-    t[0].im = TX_NAME(ff_cos_53)[2].re * t[0].im; \
-    t[0].re -= TX_NAME(ff_cos_53)[3].re * t[2].re; \
-    t[0].im -= TX_NAME(ff_cos_53)[3].re * t[2].im; \
-    t[5].re = TX_NAME(ff_cos_53)[2].im * t[3].re; \
-    t[5].im = TX_NAME(ff_cos_53)[2].im * t[3].im; \
-    t[5].re -= TX_NAME(ff_cos_53)[3].im * t[1].re; \
-    t[5].im -= TX_NAME(ff_cos_53)[3].im * t[1].im; \
-    t[1].re = TX_NAME(ff_cos_53)[2].im * t[1].re; \
-    t[1].im = TX_NAME(ff_cos_53)[2].im * t[1].im; \
-    t[1].re += TX_NAME(ff_cos_53)[3].im * t[3].re; \
-    t[1].im += TX_NAME(ff_cos_53)[3].im * t[3].im; \
- \
-    z0[0].re = t[0].re - t[1].re; \
-    z0[0].im = t[0].im - t[1].im; \
-    z0[1].re = t[4].re + t[5].re; \
-    z0[1].im = t[4].im + t[5].im; \
- \
-    z0[2].re = t[4].re - t[5].re; \
-    z0[2].im = t[4].im - t[5].im; \
-    z0[3].re = t[0].re + t[1].re; \
-    z0[3].im = t[0].im + t[1].im; \
- \
-    out[D1*stride].re = in[0].re + z0[3].re; \
-    out[D1*stride].im = in[0].im + z0[0].im; \
-    out[D2*stride].re = in[0].re + z0[2].re; \
-    out[D2*stride].im = in[0].im + z0[1].im; \
-    out[D3*stride].re = in[0].re + z0[1].re; \
-    out[D3*stride].im = in[0].im + z0[2].im; \
-    out[D4*stride].re = in[0].re + z0[0].re; \
-    out[D4*stride].im = in[0].im + z0[3].im; \
+#define DECL_FFT5(NAME, D0, D1, D2, D3, D4) \
+static av_always_inline void NAME(FFTComplex *out, FFTComplex *in, \
+                                   ptrdiff_t stride) \
+{ \
+    FFTComplex z0[4], t[6]; \
+ \
+    BF(t[1].im, t[0].re, in[1].re, in[4].re); \
+    BF(t[1].re, t[0].im, in[1].im, in[4].im); \
+    BF(t[3].im, t[2].re, in[2].re, in[3].re); \
+    BF(t[3].re, t[2].im, in[2].im, in[3].im); \
+ \
+    out[D0*stride].re = in[0].re + in[1].re + in[2].re + in[3].re + in[4].re; \
+    out[D0*stride].im = in[0].im + in[1].im + in[2].im + in[3].im + in[4].im; \
+ \
+    SMUL(t[4].re, t[0].re, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].re, t[0].re); \
+    SMUL(t[4].im, t[0].im, TX_NAME(ff_cos_53)[2].re, TX_NAME(ff_cos_53)[3].re, t[2].im, t[0].im); \
+    CMUL(t[5].re, t[1].re, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].re, t[1].re); \
+    CMUL(t[5].im, t[1].im, TX_NAME(ff_cos_53)[2].im, TX_NAME(ff_cos_53)[3].im, t[3].im, t[1].im); \
+ \
+    BF(z0[0].re, z0[3].re, t[0].re, t[1].re); \
+    BF(z0[0].im, z0[3].im, t[0].im, t[1].im); \
+    BF(z0[2].re, z0[1].re, t[4].re, t[5].re); \
+    BF(z0[2].im, z0[1].im, t[4].im, t[5].im); \
+ \
+    out[D1*stride].re = in[0].re + z0[3].re; \
+    out[D1*stride].im = in[0].im + z0[0].im; \
+    out[D2*stride].re = in[0].re + z0[2].re; \
+    out[D2*stride].im = in[0].im + z0[1].im; \
+    out[D3*stride].re = in[0].re + z0[1].re; \
+    out[D3*stride].im = in[0].im + z0[2].im; \
+    out[D4*stride].re = in[0].re + z0[0].re; \
+    out[D4*stride].im = in[0].im + z0[3].im; \
 }
 
 DECL_FFT5(fft5, 0, 1, 2, 3, 4)
@@ -324,7 +299,7 @@ static void fft8(FFTComplex *z)
     BF(t6, z[7].im, z[6].im, -z[7].im);
 
     BUTTERFLIES(z[0],z[2],z[4],z[6]);
-    TRANSFORM(z[1],z[3],z[5],z[7],M_SQRT1_2,M_SQRT1_2);
+    TRANSFORM(z[1],z[3],z[5],z[7],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
 }
 
 static void fft16(FFTComplex *z)
@@ -338,7 +313,7 @@ static void fft16(FFTComplex *z)
     fft4(z+12);
 
     TRANSFORM_ZERO(z[0],z[4],z[8],z[12]);
-    TRANSFORM(z[2],z[6],z[10],z[14],M_SQRT1_2,M_SQRT1_2);
+    TRANSFORM(z[2],z[6],z[10],z[14],RESCALE(M_SQRT1_2),RESCALE(M_SQRT1_2));
     TRANSFORM(z[1],z[5],z[9],z[13],cos_16_1,cos_16_3);
     TRANSFORM(z[3],z[7],z[11],z[15],cos_16_3,cos_16_1);
 }
@@ -459,11 +434,11 @@ static void compound_mdct_##N##xM(AVTXContext *s, void *_dst, void *_src, \
         for (int j = 0; j < N; j++) { \
             const int k = in_map[i*N + j]; \
             if (k < len4) { \
-                tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k]; \
-                tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k]; \
+                tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]); \
+                tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]); \
             } else { \
-                tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k]; \
-                tmp.im =  src[-len4 + k] - src[1*len3 - 1 - k]; \
+                tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]); \
+                tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]); \
             } \
             CMUL(fft##N##in[j].im, fft##N##in[j].re, tmp.re, tmp.im, \
                  exp[k >> 1].re, exp[k >> 1].im); \
@@ -533,11 +508,11 @@ static void monolithic_mdct(AVTXContext *s, void *_dst, void *_src,
     for (int i = 0; i < m; i++) { /* Folding and pre-reindexing */
         const int k = 2*i;
         if (k < len4) {
-            tmp.re = -src[ len4 + k] + src[1*len4 - 1 - k];
-            tmp.im = -src[ len3 + k] - src[1*len3 - 1 - k];
+            tmp.re = FOLD(-src[ len4 + k],  src[1*len4 - 1 - k]);
+            tmp.im = FOLD(-src[ len3 + k], -src[1*len3 - 1 - k]);
         } else {
-            tmp.re = -src[ len4 + k] - src[5*len4 - 1 - k];
-            tmp.im =  src[-len4 + k] - src[1*len3 - 1 - k];
+            tmp.re = FOLD(-src[ len4 + k], -src[5*len4 - 1 - k]);
+            tmp.im = FOLD( src[-len4 + k], -src[1*len3 - 1 - k]);
         }
         CMUL(z[s->revtab[i]].im, z[s->revtab[i]].re, tmp.re, tmp.im,
              exp[i].re, exp[i].im);
@@ -567,8 +542,8 @@ static int gen_mdct_exptab(AVTXContext *s, int len4, double scale)
     scale = sqrt(fabs(scale));
     for (int i = 0; i < len4; i++) {
         const double alpha = M_PI_2 * (i + theta) / len4;
-        s->exptab[i].re = cos(alpha) * scale;
-        s->exptab[i].im = sin(alpha) * scale;
+        s->exptab[i].re = RESCALE(cos(alpha) * scale);
+        s->exptab[i].im = RESCALE(sin(alpha) * scale);
     }
 
     return 0;
@@ -578,7 +553,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
                                  enum AVTXType type, int inv, int len,
                                  const void *scale, uint64_t flags)
 {
-    const int is_mdct = type == AV_TX_FLOAT_MDCT || type == AV_TX_DOUBLE_MDCT;
+    const int is_mdct = ff_tx_type_is_mdct(type);
     int err, n = 1, m = 1, max_ptwo = 1 << (FF_ARRAY_ELEMS(fft_dispatch) + 1);
 
     if (is_mdct)
@@ -637,7 +612,7 @@ int TX_NAME(ff_tx_init_mdct_fft)(AVTXContext *s, av_tx_fn *tx,
     }
 
     if (is_mdct)
-        return gen_mdct_exptab(s, n*m, *((FFTSample *)scale));
+        return gen_mdct_exptab(s, n*m, *((SCALE_TYPE *)scale));
 
     return 0;
 }
diff --git a/libavutil/version.h b/libavutil/version.h
index 633a21dca3..90cc55b9ac 100644
--- a/libavutil/version.h
+++ b/libavutil/version.h
@@ -79,7 +79,7 @@
  */
 
 #define LIBAVUTIL_VERSION_MAJOR  56
-#define LIBAVUTIL_VERSION_MINOR  40
+#define LIBAVUTIL_VERSION_MINOR  41
#define LIBAVUTIL_VERSION_MICRO 100
 
 #define LIBAVUTIL_VERSION_INT   AV_VERSION_INT(LIBAVUTIL_VERSION_MAJOR, \
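
Not part of the patch, for review context only: a minimal sketch of how the new AV_TX_INT32_MDCT type would be driven through the existing av_tx_init()/av_tx_fn entry points declared in tx.h. The helper name and buffer handling are illustrative assumptions; the scale is passed as a float, as the new documentation requires, and must be <= 1.0, and buffer sizes/stride follow the AV_TX_FLOAT_MDCT conventions that the int32 variant mirrors.

    #include <stdint.h>
    #include "libavutil/tx.h"

    /* Hypothetical helper: forward MDCT of one frame of int32_t samples.
     * in holds 2*len windowed samples, out receives len coefficients. */
    static int mdct_int32(int32_t *out, int32_t *in, int len)
    {
        AVTXContext *ctx = NULL;
        av_tx_fn mdct = NULL;
        float scale = 1.0f; /* AV_TX_INT32_MDCT takes a float scale <= 1.0 */
        int ret = av_tx_init(&ctx, &mdct, AV_TX_INT32_MDCT, 0 /* forward */,
                             len, &scale, 0);
        if (ret < 0)
            return ret;

        mdct(ctx, out, in, sizeof(int32_t)); /* stride between samples, in bytes */

        av_tx_uninit(&ctx);
        return 0;
    }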
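Also not part of the patch: a standalone illustration of the fixed-point convention used by the new TX_INT32 branch in tx_priv.h. Coefficients are stored in Q31 (RESCALE() scales by 2^31 with rounding), and MUL()/CMUL()/SMUL() compute 64-bit products and add 0x40000000 before shifting right by 31, so Q31 results are rounded rather than truncated. The function names below are made up for the example; the arithmetic mirrors the patch's macros (using lrint() here rather than lrintf()).

    #include <inttypes.h>
    #include <math.h>
    #include <stdint.h>
    #include <stdio.h>

    /* Convert a [-1.0, 1.0) value to Q31, as the TX_INT32 RESCALE() does. */
    static int32_t rescale_q31(double x)
    {
        return (int32_t)lrint(x * 2147483648.0);
    }

    /* Rounded Q31 multiply, same arithmetic as the TX_INT32 MUL() macro. */
    static int32_t mul_q31(int32_t a, int32_t b)
    {
        return (int32_t)(((int64_t)a * b + 0x40000000) >> 31);
    }

    int main(void)
    {
        int32_t half = rescale_q31(0.5);  /* 0x40000000 */
        int32_t x    = rescale_q31(0.75);
        /* 0.5 * 0.75 = 0.375; the rounded Q31 product equals rescale_q31(0.375) */
        printf("%"PRId32" %"PRId32"\n", mul_q31(half, x), rescale_q31(0.375));
        return 0;
    }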