Move some mpegaudio functions to new mpegaudiodsp subsystem

This separation allows these functions to be used in a cleaner fashion from other codecs (e.g. qdm2) and simplifies creating optimised versions of them.

Signed-off-by: Mans Rullgard <mans@mansr.com>
2011-05-16 16:52:01 +01:00 · 2011-05-16 16:52:01 +01:00 · c4f5c2d6f4
commit c4f5c2d6f4
parent ea91e77127
17 changed files with 390 additions and 255 deletions
--- a/19
+++ b/19
@ -952,6 +952,7 @@ CONFIG_LIST="
    mdct
    memalign_hack
    mlib
+    mpegaudiodsp
    network
    nonfree
    pic
@ -1235,6 +1236,7 @@ symver_if_any="symver_asm_label symver_gnu_asm"
 dct_select="rdft"
 mdct_select="fft"
 rdft_select="fft"
+mpegaudiodsp_select="dct"

 # decoders / encoders / hardware accelerators
 aac_decoder_select="mdct sinewin"
@ -1286,11 +1288,16 @@ ljpeg_encoder_select="aandct"
 loco_decoder_select="golomb"
 mjpeg_encoder_select="aandct"
 mlp_decoder_select="mlp_parser"
-mp1float_decoder_select="dct"
-mp2float_decoder_select="dct"
-mp3adufloat_decoder_select="dct"
-mp3float_decoder_select="dct"
-mp3on4float_decoder_select="dct"
+mp1_decoder_select="mpegaudiodsp"
+mp2_decoder_select="mpegaudiodsp"
+mp3adu_decoder_select="mpegaudiodsp"
+mp3_decoder_select="mpegaudiodsp"
+mp3on4_decoder_select="mpegaudiodsp"
+mp1float_decoder_select="mpegaudiodsp"
+mp2float_decoder_select="mpegaudiodsp"
+mp3adufloat_decoder_select="mpegaudiodsp"
+mp3float_decoder_select="mpegaudiodsp"
+mp3on4float_decoder_select="mpegaudiodsp"
 mpeg1video_encoder_select="aandct"
 mpeg2video_encoder_select="aandct"
 mpeg4_decoder_select="h263_decoder mpeg4video_parser"
@ -1315,7 +1322,7 @@ nellymoser_encoder_select="mdct sinewin"
 png_decoder_select="zlib"
 png_encoder_select="zlib"
 qcelp_decoder_select="lsp"
-qdm2_decoder_select="mdct rdft"
+qdm2_decoder_select="mdct rdft mpegaudiodsp"
 ra_144_encoder_select="lpc"
 rv10_decoder_select="h263_decoder"
 rv10_encoder_select="h263_encoder"
--- a/libavcodec/Makefile
+++ b/libavcodec/Makefile
@ -40,6 +40,9 @@ OBJS-$(CONFIG_HUFFMAN)                 += huffman.o
 OBJS-$(CONFIG_LPC)                     += lpc.o
 OBJS-$(CONFIG_LSP)                     += lsp.o
 OBJS-$(CONFIG_MDCT)                    += mdct_fixed.o mdct_float.o
+OBJS-$(CONFIG_MPEGAUDIODSP)            += mpegaudiodsp.o                \
+                                          mpegaudiodsp_fixed.o          \
+                                          mpegaudiodsp_float.o
 RDFT-OBJS-$(CONFIG_HARDCODED_TABLES)   += sin_tables.o
 OBJS-$(CONFIG_RDFT)                    += rdft.o $(RDFT-OBJS-yes)
 OBJS-$(CONFIG_SINEWIN)                 += sinewin.o
--- a/libavcodec/mpc.c
+++ b/libavcodec/mpc.c
@ -29,6 +29,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
+#include "mpegaudiodsp.h"
 #include "mpegaudio.h"

 #include "mpc.h"
@ -51,7 +52,8 @@ static void mpc_synth(MPCContext *c, int16_t *out, int channels)
    for(ch = 0;  ch < channels; ch++){
        samples_ptr = samples + ch;
        for(i = 0; i < SAMPLES_PER_BAND; i++) {
-            ff_mpa_synth_filter_fixed(c->synth_buf[ch], &(c->synth_buf_offset[ch]),
+            ff_mpa_synth_filter_fixed(&c->mpadsp,
+                                c->synth_buf[ch], &(c->synth_buf_offset[ch]),
                                ff_mpa_synth_window_fixed, &dither_state,
                                samples_ptr, channels,
                                c->sb_samples[ch][i]);
--- a/libavcodec/mpc.h
+++ b/libavcodec/mpc.h
@ -52,6 +52,7 @@ typedef struct {

 typedef struct {
    DSPContext dsp;
+    MPADSPContext mpadsp;
    GetBitContext gb;
    int IS, MSS, gapless;
    int lastframelen;
--- a/libavcodec/mpc7.c
+++ b/libavcodec/mpc7.c
@ -29,7 +29,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "mpegaudio.h"
+#include "mpegaudiodsp.h"
 #include "libavutil/audioconvert.h"

 #include "mpc.h"
@ -68,6 +68,7 @@ static av_cold int mpc7_decode_init(AVCodecContext * avctx)
    memset(c->oldDSCF, 0, sizeof(c->oldDSCF));
    av_lfg_init(&c->rnd, 0xDEADBEEF);
    dsputil_init(&c->dsp, avctx);
+    ff_mpadsp_init(&c->mpadsp);
    c->dsp.bswap_buf((uint32_t*)buf, (const uint32_t*)avctx->extradata, 4);
    ff_mpc_init();
    init_get_bits(&gb, buf, 128);
--- a/libavcodec/mpc8.c
+++ b/libavcodec/mpc8.c
@ -29,7 +29,7 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "mpegaudio.h"
+#include "mpegaudiodsp.h"
 #include "libavutil/audioconvert.h"

 #include "mpc.h"
@ -120,6 +120,7 @@ static av_cold int mpc8_decode_init(AVCodecContext * avctx)
    memset(c->oldDSCF, 0, sizeof(c->oldDSCF));
    av_lfg_init(&c->rnd, 0xDEADBEEF);
    dsputil_init(&c->dsp, avctx);
+    ff_mpadsp_init(&c->mpadsp);

    ff_mpc_init();

--- a/libavcodec/mpegaudio.h
+++ b/libavcodec/mpegaudio.h
@ -33,7 +33,6 @@
 #include "avcodec.h"
 #include "get_bits.h"
 #include "dsputil.h"
-#include "dct.h"

 /* max frame size, in samples */
 #define MPA_FRAME_SIZE 1152
@ -69,7 +68,6 @@
 typedef float OUT_INT;
 #else
 typedef int16_t OUT_INT;
-#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
 #endif

 #if CONFIG_FLOAT
@ -142,11 +140,7 @@ typedef struct MPADecodeContext {
    int dither_state;
    int error_recognition;
    AVCodecContext* avctx;
-#if CONFIG_FLOAT
-    DCTContext dct;
-#endif
-    void (*apply_window_mp3)(MPA_INT *synth_buf, MPA_INT *window,
-                             int *dither_state, OUT_INT *samples, int incr);
+    MPADSPContext mpadsp;
 } MPADecodeContext;

 /* layer 3 huffman tables */
@ -158,22 +152,6 @@ typedef struct HuffTable {

 int ff_mpa_l2_select_table(int bitrate, int nb_channels, int freq, int lsf);
 int ff_mpa_decode_header(AVCodecContext *avctx, uint32_t head, int *sample_rate, int *channels, int *frame_size, int *bitrate);
-extern MPA_INT ff_mpa_synth_window_fixed[];
-void ff_mpa_synth_init_fixed(MPA_INT *window);
-void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT]);
-
-void ff_mpa_synth_init_float(MPA_INT *window);
-void ff_mpa_synth_filter_float(MPADecodeContext *s,
-                         MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT]);
-
-void ff_mpegaudiodec_init_mmx(MPADecodeContext *s);
-void ff_mpegaudiodec_init_altivec(MPADecodeContext *s);

 /* fast header check for resync */
 static inline int ff_mpa_check_header(uint32_t header){
--- a/libavcodec/mpegaudiodec.c
+++ b/libavcodec/mpegaudiodec.c
@ -29,7 +29,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "mathops.h"
-#include "dct32.h"
+#include "mpegaudiodsp.h"

 /*
 * TODO:
@ -68,8 +68,6 @@
 #include "mpegaudiodectab.h"

 static void RENAME(compute_antialias)(MPADecodeContext *s, GranuleDef *g);
-static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
-                               int *dither_state, OUT_INT *samples, int incr);

 /* vlc structure for decoding layer 3 huffman tables */
 static VLC huff_vlc[16];
@ -119,8 +117,6 @@ static const int32_t scale_factor_mult2[3][3] = {
    SCALE_GEN(4.0 / 9.0), /* 9 steps */
 };

-DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];
-
 /**
 * Convert region offsets to region sizes and truncate
 * size to big_values.
@ -259,14 +255,8 @@ static av_cold int decode_init(AVCodecContext * avctx)
    int i, j, k;

    s->avctx = avctx;
-    s->apply_window_mp3 = apply_window_mp3_c;
-#if HAVE_MMX && CONFIG_FLOAT
-    ff_mpegaudiodec_init_mmx(s);
-#endif
-#if CONFIG_FLOAT
-    ff_dct_init(&s->dct, 5, DCT_II);
-#endif
-    if (HAVE_ALTIVEC && CONFIG_FLOAT) ff_mpegaudiodec_init_altivec(s);
+
+    ff_mpadsp_init(&s->mpadsp);

    avctx->sample_fmt= OUT_FMT;
    s->error_recognition= avctx->error_recognition;
@ -461,183 +451,6 @@ static av_cold int decode_init(AVCodecContext * avctx)
    return 0;
 }

-
-#if CONFIG_FLOAT
-static inline float round_sample(float *sum)
-{
-    float sum1=*sum;
-    *sum = 0;
-    return sum1;
-}
-
-/* signed 16x16 -> 32 multiply add accumulate */
-#define MACS(rt, ra, rb) rt+=(ra)*(rb)
-
-/* signed 16x16 -> 32 multiply */
-#define MULS(ra, rb) ((ra)*(rb))
-
-#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
-
-#else
-
-static inline int round_sample(int64_t *sum)
-{
-    int sum1;
-    sum1 = (int)((*sum) >> OUT_SHIFT);
-    *sum &= (1<<OUT_SHIFT)-1;
-    return av_clip_int16(sum1);
-}
-
-#   define MULS(ra, rb) MUL64(ra, rb)
-#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
-#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
-#endif
-
-#define SUM8(op, sum, w, p)               \
-{                                         \
-    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
-    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
-    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
-    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
-    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
-    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
-    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
-    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
-}
-
-#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
-{                                               \
-    INTFLOAT tmp;\
-    tmp = p[0 * 64];\
-    op1(sum1, (w1)[0 * 64], tmp);\
-    op2(sum2, (w2)[0 * 64], tmp);\
-    tmp = p[1 * 64];\
-    op1(sum1, (w1)[1 * 64], tmp);\
-    op2(sum2, (w2)[1 * 64], tmp);\
-    tmp = p[2 * 64];\
-    op1(sum1, (w1)[2 * 64], tmp);\
-    op2(sum2, (w2)[2 * 64], tmp);\
-    tmp = p[3 * 64];\
-    op1(sum1, (w1)[3 * 64], tmp);\
-    op2(sum2, (w2)[3 * 64], tmp);\
-    tmp = p[4 * 64];\
-    op1(sum1, (w1)[4 * 64], tmp);\
-    op2(sum2, (w2)[4 * 64], tmp);\
-    tmp = p[5 * 64];\
-    op1(sum1, (w1)[5 * 64], tmp);\
-    op2(sum2, (w2)[5 * 64], tmp);\
-    tmp = p[6 * 64];\
-    op1(sum1, (w1)[6 * 64], tmp);\
-    op2(sum2, (w2)[6 * 64], tmp);\
-    tmp = p[7 * 64];\
-    op1(sum1, (w1)[7 * 64], tmp);\
-    op2(sum2, (w2)[7 * 64], tmp);\
-}
-
-void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window)
-{
-    int i, j;
-
-    /* max = 18760, max sum over all 16 coefs : 44736 */
-    for(i=0;i<257;i++) {
-        INTFLOAT v;
-        v = ff_mpa_enwindow[i];
-#if CONFIG_FLOAT
-        v *= 1.0 / (1LL<<(16 + FRAC_BITS));
-#endif
-        window[i] = v;
-        if ((i & 63) != 0)
-            v = -v;
-        if (i != 0)
-            window[512 - i] = v;
-    }
-
-    // Needed for avoiding shuffles in ASM implementations
-    for(i=0; i < 8; i++)
-        for(j=0; j < 16; j++)
-            window[512+16*i+j] = window[64*i+32-j];
-
-    for(i=0; i < 8; i++)
-        for(j=0; j < 16; j++)
-            window[512+128+16*i+j] = window[64*i+48-j];
-}
-
-static void apply_window_mp3_c(MPA_INT *synth_buf, MPA_INT *window,
-                               int *dither_state, OUT_INT *samples, int incr)
-{
-    register const MPA_INT *w, *w2, *p;
-    int j;
-    OUT_INT *samples2;
-#if CONFIG_FLOAT
-    float sum, sum2;
-#else
-    int64_t sum, sum2;
-#endif
-
-    /* copy to avoid wrap */
-    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
-
-    samples2 = samples + 31 * incr;
-    w = window;
-    w2 = window + 31;
-
-    sum = *dither_state;
-    p = synth_buf + 16;
-    SUM8(MACS, sum, w, p);
-    p = synth_buf + 48;
-    SUM8(MLSS, sum, w + 32, p);
-    *samples = round_sample(&sum);
-    samples += incr;
-    w++;
-
-    /* we calculate two samples at the same time to avoid one memory
-       access per two sample */
-    for(j=1;j<16;j++) {
-        sum2 = 0;
-        p = synth_buf + 16 + j;
-        SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
-        p = synth_buf + 48 - j;
-        SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
-
-        *samples = round_sample(&sum);
-        samples += incr;
-        sum += sum2;
-        *samples2 = round_sample(&sum);
-        samples2 -= incr;
-        w++;
-        w2--;
-    }
-
-    p = synth_buf + 32;
-    SUM8(MLSS, sum, w + 32, p);
-    *samples = round_sample(&sum);
-    *dither_state= sum;
-}
-
-
-/* 32 sub band synthesis filter. Input: 32 sub band samples, Output:
-   32 samples. */
-/* XXX: optimize by avoiding ring buffer usage */
-#if !CONFIG_FLOAT
-void ff_mpa_synth_filter_fixed(MPA_INT *synth_buf_ptr, int *synth_buf_offset,
-                         MPA_INT *window, int *dither_state,
-                         OUT_INT *samples, int incr,
-                         INTFLOAT sb_samples[SBLIMIT])
-{
-    register MPA_INT *synth_buf;
-    int offset;
-
-    offset = *synth_buf_offset;
-    synth_buf = synth_buf_ptr + offset;
-
-    ff_dct32_fixed(synth_buf, sb_samples);
-    apply_window_mp3_c(synth_buf, window, dither_state, samples, incr);
-
-    offset = (offset - 32) & 511;
-    *synth_buf_offset = offset;
-}
-#endif
-
 #define C3 FIXHR(0.86602540378443864676/2)

 /* 0.5 / cos(pi*(2*i+1)/36) */
@ -1915,9 +1728,7 @@ static int mp_decode_frame(MPADecodeContext *s,
        samples_ptr = samples + ch;
        for(i=0;i<nb_frames;i++) {
            RENAME(ff_mpa_synth_filter)(
-#if CONFIG_FLOAT
-                         s,
-#endif
+                         &s->mpadsp,
                         s->synth_buf[ch], &(s->synth_buf_offset[ch]),
                         RENAME(ff_mpa_synth_window), &s->dither_state,
                         samples_ptr, s->nb_channels,
--- a/libavcodec/mpegaudiodec_float.c
+++ b/libavcodec/mpegaudiodec_float.c
@ -22,25 +22,6 @@
 #define CONFIG_FLOAT 1
 #include "mpegaudiodec.c"

-void ff_mpa_synth_filter_float(MPADecodeContext *s, float *synth_buf_ptr,
-                               int *synth_buf_offset,
-                               float *window, int *dither_state,
-                               float *samples, int incr,
-                               float sb_samples[SBLIMIT])
-{
-    float *synth_buf;
-    int offset;
-
-    offset = *synth_buf_offset;
-    synth_buf = synth_buf_ptr + offset;
-
-    s->dct.dct32(synth_buf, sb_samples);
-    s->apply_window_mp3(synth_buf, window, dither_state, samples, incr);
-
-    offset = (offset - 32) & 511;
-    *synth_buf_offset = offset;
-}
-
 static void compute_antialias_float(MPADecodeContext *s,
                              GranuleDef *g)
 {
--- a/libavcodec/mpegaudiodsp.c
+++ b/libavcodec/mpegaudiodsp.c
@ -0,0 +1,40 @@
+/*
+ * Copyright (c) 2011 Mans Rullgard
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "config.h"
+#include "mpegaudiodsp.h"
+#include "dct.h"
+#include "dct32.h"
+/**
+ * Fill an MPADSPContext with the default window and 32-point DCT routines
+ * for both the float and the fixed-point paths, then call the MMX and
+ * AltiVec init hooks (when HAVE_MMX / HAVE_ALTIVEC are set) so they can
+ * replace entries.
+ *
+ * @param s context whose function pointers are set
+ */
+void ff_mpadsp_init(MPADSPContext *s)
+{
+    DCTContext dct;
+    /* throwaway context: only its dct32 pointer is kept (copied into s below) */
+    ff_dct_init(&dct, 5, DCT_II);
+
+    s->apply_window_float = ff_mpadsp_apply_window_float;
+    s->apply_window_fixed = ff_mpadsp_apply_window_fixed;
+
+    s->dct32_float = dct.dct32;
+    s->dct32_fixed = ff_dct32_fixed;
+
+    if (HAVE_MMX)     ff_mpadsp_init_mmx(s);
+    if (HAVE_ALTIVEC) ff_mpadsp_init_altivec(s);
+}
--- a/libavcodec/mpegaudiodsp.h
+++ b/libavcodec/mpegaudiodsp.h
@ -0,0 +1,63 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#ifndef AVCODEC_MPEGAUDIODSP_H
+#define AVCODEC_MPEGAUDIODSP_H
+
+#include <stdint.h>
+/**
+ * Function-pointer table for the MPEG audio synthesis filter, with one
+ * windowing routine and one 32-point DCT routine each for the float and
+ * the fixed-point path. ff_mpadsp_init() fills it in;
+ * ff_mpa_synth_filter_float()/_fixed() call the matching dct32 entry
+ * followed by the matching apply_window entry.
+ */
+typedef struct MPADSPContext {
+    void (*apply_window_float)(float *synth_buf, float *window,
+                               int *dither_state, float *samples, int incr);
+    void (*apply_window_fixed)(int32_t *synth_buf, int32_t *window,
+                               int *dither_state, int16_t *samples, int incr);
+    void (*dct32_float)(float *dst, const float *src);
+    void (*dct32_fixed)(int *dst, const int *src);
+} MPADSPContext;
+
+void ff_mpadsp_init(MPADSPContext *s);
+
+extern int32_t ff_mpa_synth_window_fixed[];
+extern float   ff_mpa_synth_window_float[];
+
+void ff_mpa_synth_filter_fixed(MPADSPContext *s,
+                               int32_t *synth_buf_ptr, int *synth_buf_offset,
+                               int32_t *window, int *dither_state,
+                               int16_t *samples, int incr,
+                               int *sb_samples);
+
+void ff_mpa_synth_filter_float(MPADSPContext *s,
+                               float *synth_buf_ptr, int *synth_buf_offset,
+                               float *window, int *dither_state,
+                               float *samples, int incr,
+                               float *sb_samples);
+
+void ff_mpadsp_init_mmx(MPADSPContext *s);
+void ff_mpadsp_init_altivec(MPADSPContext *s);
+
+void ff_mpa_synth_init_float(float *window);
+void ff_mpa_synth_init_fixed(int32_t *window);
+
+void ff_mpadsp_apply_window_float(float *synth_buf, float *window,
+                                  int *dither_state, float *samples,
+                                  int incr);
+void ff_mpadsp_apply_window_fixed(int32_t *synth_buf, int32_t *window,
+                                  int *dither_state, int16_t *samples,
+                                  int incr);
+
+#endif
--- a/libavcodec/mpegaudiodsp_fixed.c
+++ b/libavcodec/mpegaudiodsp_fixed.c
@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FLOAT 0
+#include "mpegaudiodsp_template.c"
--- a/libavcodec/mpegaudiodsp_float.c
+++ b/libavcodec/mpegaudiodsp_float.c
@ -0,0 +1,20 @@
+/*
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#define CONFIG_FLOAT 1
+#include "mpegaudiodsp_template.c"
--- a/libavcodec/mpegaudiodsp_template.c
+++ b/libavcodec/mpegaudiodsp_template.c
@ -0,0 +1,205 @@
+/*
+ * Copyright (c) 2001, 2002 Fabrice Bellard
+ *
+ * This file is part of Libav.
+ *
+ * Libav is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * Libav is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with Libav; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include <stdint.h>
+
+#include "libavutil/mem.h"
+#include "dct32.h"
+#include "mathops.h"
+#include "mpegaudiodsp.h"
+#include "mpegaudio.h"
+#include "mpegaudiodata.h"
+
+#if CONFIG_FLOAT
+#define RENAME(n) n##_float
+/* Float build: hand back the accumulated value unchanged and reset the
+ * accumulator to 0; no rounding or clipping happens here. */
+static inline float round_sample(float *sum)
+{
+    float sum1=*sum;
+    *sum = 0;
+    return sum1;
+}
+/* Float build: multiply-accumulate, plain multiply, multiply-subtract. */
+#define MACS(rt, ra, rb) rt+=(ra)*(rb)
+#define MULS(ra, rb) ((ra)*(rb))
+#define MLSS(rt, ra, rb) rt-=(ra)*(rb)
+
+#else
+
+#define RENAME(n) n##_fixed
+#define OUT_SHIFT (WFRAC_BITS + FRAC_BITS - 15)
+/* Fixed-point build: the output sample is the accumulator shifted right by
+ * OUT_SHIFT and passed through av_clip_int16(); the low OUT_SHIFT bits are
+ * left in *sum so the remainder feeds into the next accumulation. */
+static inline int round_sample(int64_t *sum)
+{
+    int sum1;
+    sum1 = (int)((*sum) >> OUT_SHIFT);
+    *sum &= (1<<OUT_SHIFT)-1;
+    return av_clip_int16(sum1);
+}
+/* Fixed-point build: forward to the MUL64 / MAC64 / MLS64 macros. */
+#   define MULS(ra, rb) MUL64(ra, rb)
+#   define MACS(rt, ra, rb) MAC64(rt, ra, rb)
+#   define MLSS(rt, ra, rb) MLS64(rt, ra, rb)
+#endif
+
+DECLARE_ALIGNED(16, MPA_INT, RENAME(ff_mpa_synth_window))[512+256];
+/* Apply op(sum, w[k * 64], p[k * 64]) for k = 0..7: eight taps, 64 entries apart. */
+#define SUM8(op, sum, w, p)               \
+{                                         \
+    op(sum, (w)[0 * 64], (p)[0 * 64]);    \
+    op(sum, (w)[1 * 64], (p)[1 * 64]);    \
+    op(sum, (w)[2 * 64], (p)[2 * 64]);    \
+    op(sum, (w)[3 * 64], (p)[3 * 64]);    \
+    op(sum, (w)[4 * 64], (p)[4 * 64]);    \
+    op(sum, (w)[5 * 64], (p)[5 * 64]);    \
+    op(sum, (w)[6 * 64], (p)[6 * 64]);    \
+    op(sum, (w)[7 * 64], (p)[7 * 64]);    \
+}
+/* Two-accumulator form of SUM8: each p[k * 64] (k = 0..7) is read once into
+ * tmp and then used for both op1(sum1, w1[k * 64], tmp) and
+ * op2(sum2, w2[k * 64], tmp). */
+#define SUM8P2(sum1, op1, sum2, op2, w1, w2, p) \
+{                                               \
+    INTFLOAT tmp;\
+    tmp = p[0 * 64];\
+    op1(sum1, (w1)[0 * 64], tmp);\
+    op2(sum2, (w2)[0 * 64], tmp);\
+    tmp = p[1 * 64];\
+    op1(sum1, (w1)[1 * 64], tmp);\
+    op2(sum2, (w2)[1 * 64], tmp);\
+    tmp = p[2 * 64];\
+    op1(sum1, (w1)[2 * 64], tmp);\
+    op2(sum2, (w2)[2 * 64], tmp);\
+    tmp = p[3 * 64];\
+    op1(sum1, (w1)[3 * 64], tmp);\
+    op2(sum2, (w2)[3 * 64], tmp);\
+    tmp = p[4 * 64];\
+    op1(sum1, (w1)[4 * 64], tmp);\
+    op2(sum2, (w2)[4 * 64], tmp);\
+    tmp = p[5 * 64];\
+    op1(sum1, (w1)[5 * 64], tmp);\
+    op2(sum2, (w2)[5 * 64], tmp);\
+    tmp = p[6 * 64];\
+    op1(sum1, (w1)[6 * 64], tmp);\
+    op2(sum2, (w2)[6 * 64], tmp);\
+    tmp = p[7 * 64];\
+    op1(sum1, (w1)[7 * 64], tmp);\
+    op2(sum2, (w2)[7 * 64], tmp);\
+}
+/**
+ * Windowing step of the synthesis filter: combine synth_buf with the
+ * window coefficients and write 32 output samples.
+ *
+ * @param synth_buf    filter state; its first 32 entries are also copied
+ *                     to synth_buf + 512 here, so that region must be
+ *                     writable
+ * @param window       window coefficients
+ * @param dither_state initial accumulator value on entry; receives the
+ *                     accumulator left over after the last sample
+ * @param samples      output, written at samples[0], samples[incr], ...,
+ *                     samples[31 * incr]
+ * @param incr         distance between consecutive output samples
+ */
+void RENAME(ff_mpadsp_apply_window)(MPA_INT *synth_buf, MPA_INT *window,
+                                  int *dither_state, OUT_INT *samples,
+                                  int incr)
+{
+    register const MPA_INT *w, *w2, *p;
+    int j;
+    OUT_INT *samples2;
+#if CONFIG_FLOAT
+    float sum, sum2;
+#else
+    int64_t sum, sum2;
+#endif
+
+    /* copy to avoid wrap: mirror the 32 entries at synth_buf into synth_buf + 512 */
+    memcpy(synth_buf + 512, synth_buf, 32 * sizeof(*synth_buf));
+
+    samples2 = samples + 31 * incr;
+    w = window;
+    w2 = window + 31;
+    /* output sample 0, seeded with the incoming dither state */
+    sum = *dither_state;
+    p = synth_buf + 16;
+    SUM8(MACS, sum, w, p);
+    p = synth_buf + 48;
+    SUM8(MLSS, sum, w + 32, p);
+    *samples = round_sample(&sum);
+    samples += incr;
+    w++;
+
+    /* compute two output samples per iteration (j via samples, 32 - j via
+       samples2) so each synth_buf value is loaded only once for both */
+    for(j=1;j<16;j++) {
+        sum2 = 0;
+        p = synth_buf + 16 + j;
+        SUM8P2(sum, MACS, sum2, MLSS, w, w2, p);
+        p = synth_buf + 48 - j;
+        SUM8P2(sum, MLSS, sum2, MLSS, w + 32, w2 + 32, p);
+
+        *samples = round_sample(&sum);
+        samples += incr;
+        sum += sum2;
+        *samples2 = round_sample(&sum);
+        samples2 -= incr;
+        w++;
+        w2--;
+    }
+    /* output sample 16, then hand the leftover accumulator back to the caller */
+    p = synth_buf + 32;
+    SUM8(MLSS, sum, w + 32, p);
+    *samples = round_sample(&sum);
+    *dither_state= sum;
+}
+
+/**
+ * 32 sub band synthesis filter. Input: 32 sub band samples, Output:
+ * 32 samples.
+ *
+ * Runs the context's 32-point DCT from sb_samples into the synthesis
+ * buffer at the current offset, applies the context's window routine to
+ * that position, then moves the offset back by 32 modulo 512.
+ *
+ * @param s                 DSP context supplying dct32 and apply_window
+ * @param synth_buf_ptr     base of the synthesis buffer
+ * @param synth_buf_offset  current offset into the buffer; updated on return
+ * @param window            window coefficients, passed to apply_window
+ * @param dither_state      passed through to apply_window
+ * @param samples           output samples, passed to apply_window
+ * @param incr              output stride, passed to apply_window
+ * @param sb_samples        input sub band samples
+ */
+void RENAME(ff_mpa_synth_filter)(MPADSPContext *s, MPA_INT *synth_buf_ptr,
+                                 int *synth_buf_offset,
+                                 MPA_INT *window, int *dither_state,
+                                 OUT_INT *samples, int incr,
+                                 MPA_INT *sb_samples)
+{
+    MPA_INT *synth_buf;
+    int offset;
+
+    offset = *synth_buf_offset;
+    synth_buf = synth_buf_ptr + offset;
+
+    s->RENAME(dct32)(synth_buf, sb_samples);
+    s->RENAME(apply_window)(synth_buf, window, dither_state, samples, incr);
+
+    offset = (offset - 32) & 511;
+    *synth_buf_offset = offset;
+}
+/**
+ * Build the synthesis window table from ff_mpa_enwindow.
+ *
+ * Entries 0..256 are taken from ff_mpa_enwindow (scaled by
+ * 1 / 2^(16 + FRAC_BITS) in the float build). Entries 512 - i for
+ * i = 1..256 mirror entry i, negated unless i is a multiple of 64.
+ * Entries 512..767 hold reordered copies of earlier entries.
+ *
+ * @param window destination table; indices up to 767 are written
+ */
+void av_cold RENAME(ff_mpa_synth_init)(MPA_INT *window)
+{
+    int i, j;
+
+    /* max = 18760, max sum over all 16 coefs : 44736 */
+    for(i=0;i<257;i++) {
+        INTFLOAT v;
+        v = ff_mpa_enwindow[i];
+#if CONFIG_FLOAT
+        v *= 1.0 / (1LL<<(16 + FRAC_BITS));
+#endif
+        window[i] = v;
+        if ((i & 63) != 0)
+            v = -v;
+        if (i != 0)
+            window[512 - i] = v;
+    }
+
+    // Needed for avoiding shuffles in ASM implementations
+    for(i=0; i < 8; i++)
+        for(j=0; j < 16; j++)
+            window[512+16*i+j] = window[64*i+32-j];
+
+    for(i=0; i < 8; i++)
+        for(j=0; j < 16; j++)
+            window[512+128+16*i+j] = window[64*i+48-j];
+}
--- a/libavcodec/ppc/mpegaudiodec_altivec.c
+++ b/libavcodec/ppc/mpegaudiodec_altivec.c
@ -21,9 +21,8 @@

 #include "dsputil_altivec.h"
 #include "util_altivec.h"
-
-#define CONFIG_FLOAT 1
-#include "libavcodec/mpegaudio.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/mpegaudiodsp.h"

 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
@ -124,7 +123,7 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,
    *out = sum;
 }

-void ff_mpegaudiodec_init_altivec(MPADecodeContext *s)
+void ff_mpadsp_init_altivec(MPADSPContext *s)
 {
-    s->apply_window_mp3 = apply_window_mp3;
+    s->apply_window_float = apply_window_mp3;
 }
--- a/libavcodec/qdm2.c
+++ b/libavcodec/qdm2.c
@ -39,6 +39,7 @@
 #include "get_bits.h"
 #include "dsputil.h"
 #include "rdft.h"
+#include "mpegaudiodsp.h"
 #include "mpegaudio.h"

 #include "qdm2data.h"
@ -170,6 +171,7 @@ typedef struct {
    float output_buffer[1024];

    /// Synthesis filter
+    MPADSPContext mpadsp;
    DECLARE_ALIGNED(16, MPA_INT, synth_buf)[MPA_MAX_CHANNELS][512*2];
    int synth_buf_offset[MPA_MAX_CHANNELS];
    DECLARE_ALIGNED(16, int32_t, sb_samples)[MPA_MAX_CHANNELS][128][SBLIMIT];
@ -1616,7 +1618,8 @@ static void qdm2_synthesis_filter (QDM2Context *q, int index)
        OUT_INT *samples_ptr = samples + ch;

        for (i = 0; i < 8; i++) {
-            ff_mpa_synth_filter_fixed(q->synth_buf[ch], &(q->synth_buf_offset[ch]),
+            ff_mpa_synth_filter_fixed(&q->mpadsp,
+                q->synth_buf[ch], &(q->synth_buf_offset[ch]),
                ff_mpa_synth_window_fixed, &dither_state,
                samples_ptr, q->nb_channels,
                q->sb_samples[ch][(8 * index) + i]);
@ -1863,6 +1866,7 @@ static av_cold int qdm2_decode_init(AVCodecContext *avctx)
    }

    ff_rdft_init(&s->rdft_ctx, s->fft_order, IDFT_C2R);
+    ff_mpadsp_init(&s->mpadsp);

    qdm2_init(s);

--- a/libavcodec/x86/mpegaudiodec_mmx.c
+++ b/libavcodec/x86/mpegaudiodec_mmx.c
@ -21,9 +21,8 @@

 #include "libavutil/cpu.h"
 #include "libavutil/x86_cpu.h"
-
-#define CONFIG_FLOAT 1
-#include "libavcodec/mpegaudio.h"
+#include "libavcodec/dsputil.h"
+#include "libavcodec/mpegaudiodsp.h"

 #define MACS(rt, ra, rb) rt+=(ra)*(rb)
 #define MLSS(rt, ra, rb) rt-=(ra)*(rb)
@ -148,11 +147,11 @@ static void apply_window_mp3(float *in, float *win, int *unused, float *out,
    *out = sum;
 }

-void ff_mpegaudiodec_init_mmx(MPADecodeContext *s)
+void ff_mpadsp_init_mmx(MPADSPContext *s)
 {
    int mm_flags = av_get_cpu_flags();

    if (mm_flags & AV_CPU_FLAG_SSE2) {
-        s->apply_window_mp3 = apply_window_mp3;
+        s->apply_window_float = apply_window_mp3;
    }
 }