avcodec/mips: version 1 of vc1dsp optimizations for loongson mmi

Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
Zhou Xiaoyong 2016-10-10 16:11:37 +08:00 committed by Michael Niedermayer
parent 334fad3645
commit 5b74ebe937
8 changed files with 2396 additions and 0 deletions

View File

@ -36,6 +36,7 @@ OBJS-$(CONFIG_MPEGVIDEO) += mips/mpegvideo_init_mips.o
OBJS-$(CONFIG_MPEGVIDEOENC) += mips/mpegvideoencdsp_init_mips.o
OBJS-$(CONFIG_ME_CMP) += mips/me_cmp_init_mips.o
OBJS-$(CONFIG_MPEG4_DECODER) += mips/xvididct_init_mips.o
OBJS-$(CONFIG_VC1_DECODER) += mips/vc1dsp_init_mips.o
OBJS-$(CONFIG_WMV2DSP) += mips/wmv2dsp_init_mips.o
MSA-OBJS-$(CONFIG_HEVC_DECODER) += mips/hevcdsp_msa.o \
mips/hevc_mc_uni_msa.o \
@ -80,4 +81,5 @@ MMI-OBJS-$(CONFIG_PIXBLOCKDSP) += mips/pixblockdsp_mmi.o
MMI-OBJS-$(CONFIG_H264QPEL) += mips/h264qpel_mmi.o
MMI-OBJS-$(CONFIG_VP8_DECODER) += mips/vp8dsp_mmi.o
MMI-OBJS-$(CONFIG_HPELDSP) += mips/hpeldsp_mmi.o
MMI-OBJS-$(CONFIG_VC1_DECODER) += mips/vc1dsp_mmi.o
MMI-OBJS-$(CONFIG_WMV2DSP) += mips/wmv2dsp_mmi.o

View File

@ -32,9 +32,13 @@ DECLARE_ALIGNED(8, const uint64_t, ff_pw_6) = {0x0006000600060006ULL};
/* Each ff_pw_N below is a 64-bit constant holding the value N replicated in
 * all four 16-bit words (e.g. ff_pw_12 == 0x000C repeated four times). */
DECLARE_ALIGNED(8, const uint64_t, ff_pw_8) = {0x0008000800080008ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_9) = {0x0009000900090009ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_10) = {0x000A000A000A000AULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_12) = {0x000C000C000C000CULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_15) = {0x000F000F000F000FULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_16) = {0x0010001000100010ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_17) = {0x0011001100110011ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_18) = {0x0012001200120012ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_20) = {0x0014001400140014ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_22) = {0x0016001600160016ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_28) = {0x001C001C001C001CULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_32) = {0x0020002000200020ULL};
DECLARE_ALIGNED(8, const uint64_t, ff_pw_53) = {0x0035003500350035ULL};

View File

@ -33,9 +33,13 @@ extern const uint64_t ff_pw_6;
/* Declarations of the ff_pw_N 64-bit constants; the matching definitions in
 * this change store N in each of the four 16-bit words. */
extern const uint64_t ff_pw_8;
extern const uint64_t ff_pw_9;
extern const uint64_t ff_pw_10;
extern const uint64_t ff_pw_12;
extern const uint64_t ff_pw_15;
extern const uint64_t ff_pw_16;
extern const uint64_t ff_pw_17;
extern const uint64_t ff_pw_18;
extern const uint64_t ff_pw_20;
extern const uint64_t ff_pw_22;
extern const uint64_t ff_pw_28;
extern const uint64_t ff_pw_32;
extern const uint64_t ff_pw_53;

View File

@ -0,0 +1,108 @@
/*
* Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavcodec/vc1dsp.h"
#include "vc1dsp_mips.h"
#include "config.h"
#if HAVE_MMI
/*
 * Point the function pointers of a VC1DSPContext at the Loongson MMI
 * implementations declared in vc1dsp_mips.h.
 *
 * dsp: context whose inverse-transform, overlap, loop-filter, mspel and
 *      no-rnd chroma entries are overwritten. Entries not listed here are
 *      left as they were.
 */
static av_cold void vc1dsp_init_mmi(VC1DSPContext *dsp)
{
/* Install one mspel position (X, Y) for the OP prefix ("put_" or "avg_"):
 * the _16 variant goes into table 0 and the plain variant into table 1,
 * both at index X + 4 * Y. */
#define MSPEL_ASSIGN(OP, X, Y, INSN)                                          \
    dsp->OP##vc1_mspel_pixels_tab[0][X+4*Y] = ff_##OP##vc1_mspel_mc##X##Y##_16##INSN; \
    dsp->OP##vc1_mspel_pixels_tab[1][X+4*Y] = ff_##OP##vc1_mspel_mc##X##Y##INSN

/* Install the put_ and the avg_ flavour of the same (X, Y) position. */
#define MSPEL_PUT_AVG(X, Y)                                                   \
    MSPEL_ASSIGN(put_, X, Y, _mmi);                                           \
    MSPEL_ASSIGN(avg_, X, Y, _mmi)

    /* Inverse transforms, DC-only variants. */
    dsp->vc1_inv_trans_4x4_dc = ff_vc1_inv_trans_4x4_dc_mmi;
    dsp->vc1_inv_trans_8x4_dc = ff_vc1_inv_trans_8x4_dc_mmi;
    dsp->vc1_inv_trans_4x8_dc = ff_vc1_inv_trans_4x8_dc_mmi;
    dsp->vc1_inv_trans_8x8_dc = ff_vc1_inv_trans_8x8_dc_mmi;

    /* Full inverse transforms. Only the 4x4 one is installed for every ABI;
     * the 8x8, 4x8 and 8x4 ones are skipped when building for O32 (the
     * reason is not visible in this file). */
    dsp->vc1_inv_trans_4x4 = ff_vc1_inv_trans_4x4_mmi;
#if _MIPS_SIM != _ABIO32
    dsp->vc1_inv_trans_8x4 = ff_vc1_inv_trans_8x4_mmi;
    dsp->vc1_inv_trans_4x8 = ff_vc1_inv_trans_4x8_mmi;
    dsp->vc1_inv_trans_8x8 = ff_vc1_inv_trans_8x8_mmi;
#endif

    /* Overlap smoothing. */
    dsp->vc1_v_overlap   = ff_vc1_v_overlap_mmi;
    dsp->vc1_h_overlap   = ff_vc1_h_overlap_mmi;
    dsp->vc1_v_s_overlap = ff_vc1_v_s_overlap_mmi;
    dsp->vc1_h_s_overlap = ff_vc1_h_s_overlap_mmi;

    /* In-loop filters, vertical then horizontal, for each edge length. */
    dsp->vc1_v_loop_filter4  = ff_vc1_v_loop_filter4_mmi;
    dsp->vc1_v_loop_filter8  = ff_vc1_v_loop_filter8_mmi;
    dsp->vc1_v_loop_filter16 = ff_vc1_v_loop_filter16_mmi;
    dsp->vc1_h_loop_filter4  = ff_vc1_h_loop_filter4_mmi;
    dsp->vc1_h_loop_filter8  = ff_vc1_h_loop_filter8_mmi;
    dsp->vc1_h_loop_filter16 = ff_vc1_h_loop_filter16_mmi;

    /* mspel motion compensation. Only positions with X == 0 or Y == 0 are
     * installed; the nine positions with both X and Y non-zero (mc11, mc12,
     * mc13, mc21, mc22, mc23, mc31, mc32, mc33) are still disabled for both
     * put_ and avg_. FIXME: enable them. */
    MSPEL_PUT_AVG(0, 0);
    MSPEL_PUT_AVG(0, 1);
    MSPEL_PUT_AVG(0, 2);
    MSPEL_PUT_AVG(0, 3);
    MSPEL_PUT_AVG(1, 0);
    MSPEL_PUT_AVG(2, 0);
    MSPEL_PUT_AVG(3, 0);

    /* No-rounding chroma motion compensation: slot 0 takes the mc8
     * functions, slot 1 the mc4 ones. */
    dsp->put_no_rnd_vc1_chroma_pixels_tab[0] = ff_put_no_rnd_vc1_chroma_mc8_mmi;
    dsp->put_no_rnd_vc1_chroma_pixels_tab[1] = ff_put_no_rnd_vc1_chroma_mc4_mmi;
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[0] = ff_avg_no_rnd_vc1_chroma_mc8_mmi;
    dsp->avg_no_rnd_vc1_chroma_pixels_tab[1] = ff_avg_no_rnd_vc1_chroma_mc4_mmi;

#undef MSPEL_PUT_AVG
#undef MSPEL_ASSIGN
}
#endif /* HAVE_MMI */
/**
 * MIPS entry point for VC1 DSP initialisation.
 *
 * When the build has HAVE_MMI enabled this forwards to vc1dsp_init_mmi(),
 * which overwrites function pointers in dsp with the MMI versions;
 * otherwise dsp is left untouched.
 *
 * @param dsp context to update
 */
av_cold void ff_vc1dsp_init_mips(VC1DSPContext *dsp)
{
#if HAVE_MMI
vc1dsp_init_mmi(dsp);
#endif /* HAVE_MMI */
}

View File

@ -0,0 +1,194 @@
/*
* Copyright (c) 2016 Zhou Xiaoyong <zhouxiaoyong@loongson.cn>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_MIPS_VC1DSP_MIPS_H
#define AVCODEC_MIPS_VC1DSP_MIPS_H
#include "libavcodec/vc1dsp.h"
/*
 * "put" mspel functions, one per mcXY position (X, Y in 0..3).
 * The MIPS init code stores ff_put_vc1_mspel_mcXY_mmi in
 * put_vc1_mspel_pixels_tab[1][X + 4 * Y]; positions with both X and Y
 * non-zero are declared here but not yet installed by that code.
 * NOTE(review): X/Y are presumably the horizontal/vertical sub-pel offsets
 * and rnd a rounding control - confirm against the generic vc1dsp code.
 */
void ff_put_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc01_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc02_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc03_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc10_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc11_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc12_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc13_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc20_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc21_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc22_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc23_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc30_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc31_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc32_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc33_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
/*
 * "avg" mspel functions, one per mcXY position (X, Y in 0..3).
 * The MIPS init code stores ff_avg_vc1_mspel_mcXY_mmi in
 * avg_vc1_mspel_pixels_tab[1][X + 4 * Y]; positions with both X and Y
 * non-zero are declared here but not yet installed by that code.
 * NOTE(review): "avg" presumably means the result is averaged with what is
 * already in dst - confirm against the generic vc1dsp code.
 */
void ff_avg_vc1_mspel_mc00_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc01_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc02_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc03_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc10_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc11_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc12_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc13_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc20_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc21_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc22_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc23_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc30_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc31_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc32_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc33_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
/*
 * "_16" flavour of the "put" mspel functions, one per mcXY position.
 * The MIPS init code stores ff_put_vc1_mspel_mcXY_16_mmi in
 * put_vc1_mspel_pixels_tab[0][X + 4 * Y]; positions with both X and Y
 * non-zero are declared here but not yet installed by that code.
 * NOTE(review): "_16" presumably denotes a 16-pixel-wide block - confirm.
 */
void ff_put_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc01_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc02_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc03_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc10_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc11_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc12_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc13_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc20_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc21_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc22_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc23_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc30_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc31_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc32_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_put_vc1_mspel_mc33_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
/*
 * "_16" flavour of the "avg" mspel functions, one per mcXY position.
 * The MIPS init code stores ff_avg_vc1_mspel_mcXY_16_mmi in
 * avg_vc1_mspel_pixels_tab[0][X + 4 * Y]; positions with both X and Y
 * non-zero are declared here but not yet installed by that code.
 * NOTE(review): "_16" presumably denotes a 16-pixel-wide block - confirm.
 */
void ff_avg_vc1_mspel_mc00_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc01_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc02_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc03_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc10_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc11_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc12_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc13_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc20_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc21_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc22_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc23_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc30_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc31_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc32_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
void ff_avg_vc1_mspel_mc33_16_mmi(uint8_t *dst, const uint8_t *src,
ptrdiff_t stride, int rnd);
/*
 * Inverse transforms. The 8x8 variant takes only a 64-entry coefficient
 * block; every other variant takes a destination pointer, a line size and
 * the coefficient block.
 * The MIPS init code installs the 8x8, 8x4 and 4x8 full transforms only when
 * _MIPS_SIM != _ABIO32; the 4x4 and all _dc variants are always installed.
 * NOTE(review): the _dc variants presumably handle blocks with only a DC
 * coefficient - confirm against the generic vc1dsp code.
 */
void ff_vc1_inv_trans_8x8_mmi(int16_t block[64]);
void ff_vc1_inv_trans_8x4_mmi(uint8_t *dest, int linesize, int16_t *block);
void ff_vc1_inv_trans_4x8_mmi(uint8_t *dest, int linesize, int16_t *block);
void ff_vc1_inv_trans_4x4_mmi(uint8_t *dest, int linesize, int16_t *block);
void ff_vc1_inv_trans_4x4_dc_mmi(uint8_t *dest, int linesize, int16_t *block);
void ff_vc1_inv_trans_4x8_dc_mmi(uint8_t *dest, int linesize, int16_t *block);
void ff_vc1_inv_trans_8x4_dc_mmi(uint8_t *dest, int linesize, int16_t *block);
void ff_vc1_inv_trans_8x8_dc_mmi(uint8_t *dest, int linesize, int16_t *block);
/*
 * Overlap functions. The plain variants work on 8-bit samples addressed by
 * src/stride; the _s variants take two int16_t blocks (top/bottom or
 * left/right) instead.
 * NOTE(review): v/h presumably name the vertical/horizontal edge direction
 * - confirm against the generic vc1dsp code.
 */
void ff_vc1_v_overlap_mmi(uint8_t *src, int stride);
void ff_vc1_h_overlap_mmi(uint8_t *src, int stride);
void ff_vc1_v_s_overlap_mmi(int16_t *top, int16_t *bottom);
void ff_vc1_h_s_overlap_mmi(int16_t *left, int16_t *right);
/*
 * Loop filters, in vertical (v) and horizontal (h) flavours with suffixes
 * 4, 8 and 16. All take the sample pointer, the stride and pq.
 * NOTE(review): the numeric suffix is presumably the filtered edge length
 * and pq the picture quantizer - confirm against the generic vc1dsp code.
 */
void ff_vc1_v_loop_filter4_mmi(uint8_t *src, int stride, int pq);
void ff_vc1_h_loop_filter4_mmi(uint8_t *src, int stride, int pq);
void ff_vc1_v_loop_filter8_mmi(uint8_t *src, int stride, int pq);
void ff_vc1_h_loop_filter8_mmi(uint8_t *src, int stride, int pq);
void ff_vc1_v_loop_filter16_mmi(uint8_t *src, int stride, int pq);
void ff_vc1_h_loop_filter16_mmi(uint8_t *src, int stride, int pq);
/*
 * "no_rnd" chroma motion-compensation functions in put and avg flavours.
 * The MIPS init code installs the mc8 functions in slot 0 and the mc4
 * functions in slot 1 of put/avg_no_rnd_vc1_chroma_pixels_tab.
 * NOTE(review): h is presumably the block height and x/y the fractional
 * offsets; mc8/mc4 presumably the block width - confirm against callers.
 */
void ff_put_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
int stride, int h, int x, int y);
void ff_put_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
int stride, int h, int x, int y);
void ff_avg_no_rnd_vc1_chroma_mc8_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
int stride, int h, int x, int y);
void ff_avg_no_rnd_vc1_chroma_mc4_mmi(uint8_t *dst /* align 8 */,
uint8_t *src /* align 1 */,
int stride, int h, int x, int y);
#endif /* AVCODEC_MIPS_VC1DSP_MIPS_H */

2081
libavcodec/mips/vc1dsp_mmi.c Normal file

File diff suppressed because it is too large Load Diff

View File

@ -1034,4 +1034,6 @@ av_cold void ff_vc1dsp_init(VC1DSPContext *dsp)
ff_vc1dsp_init_ppc(dsp);
if (ARCH_X86)
ff_vc1dsp_init_x86(dsp);
if (ARCH_MIPS)
ff_vc1dsp_init_mips(dsp);
}

View File

@ -87,5 +87,6 @@ void ff_vc1dsp_init_aarch64(VC1DSPContext* dsp);
void ff_vc1dsp_init_arm(VC1DSPContext* dsp);
void ff_vc1dsp_init_ppc(VC1DSPContext *c);
void ff_vc1dsp_init_x86(VC1DSPContext* dsp);
void ff_vc1dsp_init_mips(VC1DSPContext* dsp);
#endif /* AVCODEC_VC1DSP_H */