avcodec: [loongarch] Optimize hpeldsp with LASX.

./ffmpeg -i 8_mpeg4_1080p_24fps_12Mbps.avi -f rawvideo -y /dev/null -an
before:376fps
after :433fps

Reviewed-by: 殷时友 <yinshiyou-hf@loongson.cn>
Signed-off-by: Michael Niedermayer <michael@niedermayer.cc>
This commit is contained in:
Shiyou Yin 2021-12-29 18:18:20 +08:00 committed by Michael Niedermayer
parent 10e4b2b1d2
commit 5d58355bf1
6 changed files with 1400 additions and 0 deletions

View File

@ -367,4 +367,6 @@ av_cold void ff_hpeldsp_init(HpelDSPContext *c, int flags)
ff_hpeldsp_init_x86(c, flags);
if (ARCH_MIPS)
ff_hpeldsp_init_mips(c, flags);
if (ARCH_LOONGARCH64)
ff_hpeldsp_init_loongarch(c, flags);
}

View File

@ -102,5 +102,6 @@ void ff_hpeldsp_init_arm(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_ppc(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_x86(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_mips(HpelDSPContext *c, int flags);
void ff_hpeldsp_init_loongarch(HpelDSPContext *c, int flags);
#endif /* AVCODEC_HPELDSP_H */

View File

@ -5,6 +5,7 @@ OBJS-$(CONFIG_H264PRED) += loongarch/h264_intrapred_init_loongarch
OBJS-$(CONFIG_VP8_DECODER) += loongarch/vp8dsp_init_loongarch.o
OBJS-$(CONFIG_VP9_DECODER) += loongarch/vp9dsp_init_loongarch.o
OBJS-$(CONFIG_VC1DSP) += loongarch/vc1dsp_init_loongarch.o
OBJS-$(CONFIG_HPELDSP) += loongarch/hpeldsp_init_loongarch.o
LASX-OBJS-$(CONFIG_H264CHROMA) += loongarch/h264chroma_lasx.o
LASX-OBJS-$(CONFIG_H264QPEL) += loongarch/h264qpel_lasx.o
LASX-OBJS-$(CONFIG_H264DSP) += loongarch/h264dsp_lasx.o \
@ -12,6 +13,7 @@ LASX-OBJS-$(CONFIG_H264DSP) += loongarch/h264dsp_lasx.o \
loongarch/h264_deblock_lasx.o
LASX-OBJS-$(CONFIG_H264PRED) += loongarch/h264_intrapred_lasx.o
LASX-OBJS-$(CONFIG_VC1_DECODER) += loongarch/vc1dsp_lasx.o
LASX-OBJS-$(CONFIG_HPELDSP) += loongarch/hpeldsp_lasx.o
LSX-OBJS-$(CONFIG_VP8_DECODER) += loongarch/vp8_mc_lsx.o \
loongarch/vp8_lpf_lsx.o
LSX-OBJS-$(CONFIG_VP9_DECODER) += loongarch/vp9_mc_lsx.o \

View File

@ -0,0 +1,50 @@
/*
* Copyright (c) 2021 Loongson Technology Corporation Limited
* Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/loongarch/cpu.h"
#include "libavcodec/hpeldsp.h"
#include "libavcodec/loongarch/hpeldsp_lasx.h"
/**
 * Install the LoongArch 8-bit half-pel "put" routines into a context.
 *
 * The context is left untouched unless have_lasx() accepts the value
 * returned by av_get_cpu_flags(). Otherwise every entry [0..1][0..3] of
 * both put_pixels_tab and put_no_rnd_pixels_tab is overwritten.
 *
 * Table layout as used here: the first index selects the pixels16 (0) or
 * pixels8 (1) routines; the second index selects the plain, x2, y2 or xy2
 * variant (0..3), following the suffixes of the installed functions.
 *
 * @param c     context whose function tables are filled in
 * @param flags not read by this function
 */
void ff_hpeldsp_init_loongarch(HpelDSPContext *c, int flags)
{
    int detected = av_get_cpu_flags();

    if (!have_lasx(detected)) {
        return;
    }

    /*
     * Entry [n][0] is the same routine in the rounding and the no-rounding
     * table. Note that the 16-wide one carries an _lsx suffix even though
     * it is installed under the LASX check.
     */
    c->put_no_rnd_pixels_tab[0][0] =
    c->put_pixels_tab[0][0]        = ff_put_pixels16_8_lsx;
    c->put_no_rnd_pixels_tab[1][0] =
    c->put_pixels_tab[1][0]        = ff_put_pixels8_8_lasx;

    /* pixels16 routines, rounding table */
    c->put_pixels_tab[0][1] = ff_put_pixels16_x2_8_lasx;
    c->put_pixels_tab[0][2] = ff_put_pixels16_y2_8_lasx;
    c->put_pixels_tab[0][3] = ff_put_pixels16_xy2_8_lasx;

    /* pixels8 routines, rounding table */
    c->put_pixels_tab[1][1] = ff_put_pixels8_x2_8_lasx;
    c->put_pixels_tab[1][2] = ff_put_pixels8_y2_8_lasx;
    c->put_pixels_tab[1][3] = ff_put_pixels8_xy2_8_lasx;

    /* pixels16 routines, no-rounding table */
    c->put_no_rnd_pixels_tab[0][1] = ff_put_no_rnd_pixels16_x2_8_lasx;
    c->put_no_rnd_pixels_tab[0][2] = ff_put_no_rnd_pixels16_y2_8_lasx;
    c->put_no_rnd_pixels_tab[0][3] = ff_put_no_rnd_pixels16_xy2_8_lasx;

    /* pixels8 routines, no-rounding table */
    c->put_no_rnd_pixels_tab[1][1] = ff_put_no_rnd_pixels8_x2_8_lasx;
    c->put_no_rnd_pixels_tab[1][2] = ff_put_no_rnd_pixels8_y2_8_lasx;
    c->put_no_rnd_pixels_tab[1][3] = ff_put_no_rnd_pixels8_xy2_8_lasx;
}

File diff suppressed because it is too large Load Diff

View File

@ -0,0 +1,58 @@
/*
* Copyright (c) 2021 Loongson Technology Corporation Limited
* Contributed by Shiyou Yin <yinshiyou-hf@loongson.cn>
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_LOONGARCH_HPELDSP_LASX_H
#define AVCODEC_LOONGARCH_HPELDSP_LASX_H
#include <stdint.h>
#include <stddef.h>
#include "libavutil/attributes.h"
/*
 * 8-bit half-pel "put" routines for LoongArch.
 *
 * ff_hpeldsp_init_loongarch() stores these in the put_pixels_tab and
 * put_no_rnd_pixels_tab arrays of HpelDSPContext, so every prototype uses
 * the same parameter list. The last parameter is spelled "int h"
 * throughout; a few declarations previously used int32_t, which made
 * otherwise parallel signatures look different.
 *
 * NOTE(review): presumably block is the destination, pixels the source,
 * line_size the distance between rows in bytes and h the number of rows;
 * confirm against the op_pixels_func documentation in hpeldsp.h.
 *
 * Name suffixes: none = plain, _x2 / _y2 / _xy2 = the variant installed at
 * table index 1 / 2 / 3 respectively.
 */

/* pixels16, rounding table */
void ff_put_pixels16_8_lsx(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_put_pixels16_x2_8_lasx(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
void ff_put_pixels16_y2_8_lasx(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);
void ff_put_pixels16_xy2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                ptrdiff_t line_size, int h);

/* pixels8, rounding table */
void ff_put_pixels8_8_lasx(uint8_t *block, const uint8_t *pixels,
                           ptrdiff_t line_size, int h);
void ff_put_pixels8_x2_8_lasx(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_put_pixels8_y2_8_lasx(uint8_t *block, const uint8_t *pixels,
                              ptrdiff_t line_size, int h);
void ff_put_pixels8_xy2_8_lasx(uint8_t *block, const uint8_t *pixels,
                               ptrdiff_t line_size, int h);

/* pixels16, no-rounding table */
void ff_put_no_rnd_pixels16_x2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels16_y2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels16_xy2_8_lasx(uint8_t *block,
                                       const uint8_t *pixels,
                                       ptrdiff_t line_size, int h);

/* pixels8, no-rounding table */
void ff_put_no_rnd_pixels8_x2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_y2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                     ptrdiff_t line_size, int h);
void ff_put_no_rnd_pixels8_xy2_8_lasx(uint8_t *block, const uint8_t *pixels,
                                      ptrdiff_t line_size, int h);
#endif