lavc/vp9dsp: R-V V ipred dc

C908:
vp9_dc_8x8_8bpp_c: 46.0
vp9_dc_8x8_8bpp_rvv_i64: 41.0
vp9_dc_16x16_8bpp_c: 109.2
vp9_dc_16x16_8bpp_rvv_i32: 72.7
vp9_dc_32x32_8bpp_c: 365.2
vp9_dc_32x32_8bpp_rvv_i32: 165.5
vp9_dc_127_8x8_8bpp_c: 23.0
vp9_dc_127_8x8_8bpp_rvv_i64: 22.0
vp9_dc_127_16x16_8bpp_c: 70.2
vp9_dc_127_16x16_8bpp_rvv_i32: 50.2
vp9_dc_127_32x32_8bpp_c: 295.2
vp9_dc_127_32x32_8bpp_rvv_i32: 136.7
vp9_dc_128_8x8_8bpp_c: 23.0
vp9_dc_128_8x8_8bpp_rvv_i64: 22.0
vp9_dc_128_16x16_8bpp_c: 70.2
vp9_dc_128_16x16_8bpp_rvv_i32: 50.2
vp9_dc_128_32x32_8bpp_c: 295.2
vp9_dc_128_32x32_8bpp_rvv_i32: 136.7
vp9_dc_129_8x8_8bpp_c: 23.0
vp9_dc_129_8x8_8bpp_rvv_i64: 22.0
vp9_dc_129_16x16_8bpp_c: 70.2
vp9_dc_129_16x16_8bpp_rvv_i32: 50.2
vp9_dc_129_32x32_8bpp_c: 295.2
vp9_dc_129_32x32_8bpp_rvv_i32: 136.7
vp9_dc_left_8x8_8bpp_c: 38.0
vp9_dc_left_8x8_8bpp_rvv_i64: 36.0
vp9_dc_left_16x16_8bpp_c: 93.2
vp9_dc_left_16x16_8bpp_rvv_i32: 67.7
vp9_dc_left_32x32_8bpp_c: 333.2
vp9_dc_left_32x32_8bpp_rvv_i32: 158.5
vp9_dc_top_8x8_8bpp_c: 38.7
vp9_dc_top_8x8_8bpp_rvv_i64: 36.0
vp9_dc_top_16x16_8bpp_c: 93.2
vp9_dc_top_16x16_8bpp_rvv_i32: 67.7
vp9_dc_top_32x32_8bpp_c: 333.2
vp9_dc_top_32x32_8bpp_rvv_i32: 156.2

Signed-off-by: Rémi Denis-Courmont <remi@remlab.net>
This commit is contained in:
sunyuechi 2024-02-26 14:42:17 +08:00 committed by Rémi Denis-Courmont
parent dedc2456bf
commit c3a96f97f8
6 changed files with 352 additions and 0 deletions

View File

@ -58,5 +58,7 @@ OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_init.o
RVV-OBJS-$(CONFIG_VC1DSP) += riscv/vc1dsp_rvv.o
OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_init.o
RVV-OBJS-$(CONFIG_VP8DSP) += riscv/vp8dsp_rvv.o
# VP9 decoder: the DSP init object goes into OBJS, the vector assembly into RVV-OBJS.
OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9dsp_init.o
RVV-OBJS-$(CONFIG_VP9_DECODER) += riscv/vp9_intra_rvv.o
OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_init.o
RVV-OBJS-$(CONFIG_VORBIS_DECODER) += riscv/vorbisdsp_rvv.o

View File

@ -0,0 +1,115 @@
/*
* Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS).
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/riscv/asm.S"
// avgdc: finish a DC sum and leave the rounded average in t1.
// On entry v8 holds the 8-bit pixels still to be summed and element 0 of v16
// holds the running sum (zero, or the sum of a previous edge).
// \size is the right-shift amount; the func_dc invocations in this file pass
// log2 of the total number of pixels being averaged.
// On exit the vector configuration is vl=1, e16, m1.
.macro avgdc size
// Widening unsigned reduction: v16[0] = v16[0] + sum of all active v8 elements.
vwredsumu.vs v16, v8, v16
// Switch to one 16-bit element so the scalar move reads the whole sum.
vsetivli zero, 1, e16, m1, ta, ma
vmv.x.s t1, v16
// Round to nearest: add half the divisor, then divide by 2^\size.
addi t1, t1, 1 << (\size - 1)
srai t1, t1, \size
.endm
// getdc: compute the DC fill value for the current block into t1.
//   \type  top   -> rounded average of the pixels at (a3)
//          left  -> rounded average of the pixels at (a2)
//          dc    -> rounded average of the pixels at (a2) and (a3) together
//          other -> used verbatim as an immediate (127/128/129 in this file)
//   \size  shift amount forwarded to avgdc
// The caller must already have selected an e8 configuration whose vl covers
// one edge. For top/left/dc the configuration is replaced by avgdc (vl=1, e16);
// for an immediate \type no vector instruction is emitted.
//
// The macro text is matched with nested .ifc blocks. .ifc compares strings,
// whereas .elseif evaluates an expression, so "\type == left" would compare
// assembler symbols (or a number against a symbol) and depend on how the
// assembler treats comparisons of undefined symbols.
.macro getdc type size
.ifc \type,top
        vmv.v.x         v16, zero               // clear the running sum
        vle8.v          v8, (a3)
        avgdc           \size
.else
.ifc \type,left
        vmv.v.x         v16, zero               // clear the running sum
        vle8.v          v8, (a2)
        avgdc           \size
.else
.ifc \type,dc
        vmv.v.x         v16, zero               // clear the running sum
        vle8.v          v8, (a2)
        vwredsumu.vs    v16, v8, v16            // v16[0] = sum of the (a2) edge
        vle8.v          v8, (a3)
        avgdc           \size                   // adds the (a3) edge, then averages
.else
        li              t1, \type               // constant predictor value
.endif
.endif
.endif
.endm
// dc_e32: body of a 16x16 or 32x32 DC predictor.
// Gets the fill value via getdc, broadcasts it to one row-wide vector and
// stores that row \size times, advancing a0 by a1 after each store.
//   \type, \n  forwarded to getdc (predictor kind and averaging shift)
//   \size      block edge length in pixels (16 or 32 in this file)
//   \restore   1 when the row-wide e8 configuration must be re-established
//              after getdc (the averaging variants leave vl=1, e16 behind)
.macro dc_e32 type size n restore
.ifc \size,32
// 32 is loaded into t0 because vsetivli only encodes a 5-bit immediate length.
li t0, 32
vsetvli zero, t0, e8, m2, ta, ma
.else
vsetivli zero, 16, e8, m1, ta, ma
.endif
getdc \type \n
// getdc only writes t1, so t0 still holds 32 for the 32x32 restore below.
.if \restore == 1 && \size == 32
vsetvli zero, t0, e8, m2, ta, ma
.elseif \restore == 1 && \size == 16
vsetivli zero, 16, e8, m1, ta, ma
.endif
// Broadcast the DC value to every byte of the row.
vmv.v.x v0, t1
// Fully unrolled: one row store per repetition.
.rept \size
vse8.v v0, (a0)
add a0, a0, a1
.endr
ret
.endm
// dc_e64: body of an 8x8 DC predictor.
// The whole block is written with one strided store of eight 64-bit elements,
// i.e. one 8-byte row per element, a1 bytes apart.
//   \type, \n        forwarded to getdc (predictor kind and averaging shift)
//   \size, \restore  accepted so the call matches dc_e32; not referenced here
.macro dc_e64 type size n restore
// Eight 8-bit elements: exactly one 8-pixel edge for getdc.
vsetivli zero, 8, e8, mf2, ta, ma
getdc \type \n
// Fill 64 bytes starting at v0 (LMUL=4) with the DC value. This needs at least
// 16 bytes per vector register, which the C init code checks via
// ff_get_rv_vlenb() >= 16.
li t0, 64
vsetvli zero, t0, e8, m4, ta, ma
vmv.v.x v0, t1
// Back to vl=8 with SEW=8, LMUL=1/2: the EEW=64 strided store then has EMUL=4
// and writes eight 64-bit elements taken from the register group at v0.
vsetivli zero, 8, e8, mf2, ta, ma
vsse64.v v0, (a0), a1
ret
.endm
// func_dc: emit the function ff_<name>_<size>x<size>_rvv.
//   \name     function name stem (dc, dc_left, dc_top, dc_127, ...)
//   \size     block edge length; 8 selects dc_e64, any other value dc_e32
//   \type     predictor kind handed to getdc
//   \n        averaging shift handed to getdc
//   \restore  forwarded to the body macro
//   \ext      second argument of func (func/endfunc are not defined in this
//             file; presumably they come from the included asm.S)
.macro func_dc name size type n restore ext
func ff_\()\name\()_\()\size\()x\size\()_rvv, \ext
.ifc \size,8
dc_e64 \type \size \n \restore
.else
dc_e32 \type \size \n \restore
.endif
endfunc
.endm
// Instantiations. Columns: name, size, type, shift, restore, extension.
// Constant predictors: \type is the fill value itself, so no averaging shift
// and no vector state to restore.
func_dc dc_127 32 127 0 0 zve32x
func_dc dc_127 16 127 0 0 zve32x
func_dc dc_127 8 127 0 0 zve64x
func_dc dc_128 32 128 0 0 zve32x
func_dc dc_128 16 128 0 0 zve32x
func_dc dc_128 8 128 0 0 zve64x
func_dc dc_129 32 129 0 0 zve32x
func_dc dc_129 16 129 0 0 zve32x
func_dc dc_129 8 129 0 0 zve64x
// Averaging predictors: shift = log2(number of edge pixels summed), which is
// 2 * size for dc and size for dc_left / dc_top. The 8x8 variants pass
// restore = 0 because dc_e64 always reprograms the vector configuration itself.
func_dc dc 32 dc 6 1 zve32x
func_dc dc 16 dc 5 1 zve32x
func_dc dc 8 dc 4 0 zve64x
func_dc dc_left 32 left 5 1 zve32x
func_dc dc_left 16 left 4 1 zve32x
func_dc dc_left 8 left 3 0 zve64x
func_dc dc_top 32 top 5 1 zve32x
func_dc dc_top 16 top 4 1 zve32x
func_dc dc_top 8 top 3 0 zve64x

171
libavcodec/riscv/vp9dsp.h Normal file
View File

@ -0,0 +1,171 @@
/*
* Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS).
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#ifndef AVCODEC_RISCV_VP9DSP_RISCV_H
#define AVCODEC_RISCV_VP9DSP_RISCV_H
#include <stddef.h>
#include <stdint.h>
/*
 * RVV DC intra predictors for 8x8, 16x16 and 32x32 blocks.
 *
 * All variants share one signature: dst is the destination block and stride
 * the distance between its rows. In the RVV assembly the "left" variants
 * read the edge through l, the "top" variants read it through a, the plain
 * "dc" variants read both, and the 127/128/129 variants read neither.
 */
void ff_dc_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_top_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_top_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_top_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_left_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_left_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_left_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_127_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_127_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_127_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_128_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_128_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_128_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_129_32x32_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_129_16x16_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
void ff_dc_129_8x8_rvv(uint8_t *dst, ptrdiff_t stride, const uint8_t *l,
const uint8_t *a);
/*
 * Declares six prototypes for one 8-tap filter type and block size:
 * ff_{put,avg}_8tap_<type>_<SIZE>{h,v,hv}_rvv.
 * type_idx is accepted but never expanded by this macro.
 */
#define VP9_8TAP_RISCV_RVV_FUNC(SIZE, type, type_idx) \
void ff_put_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, \
ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_put_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, \
ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_put_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, \
ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_avg_8tap_##type##_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, \
ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_avg_8tap_##type##_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, \
ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_avg_8tap_##type##_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, \
ptrdiff_t srcstride, \
int h, int mx, int my);
/*
 * Declares six bilinear prototypes for one block size:
 * ff_{put,avg}_bilin_<SIZE>{h,v,hv}_rvv.
 */
#define VP9_BILINEAR_RISCV_RVV_FUNC(SIZE) \
void ff_put_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_put_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_put_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_avg_bilin_##SIZE##h_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_avg_bilin_##SIZE##v_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_avg_bilin_##SIZE##hv_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, ptrdiff_t srcstride, \
int h, int mx, int my);
/*
 * Declares the ff_copy<SIZE>_rvv and ff_avg<SIZE>_rvv prototypes for one
 * block size.
 */
#define VP9_COPY_AVG_RISCV_RVV_FUNC(SIZE) \
void ff_copy##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, ptrdiff_t srcstride, \
int h, int mx, int my); \
\
void ff_avg##SIZE##_rvv(uint8_t *dst, ptrdiff_t dststride, \
const uint8_t *src, ptrdiff_t srcstride, \
int h, int mx, int my);
/*
 * Motion-compensation prototypes for block sizes 64, 32, 16, 8 and 4.
 * These are declarations only. The FILTER_8TAP_* tokens are passed as the
 * type_idx argument, which the macro never expands, so they do not need to
 * be declared for this header to compile.
 */
VP9_8TAP_RISCV_RVV_FUNC(64, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_RISCV_RVV_FUNC(32, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_RISCV_RVV_FUNC(16, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_RISCV_RVV_FUNC(8, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_RISCV_RVV_FUNC(4, regular, FILTER_8TAP_REGULAR);
VP9_8TAP_RISCV_RVV_FUNC(64, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_RISCV_RVV_FUNC(32, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_RISCV_RVV_FUNC(16, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_RISCV_RVV_FUNC(8, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_RISCV_RVV_FUNC(4, sharp, FILTER_8TAP_SHARP);
VP9_8TAP_RISCV_RVV_FUNC(64, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_RISCV_RVV_FUNC(32, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_RISCV_RVV_FUNC(16, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_RISCV_RVV_FUNC(8, smooth, FILTER_8TAP_SMOOTH);
VP9_8TAP_RISCV_RVV_FUNC(4, smooth, FILTER_8TAP_SMOOTH);
VP9_BILINEAR_RISCV_RVV_FUNC(64);
VP9_BILINEAR_RISCV_RVV_FUNC(32);
VP9_BILINEAR_RISCV_RVV_FUNC(16);
VP9_BILINEAR_RISCV_RVV_FUNC(8);
VP9_BILINEAR_RISCV_RVV_FUNC(4);
VP9_COPY_AVG_RISCV_RVV_FUNC(64);
VP9_COPY_AVG_RISCV_RVV_FUNC(32);
VP9_COPY_AVG_RISCV_RVV_FUNC(16);
VP9_COPY_AVG_RISCV_RVV_FUNC(8);
VP9_COPY_AVG_RISCV_RVV_FUNC(4);
/* Declares the ff_copy<SIZE>_rvi prototype for one block size. */
#define VP9_COPY_RISCV_RVI_FUNC(SIZE) \
void ff_copy##SIZE##_rvi(uint8_t *dst, ptrdiff_t dststride, \
                         const uint8_t *src, ptrdiff_t srcstride, \
                         int h, int mx, int my);

VP9_COPY_RISCV_RVI_FUNC(8);
VP9_COPY_RISCV_RVI_FUNC(4);

/*
 * The prototype generators are private to this header; drop all of them so
 * none leaks into the files that include it.
 */
#undef VP9_8TAP_RISCV_RVV_FUNC
#undef VP9_BILINEAR_RISCV_RVV_FUNC
#undef VP9_COPY_AVG_RISCV_RVV_FUNC
#undef VP9_COPY_RISCV_RVI_FUNC
#endif // #ifndef AVCODEC_RISCV_VP9DSP_RISCV_H

View File

@ -0,0 +1,61 @@
/*
* Copyright (c) 2024 Institue of Software Chinese Academy of Sciences (ISCAS).
*
* This file is part of FFmpeg.
*
* FFmpeg is free software; you can redistribute it and/or
* modify it under the terms of the GNU Lesser General Public
* License as published by the Free Software Foundation; either
* version 2.1 of the License, or (at your option) any later version.
*
* FFmpeg is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
* Lesser General Public License for more details.
*
* You should have received a copy of the GNU Lesser General Public
* License along with FFmpeg; if not, write to the Free Software
* Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
*/
#include "libavutil/attributes.h"
#include "libavutil/cpu.h"
#include "libavutil/riscv/cpu.h"
#include "libavcodec/vp9dsp.h"
#include "vp9dsp.h"
/**
 * Install the RVV DC intra predictors into the VP9 DSP table.
 *
 * Nothing is installed unless the build has RVV support and bpp is 8.
 * The 8x8 predictors require AV_CPU_FLAG_RVV_I64, the 16x16 and 32x32 ones
 * AV_CPU_FLAG_RVV_I32; both groups additionally require vector registers of
 * at least 16 bytes. ff_get_rv_vlenb() is only evaluated once the matching
 * CPU flag test has passed.
 *
 * @param dsp context whose intra_pred entries are overridden
 * @param bpp bit depth the context is being initialised for
 */
static av_cold void vp9dsp_intrapred_init_rvv(VP9DSPContext *dsp, int bpp)
{
#if HAVE_RVV
    const int cpu_flags = av_get_cpu_flags();

    /* Only 8-bit implementations exist. */
    if (bpp != 8)
        return;

    if ((cpu_flags & AV_CPU_FLAG_RVV_I64) && ff_get_rv_vlenb() >= 16) {
        dsp->intra_pred[TX_8X8][DC_PRED]      = ff_dc_8x8_rvv;
        dsp->intra_pred[TX_8X8][TOP_DC_PRED]  = ff_dc_top_8x8_rvv;
        dsp->intra_pred[TX_8X8][LEFT_DC_PRED] = ff_dc_left_8x8_rvv;
        dsp->intra_pred[TX_8X8][DC_127_PRED]  = ff_dc_127_8x8_rvv;
        dsp->intra_pred[TX_8X8][DC_128_PRED]  = ff_dc_128_8x8_rvv;
        dsp->intra_pred[TX_8X8][DC_129_PRED]  = ff_dc_129_8x8_rvv;
    }

    if ((cpu_flags & AV_CPU_FLAG_RVV_I32) && ff_get_rv_vlenb() >= 16) {
        dsp->intra_pred[TX_16X16][DC_PRED]      = ff_dc_16x16_rvv;
        dsp->intra_pred[TX_16X16][TOP_DC_PRED]  = ff_dc_top_16x16_rvv;
        dsp->intra_pred[TX_16X16][LEFT_DC_PRED] = ff_dc_left_16x16_rvv;
        dsp->intra_pred[TX_16X16][DC_127_PRED]  = ff_dc_127_16x16_rvv;
        dsp->intra_pred[TX_16X16][DC_128_PRED]  = ff_dc_128_16x16_rvv;
        dsp->intra_pred[TX_16X16][DC_129_PRED]  = ff_dc_129_16x16_rvv;

        dsp->intra_pred[TX_32X32][DC_PRED]      = ff_dc_32x32_rvv;
        dsp->intra_pred[TX_32X32][TOP_DC_PRED]  = ff_dc_top_32x32_rvv;
        dsp->intra_pred[TX_32X32][LEFT_DC_PRED] = ff_dc_left_32x32_rvv;
        dsp->intra_pred[TX_32X32][DC_127_PRED]  = ff_dc_127_32x32_rvv;
        dsp->intra_pred[TX_32X32][DC_128_PRED]  = ff_dc_128_32x32_rvv;
        dsp->intra_pred[TX_32X32][DC_129_PRED]  = ff_dc_129_32x32_rvv;
    }
#endif
}
/**
 * RISC-V entry point for VP9 DSP initialisation.
 *
 * Forwards to the intra-prediction setup only; bitexact is accepted but not
 * read by this function.
 *
 * @param dsp      context to populate
 * @param bpp      bit depth the context is being initialised for
 * @param bitexact unused here
 */
av_cold void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact)
{
vp9dsp_intrapred_init_rvv(dsp, bpp);
}

View File

@ -100,6 +100,8 @@ av_cold void ff_vp9dsp_init(VP9DSPContext *dsp, int bpp, int bitexact)
ff_vp9dsp_init_aarch64(dsp, bpp);
#elif ARCH_ARM
ff_vp9dsp_init_arm(dsp, bpp);
#elif ARCH_RISCV
ff_vp9dsp_init_riscv(dsp, bpp, bitexact);
#elif ARCH_X86
ff_vp9dsp_init_x86(dsp, bpp, bitexact);
#elif ARCH_MIPS

View File

@ -131,6 +131,7 @@ void ff_vp9dsp_init_12(VP9DSPContext *dsp);
void ff_vp9dsp_init_aarch64(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_arm(VP9DSPContext *dsp, int bpp);
/* RISC-V hook; takes bitexact like the x86 hook, called from ff_vp9dsp_init(). */
void ff_vp9dsp_init_riscv(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp, int bitexact);
void ff_vp9dsp_init_mips(VP9DSPContext *dsp, int bpp);
void ff_vp9dsp_init_loongarch(VP9DSPContext *dsp, int bpp);