lavu/fixed_dsp: R-V V vector_fmul_add

vector_fmul_add_fixed_c: 2.2
vector_fmul_add_fixed_rvv_i64: 0.5
This commit is contained in:
Rémi Denis-Courmont 2023-10-03 22:06:07 +03:00
parent 9091ffb006
commit bf911cc1bf
2 changed files with 26 additions and 0 deletions

View File

@ -25,6 +25,8 @@
#include "libavutil/cpu.h"
#include "libavutil/fixed_dsp.h"
/*
 * Prototypes of the RISC-V Vector (RVV) routines that
 * ff_fixed_dsp_init_riscv() installs into AVFixedDSPContext.
 * ff_vector_fmul_add_fixed_rvv and ff_scalarproduct_fixed_rvv are defined
 * with the `func` macro in the assembly source; ff_butterflies_fixed_rvv is
 * presumably defined there as well (its definition is not visible here).
 */

/* Multiplies src0 by src1 element-wise as fixed-point values and adds src2,
 * writing len results to dst. */
void ff_vector_fmul_add_fixed_rvv(int *dst, const int *src0, const int *src1,
const int *src2, int len);
int ff_scalarproduct_fixed_rvv(const int *v1, const int *v2, int len);
void ff_butterflies_fixed_rvv(int *v1, int *v2, int len);
@ -34,8 +36,11 @@ av_cold void ff_fixed_dsp_init_riscv(AVFixedDSPContext *fdsp)
int flags = av_get_cpu_flags();
if ((flags & AV_CPU_FLAG_RVV_I32) && (flags & AV_CPU_FLAG_RVB_ADDR)) {
fdsp->vector_fmul_add = ff_vector_fmul_add_fixed_rvv;
if (flags & AV_CPU_FLAG_RVV_I64)
fdsp->scalarproduct_fixed = ff_scalarproduct_fixed_rvv;
fdsp->butterflies_fixed = ff_butterflies_fixed_rvv;
}
#endif

View File

@ -20,6 +20,27 @@
#include "asm.S"
// Fixed-point multiply-add over 32-bit integer vectors:
//   dst[i] = round(src0[i] * src1[i] / 2^31) + src2[i]   for 0 <= i < len
// Arguments, following the C prototype and the standard calling convention:
//   a0 = dst, a1 = src0, a2 = src1, a3 = src2, a4 = len
// The function overwrites vxrm and does not restore it before returning.
func ff_vector_fmul_add_fixed_rvv, zve32x
// Fixed-point rounding mode 0 = round-to-nearest-up, used by vsmul below.
csrwi vxrm, 0
1:
// t0 = number of 32-bit elements processed in this pass (at most a4).
// LMUL = 8, so each vector operand below is a group of eight registers.
vsetvli t0, a4, e32, m8, ta, ma
// v16 = next t0 elements of src0.
vle32.v v16, (a1)
// a4 = elements still to do after this pass.
sub a4, a4, t0
// v24 = next t0 elements of src1.
vle32.v v24, (a2)
// Advance src0 by t0 elements: a1 += t0 * 4 bytes (sh2add is a Zba instruction).
sh2add a1, t0, a1
// v8 = signed saturating fractional product: (v16 * v24) >> 31, rounded per vxrm.
vsmul.vv v8, v16, v24
// Advance src1 by t0 elements.
sh2add a2, t0, a2
// v0 = next t0 elements of src2.
vle32.v v0,(a3)
// Advance src2 by t0 elements.
sh2add a3, t0, a3
// Add src2 to the product; this is a plain (non-saturating) add.
vadd.vv v8, v8, v0
// Store t0 results to dst.
vse32.v v8, (a0)
// Advance dst by t0 elements.
sh2add a0, t0, a0
// Loop until no elements remain.
bnez a4, 1b
ret
endfunc
func ff_scalarproduct_fixed_rvv, zve64x
li t1, 1 << 30
vsetvli t0, zero, e64, m8, ta, ma