From 9a840ffa176038d44e0197283590f891b2cf9eeb Mon Sep 17 00:00:00 2001 From: Shiyou Yin Date: Wed, 15 Dec 2021 11:51:04 +0800 Subject: [PATCH] avutil: [loongarch] Add support for loongarch SIMD. LSX and LASX is loongarch SIMD extention. They are enabled by default if compiler support it, and can be disabled with '--disable-lsx' '--disable-lasx'. Change-Id: Ie2608ea61dbd9b7fffadbf0ec2348bad6c124476 Reviewed-by: Shiyou Yin Reviewed-by: guxiwei Signed-off-by: Michael Niedermayer --- Makefile | 2 +- configure | 20 +++++++++-- ffbuild/arch.mak | 4 ++- ffbuild/common.mak | 8 +++++ libavutil/cpu.c | 7 ++++ libavutil/cpu.h | 4 +++ libavutil/cpu_internal.h | 2 ++ libavutil/loongarch/Makefile | 1 + libavutil/loongarch/cpu.c | 69 ++++++++++++++++++++++++++++++++++++ libavutil/loongarch/cpu.h | 31 ++++++++++++++++ libavutil/tests/cpu.c | 3 ++ tests/checkasm/checkasm.c | 3 ++ 12 files changed, 150 insertions(+), 4 deletions(-) create mode 100644 libavutil/loongarch/Makefile create mode 100644 libavutil/loongarch/cpu.c create mode 100644 libavutil/loongarch/cpu.h diff --git a/Makefile b/Makefile index 26c9107237..5b20658b52 100644 --- a/Makefile +++ b/Makefile @@ -89,7 +89,7 @@ SUBDIR_VARS := CLEANFILES FFLIBS HOSTPROGS TESTPROGS TOOLS \ ARMV5TE-OBJS ARMV6-OBJS ARMV8-OBJS VFP-OBJS NEON-OBJS \ ALTIVEC-OBJS VSX-OBJS MMX-OBJS X86ASM-OBJS \ MIPSFPU-OBJS MIPSDSPR2-OBJS MIPSDSP-OBJS MSA-OBJS \ - MMI-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS + MMI-OBJS LSX-OBJS LASX-OBJS OBJS SLIBOBJS HOSTOBJS TESTOBJS define RESET $(1) := diff --git a/configure b/configure index a7593ec2db..c4afde4c5c 100755 --- a/configure +++ b/configure @@ -452,7 +452,9 @@ Optimization options (experts only): --disable-mipsdspr2 disable MIPS DSP ASE R2 optimizations --disable-msa disable MSA optimizations --disable-mipsfpu disable floating point MIPS optimizations - --disable-mmi disable Loongson SIMD optimizations + --disable-mmi disable Loongson MMI optimizations + --disable-lsx disable Loongson LSX optimizations + --disable-lasx disable Loongson LASX optimizations --disable-fast-unaligned consider unaligned accesses slow Developer options (useful when working on FFmpeg itself): @@ -2081,6 +2083,8 @@ ARCH_EXT_LIST_LOONGSON=" loongson2 loongson3 mmi + lsx + lasx " ARCH_EXT_LIST_X86_SIMD=" @@ -2617,6 +2621,10 @@ power8_deps="vsx" loongson2_deps="mips" loongson3_deps="mips" +mmi_deps_any="loongson2 loongson3" +lsx_deps="loongarch" +lasx_deps="lsx" + mips32r2_deps="mips" mips32r5_deps="mips" mips32r6_deps="mips" @@ -2625,7 +2633,6 @@ mips64r6_deps="mips" mipsfpu_deps="mips" mipsdsp_deps="mips" mipsdspr2_deps="mips" -mmi_deps_any="loongson2 loongson3" msa_deps="mipsfpu" cpunop_deps="i686" @@ -6134,6 +6141,9 @@ EOF ;; esac +elif enabled loongarch; then + enabled lsx && check_inline_asm lsx '"vadd.b $vr0, $vr1, $vr2"' '-mlsx' && append LSXFLAGS '-mlsx' + enabled lasx && check_inline_asm lasx '"xvadd.b $xr0, $xr1, $xr2"' '-mlasx' && append LASXFLAGS '-mlasx' fi check_cc intrinsics_neon arm_neon.h "int16x8_t test = vdupq_n_s16(0)" @@ -7484,6 +7494,10 @@ if enabled ppc; then echo "PPC 4xx optimizations ${ppc4xx-no}" echo "dcbzl available ${dcbzl-no}" fi +if enabled loongarch; then + echo "LSX enabled ${lsx-no}" + echo "LASX enabled ${lasx-no}" +fi echo "debug symbols ${debug-no}" echo "strip symbols ${stripping-no}" echo "optimize for size ${small-no}" @@ -7645,6 +7659,8 @@ ASMSTRIPFLAGS=$ASMSTRIPFLAGS X86ASMFLAGS=$X86ASMFLAGS MSAFLAGS=$MSAFLAGS MMIFLAGS=$MMIFLAGS +LSXFLAGS=$LSXFLAGS +LASXFLAGS=$LASXFLAGS BUILDSUF=$build_suffix PROGSSUF=$progs_suffix FULLNAME=$FULLNAME diff --git a/ffbuild/arch.mak b/ffbuild/arch.mak index e09006efca..997e31e85e 100644 --- a/ffbuild/arch.mak +++ b/ffbuild/arch.mak @@ -8,7 +8,9 @@ OBJS-$(HAVE_MIPSFPU) += $(MIPSFPU-OBJS) $(MIPSFPU-OBJS-yes) OBJS-$(HAVE_MIPSDSP) += $(MIPSDSP-OBJS) $(MIPSDSP-OBJS-yes) OBJS-$(HAVE_MIPSDSPR2) += $(MIPSDSPR2-OBJS) $(MIPSDSPR2-OBJS-yes) OBJS-$(HAVE_MSA) += $(MSA-OBJS) $(MSA-OBJS-yes) -OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes) +OBJS-$(HAVE_MMI) += $(MMI-OBJS) $(MMI-OBJS-yes) +OBJS-$(HAVE_LSX) += $(LSX-OBJS) $(LSX-OBJS-yes) +OBJS-$(HAVE_LASX) += $(LASX-OBJS) $(LASX-OBJS-yes) OBJS-$(HAVE_ALTIVEC) += $(ALTIVEC-OBJS) $(ALTIVEC-OBJS-yes) OBJS-$(HAVE_VSX) += $(VSX-OBJS) $(VSX-OBJS-yes) diff --git a/ffbuild/common.mak b/ffbuild/common.mak index 268ae61154..0eb831d434 100644 --- a/ffbuild/common.mak +++ b/ffbuild/common.mak @@ -59,6 +59,8 @@ COMPILE_HOSTC = $(call COMPILE,HOSTCC) COMPILE_NVCC = $(call COMPILE,NVCC) COMPILE_MMI = $(call COMPILE,CC,MMIFLAGS) COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS) +COMPILE_LSX = $(call COMPILE,CC,LSXFLAGS) +COMPILE_LASX = $(call COMPILE,CC,LASXFLAGS) %_mmi.o: %_mmi.c $(COMPILE_MMI) @@ -66,6 +68,12 @@ COMPILE_MSA = $(call COMPILE,CC,MSAFLAGS) %_msa.o: %_msa.c $(COMPILE_MSA) +%_lsx.o: %_lsx.c + $(COMPILE_LSX) + +%_lasx.o: %_lasx.c + $(COMPILE_LASX) + %.o: %.c $(COMPILE_C) diff --git a/libavutil/cpu.c b/libavutil/cpu.c index 4627af4f23..63efb97ffd 100644 --- a/libavutil/cpu.c +++ b/libavutil/cpu.c @@ -62,6 +62,8 @@ static int get_cpu_flags(void) return ff_get_cpu_flags_ppc(); if (ARCH_X86) return ff_get_cpu_flags_x86(); + if (ARCH_LOONGARCH) + return ff_get_cpu_flags_loongarch(); return 0; } @@ -168,6 +170,9 @@ int av_parse_cpu_caps(unsigned *flags, const char *s) #elif ARCH_MIPS { "mmi", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MMI }, .unit = "flags" }, { "msa", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_MSA }, .unit = "flags" }, +#elif ARCH_LOONGARCH + { "lsx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LSX }, .unit = "flags" }, + { "lasx", NULL, 0, AV_OPT_TYPE_CONST, { .i64 = AV_CPU_FLAG_LASX }, .unit = "flags" }, #endif { NULL }, }; @@ -253,6 +258,8 @@ size_t av_cpu_max_align(void) return ff_get_cpu_max_align_ppc(); if (ARCH_X86) return ff_get_cpu_max_align_x86(); + if (ARCH_LOONGARCH) + return ff_get_cpu_max_align_loongarch(); return 8; } diff --git a/libavutil/cpu.h b/libavutil/cpu.h index afea0640b4..ae443eccad 100644 --- a/libavutil/cpu.h +++ b/libavutil/cpu.h @@ -72,6 +72,10 @@ #define AV_CPU_FLAG_MMI (1 << 0) #define AV_CPU_FLAG_MSA (1 << 1) +//Loongarch SIMD extension. +#define AV_CPU_FLAG_LSX (1 << 0) +#define AV_CPU_FLAG_LASX (1 << 1) + /** * Return the flags which specify extensions supported by the CPU. * The returned value is affected by av_force_cpu_flags() if that was used diff --git a/libavutil/cpu_internal.h b/libavutil/cpu_internal.h index 889764320b..e207b2d480 100644 --- a/libavutil/cpu_internal.h +++ b/libavutil/cpu_internal.h @@ -46,11 +46,13 @@ int ff_get_cpu_flags_aarch64(void); int ff_get_cpu_flags_arm(void); int ff_get_cpu_flags_ppc(void); int ff_get_cpu_flags_x86(void); +int ff_get_cpu_flags_loongarch(void); size_t ff_get_cpu_max_align_mips(void); size_t ff_get_cpu_max_align_aarch64(void); size_t ff_get_cpu_max_align_arm(void); size_t ff_get_cpu_max_align_ppc(void); size_t ff_get_cpu_max_align_x86(void); +size_t ff_get_cpu_max_align_loongarch(void); #endif /* AVUTIL_CPU_INTERNAL_H */ diff --git a/libavutil/loongarch/Makefile b/libavutil/loongarch/Makefile new file mode 100644 index 0000000000..2addd9351c --- /dev/null +++ b/libavutil/loongarch/Makefile @@ -0,0 +1 @@ +OBJS += loongarch/cpu.o diff --git a/libavutil/loongarch/cpu.c b/libavutil/loongarch/cpu.c new file mode 100644 index 0000000000..e4b240bc44 --- /dev/null +++ b/libavutil/loongarch/cpu.c @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2020 Loongson Technology Corporation Limited + * Contributed by Shiyou Yin + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#include +#include "cpu.h" + +#define LOONGARCH_CFG2 0x2 +#define LOONGARCH_CFG2_LSX (1 << 6) +#define LOONGARCH_CFG2_LASX (1 << 7) + +static int cpu_flags_cpucfg(void) +{ + int flags = 0; + uint32_t cfg2 = 0; + + __asm__ volatile( + "cpucfg %0, %1 \n\t" + : "+&r"(cfg2) + : "r"(LOONGARCH_CFG2) + ); + + if (cfg2 & LOONGARCH_CFG2_LSX) + flags |= AV_CPU_FLAG_LSX; + + if (cfg2 & LOONGARCH_CFG2_LASX) + flags |= AV_CPU_FLAG_LASX; + + return flags; +} + +int ff_get_cpu_flags_loongarch(void) +{ +#if defined __linux__ + return cpu_flags_cpucfg(); +#else + /* Assume no SIMD ASE supported */ + return 0; +#endif +} + +size_t ff_get_cpu_max_align_loongarch(void) +{ + int flags = av_get_cpu_flags(); + + if (flags & AV_CPU_FLAG_LASX) + return 32; + if (flags & AV_CPU_FLAG_LSX) + return 16; + + return 8; +} diff --git a/libavutil/loongarch/cpu.h b/libavutil/loongarch/cpu.h new file mode 100644 index 0000000000..1a445c69bc --- /dev/null +++ b/libavutil/loongarch/cpu.h @@ -0,0 +1,31 @@ +/* + * Copyright (c) 2020 Loongson Technology Corporation Limited + * Contributed by Shiyou Yin + * + * This file is part of FFmpeg. + * + * FFmpeg is free software; you can redistribute it and/or + * modify it under the terms of the GNU Lesser General Public + * License as published by the Free Software Foundation; either + * version 2.1 of the License, or (at your option) any later version. + * + * FFmpeg is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU + * Lesser General Public License for more details. + * + * You should have received a copy of the GNU Lesser General Public + * License along with FFmpeg; if not, write to the Free Software + * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA + */ + +#ifndef AVUTIL_LOONGARCH_CPU_H +#define AVUTIL_LOONGARCH_CPU_H + +#include "libavutil/cpu.h" +#include "libavutil/cpu_internal.h" + +#define have_lsx(flags) CPUEXT(flags, LSX) +#define have_lasx(flags) CPUEXT(flags, LASX) + +#endif /* AVUTIL_LOONGARCH_CPU_H */ diff --git a/libavutil/tests/cpu.c b/libavutil/tests/cpu.c index c853371fb3..0a6c0cd32e 100644 --- a/libavutil/tests/cpu.c +++ b/libavutil/tests/cpu.c @@ -77,6 +77,9 @@ static const struct { { AV_CPU_FLAG_BMI2, "bmi2" }, { AV_CPU_FLAG_AESNI, "aesni" }, { AV_CPU_FLAG_AVX512, "avx512" }, +#elif ARCH_LOONGARCH + { AV_CPU_FLAG_LSX, "lsx" }, + { AV_CPU_FLAG_LASX, "lasx" }, #endif { 0 } }; diff --git a/tests/checkasm/checkasm.c b/tests/checkasm/checkasm.c index b1353f7cbe..90d080de02 100644 --- a/tests/checkasm/checkasm.c +++ b/tests/checkasm/checkasm.c @@ -236,6 +236,9 @@ static const struct { { "FMA4", "fma4", AV_CPU_FLAG_FMA4 }, { "AVX2", "avx2", AV_CPU_FLAG_AVX2 }, { "AVX-512", "avx512", AV_CPU_FLAG_AVX512 }, +#elif ARCH_LOONGARCH + { "LSX", "lsx", AV_CPU_FLAG_LSX }, + { "LASX", "lasx", AV_CPU_FLAG_LASX }, #endif { NULL } };