From 608319a311a31f7d85333a7b08286c00be38eab6 Mon Sep 17 00:00:00 2001 From: Andreas Rheinhardt Date: Sat, 11 Jun 2022 01:07:57 +0200 Subject: [PATCH] swscale/x86/rgb2rgb: Remove obsolete MMX, 3dnow functions x64 always has MMX, MMXEXT, SSE and SSE2 and this means that some functions for MMX, MMXEXT and 3dnow are always overridden by other functions (unless one e.g. explicitly disables SSE2) for x64. So given that the only systems that benefit from these functions are truly ancient 32-bit x86s, they are removed. Signed-off-by: Andreas Rheinhardt --- libswscale/x86/rgb2rgb.c | 26 ------- libswscale/x86/rgb2rgb_template.c | 123 ++++-------------------------- 2 files changed, 15 insertions(+), 134 deletions(-) diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index 0ab139aca4..b325e5dbd5 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -85,20 +85,11 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset); // Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one. 
-#define COMPILE_TEMPLATE_MMXEXT 0 -#define COMPILE_TEMPLATE_AMD3DNOW 0 #define COMPILE_TEMPLATE_SSE2 0 #define COMPILE_TEMPLATE_AVX 0 -//MMX versions -#undef RENAME -#define RENAME(a) a ## _mmx -#include "rgb2rgb_template.c" - // MMXEXT versions #undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 1 #define RENAME(a) a ## _mmxext #include "rgb2rgb_template.c" @@ -116,19 +107,6 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset); #define RENAME(a) a ## _avx #include "rgb2rgb_template.c" -//3DNOW versions -#undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#undef COMPILE_TEMPLATE_SSE2 -#undef COMPILE_TEMPLATE_AVX -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMXEXT 0 -#define COMPILE_TEMPLATE_SSE2 0 -#define COMPILE_TEMPLATE_AVX 0 -#define COMPILE_TEMPLATE_AMD3DNOW 1 -#define RENAME(a) a ## _3dnow -#include "rgb2rgb_template.c" - /* RGB15->RGB16 original by Strepto/Astral ported to gcc & bugfixed : A'rpi @@ -165,10 +143,6 @@ av_cold void rgb2rgb_init_x86(void) int cpu_flags = av_get_cpu_flags(); #if HAVE_INLINE_ASM - if (INLINE_MMX(cpu_flags)) - rgb2rgb_init_mmx(); - if (INLINE_AMD3DNOW(cpu_flags)) - rgb2rgb_init_3dnow(); if (INLINE_MMXEXT(cpu_flags)) rgb2rgb_init_mmxext(); if (INLINE_SSE2(cpu_flags)) diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index ae2469e663..4aba25dd51 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -36,35 +36,15 @@ #undef SFENCE #undef PAVGB -#if COMPILE_TEMPLATE_AMD3DNOW -#define PREFETCH "prefetch" -#define PAVGB "pavgusb" -#elif COMPILE_TEMPLATE_MMXEXT #define PREFETCH "prefetchnta" #define PAVGB "pavgb" -#else -#define PREFETCH " # nop" -#endif - -#if COMPILE_TEMPLATE_AMD3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. 
*/ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif - -#if COMPILE_TEMPLATE_MMXEXT #define MOVNTQ "movntq" #define SFENCE "sfence" -#else -#define MOVNTQ "movq" -#define SFENCE " # nop" -#endif + +#define EMMS "emms" #if !COMPILE_TEMPLATE_SSE2 -#if !COMPILE_TEMPLATE_AMD3DNOW - static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size) { uint8_t *dest = dst; @@ -1353,9 +1333,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t SFENCE" \n\t" :::"memory"); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) { int x,y; @@ -1453,9 +1431,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid SFENCE" \n\t" :::"memory"); } -#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ -#if !COMPILE_TEMPLATE_AMD3DNOW /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) @@ -1559,7 +1535,6 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t SFENCE" \n\t" :::"memory"); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ /** * Height should be a multiple of 2 and width should be a multiple of 2. 
@@ -1673,7 +1648,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "1: \n\t" PREFETCH" 64(%0, %%"FF_REG_d") \n\t" PREFETCH" 64(%1, %%"FF_REG_d") \n\t" -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW "movq (%0, %%"FF_REG_d"), %%mm0 \n\t" "movq (%1, %%"FF_REG_d"), %%mm1 \n\t" "movq 6(%0, %%"FF_REG_d"), %%mm2 \n\t" @@ -1688,32 +1662,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ PAVGB" %%mm3, %%mm2 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" -#else - "movd (%0, %%"FF_REG_d"), %%mm0 \n\t" - "movd (%1, %%"FF_REG_d"), %%mm1 \n\t" - "movd 3(%0, %%"FF_REG_d"), %%mm2 \n\t" - "movd 3(%1, %%"FF_REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm0 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm2, %%mm0 \n\t" - "movd 6(%0, %%"FF_REG_d"), %%mm4 \n\t" - "movd 6(%1, %%"FF_REG_d"), %%mm1 \n\t" - "movd 9(%0, %%"FF_REG_d"), %%mm2 \n\t" - "movd 9(%1, %%"FF_REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm4, %%mm2 \n\t" - "psrlw $2, %%mm0 \n\t" - "psrlw $2, %%mm2 \n\t" -#endif "movq "BGR2V_IDX"(%5), %%mm1 \n\t" "movq "BGR2V_IDX"(%5), %%mm3 \n\t" @@ -1732,7 +1680,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 "psraw $7, %%mm0 \n\t" -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW "movq 12(%0, %%"FF_REG_d"), %%mm4 \n\t" "movq 12(%1, %%"FF_REG_d"), %%mm1 \n\t" "movq 18(%0, %%"FF_REG_d"), %%mm2 \n\t" @@ -1747,33 +1694,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ PAVGB" %%mm3, %%mm2 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" -#else - "movd 12(%0, 
%%"FF_REG_d"), %%mm4 \n\t" - "movd 12(%1, %%"FF_REG_d"), %%mm1 \n\t" - "movd 15(%0, %%"FF_REG_d"), %%mm2 \n\t" - "movd 15(%1, %%"FF_REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm2, %%mm4 \n\t" - "movd 18(%0, %%"FF_REG_d"), %%mm5 \n\t" - "movd 18(%1, %%"FF_REG_d"), %%mm1 \n\t" - "movd 21(%0, %%"FF_REG_d"), %%mm2 \n\t" - "movd 21(%1, %%"FF_REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm5 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm5, %%mm2 \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "psrlw $2, %%mm4 \n\t" - "psrlw $2, %%mm2 \n\t" -#endif "movq "BGR2V_IDX"(%5), %%mm1 \n\t" "movq "BGR2V_IDX"(%5), %%mm3 \n\t" @@ -1822,7 +1742,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ #endif /* HAVE_7REGS */ #endif /* !COMPILE_TEMPLATE_SSE2 */ -#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX +#if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, int width, int height, int src1Stride, int src2Stride, int dstStride) @@ -1833,7 +1753,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui int w; if (width >= 16) { -#if COMPILE_TEMPLATE_SSE2 if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) { __asm__( "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" @@ -1854,7 +1773,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a ); } else -#endif __asm__( "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" "1: \n\t" @@ -1896,10 +1814,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui ::: "memory" ); } -#endif /* 
!COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */ +#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */ #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL -#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM +#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused, const uint8_t *src1, @@ -1919,18 +1837,14 @@ static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t dst2 += dst2Stride; } __asm__( -#if !COMPILE_TEMPLATE_SSE2 - EMMS" \n\t" -#endif SFENCE" \n\t" ::: "memory" ); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM */ #endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */ #if !COMPILE_TEMPLATE_SSE2 -#if !COMPILE_TEMPLATE_AMD3DNOW static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, int width, int height, @@ -2108,7 +2022,6 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 ::: "memory" ); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) { @@ -2185,7 +2098,7 @@ static void RENAME(extract_odd)(const uint8_t *src, uint8_t *dst, x86_reg count) } } -#if !COMPILE_TEMPLATE_AMD3DNOW +#if ARCH_X86_32 static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) { dst0+= count; @@ -2231,7 +2144,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds count++; } } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* ARCH_X86_32 */ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) { @@ -2286,7 +2199,6 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u } } -#if !COMPILE_TEMPLATE_AMD3DNOW static void 
RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) { dst0+= count; @@ -2333,7 +2245,6 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst count++; } } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) { @@ -2415,7 +2326,6 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ); } -#if !COMPILE_TEMPLATE_AMD3DNOW static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride) @@ -2438,7 +2348,6 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ::: "memory" ); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, @@ -2465,7 +2374,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ); } -#if !COMPILE_TEMPLATE_AMD3DNOW +#if ARCH_X86_32 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride) @@ -2488,13 +2397,12 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ::: "memory" ); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* ARCH_X86_32 */ #endif /* !COMPILE_TEMPLATE_SSE2 */ static av_cold void RENAME(rgb2rgb_init)(void) { #if !COMPILE_TEMPLATE_SSE2 -#if !COMPILE_TEMPLATE_AMD3DNOW rgb15to16 = RENAME(rgb15to16); rgb15tobgr24 = RENAME(rgb15tobgr24); rgb15to32 = RENAME(rgb15to32); @@ -2519,13 +2427,12 @@ static av_cold void RENAME(rgb2rgb_init)(void) yuy2toyv12 = RENAME(yuy2toyv12); vu9_to_vu12 = RENAME(vu9_to_vu12); yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); +#if ARCH_X86_32 uyvytoyuv422 = RENAME(uyvytoyuv422); +#endif yuyvtoyuv422 = 
RENAME(yuyvtoyuv422); -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW planar2x = RENAME(planar2x); -#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ #if HAVE_7REGS ff_rgb24toyv12 = RENAME(rgb24toyv12); #endif /* HAVE_7REGS */ @@ -2534,11 +2441,11 @@ static av_cold void RENAME(rgb2rgb_init)(void) uyvytoyuv420 = RENAME(uyvytoyuv420); #endif /* !COMPILE_TEMPLATE_SSE2 */ -#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX +#if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 interleaveBytes = RENAME(interleaveBytes); -#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */ +#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */ #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL -#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM +#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM deinterleaveBytes = RENAME(deinterleaveBytes); #endif #endif