diff --git a/libswscale/x86/rgb2rgb.c b/libswscale/x86/rgb2rgb.c index 0ab139aca4..b325e5dbd5 100644 --- a/libswscale/x86/rgb2rgb.c +++ b/libswscale/x86/rgb2rgb.c @@ -85,20 +85,11 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset); // Note: We have C, MMX, MMXEXT, 3DNOW versions, there is no 3DNOW + MMXEXT one. -#define COMPILE_TEMPLATE_MMXEXT 0 -#define COMPILE_TEMPLATE_AMD3DNOW 0 #define COMPILE_TEMPLATE_SSE2 0 #define COMPILE_TEMPLATE_AVX 0 -//MMX versions -#undef RENAME -#define RENAME(a) a ## _mmx -#include "rgb2rgb_template.c" - // MMXEXT versions #undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#define COMPILE_TEMPLATE_MMXEXT 1 #define RENAME(a) a ## _mmxext #include "rgb2rgb_template.c" @@ -116,19 +107,6 @@ DECLARE_ALIGNED(8, extern const uint64_t, ff_bgr2UVOffset); #define RENAME(a) a ## _avx #include "rgb2rgb_template.c" -//3DNOW versions -#undef RENAME -#undef COMPILE_TEMPLATE_MMXEXT -#undef COMPILE_TEMPLATE_SSE2 -#undef COMPILE_TEMPLATE_AVX -#undef COMPILE_TEMPLATE_AMD3DNOW -#define COMPILE_TEMPLATE_MMXEXT 0 -#define COMPILE_TEMPLATE_SSE2 0 -#define COMPILE_TEMPLATE_AVX 0 -#define COMPILE_TEMPLATE_AMD3DNOW 1 -#define RENAME(a) a ## _3dnow -#include "rgb2rgb_template.c" - /* RGB15->RGB16 original by Strepto/Astral ported to gcc & bugfixed : A'rpi @@ -165,10 +143,6 @@ av_cold void rgb2rgb_init_x86(void) int cpu_flags = av_get_cpu_flags(); #if HAVE_INLINE_ASM - if (INLINE_MMX(cpu_flags)) - rgb2rgb_init_mmx(); - if (INLINE_AMD3DNOW(cpu_flags)) - rgb2rgb_init_3dnow(); if (INLINE_MMXEXT(cpu_flags)) rgb2rgb_init_mmxext(); if (INLINE_SSE2(cpu_flags)) diff --git a/libswscale/x86/rgb2rgb_template.c b/libswscale/x86/rgb2rgb_template.c index ae2469e663..4aba25dd51 100644 --- a/libswscale/x86/rgb2rgb_template.c +++ b/libswscale/x86/rgb2rgb_template.c @@ -36,35 +36,15 @@ #undef SFENCE #undef PAVGB -#if COMPILE_TEMPLATE_AMD3DNOW -#define PREFETCH "prefetch" -#define PAVGB "pavgusb" -#elif COMPILE_TEMPLATE_MMXEXT #define PREFETCH "prefetchnta" #define PAVGB "pavgb" -#else -#define PREFETCH " # nop" -#endif - -#if COMPILE_TEMPLATE_AMD3DNOW -/* On K6 femms is faster than emms. On K7 femms is directly mapped to emms. */ -#define EMMS "femms" -#else -#define EMMS "emms" -#endif - -#if COMPILE_TEMPLATE_MMXEXT #define MOVNTQ "movntq" #define SFENCE "sfence" -#else -#define MOVNTQ "movq" -#define SFENCE " # nop" -#endif + +#define EMMS "emms" #if !COMPILE_TEMPLATE_SSE2 -#if !COMPILE_TEMPLATE_AMD3DNOW - static inline void RENAME(rgb24tobgr32)(const uint8_t *src, uint8_t *dst, int src_size) { uint8_t *dest = dst; @@ -1353,9 +1333,7 @@ static inline void RENAME(yuy2toyv12)(const uint8_t *src, uint8_t *ydst, uint8_t SFENCE" \n\t" :::"memory"); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWidth, int srcHeight, int srcStride, int dstStride) { int x,y; @@ -1453,9 +1431,7 @@ static inline void RENAME(planar2x)(const uint8_t *src, uint8_t *dst, int srcWid SFENCE" \n\t" :::"memory"); } -#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ -#if !COMPILE_TEMPLATE_AMD3DNOW /** * Height should be a multiple of 2 and width should be a multiple of 16. * (If this is a problem for anyone then tell me, and I will fix it.) @@ -1559,7 +1535,6 @@ static inline void RENAME(uyvytoyv12)(const uint8_t *src, uint8_t *ydst, uint8_t SFENCE" \n\t" :::"memory"); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ /** * Height should be a multiple of 2 and width should be a multiple of 2. @@ -1673,7 +1648,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "1: \n\t" PREFETCH" 64(%0, %%"FF_REG_d") \n\t" PREFETCH" 64(%1, %%"FF_REG_d") \n\t" -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW "movq (%0, %%"FF_REG_d"), %%mm0 \n\t" "movq (%1, %%"FF_REG_d"), %%mm1 \n\t" "movq 6(%0, %%"FF_REG_d"), %%mm2 \n\t" @@ -1688,32 +1662,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ PAVGB" %%mm3, %%mm2 \n\t" "punpcklbw %%mm7, %%mm0 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" -#else - "movd (%0, %%"FF_REG_d"), %%mm0 \n\t" - "movd (%1, %%"FF_REG_d"), %%mm1 \n\t" - "movd 3(%0, %%"FF_REG_d"), %%mm2 \n\t" - "movd 3(%1, %%"FF_REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm0 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm2, %%mm0 \n\t" - "movd 6(%0, %%"FF_REG_d"), %%mm4 \n\t" - "movd 6(%1, %%"FF_REG_d"), %%mm1 \n\t" - "movd 9(%0, %%"FF_REG_d"), %%mm2 \n\t" - "movd 9(%1, %%"FF_REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm4, %%mm2 \n\t" - "psrlw $2, %%mm0 \n\t" - "psrlw $2, %%mm2 \n\t" -#endif "movq "BGR2V_IDX"(%5), %%mm1 \n\t" "movq "BGR2V_IDX"(%5), %%mm3 \n\t" @@ -1732,7 +1680,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ "packssdw %%mm1, %%mm0 \n\t" // V1 V0 U1 U0 "psraw $7, %%mm0 \n\t" -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW "movq 12(%0, %%"FF_REG_d"), %%mm4 \n\t" "movq 12(%1, %%"FF_REG_d"), %%mm1 \n\t" "movq 18(%0, %%"FF_REG_d"), %%mm2 \n\t" @@ -1747,33 +1694,6 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ PAVGB" %%mm3, %%mm2 \n\t" "punpcklbw %%mm7, %%mm4 \n\t" "punpcklbw %%mm7, %%mm2 \n\t" -#else - "movd 12(%0, %%"FF_REG_d"), %%mm4 \n\t" - "movd 12(%1, %%"FF_REG_d"), %%mm1 \n\t" - "movd 15(%0, %%"FF_REG_d"), %%mm2 \n\t" - "movd 15(%1, %%"FF_REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm4 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm2, %%mm4 \n\t" - "movd 18(%0, %%"FF_REG_d"), %%mm5 \n\t" - "movd 18(%1, %%"FF_REG_d"), %%mm1 \n\t" - "movd 21(%0, %%"FF_REG_d"), %%mm2 \n\t" - "movd 21(%1, %%"FF_REG_d"), %%mm3 \n\t" - "punpcklbw %%mm7, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" - "punpcklbw %%mm7, %%mm3 \n\t" - "paddw %%mm1, %%mm5 \n\t" - "paddw %%mm3, %%mm2 \n\t" - "paddw %%mm5, %%mm2 \n\t" - "movq "MANGLE(ff_w1111)", %%mm5 \n\t" - "psrlw $2, %%mm4 \n\t" - "psrlw $2, %%mm2 \n\t" -#endif "movq "BGR2V_IDX"(%5), %%mm1 \n\t" "movq "BGR2V_IDX"(%5), %%mm3 \n\t" @@ -1822,7 +1742,7 @@ static inline void RENAME(rgb24toyv12)(const uint8_t *src, uint8_t *ydst, uint8_ #endif /* HAVE_7REGS */ #endif /* !COMPILE_TEMPLATE_SSE2 */ -#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX +#if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, uint8_t *dest, int width, int height, int src1Stride, int src2Stride, int dstStride) @@ -1833,7 +1753,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui int w; if (width >= 16) { -#if COMPILE_TEMPLATE_SSE2 if (!((((intptr_t)src1) | ((intptr_t)src2) | ((intptr_t)dest))&15)) { __asm__( "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" @@ -1854,7 +1773,6 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui : "memory", XMM_CLOBBERS("xmm0", "xmm1", "xmm2",) "%"FF_REG_a ); } else -#endif __asm__( "xor %%"FF_REG_a", %%"FF_REG_a" \n\t" "1: \n\t" @@ -1896,10 +1814,10 @@ static void RENAME(interleaveBytes)(const uint8_t *src1, const uint8_t *src2, ui ::: "memory" ); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */ +#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */ #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL -#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM +#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM void RENAME(ff_nv12ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *unused, const uint8_t *src1, @@ -1919,18 +1837,14 @@ static void RENAME(deinterleaveBytes)(const uint8_t *src, uint8_t *dst1, uint8_t dst2 += dst2Stride; } __asm__( -#if !COMPILE_TEMPLATE_SSE2 - EMMS" \n\t" -#endif SFENCE" \n\t" ::: "memory" ); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM */ #endif /* !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL */ #if !COMPILE_TEMPLATE_SSE2 -#if !COMPILE_TEMPLATE_AMD3DNOW static inline void RENAME(vu9_to_vu12)(const uint8_t *src1, const uint8_t *src2, uint8_t *dst1, uint8_t *dst2, int width, int height, @@ -2108,7 +2022,6 @@ static inline void RENAME(yvu9_to_yuy2)(const uint8_t *src1, const uint8_t *src2 ::: "memory" ); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(extract_even)(const uint8_t *src, uint8_t *dst, x86_reg count) { @@ -2185,7 +2098,7 @@ static void RENAME(extract_odd)(const uint8_t *src, uint8_t *dst, x86_reg count) } } -#if !COMPILE_TEMPLATE_AMD3DNOW +#if ARCH_X86_32 static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) { dst0+= count; @@ -2231,7 +2144,7 @@ static void RENAME(extract_even2)(const uint8_t *src, uint8_t *dst0, uint8_t *ds count++; } } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* ARCH_X86_32 */ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) { @@ -2286,7 +2199,6 @@ static void RENAME(extract_even2avg)(const uint8_t *src0, const uint8_t *src1, u } } -#if !COMPILE_TEMPLATE_AMD3DNOW static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst1, x86_reg count) { dst0+= count; @@ -2333,7 +2245,6 @@ static void RENAME(extract_odd2)(const uint8_t *src, uint8_t *dst0, uint8_t *dst count++; } } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(extract_odd2avg)(const uint8_t *src0, const uint8_t *src1, uint8_t *dst0, uint8_t *dst1, x86_reg count) { @@ -2415,7 +2326,6 @@ static void RENAME(yuyvtoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ); } -#if !COMPILE_TEMPLATE_AMD3DNOW static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride) @@ -2438,7 +2348,6 @@ static void RENAME(yuyvtoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ::: "memory" ); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, @@ -2465,7 +2374,7 @@ static void RENAME(uyvytoyuv420)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ); } -#if !COMPILE_TEMPLATE_AMD3DNOW +#if ARCH_X86_32 static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, const uint8_t *src, int width, int height, int lumStride, int chromStride, int srcStride) @@ -2488,13 +2397,12 @@ static void RENAME(uyvytoyuv422)(uint8_t *ydst, uint8_t *udst, uint8_t *vdst, co ::: "memory" ); } -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ +#endif /* ARCH_X86_32 */ #endif /* !COMPILE_TEMPLATE_SSE2 */ static av_cold void RENAME(rgb2rgb_init)(void) { #if !COMPILE_TEMPLATE_SSE2 -#if !COMPILE_TEMPLATE_AMD3DNOW rgb15to16 = RENAME(rgb15to16); rgb15tobgr24 = RENAME(rgb15tobgr24); rgb15to32 = RENAME(rgb15to32); @@ -2519,13 +2427,12 @@ static av_cold void RENAME(rgb2rgb_init)(void) yuy2toyv12 = RENAME(yuy2toyv12); vu9_to_vu12 = RENAME(vu9_to_vu12); yvu9_to_yuy2 = RENAME(yvu9_to_yuy2); +#if ARCH_X86_32 uyvytoyuv422 = RENAME(uyvytoyuv422); +#endif yuyvtoyuv422 = RENAME(yuyvtoyuv422); -#endif /* !COMPILE_TEMPLATE_AMD3DNOW */ -#if COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW planar2x = RENAME(planar2x); -#endif /* COMPILE_TEMPLATE_MMXEXT || COMPILE_TEMPLATE_AMD3DNOW */ #if HAVE_7REGS ff_rgb24toyv12 = RENAME(rgb24toyv12); #endif /* HAVE_7REGS */ @@ -2534,11 +2441,11 @@ static av_cold void RENAME(rgb2rgb_init)(void) uyvytoyuv420 = RENAME(uyvytoyuv420); #endif /* !COMPILE_TEMPLATE_SSE2 */ -#if !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX +#if !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 interleaveBytes = RENAME(interleaveBytes); -#endif /* !COMPILE_TEMPLATE_AMD3DNOW && !COMPILE_TEMPLATE_AVX */ +#endif /* !COMPILE_TEMPLATE_AVX && COMPILE_TEMPLATE_SSE2 */ #if !COMPILE_TEMPLATE_AVX || HAVE_AVX_EXTERNAL -#if !COMPILE_TEMPLATE_AMD3DNOW && (ARCH_X86_32 || COMPILE_TEMPLATE_SSE2) && COMPILE_TEMPLATE_MMXEXT == COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM +#if COMPILE_TEMPLATE_SSE2 && HAVE_X86ASM deinterleaveBytes = RENAME(deinterleaveBytes); #endif #endif