diff --git a/libpostproc/postprocess.c b/libpostproc/postprocess.c index 383c691cb4..0586e458b4 100644 --- a/libpostproc/postprocess.c +++ b/libpostproc/postprocess.c @@ -26,28 +26,27 @@ */ /* - C MMX MMX2 3DNow AltiVec -isVertDC Ec Ec Ec -isVertMinMaxOk Ec Ec Ec -doVertLowPass E e e Ec -doVertDefFilter Ec Ec e e Ec -isHorizDC Ec Ec Ec -isHorizMinMaxOk a E Ec -doHorizLowPass E e e Ec -doHorizDefFilter Ec Ec e e Ec -do_a_deblock Ec E Ec E -deRing E e e* Ecp -Vertical RKAlgo1 E a a -Horizontal RKAlgo1 a a -Vertical X1# a E E -Horizontal X1# a E E -LinIpolDeinterlace e E E* -CubicIpolDeinterlace a e e* -LinBlendDeinterlace e E E* + C MMX MMX2 AltiVec +isVertDC Ec Ec Ec +isVertMinMaxOk Ec Ec Ec +doVertLowPass E e Ec +doVertDefFilter Ec Ec e Ec +isHorizDC Ec Ec Ec +isHorizMinMaxOk a E Ec +doHorizLowPass E e Ec +doHorizDefFilter Ec Ec e Ec +do_a_deblock Ec E Ec +deRing E e Ecp +Vertical RKAlgo1 E a +Horizontal RKAlgo1 a +Vertical X1# a E +Horizontal X1# a E +LinIpolDeinterlace e E +CubicIpolDeinterlace a e +LinBlendDeinterlace e E MedianDeinterlace# E Ec Ec -TempDeNoiser# E e e Ec +TempDeNoiser# E e Ec -* I do not have a 3DNow! CPU -> it is untested, but no one said it does not work so it seems to work # more or less selfinvented filters so the exactness is not too meaningful E = Exact implementation e = almost exact implementation (slightly different rounding,...) @@ -83,7 +82,6 @@ try to unroll inner for(x=0 ... loop to avoid these damn if(x ... 
checks #include #include //#undef HAVE_MMXEXT_INLINE -//#define HAVE_AMD3DNOW_INLINE //#undef HAVE_MMX_INLINE //#undef ARCH_X86 //#define DEBUG_BRIGHTNESS @@ -494,7 +492,7 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, } } -//Note: we have C, MMX, MMX2, 3DNOW version there is no 3DNOW+MMX2 one +//Note: we have C and SSE2 versions (the SSE2 one uses MMX(EXT) when advantageous) //Plain C versions //we always compile C for testing which needs bitexactness #define TEMPLATE_PP_C 1 @@ -508,27 +506,12 @@ static av_always_inline void do_a_deblock_C(uint8_t *src, int step, #if ARCH_X86 && HAVE_INLINE_ASM # if CONFIG_RUNTIME_CPUDETECT -# define TEMPLATE_PP_MMX 1 -# include "postprocess_template.c" -# define TEMPLATE_PP_MMXEXT 1 -# include "postprocess_template.c" -# define TEMPLATE_PP_3DNOW 1 -# include "postprocess_template.c" # define TEMPLATE_PP_SSE2 1 # include "postprocess_template.c" # else # if HAVE_SSE2_INLINE # define TEMPLATE_PP_SSE2 1 # include "postprocess_template.c" -# elif HAVE_MMXEXT_INLINE -# define TEMPLATE_PP_MMXEXT 1 -# include "postprocess_template.c" -# elif HAVE_AMD3DNOW_INLINE -# define TEMPLATE_PP_3DNOW 1 -# include "postprocess_template.c" -# elif HAVE_MMX_INLINE -# define TEMPLATE_PP_MMX 1 -# include "postprocess_template.c" # endif # endif #endif @@ -549,21 +532,12 @@ static inline void postProcess(const uint8_t src[], int srcStride, uint8_t dst[] #if ARCH_X86 && HAVE_INLINE_ASM // ordered per speed fastest first if (c->cpuCaps & AV_CPU_FLAG_SSE2) pp = postProcess_SSE2; - else if (c->cpuCaps & AV_CPU_FLAG_MMXEXT) pp = postProcess_MMX2; - else if (c->cpuCaps & AV_CPU_FLAG_3DNOW) pp = postProcess_3DNow; - else if (c->cpuCaps & AV_CPU_FLAG_MMX) pp = postProcess_MMX; #elif HAVE_ALTIVEC if (c->cpuCaps & AV_CPU_FLAG_ALTIVEC) pp = postProcess_altivec; #endif #else /* CONFIG_RUNTIME_CPUDETECT */ #if HAVE_SSE2_INLINE pp = postProcess_SSE2; -#elif HAVE_MMXEXT_INLINE - pp = postProcess_MMX2; -#elif HAVE_AMD3DNOW_INLINE - pp = 
postProcess_3DNow; -#elif HAVE_MMX_INLINE - pp = postProcess_MMX; #elif HAVE_ALTIVEC pp = postProcess_altivec; #endif @@ -877,9 +851,6 @@ av_cold pp_context *pp_get_context(int width, int height, int cpuCaps){ c->cpuCaps = av_get_cpu_flags(); } else { c->cpuCaps = 0; - if (cpuCaps & PP_CPU_CAPS_MMX) c->cpuCaps |= AV_CPU_FLAG_MMX; - if (cpuCaps & PP_CPU_CAPS_MMX2) c->cpuCaps |= AV_CPU_FLAG_MMXEXT; - if (cpuCaps & PP_CPU_CAPS_3DNOW) c->cpuCaps |= AV_CPU_FLAG_3DNOW; if (cpuCaps & PP_CPU_CAPS_ALTIVEC) c->cpuCaps |= AV_CPU_FLAG_ALTIVEC; } diff --git a/libpostproc/postprocess_template.c b/libpostproc/postprocess_template.c index 9f76b7c587..bcf7bdad66 100644 --- a/libpostproc/postprocess_template.c +++ b/libpostproc/postprocess_template.c @@ -20,7 +20,7 @@ /** * @file - * mmx/mmx2/3dnow postprocess code. + * mmx/mmx2/sse2 postprocess code. */ #include "libavutil/mem_internal.h" @@ -57,14 +57,6 @@ # define TEMPLATE_PP_MMXEXT 0 #endif -#ifdef TEMPLATE_PP_3DNOW -# undef TEMPLATE_PP_MMX -# define TEMPLATE_PP_MMX 1 -# define RENAME(a) a ## _3DNow -#else -# define TEMPLATE_PP_3DNOW 0 -#endif - #ifdef TEMPLATE_PP_SSE2 # undef TEMPLATE_PP_MMX # define TEMPLATE_PP_MMX 1 @@ -82,30 +74,19 @@ #if TEMPLATE_PP_MMXEXT #define REAL_PAVGB(a,b) "pavgb " #a ", " #b " \n\t" -#elif TEMPLATE_PP_3DNOW -#define REAL_PAVGB(a,b) "pavgusb " #a ", " #b " \n\t" #endif #define PAVGB(a,b) REAL_PAVGB(a,b) #if TEMPLATE_PP_MMXEXT #define PMINUB(a,b,t) "pminub " #a ", " #b " \n\t" -#elif TEMPLATE_PP_MMX -#define PMINUB(b,a,t) \ - "movq " #a ", " #t " \n\t"\ - "psubusb " #b ", " #t " \n\t"\ - "psubb " #t ", " #a " \n\t" #endif #if TEMPLATE_PP_MMXEXT #define PMAXUB(a,b) "pmaxub " #a ", " #b " \n\t" -#elif TEMPLATE_PP_MMX -#define PMAXUB(a,b) \ - "psubusb " #a ", " #b " \n\t"\ - "paddb " #a ", " #b " \n\t" #endif //FIXME? |255-0| = 1 (should not be a problem ...) 
-#if TEMPLATE_PP_MMX +#if TEMPLATE_PP_MMXEXT /** * Check if the middle 8x8 Block in the given 8x16 block is flat */ @@ -185,20 +166,8 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex "psubusb %%mm3, %%mm4 \n\t" " \n\t" -#if TEMPLATE_PP_MMXEXT "pxor %%mm7, %%mm7 \n\t" "psadbw %%mm7, %%mm0 \n\t" -#else - "movq %%mm0, %%mm1 \n\t" - "psrlw $8, %%mm0 \n\t" - "paddb %%mm1, %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "psrlq $16, %%mm0 \n\t" - "paddb %%mm1, %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "psrlq $32, %%mm0 \n\t" - "paddb %%mm1, %%mm0 \n\t" -#endif "movq %4, %%mm7 \n\t" // QP,..., QP "paddusb %%mm7, %%mm7 \n\t" // 2QP ... 2QP "psubusb %%mm7, %%mm4 \n\t" // Diff <= 2QP -> 0 @@ -219,7 +188,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex return 2; } } -#endif //TEMPLATE_PP_MMX +#endif //TEMPLATE_PP_MMXEXT /** * Do a vertical low pass filter on the 8x16 block (only write to the 8x8 block in the middle) @@ -228,7 +197,7 @@ static inline int RENAME(vertClassify)(const uint8_t src[], int stride, PPContex #if !TEMPLATE_PP_ALTIVEC static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) { -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#if TEMPLATE_PP_MMXEXT src+= stride*3; __asm__ volatile( //"movv %0 %1 %2\n\t" "movq %2, %%mm0 \n\t" // QP,..., QP @@ -355,7 +324,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) : "r" (src), "r" ((x86_reg)stride), "m" (c->pQPb) : "%"FF_REG_a, "%"FF_REG_c ); -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#else //TEMPLATE_PP_MMXEXT const int l1= stride; const int l2= stride + l1; const int l3= stride + l2; @@ -394,7 +363,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, PPContext *c) src++; } -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#endif //TEMPLATE_PP_MMXEXT } #endif //TEMPLATE_PP_ALTIVEC @@ -407,7 +376,7 @@ static inline void RENAME(doVertLowPass)(uint8_t *src, int stride, 
PPContext *c) */ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) { -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#if TEMPLATE_PP_MMXEXT src+= stride*3; __asm__ volatile( @@ -494,7 +463,7 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) NAMED_CONSTRAINTS_ADD(b01) : "%"FF_REG_a, "%"FF_REG_c ); -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#else //TEMPLATE_PP_MMXEXT const int l1= stride; const int l2= stride + l1; @@ -528,13 +497,13 @@ static inline void RENAME(vertX1Filter)(uint8_t *src, int stride, PPContext *co) } src++; } -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#endif //TEMPLATE_PP_MMXEXT } #if !TEMPLATE_PP_ALTIVEC static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext *c) { -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#if TEMPLATE_PP_MMXEXT /* uint8_t tmp[16]; const int l1= stride; @@ -816,239 +785,7 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext } } */ -#elif TEMPLATE_PP_MMX - DECLARE_ALIGNED(8, uint64_t, tmp)[4]; // make space for 4 8-byte vars - src+= stride*4; - __asm__ volatile( - "pxor %%mm7, %%mm7 \n\t" -// 0 1 2 3 4 5 6 7 -// %0 %0+%1 %0+2%1 eax+2%1 %0+4%1 eax+4%1 edx+%1 edx+2%1 -// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 - - "movq (%0), %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" // low part of line 0 - "punpckhbw %%mm7, %%mm1 \n\t" // high part of line 0 - - "movq (%0, %1), %%mm2 \n\t" - "lea (%0, %1, 2), %%"FF_REG_a" \n\t" - "movq %%mm2, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" // low part of line 1 - "punpckhbw %%mm7, %%mm3 \n\t" // high part of line 1 - - "movq (%%"FF_REG_a"), %%mm4 \n\t" - "movq %%mm4, %%mm5 \n\t" - "punpcklbw %%mm7, %%mm4 \n\t" // low part of line 2 - "punpckhbw %%mm7, %%mm5 \n\t" // high part of line 2 - - "paddw %%mm0, %%mm0 \n\t" // 2L0 - "paddw %%mm1, %%mm1 \n\t" // 2H0 - "psubw %%mm4, %%mm2 \n\t" // L1 - L2 - "psubw %%mm5, %%mm3 \n\t" // H1 - H2 - 
"psubw %%mm2, %%mm0 \n\t" // 2L0 - L1 + L2 - "psubw %%mm3, %%mm1 \n\t" // 2H0 - H1 + H2 - - "psllw $2, %%mm2 \n\t" // 4L1 - 4L2 - "psllw $2, %%mm3 \n\t" // 4H1 - 4H2 - "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - - "movq (%%"FF_REG_a", %1), %%mm2 \n\t" - "movq %%mm2, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" // L3 - "punpckhbw %%mm7, %%mm3 \n\t" // H3 - - "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - L3 - "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - H3 - "psubw %%mm2, %%mm0 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 - "psubw %%mm3, %%mm1 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 - "movq %%mm0, (%3) \n\t" // 2L0 - 5L1 + 5L2 - 2L3 - "movq %%mm1, 8(%3) \n\t" // 2H0 - 5H1 + 5H2 - 2H3 - - "movq (%%"FF_REG_a", %1, 2), %%mm0 \n\t" - "movq %%mm0, %%mm1 \n\t" - "punpcklbw %%mm7, %%mm0 \n\t" // L4 - "punpckhbw %%mm7, %%mm1 \n\t" // H4 - - "psubw %%mm0, %%mm2 \n\t" // L3 - L4 - "psubw %%mm1, %%mm3 \n\t" // H3 - H4 - "movq %%mm2, 16(%3) \n\t" // L3 - L4 - "movq %%mm3, 24(%3) \n\t" // H3 - H4 - "paddw %%mm4, %%mm4 \n\t" // 2L2 - "paddw %%mm5, %%mm5 \n\t" // 2H2 - "psubw %%mm2, %%mm4 \n\t" // 2L2 - L3 + L4 - "psubw %%mm3, %%mm5 \n\t" // 2H2 - H3 + H4 - - "lea (%%"FF_REG_a", %1), %0 \n\t" - "psllw $2, %%mm2 \n\t" // 4L3 - 4L4 - "psllw $2, %%mm3 \n\t" // 4H3 - 4H4 - "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 -//50 opcodes so far - "movq (%0, %1, 2), %%mm2 \n\t" - "movq %%mm2, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" // L5 - "punpckhbw %%mm7, %%mm3 \n\t" // H5 - "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - L5 - "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - H5 - "psubw %%mm2, %%mm4 \n\t" // 2L2 - 5L3 + 5L4 - 2L5 - "psubw %%mm3, %%mm5 \n\t" // 2H2 - 5H3 + 5H4 - 2H5 - - "movq (%%"FF_REG_a", %1, 4), %%mm6 \n\t" - "punpcklbw %%mm7, %%mm6 \n\t" // L6 - "psubw %%mm6, %%mm2 \n\t" // L5 - L6 - "movq (%%"FF_REG_a", %1, 4), %%mm6 \n\t" - "punpckhbw %%mm7, %%mm6 \n\t" // H6 - "psubw %%mm6, %%mm3 \n\t" // H5 - 
H6 - - "paddw %%mm0, %%mm0 \n\t" // 2L4 - "paddw %%mm1, %%mm1 \n\t" // 2H4 - "psubw %%mm2, %%mm0 \n\t" // 2L4 - L5 + L6 - "psubw %%mm3, %%mm1 \n\t" // 2H4 - H5 + H6 - - "psllw $2, %%mm2 \n\t" // 4L5 - 4L6 - "psllw $2, %%mm3 \n\t" // 4H5 - 4H6 - "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - - "movq (%0, %1, 4), %%mm2 \n\t" - "movq %%mm2, %%mm3 \n\t" - "punpcklbw %%mm7, %%mm2 \n\t" // L7 - "punpckhbw %%mm7, %%mm3 \n\t" // H7 - - "paddw %%mm2, %%mm2 \n\t" // 2L7 - "paddw %%mm3, %%mm3 \n\t" // 2H7 - "psubw %%mm2, %%mm0 \n\t" // 2L4 - 5L5 + 5L6 - 2L7 - "psubw %%mm3, %%mm1 \n\t" // 2H4 - 5H5 + 5H6 - 2H7 - - "movq (%3), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 - "movq 8(%3), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 - -#if TEMPLATE_PP_MMXEXT - "movq %%mm7, %%mm6 \n\t" // 0 - "psubw %%mm0, %%mm6 \n\t" - "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| - "movq %%mm7, %%mm6 \n\t" // 0 - "psubw %%mm1, %%mm6 \n\t" - "pmaxsw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| - "movq %%mm7, %%mm6 \n\t" // 0 - "psubw %%mm2, %%mm6 \n\t" - "pmaxsw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| - "movq %%mm7, %%mm6 \n\t" // 0 - "psubw %%mm3, %%mm6 \n\t" - "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| -#else - "movq %%mm7, %%mm6 \n\t" // 0 - "pcmpgtw %%mm0, %%mm6 \n\t" - "pxor %%mm6, %%mm0 \n\t" - "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| - "movq %%mm7, %%mm6 \n\t" // 0 - "pcmpgtw %%mm1, %%mm6 \n\t" - "pxor %%mm6, %%mm1 \n\t" - "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| - "movq %%mm7, %%mm6 \n\t" // 0 - "pcmpgtw %%mm2, %%mm6 \n\t" - "pxor %%mm6, %%mm2 \n\t" - "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| - "movq %%mm7, %%mm6 \n\t" // 0 - "pcmpgtw %%mm3, %%mm6 \n\t" - "pxor %%mm6, %%mm3 \n\t" - "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| -#endif - -#if TEMPLATE_PP_MMXEXT - "pminsw %%mm2, %%mm0 \n\t" - "pminsw %%mm3, %%mm1 \n\t" -#else - "movq %%mm0, %%mm6 \n\t" - "psubusw %%mm2, %%mm6 \n\t" - "psubw 
%%mm6, %%mm0 \n\t" - "movq %%mm1, %%mm6 \n\t" - "psubusw %%mm3, %%mm6 \n\t" - "psubw %%mm6, %%mm1 \n\t" -#endif - - "movd %2, %%mm2 \n\t" // QP - "punpcklbw %%mm7, %%mm2 \n\t" - - "movq %%mm7, %%mm6 \n\t" // 0 - "pcmpgtw %%mm4, %%mm6 \n\t" // sign(2L2 - 5L3 + 5L4 - 2L5) - "pxor %%mm6, %%mm4 \n\t" - "psubw %%mm6, %%mm4 \n\t" // |2L2 - 5L3 + 5L4 - 2L5| - "pcmpgtw %%mm5, %%mm7 \n\t" // sign(2H2 - 5H3 + 5H4 - 2H5) - "pxor %%mm7, %%mm5 \n\t" - "psubw %%mm7, %%mm5 \n\t" // |2H2 - 5H3 + 5H4 - 2H5| -// 100 opcodes - "psllw $3, %%mm2 \n\t" // 8QP - "movq %%mm2, %%mm3 \n\t" // 8QP - "pcmpgtw %%mm4, %%mm2 \n\t" - "pcmpgtw %%mm5, %%mm3 \n\t" - "pand %%mm2, %%mm4 \n\t" - "pand %%mm3, %%mm5 \n\t" - - - "psubusw %%mm0, %%mm4 \n\t" // hd - "psubusw %%mm1, %%mm5 \n\t" // ld - - - "movq "MANGLE(w05)", %%mm2 \n\t" // 5 - "pmullw %%mm2, %%mm4 \n\t" - "pmullw %%mm2, %%mm5 \n\t" - "movq "MANGLE(w20)", %%mm2 \n\t" // 32 - "paddw %%mm2, %%mm4 \n\t" - "paddw %%mm2, %%mm5 \n\t" - "psrlw $6, %%mm4 \n\t" - "psrlw $6, %%mm5 \n\t" - - "movq 16(%3), %%mm0 \n\t" // L3 - L4 - "movq 24(%3), %%mm1 \n\t" // H3 - H4 - - "pxor %%mm2, %%mm2 \n\t" - "pxor %%mm3, %%mm3 \n\t" - - "pcmpgtw %%mm0, %%mm2 \n\t" // sign (L3-L4) - "pcmpgtw %%mm1, %%mm3 \n\t" // sign (H3-H4) - "pxor %%mm2, %%mm0 \n\t" - "pxor %%mm3, %%mm1 \n\t" - "psubw %%mm2, %%mm0 \n\t" // |L3-L4| - "psubw %%mm3, %%mm1 \n\t" // |H3-H4| - "psrlw $1, %%mm0 \n\t" // |L3 - L4|/2 - "psrlw $1, %%mm1 \n\t" // |H3 - H4|/2 - - "pxor %%mm6, %%mm2 \n\t" - "pxor %%mm7, %%mm3 \n\t" - "pand %%mm2, %%mm4 \n\t" - "pand %%mm3, %%mm5 \n\t" - -#if TEMPLATE_PP_MMXEXT - "pminsw %%mm0, %%mm4 \n\t" - "pminsw %%mm1, %%mm5 \n\t" -#else - "movq %%mm4, %%mm2 \n\t" - "psubusw %%mm0, %%mm2 \n\t" - "psubw %%mm2, %%mm4 \n\t" - "movq %%mm5, %%mm2 \n\t" - "psubusw %%mm1, %%mm2 \n\t" - "psubw %%mm2, %%mm5 \n\t" -#endif - "pxor %%mm6, %%mm4 \n\t" - "pxor %%mm7, %%mm5 \n\t" - "psubw %%mm6, %%mm4 \n\t" - "psubw %%mm7, %%mm5 \n\t" - "packsswb %%mm5, %%mm4 \n\t" - "movq (%0), %%mm0 
\n\t" - "paddb %%mm4, %%mm0 \n\t" - "movq %%mm0, (%0) \n\t" - "movq (%0, %1), %%mm0 \n\t" - "psubb %%mm4, %%mm0 \n\t" - "movq %%mm0, (%0, %1) \n\t" - - : "+r" (src) - : "r" ((x86_reg)stride), "m" (c->pQPb), "r"(tmp) - NAMED_CONSTRAINTS_ADD(w05,w20) - : "%"FF_REG_a - ); -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#else //TEMPLATE_PP_MMXEXT const int l1= stride; const int l2= stride + l1; const int l3= stride + l2; @@ -1086,14 +823,14 @@ static inline void RENAME(doVertDefFilter)(uint8_t src[], int stride, PPContext } src++; } -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#endif //TEMPLATE_PP_MMXEXT } #endif //TEMPLATE_PP_ALTIVEC #if !TEMPLATE_PP_ALTIVEC static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) { -#if HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) +#if HAVE_7REGS && TEMPLATE_PP_MMXEXT DECLARE_ALIGNED(8, uint64_t, tmp)[3]; __asm__ volatile( "pxor %%mm6, %%mm6 \n\t" @@ -1113,20 +850,10 @@ static inline void RENAME(dering)(uint8_t src[], int stride, PPContext *c) #undef REAL_FIND_MIN_MAX #undef FIND_MIN_MAX -#if TEMPLATE_PP_MMXEXT #define REAL_FIND_MIN_MAX(addr)\ "movq " #addr ", %%mm0 \n\t"\ "pminub %%mm0, %%mm7 \n\t"\ "pmaxub %%mm0, %%mm6 \n\t" -#else -#define REAL_FIND_MIN_MAX(addr)\ - "movq " #addr ", %%mm0 \n\t"\ - "movq %%mm7, %%mm1 \n\t"\ - "psubusb %%mm0, %%mm6 \n\t"\ - "paddb %%mm0, %%mm6 \n\t"\ - "psubusb %%mm0, %%mm1 \n\t"\ - "psubb %%mm1, %%mm7 \n\t" -#endif #define FIND_MIN_MAX(addr) REAL_FIND_MIN_MAX(addr) FIND_MIN_MAX((%%FF_REGa)) @@ -1140,49 +867,20 @@ FIND_MIN_MAX((%0, %1, 8)) "movq %%mm7, %%mm4 \n\t" "psrlq $8, %%mm7 \n\t" -#if TEMPLATE_PP_MMXEXT "pminub %%mm4, %%mm7 \n\t" // min of pixels "pshufw $0xF9, %%mm7, %%mm4 \n\t" "pminub %%mm4, %%mm7 \n\t" // min of pixels "pshufw $0xFE, %%mm7, %%mm4 \n\t" "pminub %%mm4, %%mm7 \n\t" -#else - "movq %%mm7, %%mm1 \n\t" - "psubusb %%mm4, %%mm1 \n\t" - "psubb %%mm1, %%mm7 \n\t" - "movq %%mm7, %%mm4 \n\t" - "psrlq $16, %%mm7 \n\t" - "movq %%mm7, %%mm1 \n\t" - 
"psubusb %%mm4, %%mm1 \n\t" - "psubb %%mm1, %%mm7 \n\t" - "movq %%mm7, %%mm4 \n\t" - "psrlq $32, %%mm7 \n\t" - "movq %%mm7, %%mm1 \n\t" - "psubusb %%mm4, %%mm1 \n\t" - "psubb %%mm1, %%mm7 \n\t" -#endif "movq %%mm6, %%mm4 \n\t" "psrlq $8, %%mm6 \n\t" -#if TEMPLATE_PP_MMXEXT "pmaxub %%mm4, %%mm6 \n\t" // max of pixels "pshufw $0xF9, %%mm6, %%mm4 \n\t" "pmaxub %%mm4, %%mm6 \n\t" "pshufw $0xFE, %%mm6, %%mm4 \n\t" "pmaxub %%mm4, %%mm6 \n\t" -#else - "psubusb %%mm4, %%mm6 \n\t" - "paddb %%mm4, %%mm6 \n\t" - "movq %%mm6, %%mm4 \n\t" - "psrlq $16, %%mm6 \n\t" - "psubusb %%mm4, %%mm6 \n\t" - "paddb %%mm4, %%mm6 \n\t" - "movq %%mm6, %%mm4 \n\t" - "psrlq $32, %%mm6 \n\t" - "psubusb %%mm4, %%mm6 \n\t" - "paddb %%mm4, %%mm6 \n\t" -#endif "movq %%mm6, %%mm0 \n\t" // max "psubb %%mm7, %%mm6 \n\t" // max - min "push %%"FF_REG_a" \n\t" @@ -1320,7 +1018,7 @@ DERING_CORE((%0, %1, 8) ,(%%FF_REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5, NAMED_CONSTRAINTS_ADD(deringThreshold,b00,b02,b08) : "%"FF_REG_a, "%"FF_REG_d ); -#else // HAVE_7REGS && (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) +#else // HAVE_7REGS && TEMPLATE_PP_MMXEXT int y; int min=255; int max=0; @@ -1438,7 +1136,7 @@ DERING_CORE((%0, %1, 8) ,(%%FF_REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5, // src[0] = src[7]=src[stride*7]=src[stride*7 + 7]=255; } #endif -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#endif //TEMPLATE_PP_MMXEXT } #endif //TEMPLATE_PP_ALTIVEC @@ -1450,7 +1148,7 @@ DERING_CORE((%0, %1, 8) ,(%%FF_REGd, %1, 4),%%mm2,%%mm4,%%mm0,%%mm3,%%mm5, */ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int stride) { -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#if TEMPLATE_PP_MMXEXT src+= 4*stride; __asm__ volatile( "lea (%0, %1), %%"FF_REG_a" \n\t" @@ -1503,14 +1201,13 @@ static inline void RENAME(deInterlaceInterpolateLinear)(uint8_t src[], int strid */ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride) { -#if TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW 
+#if TEMPLATE_PP_SSE2 src+= stride*3; __asm__ volatile( "lea (%0, %1), %%"FF_REG_a" \n\t" "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" "lea (%%"FF_REG_d", %1, 4), %%"FF_REG_c"\n\t" "add %1, %%"FF_REG_c" \n\t" -#if TEMPLATE_PP_SSE2 "pxor %%xmm7, %%xmm7 \n\t" #define REAL_DEINT_CUBIC(a,b,c,d,e)\ "movq " #a ", %%xmm0 \n\t"\ @@ -1526,33 +1223,6 @@ static inline void RENAME(deInterlaceInterpolateCubic)(uint8_t src[], int stride "psubw %%xmm0, %%xmm1 \n\t"\ "packuswb %%xmm1, %%xmm1 \n\t"\ "movlps %%xmm1, " #c " \n\t" -#else //TEMPLATE_PP_SSE2 - "pxor %%mm7, %%mm7 \n\t" -// 0 1 2 3 4 5 6 7 8 9 10 -// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 ecx - -#define REAL_DEINT_CUBIC(a,b,c,d,e)\ - "movq " #a ", %%mm0 \n\t"\ - "movq " #b ", %%mm1 \n\t"\ - "movq " #d ", %%mm2 \n\t"\ - "movq " #e ", %%mm3 \n\t"\ - PAVGB(%%mm2, %%mm1) /* (b+d) /2 */\ - PAVGB(%%mm3, %%mm0) /* (a+e) /2 */\ - "movq %%mm0, %%mm2 \n\t"\ - "punpcklbw %%mm7, %%mm0 \n\t"\ - "punpckhbw %%mm7, %%mm2 \n\t"\ - "movq %%mm1, %%mm3 \n\t"\ - "punpcklbw %%mm7, %%mm1 \n\t"\ - "punpckhbw %%mm7, %%mm3 \n\t"\ - "psubw %%mm1, %%mm0 \n\t" /* L(a+e - (b+d))/2 */\ - "psubw %%mm3, %%mm2 \n\t" /* H(a+e - (b+d))/2 */\ - "psraw $3, %%mm0 \n\t" /* L(a+e - (b+d))/16 */\ - "psraw $3, %%mm2 \n\t" /* H(a+e - (b+d))/16 */\ - "psubw %%mm0, %%mm1 \n\t" /* L(9b + 9d - a - e)/16 */\ - "psubw %%mm2, %%mm3 \n\t" /* H(9b + 9d - a - e)/16 */\ - "packuswb %%mm3, %%mm1 \n\t"\ - "movq %%mm1, " #c " \n\t" -#endif //TEMPLATE_PP_SSE2 #define DEINT_CUBIC(a,b,c,d,e) REAL_DEINT_CUBIC(a,b,c,d,e) DEINT_CUBIC((%0) , (%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4) , (%%FF_REGd, %1)) @@ -1562,13 +1232,11 @@ DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8) , (%%FF_REGd, %1, 4), (%%FF_REGc) : : "r" (src), "r" ((x86_reg)stride) : -#if TEMPLATE_PP_SSE2 XMM_CLOBBERS("%xmm0", "%xmm1", "%xmm2", "%xmm3", "%xmm7",) -#endif "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c ); #undef REAL_DEINT_CUBIC -#else //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || 
TEMPLATE_PP_3DNOW +#else //TEMPLATE_PP_SSE2 int x; src+= stride*3; for(x=0; x<8; x++){ @@ -1578,7 +1246,7 @@ DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8) , (%%FF_REGd, %1, 4), (%%FF_REGc) src[stride*9] = av_clip_uint8((-src[stride*6] + 9*src[stride*8] + 9*src[stride*10] - src[stride*12])>>4); src++; } -#endif //TEMPLATE_PP_SSE2 || TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#endif //TEMPLATE_PP_SSE2 } /** @@ -1590,7 +1258,7 @@ DEINT_CUBIC((%%FF_REGd, %1), (%0, %1, 8) , (%%FF_REGd, %1, 4), (%%FF_REGc) */ static inline void RENAME(deInterlaceFF)(uint8_t src[], int stride, uint8_t *tmp) { -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#if TEMPLATE_PP_MMXEXT src+= stride*4; __asm__ volatile( "lea (%0, %1), %%"FF_REG_a" \n\t" @@ -1639,7 +1307,7 @@ DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp) : "%"FF_REG_a, "%"FF_REG_d ); -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#else //TEMPLATE_PP_MMXEXT int x; src+= stride*4; for(x=0; x<8; x++){ @@ -1657,7 +1325,7 @@ DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4 src++; } -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#endif //TEMPLATE_PP_MMXEXT } /** @@ -1669,7 +1337,7 @@ DEINT_FF((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4 */ static inline void RENAME(deInterlaceL5)(uint8_t src[], int stride, uint8_t *tmp, uint8_t *tmp2) { -#if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS +#if TEMPLATE_PP_MMXEXT && HAVE_6REGS src+= stride*4; __asm__ volatile( "lea (%0, %1), %%"FF_REG_a" \n\t" @@ -1729,7 +1397,7 @@ DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4 : : "r" (src), "r" ((x86_reg)stride), "r"(tmp), "r"(tmp2) : "%"FF_REG_a, "%"FF_REG_d ); -#else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS +#else //TEMPLATE_PP_MMXEXT && HAVE_6REGS int x; src+= stride*4; for(x=0; x<8; x++){ @@ -1758,7 +1426,7 @@ DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, 8) , 
(%%FF_REGd, %1, 4 src++; } -#endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS +#endif // TEMPLATE_PP_MMXEXT && HAVE_6REGS } /** @@ -1770,7 +1438,7 @@ DEINT_L5(%%mm1, %%mm0, (%%FF_REGd, %1, 2), (%0, %1, 8) , (%%FF_REGd, %1, 4 */ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uint8_t *tmp) { -#if TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#if TEMPLATE_PP_MMXEXT src+= 4*stride; __asm__ volatile( "lea (%0, %1), %%"FF_REG_a" \n\t" @@ -1817,7 +1485,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin : : "r" (src), "r" ((x86_reg)stride), "r" (tmp) : "%"FF_REG_a, "%"FF_REG_d ); -#else //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#else //TEMPLATE_PP_MMXEXT int a, b, c, x; src+= 4*stride; @@ -1860,7 +1528,7 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin src += 4; tmp += 4; } -#endif //TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW +#endif //TEMPLATE_PP_MMXEXT } /** @@ -1871,9 +1539,8 @@ static inline void RENAME(deInterlaceBlendLinear)(uint8_t src[], int stride, uin */ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) { -#if TEMPLATE_PP_MMX - src+= 4*stride; #if TEMPLATE_PP_MMXEXT + src+= 4*stride; __asm__ volatile( "lea (%0, %1), %%"FF_REG_a" \n\t" "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" @@ -1922,48 +1589,6 @@ static inline void RENAME(deInterlaceMedian)(uint8_t src[], int stride) : "%"FF_REG_a, "%"FF_REG_d ); -#else // MMX without MMX2 - __asm__ volatile( - "lea (%0, %1), %%"FF_REG_a" \n\t" - "lea (%%"FF_REG_a", %1, 4), %%"FF_REG_d"\n\t" -// 0 1 2 3 4 5 6 7 8 9 -// %0 eax eax+%1 eax+2%1 %0+4%1 edx edx+%1 edx+2%1 %0+8%1 edx+4%1 - "pxor %%mm7, %%mm7 \n\t" - -#define REAL_MEDIAN(a,b,c)\ - "movq " #a ", %%mm0 \n\t"\ - "movq " #b ", %%mm2 \n\t"\ - "movq " #c ", %%mm1 \n\t"\ - "movq %%mm0, %%mm3 \n\t"\ - "movq %%mm1, %%mm4 \n\t"\ - "movq %%mm2, %%mm5 \n\t"\ - "psubusb %%mm1, %%mm3 \n\t"\ - "psubusb %%mm2, %%mm4 \n\t"\ - "psubusb %%mm0, %%mm5 \n\t"\ 
- "pcmpeqb %%mm7, %%mm3 \n\t"\ - "pcmpeqb %%mm7, %%mm4 \n\t"\ - "pcmpeqb %%mm7, %%mm5 \n\t"\ - "movq %%mm3, %%mm6 \n\t"\ - "pxor %%mm4, %%mm3 \n\t"\ - "pxor %%mm5, %%mm4 \n\t"\ - "pxor %%mm6, %%mm5 \n\t"\ - "por %%mm3, %%mm1 \n\t"\ - "por %%mm4, %%mm2 \n\t"\ - "por %%mm5, %%mm0 \n\t"\ - "pand %%mm2, %%mm0 \n\t"\ - "pand %%mm1, %%mm0 \n\t"\ - "movq %%mm0, " #b " \n\t" -#define MEDIAN(a,b,c) REAL_MEDIAN(a,b,c) - -MEDIAN((%0) , (%%FF_REGa) , (%%FF_REGa, %1)) -MEDIAN((%%FF_REGa, %1), (%%FF_REGa, %1, 2), (%0, %1, 4)) -MEDIAN((%0, %1, 4) , (%%FF_REGd) , (%%FF_REGd, %1)) -MEDIAN((%%FF_REGd, %1), (%%FF_REGd, %1, 2), (%0, %1, 8)) - - : : "r" (src), "r" ((x86_reg)stride) - : "%"FF_REG_a, "%"FF_REG_d - ); -#endif //TEMPLATE_PP_MMXEXT #else //TEMPLATE_PP_MMX int x, y; src+= 4*stride; @@ -2165,7 +1790,7 @@ static inline void RENAME(tempNoiseReducer)(uint8_t *src, int stride, #define FAST_L2_DIFF //#define L1_DIFF //u should change the thresholds too if u try that one -#if (TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS +#if TEMPLATE_PP_MMXEXT && HAVE_6REGS __asm__ volatile( "lea (%2, %2, 2), %%"FF_REG_a" \n\t" // 3*stride "lea (%2, %2, 4), %%"FF_REG_d" \n\t" // 5*stride @@ -2454,7 +2079,7 @@ L2_DIFF_CORE((%0, %%FF_REGc) , (%1, %%FF_REGc)) NAMED_CONSTRAINTS_ADD(b80) : "%"FF_REG_a, "%"FF_REG_d, "%"FF_REG_c, "memory" ); -#else //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS +#else //TEMPLATE_PP_MMXEXT && HAVE_6REGS { int y; int d=0; @@ -2537,11 +2162,11 @@ Switch between } } } -#endif //(TEMPLATE_PP_MMXEXT || TEMPLATE_PP_3DNOW) && HAVE_6REGS +#endif //TEMPLATE_PP_MMXEXT && HAVE_6REGS } #endif //TEMPLATE_PP_ALTIVEC -#if TEMPLATE_PP_MMX +#if TEMPLATE_PP_MMXEXT /** * accurate deblock filter */ @@ -2945,7 +2570,6 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "movq (%4), %%mm2 \n\t" // 2L0 - 5L1 + 5L2 - 2L3 "movq 8(%4), %%mm3 \n\t" // 2H0 - 5H1 + 5H2 - 2H3 -#if TEMPLATE_PP_MMXEXT "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm0, %%mm6 
\n\t" "pmaxsw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| @@ -2958,36 +2582,9 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "movq %%mm7, %%mm6 \n\t" // 0 "psubw %%mm3, %%mm6 \n\t" "pmaxsw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| -#else - "movq %%mm7, %%mm6 \n\t" // 0 - "pcmpgtw %%mm0, %%mm6 \n\t" - "pxor %%mm6, %%mm0 \n\t" - "psubw %%mm6, %%mm0 \n\t" // |2L4 - 5L5 + 5L6 - 2L7| - "movq %%mm7, %%mm6 \n\t" // 0 - "pcmpgtw %%mm1, %%mm6 \n\t" - "pxor %%mm6, %%mm1 \n\t" - "psubw %%mm6, %%mm1 \n\t" // |2H4 - 5H5 + 5H6 - 2H7| - "movq %%mm7, %%mm6 \n\t" // 0 - "pcmpgtw %%mm2, %%mm6 \n\t" - "pxor %%mm6, %%mm2 \n\t" - "psubw %%mm6, %%mm2 \n\t" // |2L0 - 5L1 + 5L2 - 2L3| - "movq %%mm7, %%mm6 \n\t" // 0 - "pcmpgtw %%mm3, %%mm6 \n\t" - "pxor %%mm6, %%mm3 \n\t" - "psubw %%mm6, %%mm3 \n\t" // |2H0 - 5H1 + 5H2 - 2H3| -#endif -#if TEMPLATE_PP_MMXEXT "pminsw %%mm2, %%mm0 \n\t" "pminsw %%mm3, %%mm1 \n\t" -#else - "movq %%mm0, %%mm6 \n\t" - "psubusw %%mm2, %%mm6 \n\t" - "psubw %%mm6, %%mm0 \n\t" - "movq %%mm1, %%mm6 \n\t" - "psubusw %%mm3, %%mm6 \n\t" - "psubw %%mm6, %%mm1 \n\t" -#endif "movd %2, %%mm2 \n\t" // QP "punpcklbw %%mm7, %%mm2 \n\t" @@ -3041,17 +2638,8 @@ static av_always_inline void RENAME(do_a_deblock)(uint8_t *src, int step, int st "pand %%mm2, %%mm4 \n\t" "pand %%mm3, %%mm5 \n\t" -#if TEMPLATE_PP_MMXEXT "pminsw %%mm0, %%mm4 \n\t" "pminsw %%mm1, %%mm5 \n\t" -#else - "movq %%mm4, %%mm2 \n\t" - "psubusw %%mm0, %%mm2 \n\t" - "psubw %%mm2, %%mm4 \n\t" - "movq %%mm5, %%mm2 \n\t" - "psubusw %%mm1, %%mm2 \n\t" - "psubw %%mm2, %%mm5 \n\t" -#endif "pxor %%mm6, %%mm4 \n\t" "pxor %%mm7, %%mm5 \n\t" "psubw %%mm6, %%mm4 \n\t" @@ -3088,18 +2676,14 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t src[], int srcStride, int levelFix, int64_t *packedOffsetAndScale) { -#if !TEMPLATE_PP_MMX || !HAVE_6REGS - int i; -#endif 
if(levelFix){ -#if TEMPLATE_PP_MMX && HAVE_6REGS +#if TEMPLATE_PP_MMXEXT && HAVE_6REGS __asm__ volatile( "movq (%%"FF_REG_a"), %%mm2 \n\t" // packedYOffset "movq 8(%%"FF_REG_a"), %%mm3 \n\t" // packedYScale "lea (%2,%4), %%"FF_REG_a" \n\t" "lea (%3,%5), %%"FF_REG_d" \n\t" "pxor %%mm4, %%mm4 \n\t" -#if TEMPLATE_PP_MMXEXT #define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ "movq " #src1 ", %%mm0 \n\t"\ "movq " #src1 ", %%mm5 \n\t"\ @@ -3122,34 +2706,6 @@ static inline void RENAME(blockCopy)(uint8_t dst[], int dstStride, const uint8_t "movq %%mm0, " #dst1 " \n\t"\ "movq %%mm1, " #dst2 " \n\t"\ -#else //TEMPLATE_PP_MMXEXT -#define REAL_SCALED_CPY(src1, src2, dst1, dst2) \ - "movq " #src1 ", %%mm0 \n\t"\ - "movq " #src1 ", %%mm5 \n\t"\ - "punpcklbw %%mm4, %%mm0 \n\t"\ - "punpckhbw %%mm4, %%mm5 \n\t"\ - "psubw %%mm2, %%mm0 \n\t"\ - "psubw %%mm2, %%mm5 \n\t"\ - "movq " #src2 ", %%mm1 \n\t"\ - "psllw $6, %%mm0 \n\t"\ - "psllw $6, %%mm5 \n\t"\ - "pmulhw %%mm3, %%mm0 \n\t"\ - "movq " #src2 ", %%mm6 \n\t"\ - "pmulhw %%mm3, %%mm5 \n\t"\ - "punpcklbw %%mm4, %%mm1 \n\t"\ - "punpckhbw %%mm4, %%mm6 \n\t"\ - "psubw %%mm2, %%mm1 \n\t"\ - "psubw %%mm2, %%mm6 \n\t"\ - "psllw $6, %%mm1 \n\t"\ - "psllw $6, %%mm6 \n\t"\ - "pmulhw %%mm3, %%mm1 \n\t"\ - "pmulhw %%mm3, %%mm6 \n\t"\ - "packuswb %%mm5, %%mm0 \n\t"\ - "packuswb %%mm6, %%mm1 \n\t"\ - "movq %%mm0, " #dst1 " \n\t"\ - "movq %%mm1, " #dst2 " \n\t"\ - -#endif //TEMPLATE_PP_MMXEXT #define SCALED_CPY(src1, src2, dst1, dst2)\ REAL_SCALED_CPY(src1, src2, dst1, dst2) @@ -3170,7 +2726,7 @@ SCALED_CPY((%%FF_REGa, %4), (%%FF_REGa, %4, 2), (%%FF_REGd, %5), (%%FF_REGd, %5, : "%"FF_REG_d ); -#else //TEMPLATE_PP_MMX && HAVE_6REGS +#else //TEMPLATE_PP_MMXEXT && HAVE_6REGS - for(i=0; i<8; i++) + for (int i = 0; i < 8; i++) memcpy( &(dst[dstStride*i]), &(src[srcStride*i]), BLOCK_SIZE); -#endif //TEMPLATE_PP_MMX && HAVE_6REGS +#endif //TEMPLATE_PP_MMXEXT && HAVE_6REGS @@ -3203,7 +2759,7 @@ SIMPLE_CPY((%%FF_REGa, %2), (%%FF_REGa, %2, 2), (%%FF_REGd, %3), (%%FF_REGd, %3, : "%"FF_REG_a, "%"FF_REG_d ); #else //TEMPLATE_PP_MMX && HAVE_6REGS 
- for(i=0; i<8; i++) + for (int i = 0; i < 8; i++) memcpy( &(dst[dstStride*i]), &(src[srcStride*i]), BLOCK_SIZE); #endif //TEMPLATE_PP_MMX && HAVE_6REGS @@ -3696,9 +3252,7 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ } } } -#if TEMPLATE_PP_3DNOW - __asm__ volatile("femms"); -#elif TEMPLATE_PP_MMX +#if TEMPLATE_PP_MMX __asm__ volatile("emms"); #endif @@ -3734,5 +3288,4 @@ static void RENAME(postProcess)(const uint8_t src[], int srcStride, uint8_t dst[ #undef TEMPLATE_PP_ALTIVEC #undef TEMPLATE_PP_MMX #undef TEMPLATE_PP_MMXEXT -#undef TEMPLATE_PP_3DNOW #undef TEMPLATE_PP_SSE2