From b38d487466e68bd6baf2889017d2a751831560f0 Mon Sep 17 00:00:00 2001
From: Ivo van Poorten
Date: Mon, 16 Apr 2007 21:41:03 +0000
Subject: [PATCH] New implementation of rgb32tobgr32

The previous implementation segfaulted with MMX enabled when fed an image
smaller than the size of the units the MMX code processed.

The new code:
- is faster for MMX, MMX2 and plain C
- processes small images correctly
- is LGPL

Originally committed as revision 23009 to svn://svn.mplayerhq.hu/mplayer/trunk/libswscale
---
 libswscale/rgb2rgb_template.c | 97 ++++++++++++++++++++---------------
 1 file changed, 57 insertions(+), 40 deletions(-)

diff --git a/libswscale/rgb2rgb_template.c b/libswscale/rgb2rgb_template.c
index 6489a4db91..7147855fed 100644
--- a/libswscale/rgb2rgb_template.c
+++ b/libswscale/rgb2rgb_template.c
@@ -1364,49 +1364,66 @@ static inline void RENAME(rgb16to32)(const uint8_t *src, uint8_t *dst, long src_
 
 static inline void RENAME(rgb32tobgr32)(const uint8_t *src, uint8_t *dst, long src_size)
 {
+ uint8_t *d = dst, *s = (uint8_t *) src;
+ const uint8_t *end = s + src_size;
 #ifdef HAVE_MMX
-/* TODO: unroll this loop */
- asm volatile (
- "xor %%"REG_a", %%"REG_a" \n\t"
- ASMALIGN(4)
- "1: \n\t"
- PREFETCH" 32(%0, %%"REG_a") \n\t"
- "movq (%0, %%"REG_a"), %%mm0 \n\t"
- "movq %%mm0, %%mm1 \n\t"
- "movq %%mm0, %%mm2 \n\t"
- "pslld $16, %%mm0 \n\t"
- "psrld $16, %%mm1 \n\t"
- "pand "MANGLE(mask32r)", %%mm0 \n\t"
- "pand "MANGLE(mask32g)", %%mm2 \n\t"
- "pand "MANGLE(mask32b)", %%mm1 \n\t"
- "por %%mm0, %%mm2 \n\t"
- "por %%mm1, %%mm2 \n\t"
- MOVNTQ" %%mm2, (%1, %%"REG_a") \n\t"
- "add $8, %%"REG_a" \n\t"
- "cmp %2, %%"REG_a" \n\t"
- " jb 1b \n\t"
- :: "r" (src), "r"(dst), "r" (src_size-7)
- : "%"REG_a
- );
-
- __asm __volatile(SFENCE:::"memory");
- __asm __volatile(EMMS:::"memory");
-#else
- unsigned i;
- unsigned num_pixels = src_size >> 2;
- for(i=0; i>16) + g + (v<<16);
 }
-#endif
 }
 
 static inline void RENAME(rgb24tobgr24)(const uint8_t *src, uint8_t *dst, long src_size)