swscale: Do not lose precision on yuv values after rgb->yuv.

Signed-off-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2011-05-26 15:32:33 +02:00
parent 5655469ee7
commit 2b6bfff2b2
17 changed files with 128 additions and 123 deletions

View File

@ -1045,7 +1045,7 @@ static void fillPlane(uint8_t* plane, int stride, int width, int height, int y,
}
}
static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width,
static inline void rgb48ToY(int16_t *dst, const uint8_t *src, long width,
uint32_t *unused)
{
int i;
@ -1054,11 +1054,11 @@ static inline void rgb48ToY(uint8_t *dst, const uint8_t *src, long width,
int g = src[i*6+2];
int b = src[i*6+4];
dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
static inline void rgb48ToUV(int16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
@ -1069,12 +1069,12 @@ static inline void rgb48ToUV(uint8_t *dstU, uint8_t *dstV,
int g = src1[6*i + 2];
int b = src1[6*i + 4];
dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
static inline void rgb48ToUV_half(int16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
@ -1085,12 +1085,12 @@ static inline void rgb48ToUV_half(uint8_t *dstU, uint8_t *dstV,
int g= src1[12*i + 2] + src1[12*i + 8];
int b= src1[12*i + 4] + src1[12*i + 10];
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
}
}
static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width,
static inline void bgr48ToY(int16_t *dst, const uint8_t *src, long width,
uint32_t *unused)
{
int i;
@ -1099,11 +1099,11 @@ static inline void bgr48ToY(uint8_t *dst, const uint8_t *src, long width,
int g = src[i*6+2];
int r = src[i*6+4];
dst[i] = (RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
dst[i] = (RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
static inline void bgr48ToUV(int16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
@ -1113,12 +1113,12 @@ static inline void bgr48ToUV(uint8_t *dstU, uint8_t *dstV,
int g = src1[6*i + 2];
int r = src1[6*i + 4];
dstU[i] = (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
dstV[i] = (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1))) >> RGB2YUV_SHIFT;
dstU[i] = (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
dstV[i] = (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7))) >> (RGB2YUV_SHIFT-6);
}
}
static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
static inline void bgr48ToUV_half(int16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *unused)
{
@ -1128,13 +1128,13 @@ static inline void bgr48ToUV_half(uint8_t *dstU, uint8_t *dstV,
int g= src1[12*i + 2] + src1[12*i + 8];
int r= src1[12*i + 4] + src1[12*i + 10];
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT)) >> (RGB2YUV_SHIFT+1);
dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT)) + (1<<(RGB2YUV_SHIFT-6))) >> (RGB2YUV_SHIFT-5);
}
}
#define BGR2Y(type, name, shr, shg, shb, maskr, maskg, maskb, RY, GY, BY, S)\
static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)\
static inline void name(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)\
{\
int i;\
for (i=0; i<width; i++) {\
@ -1142,7 +1142,7 @@ static inline void name(uint8_t *dst, const uint8_t *src, long width, uint32_t *
int g= (((const type*)src)[i]>>shg)&maskg;\
int r= (((const type*)src)[i]>>shr)&maskr;\
\
dst[i]= (((RY)*r + (GY)*g + (BY)*b + (33<<((S)-1)))>>(S));\
dst[i]= (((RY)*r + (GY)*g + (BY)*b + (32<<((S)-1)) + (1<<(S-7)))>>((S)-6));\
}\
}
@ -1155,16 +1155,16 @@ BGR2Y(uint16_t, bgr15ToY, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RY<<10, GY<<5, BY
BGR2Y(uint16_t, rgb16ToY, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RY , GY<<5, BY<<11, RGB2YUV_SHIFT+8)
BGR2Y(uint16_t, rgb15ToY, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RY , GY<<5, BY<<10, RGB2YUV_SHIFT+7)
static inline void abgrToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
static inline void abgrToA(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i;
for (i=0; i<width; i++) {
dst[i]= src[4*i];
dst[i]= src[4*i]<<6;
}
}
#define BGR2UV(type, name, shr, shg, shb, shp, maskr, maskg, maskb, RU, GU, BU, RV, GV, BV, S) \
static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
static inline void name(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\
int i;\
for (i=0; i<width; i++) {\
@ -1172,11 +1172,11 @@ static inline void name(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const
int g= ((((const type*)src)[i]>>shp)&maskg)>>shg;\
int r= ((((const type*)src)[i]>>shp)&maskr)>>shr;\
\
dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<((S)-1)))>>(S);\
dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<((S)-1)))>>(S);\
dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256<<((S)-1)) + (1<<(S-7)))>>((S)-6);\
}\
}\
static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
static inline void name ## _half(int16_t *dstU, int16_t *dstV, const uint8_t *src, const uint8_t *dummy, long width, uint32_t *unused)\
{\
int i;\
for (i=0; i<width; i++) {\
@ -1189,8 +1189,8 @@ static inline void name ## _half(uint8_t *dstU, uint8_t *dstV, const uint8_t *sr
\
g>>=shg;\
\
dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (257<<(S)))>>((S)+1);\
dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (257<<(S)))>>((S)+1);\
dstU[i]= ((RU)*r + (GU)*g + (BU)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\
dstV[i]= ((RV)*r + (GV)*g + (BV)*b + (256U<<(S)) + (1<<(S-6)))>>((S)-6+1);\
}\
}
@ -1203,27 +1203,27 @@ BGR2UV(uint16_t, bgr15ToUV, 0, 0, 0, 0, 0x001F, 0x03E0, 0x7C00, RU<<10, GU<<
BGR2UV(uint16_t, rgb16ToUV, 0, 0, 0, 0, 0xF800, 0x07E0, 0x001F, RU , GU<<5, BU<<11, RV , GV<<5, BV<<11, RGB2YUV_SHIFT+8)
BGR2UV(uint16_t, rgb15ToUV, 0, 0, 0, 0, 0x7C00, 0x03E0, 0x001F, RU , GU<<5, BU<<10, RV , GV<<5, BV<<10, RGB2YUV_SHIFT+7)
static inline void palToA(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
static inline void palToA(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
int i;
for (i=0; i<width; i++) {
int d= src[i];
dst[i]= pal[d] >> 24;
dst[i]= (pal[d] >> 24)<<6;
}
}
static inline void palToY(uint8_t *dst, const uint8_t *src, long width, uint32_t *pal)
static inline void palToY(int16_t *dst, const uint8_t *src, long width, uint32_t *pal)
{
int i;
for (i=0; i<width; i++) {
int d= src[i];
dst[i]= pal[d] & 0xFF;
dst[i]= (pal[d] & 0xFF)<<6;
}
}
static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
static inline void palToUV(uint16_t *dstU, int16_t *dstV,
const uint8_t *src1, const uint8_t *src2,
long width, uint32_t *pal)
{
@ -1232,28 +1232,28 @@ static inline void palToUV(uint8_t *dstU, uint8_t *dstV,
for (i=0; i<width; i++) {
int p= pal[src1[i]];
dstU[i]= p>>8;
dstV[i]= p>>16;
dstU[i]= (uint8_t)(p>> 8)<<6;
dstV[i]= (uint8_t)(p>>16)<<6;
}
}
static inline void monowhite2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
static inline void monowhite2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i, j;
for (i=0; i<width/8; i++) {
int d= ~src[i];
for(j=0; j<8; j++)
dst[8*i+j]= ((d>>(7-j))&1)*255;
dst[8*i+j]= ((d>>(7-j))&1)*16383;
}
}
static inline void monoblack2Y(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
static inline void monoblack2Y(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
int i, j;
for (i=0; i<width/8; i++) {
int d= src[i];
for(j=0; j<8; j++)
dst[8*i+j]= ((d>>(7-j))&1)*255;
dst[8*i+j]= ((d>>(7-j))&1)*16383;
}
}

View File

@ -254,7 +254,7 @@ static inline void nv21ToUV_c(uint8_t *dstU, uint8_t *dstV,
nvXXtoUV_c(dstV, dstU, src1, width);
}
static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
static inline void bgr24ToY_c(int16_t *dst, const uint8_t *src,
long width, uint32_t *unused)
{
int i;
@ -263,11 +263,11 @@ static inline void bgr24ToY_c(uint8_t *dst, const uint8_t *src,
int g= src[i*3+1];
int r= src[i*3+2];
dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
}
}
static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
static inline void bgr24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, long width, uint32_t *unused)
{
int i;
@ -276,13 +276,13 @@ static inline void bgr24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
int g= src1[3*i + 1];
int r= src1[3*i + 2];
dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
}
assert(src1 == src2);
}
static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
static inline void bgr24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, long width, uint32_t *unused)
{
int i;
@ -291,13 +291,13 @@ static inline void bgr24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
int g= src1[6*i + 1] + src1[6*i + 4];
int r= src1[6*i + 2] + src1[6*i + 5];
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
}
assert(src1 == src2);
}
static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
static inline void rgb24ToY_c(int16_t *dst, const uint8_t *src, long width,
uint32_t *unused)
{
int i;
@ -306,11 +306,11 @@ static inline void rgb24ToY_c(uint8_t *dst, const uint8_t *src, long width,
int g= src[i*3+1];
int b= src[i*3+2];
dst[i]= ((RY*r + GY*g + BY*b + (33<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT);
dst[i]= ((RY*r + GY*g + BY*b + (32<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6));
}
}
static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
static inline void rgb24ToUV_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, long width, uint32_t *unused)
{
int i;
@ -320,12 +320,12 @@ static inline void rgb24ToUV_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1
int g= src1[3*i + 1];
int b= src1[3*i + 2];
dstU[i]= (RU*r + GU*g + BU*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
dstV[i]= (RV*r + GV*g + BV*b + (257<<(RGB2YUV_SHIFT-1)))>>RGB2YUV_SHIFT;
dstU[i]= (RU*r + GU*g + BU*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
dstV[i]= (RV*r + GV*g + BV*b + (256<<(RGB2YUV_SHIFT-1)) + (1<<(RGB2YUV_SHIFT-7)))>>(RGB2YUV_SHIFT-6);
}
}
static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1,
static inline void rgb24ToUV_half_c(int16_t *dstU, int16_t *dstV, const uint8_t *src1,
const uint8_t *src2, long width, uint32_t *unused)
{
int i;
@ -335,8 +335,8 @@ static inline void rgb24ToUV_half_c(uint8_t *dstU, uint8_t *dstV, const uint8_t
int g= src1[6*i + 1] + src1[6*i + 4];
int b= src1[6*i + 2] + src1[6*i + 5];
dstU[i]= (RU*r + GU*g + BU*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
dstV[i]= (RV*r + GV*g + BV*b + (257<<RGB2YUV_SHIFT))>>(RGB2YUV_SHIFT+1);
dstU[i]= (RU*r + GU*g + BU*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
dstV[i]= (RV*r + GV*g + BV*b + (256<<RGB2YUV_SHIFT) + (1<<(RGB2YUV_SHIFT-6)))>>(RGB2YUV_SHIFT-5);
}
}
@ -455,7 +455,8 @@ static inline void hyscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
}
if (c->hScale16) {
c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
c->hScale16(dst, dstWidth, (uint16_t*)src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize, shift);
} else if (!c->hyscale_fast) {
c->hScale(dst, dstWidth, src, srcW, xInc, hLumFilter, hLumFilterPos, hLumFilterSize);
} else { // fast bilinear upscale / crap downscale
@ -502,8 +503,9 @@ inline static void hcscale_c(SwsContext *c, uint16_t *dst, long dstWidth,
}
if (c->hScale16) {
c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1);
int shift= isAnyRGB(c->srcFormat) || c->srcFormat==PIX_FMT_PAL8 ? 13 : av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1;
c->hScale16(dst , dstWidth, (uint16_t*)src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
c->hScale16(dst+VOFW, dstWidth, (uint16_t*)src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize, shift);
} else if (!c->hcscale_fast) {
c->hScale(dst , dstWidth, src1, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
c->hScale(dst+VOFW, dstWidth, src2, srcW, xInc, hChrFilter, hChrFilterPos, hChrFilterSize);
@ -959,6 +961,9 @@ static void sws_init_swScale_c(SwsContext *c)
}
}
if(isAnyRGB(c->srcFormat) || c->srcFormat == PIX_FMT_PAL8)
c->hScale16= hScale16_c;
switch (srcFormat) {
case PIX_FMT_GRAY8A :
c->alpSrcOffset = 1;

View File

@ -860,7 +860,7 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
if (flags&SWS_PRINT_INFO)
av_log(c, AV_LOG_INFO, "output width is not a multiple of 32 -> no MMX2 scaler\n");
}
if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat)) c->canMMX2BeUsed=0;
if (usesHFilter || isNBPS(c->srcFormat) || is16BPS(c->srcFormat) || isAnyRGB(c->srcFormat)) c->canMMX2BeUsed=0;
}
else
c->canMMX2BeUsed=0;

View File

@ -1615,7 +1615,7 @@ static inline void RENAME(nv21ToUV)(uint8_t *dstU, uint8_t *dstV,
RENAME(nvXXtoUV)(dstV, dstU, src1, width);
}
static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
static inline void RENAME(bgr24ToY_mmx)(int16_t *dst, const uint8_t *src, long width, enum PixelFormat srcFormat)
{
if(srcFormat == PIX_FMT_BGR24) {
@ -1655,20 +1655,19 @@ static inline void RENAME(bgr24ToY_mmx)(uint8_t *dst, const uint8_t *src, long w
"paddd %%mm3, %%mm2 \n\t"
"paddd %%mm4, %%mm0 \n\t"
"paddd %%mm4, %%mm2 \n\t"
"psrad $15, %%mm0 \n\t"
"psrad $15, %%mm2 \n\t"
"psrad $9, %%mm0 \n\t"
"psrad $9, %%mm2 \n\t"
"packssdw %%mm2, %%mm0 \n\t"
"packuswb %%mm0, %%mm0 \n\t"
"movd %%mm0, (%1, %%"REG_a") \n\t"
"add $4, %%"REG_a" \n\t"
"movq %%mm0, (%1, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
: "+r" (src)
: "r" (dst+width), "g" ((x86_reg)-width)
: "r" (dst+width), "g" ((x86_reg)-2*width)
: "%"REG_a
);
}
static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
static inline void RENAME(bgr24ToUV_mmx)(int16_t *dstU, int16_t *dstV, const uint8_t *src, long width, enum PixelFormat srcFormat)
{
__asm__ volatile(
"movq 24(%4), %%mm6 \n\t"
@ -1708,41 +1707,39 @@ static inline void RENAME(bgr24ToUV_mmx)(uint8_t *dstU, uint8_t *dstV, const uin
"paddd %%mm3, %%mm2 \n\t"
"paddd %%mm3, %%mm1 \n\t"
"paddd %%mm3, %%mm4 \n\t"
"psrad $15, %%mm0 \n\t"
"psrad $15, %%mm2 \n\t"
"psrad $15, %%mm1 \n\t"
"psrad $15, %%mm4 \n\t"
"psrad $9, %%mm0 \n\t"
"psrad $9, %%mm2 \n\t"
"psrad $9, %%mm1 \n\t"
"psrad $9, %%mm4 \n\t"
"packssdw %%mm1, %%mm0 \n\t"
"packssdw %%mm4, %%mm2 \n\t"
"packuswb %%mm0, %%mm0 \n\t"
"packuswb %%mm2, %%mm2 \n\t"
"movd %%mm0, (%1, %%"REG_a") \n\t"
"movd %%mm2, (%2, %%"REG_a") \n\t"
"add $4, %%"REG_a" \n\t"
"movq %%mm0, (%1, %%"REG_a") \n\t"
"movq %%mm2, (%2, %%"REG_a") \n\t"
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
: "+r" (src)
: "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
: "r" (dstU+width), "r" (dstV+width), "g" ((x86_reg)-2*width), "r"(ff_bgr24toUV[srcFormat == PIX_FMT_RGB24])
: "%"REG_a
);
}
static inline void RENAME(bgr24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
static inline void RENAME(bgr24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_BGR24);
}
static inline void RENAME(bgr24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
static inline void RENAME(bgr24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_BGR24);
assert(src1 == src2);
}
static inline void RENAME(rgb24ToY)(uint8_t *dst, const uint8_t *src, long width, uint32_t *unused)
static inline void RENAME(rgb24ToY)(int16_t *dst, const uint8_t *src, long width, uint32_t *unused)
{
RENAME(bgr24ToY_mmx)(dst, src, width, PIX_FMT_RGB24);
}
static inline void RENAME(rgb24ToUV)(uint8_t *dstU, uint8_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
static inline void RENAME(rgb24ToUV)(int16_t *dstU, int16_t *dstV, const uint8_t *src1, const uint8_t *src2, long width, uint32_t *unused)
{
assert(src1==src2);
RENAME(bgr24ToUV_mmx)(dstU, dstV, src1, width, PIX_FMT_RGB24);
@ -2323,7 +2320,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break;
}
}
if (!c->chrSrcHSubSample) {
switch(srcFormat) {
case PIX_FMT_BGR24 : c->chrToYV12 = RENAME(bgr24ToUV); break;
@ -2348,4 +2345,7 @@ static void RENAME(sws_init_swScale)(SwsContext *c)
default: break;
}
}
if(isAnyRGB(c->srcFormat))
c->hScale16= RENAME(hScale16);
}

View File

@ -67,13 +67,13 @@ DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY1Coeff) = 0x0C88000040870C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toY2Coeff) = 0x20DE4087000020DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY1Coeff) = 0x20DE0000408720DEULL;
DECLARE_ASM_CONST(8, uint64_t, ff_rgb24toY2Coeff) = 0x0C88408700000C88ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008400000084000ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toYOffset) = 0x0008010000080100ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUV)[2][4] = {
{0x38380000DAC83838ULL, 0xECFFDAC80000ECFFULL, 0xF6E40000D0E3F6E4ULL, 0x3838D0E300003838ULL},
{0xECFF0000DAC8ECFFULL, 0x3838DAC800003838ULL, 0x38380000D0E33838ULL, 0xF6E4D0E30000F6E4ULL},
};
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040400000404000ULL;
DECLARE_ASM_CONST(8, uint64_t, ff_bgr24toUVOffset)= 0x0040010000400100ULL;
#endif /* SWSCALE_X86_SWSCALE_TEMPLATE_H */

View File

@ -16,15 +16,15 @@ e176bd14185788110e055f945de7f95f *./tests/data/pixfmt/yuvj420p.yuv
304128 ./tests/data/pixfmt/yuvj422p.yuv
c10442da177c9f1d12be3c53be6fa12c *./tests/data/pixfmt/yuvj444p.yuv
304128 ./tests/data/pixfmt/yuvj444p.yuv
c6e0f9b5817f484b175c1ec4ffb4e9c9 *./tests/data/pixfmt/rgb24.yuv
6bb61113e7b70eb09dbcec356122a0e2 *./tests/data/pixfmt/rgb24.yuv
304128 ./tests/data/pixfmt/rgb24.yuv
c6e0f9b5817f484b175c1ec4ffb4e9c9 *./tests/data/pixfmt/bgr24.yuv
6bb61113e7b70eb09dbcec356122a0e2 *./tests/data/pixfmt/bgr24.yuv
304128 ./tests/data/pixfmt/bgr24.yuv
c6e0f9b5817f484b175c1ec4ffb4e9c9 *./tests/data/pixfmt/rgb32.yuv
6bb61113e7b70eb09dbcec356122a0e2 *./tests/data/pixfmt/rgb32.yuv
304128 ./tests/data/pixfmt/rgb32.yuv
66d39d464bd89ded2a124897f0a75ade *./tests/data/pixfmt/rgb565.yuv
efa7c0337cc00c796c6df615223716f1 *./tests/data/pixfmt/rgb565.yuv
304128 ./tests/data/pixfmt/rgb565.yuv
c894c3bd8d2631ed1964500b90a0c350 *./tests/data/pixfmt/rgb555.yuv
0df2a477af1415a1b8fbf2a3e552bc39 *./tests/data/pixfmt/rgb555.yuv
304128 ./tests/data/pixfmt/rgb555.yuv
6be306b0cce5f8e6c271ea17fef9745b *./tests/data/pixfmt/gray.yuv
304128 ./tests/data/pixfmt/gray.yuv

View File

@ -1,28 +1,28 @@
abgr d894cb97f6c80eb21bdbe8a4eea62d86
argb 54346f2b2eef10919e0f247241df3b24
bgr24 570f8d6b51a838aed022ef67535f6bdc
bgr48be fcc0f2dbf45d325f84f816c74cbeeebe
bgr48le 3f9c2b23eed3b8d196d1c14b38ce50f5
abgr cff82561a074874027ac1cc896fd2730
argb 756dd1eaa5baca2238ce23dbdc452684
bgr24 e44192347a45586c6c157e3059610cd1
bgr48be 62e6043fbe9734e63ad679999ca8011c
bgr48le 61237dad4fa5f3e9109db85f53cd25d9
bgr4_byte ee1d35a7baf8e9016891929a2f565c0b
bgr555le 36b745067197f9ca8c1731cac51329c9
bgr565le 3a514a298c6161a071ddf9963c06509d
bgr555le 41e3e0961478dc634bf68a7bbd670cc9
bgr565le 614897eaeb422bd9a972f8ee51909be5
bgr8 7f007fa6c153a16e808a9c51605a4016
bgra a5e7040f9a80cccd65e5acf2ca09ace5
bgra 01cfdda1f72fcabb6c46424e27f8c519
gray d7786a7d9d99ac74230cc045cab5632c
gray16be 5ba22d4802b40ec27e62abb22ad1d1cc
gray16le 2d5e83aa875a4c3baa6fecf55e3223bf
monob 88c4c050758e64d120f50c7eff694381
monow d31772ebaa877fc2a78565937f7f9673
monob cb62f31b701c6e987b574974d1b31e32
monow fd5d417ab7728acddffc06870661df61
nv12 4676d59db43d657dc12841f6bc3ab452
nv21 69c699510ff1fb777b118ebee1002f14
rgb24 514692e28e8ff6860e415ce4fcf6eb8c
rgb48be 1894cd30dabcd3180518e4d5f09f25e7
rgb48le 1354e6e27ce3c1d4d4989ee56030c94b
rgb24 13ff53ebeab74dc05492836f1cfbd2c1
rgb48be 7f6b1f8139c6a64eadf9dfa867ac20e8
rgb48le 2756d8710c152cbc367656de4d0f1b76
rgb4_byte d81ffd3add95842a618eec81024f0b5c
rgb555le a350ef1dc2c9688ed49e7ba018843795
rgb565le 6f4bb711238baa762d73305213f8d035
rgb555le bd698d86c03170c4a16607c0fd1f750f
rgb565le bfa0c639d80c3c03fd0c9e5f34296a5e
rgb8 091d0170b354ef0e97312b95feb5483f
rgba a3d362f222098a00e63867f612018659
rgba 16873e3ac914e76116629a5ff8940ac4
uyvy422 314bd486277111a95d9369b944fa0400
yuv410p 7df8f6d69b56a8dcb6c7ee908e5018b5
yuv411p 1143e7c5cc28fe0922b051b17733bc4c

View File

@ -1,4 +1,4 @@
97894502b4cb57aca1105b6333f72dae *./tests/data/vsynth1/flashsv.flv
14681925 ./tests/data/vsynth1/flashsv.flv
947cb24ec45a453348ae6fe3fa278071 *./tests/data/flashsv.vsynth1.out.yuv
stddev: 2.85 PSNR: 39.03 MAXDIFF: 49 bytes: 7603200/ 7603200
791e1fb999deb2e4156e2286d48c4ed1 *./tests/data/flashsv.vsynth1.out.yuv
stddev: 2.84 PSNR: 39.04 MAXDIFF: 49 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
519e26bb1ac0f3db8f90b36537f2f760 *./tests/data/vsynth1/jpegls.avi
9089812 ./tests/data/vsynth1/jpegls.avi
947cb24ec45a453348ae6fe3fa278071 *./tests/data/jpegls.vsynth1.out.yuv
stddev: 2.85 PSNR: 39.03 MAXDIFF: 49 bytes: 7603200/ 7603200
791e1fb999deb2e4156e2286d48c4ed1 *./tests/data/jpegls.vsynth1.out.yuv
stddev: 2.84 PSNR: 39.04 MAXDIFF: 49 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
267a152a73cbc5ac4694a6e3b254be34 *./tests/data/vsynth1/msvideo1.avi
2162264 ./tests/data/vsynth1/msvideo1.avi
cf15dd12b8347567ae350383bf4ef4bb *./tests/data/msvideo1.vsynth1.out.yuv
stddev: 11.81 PSNR: 26.68 MAXDIFF: 151 bytes: 7603200/ 7603200
c0665fac1bd896b6fe7fe0eead805bd5 *./tests/data/msvideo1.vsynth1.out.yuv
stddev: 11.80 PSNR: 26.69 MAXDIFF: 151 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
d14041925ce5ec5001dc519276b1a1ab *./tests/data/vsynth1/qtrle.mov
15263232 ./tests/data/vsynth1/qtrle.mov
243325fb2cae1a9245efd49aff936327 *./tests/data/qtrle.vsynth1.out.yuv
stddev: 3.42 PSNR: 37.43 MAXDIFF: 48 bytes: 7603200/ 7603200
93695a27c24a61105076ca7b1f010bbd *./tests/data/qtrle.vsynth1.out.yuv
stddev: 3.42 PSNR: 37.44 MAXDIFF: 48 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
05f0719cb52486d9a4beb9cfae3f2571 *./tests/data/vsynth1/rgb.avi
15213260 ./tests/data/vsynth1/rgb.avi
243325fb2cae1a9245efd49aff936327 *./tests/data/rgb.vsynth1.out.yuv
stddev: 3.42 PSNR: 37.43 MAXDIFF: 48 bytes: 7603200/ 7603200
93695a27c24a61105076ca7b1f010bbd *./tests/data/rgb.vsynth1.out.yuv
stddev: 3.42 PSNR: 37.44 MAXDIFF: 48 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
0667077971e0cb63b5f49c580006e90e *./tests/data/vsynth2/flashsv.flv
12368953 ./tests/data/vsynth2/flashsv.flv
592b3321994e26a990deb3a0a1415de9 *./tests/data/flashsv.vsynth2.out.yuv
stddev: 0.65 PSNR: 51.84 MAXDIFF: 14 bytes: 7603200/ 7603200
3a984506f1ebfc9fb73b6814cab201cc *./tests/data/flashsv.vsynth2.out.yuv
stddev: 0.66 PSNR: 51.73 MAXDIFF: 14 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
4fc53937f048c900ae6d50fda9dba206 *./tests/data/vsynth2/jpegls.avi
8334630 ./tests/data/vsynth2/jpegls.avi
592b3321994e26a990deb3a0a1415de9 *./tests/data/jpegls.vsynth2.out.yuv
stddev: 0.65 PSNR: 51.84 MAXDIFF: 14 bytes: 7603200/ 7603200
3a984506f1ebfc9fb73b6814cab201cc *./tests/data/jpegls.vsynth2.out.yuv
stddev: 0.66 PSNR: 51.73 MAXDIFF: 14 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
5dddbbd6616d9be4bc0fd0c9650bd9e3 *./tests/data/vsynth2/msvideo1.avi
1259308 ./tests/data/vsynth2/msvideo1.avi
e2e7a952135f6307a74f2e178dc0df20 *./tests/data/msvideo1.vsynth2.out.yuv
stddev: 7.42 PSNR: 30.71 MAXDIFF: 123 bytes: 7603200/ 7603200
cd83ffcbc73573044e3aead3094229e5 *./tests/data/msvideo1.vsynth2.out.yuv
stddev: 7.42 PSNR: 30.72 MAXDIFF: 123 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
d8c1604dc46d9aa4ec0385e6722c6989 *./tests/data/vsynth2/qtrle.mov
14798419 ./tests/data/vsynth2/qtrle.mov
b2418e0e3a9a8619b31219cbcf24dc82 *./tests/data/qtrle.vsynth2.out.yuv
stddev: 1.26 PSNR: 46.06 MAXDIFF: 13 bytes: 7603200/ 7603200
98d0e2854731472c5bf13d8638502d0a *./tests/data/qtrle.vsynth2.out.yuv
stddev: 1.26 PSNR: 46.10 MAXDIFF: 13 bytes: 7603200/ 7603200

View File

@ -1,4 +1,4 @@
f2e9c419023c743bf99aa5b2e55ad233 *./tests/data/vsynth2/rgb.avi
15213260 ./tests/data/vsynth2/rgb.avi
b2418e0e3a9a8619b31219cbcf24dc82 *./tests/data/rgb.vsynth2.out.yuv
stddev: 1.26 PSNR: 46.06 MAXDIFF: 13 bytes: 7603200/ 7603200
98d0e2854731472c5bf13d8638502d0a *./tests/data/rgb.vsynth2.out.yuv
stddev: 1.26 PSNR: 46.10 MAXDIFF: 13 bytes: 7603200/ 7603200