Merge branch 'ronalds_buggy_gt8bit_sws'

* ronalds_buggy_gt8bit_sws:
  swscale: fix another yuv range conversion overflow in 16bit scaling. (cherry picked from commit 81cc7d0bd1)
  swscale: fix yuv range correction when using 16-bit scaling. (cherry picked from commit e0b8fff6c7)
  swscale: implement >8bit scaling support.

Conflicts:
	libswscale/swscale.c
	libswscale/utils.c
	libswscale/x86/swscale_template.c
	tests/ref/lavfi/pixdesc
	tests/ref/lavfi/pixfmts_copy
	tests/ref/lavfi/pixfmts_null
	tests/ref/lavfi/pixfmts_scale
	tests/ref/lavfi/pixfmts_vflip

Merged-by: Michael Niedermayer <michaelni@gmx.at>
This commit is contained in:
Michael Niedermayer 2011-07-01 05:28:13 +02:00
commit f2db5602ba
5 changed files with 476 additions and 286 deletions

View File

@ -406,7 +406,9 @@ void ff_sws_init_swScale_altivec(SwsContext *c)
if (!(av_get_cpu_flags() & AV_CPU_FLAG_ALTIVEC))
return;
c->hScale = hScale_altivec_real;
if (c->scalingBpp == 8) {
c->hScale = hScale_altivec_real;
}
if (!is16BPS(dstFormat) && !is9_OR_10BPS(dstFormat) &&
dstFormat != PIX_FMT_NV12 && dstFormat != PIX_FMT_NV21 &&
!c->alpPixBuf) {

File diff suppressed because it is too large Load Diff

View File

@ -208,6 +208,7 @@ typedef struct SwsContext {
enum PixelFormat srcFormat; ///< Source pixel format.
int dstFormatBpp; ///< Number of bits per pixel of the destination pixel format.
int srcFormatBpp; ///< Number of bits per pixel of the source pixel format.
int scalingBpp;
int chrSrcHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in source image.
int chrSrcVSubSample; ///< Binary logarithm of vertical subsampling factor between luma/alpha and chroma planes in source image.
int chrDstHSubSample; ///< Binary logarithm of horizontal subsampling factor between luma/alpha and chroma planes in destination image.
@ -457,6 +458,15 @@ typedef struct SwsContext {
void (*lumConvertRange)(int16_t *dst, int width); ///< Color range conversion function for luma plane if needed.
void (*chrConvertRange)(int16_t *dst1, int16_t *dst2, int width); ///< Color range conversion function for chroma planes if needed.
/**
* dst[..] = (src[..] << 8) | src[..];
*/
void (*scale8To16Rv)(uint16_t *dst, const uint8_t *src, int len);
/**
* dst[..] = src[..] >> 4;
*/
void (*scale19To15Fw)(int16_t *dst, const int32_t *src, int len);
int needs_hcscale; ///< Set if there are chroma planes to be converted.
} SwsContext;

View File

@ -46,6 +46,7 @@
#include "libavutil/bswap.h"
#include "libavutil/opt.h"
#include "libavutil/pixdesc.h"
#include "libavutil/avassert.h"
unsigned swscale_version(void)
{
@ -882,8 +883,15 @@ int sws_init_context(SwsContext *c, SwsFilter *srcFilter, SwsFilter *dstFilter)
}
}
// FIXME it's even nicer if bpp isn't 16, but max({src,dst}formatbpp)
c->scalingBpp = FFMAX(av_pix_fmt_descriptors[srcFormat].comp[0].depth_minus1,
av_pix_fmt_descriptors[dstFormat].comp[0].depth_minus1) >= 15 ? 16 : 8;
if (c->scalingBpp == 16)
dst_stride <<= 1;
av_assert0(c->scalingBpp<=16);
FF_ALLOC_OR_GOTO(c, c->formatConvBuffer, FFALIGN(srcW*2+78, 16) * 2, fail);
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2) {
if (HAVE_MMX2 && cpu_flags & AV_CPU_FLAG_MMX2 && c->scalingBpp == 8) {
c->canMMX2BeUsed= (dstW >=srcW && (dstW&31)==0 && (srcW&15)==0) ? 1 : 0;
if (!c->canMMX2BeUsed && dstW >=srcW && (srcW&15)==0 && (flags&SWS_FAST_BILINEAR)) {
if (flags&SWS_PRINT_INFO)

View File

@ -1637,32 +1637,6 @@ static void RENAME(yuy2ToUV)(uint8_t *dstU, uint8_t *dstV,
assert(src1 == src2);
}
/**
 * Reduce two planes of 16-bit samples to 8-bit chroma by keeping, for every
 * 16-bit word, the byte stored at the higher address (the most significant
 * byte of a little-endian sample).
 *
 * @param dstU   receives one byte per 16-bit word read from src1
 * @param dstV   receives one byte per 16-bit word read from src2
 * @param src1   first input plane, addressed as 2 bytes per sample
 * @param src2   second input plane, addressed as 2 bytes per sample
 * @param width  number of samples to convert per plane
 * @param unused not referenced by this function
 *
 * NOTE(review): each loop iteration consumes 16 bytes from each source and
 * stores 8 bytes to each destination, and the loop body runs at least once,
 * so the buffers presumably need padding up to a multiple of 8 samples --
 * confirm against the callers.
 */
static void RENAME(LEToUV)(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *unused)
{
__asm__ volatile(
/* The counter starts at -width; all pointers are passed pre-advanced to the
 * end of their buffers, so counter-relative addressing walks forward. */
"mov %0, %%"REG_a" \n\t"
"1: \n\t"
/* Load 8 words (16 bytes) from each source plane. */
"movq (%1, %%"REG_a",2), %%mm0 \n\t"
"movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
"movq (%2, %%"REG_a",2), %%mm2 \n\t"
"movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
/* Shift every 16-bit word right by 8, leaving its upper byte. */
"psrlw $8, %%mm0 \n\t"
"psrlw $8, %%mm1 \n\t"
"psrlw $8, %%mm2 \n\t"
"psrlw $8, %%mm3 \n\t"
/* Narrow the words to bytes: 8 output bytes per plane. */
"packuswb %%mm1, %%mm0 \n\t"
"packuswb %%mm3, %%mm2 \n\t"
"movq %%mm0, (%3, %%"REG_a") \n\t"
"movq %%mm2, (%4, %%"REG_a") \n\t"
/* Advance by 8 samples; keep looping while the counter is still negative. */
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
: : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
: "%"REG_a
);
}
/* This is almost identical to the previous, and exists only because
* yuy2ToY/UV(dst, src+1, ...) would have 100% unaligned accesses. */
static void RENAME(uyvyToY)(uint8_t *dst, const uint8_t *src,
@ -1712,33 +1686,6 @@ static void RENAME(uyvyToUV)(uint8_t *dstU, uint8_t *dstV,
assert(src1 == src2);
}
/**
 * Reduce two planes of 16-bit samples to 8-bit chroma by masking every
 * 16-bit word with the constant bm01010101 and narrowing the result to bytes.
 *
 * NOTE(review): bm01010101 is defined outside this view; presumably it is
 * 0x00FF in each 16-bit word, which would keep the byte at the lower address
 * (the most significant byte of a big-endian sample) -- confirm.
 *
 * @param dstU   receives one byte per 16-bit word read from src1
 * @param dstV   receives one byte per 16-bit word read from src2
 * @param src1   first input plane, addressed as 2 bytes per sample
 * @param src2   second input plane, addressed as 2 bytes per sample
 * @param width  number of samples to convert per plane
 * @param unused not referenced by this function
 *
 * NOTE(review): each loop iteration consumes 16 bytes from each source and
 * stores 8 bytes to each destination, and the loop body runs at least once,
 * so the buffers presumably need padding up to a multiple of 8 samples --
 * confirm against the callers.
 */
static void RENAME(BEToUV)(uint8_t *dstU, uint8_t *dstV,
const uint8_t *src1, const uint8_t *src2,
int width, uint32_t *unused)
{
__asm__ volatile(
/* mm4 holds the per-word mask for the whole loop. */
"movq "MANGLE(bm01010101)", %%mm4 \n\t"
/* The counter starts at -width; all pointers are passed pre-advanced to the
 * end of their buffers, so counter-relative addressing walks forward. */
"mov %0, %%"REG_a" \n\t"
"1: \n\t"
/* Load 8 words (16 bytes) from each source plane. */
"movq (%1, %%"REG_a",2), %%mm0 \n\t"
"movq 8(%1, %%"REG_a",2), %%mm1 \n\t"
"movq (%2, %%"REG_a",2), %%mm2 \n\t"
"movq 8(%2, %%"REG_a",2), %%mm3 \n\t"
/* Apply the mask to every word. */
"pand %%mm4, %%mm0 \n\t"
"pand %%mm4, %%mm1 \n\t"
"pand %%mm4, %%mm2 \n\t"
"pand %%mm4, %%mm3 \n\t"
/* Narrow the words to bytes: 8 output bytes per plane. */
"packuswb %%mm1, %%mm0 \n\t"
"packuswb %%mm3, %%mm2 \n\t"
"movq %%mm0, (%3, %%"REG_a") \n\t"
"movq %%mm2, (%4, %%"REG_a") \n\t"
/* Advance by 8 samples; keep looping while the counter is still negative. */
"add $8, %%"REG_a" \n\t"
" js 1b \n\t"
: : "g" ((x86_reg)-width), "r" (src1+width*2), "r" (src2+width*2), "r" (dstU+width), "r" (dstV+width)
: "%"REG_a
);
}
static av_always_inline void RENAME(nvXXtoUV)(uint8_t *dst1, uint8_t *dst2,
const uint8_t *src, int width)
{
@ -2433,6 +2380,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
}
if (c->scalingBpp == 8) {
#if !COMPILE_TEMPLATE_MMX2
c->hScale = RENAME(hScale );
#endif /* !COMPILE_TEMPLATE_MMX2 */
@ -2450,6 +2398,7 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
#if COMPILE_TEMPLATE_MMX2
}
#endif /* COMPILE_TEMPLATE_MMX2 */
}
#if !COMPILE_TEMPLATE_MMX2
switch(srcFormat) {
@ -2457,13 +2406,10 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
case PIX_FMT_UYVY422 : c->chrToYV12 = RENAME(uyvyToUV); break;
case PIX_FMT_NV12 : c->chrToYV12 = RENAME(nv12ToUV); break;
case PIX_FMT_NV21 : c->chrToYV12 = RENAME(nv21ToUV); break;
case PIX_FMT_GRAY16LE :
case PIX_FMT_YUV420P9LE:
case PIX_FMT_YUV422P10LE:
case PIX_FMT_YUV420P10LE:
case PIX_FMT_YUV420P16LE:
case PIX_FMT_YUV422P16LE:
case PIX_FMT_YUV444P16LE: c->hScale16= RENAME(hScale16); break;
case PIX_FMT_YUV420P10LE: c->hScale16= RENAME(hScale16); break;
default: break;
}
#endif /* !COMPILE_TEMPLATE_MMX2 */
if (!c->chrSrcHSubSample) {
@ -2477,10 +2423,8 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
switch (srcFormat) {
#if !COMPILE_TEMPLATE_MMX2
case PIX_FMT_YUYV422 :
case PIX_FMT_Y400A :
c->lumToYV12 = RENAME(yuy2ToY); break;
case PIX_FMT_UYVY422 :
c->lumToYV12 = RENAME(uyvyToY); break;
case PIX_FMT_Y400A : c->lumToYV12 = RENAME(yuy2ToY); break;
case PIX_FMT_UYVY422 : c->lumToYV12 = RENAME(uyvyToY); break;
#endif /* !COMPILE_TEMPLATE_MMX2 */
case PIX_FMT_BGR24 : c->lumToYV12 = RENAME(bgr24ToY); break;
case PIX_FMT_RGB24 : c->lumToYV12 = RENAME(rgb24ToY); break;
@ -2494,6 +2438,6 @@ static av_cold void RENAME(sws_init_swScale)(SwsContext *c)
}
}
#endif /* !COMPILE_TEMPLATE_MMX2 */
if(isAnyRGB(c->srcFormat))
if(isAnyRGB(c->srcFormat) && av_pix_fmt_descriptors[c->srcFormat].comp[0].depth_minus1<15)
c->hScale16= RENAME(hScale16);
}