ffmpeg/postproc/swscale.c


// Software scaling and colorspace conversion routines for MPlayer

// Temporary storage for horizontally scaled source lines.
// pix_buf_y is used as a ring of 4 luma lines, selected with (line & 3).
// pix_buf_uv holds 2 chroma lines, selected with (line & 1); inside each
// line the first chroma plane occupies [0..2047] and the second plane
// starts at offset 2048.
// Each entry is a weighted sum of two neighbouring 8-bit source samples,
// not yet shifted back down to 8 bits.
static unsigned int pix_buf_y[4][2048];
static unsigned int pix_buf_uv[2][2048*2];

// Clipping helper table for C implementations.
// SwScale_Init() fills it so that [0..255] -> 0, [256..511] -> 0..255 and
// [512..767] -> 255; lookups are therefore biased by +256.
static unsigned char clip_table[768];

// yuv->rgb conversion tables, filled by SwScale_Init().
// Each name carries the hexadecimal coefficient the table is built from.
// The sums of table entries are shifted right by 13 before they index
// clip_table; yuvtab_2568 additionally contains the +256 clip_table bias
// (stored as 256<<13).
static    int yuvtab_2568[256];
static    int yuvtab_3343[256];
static    int yuvtab_0c92[256];
static    int yuvtab_1a1e[256];
static    int yuvtab_40cf[256];

// *** Bilinear scaling and yuv->rgb conversion of yv12 slices.
// *** Note: it's called multiple times while decoding a frame, first time y==0
// *** Designed to upscale, but may work for downscale too.
//
// srcptr[]  : start of the current slice in each plane; [0] is luma, [1] and
//             [2] are the two chroma planes (called U and V in this code)
// stride[]  : distance between two lines of the matching plane
// y, h      : first source luma line of this slice and number of lines in it
// dstptr    : destination base; output line n is written at dstptr+dststride*n
// dststride : distance between two destination lines
// dstw      : destination width in pixels; at most 2048 pixels are produced
//             because that is the size of the scratch lines
// dstbpp    : distance between two destination pixels (3 bytes are written
//             per pixel)
// s_xinc    : horizontal step, 8.8 fixed point  = (src_width << 8) / dst_width
// s_yinc    : vertical step, 16.16 fixed point  = (src_height << 16) / dst_height
//
// The vertical position is kept in static variables between calls and is
// reset whenever y==0, so the slices of one frame must be passed in order
// and only one frame can be in progress at a time.
//
// NOTE(review): the counters start at -2, so the destination address of the
// first output steps lies before dstptr -- confirm that callers account for it.
void SwScale_YV12slice_brg24(unsigned char* srcptr[],int stride[], int y, int h,
			     unsigned char* dstptr, int dststride, int dstw, int dstbpp,
			     unsigned int s_xinc,unsigned int s_yinc){

unsigned int s_xinc2=s_xinc>>1; // the chroma planes are stepped at half the luma rate

// vertical state, carried over from one slice to the next:
static int s_srcypos;   // source position, 16.16 fixed point
static int s_ypos;      // destination line index
static int s_last_ypos; // source luma line that was loaded last

  // The scratch lines hold 2048 samples each (chroma: 2048 per plane).
  // A wider request would run past them, so limit the width instead of
  // corrupting memory.
  if(dstw>2048) dstw=2048;

  if(y==0){
      s_srcypos=-2*s_yinc;
      s_ypos=-2;
      s_last_ypos=-2;
  } // reset counters

  while(1){
    unsigned char *dest=dstptr+dststride*s_ypos;
    int y0=2+(s_srcypos>>16);               // luma line needed for this step
    int y1=1+(s_srcypos>>17);               // chroma line needed for this step
    int yalpha=(s_srcypos&0xFFFF)>>8;       // vertical luma weight, 0..255
    int yalpha1=yalpha^255;                 // complementary weight (255-yalpha)
    int uvalpha=((s_srcypos>>1)&0xFFFF)>>8; // vertical chroma weight, 0..255
    int uvalpha1=uvalpha^255;
    unsigned int *buf0=pix_buf_y[y0&3];     // previously scaled luma line
    unsigned int *buf1=pix_buf_y[((y0+1)&3)]; // luma line loaded below
    unsigned int *uvbuf0=pix_buf_uv[y1&1];
    unsigned int *uvbuf1=pix_buf_uv[(y1&1)^1];
    int i;

    // the line we need is not part of this slice: wait for the next call
    if(y0>=y+h) break;

    s_ypos++; s_srcypos+=s_yinc;

    if(s_last_ypos!=y0){
      unsigned char *src=srcptr[0]+(y0-y)*stride[0];
      unsigned int xpos=0;
      s_last_ypos=y0;
      // *** horizontal scale Y line to temp buffer
      // this loop should be rewritten in MMX assembly!!!!
      for(i=0;i<dstw;i++){
        register unsigned int xx=xpos>>8;
        register unsigned int xalpha=xpos&0xFF;
        buf1[i]=(src[xx]*(xalpha^255)+src[xx+1]*xalpha);
        xpos+=s_xinc;
      }
      // *** horizontal scale U and V lines to temp buffer
      // (only on even luma lines: one chroma line covers two luma lines)
      if(!(y0&1)){
        unsigned char *src1=srcptr[1]+(y1-y/2)*stride[1];
        unsigned char *src2=srcptr[2]+(y1-y/2)*stride[2];
        xpos=0;
        // this loop should be rewritten in MMX assembly!!!!
        for(i=0;i<dstw;i++){
          register unsigned int xx=xpos>>8;
          register unsigned int xalpha=xpos&0xFF;
          uvbuf1[i]=(src1[xx]*(xalpha^255)+src1[xx+1]*xalpha);
          uvbuf1[i+2048]=(src2[xx]*(xalpha^255)+src2[xx+1]*xalpha);
          xpos+=s_xinc2;
        }
      }
      // nothing is output for the step that loads source line 0
      if(!y0) continue;
    }

    // this loop should be rewritten in MMX assembly!!!!
    // Note1: this code can be restricted to n*8 (or n*16) width lines to simplify optimization...
    // Note2: instead of using lookup tabs, mmx version could do the multiply...
    // Note3: maybe we should make separated 15/16, 24 and 32bpp version of this:
    for(i=0;i<dstw;i++){
	// vertical linear interpolation && yuv2rgb in a single step:
	// (8-bit sample * 8-bit weight * 8-bit weight, hence the >>16)
	int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>16)];
	int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>16);
	int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>16);
#if 1
	// 24/32 bpp
	dest[0]=clip_table[((Y + yuvtab_3343[U]) >>13)];
	dest[1]=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
	dest[2]=clip_table[((Y + yuvtab_40cf[V]) >>13)];
#else
	unsigned short *d=dest;
	unsigned int b=clip_table[((Y + yuvtab_3343[U]) >>13)];
	unsigned int g=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];
	unsigned int r=clip_table[((Y + yuvtab_40cf[V]) >>13)];
//	d[0]=((r>>3)<<10)|((g>>3)<<5)|((b>>3)); // 15bpp
	d[0]=((r>>3)<<11)|((g>>2)<<5)|((b>>3)); // 16bpp
#endif
	dest+=dstbpp;
    }

  }

}


void SwScale_Init(){
    int idx;

    // Clip table: three 256-entry bands that saturate low, pass the value
    // through unchanged, and saturate high.
    for(idx=0;idx<256;idx++){
        clip_table[idx]=0;
        clip_table[256+idx]=idx;
        clip_table[512+idx]=255;
    }

    // Conversion tables: every entry is a fixed coefficient multiplied by
    // the sample with its offset (16 or 128) removed.
    for(idx=0;idx<256;idx++){
        const int minus16=idx-16;
        const int minus128=idx-128;

        // 256<<13 is the bias that moves the final >>13 result into the
        // pass-through band of clip_table
        yuvtab_2568[idx]=0x2568*minus16+(256<<13);
        yuvtab_3343[idx]=0x3343*minus128;
        yuvtab_0c92[idx]=-0x0c92*minus128;
        yuvtab_1a1e[idx]=-0x1a1e*minus128;
        yuvtab_40cf[idx]=0x40cf*minus128;
    }
}
software yv12->rgb scaler - separated from fsdga Originally committed as revision 2217 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc 2001-10-15 20:59:44 +02:00
			`// Software scaling and colorspace conversion routines for MPlayer`

			`// temporary storage for 4 yuv lines:`
			`static unsigned int pix_buf_y[4][2048];`
			`static unsigned int pix_buf_uv[2][2048*2];`

			`// clipping helper table for C implementations:`
			`static unsigned char clip_table[768];`

			`// yuv->rgb conversion tables:`
			`static int yuvtab_2568[256];`
			`static int yuvtab_3343[256];`
			`static int yuvtab_0c92[256];`
			`static int yuvtab_1a1e[256];`
			`static int yuvtab_40cf[256];`

			`// *** bilinear scaling and yuv->rgb conversion of yv12 slices:`
			`// *** Note: it's called multiple times while decoding a frame, first time y==0`
			`// *** Designed to upscale, but may work for downscale too.`
			`// s_xinc = (src_width << 8) / dst_width`
			`// s_yinc = (src_height << 16) / dst_height`
			`void SwScale_YV12slice_brg24(unsigned char* srcptr[],int stride[], int y, int h,`
			`unsigned char* dstptr, int dststride, int dstw, int dstbpp,`
			`unsigned int s_xinc,unsigned int s_yinc){`

			`// scaling factors:`
			`//static int s_yinc=(vo_dga_src_height<<16)/vo_dga_vp_height;`
			`//static int s_xinc=(vo_dga_src_width<<8)/vo_dga_vp_width;`

			`unsigned int s_xinc2=s_xinc>>1;`

			`static int s_srcypos;`
			`static int s_ypos;`
			`static int s_last_ypos;`

			`if(y==0){`
			`s_srcypos=-2*s_yinc;`
			`s_ypos=-2;`
			`s_last_ypos=-2;`
			`} // reset counters`

			`while(1){`
			`unsigned char *dest=dstptr+dststride*s_ypos;`
			`int y0=2+(s_srcypos>>16);`
			`int y1=1+(s_srcypos>>17);`
			`int yalpha=(s_srcypos&0xFFFF)>>8;`
			`int yalpha1=yalpha^255;`
			`int uvalpha=((s_srcypos>>1)&0xFFFF)>>8;`
			`int uvalpha1=uvalpha^255;`
			`unsigned int *buf0=pix_buf_y[y0&3];`
			`unsigned int *buf1=pix_buf_y[((y0+1)&3)];`
			`unsigned int *uvbuf0=pix_buf_uv[y1&1];`
			`unsigned int *uvbuf1=pix_buf_uv[(y1&1)^1];`
			`int i;`

			`if(y0>=y+h) break;`

			`s_ypos++; s_srcypos+=s_yinc;`

			`if(s_last_ypos!=y0){`
			`unsigned char *src=srcptr[0]+(y0-y)*stride[0];`
			`unsigned int xpos=0;`
			`s_last_ypos=y0;`
			`// *** horizontal scale Y line to temp buffer`
			`// this loop should be rewritten in MMX assembly!!!!`
			`for(i=0;i<dstw;i++){`
			`register unsigned int xx=xpos>>8;`
			`register unsigned int xalpha=xpos&0xFF;`
			`buf1[i]=(src[xx]*(xalpha^255)+src[xx+1]*xalpha);`
			`xpos+=s_xinc;`
			`}`
			`// *** horizontal scale U and V lines to temp buffer`
			`if(!(y0&1)){`
			`unsigned char *src1=srcptr[1]+(y1-y/2)*stride[1];`
			`unsigned char *src2=srcptr[2]+(y1-y/2)*stride[2];`
			`xpos=0;`
			`// this loop should be rewritten in MMX assembly!!!!`
			`for(i=0;i<dstw;i++){`
			`register unsigned int xx=xpos>>8;`
			`register unsigned int xalpha=xpos&0xFF;`
			`uvbuf1[i]=(src1[xx]*(xalpha^255)+src1[xx+1]*xalpha);`
			`uvbuf1[i+2048]=(src2[xx]*(xalpha^255)+src2[xx+1]*xalpha);`
			`xpos+=s_xinc2;`
			`}`
			`}`
			`if(!y0) continue;`
			`}`

			`// this loop should be rewritten in MMX assembly!!!!`
			`// Note1: this code can be restricted to n*8 (or n*16) width lines to simplify optimization...`
			`// Note2: instead of using lookup tabs, mmx version could do the multiply...`
			`// Note3: maybe we should make separated 15/16, 24 and 32bpp version of this:`
			`for(i=0;i<dstw;i++){`
			`// vertical linear interpolation && yuv2rgb in a single step:`
			`int Y=yuvtab_2568[((buf0[i]*yalpha1+buf1[i]*yalpha)>>16)];`
			`int U=((uvbuf0[i]*uvalpha1+uvbuf1[i]*uvalpha)>>16);`
			`int V=((uvbuf0[i+2048]*uvalpha1+uvbuf1[i+2048]*uvalpha)>>16);`
16bpp code for atmos Originally committed as revision 2230 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc 2001-10-17 01:58:17 +02:00			`#if 1`
			`// 24/32 bpp`
software yv12->rgb scaler - separated from fsdga Originally committed as revision 2217 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc 2001-10-15 20:59:44 +02:00			`dest[0]=clip_table[((Y + yuvtab_3343[U]) >>13)];`
			`dest[1]=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];`
			`dest[2]=clip_table[((Y + yuvtab_40cf[V]) >>13)];`
16bpp code for atmos Originally committed as revision 2230 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc 2001-10-17 01:58:17 +02:00			`#else`
			`unsigned short *d=dest;`
fix rgb/bgr for 15/16bpp Originally committed as revision 2231 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc 2001-10-17 02:29:07 +02:00			`unsigned int b=clip_table[((Y + yuvtab_3343[U]) >>13)];`
16bpp code for atmos Originally committed as revision 2230 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc 2001-10-17 01:58:17 +02:00			`unsigned int g=clip_table[((Y + yuvtab_0c92[V] + yuvtab_1a1e[U]) >>13)];`
fix rgb/bgr for 15/16bpp Originally committed as revision 2231 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc 2001-10-17 02:29:07 +02:00			`unsigned int r=clip_table[((Y + yuvtab_40cf[V]) >>13)];`
			`// d[0]=((r>>3)<<10)|((g>>3)<<5)|((b>>3)); // 15bpp`
			`d[0]=((r>>3)<<11)|((g>>2)<<5)|((b>>3)); // 16bpp`
16bpp code for atmos Originally committed as revision 2230 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc 2001-10-17 01:58:17 +02:00			`#endif`
software yv12->rgb scaler - separated from fsdga Originally committed as revision 2217 to svn://svn.mplayerhq.hu/mplayer/trunk/postproc 2001-10-15 20:59:44 +02:00			`dest+=dstbpp;`
			`}`

			`}`

			`}`


			`void SwScale_Init(){`
			`// generating tables:`
			`int i;`
			`for(i=0;i<256;i++){`
			`clip_table[i]=0;`
			`clip_table[i+256]=i;`
			`clip_table[i+512]=255;`
			`yuvtab_2568[i]=(0x2568*(i-16))+(256<<13);`
			`yuvtab_3343[i]=0x3343*(i-128);`
			`yuvtab_0c92[i]=-0x0c92*(i-128);`
			`yuvtab_1a1e[i]=-0x1a1e*(i-128);`
			`yuvtab_40cf[i]=0x40cf*(i-128);`
			`}`

			`}`