libswscale/x86/rgb2rgb: add shuffle_bytes avx2
Performance data (less is better):
    shuffle_bytes_ssse3    3.64654
    shuffle_bytes_avx2     0.94288

Signed-off-by: Wu Jianhua <jianhua.wu@intel.com>
This commit is contained in:
parent
767f162432
commit
2c734a8496
|
@ -146,6 +146,12 @@ void ff_shuffle_bytes_3012_ssse3(const uint8_t *src, uint8_t *dst, int src_size)
|
||||||
void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
|
void ff_shuffle_bytes_3210_ssse3(const uint8_t *src, uint8_t *dst, int src_size);
|
||||||
|
|
||||||
#if ARCH_X86_64
|
#if ARCH_X86_64
|
||||||
|
void ff_shuffle_bytes_2103_avx2(const uint8_t *src, uint8_t *dst, int src_size);
|
||||||
|
void ff_shuffle_bytes_0321_avx2(const uint8_t *src, uint8_t *dst, int src_size);
|
||||||
|
void ff_shuffle_bytes_1230_avx2(const uint8_t *src, uint8_t *dst, int src_size);
|
||||||
|
void ff_shuffle_bytes_3012_avx2(const uint8_t *src, uint8_t *dst, int src_size);
|
||||||
|
void ff_shuffle_bytes_3210_avx2(const uint8_t *src, uint8_t *dst, int src_size);
|
||||||
|
|
||||||
void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
void ff_uyvytoyuv422_sse2(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||||
const uint8_t *src, int width, int height,
|
const uint8_t *src, int width, int height,
|
||||||
int lumStride, int chromStride, int srcStride);
|
int lumStride, int chromStride, int srcStride);
|
||||||
|
@ -186,9 +192,16 @@ av_cold void rgb2rgb_init_x86(void)
|
||||||
shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3;
|
shuffle_bytes_3012 = ff_shuffle_bytes_3012_ssse3;
|
||||||
shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3;
|
shuffle_bytes_3210 = ff_shuffle_bytes_3210_ssse3;
|
||||||
}
|
}
|
||||||
if (EXTERNAL_AVX(cpu_flags)) {
|
|
||||||
#if ARCH_X86_64
|
#if ARCH_X86_64
|
||||||
uyvytoyuv422 = ff_uyvytoyuv422_avx;
|
if (EXTERNAL_AVX2_FAST(cpu_flags)) {
|
||||||
#endif
|
shuffle_bytes_0321 = ff_shuffle_bytes_0321_avx2;
|
||||||
|
shuffle_bytes_2103 = ff_shuffle_bytes_2103_avx2;
|
||||||
|
shuffle_bytes_1230 = ff_shuffle_bytes_1230_avx2;
|
||||||
|
shuffle_bytes_3012 = ff_shuffle_bytes_3012_avx2;
|
||||||
|
shuffle_bytes_3210 = ff_shuffle_bytes_3210_avx2;
|
||||||
}
|
}
|
||||||
|
if (EXTERNAL_AVX(cpu_flags)) {
|
||||||
|
uyvytoyuv422 = ff_uyvytoyuv422_avx;
|
||||||
|
}
|
||||||
|
#endif
|
||||||
}
|
}
|
||||||
|
|
|
@ -159,6 +159,17 @@ SHUFFLE_BYTES 1, 2, 3, 0
|
||||||
SHUFFLE_BYTES 3, 0, 1, 2
|
SHUFFLE_BYTES 3, 0, 1, 2
|
||||||
SHUFFLE_BYTES 3, 2, 1, 0
|
SHUFFLE_BYTES 3, 2, 1, 0
|
||||||
|
|
||||||
|
%if ARCH_X86_64
|
||||||
|
%if HAVE_AVX2_EXTERNAL
|
||||||
|
INIT_YMM avx2
|
||||||
|
SHUFFLE_BYTES 2, 1, 0, 3
|
||||||
|
SHUFFLE_BYTES 0, 3, 2, 1
|
||||||
|
SHUFFLE_BYTES 1, 2, 3, 0
|
||||||
|
SHUFFLE_BYTES 3, 0, 1, 2
|
||||||
|
SHUFFLE_BYTES 3, 2, 1, 0
|
||||||
|
%endif
|
||||||
|
%endif
|
||||||
|
|
||||||
;-----------------------------------------------------------------------------------------------
|
;-----------------------------------------------------------------------------------------------
|
||||||
; uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
; uyvytoyuv422(uint8_t *ydst, uint8_t *udst, uint8_t *vdst,
|
||||||
; const uint8_t *src, int width, int height,
|
; const uint8_t *src, int width, int height,
|
||||||
|
|
Loading…
Reference in New Issue