From 74e49cc583fbc6a0dd06f8f6eb1ba2f5f340d547 Mon Sep 17 00:00:00 2001 From: Mark Reid Date: Sat, 13 Nov 2021 18:56:52 -0800 Subject: [PATCH] swscale/input: unify grayf32 funcs with rgbf32 funcs This is meant to be a cosmetic change old timings: 42780 UNITS in grayf32le, 1 runs, 0 skips 56720 UNITS in grayf32le, 2 runs, 0 skips 67265 UNITS in grayf32le, 4 runs, 0 skips 58082 UNITS in grayf32le, 8 runs, 0 skips 63512 UNITS in grayf32le, 16 runs, 0 skips 52720 UNITS in grayf32le, 32 runs, 0 skips 46491 UNITS in grayf32le, 64 runs, 0 skips 68500 UNITS in grayf32be, 1 runs, 0 skips 66930 UNITS in grayf32be, 2 runs, 0 skips 62305 UNITS in grayf32be, 4 runs, 0 skips 55510 UNITS in grayf32be, 8 runs, 0 skips 50216 UNITS in grayf32be, 16 runs, 0 skips 44480 UNITS in grayf32be, 32 runs, 0 skips 42394 UNITS in grayf32be, 64 runs, 0 skips new timings: 46660 UNITS in grayf32le, 1 runs, 0 skips 51830 UNITS in grayf32le, 2 runs, 0 skips 53390 UNITS in grayf32le, 4 runs, 0 skips 50910 UNITS in grayf32le, 8 runs, 0 skips 44968 UNITS in grayf32le, 16 runs, 0 skips 40349 UNITS in grayf32le, 32 runs, 0 skips 38330 UNITS in grayf32le, 64 runs, 0 skips 39980 UNITS in grayf32be, 1 runs, 0 skips 49630 UNITS in grayf32be, 2 runs, 0 skips 53540 UNITS in grayf32be, 4 runs, 0 skips 59767 UNITS in grayf32be, 8 runs, 0 skips 51206 UNITS in grayf32be, 16 runs, 0 skips 44743 UNITS in grayf32be, 32 runs, 0 skips 41468 UNITS in grayf32be, 64 runs, 0 skips Signed-off-by: Michael Niedermayer --- libswscale/input.c | 36 +++++++++++------------------------- 1 file changed, 11 insertions(+), 25 deletions(-) diff --git a/libswscale/input.c b/libswscale/input.c index 336f957c8c..90efdd2ffc 100644 --- a/libswscale/input.c +++ b/libswscale/input.c @@ -1013,31 +1013,19 @@ static av_always_inline void planar_rgbf32_to_y(uint8_t *_dst, const uint8_t *_s } } -#undef rdpx - static av_always_inline void grayf32ToY16_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, - const uint8_t *unused2, int width, 
uint32_t *unused) + const uint8_t *unused2, int width, int is_be, uint32_t *unused) { int i; const float *src = (const float *)_src; uint16_t *dst = (uint16_t *)_dst; for (i = 0; i < width; ++i){ - dst[i] = av_clip_uint16(lrintf(65535.0f * src[i])); + dst[i] = av_clip_uint16(lrintf(65535.0f * rdpx(src + i))); } } -static av_always_inline void grayf32ToY16_bswap_c(uint8_t *_dst, const uint8_t *_src, const uint8_t *unused1, - const uint8_t *unused2, int width, uint32_t *unused) -{ - int i; - const uint32_t *src = (const uint32_t *)_src; - uint16_t *dst = (uint16_t *)_dst; - - for (i = 0; i < width; ++i){ - dst[i] = av_clip_uint16(lrintf(65535.0f * av_int2float(av_bswap32(src[i])))); - } -} +#undef rdpx #define rgb9plus_planar_funcs_endian(nbits, endian_name, endian) \ static void planar_rgb##nbits##endian_name##_to_y(uint8_t *dst, const uint8_t *src[4], \ @@ -1092,6 +1080,12 @@ static void planar_rgbf32##endian_name##_to_a(uint8_t *dst, const uint8_t *src[4 int w, int32_t *rgb2yuv) \ { \ planar_rgbf32_to_a(dst, src, w, endian, rgb2yuv); \ +} \ +static void grayf32##endian_name##ToY16_c(uint8_t *dst, const uint8_t *src, \ + const uint8_t *unused1, const uint8_t *unused2, \ + int width, uint32_t *unused) \ +{ \ + grayf32ToY16_c(dst, src, unused1, unused2, width, endian, unused); \ } rgbf32_planar_funcs_endian(le, 0) @@ -1699,18 +1693,10 @@ av_cold void ff_sws_init_input_funcs(SwsContext *c) c->lumToYV12 = p010BEToY_c; break; case AV_PIX_FMT_GRAYF32LE: -#if HAVE_BIGENDIAN - c->lumToYV12 = grayf32ToY16_bswap_c; -#else - c->lumToYV12 = grayf32ToY16_c; -#endif + c->lumToYV12 = grayf32leToY16_c; break; case AV_PIX_FMT_GRAYF32BE: -#if HAVE_BIGENDIAN - c->lumToYV12 = grayf32ToY16_c; -#else - c->lumToYV12 = grayf32ToY16_bswap_c; -#endif + c->lumToYV12 = grayf32beToY16_c; break; case AV_PIX_FMT_Y210LE: c->lumToYV12 = y210le_Y_c;