From fcc0424c933742c8fc852371e985d16b6eb4bfe9 Mon Sep 17 00:00:00 2001 From: Paul B Mahol Date: Thu, 30 Jan 2020 22:01:23 +0100 Subject: [PATCH] avfilter/vf_ssim: improve precision Use doubles for accumulating floats. --- libavfilter/ssim.h | 2 +- libavfilter/vf_ssim.c | 20 +++++++++--------- libavfilter/x86/vf_ssim.asm | 37 +++++++++++++++++++++++----------- libavfilter/x86/vf_ssim_init.c | 2 +- 4 files changed, 37 insertions(+), 24 deletions(-) diff --git a/libavfilter/ssim.h b/libavfilter/ssim.h index ac0395a22a..a6a41aabe6 100644 --- a/libavfilter/ssim.h +++ b/libavfilter/ssim.h @@ -28,7 +28,7 @@ typedef struct SSIMDSPContext { void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride, const uint8_t *ref, ptrdiff_t ref_stride, int (*sums)[4], int w); - float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); + double (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); } SSIMDSPContext; void ff_ssim_init_x86(SSIMDSPContext *dsp); diff --git a/libavfilter/vf_ssim.c b/libavfilter/vf_ssim.c index 7f10c52ca9..a32fada220 100644 --- a/libavfilter/vf_ssim.c +++ b/libavfilter/vf_ssim.c @@ -55,13 +55,13 @@ typedef struct SSIMContext { uint64_t nb_frames; double ssim[4], ssim_total; char comps[4]; - float coefs[4]; + double coefs[4]; uint8_t rgba_map[4]; int planewidth[4]; int planeheight[4]; int *temp; int is_rgb; - float (*ssim_plane)(SSIMDSPContext *dsp, + double (*ssim_plane)(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, @@ -206,9 +206,9 @@ static float ssim_endn_16bit(const int64_t (*sum0)[4], const int64_t (*sum1)[4], return ssim; } -static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width) +static double ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width) { - float ssim = 0.0; + double ssim = 0.0; int i; for (i = 0; i < width; i++) @@ -221,14 +221,14 @@ static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int widt #define SUM_LEN(w) (((w) >> 2) + 3) -static float ssim_plane_16bit(SSIMDSPContext *dsp, +static double ssim_plane_16bit(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, int max) { int z = 0, y; - float ssim = 0.0; + double ssim = 0.0; int64_t (*sum0)[4] = temp; int64_t (*sum1)[4] = sum0 + SUM_LEN(width); @@ -249,14 +249,14 @@ static float ssim_plane_16bit(SSIMDSPContext *dsp, return ssim / ((height - 1) * (width - 1)); } -static float ssim_plane(SSIMDSPContext *dsp, +static double ssim_plane(SSIMDSPContext *dsp, uint8_t *main, int main_stride, uint8_t *ref, int ref_stride, int width, int height, void *temp, int max) { int z = 0, y; - float ssim = 0.0; + double ssim = 0.0; int (*sum0)[4] = temp; int (*sum1)[4] = sum0 + SUM_LEN(width); @@ -279,7 +279,7 @@ static float ssim_plane(SSIMDSPContext *dsp, static double ssim_db(double ssim, double weight) { - return 10 * log10(weight / (weight - ssim)); + return (fabs(weight - ssim) > 1e-9) ? 10.0 * log10(weight / (weight - ssim)) : INFINITY; } static int do_ssim(FFFrameSync *fs) @@ -288,7 +288,7 @@ static int do_ssim(FFFrameSync *fs) SSIMContext *s = ctx->priv; AVFrame *master, *ref; AVDictionary **metadata; - float c[4], ssimv = 0.0; + double c[4] = { 0 }, ssimv = 0.0; int ret, i; ret = ff_framesync_dualinput_get(fs, &master, &ref); diff --git a/libavfilter/x86/vf_ssim.asm b/libavfilter/x86/vf_ssim.asm index 3293e66701..1e682fe452 100644 --- a/libavfilter/x86/vf_ssim.asm +++ b/libavfilter/x86/vf_ssim.asm @@ -169,8 +169,9 @@ SSIM_4X4_LINE 8 %endif INIT_XMM sse4 -cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w +cglobal ssim_end_line, 3, 3, 7, sum0, sum1, w pxor m0, m0 + pxor m6, m6 .loop: mova m1, [sum0q+mmsize*0] mova m2, [sum0q+mmsize*1] @@ -214,34 +215,46 @@ cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w mulps m4, m5 mulps m3, m1 divps m4, m3 ; ssim_endl - addps m0, m4 ; ssim + mova m5, m4 + cvtps2pd m3, m5 + movhlps m5, m5 + cvtps2pd m5, m5 + addpd m0, m3 ; ssim + addpd m6, m5 ; ssim add sum0q, mmsize*4 add sum1q, mmsize*4 sub wd, 4 jg .loop - ; subps the ones we added too much + ; subpd the ones we added too much test wd, wd jz .end add wd, 4 + test wd, 3 + jz .skip3 test wd, 2 jz .skip2 - psrldq m4, 8 -.skip2: test wd, 1 jz .skip1 - psrldq m4, 4 +.skip3: + psrldq m5, 8 + subpd m6, m5 + jmp .end +.skip2: + psrldq m5, 8 + subpd m6, m5 + subpd m0, m3 + jmp .end .skip1: - subps m0, m4 + psrldq m3, 16 + subpd m6, m5 .end: + addpd m0, m6 movhlps m4, m0 - addps m0, m4 - movss m4, m0 - shufps m0, m0, 1 - addss m0, m4 + addpd m0, m4 %if ARCH_X86_32 - movss r0m, m0 + movsd r0m, m0 fld r0mp %endif RET diff --git a/libavfilter/x86/vf_ssim_init.c b/libavfilter/x86/vf_ssim_init.c index 599c928403..cbaa20ef16 100644 --- a/libavfilter/x86/vf_ssim_init.c +++ b/libavfilter/x86/vf_ssim_init.c @@ -28,7 +28,7 @@ void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride, void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride, const uint8_t *ref, ptrdiff_t ref_stride, int (*sums)[4], int w); -float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w); +double ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w); void ff_ssim_init_x86(SSIMDSPContext *dsp) {