avfilter/vf_ssim: improve precision

Accumulate SSIM values in doubles instead of floats to reduce rounding error.
This commit is contained in:
Paul B Mahol 2020-01-30 22:01:23 +01:00
parent c35382aaf4
commit fcc0424c93
4 changed files with 37 additions and 24 deletions

View File

@ -28,7 +28,7 @@ typedef struct SSIMDSPContext {
void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride, void (*ssim_4x4_line)(const uint8_t *buf, ptrdiff_t buf_stride,
const uint8_t *ref, ptrdiff_t ref_stride, const uint8_t *ref, ptrdiff_t ref_stride,
int (*sums)[4], int w); int (*sums)[4], int w);
float (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w); double (*ssim_end_line)(const int (*sum0)[4], const int (*sum1)[4], int w);
} SSIMDSPContext; } SSIMDSPContext;
void ff_ssim_init_x86(SSIMDSPContext *dsp); void ff_ssim_init_x86(SSIMDSPContext *dsp);

View File

@ -55,13 +55,13 @@ typedef struct SSIMContext {
uint64_t nb_frames; uint64_t nb_frames;
double ssim[4], ssim_total; double ssim[4], ssim_total;
char comps[4]; char comps[4];
float coefs[4]; double coefs[4];
uint8_t rgba_map[4]; uint8_t rgba_map[4];
int planewidth[4]; int planewidth[4];
int planeheight[4]; int planeheight[4];
int *temp; int *temp;
int is_rgb; int is_rgb;
float (*ssim_plane)(SSIMDSPContext *dsp, double (*ssim_plane)(SSIMDSPContext *dsp,
uint8_t *main, int main_stride, uint8_t *main, int main_stride,
uint8_t *ref, int ref_stride, uint8_t *ref, int ref_stride,
int width, int height, void *temp, int width, int height, void *temp,
@ -206,9 +206,9 @@ static float ssim_endn_16bit(const int64_t (*sum0)[4], const int64_t (*sum1)[4],
return ssim; return ssim;
} }
static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width) static double ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int width)
{ {
float ssim = 0.0; double ssim = 0.0;
int i; int i;
for (i = 0; i < width; i++) for (i = 0; i < width; i++)
@ -221,14 +221,14 @@ static float ssim_endn_8bit(const int (*sum0)[4], const int (*sum1)[4], int widt
#define SUM_LEN(w) (((w) >> 2) + 3) #define SUM_LEN(w) (((w) >> 2) + 3)
static float ssim_plane_16bit(SSIMDSPContext *dsp, static double ssim_plane_16bit(SSIMDSPContext *dsp,
uint8_t *main, int main_stride, uint8_t *main, int main_stride,
uint8_t *ref, int ref_stride, uint8_t *ref, int ref_stride,
int width, int height, void *temp, int width, int height, void *temp,
int max) int max)
{ {
int z = 0, y; int z = 0, y;
float ssim = 0.0; double ssim = 0.0;
int64_t (*sum0)[4] = temp; int64_t (*sum0)[4] = temp;
int64_t (*sum1)[4] = sum0 + SUM_LEN(width); int64_t (*sum1)[4] = sum0 + SUM_LEN(width);
@ -249,14 +249,14 @@ static float ssim_plane_16bit(SSIMDSPContext *dsp,
return ssim / ((height - 1) * (width - 1)); return ssim / ((height - 1) * (width - 1));
} }
static float ssim_plane(SSIMDSPContext *dsp, static double ssim_plane(SSIMDSPContext *dsp,
uint8_t *main, int main_stride, uint8_t *main, int main_stride,
uint8_t *ref, int ref_stride, uint8_t *ref, int ref_stride,
int width, int height, void *temp, int width, int height, void *temp,
int max) int max)
{ {
int z = 0, y; int z = 0, y;
float ssim = 0.0; double ssim = 0.0;
int (*sum0)[4] = temp; int (*sum0)[4] = temp;
int (*sum1)[4] = sum0 + SUM_LEN(width); int (*sum1)[4] = sum0 + SUM_LEN(width);
@ -279,7 +279,7 @@ static float ssim_plane(SSIMDSPContext *dsp,
static double ssim_db(double ssim, double weight) static double ssim_db(double ssim, double weight)
{ {
return 10 * log10(weight / (weight - ssim)); return (fabs(weight - ssim) > 1e-9) ? 10.0 * log10(weight / (weight - ssim)) : INFINITY;
} }
static int do_ssim(FFFrameSync *fs) static int do_ssim(FFFrameSync *fs)
@ -288,7 +288,7 @@ static int do_ssim(FFFrameSync *fs)
SSIMContext *s = ctx->priv; SSIMContext *s = ctx->priv;
AVFrame *master, *ref; AVFrame *master, *ref;
AVDictionary **metadata; AVDictionary **metadata;
float c[4], ssimv = 0.0; double c[4] = { 0 }, ssimv = 0.0;
int ret, i; int ret, i;
ret = ff_framesync_dualinput_get(fs, &master, &ref); ret = ff_framesync_dualinput_get(fs, &master, &ref);

View File

@ -169,8 +169,9 @@ SSIM_4X4_LINE 8
%endif %endif
INIT_XMM sse4 INIT_XMM sse4
cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w cglobal ssim_end_line, 3, 3, 7, sum0, sum1, w
pxor m0, m0 pxor m0, m0
pxor m6, m6
.loop: .loop:
mova m1, [sum0q+mmsize*0] mova m1, [sum0q+mmsize*0]
mova m2, [sum0q+mmsize*1] mova m2, [sum0q+mmsize*1]
@ -214,34 +215,46 @@ cglobal ssim_end_line, 3, 3, 6, sum0, sum1, w
mulps m4, m5 mulps m4, m5
mulps m3, m1 mulps m3, m1
divps m4, m3 ; ssim_endl divps m4, m3 ; ssim_endl
addps m0, m4 ; ssim mova m5, m4
cvtps2pd m3, m5
movhlps m5, m5
cvtps2pd m5, m5
addpd m0, m3 ; ssim
addpd m6, m5 ; ssim
add sum0q, mmsize*4 add sum0q, mmsize*4
add sum1q, mmsize*4 add sum1q, mmsize*4
sub wd, 4 sub wd, 4
jg .loop jg .loop
; subps the ones we added too much ; subpd the ones we added too much
test wd, wd test wd, wd
jz .end jz .end
add wd, 4 add wd, 4
test wd, 3
jz .skip3
test wd, 2 test wd, 2
jz .skip2 jz .skip2
psrldq m4, 8
.skip2:
test wd, 1 test wd, 1
jz .skip1 jz .skip1
psrldq m4, 4 .skip3:
psrldq m5, 8
subpd m6, m5
jmp .end
.skip2:
psrldq m5, 8
subpd m6, m5
subpd m0, m3
jmp .end
.skip1: .skip1:
subps m0, m4 psrldq m3, 16
subpd m6, m5
.end: .end:
addpd m0, m6
movhlps m4, m0 movhlps m4, m0
addps m0, m4 addpd m0, m4
movss m4, m0
shufps m0, m0, 1
addss m0, m4
%if ARCH_X86_32 %if ARCH_X86_32
movss r0m, m0 movsd r0m, m0
fld r0mp fld r0mp
%endif %endif
RET RET

View File

@ -28,7 +28,7 @@ void ff_ssim_4x4_line_ssse3(const uint8_t *buf, ptrdiff_t buf_stride,
void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride, void ff_ssim_4x4_line_xop (const uint8_t *buf, ptrdiff_t buf_stride,
const uint8_t *ref, ptrdiff_t ref_stride, const uint8_t *ref, ptrdiff_t ref_stride,
int (*sums)[4], int w); int (*sums)[4], int w);
float ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w); double ff_ssim_end_line_sse4(const int (*sum0)[4], const int (*sum1)[4], int w);
void ff_ssim_init_x86(SSIMDSPContext *dsp) void ff_ssim_init_x86(SSIMDSPContext *dsp)
{ {