x86/tx_float: save a branch during coefficient deinterleaving

Directly branch into the special 64-point deinterleave
subroutine rather than going through the general deinterleave.

64-point transform timings on Zen 3:
Before:
   1974 decicycles in           av_tx (fft),16776864 runs,    352 skips
After:
   1956 decicycles in           av_tx (fft),16775378 runs,   1838 skips
This commit is contained in:
Lynne 2022-08-09 03:31:11 +02:00
parent 5cdf4c0bed
commit 98b32ef462
No known key found for this signature in database
GPG Key ID: A2FEA5F03F034464

View File

@@ -1044,7 +1044,7 @@ ALIGN 16
add lutq, (mmsize/2)*8
%endif
cmp tgtq, 64
-je .deinterleave
+je .64pt_deint
SPLIT_RADIX_COMBINE_64
@@ -1190,9 +1190,6 @@ FFT_SPLIT_RADIX_DEF 131072
; Final synthesis + deinterleaving code
;===============================================================================
.deinterleave:
-cmp lenq, 64
-je .64pt_deint
imul tmpq, lenq, 2
lea lutq, [4*lenq + tmpq]