diff --git a/libavcodec/x86/dcadsp_init.c b/libavcodec/x86/dcadsp_init.c index bb86c26037..1a19f6b807 100644 --- a/libavcodec/x86/dcadsp_init.c +++ b/libavcodec/x86/dcadsp_init.c @@ -103,10 +103,10 @@ av_cold void ff_synth_filter_init_x86(SynthFilterContext *s) if (EXTERNAL_SSE2(cpu_flags)) { s->synth_filter_float = synth_filter_sse2; } - if (EXTERNAL_AVX(cpu_flags)) { + if (EXTERNAL_AVX_FAST(cpu_flags)) { s->synth_filter_float = synth_filter_avx; } - if (EXTERNAL_FMA3(cpu_flags)) { + if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) { s->synth_filter_float = synth_filter_fma3; } #endif /* HAVE_YASM */ diff --git a/libavcodec/x86/dct_init.c b/libavcodec/x86/dct_init.c index 30c8f12bf2..daf2bb4e5d 100644 --- a/libavcodec/x86/dct_init.c +++ b/libavcodec/x86/dct_init.c @@ -34,6 +34,6 @@ av_cold void ff_dct_init_x86(DCTContext *s) s->dct32 = ff_dct32_float_sse; if (EXTERNAL_SSE2(cpu_flags)) s->dct32 = ff_dct32_float_sse2; - if (EXTERNAL_AVX(cpu_flags)) + if (EXTERNAL_AVX_FAST(cpu_flags)) s->dct32 = ff_dct32_float_avx; } diff --git a/libavcodec/x86/fft_init.c b/libavcodec/x86/fft_init.c index 5682230c8e..5085f11380 100644 --- a/libavcodec/x86/fft_init.c +++ b/libavcodec/x86/fft_init.c @@ -48,7 +48,7 @@ av_cold void ff_fft_init_x86(FFTContext *s) s->fft_calc = ff_fft_calc_sse; s->fft_permutation = FF_FFT_PERM_SWAP_LSBS; } - if (EXTERNAL_AVX(cpu_flags) && s->nbits >= 5) { + if (EXTERNAL_AVX_FAST(cpu_flags) && s->nbits >= 5) { /* AVX for SB */ s->imdct_half = ff_imdct_half_avx; s->fft_calc = ff_fft_calc_avx; diff --git a/libavcodec/x86/vp9dsp_init.c b/libavcodec/x86/vp9dsp_init.c index 979bd936ac..00e7125a0c 100644 --- a/libavcodec/x86/vp9dsp_init.c +++ b/libavcodec/x86/vp9dsp_init.c @@ -483,12 +483,14 @@ av_cold void ff_vp9dsp_init_x86(VP9DSPContext *dsp, int bpp) dsp->itxfm_add[TX_32X32][ADST_DCT] = dsp->itxfm_add[TX_32X32][DCT_ADST] = dsp->itxfm_add[TX_32X32][DCT_DCT] = ff_vp9_idct_idct_32x32_add_avx; - init_fpel(1, 0, 32, put, avx); - init_fpel(0, 0, 64, put, avx); init_lpf(avx); init_dir_tm_h_ipred(8, avx); init_dir_tm_h_ipred(16, avx); init_dir_tm_h_ipred(32, avx); + } + if (EXTERNAL_AVX_FAST(cpu_flags)) { + init_fpel(1, 0, 32, put, avx); + init_fpel(0, 0, 64, put, avx); init_ipred(32, avx, v, VERT); } diff --git a/libavfilter/x86/af_volume_init.c b/libavfilter/x86/af_volume_init.c index 57c7eab65f..88f5a9679a 100644 --- a/libavfilter/x86/af_volume_init.c +++ b/libavfilter/x86/af_volume_init.c @@ -52,7 +52,7 @@ av_cold void ff_volume_init_x86(VolumeContext *vol) vol->scale_samples = ff_scale_samples_s32_ssse3_atom; vol->samples_align = 4; } - if (EXTERNAL_AVX(cpu_flags)) { + if (EXTERNAL_AVX_FAST(cpu_flags)) { vol->scale_samples = ff_scale_samples_s32_avx; vol->samples_align = 8; } diff --git a/libavutil/x86/float_dsp_init.c b/libavutil/x86/float_dsp_init.c index 64b3a4d6bd..f211f2396b 100644 --- a/libavutil/x86/float_dsp_init.c +++ b/libavutil/x86/float_dsp_init.c @@ -85,14 +85,14 @@ av_cold void ff_float_dsp_init_x86(AVFloatDSPContext *fdsp) if (EXTERNAL_SSE2(cpu_flags)) { fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_sse2; } - if (EXTERNAL_AVX(cpu_flags)) { + if (EXTERNAL_AVX_FAST(cpu_flags)) { fdsp->vector_fmul = ff_vector_fmul_avx; fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_avx; fdsp->vector_dmul_scalar = ff_vector_dmul_scalar_avx; fdsp->vector_fmul_add = ff_vector_fmul_add_avx; fdsp->vector_fmul_reverse = ff_vector_fmul_reverse_avx; } - if (EXTERNAL_FMA3(cpu_flags)) { + if (EXTERNAL_FMA3(cpu_flags) && !(cpu_flags & AV_CPU_FLAG_AVXSLOW)) { fdsp->vector_fmac_scalar = ff_vector_fmac_scalar_fma3; fdsp->vector_fmul_add = ff_vector_fmul_add_fma3; } diff --git a/libavutil/x86/lls_init.c b/libavutil/x86/lls_init.c index f53190488a..81f141cbeb 100644 --- a/libavutil/x86/lls_init.c +++ b/libavutil/x86/lls_init.c @@ -35,7 +35,7 @@ av_cold void ff_init_lls_x86(LLSModel *m) if (m->indep_count >= 4) m->evaluate_lls = ff_evaluate_lls_sse2; } - if (EXTERNAL_AVX(cpu_flags)) { + if (EXTERNAL_AVX_FAST(cpu_flags)) { m->update_lls = ff_update_lls_avx; } } diff --git a/libswresample/x86/audio_convert_init.c b/libswresample/x86/audio_convert_init.c index 7f25d981a9..5e5e91d142 100644 --- a/libswresample/x86/audio_convert_init.c +++ b/libswresample/x86/audio_convert_init.c @@ -145,9 +145,11 @@ MULTI_CAPS_FUNC(SSE2, sse2) ac->simd_f = ff_unpack_2ch_int16_to_float_a_ssse3; } } - if(EXTERNAL_AVX(mm_flags)) { + if(EXTERNAL_AVX_FAST(mm_flags)) { if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_S32 || out_fmt == AV_SAMPLE_FMT_FLTP && in_fmt == AV_SAMPLE_FMT_S32P) ac->simd_f = ff_int32_to_float_a_avx; + } + if(EXTERNAL_AVX(mm_flags)) { if(channels == 6) { if( out_fmt == AV_SAMPLE_FMT_FLT && in_fmt == AV_SAMPLE_FMT_FLTP || out_fmt == AV_SAMPLE_FMT_S32 && in_fmt == AV_SAMPLE_FMT_S32P) ac->simd_f = ff_pack_6ch_float_to_float_a_avx; diff --git a/libswresample/x86/rematrix_init.c b/libswresample/x86/rematrix_init.c index 918479a4a8..5f2c5fe170 100644 --- a/libswresample/x86/rematrix_init.c +++ b/libswresample/x86/rematrix_init.c @@ -73,7 +73,7 @@ av_cold int swri_rematrix_init_x86(struct SwrContext *s){ s->mix_1_1_simd = ff_mix_1_1_a_float_sse; s->mix_2_1_simd = ff_mix_2_1_a_float_sse; } - if(EXTERNAL_AVX(mm_flags)) { + if(EXTERNAL_AVX_FAST(mm_flags)) { s->mix_1_1_simd = ff_mix_1_1_a_float_avx; s->mix_2_1_simd = ff_mix_2_1_a_float_avx; } diff --git a/libswresample/x86/resample_init.c b/libswresample/x86/resample_init.c index 93001d65cb..bc444cfb8b 100644 --- a/libswresample/x86/resample_init.c +++ b/libswresample/x86/resample_init.c @@ -67,11 +67,11 @@ av_cold void swri_resample_dsp_x86_init(ResampleContext *c) c->dsp.resample = c->linear ? ff_resample_linear_float_sse : ff_resample_common_float_sse; } - if (EXTERNAL_AVX(mm_flags)) { + if (EXTERNAL_AVX_FAST(mm_flags)) { c->dsp.resample = c->linear ? ff_resample_linear_float_avx : ff_resample_common_float_avx; } - if (EXTERNAL_FMA3(mm_flags)) { + if (EXTERNAL_FMA3(mm_flags) && !(mm_flags & AV_CPU_FLAG_AVXSLOW)) { c->dsp.resample = c->linear ? ff_resample_linear_float_fma3 : ff_resample_common_float_fma3; }