Disable neon in ffmpeg to fix videos and animated stickers on arm architectures

This commit is contained in:
Ilya Fedin 2020-03-11 20:09:37 +04:00 committed by John Preston
parent fda8a38bfc
commit fb99b9b547
2 changed files with 236 additions and 0 deletions

View File

@ -0,0 +1,225 @@
diff --git a/libavcodec/aarch64/Makefile b/libavcodec/aarch64/Makefile
index 00f93bf59f..52da7036f3 100644
--- a/libavcodec/aarch64/Makefile
+++ b/libavcodec/aarch64/Makefile
@@ -6,6 +6,7 @@ OBJS-$(CONFIG_H264DSP) += aarch64/h264dsp_init_aarch64.o
OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_init.o
OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_init_aarch64.o
OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_init_aarch64.o
+OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_init_aarch64.o
OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_init.o
OBJS-$(CONFIG_NEON_CLOBBER_TEST) += aarch64/neontest.o
OBJS-$(CONFIG_VIDEODSP) += aarch64/videodsp_init.o
@@ -21,6 +22,7 @@ OBJS-$(CONFIG_VC1DSP) += aarch64/vc1dsp_init_aarch64.o
OBJS-$(CONFIG_VORBIS_DECODER) += aarch64/vorbisdsp_init.o
OBJS-$(CONFIG_VP9_DECODER) += aarch64/vp9dsp_init_10bpp_aarch64.o \
aarch64/vp9dsp_init_12bpp_aarch64.o \
+ aarch64/vp9mc_aarch64.o \
aarch64/vp9dsp_init_aarch64.o
# ARMv8 optimizations
@@ -41,8 +43,7 @@ NEON-OBJS-$(CONFIG_H264PRED) += aarch64/h264pred_neon.o
NEON-OBJS-$(CONFIG_H264QPEL) += aarch64/h264qpel_neon.o \
aarch64/hpeldsp_neon.o
NEON-OBJS-$(CONFIG_HPELDSP) += aarch64/hpeldsp_neon.o
-NEON-OBJS-$(CONFIG_IDCTDSP) += aarch64/idctdsp_init_aarch64.o \
- aarch64/simple_idct_neon.o
+NEON-OBJS-$(CONFIG_IDCTDSP) += aarch64/simple_idct_neon.o
NEON-OBJS-$(CONFIG_MDCT) += aarch64/mdct_neon.o
NEON-OBJS-$(CONFIG_MPEGAUDIODSP) += aarch64/mpegaudiodsp_neon.o
NEON-OBJS-$(CONFIG_VP8DSP) += aarch64/vp8dsp_neon.o
diff --git a/libavcodec/aarch64/idctdsp_init_aarch64.c b/libavcodec/aarch64/idctdsp_init_aarch64.c
index 0406e60830..742a3372e3 100644
--- a/libavcodec/aarch64/idctdsp_init_aarch64.c
+++ b/libavcodec/aarch64/idctdsp_init_aarch64.c
@@ -21,6 +21,8 @@
*/
#include "libavutil/attributes.h"
+#include "libavutil/cpu.h"
+#include "libavutil/arm/cpu.h"
#include "libavcodec/avcodec.h"
#include "libavcodec/idctdsp.h"
#include "idct.h"
@@ -28,7 +30,9 @@
av_cold void ff_idctdsp_init_aarch64(IDCTDSPContext *c, AVCodecContext *avctx,
unsigned high_bit_depth)
{
- if (!avctx->lowres && !high_bit_depth) {
+ int cpu_flags = av_get_cpu_flags();
+
+ if (have_neon(cpu_flags) && !avctx->lowres && !high_bit_depth) {
if (avctx->idct_algo == FF_IDCT_AUTO ||
avctx->idct_algo == FF_IDCT_SIMPLEAUTO ||
avctx->idct_algo == FF_IDCT_SIMPLENEON) {
diff --git a/libavcodec/aarch64/vp9mc_16bpp_neon.S b/libavcodec/aarch64/vp9mc_16bpp_neon.S
index cac6428709..53b372c262 100644
--- a/libavcodec/aarch64/vp9mc_16bpp_neon.S
+++ b/libavcodec/aarch64/vp9mc_16bpp_neon.S
@@ -25,31 +25,6 @@
// const uint8_t *ref, ptrdiff_t ref_stride,
// int h, int mx, int my);
-function ff_vp9_copy128_aarch64, export=1
-1:
- ldp x5, x6, [x2]
- ldp x7, x8, [x2, #16]
- stp x5, x6, [x0]
- ldp x9, x10, [x2, #32]
- stp x7, x8, [x0, #16]
- subs w4, w4, #1
- ldp x11, x12, [x2, #48]
- stp x9, x10, [x0, #32]
- stp x11, x12, [x0, #48]
- ldp x5, x6, [x2, #64]
- ldp x7, x8, [x2, #80]
- stp x5, x6, [x0, #64]
- ldp x9, x10, [x2, #96]
- stp x7, x8, [x0, #80]
- ldp x11, x12, [x2, #112]
- stp x9, x10, [x0, #96]
- stp x11, x12, [x0, #112]
- add x2, x2, x3
- add x0, x0, x1
- b.ne 1b
- ret
-endfunc
-
function ff_vp9_avg64_16_neon, export=1
mov x5, x0
sub x1, x1, #64
diff --git a/libavcodec/aarch64/vp9mc_aarch64.S b/libavcodec/aarch64/vp9mc_aarch64.S
new file mode 100644
index 0000000000..f17a8cf04a
--- /dev/null
+++ b/libavcodec/aarch64/vp9mc_aarch64.S
@@ -0,0 +1,81 @@
+/*
+ * Copyright (c) 2016 Google Inc.
+ *
+ * This file is part of FFmpeg.
+ *
+ * FFmpeg is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Lesser General Public
+ * License as published by the Free Software Foundation; either
+ * version 2.1 of the License, or (at your option) any later version.
+ *
+ * FFmpeg is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
+ * Lesser General Public License for more details.
+ *
+ * You should have received a copy of the GNU Lesser General Public
+ * License along with FFmpeg; if not, write to the Free Software
+ * Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA
+ */
+
+#include "libavutil/aarch64/asm.S"
+
+// All public functions in this file have the following signature:
+// typedef void (*vp9_mc_func)(uint8_t *dst, ptrdiff_t dst_stride,
+// const uint8_t *ref, ptrdiff_t ref_stride,
+// int h, int mx, int my);
+
+function ff_vp9_copy128_aarch64, export=1
+1:
+ ldp x5, x6, [x2]
+ ldp x7, x8, [x2, #16]
+ stp x5, x6, [x0]
+ ldp x9, x10, [x2, #32]
+ stp x7, x8, [x0, #16]
+ subs w4, w4, #1
+ ldp x11, x12, [x2, #48]
+ stp x9, x10, [x0, #32]
+ stp x11, x12, [x0, #48]
+ ldp x5, x6, [x2, #64]
+ ldp x7, x8, [x2, #80]
+ stp x5, x6, [x0, #64]
+ ldp x9, x10, [x2, #96]
+ stp x7, x8, [x0, #80]
+ ldp x11, x12, [x2, #112]
+ stp x9, x10, [x0, #96]
+ stp x11, x12, [x0, #112]
+ add x2, x2, x3
+ add x0, x0, x1
+ b.ne 1b
+ ret
+endfunc
+
+function ff_vp9_copy64_aarch64, export=1
+1:
+ ldp x5, x6, [x2]
+ ldp x7, x8, [x2, #16]
+ stp x5, x6, [x0]
+ ldp x9, x10, [x2, #32]
+ stp x7, x8, [x0, #16]
+ subs w4, w4, #1
+ ldp x11, x12, [x2, #48]
+ stp x9, x10, [x0, #32]
+ stp x11, x12, [x0, #48]
+ add x2, x2, x3
+ add x0, x0, x1
+ b.ne 1b
+ ret
+endfunc
+
+function ff_vp9_copy32_aarch64, export=1
+1:
+ ldp x5, x6, [x2]
+ ldp x7, x8, [x2, #16]
+ stp x5, x6, [x0]
+ subs w4, w4, #1
+ stp x7, x8, [x0, #16]
+ add x2, x2, x3
+ add x0, x0, x1
+ b.ne 1b
+ ret
+endfunc
diff --git a/libavcodec/aarch64/vp9mc_neon.S b/libavcodec/aarch64/vp9mc_neon.S
index f67624ca04..abf2bae9db 100644
--- a/libavcodec/aarch64/vp9mc_neon.S
+++ b/libavcodec/aarch64/vp9mc_neon.S
@@ -25,23 +25,6 @@
// const uint8_t *ref, ptrdiff_t ref_stride,
// int h, int mx, int my);
-function ff_vp9_copy64_aarch64, export=1
-1:
- ldp x5, x6, [x2]
- ldp x7, x8, [x2, #16]
- stp x5, x6, [x0]
- ldp x9, x10, [x2, #32]
- stp x7, x8, [x0, #16]
- subs w4, w4, #1
- ldp x11, x12, [x2, #48]
- stp x9, x10, [x0, #32]
- stp x11, x12, [x0, #48]
- add x2, x2, x3
- add x0, x0, x1
- b.ne 1b
- ret
-endfunc
-
function ff_vp9_avg64_neon, export=1
mov x5, x0
1:
@@ -64,19 +47,6 @@ function ff_vp9_avg64_neon, export=1
ret
endfunc
-function ff_vp9_copy32_aarch64, export=1
-1:
- ldp x5, x6, [x2]
- ldp x7, x8, [x2, #16]
- stp x5, x6, [x0]
- subs w4, w4, #1
- stp x7, x8, [x0, #16]
- add x2, x2, x3
- add x0, x0, x1
- b.ne 1b
- ret
-endfunc
-
function ff_vp9_avg32_neon, export=1
1:
ld1 {v2.16b, v3.16b}, [x2], x3

View File

@ -127,6 +127,11 @@ parts:
- range-v3
- xxhash
patches:
plugin: dump
source: Telegram/Patches
prime: [-./*]
desktop-qt5:
source: https://github.com/ubuntu/snapcraft-desktop-helpers.git
source-subdir: qt
@ -229,6 +234,7 @@ parts:
- --disable-programs
- --disable-doc
- --disable-everything
- --disable-neon
- --enable-gpl
- --enable-version3
- --enable-libopus
@ -284,6 +290,11 @@ parts:
- --enable-demuxer=wav
- --enable-muxer=ogg
- --enable-muxer=opus
override-pull: |
snapcraftctl pull
patch -p1 < "$SNAPCRAFT_STAGE/ffmpeg.diff"
after:
- patches
range-v3:
source: https://github.com/ericniebler/range-v3.git