From: Reimar Döffinger
Currently, adjusting the stack pointer when saving and restoring
registers is done in several different ways, which might cause
needless dependencies or, in the case of tx_float_neon.S, is incorrect.
Signed-off-by: Reimar Döffinger
---
libavcodec/aarch64/fft_neon.S | 3 +-
libavcodec/aarch64/h264idct_neon.S | 6 +-
libavcodec/aarch64/hevcdsp_sao_neon.S | 3 +-
libavcodec/aarch64/mdct_neon.S | 18 ++
libavcodec/aarch64/me_cmp_neon.S | 6 +-
libavcodec/aarch64/synth_filter_neon.S | 3 +-
libavcodec/aarch64/vp9itxfm_neon.S | 28 -
libavcodec/aarch64/vp9lpf_16bpp_neon.S | 32 +--
libavcodec/aarch64/vp9lpf_neon.S | 80 +-
libavutil/aarch64/tx_float_neon.S | 52 -
10 files changed, 109 insertions(+), 122 deletions(-)
diff --git a/libavcodec/aarch64/fft_neon.S b/libavcodec/aarch64/fft_neon.S
index 9ff3f9c526..d7225511dd 100644
--- a/libavcodec/aarch64/fft_neon.S
+++ b/libavcodec/aarch64/fft_neon.S
@@ -342,8 +342,7 @@ endfunc
function fft\n\()_neon, align=6
AARCH64_VALID_JUMP_TARGET
AARCH64_SIGN_LINK_REGISTER
-sub sp, sp, #16
-stp x28, x30, [sp]
+stp x28, x30, [sp, #-16]!
add x28, x0, #\n4*2*8
bl fft\n2\()_neon
mov x0, x28
diff --git a/libavcodec/aarch64/h264idct_neon.S b/libavcodec/aarch64/h264idct_neon.S
index 7d2879b0ce..375da31d65 100644
--- a/libavcodec/aarch64/h264idct_neon.S
+++ b/libavcodec/aarch64/h264idct_neon.S
@@ -157,8 +157,7 @@ function ff_h264_idct_add16intra_neon, export=1
endfunc
function ff_h264_idct_add8_neon, export=1
-sub sp, sp, #0x40
-stp x19, x20, [sp]
+stp x19, x20, [sp, #-0x40]!
mov x12, x30
ldp x6, x15, [x0] // dest[0], dest[1]
add x5, x1, #16*4 // block_offset
@@ -187,8 +186,7 @@ function ff_h264_idct_add8_neon, export=1
csel x6, x15, x6, eq
cmp x10, #20
b.lt 1b
-ldp x19, x20, [sp]
-add sp, sp, #0x40
+ldp x19, x20, [sp], #0x40
ret x12
endfunc
diff --git a/libavcodec/aarch64/hevcdsp_sao_neon.S b/libavcodec/aarch64/hevcdsp_sao_neon.S
index d4decfde3b..30e83dda5d 100644
--- a/libavcodec/aarch64/hevcdsp_sao_neon.S
+++ b/libavcodec/aarch64/hevcdsp_sao_neon.S
@@ -33,8 +33,7 @@
// int16_t *sao_offset_val, int sao_left_class,
// int width, int height)
function ff_hevc_sao_band_filter_8x8_8_neon, export=1
-sub sp, sp, #64
-stp xzr, xzr, [sp]
+stp xzr, xzr, [sp, #-64]!
stp xzr, xzr, [sp, #16]
stp xzr, xzr, [sp, #32]
stp xzr, xzr, [sp, #48]
diff --git a/libavcodec/aarch64/mdct_neon.S b/libavcodec/aarch64/mdct_neon.S
index 6091e72022..98b09bf1ab 100644
--- a/libavcodec/aarch64/mdct_neon.S
+++ b/libavcodec/aarch64/mdct_neon.S
@@ -23,8 +23,7 @@
#include "libavutil/aarch64/asm.S"
function ff_imdct_half_neon, export=1
-sub sp, sp, #32
-stp x19, x20, [sp]
+stp x19, x20, [sp, #-32]!
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #16]
mov x12, #1
@@ -120,17 +119,15 @@ function ff_imdct_half_neon, export=1
st2 {v4.2s,v5.2s}, [x0]
st2 {v6.2s,v7.2s}, [x8]
-ldp x19, x20, [sp]
ldr x30, [sp, #16]
AARCH64_VALIDATE_LINK_REGISTER
-add sp, sp, #32
+ldp x19, x20, [sp], #32
ret
endfunc
function ff_imdct_calc_neon, export=1
-sub sp, sp, #32
-stp x19, x20, [sp]
+stp x19, x20, [sp, #-32]!
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #16]
ldr w3, [x0, #28] // mdct_bits
@@ -163,18 +160,16 @@ function ff_imdct_calc_neon, export=1
subs x19, x19, #16
b.gt 1b
-ldp x19, x20, [sp]
ldr x30, [sp, #16]
AARCH64_VALIDATE_LINK_REGISTER
-add sp, sp, #32
+ldp x19, x20, [sp], #32
ret
endfunc
function ff_mdct_calc_neon, export=1
-sub sp, sp, #32
-stp x19, x20, [sp]
+stp x19, x20, [sp, #-32]!
AARCH64_SIGN_LINK_REGISTER
str x30, [sp, #16]
@@ -323,10 +318,9 @@ function ff_mdct_calc_neon, export=1
st2 {v4.2s,v5.2s}, [x0]
st2 {v6.2s,v7.2s}, [x8]
-ldp x19, x20, [sp]
ldr