From: James Cowgill <[email protected]> In binutils 2.29, the behavior of the ADR instruction changed so that 1 is added to the address of a Thumb function (previously nothing was added). This allows the loaded address to be passed to a BLX instruction and the correct mode change will occur.
So that the behavior matches in binutils 2.29 and pre-2.29, use .eqv to pre-calculate the function address without the automatic +1 fixup. Then use these new symbols as the function addresses to be loaded. See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458 The patch has modifications by Martin Storsjö, to fall back to plain defines, in case the assembler doesn't support .eqv directives. This makes sure it still works with old Apple tools with old binutils gas (since gas-preprocessor doesn't implement .eqv), and with the latest clang versions (where the assembler currently is capable of building all of our arm assembly without support from gas-preprocessor, but doesn't support .eqv). --- configure | 4 ++++ libavcodec/arm/h264idct_neon.S | 35 +++++++++++++++++++++++++++-------- 2 files changed, 31 insertions(+), 8 deletions(-) diff --git a/configure b/configure index 3e5784f..1307362 100755 --- a/configure +++ b/configure @@ -1666,6 +1666,7 @@ SYSTEM_FUNCS=" TOOLCHAIN_FEATURES=" as_arch_directive + as_eqv_directive as_fpu_directive as_func as_object_arch @@ -4435,6 +4436,9 @@ EOF check_as <<EOF && enable as_arch_directive .arch armv7-a EOF + check_as <<EOF && enable as_eqv_directive +.eqv foo, bar +EOF check_as <<EOF && enable as_fpu_directive .fpu neon EOF diff --git a/libavcodec/arm/h264idct_neon.S b/libavcodec/arm/h264idct_neon.S index f588f3e..706bdd3 100644 --- a/libavcodec/arm/h264idct_neon.S +++ b/libavcodec/arm/h264idct_neon.S @@ -20,6 +20,25 @@ #include "libavutil/arm/asm.S" +# In binutils 2.29, the behavior of the ADR instruction changed so that 1 is +# added to the address of a Thumb function (previously nothing was added). +# +# These .eqv are used to pre-calculate the correct address with +CONFIG_THUMB so +# that ADR will work with both old and new versions binutils. +# +# See: https://sourceware.org/bugzilla/show_bug.cgi?id=21458 +#if HAVE_AS_EQV_DIRECTIVE +.eqv eqv_ff_h264_idct_add_neon, X(ff_h264_idct_add_neon) + CONFIG_THUMB +.eqv eqv_ff_h264_idct_dc_add_neon, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB +.eqv eqv_ff_h264_idct8_add_neon, X(ff_h264_idct8_add_neon) + CONFIG_THUMB +.eqv eqv_ff_h264_idct8_dc_add_neon, X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB +#else +#define eqv_ff_h264_idct_add_neon X(ff_h264_idct_add_neon) + CONFIG_THUMB +#define eqv_ff_h264_idct_dc_add_neon X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB +#define eqv_ff_h264_idct8_add_neon X(ff_h264_idct8_add_neon) + CONFIG_THUMB +#define eqv_ff_h264_idct8_dc_add_neon X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB +#endif + function ff_h264_idct_add_neon, export=1 vld1.64 {d0-d3}, [r1,:128] vmov.i16 q15, #0 @@ -113,8 +132,8 @@ function ff_h264_idct_add16_neon, export=1 movne lr, #0 cmp lr, #0 ite ne - adrne lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB - adreq lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB + adrne lr, eqv_ff_h264_idct_dc_add_neon + adreq lr, eqv_ff_h264_idct_add_neon blx lr 2: subs ip, ip, #1 add r1, r1, #32 @@ -138,8 +157,8 @@ function ff_h264_idct_add16intra_neon, export=1 cmp r8, #0 ldrsh r8, [r1] iteet ne - adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB - adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB + adrne lr, eqv_ff_h264_idct_add_neon + adreq lr, eqv_ff_h264_idct_dc_add_neon cmpeq r8, #0 blxne lr subs ip, ip, #1 @@ -166,8 +185,8 @@ function ff_h264_idct_add8_neon, export=1 cmp r8, #0 ldrsh r8, [r1] iteet ne - adrne lr, X(ff_h264_idct_add_neon) + CONFIG_THUMB - adreq lr, X(ff_h264_idct_dc_add_neon) + CONFIG_THUMB + adrne lr, eqv_ff_h264_idct_add_neon + adreq lr, eqv_ff_h264_idct_dc_add_neon cmpeq r8, #0 blxne lr add r12, r12, #1 @@ -388,8 +407,8 @@ function ff_h264_idct8_add4_neon, export=1 movne lr, #0 cmp lr, #0 ite ne - adrne lr, X(ff_h264_idct8_dc_add_neon) + CONFIG_THUMB - adreq lr, X(ff_h264_idct8_add_neon) + CONFIG_THUMB + adrne lr, eqv_ff_h264_idct8_dc_add_neon + adreq lr, eqv_ff_h264_idct8_add_neon blx lr 2: subs r12, r12, #4 add r1, r1, #128 -- 2.7.4 _______________________________________________ libav-devel mailing list [email protected] https://lists.libav.org/mailman/listinfo/libav-devel
