Prior to ARMv6, the destination registers of the SMULL instruction must be distinct from the first source register. Marking the output operand early-clobber ensures it is allocated registers distinct from the inputs.
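As a minimal standalone sketch of the two constraint variants (the helper names below are invented for illustration; the asm strings are the ones used in the patch):

#include <stdint.h>

/* Pre-ARMv6: "=&r" (early-clobber) tells GCC the output is written
 * before the inputs are dead, so the 64-bit result must not share a
 * register with %1 or %2.  This satisfies the SMULL rule that the
 * destination registers differ from the first source register. */
static inline int64_t mul64_pre_v6(int a, int b)
{
    int64_t x;
    __asm__ ("smull %Q0, %R0, %1, %2" : "=&r"(x) : "r"(a), "r"(b));
    return x;
}

/* ARMv6+: plain "=r" permits input/output register overlap, giving
 * the register allocator more freedom. */
static inline int64_t mul64_v6(int a, int b)
{
    int64_t x;
    __asm__ ("smull %Q0, %R0, %1, %2" : "=r"(x) : "r"(a), "r"(b));
    return x;
}

(%Q0 and %R0 select the low and high word of the 64-bit operand.)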
This restriction is dropped in ARMv6 and later, so allowing overlap between input and output registers there might give better code.

Signed-off-by: Mans Rullgard <[email protected]>
---
 libavcodec/arm/mathops.h |   14 +++++++++++---
 1 files changed, 11 insertions(+), 3 deletions(-)

diff --git a/libavcodec/arm/mathops.h b/libavcodec/arm/mathops.h
index dfa9411..7c2acca 100644
--- a/libavcodec/arm/mathops.h
+++ b/libavcodec/arm/mathops.h
@@ -41,6 +41,8 @@ static inline av_const int MULL(int a, int b, unsigned shift)
 }
 
 #define MULH MULH
+#define MUL64 MUL64
+
 #if HAVE_ARMV6
 static inline av_const int MULH(int a, int b)
 {
@@ -48,6 +50,13 @@ static inline av_const int MULH(int a, int b)
     __asm__ ("smmul %0, %1, %2" : "=r"(r) : "r"(a), "r"(b));
     return r;
 }
+
+static inline av_const int64_t MUL64(int a, int b)
+{
+    int64_t x;
+    __asm__ ("smull %Q0, %R0, %1, %2" : "=r"(x) : "r"(a), "r"(b));
+    return x;
+}
 #else
 static inline av_const int MULH(int a, int b)
 {
@@ -55,15 +64,14 @@ static inline av_const int MULH(int a, int b)
     __asm__ ("smull %0, %1, %2, %3" : "=&r"(lo), "=&r"(hi) : "r"(b), "r"(a));
     return hi;
 }
-#endif
 
 static inline av_const int64_t MUL64(int a, int b)
 {
     int64_t x;
-    __asm__ ("smull %Q0, %R0, %1, %2" : "=r"(x) : "r"(a), "r"(b));
+    __asm__ ("smull %Q0, %R0, %1, %2" : "=&r"(x) : "r"(a), "r"(b));
     return x;
 }
-#define MUL64 MUL64
+#endif
 
 static inline av_const int64_t MAC64(int64_t d, int a, int b)
 {
-- 
1.7.4.5
