On 6/2/17 1:36 AM, Sean McGovern wrote:
> From: James Almer <[email protected]>
>
> This makes ubsan happy and also considerably increases performance on
> big endian systems.
>
> Tested on an IBM POWER7 3.55 GHz
>
> Before:
>
> 2.24user 0.14system 0:02.39elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
> 2.26user 0.11system 0:02.38elapsed 99%CPU (0avgtext+0avgdata 2688maxresident)k
> 2.23user 0.15system 0:02.38elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
> 2.25user 0.12system 0:02.38elapsed 100%CPU (0avgtext+0avgdata 2624maxresident)k
> 2.20user 0.15system 0:02.36elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
>
> After:
>
> 1.86user 0.13system 0:02.00elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
> 1.89user 0.11system 0:02.01elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
> 1.85user 0.14system 0:02.00elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
> 1.84user 0.15system 0:01.99elapsed 99%CPU (0avgtext+0avgdata 2624maxresident)k
> 1.89user 0.13system 0:02.02elapsed 99%CPU (0avgtext+0avgdata 2688maxresident)k
>
> Tested-by: Nicolas George <[email protected]>
> Reviewed-by: Michael Niedermayer <[email protected]>
> Signed-off-by: James Almer <[email protected]>
> Signed-off-by: Sean McGovern <[email protected]>
> ---
> libavutil/md5.c | 20 ++++++++------------
> 1 file changed, 8 insertions(+), 12 deletions(-)
>
> diff --git a/libavutil/md5.c b/libavutil/md5.c
> index 1946d78..b0d5964 100644
> --- a/libavutil/md5.c
> +++ b/libavutil/md5.c
> @@ -83,15 +83,15 @@ static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
> a += T[i]; \
> \
> if (i < 32) { \
> - if (i < 16) \
> - a += (d ^ (b & (c ^ d))) + X[i & 15]; \
> - else \
> - a += (c ^ (d & (c ^ b))) + X[(1 + 5 * i) & 15]; \
> + if (i < 16) \
> + a += (d ^ (b & (c ^ d))) + AV_RL32(X + (i & 15)); \
> + else \
> + a += (c ^ (d & (c ^ b))) + AV_RL32(X + ((1 + 5 * i) & 15)); \
> } else { \
> if (i < 48) \
> - a += (b ^ c ^ d) + X[(5 + 3 * i) & 15]; \
> + a += (b ^ c ^ d) + AV_RL32(X + ((5 + 3 * i) & 15)); \
> else \
> - a += (c ^ (b | ~d)) + X[(7 * i) & 15]; \
> + a += (c ^ (b | ~d)) + AV_RL32(X + ((7 * i) & 15)); \
> } \
> a = b + (a << t | a >> (32 - t)); \
> } while (0)
> @@ -99,18 +99,14 @@ static const uint32_t T[64] = { // T[i]= fabs(sin(i+1)<<32)
> static void body(uint32_t ABCD[4], uint32_t X[16])
> {
> int t;
> - int i av_unused;
> unsigned int a = ABCD[3];
> unsigned int b = ABCD[2];
> unsigned int c = ABCD[1];
> unsigned int d = ABCD[0];
>
> -#if HAVE_BIGENDIAN
> - for (i = 0; i < 16; i++)
> - X[i] = av_bswap32(X[i]);
> -#endif
> -
> #if CONFIG_SMALL
> + int i av_unused;
This can be a plain "int i;" -- inside the CONFIG_SMALL branch i is always used, so av_unused is not needed here.
> +
> for (i = 0; i < 64; i++) {
> CORE(i, a, b, c, d);
> t = d;
>
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel