On 20/09/16 19:36, Diego Biurrun wrote:
> ptrdiff_t is the correct type for array strides and similar.

I can't say I looked at the details with extreme care, but I didn't see anything 
else in the series which looked fishy.


Aside:

Seeing a lot of little functions like (picking a random one):

> -static void spatial_compensation_3(uint8_t *src, uint8_t *dst, int linesize)
> +static void spatial_compensation_3(uint8_t *src, uint8_t *dst, ptrdiff_t stride)
>  {
>      int x, y;
>  
>      for (y = 0; y < 8; y++) {
>          for (x = 0; x < 8; x++)
>              dst[x] = src[area4 + ((y + 1) >> 1) + x];
> -        dst += linesize;
> +        dst += stride;
>      }
>  }

makes me wonder how much faster the C code would be if we just added the right 
type qualifiers (const and restrict) to all the pointers.


"static void spatial_compensation_3(uint8_t *src, uint8_t *dst, int linesize)"

0000000000000000 <spatial_compensation_3>:
   0:   48 83 c6 07             add    $0x7,%rsi
   4:   41 b8 01 00 00 00       mov    $0x1,%r8d
   a:   66 0f 1f 44 00 00       nopw   0x0(%rax,%rax,1)
  10:   44 89 c1                mov    %r8d,%ecx
  13:   d1 f9                   sar    %ecx
  15:   48 63 c9                movslq %ecx,%rcx
  18:   8a 44 39 11             mov    0x11(%rcx,%rdi,1),%al
  1c:   88 46 f9                mov    %al,-0x7(%rsi)
  1f:   8a 44 39 12             mov    0x12(%rcx,%rdi,1),%al
  23:   88 46 fa                mov    %al,-0x6(%rsi)
  26:   8a 44 39 13             mov    0x13(%rcx,%rdi,1),%al
  2a:   88 46 fb                mov    %al,-0x5(%rsi)
  2d:   8a 44 39 14             mov    0x14(%rcx,%rdi,1),%al
  31:   88 46 fc                mov    %al,-0x4(%rsi)
  34:   8a 44 39 15             mov    0x15(%rcx,%rdi,1),%al
  38:   88 46 fd                mov    %al,-0x3(%rsi)
  3b:   8a 44 39 16             mov    0x16(%rcx,%rdi,1),%al
  3f:   88 46 fe                mov    %al,-0x2(%rsi)
  42:   8a 44 39 17             mov    0x17(%rcx,%rdi,1),%al
  46:   88 46 ff                mov    %al,-0x1(%rsi)
  49:   8a 44 39 18             mov    0x18(%rcx,%rdi,1),%al
  4d:   88 06                   mov    %al,(%rsi)
  4f:   41 ff c0                inc    %r8d
  52:   48 01 d6                add    %rdx,%rsi
  55:   41 83 f8 09             cmp    $0x9,%r8d
  59:   75 b5                   jne    10 <spatial_compensation_3+0x10>
  5b:   c3                      retq


"static void spatial_compensation_3(const uint8_t *restrict src, uint8_t 
*restrict dst, int linesize)"

0000000000000000 <spatial_compensation_3>:
   0:   48 8b 47 11             mov    0x11(%rdi),%rax
   4:   48 89 06                mov    %rax,(%rsi)
   7:   48 8b 47 12             mov    0x12(%rdi),%rax
   b:   48 89 04 16             mov    %rax,(%rsi,%rdx,1)
   f:   48 8b 47 12             mov    0x12(%rdi),%rax
  13:   48 89 04 56             mov    %rax,(%rsi,%rdx,2)
  17:   48 8d 04 52             lea    (%rdx,%rdx,2),%rax
  1b:   48 8b 4f 13             mov    0x13(%rdi),%rcx
  1f:   48 89 0c 06             mov    %rcx,(%rsi,%rax,1)
  23:   48 8b 4f 13             mov    0x13(%rdi),%rcx
  27:   48 89 0c 96             mov    %rcx,(%rsi,%rdx,4)
  2b:   4c 8d 04 92             lea    (%rdx,%rdx,4),%r8
  2f:   48 8b 4f 14             mov    0x14(%rdi),%rcx
  33:   4a 89 0c 06             mov    %rcx,(%rsi,%r8,1)
  37:   48 8b 4f 14             mov    0x14(%rdi),%rcx
  3b:   48 89 0c 46             mov    %rcx,(%rsi,%rax,2)
  3f:   48 6b c2 07             imul   $0x7,%rdx,%rax
  43:   48 8b 4f 15             mov    0x15(%rdi),%rcx
  47:   48 89 0c 06             mov    %rcx,(%rsi,%rax,1)
  4b:   c3                      retq


(From clang 3.6; gcc 6.1 succeeds in removing the dependency but fails to get 
rid of the single-byte moves, so it unrolls fully to 8 x (load 8 individual 
bytes, store 8 individual bytes).  Maybe that answers the question...)

_______________________________________________
libav-devel mailing list
libav-devel@libav.org
https://lists.libav.org/mailman/listinfo/libav-devel

Reply via email to