On 20/09/16 19:36, Diego Biurrun wrote:
> ptrdiff_t is the correct type for array strides and similar.
I can't say I looked at the details with extreme care, but I didn't see anything
else in the series that looked fishy.
An aside:
Seeing a lot of little functions like the following (picking a random one):
> -static void spatial_compensation_3(uint8_t *src, uint8_t *dst, int linesize)
> +static void spatial_compensation_3(uint8_t *src, uint8_t *dst, ptrdiff_t
> stride)
> {
> int x, y;
>
> for (y = 0; y < 8; y++) {
> for (x = 0; x < 8; x++)
> dst[x] = src[area4 + ((y + 1) >> 1) + x];
> - dst += linesize;
> + dst += stride;
> }
> }
makes me wonder how much faster the C code would be if we just added the right
type qualifiers to all the pointers.
"static void spatial_compensation_3(uint8_t *src, uint8_t *dst, int linesize)"
0000000000000000 <spatial_compensation_3>:
0: 48 83 c6 07 add $0x7,%rsi
4: 41 b8 01 00 00 00 mov $0x1,%r8d
a: 66 0f 1f 44 00 00 nopw 0x0(%rax,%rax,1)
10: 44 89 c1 mov %r8d,%ecx
13: d1 f9 sar %ecx
15: 48 63 c9 movslq %ecx,%rcx
18: 8a 44 39 11 mov 0x11(%rcx,%rdi,1),%al
1c: 88 46 f9 mov %al,-0x7(%rsi)
1f: 8a 44 39 12 mov 0x12(%rcx,%rdi,1),%al
23: 88 46 fa mov %al,-0x6(%rsi)
26: 8a 44 39 13 mov 0x13(%rcx,%rdi,1),%al
2a: 88 46 fb mov %al,-0x5(%rsi)
2d: 8a 44 39 14 mov 0x14(%rcx,%rdi,1),%al
31: 88 46 fc mov %al,-0x4(%rsi)
34: 8a 44 39 15 mov 0x15(%rcx,%rdi,1),%al
38: 88 46 fd mov %al,-0x3(%rsi)
3b: 8a 44 39 16 mov 0x16(%rcx,%rdi,1),%al
3f: 88 46 fe mov %al,-0x2(%rsi)
42: 8a 44 39 17 mov 0x17(%rcx,%rdi,1),%al
46: 88 46 ff mov %al,-0x1(%rsi)
49: 8a 44 39 18 mov 0x18(%rcx,%rdi,1),%al
4d: 88 06 mov %al,(%rsi)
4f: 41 ff c0 inc %r8d
52: 48 01 d6 add %rdx,%rsi
55: 41 83 f8 09 cmp $0x9,%r8d
59: 75 b5 jne 10 <spatial_compensation_3+0x10>
5b: c3 retq
"static void spatial_compensation_3(const uint8_t *restrict src, uint8_t
*restrict dst, int linesize)"
0000000000000000 <spatial_compensation_3>:
0: 48 8b 47 11 mov 0x11(%rdi),%rax
4: 48 89 06 mov %rax,(%rsi)
7: 48 8b 47 12 mov 0x12(%rdi),%rax
b: 48 89 04 16 mov %rax,(%rsi,%rdx,1)
f: 48 8b 47 12 mov 0x12(%rdi),%rax
13: 48 89 04 56 mov %rax,(%rsi,%rdx,2)
17: 48 8d 04 52 lea (%rdx,%rdx,2),%rax
1b: 48 8b 4f 13 mov 0x13(%rdi),%rcx
1f: 48 89 0c 06 mov %rcx,(%rsi,%rax,1)
23: 48 8b 4f 13 mov 0x13(%rdi),%rcx
27: 48 89 0c 96 mov %rcx,(%rsi,%rdx,4)
2b: 4c 8d 04 92 lea (%rdx,%rdx,4),%r8
2f: 48 8b 4f 14 mov 0x14(%rdi),%rcx
33: 4a 89 0c 06 mov %rcx,(%rsi,%r8,1)
37: 48 8b 4f 14 mov 0x14(%rdi),%rcx
3b: 48 89 0c 46 mov %rcx,(%rsi,%rax,2)
3f: 48 6b c2 07 imul $0x7,%rdx,%rax
43: 48 8b 4f 15 mov 0x15(%rdi),%rcx
47: 48 89 0c 06 mov %rcx,(%rsi,%rax,1)
4b: c3 retq
(From clang 3.6; gcc 6.1 succeeds in removing the dependency but fails to get
rid of the single-byte moves, so it unrolls fully to 8 x (load 8 individual
bytes, store 8 individual bytes). Maybe that answers the question...)
_______________________________________________
libav-devel mailing list
[email protected]
https://lists.libav.org/mailman/listinfo/libav-devel