Issue |
143456
|
Summary |
[AVX2] SAD pattern detection is too strict
|
Labels |
new issue
|
Assignees |
|
Reporter |
adworacz
|
Reference code: [Zig Godbolt](https://github.com/llvm/llvm-project/commit/6f879d9eb1a111a0c99f2a69e4ad30b220f4926a)
Some opportunities for producing optimized sum of absolute differences (SAD) calculations are being missed. It looks like [prior support for this was overly restrictive](https://github.com/llvm/llvm-project/commit/6f879d9eb1a111a0c99f2a69e4ad30b220f4926a).
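Essentially, the absolute difference is being computed explicitly (`vpminub`/`vpmaxub`/`vpsubb`) and then summed with `vpsadbw` against a zeroed register, when the whole computation should be handled by a single `vpsadbw` on the two inputs.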
Here's the code inline:
```zig
const block_width = 8;
const T = u8;
const VT = @Vector(block_width, T);
export fn sad(noalias srcp: [*]const u8, noalias refp: [*]const u8, height: usize, stride: usize) u32 {
const src = srcp[0..height * stride];
const ref = refp[0..height * stride];
var sum: u32 = 0;
const s: VT = src[0*stride..][0..block_width].*;
const r: VT = ref[0*stride..][0..block_width].*;
// Should work, but doesn't.
//const absdiff = @max(s,r) - @min(s,r);
//sum += @reduce(.Add, absdiff);
// Should work, but doesn't
//const VTI = @Vector(block_width, i16);
//sum += @reduce(.Add, @abs(@as(VTI, s) - @as(VTI, r)));
// Does work
const VTI = @Vector(block_width, i32);
sum += @reduce(.Add, @abs(@as(VTI, s) - @as(VTI, r)));
return sum;
}
```
Which produces:
```asm
sad:
push rbp
mov rbp, rsp
vmovq xmm0, qword ptr [rdi]
vmovq xmm1, qword ptr [rsi]
vpminub xmm2, xmm0, xmm1
vpmaxub xmm0, xmm0, xmm1
vpxor xmm1, xmm1, xmm1
vpsubb xmm0, xmm0, xmm2
vpsadbw xmm0, xmm0, xmm1
vpextrb eax, xmm0, 0
pop rbp
ret
```
But it should be:
```asm
sad:
push rbp
mov rbp, rsp
vmovq xmm0, qword ptr [rdi]
vmovq xmm1, qword ptr [rsi]
vpsadbw xmm0, xmm0, xmm1
vmovd eax, xmm0
pop rbp
ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs