| Issue |
174521
|
| Summary |
`vgf2p8affineqb` emit should be more aggressive
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
Validark
|
[Zig Godbolt](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:20,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:1,endLineNumber:25,positionColumn:1,positionLineNumber:25,selectionStartColumn:1,selectionStartLineNumber:25,startColumn:1,startLineNumber:25),source:'const+V+%3D+@Vector(64,+u8)%3B%0Aconst+Vi+%3D+@Vector(64,+i8)%3B%0A%0Aexport+fn+foo(x:+V)+V+%7B%0A++++return+@bitCast(%0A++++++++@as(Vi,+@bitCast(x+%3C%3C+@splat(5)))+%3E%3E+@splat(7)%0A++++)%3B%0A%7D%0A%0Aexport+fn+bar(x:+V)+V+%7B%0A++++return+@select(u8,%0A++++++++(x+%26+@as(V,+@splat(0b100)))+!!%3D+@as(V,+@splat(0)),%0A++++++++@as(V,+@splat(0xFF)),%0A++++++++@as(V,+@splat(0)),%0A++++)%3B%0A%7D%0A%0Aexport+fn+foobar(x:+V)+V+%7B%0A++++return+@%22llvm.x86.vgf2p8affineqb.512%22(x,+@splat(4),+0)%3B%0A%7D%0A%0Aexport+fn+baz(x:+V,+y:+V)+V+%7B%0A++++return+y+-%25+foo(x)%3B%0A%7D%0A%0Aextern+fn+@%22llvm.x86.vgf2p8affineqb.512%22(@Vector(64,+u8),+@Vector(64,+u8),+i8)+@Vector(64,+u8)%3B%0A'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:49.24267387136378,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:ztrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'0',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:21,fontUsePx:'0',j:2,lang:zig,libs:!(),options:'-O+ReleaseFast+-mcpu%3Dznver5+-target+x86_64-linux+-fomit-frame-pointer',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+zig+trunk+(Editor+%231)',t:'0')),k:50.75732612863623,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)
[LLVM Godbolt](https://llvm.godbo.lt/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QXACx8BBAKoBnTAAUAHpwAMvAFYTStJg1C1aANxakl9ZATwDKjdAGFUtAK4sGexwBk8DTAA5DwAjTGIQAGYuUgAHVAVCOwYXd089eMTbAT8A4JYwiOjLTGtshiECJmICVI8vGKtMG2TK6oJcoNDwqMaqmrr0vvbO/MLegEpLVDdiZHYOLBoAgGp0BVQAfVpUUVoV4kNgTAg8AA4VgFoYla4JlYBSSKcANkkV1RXzp%2BwVgAF%2BKgIE9Xu9Pt9Ir8HgAmACsGnuOz2mzcDGYbHQmyY6HQxEeAHYAEIPDQAQWwgmIAE8QCTSStHnCuI9IgARFYKBD7EFvD5fM4/Rnw0gc2IGAgrU4XWETOkMmGw6Es9lMTl4nlg/mChU3BRipgSqUrfGyskM4iYCUavkQqFw6F0h741mOslLfyYNYbba7MQHI4nc5XG53Fmgm0CyH/ELVYHPXngyN2%2BGI320FFo1iYTHY3EE4lkikEam0s1C5lPFUMdBhhNaqMKjQivXiyVBySm%2BlCgB0DFQVrZX2QLFiK1W1sT2qZIoAXuFUP4kmI8HPiHKhUrKxzMKoB%2BHwVwp7De/2VkRa5rbeuLXu67aN47na7Se7Vustki/RP678AahUDGxBxvuP5CgiKyfumqLotmWI4uqRJ0kWJbrjqypnkwdArHs3LxpeSb/CY5jdqoZwvN2pjAFQ0KxGcTBUMsmAAI4hN2sJcNCwF3gRjYit%2B94tgabYXB2IpBokwABLuKwIteloXhGR6HmSTouipbrNAYFoKZOUZ/ERLAkWRFFUTRdEMR6LFsRxXH4T8fF4YpkJiRceAsCw1TAPcMLRM%2Br6eu%2BPp7DpoF/DGM62U5yZNiF946qmyLQVmObwfmSGUjSqH2uhtBqrFPFwjFgmGkG0KdvKcK9pgwDoYYNb8QVirNvqJUXHcWWwpEtU4vlR5VcAfFMnJt52Q2cKRI%2BamkhwUy0JwsK8F4HBaKQqCcD4PgAGoALIrAAkgAShyMxzJ6Pk8KQBCaDNUwANYgPC3b4i8ZzQhokQaPi0JnBx0IAJz6JwkiLddq2cLwCggE2V3LTNpBwLASBoCOdDhOQlDI7EqMRCYNjEKit18HQBDhJDEAhKDIT%2BNUNLcLwVPMNSADyITaM0MMXcjbCCEzDC0LTK1YCEbjAE4Yi5ZwF1YB5RjiLDpD4BaLSmJgkPyzuzRuCTku8P4JNzfLtB4CEhzUi4WCg8Wbk66QKvECECSYKymAy8ARtGNdUxUAYwAKJteCYAA7kzsSMDb/CCCIYjsFIMiCIoKjqPLugxAYHsGfoxuQ5AUyoLE5Rq5cTNdZcLDILEbiVjODB2wtq128QeBYNnEBTE0LT2BAjiDA0pC%2BB6Yw9DEmRJAIPcZAko8MKM3QRI0pTs60/S1K49R6O35RtDUM8FEPljL%2BPwzbwPs8SG3J3zGfgMcHXS0rWtHAQbQyCSsWBP3BAuCECQjLRBMvAwy0BMO6IBJB/W7NCaI%2BI/ovD%2BtCfEXAzj4gQdfYGpAWASA0E2O%2BvAH4QyhpdT2s1ODQhBvLPBhDYbANtqTZIoCgA)
```zig
const V = @Vector(64, u8);
const Vi = @Vector(64, i8);
export fn foo(x: V) V {
return @bitCast(
@as(Vi, @bitCast(x << @splat(5))) >> @splat(7)
);
}
export fn bar(x: V) V {
return @select(u8,
(x & @as(V, @splat(0b100))) != @as(V, @splat(0)),
@as(V, @splat(0xFF)),
@as(V, @splat(0)),
);
}
```
```asm
.LCPI0_1:
.byte 4
.byte 2
.byte 1
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
foo:
vgf2p8affineqb zmm0, zmm0, qword ptr [rip + .LCPI0_1]{1to8}, 0
vpmovb2m k0, zmm0
vpmovm2b zmm0, k0
ret
.LCPI1_0:
.zero 64,4
bar:
vptestmb k0, zmm0, zmmword ptr [rip + .LCPI1_0]
vpmovm2b zmm0, k0
ret
```
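Both of these functions could instead compile to a single `vgf2p8affineqb` whose matrix broadcasts bit 2 of each byte to all 8 output bits (this is what the hand-written intrinsic version, `foobar` in the Godbolt links above, produces):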
```asm
.LCPI2_1:
.zero 8,4
foobar:
vgf2p8affineqb zmm0, zmm0, qword ptr [rip + .LCPI2_1]{1to8}, 0
ret
```
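In real code, we often want to increment a value based on a condition that is effectively a bit test. Here, wrapping-subtracting the 0x00/0xFF mask returned by `foo` adds 1 to each byte of `y` whose corresponding byte of `x` has bit 2 set: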
```zig
export fn baz(x: V, y: V) V {
return y -% foo(x);
}
```
Currently we get:
```asm
.LCPI3_2:
.byte 0
.byte 0
.byte 128
.byte 64
.byte 32
.byte 16
.byte 8
.byte 4
.LCPI3_3:
.zero 4,1
baz:
vgf2p8affineqb zmm0, zmm0, qword ptr [rip + .LCPI3_2]{1to8}, 0
vpandd zmm0, zmm0, dword ptr [rip + .LCPI3_3]{1to16}
vpaddb zmm0, zmm0, zmm1
ret
```
This could instead be (the matrix extracts only bit 2 into bit 0, so the `vpandd` is unnecessary):
```asm
.LCPI3_2:
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 0
.byte 4
baz:
vgf2p8affineqb zmm0, zmm0, qword ptr [rip + .LCPI3_2]{1to8}, 0
vpaddb zmm0, zmm0, zmm1
ret
```
Or, alternatively (broadcast bit 2 to all bits and subtract):
```asm
.LCPI3_2:
.zero 8,4
baz:
vgf2p8affineqb zmm0, zmm0, qword ptr [rip + .LCPI3_2]{1to8}, 0
vpsubb zmm0, zmm1, zmm0
ret
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs