Issue 174521
Summary `vgf2p8affineqb` emit should be more aggressive
Labels new issue
Assignees
Reporter Validark
    [Zig Godbolt](https://zig.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:20,fontUsePx:'0',j:1,lang:zig,selection:(endColumn:1,endLineNumber:25,positionColumn:1,positionLineNumber:25,selectionStartColumn:1,selectionStartLineNumber:25,startColumn:1,startLineNumber:25),source:'const+V+%3D+@Vector(64,+u8)%3B%0Aconst+Vi+%3D+@Vector(64,+i8)%3B%0A%0Aexport+fn+foo(x:+V)+V+%7B%0A++++return+@bitCast(%0A++++++++@as(Vi,+@bitCast(x+%3C%3C+@splat(5)))+%3E%3E+@splat(7)%0A++++)%3B%0A%7D%0A%0Aexport+fn+bar(x:+V)+V+%7B%0A++++return+@select(u8,%0A++++++++(x+%26+@as(V,+@splat(0b100)))+!!%3D+@as(V,+@splat(0)),%0A++++++++@as(V,+@splat(0xFF)),%0A++++++++@as(V,+@splat(0)),%0A++++)%3B%0A%7D%0A%0Aexport+fn+foobar(x:+V)+V+%7B%0A++++return+@%22llvm.x86.vgf2p8affineqb.512%22(x,+@splat(4),+0)%3B%0A%7D%0A%0Aexport+fn+baz(x:+V,+y:+V)+V+%7B%0A++++return+y+-%25+foo(x)%3B%0A%7D%0A%0Aextern+fn+@%22llvm.x86.vgf2p8affineqb.512%22(@Vector(64,+u8),+@Vector(64,+u8),+i8)+@Vector(64,+u8)%3B%0A'),l:'5',n:'0',o:'Zig+source+%231',t:'0')),k:49.24267387136378,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:ztrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'0',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:21,fontUsePx:'0',j:2,lang:zig,libs:!(),options:'-O+ReleaseFast+-mcpu%3Dznver5+-target+x86_64-linux+-fomit-frame-pointer',overrides:!(),selection:(endColumn:1,endLineNumber:1,positionColumn:1,positionLineNumber:1,selectionStartColumn:1,selectionStartLineNumber:1,startColumn:1,startLineNumber:1),source:1),l:'5',n:'0',o:'+zig+trunk+(Editor+%231)',t:'0')),k:50.75732612863623,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)
[LLVM Godbolt](https://llvm.godbo.lt/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QXACx8BBAKoBnTAAUAHpwAMvAFYTStJg1C1aANxakl9ZATwDKjdAGFUtAK4sGexwBk8DTAA5DwAjTGIQAGYuUgAHVAVCOwYXd089eMTbAT8A4JYwiOjLTGtshiECJmICVI8vGKtMG2TK6oJcoNDwqMaqmrr0vvbO/MLegEpLVDdiZHYOLBoAgGp0BVQAfVpUUVoV4kNgTAg8AA4VgFoYla4JlYBSSKcANkkV1RXzp%2BwVgAF%2BKgIE9Xu9Pt9Ir8HgAmACsGnuOz2mzcDGYbHQmyY6HQxEeAHYAEIPDQAQWwgmIAE8QCTSStHnCuI9IgARFYKBD7EFvD5fM4/Rnw0gc2IGAgrU4XWETOkMmGw6Es9lMTl4nlg/mChU3BRipgSqUrfGyskM4iYCUavkQqFw6F0h741mOslLfyYNYbba7MQHI4nc5XG53Fmgm0CyH/ELVYHPXngyN2%2BGI320FFo1iYTHY3EE4lkikEam0s1C5lPFUMdBhhNaqMKjQivXiyVBySm%2BlCgB0DFQVrZX2QLFiK1W1sT2qZIoAXuFUP4kmI8HPiHKhUrKxzMKoB%2BHwVwp7De/2VkRa5rbeuLXu67aN47na7Se7Vustki/RP678AahUDGxBxvuP5CgiKyfumqLotmWI4uqRJ0kWJbrjqypnkwdArHs3LxpeSb/CY5jdqoZwvN2pjAFQ0KxGcTBUMsmAAI4hN2sJcNCwF3gRjYit%2B94tgabYXB2IpBokwABLuKwIteloXhGR6HmSTouipbrNAYFoKZOUZ/ERLAkWRFFUTRdEMR6LFsRxXH4T8fF4YpkJiRceAsCw1TAPcMLRM%2Br6eu%2BPp7DpoF/DGM62U5yZNiF946qmyLQVmObwfmSGUjSqH2uhtBqrFPFwjFgmGkG0KdvKcK9pgwDoYYNb8QVirNvqJUXHcWWwpEtU4vlR5VcAfFMnJt52Q2cKRI%2BamkhwUy0JwsK8F4HBaKQqCcD4PgAGoALIrAAkgAShyMxzJ6Pk8KQBCaDNUwANYgPC3b4i8ZzQhokQaPi0JnBx0IAJz6JwkiLddq2cLwCggE2V3LTNpBwLASBoCOdDhOQlDI7EqMRCYNjEKit18HQBDhJDEAhKDIT%2BNUNLcLwVPMNSADyITaM0MMXcjbCCEzDC0LTK1YCEbjAE4Yi5ZwF1YB5RjiLDpD4BaLSmJgkPyzuzRuCTku8P4JNzfLtB4CEhzUi4WCg8Wbk66QKvECECSYKymAy8ARtGNdUxUAYwAKJteCYAA7kzsSMDb/CCCIYjsFIMiCIoKjqPLugxAYHsGfoxuQ5AUyoLE5Rq5cTNdZcLDILEbiVjODB2wtq128QeBYNnEBTE0LT2BAjiDA0pC%2BB6Yw9DEmRJAIPcZAko8MKM3QRI0pTs60/S1K49R6O35RtDUM8FEPljL%2BPwzbwPs8SG3J3zGfgMcHXS0rWtHAQbQyCSsWBP3BAuCECQjLRBMvAwy0BMO6IBJB/W7NCaI%2BI/ovD%2BtCfEXAzj4gQdfYGpAWASA0E2O%2BvAH4QyhpdT2s1ODQhBvLPBhDYbANtqTZIoCgA)
```zig
const V = @Vector(64, u8);
const Vi = @Vector(64, i8);

export fn foo(x: V) V {
    return @bitCast(
        @as(Vi, @bitCast(x << @splat(5))) >> @splat(7)
    );
}

export fn bar(x: V) V {
    return @select(u8,
 (x & @as(V, @splat(0b100))) != @as(V, @splat(0)),
        @as(V, @splat(0xFF)),
        @as(V, @splat(0)),
 );
}
```

```asm
.LCPI0_1:
        .byte   4
        .byte   2
 .byte   1
        .byte   0
        .byte   0
        .byte   0
 .byte   0
        .byte   0
foo:
        vgf2p8affineqb  zmm0, zmm0, qword ptr [rip + .LCPI0_1]{1to8}, 0
        vpmovb2m        k0, zmm0
 vpmovm2b        zmm0, k0
        ret

.LCPI1_0:
        .zero 64,4
bar:
        vptestmb        k0, zmm0, zmmword ptr [rip + .LCPI1_0]
 vpmovm2b        zmm0, k0
        ret
```

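Both functions simply broadcast bit 2 of each byte to all 8 bits of that byte, so both could be a single `vgf2p8affineqb` whose matrix rows are all `0b100` (this is what the direct intrinsic call `foobar` in the Godbolt link above compiles to):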

```asm
.LCPI2_1:
        .zero   8,4
foobar:
 vgf2p8affineqb  zmm0, zmm0, qword ptr [rip + .LCPI2_1]{1to8}, 0
 ret
```

In real code, we often want to increment based on a condition which is effectively a bit-test:

```zig
export fn baz(x: V, y: V) V {
 return y -% foo(x);
}
```

Currently, we get:

```asm
.LCPI3_2:
        .byte 0
        .byte   0
        .byte   128
        .byte   64
        .byte 32
        .byte   16
        .byte   8
        .byte   4
.LCPI3_3:
 .zero   4,1
baz:
        vgf2p8affineqb  zmm0, zmm0, qword ptr [rip + .LCPI3_2]{1to8}, 0
        vpandd  zmm0, zmm0, dword ptr [rip + .LCPI3_3]{1to16}
        vpaddb  zmm0, zmm0, zmm1
        ret
```

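This could be the following, where the matrix writes only bit 0 of each output byte, making the `vpandd` mask unnecessary: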

```asm
.LCPI3_2:
        .byte   0
        .byte   0
        .byte 0
        .byte   0
        .byte   0
        .byte   0
        .byte 0
        .byte   4
baz:
        vgf2p8affineqb  zmm0, zmm0, qword ptr [rip + .LCPI3_2]{1to8}, 0
        vpaddb  zmm0, zmm0, zmm1
 ret
```

Or, reusing the all-ones mask from `foobar` and subtracting it instead of adding:

```asm
.LCPI3_2:
        .zero   8,4
baz:
 vgf2p8affineqb  zmm0, zmm0, qword ptr [rip + .LCPI3_2]{1to8}, 0
 vpsubb  zmm0, zmm1, zmm0
        ret
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to