Issue 175305
Summary [AVX-512] SIMD 32-bit division by constants should use `vpmadd52huq`
Labels new issue
Assignees
Reporter Validark
    [Zig Godbolt](https://zig.godbo.lt/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QAJgAMfAQQCqAZ0wAFAB6cpvAFYgupWkwagAXnmCll9ZATwDKjdAGFUtAK4sGe0o4AyeBkwAOQ8AI0xiEABWABZSAAdURUI7Bhd3T29E5NsBf0CQlnDI2MtMa1yGIQImYgJ0jy99K0wbVOragnzgsIjouMUauobM5qGugJ6ivtiASktUN2Jkdg40BkGAakH0TYBSAGYAEU2AATwWRLqIPYkJHduJWcOAIT2pAEF1rdC3OlsGPtjmcLlcCDc7r9/gFHs8Dm9PvE3KFNt8CJsACofABKAHFsBiAPoACQ%2BQkJHwAagANQlRLgSABiQJOUNoAIAdENgJgCBzkEiObVkAgWYcThy1AAOABshJlMU2Rl2bM5ZmAhNCTGQAGtHJswGBxZsOYMmDyJITpXKFUqGLsdhyMbUeXzrRyqJgmAQlpghLziUxFBBVQEuS7efzBZ7vb7FKQTUwAG5qekSKhwhEfJEotGYnH4omk8lU2lpgCSjIAsh8Wfm8QSSWSKTS6QzmcrtgR0E6I27ZR6vT7iH6A0GQ392WHuZGBW5B7GR/HEym03gqCwmJn3l8BFtKXXTpTWkRiBApQm3Acnq8dzvMGowZsqID%2BKgILY2GbLiBNlenpsB57AA7FmmzgZsI7DoCpzjpSCanIo8SGOCn6YN%2B8SzLMmwAPQ7hBBFnHBCYciBYGERRBxSDI%2BEUYRsoxNRpC0XREHAZsABUmwMUxLGsZsXCKlxPE0Z8/EQVKUSibW4kCde0myZssQHHEfGscBMrARIzFiXRIFHNunz6XeiLIs%2BgKbug6BRBICAQBoZzHjYJDnpeCrzJsACev5HieLkXn%2B7nYb5zlngFbjufsoF8eumwQOcaSoJcaEQNhJAGmAWINkWzalm2EiVjW2FkWpEF5modISD5Tmnq5f42cVwKnAQxBuAwogEJg9mGTJFF5p5lXVX5YWXg1h4tW1HVdZ5PWsXmLBuLQhJuPE8R9I5w11W4Y3Gs1rXtd6XXxeOIW1eFXBSDEHkVWNXEDTtBzYIc2BnEhKEQA1s10VBSwWYty2rREt66eBxlGSDqJ7uiVCTYodaDK1NhReRFEPp1xCvjBjy0LQSYsJKA7JqmDIckm8SWdZtkcm4ACOHIMlKjzxTVLnaYFV0ISzZ5sxFHMbaFEA80FqJiLQ6xJhAQpuEQwVc4LblXcDvWEWjESY2c2O4/j7pE2mpPk0wVk2Qg1N0xIUQykzp0uXE7MedbZ627z9ty07wuiDj4uS0w0uoLLm1u4r8KleBqsY%2BZGt3DjeMEzKQqriTZMU8bpscmmVty%2BF7mc5tWd8w7W3u6LXtSzL/NnQrX2g8BRxK3xP3hzD7WKBypyazHOsJxI%2BvJ1TtNpwyVtvd6EBSNdCYzXXNcmR8D5Pi%2BmxameDn/v7At58V0UQ2gyUXJgmxJrUmwLVOyF4BEihDevld1qRW/KwRMRRFKDIAJxSk/OkPxB15RMBL93AOPoEOAlZRcBlFIGU79X5f1YlwMBECoFShgSA68BwDiSWAsBA4sC6LgJlFEV%2BEhNJSBQRDAi4D4HAUEhILgwDyEQToTEAh8DX5SBwXxYywdwbKxPrYM%2BF8gROEOMI4Ex1gwFyzvbYe4JPqbAALTl38m5Z4oFNgHGAgmGIAUpIJnpAmDRWj9CbBiDgkxBwooGTrhDBuFlDaUzsohZCI81AeT4XgARxBFA9TBh8Dg8xaCcCiLwLwHAtCkFQJwAAWuYbYixlj71uEA3gBBND%2BPmDqEAMRX4cgkDEYh2CpTUIODKDRF5AkcBiCEtJETOC8EvjIVJYT/GkDgLAJAO94h0AiOQSgnTumRBMBNBgOo%2BD/AvpQUINTQgBFqN5bgvAZnMG
IJ5AA8qEbQJ5OA8FIDvNgghVkMFoPM8JWBfjACcKLS%2BCzSBYE3MYcQzTbl4BHG0JM6EakPlaNLVY4SAidQqeE2geBQjEDmS4LANSWoXG2bwd5xBQhJEwEcTA9zgDAuMGk%2BYVBDDAEUJSc%2BAB3VZa1Qk7P4IIEQYh2C0NkIIJQqgNBPN0NpQwmL1QGBBZfSA8xUDxEqNc%2BRqzNjYnKF6ZQjIgzonkSwOc4oTAMHhVEBRM50TWnlDEeRGK3BqAUfwFghB5FUDBWweRiR/kRF4KgeFxA8BYG5alMoFRUgOHtCMJoPh7TdEKMULISQUgCHdX6nIqRvW9EiM0MVbQBAdGGK4Ro3gWjRqqOMMN0wI2WHGEGsYnQ02%2Bq4PMRQ8SVh6ACUE6pTzIkcE2OqTYwydRxVwIQdKSSC0pKxRk6IUgOTAT/teGUDNSkygkDgipVTSAsD0ExUJ4Sq31JAI0jtBhOASArbOuppAmlaCwqQeFyR7AxCAA%3D%3D)
[LLVM Godbolt](https://llvm.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,selection:(endColumn:2,endLineNumber:7,positionColumn:1,positionLineNumber:1,selectionStartColumn:2,selectionStartLineNumber:7,startColumn:1,startLineNumber:1),source:'define+dso_local+range(i32+0,+14316558)+%3C8+x+i32%3E+@foo(i32+%250)+local_unnamed_addr+%7B%0AEntry:%0A++%251+%3D+insertelement+%3C1+x+i32%3E+poison,+i32+%250,+i64+0%0A++%252+%3D+shufflevector+%3C1+x+i32%3E+%251,+%3C1+x+i32%3E+poison,+%3C8+x+i32%3E+zeroinitializer%0A++%253+%3D+udiv+%3C8+x+i32%3E+%252,+%3Ci32+300,+i32+86400,+i32+604800,+i32+1209600,+i32+850,+i32+1320,+i32+5434,+i32+7672%3E%0A++ret+%3C8+x+i32%3E+%253%0A%7D%0A%0Adefine+dso_local+%3C8+x+i64%3E+@bar(i32+%250)+local_unnamed_addr+%7B%0AEntry:%0A++%251+%3D+zext+i32+%250+to+i64%0A++%252+%3D+insertelement+%3C1+x+i64%3E+poison,+i64+%251,+i64+0%0A++%253+%3D+shufflevector+%3C1+x+i64%3E+%252,+%3C1+x+i64%3E+poison,+%3c8+x+i32%3e+zeroinitializer%0a++%254+%3d+tail+call+fastcc+%3c8+x+i64%[email protected](%3C8+x+i64%3E+zeroinitializer,+%3C8+x+i64%3E+%253,+%3C8+x+i64%3E+%3Ci64+15011998760960,+i64+52124995696,+i64+7446427956,+i64+3723213978,+i64+10596705009664,+i64+3411817900032,+i64+828781676032,+i64+587017678336%3E)%0a++ret+%3c8+x+i64%3e+%254%0a%7d%0a%0adeclare+fastcc+%3c8+x+i64%[email 
protected](%3C8+x+i64%3E,+%3C8+x+i64%3E,+%3C8+x+i64%3E)+%231%0A'),l:'5',n:'0',o:'LLVM+IR+source+%231',t:'0')),k:50.76820307281229,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:llctrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,libs:!(),options:'-O3+-mcpu%3Dznver5',overrides:!(),selection:(endColumn:34,endLineNumber:47,positionColumn:34,positionLineNumber:47,selectionStartColumn:34,selectionStartLineNumber:47,startColumn:34,startLineNumber:47),source:1),l:'5',n:'0',o:'+llc+(trunk)+(Editor+%231)',t:'0')),k:49.23179692718771,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)

```llvm
; Reproducer: divide one i32, broadcast to 8 lanes, by 8 different constants.
; The `range` return attribute bounds every lane to [0, 14316558): the largest
; possible quotient is 4294967295 / 300 = 14316557 (300 is the smallest divisor).
define dso_local range(i32 0, 14316558) <8 x i32> @foo(i32 %0) local_unnamed_addr {
Entry:
  ; Place the scalar argument in a 1-element vector ...
  %1 = insertelement <1 x i32> poison, i32 %0, i64 0
  ; ... and splat it: an all-zero shuffle mask copies element 0 into all 8 lanes.
  %2 = shufflevector <1 x i32> %1, <1 x i32> poison, <8 x i32> zeroinitializer
  ; Lane-wise unsigned division by a constant vector; this is the operation
  ; whose lowering the issue is about.
  %3 = udiv <8 x i32> %2, <i32 300, i32 86400, i32 604800, i32 1209600, i32 850, i32 1320, i32 5434, i32 7672>
  ret <8 x i32> %3
}
```

```zig
/// Eight lanes of unsigned 32-bit integers.
const V = @Vector(8, u32);

/// Divides `timestamp` by eight compile-time-known divisors at once and
/// returns the eight truncated unsigned quotients, one per lane.
export fn foo(timestamp: u32) V {
    // Lane order matters: it fixes which quotient lands in which result lane.
    const divisors: V = .{ 300, 86400, 7 * 86400, 14 * 86400, 850, 1320, 5434, 7672 };
    // The same dividend is used in every lane.
    const dividend: V = @splat(timestamp);
    return dividend / divisors;
}
```

With `llc` (trunk) at `-O3 -mcpu=znver5`, this currently results in:

```asm
# Constant pools for the current lowering of `foo` (32-bit multiply-high
# division by constants). Result lanes 0..7 correspond to the divisors
# 300, 86400, 604800, 1209600, 850, 1320, 5434, 7672.
#
# .LCPI0_0: the multipliers for result lanes 1, 3, 5, 7 (same values as
# entries 1, 3, 5, 7 of .LCPI0_2), each followed by 4 zero bytes so that they
# occupy the even dword slots, which are the ones vpmuludq reads.
.LCPI0_0:
        .long 3257812231
        .zero   4
        .long   1861606989
        .zero 4
        .long   1665926709
        .zero   4
        .long 1146518903
        .zero   4
# .LCPI0_1: per-lane right-shift counts applied to the dividend before the
# even-lane multiply. Only lane 4 (divisor 850) is pre-shifted, by 1.
.LCPI0_1:
        .long   0
        .long 0
        .long   0
        .long   0
        .long   1
        .long 0
        .long   0
        .long   0
# .LCPI0_2: per-lane 32-bit multipliers. vpmuludq only consumes the even
# entries (lanes 0, 2, 4, 6) from this table; the odd-lane multipliers are
# taken from .LCPI0_0 instead.
.LCPI0_2:
        .long 458129845
        .long   3257812231
        .long   1861606989
 .long   1861606989
        .long   323385773
        .long   1665926709
 .long   1618714211
        .long   1146518903
# .LCPI0_4: per-lane right-shift counts applied to the high 32 bits of each
# product to obtain the final quotient.
.LCPI0_4:
        .long 5
        .long   16
        .long   18
        .long   19
 .long   5
        .long   9
        .long   11
        .long 11
# .LCPI0_5: vpermi2d selector, stored as bytes and widened to dwords with
# vpmovsxbd. Indices 1, 3, 5, 7 pick the odd dwords (high product halves) of the
# first table operand; 9, 11, 13, 15 pick the odd dwords of the second.
.LCPI0_5:
        .byte   1
        .byte   9
        .byte   3
 .byte   11
        .byte   5
        .byte   13
        .byte   7
 .byte   15
# Current lowering of the 8-lane udiv-by-constants: two widening multiplies
# (odd and even result lanes), a permute to gather the high halves, and
# variable shifts before and after.
foo:
        # Splat the 32-bit argument in edi to all 8 dword lanes.
        vpbroadcastd    ymm0, edi
        # 64-bit products of the dividend with the multipliers for result
        # lanes 1, 3, 5, 7 (held in the even dword slots of .LCPI0_0).
        vpmuludq ymm1, ymm0, ymmword ptr [rip + .LCPI0_0]
        # Per-lane pre-shift of the dividend (only lane 4 shifts, by 1).
        vpsrlvd ymm0, ymm0, ymmword ptr [rip + .LCPI0_1]
        # Load the permute selector, sign-extending 8 bytes to 8 dwords.
        vpmovsxbd       ymm2, qword ptr [rip + .LCPI0_5]
        # 64-bit products for result lanes 0, 2, 4, 6.
        vpmuludq        ymm0, ymm0, ymmword ptr [rip + .LCPI0_2]
        # Interleave the high 32 bits of the even-lane products (ymm0) and the
        # odd-lane products (ymm1) back into lane order; result goes to ymm2.
 vpermi2d        ymm2, ymm0, ymm1
        # Per-lane post-shift yields the quotients.
        vpsrlvd ymm0, ymm2, ymmword ptr [rip + .LCPI0_4]
        ret
```

Should be (a single `vpmadd52huq` against a table of 52-bit reciprocals, producing 64-bit lanes):

```asm
# Per-lane 52-bit reciprocals C such that floor(x * C / 2^52) == x / d for
# every 32-bit x. Lanes 0-3 and 5-7 are the 32-bit multipliers from the current
# lowering shifted left so that the total shift becomes 52.
.LCPI1_0:
        .quad   15011998760960          # d = 300:     458129845  << 15
        .quad   52124995696             # d = 86400:   3257812231 << 4
        .quad   7446427956              # d = 604800:  1861606989 << 2
        .quad   3723213978              # d = 1209600: 1861606989 << 1
        # d = 850: ceil(2^52 / 850). The value originally posted here,
        # 10596705009664 (= 323385773 << 15), is ~2^52 / 425: it reuses the
        # 32-bit multiplier but drops the 1-bit pre-shift of the dividend that
        # the 32-bit sequence pairs with it, so it would compute x / 425
        # (e.g. 850 -> 2). 850 * 5298352502789 - 2^52 = 154 <= 2^20, so this
        # constant is exact for all 32-bit dividends without a pre-shift.
        .quad   5298352502789
        .quad   3411817900032           # d = 1320:    1665926709 << 11
        .quad   828781676032            # d = 5434:    1618714211 << 9
        .quad   587017678336            # d = 7672:    1146518903 << 9
bar:
        # Zero-extend the 32-bit argument to 64 bits.
        mov     eax, edi
        # Splat it to all 8 qword lanes.
        vpbroadcastq    zmm1, rax
        # Zero the accumulator (a VEX-encoded xmm write clears the rest of zmm0).
        vpxor   xmm0, xmm0, xmm0
        # zmm0 += high 52 bits of the 104-bit product of the low 52 bits of each
        # lane, i.e. floor(x * C / 2^52): the quotient, in a 64-bit lane.
        vpmadd52huq     zmm0, zmm1, zmmword ptr [rip + .LCPI1_0]
        ret
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to