| Issue |
175305
|
| Summary |
[AVX-512] SIMD 32-bit division by constants should use `vpmadd52huq`
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
Validark
|
[Zig Godbolt](https://zig.godbo.lt/#z:OYLghAFBqd5QCxAYwPYBMCmBRdBLAF1QCcAaPECAMzwBtMA7AQwFtMQByARg9KtQYEAysib0QAJgAMfAQQCqAZ0wAFAB6cpvAFYgupWkwagAXnmCll9ZATwDKjdAGFUtAK4sGe0o4AyeBkwAOQ8AI0xiEABWABZSAAdURUI7Bhd3T29E5NsBf0CQlnDI2MtMa1yGIQImYgJ0jy99K0wbVOragnzgsIjouMUauobM5qGugJ6ivtiASktUN2Jkdg40BkGAakH0TYBSAGYAEU2AATwWRLqIPYkJHduJWcOAIT2pAEF1rdC3OlsGPtjmcLlcCDc7r9/gFHs8Dm9PvE3KFNt8CJsACofABKAHFsBiAPoACQ%2BQkJHwAagANQlRLgSABiQJOUNoAIAdENgJgCBzkEiObVkAgWYcThy1AAOABshJlMU2Rl2bM5ZmAhNCTGQAGtHJswGBxZsOYMmDyJITpXKFUqGLsdhyMbUeXzrRyqJgmAQlpghLziUxFBBVQEuS7efzBZ7vb7FKQTUwAG5qekSKhwhEfJEotGYnH4omk8lU2lpgCSjIAsh8Wfm8QSSWSKTS6QzmcrtgR0E6I27ZR6vT7iH6A0GQ392WHuZGBW5B7GR/HEym03gqCwmJn3l8BFtKXXTpTWkRiBApQm3Acnq8dzvMGowZsqID%2BKgILY2GbLiBNlenpsB57AA7FmmzgZsI7DoCpzjpSCanIo8SGOCn6YN%2B8SzLMmwAPQ7hBBFnHBCYciBYGERRBxSDI%2BEUYRsoxNRpC0XREHAZsABUmwMUxLGsZsXCKlxPE0Z8/EQVKUSibW4kCde0myZssQHHEfGscBMrARIzFiXRIFHNunz6XeiLIs%2BgKbug6BRBICAQBoZzHjYJDnpeCrzJsACev5HieLkXn%2B7nYb5zlngFbjufsoF8eumwQOcaSoJcaEQNhJAGmAWINkWzalm2EiVjW2FkWpEF5modISD5Tmnq5f42cVwKnAQxBuAwogEJg9mGTJFF5p5lXVX5YWXg1h4tW1HVdZ5PWsXmLBuLQhJuPE8R9I5w11W4Y3Gs1rXtd6XXxeOIW1eFXBSDEHkVWNXEDTtBzYIc2BnEhKEQA1s10VBSwWYty2rREt66eBxlGSDqJ7uiVCTYodaDK1NhReRFEPp1xCvjBjy0LQSYsJKA7JqmDIckm8SWdZtkcm4ACOHIMlKjzxTVLnaYFV0ISzZ5sxFHMbaFEA80FqJiLQ6xJhAQpuEQwVc4LblXcDvWEWjESY2c2O4/j7pE2mpPk0wVk2Qg1N0xIUQykzp0uXE7MedbZ627z9ty07wuiDj4uS0w0uoLLm1u4r8KleBqsY%2BZGt3DjeMEzKQqriTZMU8bpscmmVty%2BF7mc5tWd8w7W3u6LXtSzL/NnQrX2g8BRxK3xP3hzD7WKBypyazHOsJxI%2BvJ1TtNpwyVtvd6EBSNdCYzXXNcmR8D5Pi%2BmxameDn/v7At58V0UQ2gyUXJgmxJrUmwLVOyF4BEihDevld1qRW/KwRMRRFKDIAJxSk/OkPxB15RMBL93AOPoEOAlZRcBlFIGU79X5f1YlwMBECoFShgSA68BwDiSWAsBA4sC6LgJlFEV%2BEhNJSBQRDAi4D4HAUEhILgwDyEQToTEAh8DX5SBwXxYywdwbKxPrYM%2BF8gROEOMI4Ex1gwFyzvbYe4JPqbAALTl38m5Z4oFNgHGAgmGIAUpIJnpAmDRWj9CbBiDgkxBwooGTrhDBuFlDaUzsohZCI81AeT4XgARxBFA9TBh8Dg8xaCcCiLwLwHAtCkFQJwAAWuYbYixlj71uEA3gBBND%2BPmDqEAMRX4cgkDEYh2CpTUIODKDRF5AkcBiCEtJETOC8EvjIVJYT/GkDgLAJAO94h0AiOQSgnTumRBMBNBgOo%2BD/AvpQUINTQgBFqN5bgvAZnMGIJ5A
A8qEbQJ5OA8FIDvNgghVkMFoPM8JWBfjACcKLS%2BCzSBYE3MYcQzTbl4BHG0JM6EakPlaNLVY4SAidQqeE2geBQjEDmS4LANSWoXG2bwd5xBQhJEwEcTA9zgDAuMGk%2BYVBDDAEUJSc%2BAB3VZa1Qk7P4IIEQYh2C0NkIIJQqgNBPN0NpQwmL1QGBBZfSA8xUDxEqNc%2BRqzNjYnKF6ZQjIgzonkSwOc4oTAMHhVEBRM50TWnlDEeRGK3BqAUfwFghB5FUDBWweRiR/kRF4KgeFxA8BYG5alMoFRUgOHtCMJoPh7TdEKMULISQUgCHdX6nIqRvW9EiM0MVbQBAdGGK4Ro3gWjRqqOMMN0wI2WHGEGsYnQ02%2Bq4PMRQ8SVh6ACUE6pTzIkcE2OqTYwydRxVwIQdKSSC0pKxRk6IUgOTAT/teGUDNSkygkDgipVTSAsD0ExUJ4Sq31JAI0jtBhOASArbOuppAmlaCwqQeFyR7AxCAA%3D%3D)
[LLVM Godbolt](https://llvm.godbo.lt/#g:!((g:!((g:!((h:codeEditor,i:(filename:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,selection:(endColumn:2,endLineNumber:7,positionColumn:1,positionLineNumber:1,selectionStartColumn:2,selectionStartLineNumber:7,startColumn:1,startLineNumber:1),source:'define+dso_local+range(i32+0,+14316558)+%3C8+x+i32%3E+@foo(i32+%250)+local_unnamed_addr+%7B%0AEntry:%0A++%251+%3D+insertelement+%3C1+x+i32%3E+poison,+i32+%250,+i64+0%0A++%252+%3D+shufflevector+%3C1+x+i32%3E+%251,+%3C1+x+i32%3E+poison,+%3C8+x+i32%3E+zeroinitializer%0A++%253+%3D+udiv+%3C8+x+i32%3E+%252,+%3Ci32+300,+i32+86400,+i32+604800,+i32+1209600,+i32+850,+i32+1320,+i32+5434,+i32+7672%3E%0A++ret+%3C8+x+i32%3E+%253%0A%7D%0A%0Adefine+dso_local+%3C8+x+i64%3E+@bar(i32+%250)+local_unnamed_addr+%7B%0AEntry:%0A++%251+%3D+zext+i32+%250+to+i64%0A++%252+%3D+insertelement+%3C1+x+i64%3E+poison,+i64+%251,+i64+0%0A++%253+%3D+shufflevector+%3C1+x+i64%3E+%252,+%3C1+x+i64%3E+poison,+%3c8+x+i32%3e+zeroinitializer%0a++%254+%3d+tail+call+fastcc+%3c8+x+i64%3e+@llvm.x86.avx512.vpmadd52h.uq.512(%3C8+x+i64%3E+zeroinitializer,+%3C8+x+i64%3E+%253,+%3C8+x+i64%3E+%3Ci64+15011998760960,+i64+52124995696,+i64+7446427956,+i64+3723213978,+i64+10596705009664,+i64+3411817900032,+i64+828781676032,+i64+587017678336%3E)%0a++ret+%3c8+x+i64%3e+%254%0a%7d%0a%0adeclare+fastcc+%3c8+x+i64%3e+@llvm.x86.avx512.vpmadd52h.uq.512(%3C8+x+i64%3E,+%3C8+x+i64%3E,+%3C8+x+i64%3E)+%231%0A'),l:'5',n:'0',o:'LLVM+IR+source+%231',t:'0')),k:50.76820307281229,l:'4',n:'0',o:'',s:0,t:'0'),(g:!((h:compiler,i:(compiler:llctrunk,filters:(b:'0',binary:'1',binaryObject:'1',commentOnly:'0',debugCalls:'1',demangle:'0',directives:'0',execute:'1',intel:'0',libraryCode:'0',trim:'1',verboseDemangling:'0'),flagsViewOpen:'1',fontScale:14,fontUsePx:'0',j:1,lang:llvm,libs:!(),options:'-O3+-mcpu%3Dznver5',overrides:!(),selection:(endColumn:34,endLineNumber:47,positionColumn:34,positionLineNumber:47,selectionStartColumn:34,selectionStartLineNumber:47,startColumn:34,startLineNumber:47),source:1),l:'5',n:'0',o:'+llc+(trunk)+(Editor+%231)',t:'0')),k:49.23179692718771,l:'4',m:100,n:'0',o:'',s:0,t:'0')),l:'2',n:'0',o:'',t:'0')),version:4)
```llvm
; Reproducer: broadcasts the scalar argument %0 to eight i32 lanes and divides
; each lane (unsigned) by a different compile-time constant.
; The range attribute states that every returned element lies in [0, 14316558).
define dso_local range(i32 0, 14316558) <8 x i32> @foo(i32 %0) local_unnamed_addr {
Entry:
; insertelement followed by a shufflevector with an all-zero mask splats %0
; across all 8 lanes.
%1 = insertelement <1 x i32> poison, i32 %0, i64 0
%2 = shufflevector <1 x i32> %1, <1 x i32> poison, <8 x i32> zeroinitializer
; Lane-wise unsigned division by a constant vector; the lowering of this
; instruction is what the report is about.
%3 = udiv <8 x i32> %2, <i32 300, i32 86400, i32 604800, i32 1209600, i32 850, i32 1320, i32 5434, i32 7672>
ret <8 x i32> %3
}
```
```zig
// Reproducer: divides a single u32 by eight different compile-time constants
// in one vector operation.
const V = @Vector(8, u32);
export fn foo(timestamp: u32) V {
// Broadcast `timestamp` to all 8 lanes, then divide lane-wise by the
// constant vector below.
return @as(V, @splat(timestamp)) /
@as(V, .{
300,
86400,
// 7 * 86400 == 604800
7 * 86400,
// 14 * 86400 == 1209600
14 * 86400,
850,
1320,
5434,
7672,
});
}
```
With `llc` (trunk) at `-O3 -mcpu=znver5`, this currently compiles to:
```asm
# Constant pool for the 32-bit multiply-high lowering of the vector udiv.
#
# .LCPI0_0: multipliers for result lanes 1, 3, 5 and 7 (the same values as
# entries 1, 3, 5 and 7 of .LCPI0_2). Each one sits in the low dword of a
# qword, which is the position vpmuludq reads its 32-bit operand from.
.LCPI0_0:
.long 3257812231
.zero 4
.long 1861606989
.zero 4
.long 1665926709
.zero 4
.long 1146518903
.zero 4
# .LCPI0_1: per-lane right shift applied to the input before the second
# multiply. Only lane 4 (divisor 850) is shifted, by 1.
.LCPI0_1:
.long 0
.long 0
.long 0
.long 0
.long 1
.long 0
.long 0
.long 0
# .LCPI0_2: per-lane 32-bit multipliers, in the same lane order as the
# divisors. vpmuludq only consumes the even entries (lanes 0, 2, 4, 6) of
# this table.
.LCPI0_2:
.long 458129845
.long 3257812231
.long 1861606989
.long 1861606989
.long 323385773
.long 1665926709
.long 1618714211
.long 1146518903
# .LCPI0_4: per-lane right shift applied to the high half of each product.
.LCPI0_4:
.long 5
.long 16
.long 18
.long 19
.long 5
.long 9
.long 11
.long 11
# .LCPI0_5: dword indices for vpermi2d, stored as bytes. Indices 0-7 select
# from the first source register and 8-15 from the second, so this picks the
# odd (high) dword of every 64-bit product, alternating between the two
# product vectors.
.LCPI0_5:
.byte 1
.byte 9
.byte 3
.byte 11
.byte 5
.byte 13
.byte 7
.byte 15
foo:
# Broadcast the 32-bit argument to all 8 dword lanes.
vpbroadcastd ymm0, edi
# 64-bit products for result lanes 1, 3, 5, 7 (input is not pre-shifted).
vpmuludq ymm1, ymm0, ymmword ptr [rip + .LCPI0_0]
# Pre-shift the input (only lane 4 changes).
vpsrlvd ymm0, ymm0, ymmword ptr [rip + .LCPI0_1]
# Sign-extend the byte indices to dwords for the permute below.
vpmovsxbd ymm2, qword ptr [rip + .LCPI0_5]
# 64-bit products for result lanes 0, 2, 4, 6.
vpmuludq ymm0, ymm0, ymmword ptr [rip + .LCPI0_2]
# Interleave the high 32 bits of the even-lane and odd-lane products.
vpermi2d ymm2, ymm0, ymm1
# Final per-lane shift yields the quotients.
vpsrlvd ymm0, ymm2, ymmword ptr [rip + .LCPI0_4]
ret
```
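Because every input lane fits in 32 bits, each division can instead be done with a single `vpmadd52huq` (AVX-512 IFMA): zero-extend the input to 64-bit lanes, multiply by a 52-bit fixed-point reciprocal, and keep the high 52 bits of the product. The output should look like this (shown returning `<8 x i64>`; producing the original `<8 x i32>` result would need one extra narrowing step):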
```asm
# Reciprocal table for vpmadd52huq: one 52-bit fixed-point multiplier c per
# divisor d, such that (x * c) >> 52 == x / d for every 32-bit unsigned x.
# Where the 32-bit sequence uses no pre-shift, c is its multiplier shifted
# left so that the total right shift becomes 52.
.LCPI1_0:
.quad 15011998760960 # d = 300: 458129845 << 15
.quad 52124995696 # d = 86400: 3257812231 << 4
.quad 7446427956 # d = 604800: 1861606989 << 2
.quad 3723213978 # d = 1209600: 1861606989 << 1
# d = 850: ceil(2^52 / 850). The 32-bit multiplier 323385773 relies on a
# ">> 1" pre-shift of the input, so 323385773 << 15 (10596705009664) would
# compute x / 425, and 323385773 << 14 is inexact for large x.
.quad 5298352502789
.quad 3411817900032 # d = 1320: 1665926709 << 11
.quad 828781676032 # d = 5434: 1618714211 << 9
.quad 587017678336 # d = 7672: 1146518903 << 9
bar:
# Writing eax zero-extends the 32-bit argument into rax.
mov eax, edi
# Broadcast the zero-extended value to all 8 qword lanes.
vpbroadcastq zmm1, rax
# Zero the accumulator (a VEX-encoded write to xmm0 clears the rest of zmm0).
vpxor xmm0, xmm0, xmm0
# zmm0 += high 52 bits of (low 52 bits of zmm1 * low 52 bits of table entry).
vpmadd52huq zmm0, zmm1, zmmword ptr [rip + .LCPI1_0]
ret
```
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs