Issue 56684
Summary [X86] Failure to reassociate PMULUDQ mul-by-constant pairs of nodes
Labels backend:X86, missed-optimization
Assignees
Reporter RKSimon
    Noticed while working on https://reviews.llvm.org/D129765
```
define <4 x i32> @test_urem_even_allones_and_poweroftwo_and_one(<4 x i32> %X) nounwind {
  %urem = urem <4 x i32> %X, <i32 14, i32 4294967295, i32 16, i32 1>
  %cmp = icmp eq <4 x i32> %urem, <i32 0, i32 0, i32 0, i32 0>
  %ret = zext <4 x i1> %cmp to <4 x i32>
  ret <4 x i32> %ret
}
```
llc -mtriple=x86_64--
```
.LCPI0_0:
        .long   4294967295                      # 0xffffffff
        .long   0                               # 0x0
        .long   0                               # 0x0
        .long   0                               # 0x0
.LCPI0_1:
        .long   1                               # 0x1
        .long   1                               # 0x1
        .long   1                               # 0x1
        .long   1                               # 0x1
.LCPI0_2:
        .long   3067833783                      # 0xb6db6db7
        .long   4294967295                      # 0xffffffff
        .long   1                               # 0x1
        .long   0                               # 0x0
.LCPI0_3:
        .long   2147483648                      # 0x80000000
        .zero   4
        .long   268435456                       # 0x10000000
        .zero   4
.LCPI0_4:
        .long   2147483648                      # 0x80000000
        .long   2147483648                      # 0x80000000
        .long   2147483648                      # 0x80000000
        .long   2147483648                      # 0x80000000
.LCPI0_5:
        .long   2454267026                      # 0x92492492
        .long   2147483649                      # 0x80000001
        .long   2415919103                      # 0x8fffffff
        .long   2147483647                      # 0x7fffffff
test_urem_even_allones_and_poweroftwo_and_one: # @test_urem_even_allones_and_poweroftwo_and_one
        pshufd  $245, %xmm0, %xmm1              # xmm1 = xmm0[1,1,3,3]
        pmuludq .LCPI0_0(%rip), %xmm1
        movdqa  .LCPI0_1(%rip), %xmm2           # xmm2 = [1,1,1,1]
        pmuludq %xmm2, %xmm1
        pshufd  $237, %xmm1, %xmm3              # xmm3 = xmm1[1,3,2,3]
        pmuludq .LCPI0_2(%rip), %xmm0
        pmuludq .LCPI0_3(%rip), %xmm0
        pshufd  $237, %xmm0, %xmm4              # xmm4 = xmm0[1,3,2,3]
        punpckldq       %xmm3, %xmm4            # xmm4 = xmm4[0],xmm3[0],xmm4[1],xmm3[1]
        pshufd  $232, %xmm1, %xmm1              # xmm1 = xmm1[0,2,2,3]
        pshufd  $232, %xmm0, %xmm0              # xmm0 = xmm0[0,2,2,3]
        punpckldq       %xmm1, %xmm0            # xmm0 = xmm0[0],xmm1[0],xmm0[1],xmm1[1]
        por     %xmm4, %xmm0
        pxor    .LCPI0_4(%rip), %xmm0
        pcmpgtd .LCPI0_5(%rip), %xmm0
        pandn   %xmm2, %xmm0
        retq
```

It should be possible to merge the pmuludq mul-by-constant pairs. Naturally, we have to be careful due to the implicit zero-extension of the instruction, but in many of these cases at least one of the pairs of elements is a multiply-by-one. PMULDQ possibly has similar cases, but I haven't found any (the PMULUDQ cases appear due to an expansion of a mixture of v4i32 multiplies and rotates).

https://gcc.godbolt.org/z/nfq8K7sTT
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to