Issue 66191
Summary [X86] Poor movmskps bit extraction
Labels backend:X86, llvm:SelectionDAG
Assignees
Reporter RKSimon
```c
#include <x86intrin.h>
void func();
/* Reproducer (high-bit case): calls func() when bit 3 (0x8) of the
 * _mm_movemask_ps() result is set. 0x8 is the top bit of the 4-bit mask,
 * which is the case that produces the poor codegen reported here. */
void scatter_mask_vector_i32_epi32(__m128 mask) {
	if (_mm_movemask_ps(mask) & 0x8)
		func();
}
```
is optimized to the following IR and assembly:
```ll
; IR for the `& 0x8` reproducer. The mask test has been folded into a signed
; compare on the i4 mask: 0x8 is the i4 sign bit, so `sgt -1` is true exactly
; when that bit is clear.
define void @scatter_mask_vector_i32_epi32(<4 x float> noundef %mask) {
entry:
  %0 = bitcast <4 x float> %mask to <4 x i32>
  ; Per-lane "is negative" check, i.e. each lane's sign bit.
  %1 = icmp slt <4 x i32> %0, zeroinitializer
  ; Pack the four i1 results into a 4-bit integer mask.
  %2 = bitcast <4 x i1> %1 to i4
  ; True when the i4 value is non-negative (top bit clear) -> skip the call.
  %tobool.not = icmp sgt i4 %2, -1
  br i1 %tobool.not, label %if.end, label %if.then

if.then:
  tail call void @func()()
  br label %if.end

if.end:
  ret void
}

declare void @func()() local_unnamed_addr
```
```asm
# Code generated for the `& 0x8` reproducer.
scatter_mask_vector_i32_epi32:
  # Extract the 4-bit sign mask of %xmm0 into %eax.
 vmovmskps %xmm0, %eax
  # Shifting left then arithmetic-right by 4 sign-extends the low 4 bits of
  # %al (the i4 sext_inreg); the resulting sign flag equals mask bit 3.
  shlb $4, %al
  sarb $4, %al
  # Tail-call func when the sign flag is set, i.e. when mask bit 3 was set.
  js func()@PLT # TAILCALL
  retq
```
Changing the bit test to a lower bit in the mask creates much simpler code:
```c
#include <x86intrin.h>
void func();
/* Reproducer (low-bit case): identical to the 0x8 variant except that it
 * tests bit 1 (0x2) of the _mm_movemask_ps() result before calling func(). */
void scatter_mask_vector_i32_epi32(__m128 mask) {
	if (_mm_movemask_ps(mask) & 0x2)
		func();
}
```
```ll
; IR for the `& 0x2` reproducer. Here the bit test stays as an explicit
; `and` + compare-with-zero on the i4 mask.
define void @scatter_mask_vector_i32_epi32(<4 x float> noundef %mask) {
entry:
  %0 = bitcast <4 x float> %mask to <4 x i32>
  ; Per-lane "is negative" check, i.e. each lane's sign bit.
  %1 = icmp slt <4 x i32> %0, zeroinitializer
  ; Pack the four i1 results into a 4-bit integer mask.
  %2 = bitcast <4 x i1> %1 to i4
  ; Isolate bit 1 of the mask.
  %3 = and i4 %2, 2
  ; True when bit 1 is clear -> skip the call.
  %tobool.not = icmp eq i4 %3, 0
  br i1 %tobool.not, label %if.end, label %if.then

if.then:
  tail call void @func()()
  br label %if.end

if.end:
  ret void
}

declare void @func()() local_unnamed_addr #1
```
```asm
# Code generated for the `& 0x2` reproducer.
scatter_mask_vector_i32_epi32:
  # Extract the 4-bit sign mask of %xmm0 into %eax.
  vmovmskps %xmm0, %eax
  # Test mask bit 1 directly; no shift/sign-extend sequence is needed.
  testb $2, %al
  # Tail-call func when the tested bit was set.
  jne func()@PLT # TAILCALL
 retq

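With suitable value tracking we should be able to convert the i4 sext_inreg (the `shlb $4` / `sarb $4` pair in the first listing) into a sign-bit test on the source mask value, e.g. a single bit test such as `testb $8, %al` like the one generated for the lower-bit case.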
_______________________________________________
llvm-bugs mailing list
llvm-bugs@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to