Issue 86784
Summary WRONG code by SLP vectorizer
Labels new issue
Assignees
Reporter JonPsson1
    This program should print 'checksum = 2':

```
cat wrong0.i

// printf is declared directly so the reproducer does not include <stdio.h>.
int printf(const char *, ...);

// Res receives the final value that main prints; A is the accumulator that is
// AND-ed with every element of Arr.
int Res = 0, A = 3050;  // 3050 = 1011 1110 1010
// A is only modified through this pointer in main.
int *Aptr = &A;
// Eight identical 64-bit elements; each is narrowed to int when AND-ed into A.
long Arr[8] = {614, 614, 614, 614, 614, 614, 614, 614}; // 614 = 10 0110 0110
// C accumulates the XOR of fun() results; C2 is compared against the loop index.
unsigned char C = 0, C2 = 0;

// Returns the low 8 bits of the unsigned sum ui1 + ui2.
unsigned char fun(unsigned ui1, unsigned ui2) { return (ui1 + ui2) & 0xff; }

// Stores b (converted to int) in the global Res. Despite the name, no CRC is
// computed here; the function only records the byte it is given.
void crc32_byte(char b) { Res = b; }

// Reproducer driver: ANDs every element of Arr into A (through Aptr), folds
// fun() results into C, then prints bits 8 and up of A via Res.
// Expected output is "checksum = 2".
int main() {
  for (short IV = 0; IV <= 7; IV += 1) {
    // 3050 & 614 = 610; bit 9 must survive, so the AND has to be done in 32 bits.
    *Aptr &= Arr[IV];  // 10 0110 0010
    // The comparison C2 <= IV contributes 0 or 1 as the first addend.
 C ^= fun(C2 <= IV, Arr[IV]);
  }
  // 610 >> 8 == 2: this is the second byte of A.
  crc32_byte(A >> 8);  // 0b10
 printf("checksum = %X\n", Res);
}
```
A (via Aptr) is ANDed with 614 in each iteration, in 32 bits, which leaves 0b10 in the second byte. SLPVectorizer, however, performs the AND in only 1 byte and then extends that result to 32 bits; as a consequence the second byte is lost and the program prints '0':
```

; Before SLP 
define dso_local noundef signext i32 @main() local_unnamed_addr #2 {
entry:
  %0 = load ptr, ptr @Aptr, align 8, !tbaa !8
  %1 = load i8, ptr @C2, align 2, !tbaa !10
  %.promoted = load i32, ptr %0, align 4, !tbaa !4
  %C.promoted = load i8, ptr @C, align 2, !tbaa !10
  %2 = load i64, ptr @Arr, align 8, !tbaa !11
  %3 = trunc i64 %2 to i32
 %conv3 = and i32 %.promoted, %3
  %cmp6 = icmp eq i8 %1, 0
  %conv7 = zext i1 %cmp6 to i32
  %add.i = add i32 %conv7, %3
  %conv.i23 = trunc i32 %add.i to i8
  %xor22 = xor i8 %C.promoted, %conv.i23
...

; *** IR Dump After SLPVectorizerPass on main *** 
define dso_local noundef signext i32 @main() local_unnamed_addr #2 {
entry:
  %0 = load ptr, ptr @Aptr, align 8, !tbaa !8
  %1 = load i8, ptr @C2, align 2, !tbaa !10
  %.promoted = load i32, ptr %0, align 4, !tbaa !4
  %C.promoted = load i8, ptr @C, align 2, !tbaa !10
  %2 = load <8 x i64>, ptr @Arr, align 8, !tbaa !11
  %3 = trunc <8 x i64> %2 to <8 x i8>
  %4 = extractelement <8 x i8> %3, i32 0
  %5 = sext i8 %4 to i32
  %conv3 = and i32 %.promoted, %5
 ...


```

```
clang -O0 -target s390x-linux-gnu -march=z16 wrong0.i -o a.out; ./a.out 
checksum = 2

clang -O3 -target s390x-linux-gnu -march=z16 wrong0.i -o a.out; ./a.out 
checksum = 0

clang -O3 -target s390x-linux-gnu -march=z16 wrong0.i -o a.out -mllvm -slp-vectorize-hor=false; ./a.out
checksum = 2
```
@alexey-bataev @patrick-rivos @nikic 
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to