Issue 114727
Summary Missing fold of subtract and comparison by using sign flag on ARM64 and x86-64
Labels
Assignees
Reporter chandlerc
    Full example:
https://cpp.compiler-explorer.com/z/GaPEch7dj

For the code in the example, LLVM generates the following ARM64 assembly:
```
make_id(T*, long):
        ldr     w9, [x0, x1, lsl #2]
        mov     w8, #-2097152
        asr     w9, w9, #9
        sub     w8, w8, w9
        cmn     w9, #512, lsl #12
        csel    w0, w9, w8, gt
        ret
```

While I would expect, and GCC generates, something that reuses the `sub`:
```
make_id(T*, long):
        ldr     w1, [x0, x1, lsl 2]
        mov     w2, -2097152
        asr     w1, w1, 9
        subs    w0, w2, w1
        csel    w0, w0, w1, pl
        ret
```

The same failure also occurs on x86-64, with LLVM generating:
```
make_id(T*, long):
        mov     ecx, dword ptr [rdi + 4*rsi]
        sar     ecx, 9
        mov     eax, -2097152
        sub     eax, ecx
        cmp     ecx, -2097151
        cmovge  eax, ecx
        ret
```

While I would expect, and GCC generates:
```
make_id(T*, long):
        mov     edx, DWORD PTR [rdi+rsi*4]
        mov     eax, -2097152
        sar     edx, 9
        sub     eax, edx
        cmovs   eax, edx
        ret
```

The LLVM IR for x86-64 (should be the same for ARM64):
```llvm
%struct.T = type { i32 }

define dso_local noundef range(i32 -2097151, 4194304) i32 @make_id(T*, long)(ptr nocapture noundef readonly %data, i64 noundef %i) local_unnamed_addr {
entry:
  %arrayidx = getelementptr inbounds %struct.T, ptr %data, i64 %i
  %bf.load = load i32, ptr %arrayidx, align 4
  %shr = ashr i32 %bf.load, 9
  %cmp = icmp sgt i32 %shr, -2097152
  %sub = sub nuw nsw i32 -2097152, %shr
  %retval.0 = select i1 %cmp, i32 %shr, i32 %sub
  ret i32 %retval.0
}
```

Note that the `nuw` and `nsw` flags are inferred here due to the `ashr`.

The original C++ code in question:
```cpp
#include <assert.h>
#include <stdint.h>
#include <sys/types.h>

// One-byte enum (underlying type uint8_t); in this reproducer it is only used
// as the type of the bit-field T::e.
enum E : uint8_t {
  V1,
  V2,
};

// Record read by make_id(): three bit-fields whose declared widths add up to
// 32 bits (8 + 1 + 23). The IR above models it as `%struct.T = type { i32 }`.
struct T {
  E e : sizeof(E) * 8;  // width derived from E, which is a uint8_t-sized enum
  bool b : 1;
  unsigned payload : 23;  // 23-bit field that make_id() sign-extends
};

// Bit pattern 0xFFE00000; as an int32_t this is -2097152, which is the
// constant that appears in the assembly and LLVM IR listings above.
constexpr int32_t InvalidId = 0b1111'1111'1110'0000'0000'0000'0000'0000;

// Reproducer for the missed fold. Reads the 23-bit payload of data[i],
// sign-extends it to `ext_id`, and computes `InvalidId - ext_id`. Returns that
// difference when it is non-negative, otherwise returns `ext_id` itself.
// The subtraction and the sign test are the pair the report expects the
// backend to fold into a single flag-setting subtract.
int32_t make_id(T* data, ssize_t i) {
  uint32_t payload = data[i].payload;
  constexpr int shift = 32 - 23;
  // Move the 23-bit field to the top of the word, then shift it back down as
  // a signed value so bit 22 is replicated into the upper bits (this is the
  // `ashr i32 ..., 9` in the IR).
  int32_t ext_id = static_cast<int32_t>(payload << shift) >> shift;
  // Subtract in unsigned arithmetic, which is well-defined on wraparound, and
  // then reinterpret the result as signed so its sign can be tested.
  int32_t index = static_cast<uint32_t>(InvalidId) - static_cast<uint32_t>(ext_id);
  if (index < 0) {
    // A negative difference is expected to imply ext_id is above InvalidId.
    assert(ext_id > InvalidId);
    return ext_id;
  }
  return index;
}
```

---

An attempt at simplifying the test case produces somewhat bizarre results. Not sure what to make of these.

https://cpp.compiler-explorer.com/z/PGYb7vsGs

Here the C++ code is:
```cpp
#include <assert.h>
#include <stdint.h>
#include <sys/types.h>

// Bit pattern 0xFFE00000; as an int32_t this is -2097152, the constant that
// appears in the LLVM IR and assembly for this example.
constexpr int32_t InvalidId = 0b1111'1111'1110'0000'0000'0000'0000'0000;

// Simplified reproducer: same subtract-then-test-sign logic as the full
// example, but `ext_id` is loaded directly from an int32_t array instead of
// being sign-extended from a bit-field. Returns `InvalidId - ext_id` when
// that difference is non-negative, otherwise returns `ext_id`.
int32_t make_id(int32_t* data, ssize_t i) {
  int32_t ext_id = data[i];
#ifdef __clang__
  // Clang-only hint that ext_id is at most 0x003FFFFF (4194303), the largest
  // value a sign-extended 23-bit field can hold. It shows up in the IR as
  // `icmp slt i32 %0, 4194304` feeding `llvm.assume`.
 __builtin_assume(ext_id <= 0b0000'0000'0011'1111'1111'1111'1111'1111);
#endif
  // Subtract in unsigned arithmetic, which is well-defined on wraparound, and
  // then reinterpret the result as signed so its sign can be tested.
  int32_t index = static_cast<uint32_t>(InvalidId) - static_cast<uint32_t>(ext_id);
  if (index < 0) {
    // The report notes that without the assume above, LLVM considers this
    // assert reachable, while GCC removes it.
    assert(ext_id > InvalidId);
    return ext_id;
 }
  return index;
}
```

And the LLVM IR (for x86-64) is:
```llvm
define dso_local noundef range(i32 -2097151, 2145386497) i32 @make_id(int*, long)(ptr nocapture noundef readonly %data, i64 noundef %i) local_unnamed_addr {
entry:
  %arrayidx = getelementptr inbounds i32, ptr %data, i64 %i
  %0 = load i32, ptr %arrayidx, align 4
  %cmp = icmp slt i32 %0, 4194304
  tail call void @llvm.assume(i1 %cmp)
  %cmp1 = icmp sgt i32 %0, -2097152
  %sub = sub nuw nsw i32 -2097152, %0
  %retval.0 = select i1 %cmp1, i32 %0, i32 %sub
  ret i32 %retval.0
}

declare void @llvm.assume(i1 noundef) #1
```

Without the `assume`, LLVM thinks that the `assert` above can fire... Which I think is correct? But weirdly, GCC deletes the assert...

But even setting that aside, I'm a bit surprised that the assume is enough for LLVM to infer both `nuw` and `nsw` on the `sub` here. `INT_MIN` doesn't seem to be precluded by the assume, and yet I feel like the subtraction has to wrap if `%0` is `INT_MIN`. So that seems like it could be a miscompile unless I've misremembered how one of the no-wrap flags works.

And after all of that, LLVM still generates the redundant comparison on both x86-64 and ARM64 (ARM64 shown):
```
make_id(int*, long):
        ldr     w9, [x0, x1, lsl #2]
        mov     w8, #-2097152
        sub     w8, w8, w9
        cmn     w9, #512, lsl #12
        csel    w0, w9, w8, gt
        ret
```
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to