| Issue |
61615
|
| Summary |
clang x86 missed optimization for 2-dimensional array accessed through an always-zero enum class
|
| Labels |
new issue
|
| Assignees |
|
| Reporter |
AMS21
|
Given the following code [godbolt](https://godbolt.org/z/16fv1xzTr)
```cpp
enum class E {
A = 0,
};
constexpr int t[1][1]{{1}};
int f1(E a) {
return t[static_cast<int>(a)][static_cast<int>(a)];
}
int f2(E a, E b) {
return t[static_cast<int>(a)][static_cast<int>(b)];
}
```
clang-trunk generates this assembly:
```asm
f1(E): # @f1(E)
movsxd rax, edi
lea rcx, [rip + t]
lea rcx, [rcx + 4*rax]
mov eax, dword ptr [rcx + 4*rax]
ret
f2(E, E): # @f2(E, E)
movsxd rax, edi
movsxd rcx, esi
lea rdx, [rip + t]
lea rax, [rdx + 4*rax]
mov eax, dword ptr [rax + 4*rcx]
ret
t:
.long 1 # 0x1
```
and this IR:
```llvm
@_ZL1t = internal unnamed_addr constant [1 x [1 x i32]] [[1 x i32] [i32 1]], align 4, !dbg !0
define dso_local noundef i32 @_Z2f11E(i32 noundef %0) local_unnamed_addr #0 !dbg !23 {
call void @llvm.dbg.value(metadata i32 %0, metadata !27, metadata !DIExpression()), !dbg !28
%2 = sext i32 %0 to i64, !dbg !29
%3 = getelementptr inbounds [1 x [1 x i32]], ptr @_ZL1t, i64 0, i64 %2, i64 %2, !dbg !29
%4 = load i32, ptr %3, align 4, !dbg !29, !tbaa !30
ret i32 %4, !dbg !34
}
define dso_local noundef i32 @_Z2f21ES_(i32 noundef %0, i32 noundef %1) local_unnamed_addr #0 !dbg !35 {
call void @llvm.dbg.value(metadata i32 %0, metadata !39, metadata !DIExpression()), !dbg !41
call void @llvm.dbg.value(metadata i32 %1, metadata !40, metadata !DIExpression()), !dbg !41
%3 = sext i32 %0 to i64, !dbg !42
%4 = sext i32 %1 to i64, !dbg !42
%5 = getelementptr inbounds [1 x [1 x i32]], ptr @_ZL1t, i64 0, i64 %3, i64 %4, !dbg !42
%6 = load i32, ptr %5, align 4, !dbg !42, !tbaa !30
ret i32 %6, !dbg !43
}
declare void @llvm.dbg.value(metadata, metadata, metadata) #1
attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }
```
While gcc-trunk generates just:
```asm
f1(E):
mov eax, 1
ret
f2(E, E):
mov eax, 1
ret
```
Interestingly, when using just a plain enum without the `static_cast`s, clang generates the optimal code [godbolt](https://godbolt.org/z/YWW53Pv8z). But with the `static_cast`s kept, it still doesn't [godbolt](https://godbolt.org/z/o35nrx966).
Using `__builtin_unreachable()` to explicitly tell the compiler that `a` and `b` are `E::A` works for `f1()` but strangely not for `f2()` [godbolt](https://godbolt.org/z/zosco43EW).
But using `__builtin_assume(...)` works for both [godbolt](https://godbolt.org/z/WvzP99TET).
Declaring the underlying type of enum class `E` as `unsigned short` or `unsigned char` also fixes the problem [godbolt](https://godbolt.org/z/cW6eKdxG4), as does casting to `unsigned short` or `unsigned char` instead of `int` [godbolt](https://godbolt.org/z/8h3T8qhco).
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs