Issue 61615
Summary clang x86 missed optimization for 2-dimensional array indexed through an always-zero enum class
Labels new issue
Assignees
Reporter AMS21
    Given the following code ([godbolt](https://godbolt.org/z/16fv1xzTr)):
```cpp
enum class E {
 A = 0,
};

constexpr int t[1][1]{{1}};

int f1(E a) {
 return t[static_cast<int>(a)][static_cast<int>(a)];
}

int f2(E a, E b) {
    return t[static_cast<int>(a)][static_cast<int>(b)];
}
```

clang-trunk generates this assembly:
```asm
f1(E):                                # @f1(E)
        movsxd  rax, edi
        lea     rcx, [rip + t]
 lea     rcx, [rcx + 4*rax]
        mov     eax, dword ptr [rcx + 4*rax]
        ret
f2(E, E):                              # @f2(E, E)
        movsxd  rax, edi
        movsxd  rcx, esi
        lea rdx, [rip + t]
        lea     rax, [rdx + 4*rax]
        mov     eax, dword ptr [rax + 4*rcx]
        ret
t:
        .long   1 # 0x1
```

and this IR:
```llvm
@_ZL1t = internal unnamed_addr constant [1 x [1 x i32]] [[1 x i32] [i32 1]], align 4, !dbg !0

define dso_local noundef i32 @_Z2f11E(i32 noundef %0) local_unnamed_addr #0 !dbg !23 {
  call void @llvm.dbg.value(metadata i32 %0, metadata !27, metadata !DIExpression()), !dbg !28
  %2 = sext i32 %0 to i64, !dbg !29
  %3 = getelementptr inbounds [1 x [1 x i32]], ptr @_ZL1t, i64 0, i64 %2, i64 %2, !dbg !29
  %4 = load i32, ptr %3, align 4, !dbg !29, !tbaa !30
  ret i32 %4, !dbg !34
}

define dso_local noundef i32 @_Z2f21ES_(i32 noundef %0, i32 noundef %1) local_unnamed_addr #0 !dbg !35 {
  call void @llvm.dbg.value(metadata i32 %0, metadata !39, metadata !DIExpression()), !dbg !41
  call void @llvm.dbg.value(metadata i32 %1, metadata !40, metadata !DIExpression()), !dbg !41
  %3 = sext i32 %0 to i64, !dbg !42
  %4 = sext i32 %1 to i64, !dbg !42
  %5 = getelementptr inbounds [1 x [1 x i32]], ptr @_ZL1t, i64 0, i64 %3, i64 %4, !dbg !42
  %6 = load i32, ptr %5, align 4, !dbg !42, !tbaa !30
  ret i32 %6, !dbg !43
}

declare void @llvm.dbg.value(metadata, metadata, metadata) #1

attributes #0 = { mustprogress nofree norecurse nosync nounwind willreturn memory(none) uwtable "min-legal-vector-width"="0" "no-trapping-math"="true" "stack-protector-buffer-size"="8" "target-cpu"="x86-64" "target-features"="+cx8,+fxsr,+mmx,+sse,+sse2,+x87" "tune-cpu"="generic" }
attributes #1 = { nocallback nofree nosync nounwind speculatable willreturn memory(none) }

```

While gcc-trunk generates just:
```asm
f1(E):
        mov     eax, 1
 ret
f2(E, E):
        mov     eax, 1
 ret
```

Interestingly, when using just a plain enum without the `static_cast`s, clang generates the optimal code [godbolt](https://godbolt.org/z/YWW53Pv8z). But with the `static_cast`s it still doesn't [godbolt](https://godbolt.org/z/o35nrx966).

Using `__builtin_unreachable()` to explicitly tell the compiler that `a` and `b` are `E::A` works for `f1()` but, strangely, not for `f2()` [godbolt](https://godbolt.org/z/zosco43EW).
But using `__builtin_assume(...)` works for both [godbolt](https://godbolt.org/z/WvzP99TET).
Declaring the underlying type of enum class `E` as `unsigned short` or `unsigned char` also fixes the problem [godbolt](https://godbolt.org/z/cW6eKdxG4); the same goes for casting to `unsigned short` or `unsigned char` [godbolt](https://godbolt.org/z/8h3T8qhco).
_______________________________________________
llvm-bugs mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-bugs

Reply via email to