https://github.com/akadutta updated 
https://github.com/llvm/llvm-project/pull/176414

>From 02783dd2e1679984611cc62c588e4b8e25b91223 Mon Sep 17 00:00:00 2001
From: akadutta <[email protected]>
Date: Fri, 16 Jan 2026 09:14:20 -0600
Subject: [PATCH 1/2] Preserve multi-dimensional array structure in GEP
 optimization

---
 .../InstCombine/InstructionCombining.cpp      | 96 +++++++++++--------
 .../InstCombine/canonicalize-gep-constglob.ll |  4 +-
 llvm/test/Transforms/InstCombine/strcmp-3.ll  |  6 +-
 llvm/test/Transforms/InstCombine/strlen-8.ll  | 10 +-
 4 files changed, 67 insertions(+), 49 deletions(-)

diff --git a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp 
b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
index d24db3de8f7b3..0daf76227418b 100644
--- a/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
+++ b/llvm/lib/Transforms/InstCombine/InstructionCombining.cpp
@@ -2921,12 +2921,21 @@ Instruction 
*InstCombinerImpl::visitGEPOfGEP(GetElementPtrInst &GEP,
   Indices.append(GEP.op_begin() + 2, GEP.op_end());
 
   // Don't create GEPs with more than one non-zero index.
-  unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
-    auto *C = dyn_cast<Constant>(Idx);
-    return !C || !C->isNullValue();
-  });
-  if (NumNonZeroIndices > 1)
-    return nullptr;
+  // Exception: preserve multi-dimensional array structure for better backend
+  // optimization (e.g. memory coalescing and vectorization on AMDGPU). This
+  // applies on all targets whenever the source element type is a nested array.
+  Type *GEPSrcElemTy = GEP.getSourceElementType();
+  bool IsMultiDimArray_Strip = GEPSrcElemTy->isArrayTy() &&
+                               
GEPSrcElemTy->getArrayElementType()->isArrayTy();
+
+  if (!IsMultiDimArray_Strip) {
+    unsigned NumNonZeroIndices = count_if(Indices, [](Value *Idx) {
+      auto *C = dyn_cast<Constant>(Idx);
+      return !C || !C->isNullValue();
+    });
+    if (NumNonZeroIndices > 1)
+      return nullptr;
+  }
 
   return replaceInstUsesWith(
       GEP, Builder.CreateGEP(
@@ -3364,17 +3373,24 @@ Instruction 
*InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
                                drop_end(Indices), "", GEP.getNoWrapFlags()));
   }
 
-  // Strip leading zero indices.
-  auto *FirstIdx = dyn_cast<Constant>(Indices.front());
-  if (FirstIdx && FirstIdx->isNullValue() &&
-      !FirstIdx->getType()->isVectorTy()) {
-    gep_type_iterator GTI = gep_type_begin(GEP);
-    ++GTI;
-    if (!GTI.isStruct())
-      return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
-                                                        
GEP.getPointerOperand(),
-                                                        drop_begin(Indices), 
"",
-                                                        GEP.getNoWrapFlags()));
+  // Strip leading zero indices (except for multi-dimensional arrays).
+  // Preserve structure for better backend optimization.
+  Type *GEPSrcElemTy = GEP.getSourceElementType();
+  bool IsMultiDimArray_Strip = GEPSrcElemTy->isArrayTy() &&
+                               
GEPSrcElemTy->getArrayElementType()->isArrayTy();
+
+  if (!IsMultiDimArray_Strip) {
+    auto *FirstIdx = dyn_cast<Constant>(Indices.front());
+    if (FirstIdx && FirstIdx->isNullValue() &&
+        !FirstIdx->getType()->isVectorTy()) {
+      gep_type_iterator GTI = gep_type_begin(GEP);
+      ++GTI;
+      if (!GTI.isStruct())
+        return replaceInstUsesWith(GEP, Builder.CreateGEP(GTI.getIndexedType(),
+                                                          
GEP.getPointerOperand(),
+                                                          drop_begin(Indices), 
"",
+                                                          
GEP.getNoWrapFlags()));
+    }
   }
 
   // Scalarize vector operands; prefer splat-of-gep.as canonical form.
@@ -3403,29 +3419,33 @@ Instruction 
*InstCombinerImpl::visitGetElementPtrInst(GetElementPtrInst &GEP) {
     return replaceInstUsesWith(GEP, Res);
   }
 
-  bool SeenNonZeroIndex = false;
-  for (auto [IdxNum, Idx] : enumerate(Indices)) {
-    auto *C = dyn_cast<Constant>(Idx);
-    if (C && C->isNullValue())
-      continue;
+  // GEP has multiple non-zero indices: Split it (except for multi-dim arrays).
+  // Preserve structure for better backend optimization.
+  if (!IsMultiDimArray_Strip) {
+    bool SeenNonZeroIndex = false;
+    for (auto [IdxNum, Idx] : enumerate(Indices)) {
+      auto *C = dyn_cast<Constant>(Idx);
+      if (C && C->isNullValue())
+        continue;
 
-    if (!SeenNonZeroIndex) {
-      SeenNonZeroIndex = true;
-      continue;
-    }
+      if (!SeenNonZeroIndex) {
+        SeenNonZeroIndex = true;
+        continue;
+      }
 
-    // GEP has multiple non-zero indices: Split it.
-    ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
-    Value *FrontGEP =
-        Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,
-                          GEP.getName() + ".split", GEP.getNoWrapFlags());
-
-    SmallVector<Value *> BackIndices;
-    BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy));
-    append_range(BackIndices, drop_begin(Indices, IdxNum));
-    return GetElementPtrInst::Create(
-        GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), FrontGEP,
-        BackIndices, GEP.getNoWrapFlags());
+      // GEP has multiple non-zero indices: Split it.
+      ArrayRef<Value *> FrontIndices = ArrayRef(Indices).take_front(IdxNum);
+      Value *FrontGEP =
+          Builder.CreateGEP(GEPEltType, PtrOp, FrontIndices,
+                            GEP.getName() + ".split", GEP.getNoWrapFlags());
+
+      SmallVector<Value *> BackIndices;
+      BackIndices.push_back(Constant::getNullValue(NewScalarIndexTy));
+      append_range(BackIndices, drop_begin(Indices, IdxNum));
+      return GetElementPtrInst::Create(
+          GetElementPtrInst::getIndexedType(GEPEltType, FrontIndices), 
FrontGEP,
+          BackIndices, GEP.getNoWrapFlags());
+    }
   }
 
   // Check to see if the inputs to the PHI node are getelementptr instructions.
diff --git a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll 
b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
index 129da3f9110ad..6d238ae497d07 100644
--- a/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
+++ b/llvm/test/Transforms/InstCombine/canonicalize-gep-constglob.ll
@@ -35,9 +35,7 @@ define ptr @xzy(i64 %x, i64 %y, i64 %z) {
 ; CHECK-LABEL: define ptr @xzy(
 ; CHECK-SAME: i64 [[X:%.*]], i64 [[Y:%.*]], i64 [[Z:%.*]]) {
 ; CHECK-NEXT:  entry:
-; CHECK-NEXT:    [[GEP_SPLIT:%.*]] = getelementptr inbounds [10 x [10 x i32]], 
ptr getelementptr inbounds nuw (i8, ptr @glob, i64 40), i64 [[X]]
-; CHECK-NEXT:    [[GEP_SPLIT1:%.*]] = getelementptr inbounds [10 x i32], ptr 
[[GEP_SPLIT]], i64 [[Z]]
-; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds i32, ptr [[GEP_SPLIT1]], 
i64 [[Y]]
+; CHECK-NEXT:    [[GEP:%.*]] = getelementptr inbounds [10 x [10 x [10 x 
i32]]], ptr getelementptr inbounds nuw (i8, ptr @glob, i64 40), i64 0, i64 
[[X]], i64 [[Z]], i64 [[Y]]
 ; CHECK-NEXT:    ret ptr [[GEP]]
 ;
 entry:
diff --git a/llvm/test/Transforms/InstCombine/strcmp-3.ll 
b/llvm/test/Transforms/InstCombine/strcmp-3.ll
index 72da736a0a9fd..2c4012b96e188 100644
--- a/llvm/test/Transforms/InstCombine/strcmp-3.ll
+++ b/llvm/test/Transforms/InstCombine/strcmp-3.ll
@@ -25,7 +25,7 @@ define i32 @fold_strcmp_a5i0_a5i1_to_0() {
 
 define i32 @call_strcmp_a5i0_a5iI(i64 %I) {
 ; CHECK-LABEL: @call_strcmp_a5i0_a5iI(
-; CHECK-NEXT:    [[Q:%.*]] = getelementptr [4 x i8], ptr @a5, i64 [[I:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5, i64 0, i64 
[[I:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = call i32 @strcmp(ptr noundef nonnull 
dereferenceable(4) @a5, ptr noundef nonnull dereferenceable(1) [[Q]])
 ; CHECK-NEXT:    ret i32 [[CMP]]
 ;
@@ -40,7 +40,7 @@ define i32 @call_strcmp_a5i0_a5iI(i64 %I) {
 
 define i32 @call_strcmp_a5iI_a5i0(i64 %I) {
 ; CHECK-LABEL: @call_strcmp_a5iI_a5i0(
-; CHECK-NEXT:    [[P:%.*]] = getelementptr [4 x i8], ptr @a5, i64 [[I:%.*]]
+; CHECK-NEXT:    [[P:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5, i64 0, i64 
[[I:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = call i32 @strcmp(ptr noundef nonnull 
dereferenceable(1) [[P]], ptr noundef nonnull dereferenceable(4) @a5)
 ; CHECK-NEXT:    ret i32 [[CMP]]
 ;
@@ -68,7 +68,7 @@ define i32 @fold_strcmp_a5i0_a5i1_p1_to_0() {
 
 define i32 @call_strcmp_a5i0_a5i1_pI(i64 %I) {
 ; CHECK-LABEL: @call_strcmp_a5i0_a5i1_pI(
-; CHECK-NEXT:    [[Q:%.*]] = getelementptr i8, ptr getelementptr inbounds nuw 
(i8, ptr @a5, i64 4), i64 [[I:%.*]]
+; CHECK-NEXT:    [[Q:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5, i64 0, i64 
1, i64 [[I:%.*]]
 ; CHECK-NEXT:    [[CMP:%.*]] = call i32 @strcmp(ptr noundef nonnull 
dereferenceable(4) @a5, ptr noundef nonnull dereferenceable(1) [[Q]])
 ; CHECK-NEXT:    ret i32 [[CMP]]
 ;
diff --git a/llvm/test/Transforms/InstCombine/strlen-8.ll 
b/llvm/test/Transforms/InstCombine/strlen-8.ll
index af12198069803..b4334ddd8f1ac 100644
--- a/llvm/test/Transforms/InstCombine/strlen-8.ll
+++ b/llvm/test/Transforms/InstCombine/strlen-8.ll
@@ -16,7 +16,7 @@ declare i64 @strlen(ptr)
 
 define i64 @fold_a5_4_i0_pI(i64 %I) {
 ; CHECK-LABEL: @fold_a5_4_i0_pI(
-; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i8, ptr @a5_4, i64 [[I:%.*]]
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, 
i64 0, i64 [[I:%.*]]
 ; CHECK-NEXT:    [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull 
dereferenceable(1) [[PTR]])
 ; CHECK-NEXT:    ret i64 [[LEN]]
 ;
@@ -30,7 +30,7 @@ define i64 @fold_a5_4_i0_pI(i64 %I) {
 
 define i64 @fold_a5_4_i1_pI(i64 %I) {
 ; CHECK-LABEL: @fold_a5_4_i1_pI(
-; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds 
nuw (i8, ptr @a5_4, i64 4), i64 [[I:%.*]]
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, 
i64 1, i64 [[I:%.*]]
 ; CHECK-NEXT:    [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull 
dereferenceable(1) [[PTR]])
 ; CHECK-NEXT:    ret i64 [[LEN]]
 ;
@@ -44,7 +44,7 @@ define i64 @fold_a5_4_i1_pI(i64 %I) {
 
 define i64 @fold_a5_4_i2_pI(i64 %I) {
 ; CHECK-LABEL: @fold_a5_4_i2_pI(
-; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds 
nuw (i8, ptr @a5_4, i64 8), i64 [[I:%.*]]
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, 
i64 2, i64 [[I:%.*]]
 ; CHECK-NEXT:    [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull 
dereferenceable(1) [[PTR]])
 ; CHECK-NEXT:    ret i64 [[LEN]]
 ;
@@ -58,7 +58,7 @@ define i64 @fold_a5_4_i2_pI(i64 %I) {
 
 define i64 @fold_a5_4_i3_pI_to_0(i64 %I) {
 ; CHECK-LABEL: @fold_a5_4_i3_pI_to_0(
-; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds 
nuw (i8, ptr @a5_4, i64 12), i64 [[I:%.*]]
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, 
i64 3, i64 [[I:%.*]]
 ; CHECK-NEXT:    [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull 
dereferenceable(1) [[PTR]])
 ; CHECK-NEXT:    ret i64 [[LEN]]
 ;
@@ -72,7 +72,7 @@ define i64 @fold_a5_4_i3_pI_to_0(i64 %I) {
 
 define i64 @fold_a5_4_i4_pI_to_0(i64 %I) {
 ; CHECK-LABEL: @fold_a5_4_i4_pI_to_0(
-; CHECK-NEXT:    [[PTR:%.*]] = getelementptr i8, ptr getelementptr inbounds 
nuw (i8, ptr @a5_4, i64 16), i64 [[I:%.*]]
+; CHECK-NEXT:    [[PTR:%.*]] = getelementptr [5 x [4 x i8]], ptr @a5_4, i64 0, 
i64 4, i64 [[I:%.*]]
 ; CHECK-NEXT:    [[LEN:%.*]] = call i64 @strlen(ptr noundef nonnull 
dereferenceable(1) [[PTR]])
 ; CHECK-NEXT:    ret i64 [[LEN]]
 ;

>From fe8abc6c745ab61d14d08ff22df1d16d9aa1fbfc Mon Sep 17 00:00:00 2001
From: akadutta <[email protected]>
Date: Fri, 16 Jan 2026 13:01:04 -0600
Subject: [PATCH 2/2] update clang test

---
 clang/test/CodeGen/union-tbaa1.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/clang/test/CodeGen/union-tbaa1.c b/clang/test/CodeGen/union-tbaa1.c
index 3f6ada5023f27..c512c011e588f 100644
--- a/clang/test/CodeGen/union-tbaa1.c
+++ b/clang/test/CodeGen/union-tbaa1.c
@@ -11,13 +11,13 @@ void bar(vect32 p[][2]);
 // CHECK-LABEL: define dso_local void @fred(
 // CHECK-SAME: i32 noundef [[NUM:%.*]], ptr noundef writeonly captures(none) 
initializes((0, 8)) [[VEC:%.*]], ptr noundef readonly captures(none) 
[[INDEX:%.*]], ptr noundef readonly captures(none) [[ARR:%.*]]) 
local_unnamed_addr #[[ATTR0:[0-9]+]] {
 // CHECK-NEXT:  [[ENTRY:.*:]]
-// CHECK-NEXT:    [[TMP:%.*]] = alloca [4 x [2 x %union.vect32]], align 8
+// CHECK-NEXT:    [[TMP:%.*]] = alloca [4 x [2 x [[UNION_VECT32:%.*]]]], align 
8
 // CHECK-NEXT:    call void @llvm.lifetime.start.p0(ptr nonnull [[TMP]]) 
#[[ATTR3:[0-9]+]]
 // CHECK-NEXT:    [[TMP0:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa 
[[TBAA2:![0-9]+]]
 // CHECK-NEXT:    [[ARRAYIDX:%.*]] = getelementptr inbounds [2 x i32], ptr 
[[ARR]], i32 [[TMP0]]
 // CHECK-NEXT:    [[TMP1:%.*]] = load i32, ptr [[ARRAYIDX]], align 4, !tbaa 
[[TBAA2]]
 // CHECK-NEXT:    [[MUL:%.*]] = mul i32 [[TMP1]], [[NUM]]
-// CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [2 x 
%union.vect32], ptr [[TMP]], i32 [[TMP0]]
+// CHECK-NEXT:    [[ARRAYIDX2:%.*]] = getelementptr inbounds [4 x [2 x 
[[UNION_VECT32]]]], ptr [[TMP]], i32 0, i32 [[TMP0]]
 // CHECK-NEXT:    store i32 [[MUL]], ptr [[ARRAYIDX2]], align 8, !tbaa 
[[TBAA6:![0-9]+]]
 // CHECK-NEXT:    [[ARRAYIDX5:%.*]] = getelementptr inbounds nuw i8, ptr 
[[ARRAYIDX]], i32 4
 // CHECK-NEXT:    [[TMP2:%.*]] = load i32, ptr [[ARRAYIDX5]], align 4, !tbaa 
[[TBAA2]]
@@ -27,7 +27,7 @@ void bar(vect32 p[][2]);
 // CHECK-NEXT:    [[TMP3:%.*]] = lshr i32 [[MUL]], 16
 // CHECK-NEXT:    store i32 [[TMP3]], ptr [[VEC]], align 4, !tbaa [[TBAA2]]
 // CHECK-NEXT:    [[TMP4:%.*]] = load i32, ptr [[INDEX]], align 4, !tbaa 
[[TBAA2]]
-// CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [2 x 
%union.vect32], ptr [[TMP]], i32 [[TMP4]]
+// CHECK-NEXT:    [[ARRAYIDX13:%.*]] = getelementptr inbounds [4 x [2 x 
[[UNION_VECT32]]]], ptr [[TMP]], i32 0, i32 [[TMP4]]
 // CHECK-NEXT:    [[ARRAYIDX15:%.*]] = getelementptr inbounds nuw i8, ptr 
[[ARRAYIDX13]], i32 6
 // CHECK-NEXT:    [[TMP5:%.*]] = load i16, ptr [[ARRAYIDX15]], align 2, !tbaa 
[[TBAA6]]
 // CHECK-NEXT:    [[CONV16:%.*]] = zext i16 [[TMP5]] to i32

_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to