[clang] [CIR] Upstream pointer arithmetic support (PR #138041)

Andy Kaylor via cfe-commits Wed, 30 Apr 2025 16:54:07 -0700

https://github.com/andykaylor updated 
https://github.com/llvm/llvm-project/pull/138041


>From ae1fd50a03a0e5f7c0c40463dc6a07f981f79f66 Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akay...@nvidia.com>
Date: Wed, 30 Apr 2025 11:10:53 -0700
Subject: [PATCH 1/3] [CIR] Upstream pointer arithmetic support

This adds support for explicit pointer arithmetic.
---
 clang/lib/CIR/CodeGen/CIRGenBuilder.h      |  9 +++
 clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 91 ++++++++++++++++++++--
 clang/test/CIR/CodeGen/pointers.cpp        | 73 +++++++++++++++++
 3 files changed, 168 insertions(+), 5 deletions(-)
 create mode 100644 clang/test/CIR/CodeGen/pointers.cpp

diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h 
b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index 5f3bf8b416cde..7d16d70564227 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -185,6 +185,15 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   }
   bool isInt(mlir::Type i) { return mlir::isa<cir::IntType>(i); }
 
+  //
+  // Constant creation helpers
+  // -------------------------
+  //
+  cir::ConstantOp getSInt32(int32_t c, mlir::Location loc) {
+    auto sInt32Ty = getSInt32Ty();
+    return create<cir::ConstantOp>(loc, cir::IntAttr::get(sInt32Ty, c));
+  }
+
   // Creates constant nullptr for pointer type ty.
   cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc) {
     assert(!cir::MissingFeatures::targetCodeGenInfoGetNullPointer());
diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index aef5b125a2877..9fc9bbc3e0c93 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -388,9 +388,26 @@ class ScalarExprEmitter : public 
StmtVisitor<ScalarExprEmitter, mlir::Value> {
         // NOTE(CIR): clang calls CreateAdd but folds this to a unary op
         value = emitUnaryOp(e, kind, input, /*nsw=*/false);
       }
-    } else if (isa<PointerType>(type)) {
-      cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec pointer");
-      return {};
+    } else if (const PointerType *ptr = type->getAs<PointerType>()) {
+      QualType type = ptr->getPointeeType();
+      if (cgf.getContext().getAsVariableArrayType(type)) {
+        // VLA types don't have constant size.
+        cgf.cgm.errorNYI(e->getSourceRange(), "Pointer arithmetic on VLA");
+        return {};
+      } else if (type->isFunctionType()) {
+        // Arithmetic on function pointers (!) is just +-1.
+        cgf.cgm.errorNYI(e->getSourceRange(),
+                         "Pointer arithmetic on function pointer");
+        return {};
+      } else {
+        // For everything else, we can just do a simple increment.
+        mlir::Location loc = cgf.getLoc(e->getSourceRange());
+        CIRGenBuilderTy &builder = cgf.getBuilder();
+        int amount = (isInc ? 1 : -1);
+        mlir::Value amt = builder.getSInt32(amount, loc);
+        assert(!cir::MissingFeatures::sanitizers());
+        value = builder.createPtrStride(loc, value, amt);
+      }
     } else if (type->isVectorType()) {
       cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec vector");
       return {};
@@ -1152,8 +1169,72 @@ getUnwidenedIntegerType(const ASTContext &astContext, 
const Expr *e) {
 static mlir::Value emitPointerArithmetic(CIRGenFunction &cgf,
                                          const BinOpInfo &op,
                                          bool isSubtraction) {
-  cgf.cgm.errorNYI(op.loc, "pointer arithmetic");
-  return {};
+  // Must have binary (not unary) expr here.  Unary pointer
+  // increment/decrement doesn't use this path.
+  const BinaryOperator *expr = cast<BinaryOperator>(op.e);
+
+  mlir::Value pointer = op.lhs;
+  Expr *pointerOperand = expr->getLHS();
+  mlir::Value index = op.rhs;
+  Expr *indexOperand = expr->getRHS();
+
+  // In a subtraction, the LHS is always the pointer.
+  if (!isSubtraction && !mlir::isa<cir::PointerType>(pointer.getType())) {
+    std::swap(pointer, index);
+    std::swap(pointerOperand, indexOperand);
+  }
+
+  // Some versions of glibc and gcc use idioms (particularly in their malloc
+  // routines) that add a pointer-sized integer (known to be a pointer value)
+  // to a null pointer in order to cast the value back to an integer or as
+  // part of a pointer alignment algorithm.  This is undefined behavior, but
+  // we'd like to be able to compile programs that use it.
+  //
+  // Normally, we'd generate a GEP with a null-pointer base here in response
+  // to that code, but it's also UB to dereference a pointer created that
+  // way.  Instead (as an acknowledged hack to tolerate the idiom) we will
+  // generate a direct cast of the integer value to a pointer.
+  //
+  // The idiom (p = nullptr + N) is not met if any of the following are true:
+  //
+  //   The operation is subtraction.
+  //   The index is not pointer-sized.
+  //   The pointer type is not byte-sized.
+  //
+  if (BinaryOperator::isNullPointerArithmeticExtension(
+          cgf.getContext(), op.opcode, expr->getLHS(), expr->getRHS()))
+    return cgf.getBuilder().createIntToPtr(index, pointer.getType());
+
+  // Differently from LLVM codegen, ABI bits for index sizes is handled during
+  // LLVM lowering.
+
+  // If this is subtraction, negate the index.
+  if (isSubtraction)
+    index = cgf.getBuilder().createNeg(index);
+
+  assert(!cir::MissingFeatures::sanitizers());
+
+  const PointerType *pointerType =
+      pointerOperand->getType()->getAs<PointerType>();
+  if (!pointerType) {
+    cgf.cgm.errorNYI("Objective-C:pointer arithmetic with non-pointer type");
+    return nullptr;
+  }
+
+  QualType elementType = pointerType->getPointeeType();
+  if (cgf.getContext().getAsVariableArrayType(elementType)) {
+    cgf.cgm.errorNYI("variable array type");
+    return nullptr;
+  }
+
+  if (elementType->isVoidType() || elementType->isFunctionType()) {
+    cgf.cgm.errorNYI("void* or function pointer arithmetic");
+    return nullptr;
+  }
+
+  assert(!cir::MissingFeatures::sanitizers());
+  return cgf.getBuilder().create<cir::PtrStrideOp>(
+      cgf.getLoc(op.e->getExprLoc()), pointer.getType(), pointer, index);
 }
 
 mlir::Value ScalarExprEmitter::emitMul(const BinOpInfo &ops) {
diff --git a/clang/test/CIR/CodeGen/pointers.cpp 
b/clang/test/CIR/CodeGen/pointers.cpp
new file mode 100644
index 0000000000000..c791356e15bb6
--- /dev/null
+++ b/clang/test/CIR/CodeGen/pointers.cpp
@@ -0,0 +1,73 @@
+// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o 
%t.cir
+// RUN: FileCheck --input-file=%t.cir %s
+
+// Should generate basic pointer arithmetics.
+void foo(int *iptr, char *cptr, unsigned ustride) {
+  iptr + 2;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), 
!cir.ptr<!s32i>
+  cptr + 3;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
+  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#STRIDE]] : !s32i), 
!cir.ptr<!s8i>
+  iptr - 2;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
+  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : 
!s32i), !cir.ptr<!s32i>
+  cptr - 3;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i
+  // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i
+  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#NEGSTRIDE]] : !s32i), 
!cir.ptr<!s8i>
+  iptr + ustride;
+  // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i
+  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !u32i), 
!cir.ptr<!s32i>
+
+  // Must convert unsigned stride to a signed one.
+  iptr - ustride;
+  // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i
+  // CHECK: %[[#SIGNSTRIDE:]] = cir.cast(integral, %[[#STRIDE]] : !u32i), !s32i
+  // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, 
!s32i
+  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : 
!s32i), !cir.ptr<!s32i>
+
+  iptr++;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i
+  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), 
!cir.ptr<!s32i>
+
+  iptr--;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<-1> : !s32i
+  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), 
!cir.ptr<!s32i>
+}
+
+void testPointerSubscriptAccess(int *ptr) {
+// CHECK: testPointerSubscriptAccess
+  ptr[1];
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i
+  // CHECK: %[[#PTR:]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, 
!cir.ptr<!s32i>
+  // CHECK: cir.ptr_stride(%[[#PTR]] : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), 
!cir.ptr<!s32i>
+}
+
+void testPointerMultiDimSubscriptAccess(int **ptr) {
+// CHECK: testPointerMultiDimSubscriptAccess
+  ptr[1][2];
+  // CHECK: %[[#STRIDE2:]] = cir.const #cir.int<2> : !s32i
+  // CHECK: %[[#STRIDE1:]] = cir.const #cir.int<1> : !s32i
+  // CHECK: %[[#PTR1:]] = cir.load %{{.+}} : 
!cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>, !cir.ptr<!cir.ptr<!s32i>>
+  // CHECK: %[[#PTR2:]] = cir.ptr_stride(%[[#PTR1]] : 
!cir.ptr<!cir.ptr<!s32i>>, %[[#STRIDE1]] : !s32i), !cir.ptr<!cir.ptr<!s32i>>
+  // CHECK: %[[#PTR3:]] = cir.load %[[#PTR2]] : !cir.ptr<!cir.ptr<!s32i>>, 
!cir.ptr<!s32i>
+  // CHECK: cir.ptr_stride(%[[#PTR3]] : !cir.ptr<!s32i>, %[[#STRIDE2]] : 
!s32i), !cir.ptr<!s32i>
+}
+
+// This test is meant to verify code that handles the 'p = nullptr + n' idiom
+// used by some versions of glibc and gcc.  This is undefined behavior but
+// it is intended there to act like a conversion from a pointer-sized integer
+// to a pointer, and we would like to tolerate that.
+
+#define NULLPTRINT ((int*)0)
+
+// This should get the inttoptr instruction.
+int *testGnuNullPtrArithmetic(unsigned n) {
+// CHECK: testGnuNullPtrArithmetic
+  return NULLPTRINT + n;
+  // CHECK: %[[NULLPTR:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i>
+  // CHECK: %[[N:.*]] = cir.load %{{.*}} : !cir.ptr<!u32i>, !u32i
+  // CHECK: %[[RESULT:.*]] = cir.ptr_stride(%[[NULLPTR]] : !cir.ptr<!s32i>, 
%[[N]] : !u32i), !cir.ptr<!s32i>
+}

>From b7a6299c4e04d7503c9b4b609fef7903d7764cac Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akay...@nvidia.com>
Date: Wed, 30 Apr 2025 16:31:44 -0700
Subject: [PATCH 2/3] Address review feedback

---
 clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 9 ++++++++-
 clang/test/CIR/CodeGen/pointers.cpp        | 4 ++++
 2 files changed, 12 insertions(+), 1 deletion(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index 9fc9bbc3e0c93..cfd2d0d89a207 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -1178,11 +1178,18 @@ static mlir::Value emitPointerArithmetic(CIRGenFunction 
&cgf,
   mlir::Value index = op.rhs;
   Expr *indexOperand = expr->getRHS();
 
-  // In a subtraction, the LHS is always the pointer.
+  // In the case of subtraction, the FE has ensured that the LHS is always the
+  // pointer. However, addition can have the pointer on either side. We will
+  // always have a pointer operand and an integer operand, so if the LHS wasn't
+  // a pointer, we need to swap our values.
   if (!isSubtraction && !mlir::isa<cir::PointerType>(pointer.getType())) {
     std::swap(pointer, index);
     std::swap(pointerOperand, indexOperand);
   }
+  assert(mlir::isa<cir::PointerType>(pointer.getType()) &&
+         "Need a pointer operand");
+  assert(mlir::isa<cir::IntType>(index.getType()) &&
+         "Need an integer operand");
 
   // Some versions of glibc and gcc use idioms (particularly in their malloc
   // routines) that add a pointer-sized integer (known to be a pointer value)
diff --git a/clang/test/CIR/CodeGen/pointers.cpp 
b/clang/test/CIR/CodeGen/pointers.cpp
index c791356e15bb6..06beecf076e20 100644
--- a/clang/test/CIR/CodeGen/pointers.cpp
+++ b/clang/test/CIR/CodeGen/pointers.cpp
@@ -28,6 +28,10 @@ void foo(int *iptr, char *cptr, unsigned ustride) {
   // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, 
!s32i
   // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : 
!s32i), !cir.ptr<!s32i>
 
+  4 + iptr;
+  // CHECK: %[[#STRIDE:]] = cir.const #cir.int<4> : !s32i
+  // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), 
!cir.ptr<!s32i>
+
   iptr++;
   // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i
   // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), 
!cir.ptr<!s32i>

>From 5636bb03e89260656905e908688034a6fda8228f Mon Sep 17 00:00:00 2001
From: Andy Kaylor <akay...@nvidia.com>
Date: Wed, 30 Apr 2025 16:53:39 -0700
Subject: [PATCH 3/3] Fix formatting

---
 clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp 
b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
index cfd2d0d89a207..78eb3cbd430bc 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp
@@ -1188,8 +1188,7 @@ static mlir::Value emitPointerArithmetic(CIRGenFunction 
&cgf,
   }
   assert(mlir::isa<cir::PointerType>(pointer.getType()) &&
          "Need a pointer operand");
-  assert(mlir::isa<cir::IntType>(index.getType()) &&
-         "Need an integer operand");
+  assert(mlir::isa<cir::IntType>(index.getType()) && "Need an integer 
operand");
 
   // Some versions of glibc and gcc use idioms (particularly in their malloc
   // routines) that add a pointer-sized integer (known to be a pointer value)

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[clang] [CIR] Upstream pointer arithmetic support (PR #138041)

Reply via email to