https://github.com/andykaylor updated https://github.com/llvm/llvm-project/pull/138041
>From ae1fd50a03a0e5f7c0c40463dc6a07f981f79f66 Mon Sep 17 00:00:00 2001 From: Andy Kaylor <akay...@nvidia.com> Date: Wed, 30 Apr 2025 11:10:53 -0700 Subject: [PATCH 1/2] [CIR] Upstream pointer arithmetic support This adds support for explicit pointer arithmetic. --- clang/lib/CIR/CodeGen/CIRGenBuilder.h | 9 +++ clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 91 ++++++++++++++++++++-- clang/test/CIR/CodeGen/pointers.cpp | 73 +++++++++++++++++ 3 files changed, 168 insertions(+), 5 deletions(-) create mode 100644 clang/test/CIR/CodeGen/pointers.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index 5f3bf8b416cde..7d16d70564227 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -185,6 +185,15 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { } bool isInt(mlir::Type i) { return mlir::isa<cir::IntType>(i); } + // + // Constant creation helpers + // ------------------------- + // + cir::ConstantOp getSInt32(int32_t c, mlir::Location loc) { + auto sInt32Ty = getSInt32Ty(); + return create<cir::ConstantOp>(loc, cir::IntAttr::get(sInt32Ty, c)); + } + // Creates constant nullptr for pointer type ty. cir::ConstantOp getNullPtr(mlir::Type ty, mlir::Location loc) { assert(!cir::MissingFeatures::targetCodeGenInfoGetNullPointer()); diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index aef5b125a2877..9fc9bbc3e0c93 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -388,9 +388,26 @@ class ScalarExprEmitter : public StmtVisitor<ScalarExprEmitter, mlir::Value> { // NOTE(CIR): clang calls CreateAdd but folds this to a unary op value = emitUnaryOp(e, kind, input, /*nsw=*/false); } - } else if (isa<PointerType>(type)) { - cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec pointer"); - return {}; + } else if (const PointerType *ptr = type->getAs<PointerType>()) { + QualType type = ptr->getPointeeType(); + if (cgf.getContext().getAsVariableArrayType(type)) { + // VLA types don't have constant size. + cgf.cgm.errorNYI(e->getSourceRange(), "Pointer arithmetic on VLA"); + return {}; + } else if (type->isFunctionType()) { + // Arithmetic on function pointers (!) is just +-1. + cgf.cgm.errorNYI(e->getSourceRange(), + "Pointer arithmetic on function pointer"); + return {}; + } else { + // For everything else, we can just do a simple increment. + mlir::Location loc = cgf.getLoc(e->getSourceRange()); + CIRGenBuilderTy &builder = cgf.getBuilder(); + int amount = (isInc ? 1 : -1); + mlir::Value amt = builder.getSInt32(amount, loc); + assert(!cir::MissingFeatures::sanitizers()); + value = builder.createPtrStride(loc, value, amt); + } } else if (type->isVectorType()) { cgf.cgm.errorNYI(e->getSourceRange(), "Unary inc/dec vector"); return {}; @@ -1152,8 +1169,72 @@ getUnwidenedIntegerType(const ASTContext &astContext, const Expr *e) { static mlir::Value emitPointerArithmetic(CIRGenFunction &cgf, const BinOpInfo &op, bool isSubtraction) { - cgf.cgm.errorNYI(op.loc, "pointer arithmetic"); - return {}; + // Must have binary (not unary) expr here. Unary pointer + // increment/decrement doesn't use this path. + const BinaryOperator *expr = cast<BinaryOperator>(op.e); + + mlir::Value pointer = op.lhs; + Expr *pointerOperand = expr->getLHS(); + mlir::Value index = op.rhs; + Expr *indexOperand = expr->getRHS(); + + // In a subtraction, the LHS is always the pointer. + if (!isSubtraction && !mlir::isa<cir::PointerType>(pointer.getType())) { + std::swap(pointer, index); + std::swap(pointerOperand, indexOperand); + } + + // Some versions of glibc and gcc use idioms (particularly in their malloc + // routines) that add a pointer-sized integer (known to be a pointer value) + // to a null pointer in order to cast the value back to an integer or as + // part of a pointer alignment algorithm. This is undefined behavior, but + // we'd like to be able to compile programs that use it. + // + // Normally, we'd generate a GEP with a null-pointer base here in response + // to that code, but it's also UB to dereference a pointer created that + // way. Instead (as an acknowledged hack to tolerate the idiom) we will + // generate a direct cast of the integer value to a pointer. + // + // The idiom (p = nullptr + N) is not met if any of the following are true: + // + // The operation is subtraction. + // The index is not pointer-sized. + // The pointer type is not byte-sized. + // + if (BinaryOperator::isNullPointerArithmeticExtension( + cgf.getContext(), op.opcode, expr->getLHS(), expr->getRHS())) + return cgf.getBuilder().createIntToPtr(index, pointer.getType()); + + // Differently from LLVM codegen, ABI bits for index sizes is handled during + // LLVM lowering. + + // If this is subtraction, negate the index. + if (isSubtraction) + index = cgf.getBuilder().createNeg(index); + + assert(!cir::MissingFeatures::sanitizers()); + + const PointerType *pointerType = + pointerOperand->getType()->getAs<PointerType>(); + if (!pointerType) { + cgf.cgm.errorNYI("Objective-C:pointer arithmetic with non-pointer type"); + return nullptr; + } + + QualType elementType = pointerType->getPointeeType(); + if (cgf.getContext().getAsVariableArrayType(elementType)) { + cgf.cgm.errorNYI("variable array type"); + return nullptr; + } + + if (elementType->isVoidType() || elementType->isFunctionType()) { + cgf.cgm.errorNYI("void* or function pointer arithmetic"); + return nullptr; + } + + assert(!cir::MissingFeatures::sanitizers()); + return cgf.getBuilder().create<cir::PtrStrideOp>( + cgf.getLoc(op.e->getExprLoc()), pointer.getType(), pointer, index); } mlir::Value ScalarExprEmitter::emitMul(const BinOpInfo &ops) { diff --git a/clang/test/CIR/CodeGen/pointers.cpp b/clang/test/CIR/CodeGen/pointers.cpp new file mode 100644 index 0000000000000..c791356e15bb6 --- /dev/null +++ b/clang/test/CIR/CodeGen/pointers.cpp @@ -0,0 +1,73 @@ +// RUN: %clang_cc1 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --input-file=%t.cir %s + +// Should generate basic pointer arithmetics. +void foo(int *iptr, char *cptr, unsigned ustride) { + iptr + 2; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i> + cptr + 3; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s8i> + iptr - 2; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<2> : !s32i + // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i> + cptr - 3; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<3> : !s32i + // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#STRIDE]]) : !s32i, !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s8i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s8i> + iptr + ustride; + // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !u32i), !cir.ptr<!s32i> + + // Must convert unsigned stride to a signed one. + iptr - ustride; + // CHECK: %[[#STRIDE:]] = cir.load %{{.+}} : !cir.ptr<!u32i>, !u32i + // CHECK: %[[#SIGNSTRIDE:]] = cir.cast(integral, %[[#STRIDE]] : !u32i), !s32i + // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i> + + iptr++; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i> + + iptr--; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<-1> : !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i> +} + +void testPointerSubscriptAccess(int *ptr) { +// CHECK: testPointerSubscriptAccess + ptr[1]; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i + // CHECK: %[[#PTR:]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> + // CHECK: cir.ptr_stride(%[[#PTR]] : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i> +} + +void testPointerMultiDimSubscriptAccess(int **ptr) { +// CHECK: testPointerMultiDimSubscriptAccess + ptr[1][2]; + // CHECK: %[[#STRIDE2:]] = cir.const #cir.int<2> : !s32i + // CHECK: %[[#STRIDE1:]] = cir.const #cir.int<1> : !s32i + // CHECK: %[[#PTR1:]] = cir.load %{{.+}} : !cir.ptr<!cir.ptr<!cir.ptr<!s32i>>>, !cir.ptr<!cir.ptr<!s32i>> + // CHECK: %[[#PTR2:]] = cir.ptr_stride(%[[#PTR1]] : !cir.ptr<!cir.ptr<!s32i>>, %[[#STRIDE1]] : !s32i), !cir.ptr<!cir.ptr<!s32i>> + // CHECK: %[[#PTR3:]] = cir.load %[[#PTR2]] : !cir.ptr<!cir.ptr<!s32i>>, !cir.ptr<!s32i> + // CHECK: cir.ptr_stride(%[[#PTR3]] : !cir.ptr<!s32i>, %[[#STRIDE2]] : !s32i), !cir.ptr<!s32i> +} + +// This test is meant to verify code that handles the 'p = nullptr + n' idiom +// used by some versions of glibc and gcc. This is undefined behavior but +// it is intended there to act like a conversion from a pointer-sized integer +// to a pointer, and we would like to tolerate that. + +#define NULLPTRINT ((int*)0) + +// This should get the inttoptr instruction. +int *testGnuNullPtrArithmetic(unsigned n) { +// CHECK: testGnuNullPtrArithmetic + return NULLPTRINT + n; + // CHECK: %[[NULLPTR:.*]] = cir.const #cir.ptr<null> : !cir.ptr<!s32i> + // CHECK: %[[N:.*]] = cir.load %{{.*}} : !cir.ptr<!u32i>, !u32i + // CHECK: %[[RESULT:.*]] = cir.ptr_stride(%[[NULLPTR]] : !cir.ptr<!s32i>, %[[N]] : !u32i), !cir.ptr<!s32i> +} >From b7a6299c4e04d7503c9b4b609fef7903d7764cac Mon Sep 17 00:00:00 2001 From: Andy Kaylor <akay...@nvidia.com> Date: Wed, 30 Apr 2025 16:31:44 -0700 Subject: [PATCH 2/2] Address review feedback --- clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp | 9 ++++++++- clang/test/CIR/CodeGen/pointers.cpp | 4 ++++ 2 files changed, 12 insertions(+), 1 deletion(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp index 9fc9bbc3e0c93..cfd2d0d89a207 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprScalar.cpp @@ -1178,11 +1178,18 @@ static mlir::Value emitPointerArithmetic(CIRGenFunction &cgf, mlir::Value index = op.rhs; Expr *indexOperand = expr->getRHS(); - // In a subtraction, the LHS is always the pointer. + // In the case of subtraction, the FE has ensured that the LHS is always the + // pointer. However, addition can have the pointer on either side. We will + // always have a pointer operand and an integer operand, so if the LHS wasn't + // a pointer, we need to swap our values. if (!isSubtraction && !mlir::isa<cir::PointerType>(pointer.getType())) { std::swap(pointer, index); std::swap(pointerOperand, indexOperand); } + assert(mlir::isa<cir::PointerType>(pointer.getType()) && + "Need a pointer operand"); + assert(mlir::isa<cir::IntType>(index.getType()) && + "Need an integer operand"); // Some versions of glibc and gcc use idioms (particularly in their malloc // routines) that add a pointer-sized integer (known to be a pointer value) diff --git a/clang/test/CIR/CodeGen/pointers.cpp b/clang/test/CIR/CodeGen/pointers.cpp index c791356e15bb6..06beecf076e20 100644 --- a/clang/test/CIR/CodeGen/pointers.cpp +++ b/clang/test/CIR/CodeGen/pointers.cpp @@ -28,6 +28,10 @@ void foo(int *iptr, char *cptr, unsigned ustride) { // CHECK: %[[#NEGSTRIDE:]] = cir.unary(minus, %[[#SIGNSTRIDE]]) : !s32i, !s32i // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#NEGSTRIDE]] : !s32i), !cir.ptr<!s32i> + 4 + iptr; + // CHECK: %[[#STRIDE:]] = cir.const #cir.int<4> : !s32i + // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i> + iptr++; // CHECK: %[[#STRIDE:]] = cir.const #cir.int<1> : !s32i // CHECK: cir.ptr_stride(%{{.+}} : !cir.ptr<!s32i>, %[[#STRIDE]] : !s32i), !cir.ptr<!s32i> _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits