https://github.com/HendrikHuebner updated https://github.com/llvm/llvm-project/pull/168578
From 185d4f496ffb5c9299089606213c52cb5b7a60bb Mon Sep 17 00:00:00 2001 From: hhuebner <[email protected]> Date: Tue, 18 Nov 2025 18:50:47 +0100 Subject: [PATCH 1/3] [CIR] builtin operator new/delete --- clang/include/clang/CIR/MissingFeatures.h | 1 + clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 9 ++++++++ clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 1 + clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 26 +++++++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenFunction.h | 3 +++ 5 files changed, 40 insertions(+) diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 567c79a27c07b..477d8046e18c0 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -200,6 +200,7 @@ struct MissingFeatures { static bool aggValueSlotMayOverlap() { return false; } static bool aggValueSlotVolatile() { return false; } static bool alignCXXRecordDecl() { return false; } + static bool allocToken() { return false; } static bool appleKext() { return false; } static bool armComputeVolatileBitfields() { return false; } static bool asmGoto() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index 77f19343653db..c038f0be81137 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -18,6 +18,7 @@ #include "mlir/IR/BuiltinAttributes.h" #include "mlir/IR/Value.h" #include "mlir/Support/LLVM.h" +#include "clang/AST/DeclBase.h" #include "clang/AST/Expr.h" #include "clang/AST/GlobalDecl.h" #include "clang/Basic/Builtins.h" @@ -520,6 +521,13 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, cir::PrefetchOp::create(builder, loc, address, locality, isWrite); return RValue::get(nullptr); } + case Builtin::BI__builtin_operator_new: + return emitNewOrDeleteBuiltinCall( + e->getCallee()->getType()->castAs<FunctionProtoType>(), e, false); + case Builtin::BI__builtin_operator_delete: + 
emitNewOrDeleteBuiltinCall( + e->getCallee()->getType()->castAs<FunctionProtoType>(), e, true); + return RValue::get(nullptr); } // If this is an alias for a lib function (e.g. __builtin_sin), emit @@ -559,6 +567,7 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, unsigned builtinID, std::string("unimplemented builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); return getUndefRValue(e->getType()); + } static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf, diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 91a59d60fcb3e..57b49f4640c4c 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -2006,6 +2006,7 @@ RValue CIRGenFunction::emitCallExpr(const clang::CallExpr *e, return emitCall(e->getCallee()->getType(), callee, e, returnValue); } + /// Emit code to compute the specified expression, ignoring the result. void CIRGenFunction::emitIgnoredExpr(const Expr *e) { if (e->isPRValue()) { diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 007d873ff5db6..345bb0e6bed9f 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -610,6 +610,32 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf, return rv; } +RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type, + const CallExpr *callExpr, + bool isDelete) { + CallArgList args; + emitCallArgs(args, type, callExpr->arguments()); + // Find the allocation or deallocation function that we're calling. + ASTContext &astContext = getContext(); + DeclarationName name = astContext.DeclarationNames.getCXXOperatorName( + isDelete ? 
OO_Delete : OO_New); + + clang::DeclContextLookupResult lookupResult = astContext.getTranslationUnitDecl()->lookup(name); + for (const auto *decl : lookupResult) { + if (const auto *funcDecl = dyn_cast<FunctionDecl>(decl)) { + if (astContext.hasSameType(funcDecl->getType(), QualType(type, 0))) { + // Used for -fsanitize=alloc-token + assert(!cir::MissingFeatures::allocToken()); + + // Emit the call to operator new/delete. + return emitNewDeleteCall(*this, funcDecl, type, args); + } + } + } + + llvm_unreachable("predeclared global operator new/delete is missing"); +} + namespace { /// Calls the given 'operator delete' on a single object. struct CallObjectDelete final : EHScopeStack::Cleanup { diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 00f289bcd1bb2..060a31edea2cf 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1476,6 +1476,9 @@ class CIRGenFunction : public CIRGenTypeCache { RValue emitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *expr); + RValue emitNewOrDeleteBuiltinCall(const FunctionProtoType* type, + const CallExpr* call, bool isDelete); + void emitCXXTemporary(const CXXTemporary *temporary, QualType tempType, Address ptr); From 948d5e7945bdd19ee4f7f920296aab117c2ccf21 Mon Sep 17 00:00:00 2001 From: hhuebner <[email protected]> Date: Tue, 18 Nov 2025 18:54:20 +0100 Subject: [PATCH 2/3] formatting --- clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp | 1 - clang/lib/CIR/CodeGen/CIRGenExpr.cpp | 1 - clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp | 7 ++++--- clang/lib/CIR/CodeGen/CIRGenFunction.h | 4 ++-- 4 files changed, 6 insertions(+), 7 deletions(-) diff --git a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp index c038f0be81137..2fbad2ecce0d3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenBuiltin.cpp @@ -567,7 +567,6 @@ RValue CIRGenFunction::emitBuiltinExpr(const GlobalDecl &gd, 
unsigned builtinID, std::string("unimplemented builtin call: ") + getContext().BuiltinInfo.getName(builtinID)); return getUndefRValue(e->getType()); - } static mlir::Value emitTargetArchBuiltinExpr(CIRGenFunction *cgf, diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index 57b49f4640c4c..91a59d60fcb3e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -2006,7 +2006,6 @@ RValue CIRGenFunction::emitCallExpr(const clang::CallExpr *e, return emitCall(e->getCallee()->getType(), callee, e, returnValue); } - /// Emit code to compute the specified expression, ignoring the result. void CIRGenFunction::emitIgnoredExpr(const Expr *e) { if (e->isPRValue()) { diff --git a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp index 345bb0e6bed9f..f28887df34212 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExprCXX.cpp @@ -611,8 +611,8 @@ static RValue emitNewDeleteCall(CIRGenFunction &cgf, } RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type, - const CallExpr *callExpr, - bool isDelete) { + const CallExpr *callExpr, + bool isDelete) { CallArgList args; emitCallArgs(args, type, callExpr->arguments()); // Find the allocation or deallocation function that we're calling. @@ -620,7 +620,8 @@ RValue CIRGenFunction::emitNewOrDeleteBuiltinCall(const FunctionProtoType *type, DeclarationName name = astContext.DeclarationNames.getCXXOperatorName( isDelete ? 
OO_Delete : OO_New); - clang::DeclContextLookupResult lookupResult = astContext.getTranslationUnitDecl()->lookup(name); + clang::DeclContextLookupResult lookupResult = + astContext.getTranslationUnitDecl()->lookup(name); for (const auto *decl : lookupResult) { if (const auto *funcDecl = dyn_cast<FunctionDecl>(decl)) { if (astContext.hasSameType(funcDecl->getType(), QualType(type, 0))) { diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index 060a31edea2cf..0cfcc2be0255e 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -1476,8 +1476,8 @@ class CIRGenFunction : public CIRGenTypeCache { RValue emitCXXPseudoDestructorExpr(const CXXPseudoDestructorExpr *expr); - RValue emitNewOrDeleteBuiltinCall(const FunctionProtoType* type, - const CallExpr* call, bool isDelete); + RValue emitNewOrDeleteBuiltinCall(const FunctionProtoType *type, + const CallExpr *call, bool isDelete); void emitCXXTemporary(const CXXTemporary *temporary, QualType tempType, Address ptr); From 1c5877f4e984d880f9f66fa72e13244a53673cbf Mon Sep 17 00:00:00 2001 From: hhuebner <[email protected]> Date: Tue, 18 Nov 2025 18:54:54 +0100 Subject: [PATCH 3/3] Add test --- clang/lib/CIR/CodeGen/CIRGenTBAA.cpp | 485 ++++++++++++++++++ clang/lib/CIR/CodeGen/CIRGenTBAA.h | 194 +++++++ clang/test/CIR/CodeGen/builtin_new_delete.cpp | 44 ++ 3 files changed, 723 insertions(+) create mode 100644 clang/lib/CIR/CodeGen/CIRGenTBAA.cpp create mode 100644 clang/lib/CIR/CodeGen/CIRGenTBAA.h create mode 100644 clang/test/CIR/CodeGen/builtin_new_delete.cpp diff --git a/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp b/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp new file mode 100644 index 0000000000000..b0750a9c77c42 --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenTBAA.cpp @@ -0,0 +1,485 @@ +#include "CIRGenTBAA.h" +#include "CIRGenCXXABI.h" +#include "CIRGenTypes.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/MLIRContext.h" +#include 
"mlir/Interfaces/DataLayoutInterfaces.h" +#include "mlir/Support/LLVM.h" +#include "clang/AST/ASTContext.h" +#include "clang/AST/RecordLayout.h" +#include "clang/AST/Type.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +#include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" +#include "llvm/Support/ErrorHandling.h" +#include <cassert> +namespace clang::CIRGen { + +cir::TBAAAttr tbaa_NYI(mlir::MLIRContext *mlirContext) { + return cir::TBAAAttr::get(mlirContext); +} + +CIRGenTBAA::CIRGenTBAA(mlir::MLIRContext *mlirContext, + clang::ASTContext &astContext, CIRGenTypes &types, + mlir::ModuleOp moduleOp, + const clang::CodeGenOptions &codeGenOpts, + const clang::LangOptions &features) + : mlirContext(mlirContext), astContext(astContext), types(types), + moduleOp(moduleOp), codeGenOpts(codeGenOpts), features(features) {} + +cir::TBAAAttr CIRGenTBAA::getChar() { + return cir::TBAAOmnipotentCharAttr::get(mlirContext); +} + +static bool typeHasMayAlias(clang::QualType qty) { + // Tagged types have declarations, and therefore may have attributes. + if (auto *td = qty->getAsTagDecl()) + if (td->hasAttr<MayAliasAttr>()) + return true; + + // Also look for may_alias as a declaration attribute on a typedef. + // FIXME: We should follow GCC and model may_alias as a type attribute + // rather than as a declaration attribute. + while (auto *tt = qty->getAs<TypedefType>()) { + if (tt->getDecl()->hasAttr<MayAliasAttr>()) + return true; + qty = tt->desugar(); + } + return false; +} + +/// Check if the given type is a valid base type to be used in access tags. +static bool isValidBaseType(clang::QualType qty) { + if (const clang::RecordType *tty = qty->getAs<clang::RecordType>()) { + const clang::RecordDecl *rd = tty->getDecl()->getDefinition(); + // Incomplete types are not valid base access types. + if (!rd) + return false; + if (rd->hasFlexibleArrayMember()) + return false; + // rd can be struct, union, class, interface or enum. 
+ // For now, we only handle struct and class. + if (rd->isStruct() || rd->isClass()) + return true; + } + return false; +} + +cir::TBAAScalarAttr CIRGenTBAA::getScalarTypeInfo(clang::QualType qty) { + const clang::Type *ty = astContext.getCanonicalType(qty).getTypePtr(); + assert(mlir::isa<clang::BuiltinType>(ty)); + const clang::BuiltinType *bty = mlir::dyn_cast<BuiltinType>(ty); + return cir::TBAAScalarAttr::get(mlirContext, bty->getName(features), + types.convertType(qty)); +} + +cir::TBAAAttr CIRGenTBAA::getTypeInfoHelper(clang::QualType qty) { + const clang::Type *ty = astContext.getCanonicalType(qty).getTypePtr(); + // Handle builtin types. + if (const clang::BuiltinType *bty = mlir::dyn_cast<BuiltinType>(ty)) { + switch (bty->getKind()) { + // Character types are special and can alias anything. + // In C++, this technically only includes "char" and "unsigned char", + // and not "signed char". In C, it includes all three. For now, + // the risk of exploiting this detail in C++ seems likely to outweigh + // the benefit. + case BuiltinType::Char_U: + case BuiltinType::Char_S: + case BuiltinType::UChar: + case BuiltinType::SChar: + return getChar(); + + // Unsigned types can alias their corresponding signed types. 
+ case BuiltinType::UShort: + return getScalarTypeInfo(astContext.ShortTy); + case BuiltinType::UInt: + return getScalarTypeInfo(astContext.IntTy); + case BuiltinType::ULong: + return getScalarTypeInfo(astContext.LongTy); + case BuiltinType::ULongLong: + return getScalarTypeInfo(astContext.LongLongTy); + case BuiltinType::UInt128: + return getScalarTypeInfo(astContext.Int128Ty); + + case BuiltinType::UShortFract: + return getScalarTypeInfo(astContext.ShortFractTy); + case BuiltinType::UFract: + return getScalarTypeInfo(astContext.FractTy); + case BuiltinType::ULongFract: + return getScalarTypeInfo(astContext.LongFractTy); + + case BuiltinType::SatUShortFract: + return getScalarTypeInfo(astContext.SatShortFractTy); + case BuiltinType::SatUFract: + return getScalarTypeInfo(astContext.SatFractTy); + case BuiltinType::SatULongFract: + return getScalarTypeInfo(astContext.SatLongFractTy); + + case BuiltinType::UShortAccum: + return getScalarTypeInfo(astContext.ShortAccumTy); + case BuiltinType::UAccum: + return getScalarTypeInfo(astContext.AccumTy); + case BuiltinType::ULongAccum: + return getScalarTypeInfo(astContext.LongAccumTy); + + case BuiltinType::SatUShortAccum: + return getScalarTypeInfo(astContext.SatShortAccumTy); + case BuiltinType::SatUAccum: + return getScalarTypeInfo(astContext.SatAccumTy); + case BuiltinType::SatULongAccum: + return getScalarTypeInfo(astContext.SatLongAccumTy); + + // Treat all other builtin types as distinct types. This includes + // treating wchar_t, char16_t, and char32_t as distinct from their + // "underlying types". + default: + return getScalarTypeInfo(qty); + } + } + // C++1z [basic.lval]p10: "If a program attempts to access the stored value of + // an object through a glvalue of other than one of the following types the + // behavior is undefined: [...] a char, unsigned char, or std::byte type." + if (ty->isStdByteType()) + return getChar(); + + // Handle pointers and references. 
+ // + // C has a very strict rule for pointer aliasing. C23 6.7.6.1p2: + // For two pointer types to be compatible, both shall be identically + // qualified and both shall be pointers to compatible types. + // + // This rule is impractically strict; we want to at least ignore CVR + // qualifiers. Distinguishing by CVR qualifiers would make it UB to + // e.g. cast a `char **` to `const char * const *` and dereference it, + // which is too common and useful to invalidate. C++'s similar types + // rule permits qualifier differences in these nested positions; in fact, + // C++ even allows that cast as an implicit conversion. + // + // Other qualifiers could theoretically be distinguished, especially if + // they involve a significant representation difference. We don't + // currently do so, however. + if (ty->isPointerType() || ty->isReferenceType()) { + auto anyPtr = cir::TBAAScalarAttr::get(mlirContext, "any pointer", + types.convertType(qty)); + if (!codeGenOpts.PointerTBAA) + return anyPtr; + // C++ [basic.lval]p11 permits objects to be accessed through an l-value of + // similar type. Two types are similar under C++ [conv.qual]p2 if the + // decomposition of the types into pointers, member pointers, and arrays has + // the same structure when ignoring cv-qualifiers at each level of the + // decomposition. Meanwhile, C makes T(*)[] and T(*)[N] compatible, which + // would really complicate any attempt to distinguish pointers to arrays by + // their bounds. It's simpler, and much easier to explain to users, to + // simply treat all pointers to arrays as pointers to their element type for + // aliasing purposes. So when creating a TBAA tag for a pointer type, we + // recursively ignore both qualifiers and array types when decomposing the + // pointee type. The only meaningful remaining structure is the number of + // pointer types we encountered along the way, so we just produce the tag + // "p<depth> <base type tag>". 
If we do find a member pointer type, for now + // we just conservatively bail out with AnyPtr (below) rather than trying to + // create a tag that honors the similar-type rules while still + // distinguishing different kinds of member pointer. + unsigned ptrDepth = 0; + do { + ptrDepth++; + ty = ty->getPointeeType()->getBaseElementTypeUnsafe(); + } while (ty->isPointerType()); + assert(!isa<VariableArrayType>(ty)); + // When the underlying type is a builtin type, we compute the pointee type + // string recursively, which is implicitly more forgiving than the standards + // require. Effectively, we are turning the question "are these types + // compatible/similar" into "are accesses to these types allowed to alias". + // In both C and C++, the latter question has special carve-outs for + // signedness mismatches that only apply at the top level. As a result, we + // are allowing e.g. `int *` l-values to access `unsigned *` objects. + SmallString<256> tyName; + + if (isa<BuiltinType>(ty)) { + auto scalarAttr = getScalarTypeInfo(ty->getCanonicalTypeInternal()); + tyName = scalarAttr.getId(); + } else { + // Be conservative if the type isn't a RecordType. We are specifically + // required to do this for member pointers until we implement the + // similar-types rule. + const auto *rt = ty->getAs<RecordType>(); + if (!rt) + return anyPtr; + + // For unnamed structs or unions C's compatible types rule applies. Two + // compatible types in different compilation units can have different + // mangled names, meaning the metadata emitted below would incorrectly + // mark them as no-alias. Use AnyPtr for such types in both C and C++, as + // C and C++ types may be visible when doing LTO. + // + // Note that using AnyPtr is overly conservative. We could summarize the + // members of the type, as per the C compatibility rule in the future. 
+ // This also covers anonymous structs and unions, which have a different + // compatibility rule, but it doesn't matter because you can never have a + // pointer to an anonymous struct or union. + if (!rt->getDecl()->getDeclName()) + return anyPtr; + + // For non-builtin types use the mangled name of the canonical type. + llvm::raw_svector_ostream tyOut(tyName); + types.getCXXABI().getMangleContext().mangleCanonicalTypeName( + QualType(ty, 0), tyOut); + } + + SmallString<256> outName("p"); + outName += std::to_string(ptrDepth); + outName += " "; + outName += tyName; + return cir::TBAAScalarAttr::get(mlirContext, outName, + types.convertType(qty), anyPtr); + } + // Accesses to arrays are accesses to objects of their element types. + if (codeGenOpts.NewStructPathTBAA && ty->isArrayType()) { + assert(!cir::MissingFeatures::tbaaNewStructPath()); + return tbaa_NYI(mlirContext); + } + // Enum types are distinct types. In C++ they have "underlying types", + // however they aren't related for TBAA. + if (const EnumType *ety = dyn_cast<EnumType>(ty)) { + if (!features.CPlusPlus) + return getTypeInfo(ety->getDecl()->getIntegerType()); + + // In C++ mode, types have linkage, so we can rely on the ODR and + // on their mangled names, if they're external. + // TODO: Is there a way to get a program-wide unique name for a + // decl with local linkage or no linkage? + if (!ety->getDecl()->isExternallyVisible()) + return getChar(); + + SmallString<256> outName; + llvm::raw_svector_ostream out(outName); + types.getCXXABI().getMangleContext().mangleCanonicalTypeName( + QualType(ety, 0), out); + return cir::TBAAScalarAttr::get(mlirContext, outName, + types.convertType(qty)); + } + if (const auto *eit = dyn_cast<BitIntType>(ty)) { + SmallString<256> outName; + llvm::raw_svector_ostream out(outName); + // Don't specify signed/unsigned since integer types can alias despite sign + // differences. 
+ out << "_BitInt(" << eit->getNumBits() << ')'; + return cir::TBAAScalarAttr::get(mlirContext, outName, + types.convertType(qty)); + } + // For now, handle any other kind of type conservatively. + return getChar(); +} + +cir::TBAAAttr CIRGenTBAA::getTypeInfo(clang::QualType qty) { + // At -O0 or relaxed aliasing, TBAA is not emitted for regular types. + if (codeGenOpts.OptimizationLevel == 0 || codeGenOpts.RelaxedAliasing) { + return nullptr; + } + + // If the type has the may_alias attribute (even on a typedef), it is + // effectively in the general char alias class. + if (typeHasMayAlias(qty)) { + assert(!cir::MissingFeatures::tbaaMayAlias()); + return getChar(); + } + // We need this function to not fall back to returning the "omnipotent char" + // type node for aggregate and union types. Otherwise, any dereference of an + // aggregate will result in the may-alias access descriptor, meaning all + // subsequent accesses to direct and indirect members of that aggregate will + // be considered may-alias too. + // TODO: Combine getTypeInfo() and getValidBaseTypeInfo() into one function. + if (isValidBaseType(qty)) { + assert(!cir::MissingFeatures::tbaaTagForStruct()); + return getValidBaseTypeInfo(qty); + } + + const clang::Type *ty = astContext.getCanonicalType(qty).getTypePtr(); + if (metadataCache.contains(ty)) { + return metadataCache[ty]; + } + + // Note that the following helper call is allowed to add new nodes to the + // cache, which invalidates all its previously obtained iterators. So we + // first generate the node for the type and then add that node to the + // cache. + auto typeNode = getTypeInfoHelper(qty); + return metadataCache[ty] = typeNode; +} + +TBAAAccessInfo CIRGenTBAA::getAccessInfo(clang::QualType accessType) { + // Pointee values may have incomplete types, but they shall never be + // dereferenced. 
+ if (accessType->isIncompleteType()) { + assert(!cir::MissingFeatures::tbaaIncompleteType()); + return TBAAAccessInfo::getIncompleteInfo(); + } + + if (typeHasMayAlias(accessType)) { + assert(!cir::MissingFeatures::tbaaMayAlias()); + return TBAAAccessInfo::getMayAliasInfo(); + } + + uint64_t size = astContext.getTypeSizeInChars(accessType).getQuantity(); + return TBAAAccessInfo(getTypeInfo(accessType), size); +} + +TBAAAccessInfo CIRGenTBAA::getVTablePtrAccessInfo(mlir::Type vtablePtrType) { + const mlir::DataLayout dataLayout(moduleOp); + auto size = dataLayout.getTypeSize(vtablePtrType); + return TBAAAccessInfo( + cir::TBAAVTablePointerAttr::get(mlirContext, vtablePtrType), size); +} + +mlir::ArrayAttr CIRGenTBAA::getTBAAStructInfo(clang::QualType qty) { + assert(!cir::MissingFeatures::tbaaStruct() && "tbaa.struct NYI"); + return mlir::ArrayAttr(); +} + +cir::TBAAAttr CIRGenTBAA::getBaseTypeInfo(clang::QualType qty) { + return isValidBaseType(qty) ? getValidBaseTypeInfo(qty) : nullptr; +} + +cir::TBAAAttr CIRGenTBAA::getValidBaseTypeInfo(clang::QualType qty) { + assert(isValidBaseType(qty) && "Must be a valid base type"); + + const clang::Type *ty = astContext.getCanonicalType(qty).getTypePtr(); + + // nullptr is a valid value in the cache, so use find rather than [] + auto iter = baseTypeMetadataCache.find(ty); + if (iter != baseTypeMetadataCache.end()) + return iter->second; + + // First calculate the metadata, before recomputing the insertion point, as + // the helper can recursively call us. 
+ auto typeNode = getBaseTypeInfoHelper(ty); + LLVM_ATTRIBUTE_UNUSED auto inserted = + baseTypeMetadataCache.insert({ty, typeNode}); + assert(inserted.second && "BaseType metadata was already inserted"); + + return typeNode; +} +cir::TBAAAttr CIRGenTBAA::getBaseTypeInfoHelper(const clang::Type *ty) { + using namespace clang; + if (auto *tty = mlir::dyn_cast<clang::RecordType>(ty)) { + const clang::RecordDecl *rd = tty->getDecl()->getDefinition(); + const ASTRecordLayout &layout = astContext.getASTRecordLayout(rd); + SmallVector<cir::TBAAMemberAttr, 4> fields; + if (const CXXRecordDecl *cxxrd = dyn_cast<CXXRecordDecl>(rd)) { + // Handle C++ base classes. Non-virtual bases can be treated as a kind of + // field. Virtual bases are more complex and omitted, but avoid an + // incomplete view for NewStructPathTBAA. + if (codeGenOpts.NewStructPathTBAA && cxxrd->getNumVBases() != 0) + return nullptr; + for (const CXXBaseSpecifier &cxxBaseSpecifier : cxxrd->bases()) { + if (cxxBaseSpecifier.isVirtual()) + continue; + QualType baseQTy = cxxBaseSpecifier.getType(); + const CXXRecordDecl *baseRD = baseQTy->getAsCXXRecordDecl(); + if (baseRD->isEmpty()) + continue; + auto typeNode = isValidBaseType(baseQTy) ? getValidBaseTypeInfo(baseQTy) + : getTypeInfo(baseQTy); + if (!typeNode) + return nullptr; + uint64_t offset = layout.getBaseClassOffset(baseRD).getQuantity(); + [[maybe_unused]] uint64_t size = + astContext.getASTRecordLayout(baseRD).getDataSize().getQuantity(); + fields.push_back( + cir::TBAAMemberAttr::get(mlirContext, typeNode, offset)); + } + // The order in which base class subobjects are allocated is + // unspecified, so may differ from declaration order. In particular, + // Itanium ABI will allocate a primary base first. Since we exclude + // empty subobjects, the objects are not overlapping and their offsets + // are unique. 
+ llvm::sort(fields, [](const cir::TBAAMemberAttr &lhs, + const cir::TBAAMemberAttr &rhs) { + return lhs.getOffset() < rhs.getOffset(); + }); + } + for (FieldDecl *field : rd->fields()) { + if (field->isZeroSize(astContext) || field->isUnnamedBitField()) + continue; + QualType fieldQTy = field->getType(); + auto typeNode = isValidBaseType(fieldQTy) ? getValidBaseTypeInfo(fieldQTy) + : getTypeInfo(fieldQTy); + if (!typeNode) + return nullptr; + + uint64_t bitOffset = layout.getFieldOffset(field->getFieldIndex()); + uint64_t offset = astContext.toCharUnitsFromBits(bitOffset).getQuantity(); + [[maybe_unused]] uint64_t size = + astContext.getTypeSizeInChars(fieldQTy).getQuantity(); + fields.push_back(cir::TBAAMemberAttr::get(mlirContext, typeNode, offset)); + } + + SmallString<256> outName; + if (features.CPlusPlus) { + // Don't use the mangler for C code. + llvm::raw_svector_ostream out(outName); + types.getCXXABI().getMangleContext().mangleCanonicalTypeName( + QualType(ty, 0), out); + } else { + outName = rd->getName(); + } + + if (codeGenOpts.NewStructPathTBAA) { + assert(!cir::MissingFeatures::tbaaNewStructPath()); + return nullptr; + } + return cir::TBAAStructAttr::get(mlirContext, outName, fields); + } + return nullptr; +} +cir::TBAAAttr CIRGenTBAA::getAccessTagInfo(TBAAAccessInfo tbaaInfo) { + assert(!tbaaInfo.isIncomplete() && + "Access to an object of an incomplete type!"); + + if (tbaaInfo.isMayAlias()) { + assert(!cir::MissingFeatures::tbaaMayAlias()); + tbaaInfo = TBAAAccessInfo(getChar(), tbaaInfo.size); + } + if (!tbaaInfo.accessType) { + return nullptr; + } + + if (!codeGenOpts.StructPathTBAA) + tbaaInfo = TBAAAccessInfo(tbaaInfo.accessType, tbaaInfo.size); + + if (!tbaaInfo.baseType) { + tbaaInfo.baseType = tbaaInfo.accessType; + assert(!tbaaInfo.offset && + "Nonzero offset for an access with no base type!"); + } + if (codeGenOpts.NewStructPathTBAA) { + assert(!cir::MissingFeatures::tbaaNewStructPath()); + return tbaa_NYI(mlirContext); + } + if 
(tbaaInfo.baseType == tbaaInfo.accessType) { + return tbaaInfo.accessType; + } + return cir::TBAATagAttr::get(mlirContext, tbaaInfo.baseType, + tbaaInfo.accessType, tbaaInfo.offset); +} + +TBAAAccessInfo CIRGenTBAA::mergeTBAAInfoForCast(TBAAAccessInfo sourceInfo, + TBAAAccessInfo targetInfo) { + assert(!cir::MissingFeatures::tbaaMergeTBAAInfo()); + return TBAAAccessInfo(); +} + +TBAAAccessInfo +CIRGenTBAA::mergeTBAAInfoForConditionalOperator(TBAAAccessInfo infoA, + TBAAAccessInfo infoB) { + assert(!cir::MissingFeatures::tbaaMergeTBAAInfo()); + return TBAAAccessInfo(); +} + +TBAAAccessInfo +CIRGenTBAA::mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo destInfo, + TBAAAccessInfo srcInfo) { + assert(!cir::MissingFeatures::tbaaMergeTBAAInfo()); + return TBAAAccessInfo(); +} + +} // namespace clang::CIRGen diff --git a/clang/lib/CIR/CodeGen/CIRGenTBAA.h b/clang/lib/CIR/CodeGen/CIRGenTBAA.h new file mode 100644 index 0000000000000..3272c1630916d --- /dev/null +++ b/clang/lib/CIR/CodeGen/CIRGenTBAA.h @@ -0,0 +1,194 @@ +//===--- CIRGenTBAA.h - TBAA information for LLVM CIRGen --------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// +// +// This is the code that manages TBAA information and defines the TBAA policy +// for the optimizer to use. 
+// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENTBAA_H +#define LLVM_CLANG_LIB_CIR_CODEGEN_CIRGENTBAA_H +#include "mlir/IR/Attributes.h" +#include "mlir/IR/BuiltinAttributes.h" +#include "mlir/IR/BuiltinOps.h" +#include "mlir/IR/MLIRContext.h" +#include "clang/AST/Type.h" +#include "clang/Basic/CodeGenOptions.h" +#include "clang/CIR/Dialect/IR/CIRAttrs.h" +namespace clang::CIRGen { +class CIRGenTypes; +enum class TBAAAccessKind : unsigned { + Ordinary, + MayAlias, + Incomplete, +}; +// Describes a memory access in terms of TBAA. +struct TBAAAccessInfo { + TBAAAccessInfo(TBAAAccessKind kind, cir::TBAAAttr baseType, + cir::TBAAAttr accessType, uint64_t offset, uint64_t size) + : kind(kind), baseType(baseType), accessType(accessType), offset(offset), + size(size) {} + + TBAAAccessInfo(cir::TBAAAttr baseType, cir::TBAAAttr accessType, + uint64_t offset, uint64_t size) + : kind(TBAAAccessKind::Ordinary), baseType(baseType), + accessType(accessType), offset(offset), size(size) {} + + explicit TBAAAccessInfo(cir::TBAAAttr accessType, uint64_t size) + : TBAAAccessInfo(TBAAAccessKind::Ordinary, /* baseType= */ {}, accessType, + /* offset= */ 0, size) {} + + TBAAAccessInfo() + : TBAAAccessInfo(/* accessType= */ nullptr, /* size= */ 0) {} + + static TBAAAccessInfo getMayAliasInfo() { + return TBAAAccessInfo(TBAAAccessKind::MayAlias, /* baseType= */ {}, + /* accessType= */ nullptr, + /* offset= */ 0, /* size= */ 0); + } + + bool isMayAlias() const { return kind == TBAAAccessKind::MayAlias; } + + static TBAAAccessInfo getIncompleteInfo() { + return TBAAAccessInfo(TBAAAccessKind::Incomplete, /* baseType= */ {}, + /* accessType= */ {}, + /* offset= */ 0, /* size= */ 0); + } + + bool isIncomplete() const { return kind == TBAAAccessKind::Incomplete; } + + bool operator==(const TBAAAccessInfo &other) const { + return kind == other.kind && baseType == other.baseType && + accessType == other.accessType && offset == 
other.offset && + size == other.size; + } + + bool operator!=(const TBAAAccessInfo &other) const { + return !(*this == other); + } + + explicit operator bool() const { return *this != TBAAAccessInfo(); } + + /// The kind of the access descriptor. + TBAAAccessKind kind; + + /// The base/leading access type. May be null if this access + /// descriptor represents an access that is not considered to be an access + /// to an aggregate or union member. + cir::TBAAAttr baseType; + + /// The final access type. May be null if there is no TBAA + /// information available about this access. + cir::TBAAAttr accessType; + + /// The byte offset of the final access within the base one. Must be + /// zero if the base access type is not specified. + uint64_t offset; + + /// The size of access, in bytes. + uint64_t size; +}; + +/// This class organizes the cross-module state that is used while lowering AST +/// types to LLVM types. +class CIRGenTBAA { + mlir::MLIRContext *mlirContext; + [[maybe_unused]] clang::ASTContext &astContext; + [[maybe_unused]] CIRGenTypes &types; + mlir::ModuleOp moduleOp; + [[maybe_unused]] const clang::CodeGenOptions &codeGenOpts; + [[maybe_unused]] const clang::LangOptions &features; + + llvm::DenseMap<const Type *, cir::TBAAAttr> metadataCache; + llvm::DenseMap<const Type *, cir::TBAAAttr> baseTypeMetadataCache; + + cir::TBAAAttr getChar(); + + // An internal helper function to generate metadata used + // to describe accesses to objects of the given type. 
+  cir::TBAAAttr getTypeInfoHelper(clang::QualType qty);
+  // NOTE(review): presumably the scalar-type counterpart of the helper above;
+  // the definition is not in this header -- confirm against the .cpp.
+  cir::TBAAScalarAttr getScalarTypeInfo(clang::QualType qty);
+
+  // NOTE(review): by their names these two back the public getBaseTypeInfo();
+  // how the work is split between them is not visible here -- confirm.
+  cir::TBAAAttr getValidBaseTypeInfo(clang::QualType qty);
+  cir::TBAAAttr getBaseTypeInfoHelper(const clang::Type *ty);
+
+public:
+  /// Construct the TBAA state. Each parameter carries the same name as one of
+  /// the private data members of this class.
+  CIRGenTBAA(mlir::MLIRContext *mlirContext, clang::ASTContext &astContext,
+             CIRGenTypes &types, mlir::ModuleOp moduleOp,
+             const clang::CodeGenOptions &codeGenOpts,
+             const clang::LangOptions &features);
+
+  /// Get attribute used to describe accesses to objects of the given type.
+  cir::TBAAAttr getTypeInfo(clang::QualType qty);
+
+  /// Get TBAA information that describes an access to an object of the given
+  /// type.
+  TBAAAccessInfo getAccessInfo(clang::QualType accessType);
+
+  /// Get the TBAA information that describes an access to a virtual table
+  /// pointer.
+  TBAAAccessInfo getVTablePtrAccessInfo(mlir::Type vtablePtrType);
+
+  /// Get the TBAAStruct attributes to be used for a memcpy of the given type.
+  mlir::ArrayAttr getTBAAStructInfo(clang::QualType qty);
+
+  /// Get attribute that describes the given base access type. Return null if
+  /// the type is not suitable for use in TBAA access tags.
+  cir::TBAAAttr getBaseTypeInfo(clang::QualType qty);
+
+  /// Get TBAA tag for a given memory access.
+  cir::TBAAAttr getAccessTagInfo(TBAAAccessInfo tbaaInfo);
+
+  /// Get merged TBAA information for the purpose of type casts.
+  TBAAAccessInfo mergeTBAAInfoForCast(TBAAAccessInfo sourceInfo,
+                                      TBAAAccessInfo targetInfo);
+
+  /// Get merged TBAA information for the purpose of conditional operator.
+  TBAAAccessInfo mergeTBAAInfoForConditionalOperator(TBAAAccessInfo infoA,
+                                                     TBAAAccessInfo infoB);
+
+  /// Get merged TBAA information for the purpose of memory transfer calls.
+  TBAAAccessInfo mergeTBAAInfoForMemoryTransfer(TBAAAccessInfo destInfo,
+                                                TBAAAccessInfo srcInfo);
+};
+} // namespace clang::CIRGen
+namespace llvm {
+/// DenseMap traits for TBAAAccessInfo. The empty and tombstone keys are built
+/// field by field from the corresponding keys of each member's own
+/// DenseMapInfo; the hash is the XOR of the per-member hashes.
+template <> struct DenseMapInfo<clang::CIRGen::TBAAAccessInfo> {
+  static clang::CIRGen::TBAAAccessInfo getEmptyKey() {
+    using Info = clang::CIRGen::TBAAAccessInfo;
+    using Kind = clang::CIRGen::TBAAAccessKind;
+    const Kind emptyKind =
+        static_cast<Kind>(DenseMapInfo<unsigned>::getEmptyKey());
+    const cir::TBAAAttr emptyAttr = DenseMapInfo<cir::TBAAAttr>::getEmptyKey();
+    const uint64_t emptyWord = DenseMapInfo<uint64_t>::getEmptyKey();
+    return Info(emptyKind, /* baseType= */ emptyAttr,
+                /* accessType= */ emptyAttr, /* offset= */ emptyWord,
+                /* size= */ emptyWord);
+  }
+  static clang::CIRGen::TBAAAccessInfo getTombstoneKey() {
+    using Info = clang::CIRGen::TBAAAccessInfo;
+    using Kind = clang::CIRGen::TBAAAccessKind;
+    const Kind deadKind =
+        static_cast<Kind>(DenseMapInfo<unsigned>::getTombstoneKey());
+    const cir::TBAAAttr deadAttr =
+        DenseMapInfo<cir::TBAAAttr>::getTombstoneKey();
+    const uint64_t deadWord = DenseMapInfo<uint64_t>::getTombstoneKey();
+    return Info(deadKind, /* baseType= */ deadAttr,
+                /* accessType= */ deadAttr, /* offset= */ deadWord,
+                /* size= */ deadWord);
+  }
+  static unsigned getHashValue(const clang::CIRGen::TBAAAccessInfo &val) {
+    // Fold the five member hashes together with XOR, one field at a time.
+    unsigned hash =
+        DenseMapInfo<unsigned>::getHashValue(static_cast<unsigned>(val.kind));
+    hash ^= DenseMapInfo<cir::TBAAAttr>::getHashValue(val.baseType);
+    hash ^= DenseMapInfo<cir::TBAAAttr>::getHashValue(val.accessType);
+    hash ^= DenseMapInfo<uint64_t>::getHashValue(val.offset);
+    hash ^= DenseMapInfo<uint64_t>::getHashValue(val.size);
+    return hash;
+  }
+  static bool isEqual(const clang::CIRGen::TBAAAccessInfo &lhs,
+                      const clang::CIRGen::TBAAAccessInfo &rhs) {
+    // Defer to TBAAAccessInfo's member-wise comparison.
+    return lhs == rhs;
+  }
+};
+} // namespace llvm
+#endif
diff --git a/clang/test/CIR/CodeGen/builtin_new_delete.cpp b/clang/test/CIR/CodeGen/builtin_new_delete.cpp
new file mode 100644
index 0000000000000..d540bfcf8a36d
--- /dev/null
+++ b/clang/test/CIR/CodeGen/builtin_new_delete.cpp
@@ -0,0 +1,44 @@
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR
--input-file=%t.cir %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -std=c++20 -triple x86_64-unknown-linux-gnu -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+
+// Unsized form: the pointer returned by __builtin_operator_new(4) is passed
+// straight to the one-argument __builtin_operator_delete. The checks below
+// expect a call to _Znwm followed by a call to _ZdlPv on the same value.
+void test_builtins_basic() {
+  __builtin_operator_delete(__builtin_operator_new(4));
+  // CIR-LABEL: test_builtins_basic
+  // CIR: [[P:%.*]] = cir.call @_Znwm({{%.*}}) : (!u64i) -> !cir.ptr<!void>
+  // CIR: cir.call @_ZdlPv([[P]]) {{.*}}: (!cir.ptr<!void>) -> ()
+  // CIR: cir.return
+
+  // LLVM-LABEL: test_builtins_basic
+  // LLVM: [[P:%.*]] = call ptr @_Znwm(i64 4)
+  // LLVM: call void @_ZdlPv(ptr [[P]])
+  // LLVM: ret void
+
+  // OGCG-LABEL: test_builtins_basic
+  // OGCG: [[P:%.*]] = call {{.*}} ptr @_Znwm(i64 {{.*}} 4)
+  // OGCG: call void @_ZdlPv(ptr {{.*}} [[P]])
+  // OGCG: ret void
+}
+
+// Sized form: same allocation, but the two-argument
+// __builtin_operator_delete also receives the size, so the checks below
+// expect _ZdlPvm with the pointer and the constant 4.
+void test_sized_delete() {
+  __builtin_operator_delete(__builtin_operator_new(4), 4);
+
+  // CIR-LABEL: test_sized_delete
+  // CIR: [[P:%.*]] = cir.call @_Znwm({{%.*}}) : (!u64i) -> !cir.ptr<!void>
+  // CIR: cir.call @_ZdlPvm([[P]], {{%.*}}) {{.*}}: (!cir.ptr<!void>, !u64i) -> ()
+  // CIR: cir.return
+
+  // LLVM-LABEL: test_sized_delete
+  // LLVM: [[P:%.*]] = call ptr @_Znwm(i64 4)
+  // LLVM: call void @_ZdlPvm(ptr [[P]], i64 4)
+  // LLVM: ret void
+
+  // OGCG-LABEL: test_sized_delete
+  // OGCG: [[P:%.*]] = call {{.*}} ptr @_Znwm(i64 {{.*}} 4)
+  // OGCG: call void @_ZdlPvm(ptr {{.*}} [[P]], i64 {{.*}} 4)
+  // OGCG: ret void
+}
_______________________________________________
cfe-commits mailing list
[email protected]
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
