llvmbot wrote:
<!--LLVM PR SUMMARY COMMENT--> @llvm/pr-subscribers-clang Author: Andy Kaylor (andykaylor) <details> <summary>Changes</summary> This adds the minimal support needed to handle string literals. --- Full diff: https://github.com/llvm/llvm-project/pull/140796.diff 10 Files Affected: - (modified) clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h (+7) - (modified) clang/include/clang/CIR/MissingFeatures.h (+1) - (modified) clang/lib/CIR/CodeGen/CIRGenBuilder.h (+28) - (modified) clang/lib/CIR/CodeGen/CIRGenExpr.cpp (+10) - (modified) clang/lib/CIR/CodeGen/CIRGenFunction.cpp (+2) - (modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+2) - (modified) clang/lib/CIR/CodeGen/CIRGenModule.cpp (+102) - (modified) clang/lib/CIR/CodeGen/CIRGenModule.h (+11) - (modified) clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp (+12) - (added) clang/test/CIR/CodeGen/string-literals.c (+56) ``````````diff diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h index b680e4162a5ce..738f33bf36c9e 100644 --- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h +++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h @@ -13,6 +13,7 @@ #include "clang/CIR/Dialect/IR/CIRAttrs.h" #include "clang/CIR/Dialect/IR/CIRDialect.h" #include "clang/CIR/Dialect/IR/CIRTypes.h" +#include "clang/CIR/MissingFeatures.h" #include "llvm/ADT/STLForwardCompat.h" #include "llvm/Support/ErrorHandling.h" @@ -177,6 +178,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder { return create<cir::AllocaOp>(loc, addrType, type, name, alignment); } + mlir::Value createGetGlobal(mlir::Location loc, cir::GlobalOp global) { + assert(!cir::MissingFeatures::addressSpace()); + return create<cir::GetGlobalOp>(loc, getPointerTo(global.getSymType()), + global.getSymName()); + } + cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr, bool isVolatile = false, uint64_t alignment = 0) { mlir::IntegerAttr intAttr; diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h index 7b33d94483d5f..d43e2d9f461d1 100644 --- a/clang/include/clang/CIR/MissingFeatures.h +++ b/clang/include/clang/CIR/MissingFeatures.h @@ -38,6 +38,7 @@ struct MissingFeatures { static bool opGlobalWeakRef() { return false; } static bool opGlobalLinkage() { return false; } static bool opGlobalSetVisitibility() { return false; } + static bool opGlobalUnnamedAddr() { return false; } static bool supportIFuncAttr() { return false; } static bool supportVisibility() { return false; } diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h index b1b0826a4e44a..aff8b8949f3ad 100644 --- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h +++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h @@ -26,6 +26,34 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy { CIRGenBuilderTy(mlir::MLIRContext &mlirContext, const CIRGenTypeCache &tc) : CIRBaseBuilderTy(mlirContext), typeCache(tc) {} + /// Get a cir::ConstArrayAttr for a string literal. + /// Note: This is different from what is returned by + /// mlir::Builder::getStringAttr() which is an mlir::StringAttr. + mlir::Attribute getString(llvm::StringRef str, mlir::Type eltTy, + unsigned size) { + unsigned finalSize = size ? size : str.size(); + + size_t lastNonZeroPos = str.find_last_not_of('\0'); + // If the string is full of null bytes, emit a #cir.zero rather than + // a #cir.const_array. + if (lastNonZeroPos == llvm::StringRef::npos) { + auto arrayTy = cir::ArrayType::get(eltTy, finalSize); + return cir::ZeroAttr::get(arrayTy); + } + // We emit trailing zeros only if there are multiple trailing zeros. + int trailingZerosNum = 0; + if (finalSize > lastNonZeroPos + 2) + trailingZerosNum = finalSize - lastNonZeroPos - 1; + auto truncatedArrayTy = + cir::ArrayType::get(eltTy, finalSize - trailingZerosNum); + auto fullArrayTy = cir::ArrayType::get(eltTy, finalSize); + return cir::ConstArrayAttr::get( + fullArrayTy, + mlir::StringAttr::get(str.drop_back(trailingZerosNum), + truncatedArrayTy), + trailingZerosNum); + } + std::string getUniqueAnonRecordName() { return getUniqueRecordName("anon"); } std::string getUniqueRecordName(const std::string &baseName) { diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp index c5fe3c1378624..a8fecafe4a1f3 100644 --- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp @@ -743,6 +743,16 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) { return lv; } +LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e) { + cir::GlobalOp globalOp = cgm.getGlobalForStringLiteral(e); + assert(!cir::MissingFeatures::opGlobalAlignment()); + mlir::Value addr = + builder.createGetGlobal(getLoc(e->getSourceRange()), globalOp); + return makeAddrLValue( + Address(addr, globalOp.getSymType(), CharUnits::fromQuantity(1)), + e->getType(), AlignmentSource::Decl); +} + /// Casts are never lvalues unless that cast is to a reference type. If the cast /// is to a reference, we can have the usual lvalue result, otherwise if a cast /// is needed by the code generator in an lvalue context, then it must mean that diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp index c3798de79d969..ce88e656a38e8 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp @@ -531,6 +531,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) { return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e)); case Expr::UnaryOperatorClass: return emitUnaryOpLValue(cast<UnaryOperator>(e)); + case Expr::StringLiteralClass: + return emitStringLiteralLValue(cast<StringLiteral>(e)); case Expr::MemberExprClass: return emitMemberExpr(cast<MemberExpr>(e)); case Expr::BinaryOperatorClass: diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h index ce080f481da6b..74f2e4043933d 100644 --- a/clang/lib/CIR/CodeGen/CIRGenFunction.h +++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h @@ -695,6 +695,8 @@ class CIRGenFunction : public CIRGenTypeCache { mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult); + LValue emitStringLiteralLValue(const StringLiteral *e); + mlir::LogicalResult emitSwitchBody(const clang::Stmt *s); mlir::LogicalResult emitSwitchCase(const clang::SwitchCase &s, bool buildingTopLevelCase); diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp index e170498b67548..5bae8908d5dbb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp +++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp @@ -562,6 +562,30 @@ void CIRGenModule::emitGlobalDefinition(clang::GlobalDecl gd, llvm_unreachable("Invalid argument to CIRGenModule::emitGlobalDefinition"); } +mlir::Attribute +CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *e) { + assert(!e->getType()->isPointerType() && "Strings are always arrays"); + + // Don't emit it as the address of the string, emit the string data itself + // as an inline array. + if (e->getCharByteWidth() == 1) { + SmallString<64> str(e->getString()); + + // Resize the string to the right size, which is indicated by its type. + const ConstantArrayType *cat = + astContext.getAsConstantArrayType(e->getType()); + uint64_t finalSize = cat->getZExtSize(); + str.resize(finalSize); + + mlir::Type eltTy = convertType(cat->getElementType()); + return builder.getString(str, eltTy, finalSize); + } + + errorNYI(e->getSourceRange(), + "getConstantArrayFromStringLiteral: wide characters"); + return mlir::Attribute(); +} + static bool shouldBeInCOMDAT(CIRGenModule &cgm, const Decl &d) { assert(!cir::MissingFeatures::supportComdat()); @@ -749,6 +773,84 @@ CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) { return getCIRLinkageForDeclarator(vd, linkage, isConstant); } +static cir::GlobalOp generateStringLiteral(mlir::Location loc, + mlir::TypedAttr c, CIRGenModule &cgm, + StringRef globalName) { + assert(!cir::MissingFeatures::addressSpace()); + + // Create a global variable for this string + // FIXME(cir): check for insertion point in module level. + cir::GlobalOp gv = + CIRGenModule::createGlobalOp(cgm, loc, globalName, c.getType()); + + // Set up extra information and add to the module + assert(!cir::MissingFeatures::opGlobalAlignment()); + assert(!cir::MissingFeatures::opGlobalLinkage()); + assert(!cir::MissingFeatures::opGlobalThreadLocal()); + assert(!cir::MissingFeatures::opGlobalUnnamedAddr()); + CIRGenModule::setInitializer(gv, c); + assert(!cir::MissingFeatures::supportComdat()); + assert(!cir::MissingFeatures::opGlobalDSOLocal()); + return gv; +} + +// LLVM IR automatically uniques names when new llvm::GlobalVariables are +// created. This is handy, for example, when creating globals for string +// literals. Since we don't do that when creating cir::GlobalOp's, we need +// a mechanism to generate a unique name in advance. +// +// For now, this mechanism is only used in cases where we know that the +// name is compiler-generated, so we don't use the MLIR symbol table for +// the lookup. +std::string CIRGenModule::getUniqueGlobalName(const std::string &baseName) { + // If this is the first time we've generated a name for this basename, use + // it as is and start a counter for this base name. + auto it = cgGlobalNames.find(baseName); + if (it == cgGlobalNames.end()) { + cgGlobalNames[baseName] = 0; + return baseName; + } + + std::string result = + baseName + "." + std::to_string(cgGlobalNames[baseName]++); + // There should not be any symbol with this name in the module. + assert(!mlir::SymbolTable::lookupSymbolIn(theModule, result)); + return result; +} + +/// Return a pointer to a constant array for the given string literal. +cir::GlobalOp CIRGenModule::getGlobalForStringLiteral(const StringLiteral *s, + StringRef name) { + mlir::Attribute c = getConstantArrayFromStringLiteral(s); + + if (getLangOpts().WritableStrings) { + errorNYI(s->getSourceRange(), + "getGlobalForStringLiteral: Writable strings"); + } + + // Mangle the string literal if that's how the ABI merges duplicate strings. + // Don't do it if they are writable, since we don't want writes in one TU to + // affect strings in another. + if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) && + !getLangOpts().WritableStrings) { + errorNYI(s->getSourceRange(), + "getGlobalForStringLiteral: mangle string literals"); + } + + // Unlike LLVM IR, CIR doesn't automatically unique names for globals, so + // we need to do that explicitly. + std::string uniqueName = getUniqueGlobalName(name.str()); + mlir::Location loc = getLoc(s->getSourceRange()); + auto typedC = llvm::cast<mlir::TypedAttr>(c); + assert(!cir::MissingFeatures::opGlobalAlignment()); + cir::GlobalOp gv = generateStringLiteral(loc, typedC, *this, uniqueName); + assert(!cir::MissingFeatures::opGlobalDSOLocal()); + + assert(!cir::MissingFeatures::sanitizers()); + + return gv; +} + void CIRGenModule::emitDeclContext(const DeclContext *dc) { for (Decl *decl : dc->decls()) { // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h index b67239fcff44b..9828e1068e4fb 100644 --- a/clang/lib/CIR/CodeGen/CIRGenModule.h +++ b/clang/lib/CIR/CodeGen/CIRGenModule.h @@ -126,6 +126,9 @@ class CIRGenModule : public CIRGenTypeCache { llvm::StringRef name, mlir::Type t, mlir::Operation *insertPoint = nullptr); + llvm::StringMap<unsigned> cgGlobalNames; + std::string getUniqueGlobalName(const std::string &baseName); + /// Return the mlir::Value for the address of the given global variable. /// If Ty is non-null and if the global doesn't exist, then it will be created /// with the specified type instead of whatever the normal requested type @@ -136,6 +139,14 @@ class CIRGenModule : public CIRGenTypeCache { getAddrOfGlobalVar(const VarDecl *d, mlir::Type ty = {}, ForDefinition_t isForDefinition = NotForDefinition); + /// Return a constant array for the given string. + mlir::Attribute getConstantArrayFromStringLiteral(const StringLiteral *e); + + /// Return a global symbol reference to a constant array for the given string + /// literal. + cir::GlobalOp getGlobalForStringLiteral(const StringLiteral *S, + llvm::StringRef Name = ".str"); + const TargetCIRGenInfo &getTargetCIRGenInfo(); /// Helpers to convert the presumed location of Clang's SourceLocation to an diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp index 365569ce1f48a..2516007afd561 100644 --- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp +++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp @@ -270,6 +270,18 @@ mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstArrayAttr attr) { result = rewriter.create<mlir::LLVM::InsertValueOp>(loc, result, init, idx); } + } else if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attr.getElts())) { + // TODO(cir): this diverges from traditional lowering. Normally the string + // would be a global constant that is memcopied. + auto arrayTy = mlir::dyn_cast<cir::ArrayType>(strAttr.getType()); + assert(arrayTy && "String attribute must have an array type"); + mlir::Type eltTy = arrayTy.getElementType(); + for (auto [idx, elt] : llvm::enumerate(strAttr)) { + auto init = rewriter.create<mlir::LLVM::ConstantOp>( + loc, converter->convertType(eltTy), elt); + result = + rewriter.create<mlir::LLVM::InsertValueOp>(loc, result, init, idx); + } } else { llvm_unreachable("unexpected ConstArrayAttr elements"); } diff --git a/clang/test/CIR/CodeGen/string-literals.c b/clang/test/CIR/CodeGen/string-literals.c new file mode 100644 index 0000000000000..873b00d9c9a98 --- /dev/null +++ b/clang/test/CIR/CodeGen/string-literals.c @@ -0,0 +1,56 @@ +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir +// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t-cir.ll +// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s +// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll +// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s + +// LLVM: @[[STR1_GLOBAL:.*]] = dso_local global [2 x i8] c"1\00" +// LLVM: @[[STR2_GLOBAL:.*]] = dso_local global [1 x i8] zeroinitializer +// LLVM: @[[STR3_GLOBAL:.*]] = dso_local global [2 x i8] zeroinitializer + +// OGCG: @[[STR1_GLOBAL:.*]] = private unnamed_addr constant [2 x i8] c"1\00" +// OGCG: @[[STR2_GLOBAL:.*]] = private unnamed_addr constant [1 x i8] zeroinitializer +// OGCG: @[[STR3_GLOBAL:.*]] = private unnamed_addr constant [2 x i8] zeroinitializer + +char *f1() { + return "1"; +} + +// CIR: cir.global external @[[STR1_GLOBAL:.*]] = #cir.const_array<"1\00" : !cir.array<!s8i x 2>> : !cir.array<!s8i x 2> +// CIR: cir.func @f1() +// CIR: %[[STR:.*]] = cir.get_global @[[STR1_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 2>> + +// LLVM: define ptr @f1() +// LLVM: store ptr @[[STR1_GLOBAL]], ptr {{.*}} + +// OGCG: define {{.*}}ptr @f1() +// OGCG: ret ptr @[[STR1_GLOBAL]] + +char *f2() { + return ""; +} + +// CIR: cir.global external @[[STR2_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 1> +// CIR: cir.func @f2() +// CIR: %[[STR2:.*]] = cir.get_global @[[STR2_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 1>> + +// LLVM: define ptr @f2() +// LLVM: store ptr @[[STR2_GLOBAL]], ptr {{.*}} + +// OGCG: define {{.*}}ptr @f2() +// OGCG: ret ptr @[[STR2_GLOBAL]] + +char *f3() { + return "\00"; +} + +// CIR: cir.global external @[[STR3_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 2> +// CIR: cir.func @f3() +// CIR: %[[STR3:.*]] = cir.get_global @[[STR3_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 2>> + +// LLVM: define ptr @f3() +// LLVM: store ptr @[[STR3_GLOBAL]], ptr {{.*}} + +// OGCG: define {{.*}}ptr @f3() +// OGCG: ret ptr @[[STR3_GLOBAL]] `````````` </details> https://github.com/llvm/llvm-project/pull/140796 _______________________________________________ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits