llvmbot wrote:

<!--LLVM PR SUMMARY COMMENT-->

@llvm/pr-subscribers-clang

Author: Andy Kaylor (andykaylor)

<details>
<summary>Changes</summary>

This adds the minimal support needed to handle string literals.

---
Full diff: https://github.com/llvm/llvm-project/pull/140796.diff


10 Files Affected:

- (modified) clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h (+7) 
- (modified) clang/include/clang/CIR/MissingFeatures.h (+1) 
- (modified) clang/lib/CIR/CodeGen/CIRGenBuilder.h (+28) 
- (modified) clang/lib/CIR/CodeGen/CIRGenExpr.cpp (+10) 
- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.cpp (+2) 
- (modified) clang/lib/CIR/CodeGen/CIRGenFunction.h (+2) 
- (modified) clang/lib/CIR/CodeGen/CIRGenModule.cpp (+102) 
- (modified) clang/lib/CIR/CodeGen/CIRGenModule.h (+11) 
- (modified) clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp (+12) 
- (added) clang/test/CIR/CodeGen/string-literals.c (+56) 


``````````diff
diff --git a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
index b680e4162a5ce..738f33bf36c9e 100644
--- a/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
+++ b/clang/include/clang/CIR/Dialect/Builder/CIRBaseBuilder.h
@@ -13,6 +13,7 @@
 #include "clang/CIR/Dialect/IR/CIRAttrs.h"
 #include "clang/CIR/Dialect/IR/CIRDialect.h"
 #include "clang/CIR/Dialect/IR/CIRTypes.h"
+#include "clang/CIR/MissingFeatures.h"
 #include "llvm/ADT/STLForwardCompat.h"
 #include "llvm/Support/ErrorHandling.h"
 
@@ -177,6 +178,12 @@ class CIRBaseBuilderTy : public mlir::OpBuilder {
     return create<cir::AllocaOp>(loc, addrType, type, name, alignment);
   }
 
+  mlir::Value createGetGlobal(mlir::Location loc, cir::GlobalOp global) {
+    assert(!cir::MissingFeatures::addressSpace());
+    return create<cir::GetGlobalOp>(loc, getPointerTo(global.getSymType()),
+                                    global.getSymName());
+  }
+
   cir::LoadOp createLoad(mlir::Location loc, mlir::Value ptr,
                          bool isVolatile = false, uint64_t alignment = 0) {
     mlir::IntegerAttr intAttr;
diff --git a/clang/include/clang/CIR/MissingFeatures.h b/clang/include/clang/CIR/MissingFeatures.h
index 7b33d94483d5f..d43e2d9f461d1 100644
--- a/clang/include/clang/CIR/MissingFeatures.h
+++ b/clang/include/clang/CIR/MissingFeatures.h
@@ -38,6 +38,7 @@ struct MissingFeatures {
   static bool opGlobalWeakRef() { return false; }
   static bool opGlobalLinkage() { return false; }
   static bool opGlobalSetVisitibility() { return false; }
+  static bool opGlobalUnnamedAddr() { return false; }
 
   static bool supportIFuncAttr() { return false; }
   static bool supportVisibility() { return false; }
diff --git a/clang/lib/CIR/CodeGen/CIRGenBuilder.h b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
index b1b0826a4e44a..aff8b8949f3ad 100644
--- a/clang/lib/CIR/CodeGen/CIRGenBuilder.h
+++ b/clang/lib/CIR/CodeGen/CIRGenBuilder.h
@@ -26,6 +26,34 @@ class CIRGenBuilderTy : public cir::CIRBaseBuilderTy {
   CIRGenBuilderTy(mlir::MLIRContext &mlirContext, const CIRGenTypeCache &tc)
       : CIRBaseBuilderTy(mlirContext), typeCache(tc) {}
 
+  /// Get a cir::ConstArrayAttr for a string literal.
+  /// Note: This is different from what is returned by
+  /// mlir::Builder::getStringAttr() which is an mlir::StringAttr.
+  mlir::Attribute getString(llvm::StringRef str, mlir::Type eltTy,
+                            unsigned size) {
+    unsigned finalSize = size ? size : str.size();
+
+    size_t lastNonZeroPos = str.find_last_not_of('\0');
+    // If the string is full of null bytes, emit a #cir.zero rather than
+    // a #cir.const_array.
+    if (lastNonZeroPos == llvm::StringRef::npos) {
+      auto arrayTy = cir::ArrayType::get(eltTy, finalSize);
+      return cir::ZeroAttr::get(arrayTy);
+    }
+    // We emit trailing zeros only if there are multiple trailing zeros.
+    int trailingZerosNum = 0;
+    if (finalSize > lastNonZeroPos + 2)
+      trailingZerosNum = finalSize - lastNonZeroPos - 1;
+    auto truncatedArrayTy =
+        cir::ArrayType::get(eltTy, finalSize - trailingZerosNum);
+    auto fullArrayTy = cir::ArrayType::get(eltTy, finalSize);
+    return cir::ConstArrayAttr::get(
+        fullArrayTy,
+        mlir::StringAttr::get(str.drop_back(trailingZerosNum),
+                              truncatedArrayTy),
+        trailingZerosNum);
+  }
+
   std::string getUniqueAnonRecordName() { return getUniqueRecordName("anon"); }
 
   std::string getUniqueRecordName(const std::string &baseName) {
diff --git a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
index c5fe3c1378624..a8fecafe4a1f3 100644
--- a/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenExpr.cpp
@@ -743,6 +743,16 @@ CIRGenFunction::emitArraySubscriptExpr(const clang::ArraySubscriptExpr *e) {
   return lv;
 }
 
+LValue CIRGenFunction::emitStringLiteralLValue(const StringLiteral *e) {
+  cir::GlobalOp globalOp = cgm.getGlobalForStringLiteral(e);
+  assert(!cir::MissingFeatures::opGlobalAlignment());
+  mlir::Value addr =
+      builder.createGetGlobal(getLoc(e->getSourceRange()), globalOp);
+  return makeAddrLValue(
+      Address(addr, globalOp.getSymType(), CharUnits::fromQuantity(1)),
+      e->getType(), AlignmentSource::Decl);
+}
+
 /// Casts are never lvalues unless that cast is to a reference type. If the cast
 /// is to a reference, we can have the usual lvalue result, otherwise if a cast
 /// is needed by the code generator in an lvalue context, then it must mean that
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
index c3798de79d969..ce88e656a38e8 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.cpp
@@ -531,6 +531,8 @@ LValue CIRGenFunction::emitLValue(const Expr *e) {
     return emitArraySubscriptExpr(cast<ArraySubscriptExpr>(e));
   case Expr::UnaryOperatorClass:
     return emitUnaryOpLValue(cast<UnaryOperator>(e));
+  case Expr::StringLiteralClass:
+    return emitStringLiteralLValue(cast<StringLiteral>(e));
   case Expr::MemberExprClass:
     return emitMemberExpr(cast<MemberExpr>(e));
   case Expr::BinaryOperatorClass:
diff --git a/clang/lib/CIR/CodeGen/CIRGenFunction.h b/clang/lib/CIR/CodeGen/CIRGenFunction.h
index ce080f481da6b..74f2e4043933d 100644
--- a/clang/lib/CIR/CodeGen/CIRGenFunction.h
+++ b/clang/lib/CIR/CodeGen/CIRGenFunction.h
@@ -695,6 +695,8 @@ class CIRGenFunction : public CIRGenTypeCache {
 
   mlir::Value emitStoreThroughBitfieldLValue(RValue src, LValue dstresult);
 
+  LValue emitStringLiteralLValue(const StringLiteral *e);
+
   mlir::LogicalResult emitSwitchBody(const clang::Stmt *s);
   mlir::LogicalResult emitSwitchCase(const clang::SwitchCase &s,
                                      bool buildingTopLevelCase);
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.cpp b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
index e170498b67548..5bae8908d5dbb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.cpp
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.cpp
@@ -562,6 +562,30 @@ void CIRGenModule::emitGlobalDefinition(clang::GlobalDecl gd,
   llvm_unreachable("Invalid argument to CIRGenModule::emitGlobalDefinition");
 }
 
+mlir::Attribute
+CIRGenModule::getConstantArrayFromStringLiteral(const StringLiteral *e) {
+  assert(!e->getType()->isPointerType() && "Strings are always arrays");
+
+  // Don't emit it as the address of the string, emit the string data itself
+  // as an inline array.
+  if (e->getCharByteWidth() == 1) {
+    SmallString<64> str(e->getString());
+
+    // Resize the string to the right size, which is indicated by its type.
+    const ConstantArrayType *cat =
+        astContext.getAsConstantArrayType(e->getType());
+    uint64_t finalSize = cat->getZExtSize();
+    str.resize(finalSize);
+
+    mlir::Type eltTy = convertType(cat->getElementType());
+    return builder.getString(str, eltTy, finalSize);
+  }
+
+  errorNYI(e->getSourceRange(),
+           "getConstantArrayFromStringLiteral: wide characters");
+  return mlir::Attribute();
+}
+
 static bool shouldBeInCOMDAT(CIRGenModule &cgm, const Decl &d) {
   assert(!cir::MissingFeatures::supportComdat());
 
@@ -749,6 +773,84 @@ CIRGenModule::getCIRLinkageVarDefinition(const VarDecl *vd, bool isConstant) {
   return getCIRLinkageForDeclarator(vd, linkage, isConstant);
 }
 
+static cir::GlobalOp generateStringLiteral(mlir::Location loc,
+                                           mlir::TypedAttr c, CIRGenModule &cgm,
+                                           StringRef globalName) {
+  assert(!cir::MissingFeatures::addressSpace());
+
+  // Create a global variable for this string
+  // FIXME(cir): check for insertion point in module level.
+  cir::GlobalOp gv =
+      CIRGenModule::createGlobalOp(cgm, loc, globalName, c.getType());
+
+  // Set up extra information and add to the module
+  assert(!cir::MissingFeatures::opGlobalAlignment());
+  assert(!cir::MissingFeatures::opGlobalLinkage());
+  assert(!cir::MissingFeatures::opGlobalThreadLocal());
+  assert(!cir::MissingFeatures::opGlobalUnnamedAddr());
+  CIRGenModule::setInitializer(gv, c);
+  assert(!cir::MissingFeatures::supportComdat());
+  assert(!cir::MissingFeatures::opGlobalDSOLocal());
+  return gv;
+}
+
+// LLVM IR automatically uniques names when new llvm::GlobalVariables are
+// created. This is handy, for example, when creating globals for string
+// literals. Since we don't do that when creating cir::GlobalOp's, we need
+// a mechanism to generate a unique name in advance.
+//
+// For now, this mechanism is only used in cases where we know that the
+// name is compiler-generated, so we don't use the MLIR symbol table for
+// the lookup.
+std::string CIRGenModule::getUniqueGlobalName(const std::string &baseName) {
+  // If this is the first time we've generated a name for this basename, use
+  // it as is and start a counter for this base name.
+  auto it = cgGlobalNames.find(baseName);
+  if (it == cgGlobalNames.end()) {
+    cgGlobalNames[baseName] = 0;
+    return baseName;
+  }
+
+  std::string result =
+      baseName + "." + std::to_string(cgGlobalNames[baseName]++);
+  // There should not be any symbol with this name in the module.
+  assert(!mlir::SymbolTable::lookupSymbolIn(theModule, result));
+  return result;
+}
+
+/// Return the global op for a constant array holding the given string literal.
+cir::GlobalOp CIRGenModule::getGlobalForStringLiteral(const StringLiteral *s,
+                                                      StringRef name) {
+  mlir::Attribute c = getConstantArrayFromStringLiteral(s);
+
+  if (getLangOpts().WritableStrings) {
+    errorNYI(s->getSourceRange(),
+             "getGlobalForStringLiteral: Writable strings");
+  }
+
+  // Mangle the string literal if that's how the ABI merges duplicate strings.
+  // Don't do it if they are writable, since we don't want writes in one TU to
+  // affect strings in another.
+  if (getCXXABI().getMangleContext().shouldMangleStringLiteral(s) &&
+      !getLangOpts().WritableStrings) {
+    errorNYI(s->getSourceRange(),
+             "getGlobalForStringLiteral: mangle string literals");
+  }
+
+  // Unlike LLVM IR, CIR doesn't automatically unique names for globals, so
+  // we need to do that explicitly.
+  std::string uniqueName = getUniqueGlobalName(name.str());
+  mlir::Location loc = getLoc(s->getSourceRange());
+  auto typedC = llvm::cast<mlir::TypedAttr>(c);
+  assert(!cir::MissingFeatures::opGlobalAlignment());
+  cir::GlobalOp gv = generateStringLiteral(loc, typedC, *this, uniqueName);
+  assert(!cir::MissingFeatures::opGlobalDSOLocal());
+
+  assert(!cir::MissingFeatures::sanitizers());
+
+  return gv;
+}
+
 void CIRGenModule::emitDeclContext(const DeclContext *dc) {
   for (Decl *decl : dc->decls()) {
     // Unlike other DeclContexts, the contents of an ObjCImplDecl at TU scope
diff --git a/clang/lib/CIR/CodeGen/CIRGenModule.h b/clang/lib/CIR/CodeGen/CIRGenModule.h
index b67239fcff44b..9828e1068e4fb 100644
--- a/clang/lib/CIR/CodeGen/CIRGenModule.h
+++ b/clang/lib/CIR/CodeGen/CIRGenModule.h
@@ -126,6 +126,9 @@ class CIRGenModule : public CIRGenTypeCache {
                                       llvm::StringRef name, mlir::Type t,
                                       mlir::Operation *insertPoint = nullptr);
 
+  llvm::StringMap<unsigned> cgGlobalNames;
+  std::string getUniqueGlobalName(const std::string &baseName);
+
   /// Return the mlir::Value for the address of the given global variable.
   /// If Ty is non-null and if the global doesn't exist, then it will be created
   /// with the specified type instead of whatever the normal requested type
@@ -136,6 +139,14 @@ class CIRGenModule : public CIRGenTypeCache {
   getAddrOfGlobalVar(const VarDecl *d, mlir::Type ty = {},
                      ForDefinition_t isForDefinition = NotForDefinition);
 
+  /// Return a constant array for the given string.
+  mlir::Attribute getConstantArrayFromStringLiteral(const StringLiteral *e);
+
+  /// Return a global symbol reference to a constant array for the given string
+  /// literal.
+  cir::GlobalOp getGlobalForStringLiteral(const StringLiteral *S,
+                                          llvm::StringRef Name = ".str");
+
   const TargetCIRGenInfo &getTargetCIRGenInfo();
 
   /// Helpers to convert the presumed location of Clang's SourceLocation to an
diff --git a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
index 365569ce1f48a..2516007afd561 100644
--- a/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
+++ b/clang/lib/CIR/Lowering/DirectToLLVM/LowerToLLVM.cpp
@@ -270,6 +270,18 @@ mlir::Value CIRAttrToValue::visitCirAttr(cir::ConstArrayAttr attr) {
       result =
           rewriter.create<mlir::LLVM::InsertValueOp>(loc, result, init, idx);
     }
+  } else if (auto strAttr = mlir::dyn_cast<mlir::StringAttr>(attr.getElts())) {
+    // TODO(cir): this diverges from traditional lowering. Normally the string
+    // would be a global constant that is memcopied.
+    auto arrayTy = mlir::dyn_cast<cir::ArrayType>(strAttr.getType());
+    assert(arrayTy && "String attribute must have an array type");
+    mlir::Type eltTy = arrayTy.getElementType();
+    for (auto [idx, elt] : llvm::enumerate(strAttr)) {
+      auto init = rewriter.create<mlir::LLVM::ConstantOp>(
+          loc, converter->convertType(eltTy), elt);
+      result =
+          rewriter.create<mlir::LLVM::InsertValueOp>(loc, result, init, idx);
+    }
   } else {
     llvm_unreachable("unexpected ConstArrayAttr elements");
   }
diff --git a/clang/test/CIR/CodeGen/string-literals.c b/clang/test/CIR/CodeGen/string-literals.c
new file mode 100644
index 0000000000000..873b00d9c9a98
--- /dev/null
+++ b/clang/test/CIR/CodeGen/string-literals.c
@@ -0,0 +1,56 @@
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-cir %s -o %t.cir
+// RUN: FileCheck --check-prefix=CIR --input-file=%t.cir %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -fclangir -emit-llvm %s -o %t-cir.ll
+// RUN: FileCheck --check-prefix=LLVM --input-file=%t-cir.ll %s
+// RUN: %clang_cc1 -triple aarch64-none-linux-android21 -emit-llvm %s -o %t.ll
+// RUN: FileCheck --check-prefix=OGCG --input-file=%t.ll %s
+
+// LLVM: @[[STR1_GLOBAL:.*]] = dso_local global [2 x i8] c"1\00"
+// LLVM: @[[STR2_GLOBAL:.*]] = dso_local global [1 x i8] zeroinitializer
+// LLVM: @[[STR3_GLOBAL:.*]] = dso_local global [2 x i8] zeroinitializer
+
+// OGCG: @[[STR1_GLOBAL:.*]] = private unnamed_addr constant [2 x i8] c"1\00"
+// OGCG: @[[STR2_GLOBAL:.*]] = private unnamed_addr constant [1 x i8] zeroinitializer
+// OGCG: @[[STR3_GLOBAL:.*]] = private unnamed_addr constant [2 x i8] zeroinitializer
+
+char *f1() {
+  return "1";
+}
+
+// CIR: cir.global external @[[STR1_GLOBAL:.*]] = #cir.const_array<"1\00" : !cir.array<!s8i x 2>> : !cir.array<!s8i x 2>
+// CIR: cir.func @f1()
+// CIR:   %[[STR:.*]] = cir.get_global @[[STR1_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 2>>
+
+// LLVM: define ptr @f1()
+// LLVM:   store ptr @[[STR1_GLOBAL]], ptr {{.*}}
+
+// OGCG: define {{.*}}ptr @f1()
+// OGCG:   ret ptr @[[STR1_GLOBAL]]
+
+char *f2() {
+  return "";
+}
+
+// CIR: cir.global external @[[STR2_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 1>
+// CIR: cir.func @f2()
+// CIR:   %[[STR2:.*]] = cir.get_global @[[STR2_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 1>>
+
+// LLVM: define ptr @f2()
+// LLVM:   store ptr @[[STR2_GLOBAL]], ptr {{.*}}
+
+// OGCG: define {{.*}}ptr @f2()
+// OGCG:   ret ptr @[[STR2_GLOBAL]]
+
+char *f3() {
+  return "\00";
+}
+
+// CIR: cir.global external @[[STR3_GLOBAL:.*]] = #cir.zero : !cir.array<!s8i x 2>
+// CIR: cir.func @f3()
+// CIR:   %[[STR3:.*]] = cir.get_global @[[STR3_GLOBAL]] : !cir.ptr<!cir.array<!s8i x 2>>
+
+// LLVM: define ptr @f3()
+// LLVM:   store ptr @[[STR3_GLOBAL]], ptr {{.*}}
+
+// OGCG: define {{.*}}ptr @f3()
+// OGCG:   ret ptr @[[STR3_GLOBAL]]

``````````

</details>


https://github.com/llvm/llvm-project/pull/140796
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to