Hi Craig,

I liked your idea. I verified that it fixes my test case. I also improved
your original patch as follows:

1. Updated 'GetAddrOfConstantString' and 'GetAddrOfConstantCString' APIs to
default assign Alignment to 1.
2. Undid your changes to all call sites calling those APIs with alignment of
1.

The new patch is attached. John, please let me know if the new patch looks
ok.

Thanks,
Sundeep

> -----Original Message-----
> From: Craig Topper [mailto:[email protected]]
> Sent: Wednesday, August 03, 2011 2:15 AM
> To: [email protected]; [email protected]
> Subject: [cfe-commits] PATCH: wide strings alignment fix in clang
> 
> The previous patch no longer applies due to the my unicode string
> literal patch that went in last week. I took a stab at a new patch.
> Rather than using the different string kinds to get the proper
> alignment, I queried ASTContext for the alignment of the type contained
> in the StringLiteral. I push the alignment as an argument down to the
> functions that actually look up the string in the map. If we find a
> match in the map, I max the old alignment and the new alignment.
> 
> ~Craig
> 
> >> I have been working on a bug in clang dealing with alignment of wide
> >> strings. Clang is aligning wide string literals to 1 byte boundary.
> >> However, LLVM treats wide strings as 4 byte aligned and generates
> >> memory operations accessing 4 bytes at a time. This works fine on
> >> architectures that allow unaligned access. But for architectures
> >> which don't allow unaligned access, this results in an exception and
> segfault.
> >
> > +      unsigned Align = TI.getWCharAlign() / TI.getCharAlign();
> >
> > This should be dividing by the bit-width of char, not by its
> alignment.
> >
> > Also, there are multiple kinds of wide strings ? official wide
> strings
> > (L), and then UTF-16 (u) and UTF-32 (U) wide strings.  It would be
> > good if this worked for all of them.  You should also make sure that
> > you never
> > *decrease* the alignment of a string, as you might if there were a
> > utf32 string literal followed by a utf16 string literal which
> expanded
> > to the same sequence of bytes.
> >
> > Also, in the future, please give your patch files some sort of
> > sensible file extension, preferably ".txt".
> >
> > John.
Index: test/CodeGen/string-literal.c
===================================================================
--- test/CodeGen/string-literal.c       (revision 136793)
+++ test/CodeGen/string-literal.c       (working copy)
@@ -14,26 +14,26 @@
   // CHECK-CPP0X: internal unnamed_addr constant [10 x i8] 
c"\E1\84\A0\C8\A0\F4\82\80\B0\00", align 1
   char b[10] = "\u1120\u0220\U00102030";
 
-  // CHECK-C: private unnamed_addr constant [12 x i8] 
c"A\00\00\00B\00\00\00\00\00\00\00", align 1
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] 
c"A\00\00\00B\00\00\00\00\00\00\00", align 1
+  // CHECK-C: private unnamed_addr constant [12 x i8] 
c"A\00\00\00B\00\00\00\00\00\00\00", align 4
+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] 
c"A\00\00\00B\00\00\00\00\00\00\00", align 4
   const wchar_t *foo = L"AB";
 
-  // CHECK-C: private unnamed_addr constant [12 x i8] 
c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 1
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] 
c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 1
+  // CHECK-C: private unnamed_addr constant [12 x i8] 
c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 4
+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] 
c"4\12\00\00\0B\F0\10\00\00\00\00\00", align 4
   const wchar_t *bar = L"\u1234\U0010F00B";
 
 #if __cplusplus >= 201103L
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] 
c"C\00\00\00D\00\00\00\00\00\00\00", align 1
+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] 
c"C\00\00\00D\00\00\00\00\00\00\00", align 4
   const char32_t *c = U"CD";
 
-  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] 
c"5\12\00\00\0C\F0\10\00\00\00\00\00", align 1
+  // CHECK-CPP0X: private unnamed_addr constant [12 x i8] 
c"5\12\00\00\0C\F0\10\00\00\00\00\00", align 4
   const char32_t *d = U"\u1235\U0010F00C";
 
-  // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"E\00F\00\00\00", 
align 1
+  // CHECK-CPP0X: private unnamed_addr constant [6 x i8] c"E\00F\00\00\00", 
align 2
   const char16_t *e = u"EF";
 
   // This should convert to utf16.
-  // CHECK-CPP0X: private unnamed_addr constant [10 x i8] c" \11 
\02\C8\DB0\DC\00\00", align 1
+  // CHECK-CPP0X: private unnamed_addr constant [10 x i8] c" \11 
\02\C8\DB0\DC\00\00", align 2
   const char16_t *f = u"\u1120\u0220\U00102030";
 
   // CHECK-CPP0X: private unnamed_addr constant [4 x i8] c"def\00", align 1
Index: lib/CodeGen/CodeGenModule.h
===================================================================
--- lib/CodeGen/CodeGenModule.h (revision 136793)
+++ lib/CodeGen/CodeGenModule.h (working copy)
@@ -545,7 +545,8 @@
   /// \param GlobalName If provided, the name to use for the global
   /// (if one is created).
   llvm::Constant *GetAddrOfConstantString(StringRef Str,
-                                          const char *GlobalName=0);
+                                          const char *GlobalName=0,
+                                          unsigned Alignment=1);
 
   /// GetAddrOfConstantCString - Returns a pointer to a character array
   /// containing the literal and a terminating '\0' character. The result has
@@ -554,7 +555,8 @@
   /// \param GlobalName If provided, the name to use for the global (if one is
   /// created).
   llvm::Constant *GetAddrOfConstantCString(const std::string &str,
-                                           const char *GlobalName=0);
+                                           const char *GlobalName=0,
+                                           unsigned Alignment=1);
   
   /// GetAddrOfCXXConstructor - Return the address of the constructor of the
   /// given type.
Index: lib/CodeGen/CodeGenModule.cpp
===================================================================
--- lib/CodeGen/CodeGenModule.cpp       (revision 136793)
+++ lib/CodeGen/CodeGenModule.cpp       (working copy)
@@ -1923,7 +1923,10 @@
 CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) {
   // FIXME: This can be more efficient.
   // FIXME: We shouldn't need to bitcast the constant in the wide string case.
-  llvm::Constant *C = GetAddrOfConstantString(GetStringForStringLiteral(S));
+  CharUnits Align = getContext().getTypeAlignInChars(S->getType());
+  llvm::Constant *C = GetAddrOfConstantString(GetStringForStringLiteral(S),
+                                              /* GlobalName */ 0,
+                                              Align.getQuantity());
   if (S->isWide() || S->isUTF16() || S->isUTF32()) {
     llvm::Type *DestTy =
         llvm::PointerType::getUnqual(getTypes().ConvertType(S->getType()));
@@ -1947,7 +1950,8 @@
 static llvm::Constant *GenerateStringLiteral(StringRef str,
                                              bool constant,
                                              CodeGenModule &CGM,
-                                             const char *GlobalName) {
+                                             const char *GlobalName,
+                                             unsigned Alignment) {
   // Create Constant for this string literal. Don't add a '\0'.
   llvm::Constant *C =
       llvm::ConstantArray::get(CGM.getLLVMContext(), str, false);
@@ -1957,7 +1961,7 @@
     new llvm::GlobalVariable(CGM.getModule(), C->getType(), constant,
                              llvm::GlobalValue::PrivateLinkage,
                              C, GlobalName);
-  GV->setAlignment(1);
+  GV->setAlignment(Alignment);
   GV->setUnnamedAddr(true);
   return GV;
 }
@@ -1971,7 +1975,8 @@
 ///
 /// The result has pointer to array type.
 llvm::Constant *CodeGenModule::GetAddrOfConstantString(StringRef Str,
-                                                       const char *GlobalName) 
{
+                                                       const char *GlobalName,
+                                                       unsigned Alignment) {
   bool IsConstant = !Features.WritableStrings;
 
   // Get the default prefix if a name wasn't specified.
@@ -1980,16 +1985,22 @@
 
   // Don't share any string literals if strings aren't constant.
   if (!IsConstant)
-    return GenerateStringLiteral(Str, false, *this, GlobalName);
+    return GenerateStringLiteral(Str, false, *this, GlobalName, Alignment);
 
   llvm::StringMapEntry<llvm::Constant *> &Entry =
     ConstantStringMap.GetOrCreateValue(Str);
 
-  if (Entry.getValue())
-    return Entry.getValue();
+  if (llvm::Constant *C = Entry.getValue()) {
+    if (llvm::GlobalVariable *GV = dyn_cast<llvm::GlobalVariable>(C)) {
+      if (Alignment > GV->getAlignment()) {
+        GV->setAlignment(Alignment);
+      }
+    }
+    return C;
+  }
 
   // Create a global variable for this.
-  llvm::Constant *C = GenerateStringLiteral(Str, true, *this, GlobalName);
+  llvm::Constant *C = GenerateStringLiteral(Str, true, *this, GlobalName, 
Alignment);
   Entry.setValue(C);
   return C;
 }
@@ -1998,9 +2009,10 @@
 /// array containing the literal and a terminating '\0'
 /// character. The result has pointer to array type.
 llvm::Constant *CodeGenModule::GetAddrOfConstantCString(const std::string &Str,
-                                                        const char 
*GlobalName){
+                                                        const char *GlobalName,
+                                                        unsigned Alignment) {
   StringRef StrWithNull(Str.c_str(), Str.size() + 1);
-  return GetAddrOfConstantString(StrWithNull, GlobalName);
+  return GetAddrOfConstantString(StrWithNull, GlobalName, Alignment);
 }
 
 /// EmitObjCPropertyImplementations - Emit information for synthesized
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to