On Sun, Mar 23, 2014 at 10:47 AM, David Majnemer <[email protected]>wrote:
> Author: majnemer > Date: Sun Mar 23 12:47:16 2014 > New Revision: 204562 > > URL: http://llvm.org/viewvc/llvm-project?rev=204562&view=rev > Log: > MS ABI: Eliminate Duplicate Strings > > COFF doesn't have mergeable sections so LLVM/clang's normal tactics for > string deduplication will not have any effect. > > To remedy this we place each string inside it's own section and mark > the section as IMAGE_COMDAT_SELECT_ANY. However, we can only do this if > the > string has an external name that we can generate from it's contents. > > To be compatible with MSVC, we must use their scheme. Otherwise identical > strings in translation units from clang may not be deduplicated with > translation units in MSVC. > > This fixes PR18248. > > N.B. We will not attempt to do anything with a string literal which is not > of > type 'char' or 'wchar_t' because their compiler does not support unicode > string literals as of this date. > > Modified: > cfe/trunk/include/clang/AST/Mangle.h > cfe/trunk/lib/AST/ItaniumMangle.cpp > cfe/trunk/lib/AST/MicrosoftMangle.cpp > cfe/trunk/lib/CodeGen/CodeGenModule.cpp > cfe/trunk/lib/CodeGen/CodeGenModule.h > cfe/trunk/test/CodeGen/wchar-const.c > > Modified: cfe/trunk/include/clang/AST/Mangle.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Mangle.h?rev=204562&r1=204561&r2=204562&view=diff > > ============================================================================== > --- cfe/trunk/include/clang/AST/Mangle.h (original) > +++ cfe/trunk/include/clang/AST/Mangle.h Sun Mar 23 12:47:16 2014 > @@ -31,9 +31,10 @@ namespace clang { > class FunctionDecl; > class NamedDecl; > class ObjCMethodDecl; > - class VarDecl; > + class StringLiteral; > struct ThisAdjustment; > struct ThunkInfo; > + class VarDecl; > > /// MangleBuffer - a convenient class for storing a name which is > /// either the result of a mangling or is a constant string with > @@ -117,6 +118,7 @@ public: > > bool shouldMangleDeclName(const NamedDecl *D); > virtual bool shouldMangleCXXName(const NamedDecl *D) = 0; > + virtual bool shouldMangleStringLiteral(const StringLiteral *SL) = 0; > > // FIXME: consider replacing raw_ostream & with something like > SmallString &. > void mangleName(const NamedDecl *D, raw_ostream &); > @@ -135,6 +137,7 @@ public: > raw_ostream &) = 0; > virtual void mangleCXXDtor(const CXXDestructorDecl *D, CXXDtorType Type, > raw_ostream &) = 0; > + virtual void mangleStringLiteral(const StringLiteral *SL, raw_ostream > &) = 0; > > void mangleGlobalBlock(const BlockDecl *BD, > const NamedDecl *ID, > > Modified: cfe/trunk/lib/AST/ItaniumMangle.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ItaniumMangle.cpp?rev=204562&r1=204561&r2=204562&view=diff > > ============================================================================== > --- cfe/trunk/lib/AST/ItaniumMangle.cpp (original) > +++ cfe/trunk/lib/AST/ItaniumMangle.cpp Sun Mar 23 12:47:16 2014 > @@ -21,6 +21,7 @@ > #include "clang/AST/DeclCXX.h" > #include "clang/AST/DeclObjC.h" > #include "clang/AST/DeclTemplate.h" > +#include "clang/AST/Expr.h" > #include "clang/AST/ExprCXX.h" > #include "clang/AST/ExprObjC.h" > #include "clang/AST/TypeLoc.h" > @@ -126,6 +127,9 @@ public: > /// @{ > > bool shouldMangleCXXName(const NamedDecl *D) override; > + bool shouldMangleStringLiteral(const StringLiteral *) override { > + return false; > + } > void mangleCXXName(const NamedDecl *D, raw_ostream &) override; > void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, > raw_ostream &) override; > @@ -153,6 +157,8 @@ public: > void mangleItaniumThreadLocalWrapper(const VarDecl *D, > raw_ostream &) override; > > + void mangleStringLiteral(const StringLiteral *, raw_ostream &) override; > + > bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { > // Lambda closure types are already numbered. > if (isLambda(ND)) > @@ -3774,6 +3780,10 @@ void ItaniumMangleContextImpl::mangleTyp > mangleCXXRTTIName(Ty, Out); > } > > +void ItaniumMangleContextImpl::mangleStringLiteral(const StringLiteral *, > raw_ostream &) { > + llvm_unreachable("Can't mangle string literals"); > +} > + > ItaniumMangleContext * > ItaniumMangleContext::create(ASTContext &Context, DiagnosticsEngine > &Diags) { > return new ItaniumMangleContextImpl(Context, Diags); > > Modified: cfe/trunk/lib/AST/MicrosoftMangle.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/MicrosoftMangle.cpp?rev=204562&r1=204561&r2=204562&view=diff > > ============================================================================== > --- cfe/trunk/lib/AST/MicrosoftMangle.cpp (original) > +++ cfe/trunk/lib/AST/MicrosoftMangle.cpp Sun Mar 23 12:47:16 2014 > @@ -20,6 +20,7 @@ > #include "clang/AST/DeclCXX.h" > #include "clang/AST/DeclObjC.h" > #include "clang/AST/DeclTemplate.h" > +#include "clang/AST/Expr.h" > #include "clang/AST/ExprCXX.h" > #include "clang/AST/VTableBuilder.h" > #include "clang/Basic/ABI.h" > @@ -27,6 +28,9 @@ > #include "clang/Basic/TargetInfo.h" > #include "llvm/ADT/StringExtras.h" > #include "llvm/ADT/StringMap.h" > +#include "llvm/Support/MathExtras.h" > + > +#include <algorithm> > > using namespace clang; > > @@ -93,6 +97,7 @@ public: > MicrosoftMangleContextImpl(ASTContext &Context, DiagnosticsEngine > &Diags) > : MicrosoftMangleContext(Context, Diags) {} > bool shouldMangleCXXName(const NamedDecl *D) override; > + bool shouldMangleStringLiteral(const StringLiteral *SL) override; > void mangleCXXName(const NamedDecl *D, raw_ostream &Out) override; > void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD, raw_ostream &) > override; > void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, > @@ -118,6 +123,7 @@ public: > void mangleDynamicInitializer(const VarDecl *D, raw_ostream &Out) > override; > void mangleDynamicAtExitDestructor(const VarDecl *D, > raw_ostream &Out) override; > + void mangleStringLiteral(const StringLiteral *SL, raw_ostream &Out) > override; > bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { > // Lambda closure types are already numbered. > if (isLambda(ND)) > @@ -321,6 +327,13 @@ bool MicrosoftMangleContextImpl::shouldM > return true; > } > > +bool > +MicrosoftMangleContextImpl::shouldMangleStringLiteral(const StringLiteral > *SL) { > + return SL->isAscii() || SL->isWide(); > + // TODO: This needs to be updated when MSVC gains support for Unicode > + // literals. > +} > + > void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, > StringRef Prefix) { > // MSVC doesn't mangle C++ names the same way it mangles extern "C" > names. > @@ -2315,6 +2328,162 @@ MicrosoftMangleContextImpl::mangleDynami > mangleInitFiniStub(D, Out, 'F'); > } > > +void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral > *SL, > + raw_ostream &Out) { > + // <char-type> ::= 0 # char > + // ::= 1 # wchar_t > + // ::= ??? # char16_t/char32_t will need a mangling too... > + // > + // <literal-length> ::= <non-negative integer> # the length of the > literal > + // > + // <encoded-crc> ::= <hex digit>+ @ # crc of the literal > including > + // # null-terminator > + // > + // <encoded-string> ::= <simple character> # uninteresting > character > + // ::= '?$' <hex digit> <hex digit> # these two nibbles > + // # encode the byte > for the > + // # character > + // ::= '?' [a-z] # \xe1 - \xfa > + // ::= '?' [A-Z] # \xc1 - \xda > + // ::= '?' [0-9] # [,/\:. \n\t'-] > + // > + // <literal> ::= '??_C@_' <char-type> <literal-length> <encoded-crc> > + // <encoded-string> '@' > + MicrosoftCXXNameMangler Mangler(*this, Out); > + Mangler.getStream() << "\01??_C@_"; > + > + // <char-type>: The "kind" of string literal is encoded into the > mangled name. > + // TODO: This needs to be updated when MSVC gains support for unicode > + // literals. > + if (SL->isAscii()) > + Mangler.getStream() << '0'; > + else if (SL->isWide()) > + Mangler.getStream() << '1'; > + else > + llvm_unreachable("unexpected string literal kind!"); > + > + // <literal-length>: The next part of the mangled name consists of the > length > + // of the string. > + // The StringLiteral does not consider the NUL terminator byte(s) but > the > + // mangling does. > + // N.B. The length is in terms of bytes, not characters. > + Mangler.mangleNumber(SL->getByteLength() + SL->getCharByteWidth()); > + > + // We will use the "Rocksoft^tm Model CRC Algorithm" to describe the > + // properties of our CRC: > + // Width : 32 > + // Poly : 04C11DB7 > + // Init : FFFFFFFF > + // RefIn : True > + // RefOut : True > + // XorOut : 00000000 > + // Check : 340BC6D9 > + uint32_t CRC = 0xFFFFFFFFU; > + > + auto UpdateCRC = [&CRC](char Byte) { > + for (unsigned i = 0; i < 8; ++i) { > + bool Bit = CRC & 0x80000000U; > + if (Byte & (1U << i)) > + Bit = !Bit; > + CRC <<= 1; > + if (Bit) > + CRC ^= 0x04C11DB7U; > + } > + }; > + > + // CRC all the bytes of the StringLiteral. > + for (char Byte : SL->getBytes()) > + UpdateCRC(Byte); > + > + // The NUL terminator byte(s) were not present earlier, > + // we need to manually process those bytes into the CRC. > + for (unsigned NullTerminator = 0; NullTerminator < > SL->getCharByteWidth(); > + ++NullTerminator) > + UpdateCRC('\x00'); > + > + // The literature refers to the process of reversing the bits in the > final CRC > + // output as "reflection". > + CRC = llvm::reverseBits(CRC); > + > + // <encoded-crc>: The CRC is encoded utilizing the standard number > mangling > + // scheme. > + Mangler.mangleNumber(CRC); > + > + // <encoded-crc>: The mangled name also contains the first 32 > _characters_ > + // (including null-terminator bytes) of the StringLiteral. > + // Each character is encoded by splitting them into bytes and then > encoding > + // the constituent bytes. > + auto MangleCharacter = [&Mangler](char Byte) { > + // There are five different manglings for characters: > + // - ?[0-9]: The set of [,/\:. \n\t'-]. > + // - [a-zA-Z0-9_$]: A one-to-one mapping. > + // - ?[a-z]: The range from \xe1 to \xfa. > + // - ?[A-Z]: The range from \xc1 to \xda. > + // - ?$XX: A fallback which maps nibbles. > + static const char SpecialMap[] = {',', '/', '\\', ':', '.', > + ' ', '\n', '\t', '\'', '-'}; > + const char *SpecialMapI = > + std::find(std::begin(SpecialMap), std::end(SpecialMap), Byte); > Do we generate good code for this? Maybe move it down into the 'else' block to avoid the linear scan in the common case? > + if (SpecialMapI != std::end(SpecialMap)) { > + Mangler.getStream() << '?' << SpecialMapI - SpecialMap; > + } else if ((Byte >= 'a' && Byte <= 'z') || (Byte >= 'A' && Byte <= > 'Z') || > + (Byte >= '0' && Byte <= '9') || Byte == '_' || Byte == > '$') { > + Mangler.getStream() << Byte; > + } else if (Byte >= '\xe1' && Byte <= '\xfa') { > + Mangler.getStream() << '?' << (char)('a' + Byte - '\xe1'); > + } else if (Byte >= '\xc1' && Byte <= '\xda') { > + Mangler.getStream() << '?' << (char)('A' + Byte - '\xc1'); > + } else { > + Mangler.getStream() << "?$"; > + Mangler.getStream() << (char)('A' + ((Byte >> 4) & 0xf)); > + Mangler.getStream() << (char)('A' + (Byte & 0xf)); > + } > + }; > + > + // Enforce our 32 character max. > + unsigned MaxBytes = 32 * SL->getCharByteWidth(); > + StringRef Bytes = SL->getBytes().substr(0, MaxBytes); > + size_t BytesLength = Bytes.size(); > + > + if (SL->isAscii()) { > + // A character maps directly to a byte for ASCII StringLiterals. > + for (char Byte : Bytes) > + MangleCharacter(Byte); > + } else if (SL->isWide()) { > + // The ordering of bytes in a wide StringLiteral is like so: > + // A B C D ... > + // However, they are mangled in the following order: > + // B A D C ... > I don't believe you =) The ordering of bytes in a wide StringLiteral depends on the endianness of the host machine. If the strings are mangled in big-endian order, we should do that properly rather than assuming the host is little-endian. > + for (size_t i = 0; i != BytesLength;) { > + char FirstByte = Bytes[i]; > + ++i; > + if (i != BytesLength) { > + char SecondByte = Bytes[i]; > + ++i; > + MangleCharacter(SecondByte); > + } > + MangleCharacter(FirstByte); > + } > + } else { > + llvm_unreachable("unexpected string literal kind!"); > + // TODO: This needs to be updated when MSVC gains support for Unicode > + // literals. > + } > + > + // We should also encode the NUL terminator(s) if we encoded less than > 32 > + // characters. > "less" should be "fewer" =) > + if (BytesLength < MaxBytes) { > + size_t PaddingBytes = SL->getCharByteWidth(); > + size_t BytesLeft = MaxBytes - BytesLength; > + if (BytesLeft < PaddingBytes) > + PaddingBytes = BytesLeft; > + for (unsigned i = 0; i < PaddingBytes; ++i) > + MangleCharacter('\x00'); > + } > + > + Mangler.getStream() << '@'; > +} > + > MicrosoftMangleContext * > MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine > &Diags) { > return new MicrosoftMangleContextImpl(Context, Diags); > > Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=204562&r1=204561&r2=204562&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) > +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Sun Mar 23 12:47:16 2014 > @@ -2572,25 +2572,67 @@ CodeGenModule::GetConstantArrayFromStrin > llvm::Constant * > CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral *S) > { > CharUnits Align = getContext().getAlignOfGlobalVarInChars(S->getType()); > - if (S->isAscii() || S->isUTF8()) { > - SmallString<64> Str(S->getString()); > - > - // Resize the string to the right size, which is indicated by its > type. > - const ConstantArrayType *CAT = > Context.getAsConstantArrayType(S->getType()); > - Str.resize(CAT->getSize().getZExtValue()); > - return GetAddrOfConstantString(Str, /*GlobalName*/ 0, > Align.getQuantity()); > + > + llvm::StringMapEntry<llvm::GlobalVariable *> *Entry = nullptr; > + llvm::GlobalVariable *GV = nullptr; > + if (!LangOpts.WritableStrings) { > + llvm::StringMap<llvm::GlobalVariable *> *ConstantStringMap = nullptr; > + switch (S->getCharByteWidth()) { > + case 1: > + ConstantStringMap = &Constant1ByteStringMap; > + break; > + case 2: > + ConstantStringMap = &Constant2ByteStringMap; > + break; > + case 4: > + ConstantStringMap = &Constant4ByteStringMap; > + break; > + default: > + llvm_unreachable("unhandled byte width!"); > + } > + Entry = &ConstantStringMap->GetOrCreateValue(S->getBytes()); > + GV = Entry->getValue(); > + } > + > + if (!GV) { > + StringRef GlobalVariableName; > + llvm::GlobalValue::LinkageTypes LT; > + if (!LangOpts.WritableStrings && > + getCXXABI().getMangleContext().shouldMangleStringLiteral(S)) { > + LT = llvm::GlobalValue::LinkOnceODRLinkage; > + > + SmallString<256> Buffer; > + llvm::raw_svector_ostream Out(Buffer); > + getCXXABI().getMangleContext().mangleStringLiteral(S, Out); > + Out.flush(); > + > + size_t Length = Buffer.size(); > + char *Name = MangledNamesAllocator.Allocate<char>(Length); > Do you really need an allocation and a copy here? Moving 'Buffer' outside the 'if' would seem sufficient -- creating the GlobalVariable will make its own copy. + std::copy(Buffer.begin(), Buffer.end(), Name); > + GlobalVariableName = StringRef(Name, Length); > + } else { > + LT = llvm::GlobalValue::PrivateLinkage;; > + GlobalVariableName = ".str"; > + } > + > + // OpenCL v1.2 s6.5.3: a string literal is in the constant address > space. > + unsigned AddrSpace = 0; > + if (getLangOpts().OpenCL) > + AddrSpace = > getContext().getTargetAddressSpace(LangAS::opencl_constant); > + > + llvm::Constant *C = GetConstantArrayFromStringLiteral(S); > + GV = new llvm::GlobalVariable( > + getModule(), C->getType(), !LangOpts.WritableStrings, LT, C, > + GlobalVariableName, /*InsertBefore=*/nullptr, > + llvm::GlobalVariable::NotThreadLocal, AddrSpace); > + GV->setUnnamedAddr(true); > + if (Entry) > + Entry->setValue(GV); > } > > - // FIXME: the following does not memoize wide strings. > - llvm::Constant *C = GetConstantArrayFromStringLiteral(S); > - llvm::GlobalVariable *GV = > - new llvm::GlobalVariable(getModule(),C->getType(), > - !LangOpts.WritableStrings, > - llvm::GlobalValue::PrivateLinkage, > - C,".str"); > + if (Align.getQuantity() > GV->getAlignment()) > + GV->setAlignment(Align.getQuantity()); > > - GV->setAlignment(Align.getQuantity()); > - GV->setUnnamedAddr(true); > return GV; > } > > @@ -2615,7 +2657,7 @@ static llvm::GlobalVariable *GenerateStr > llvm::Constant *C = > llvm::ConstantDataArray::getString(CGM.getLLVMContext(), str, > false); > > - // OpenCL v1.1 s6.5.3: a string literal is in the constant address > space. > + // OpenCL v1.2 s6.5.3: a string literal is in the constant address > space. > unsigned AddrSpace = 0; > if (CGM.getLangOpts().OpenCL) > AddrSpace = > CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); > @@ -2654,7 +2696,7 @@ llvm::Constant *CodeGenModule::GetAddrOf > return GenerateStringLiteral(Str, false, *this, GlobalName, > Alignment); > > llvm::StringMapEntry<llvm::GlobalVariable *> &Entry = > - ConstantStringMap.GetOrCreateValue(Str); > + Constant1ByteStringMap.GetOrCreateValue(Str); > > if (llvm::GlobalVariable *GV = Entry.getValue()) { > if (Alignment > GV->getAlignment()) { > > Modified: cfe/trunk/lib/CodeGen/CodeGenModule.h > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.h?rev=204562&r1=204561&r2=204562&view=diff > > ============================================================================== > --- cfe/trunk/lib/CodeGen/CodeGenModule.h (original) > +++ cfe/trunk/lib/CodeGen/CodeGenModule.h Sun Mar 23 12:47:16 2014 > @@ -319,7 +319,10 @@ class CodeGenModule : public CodeGenType > llvm::StringMap<llvm::Constant*> AnnotationStrings; > > llvm::StringMap<llvm::Constant*> CFConstantStringMap; > - llvm::StringMap<llvm::GlobalVariable*> ConstantStringMap; > + > + llvm::StringMap<llvm::GlobalVariable *> Constant1ByteStringMap; > + llvm::StringMap<llvm::GlobalVariable *> Constant2ByteStringMap; > + llvm::StringMap<llvm::GlobalVariable *> Constant4ByteStringMap; > llvm::DenseMap<const Decl*, llvm::Constant *> StaticLocalDeclMap; > llvm::DenseMap<const Decl*, llvm::GlobalVariable*> > StaticLocalDeclGuardMap; > llvm::DenseMap<const Expr*, llvm::Constant *> > MaterializedGlobalTemporaryMap; > > Modified: cfe/trunk/test/CodeGen/wchar-const.c > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/wchar-const.c?rev=204562&r1=204561&r2=204562&view=diff > > ============================================================================== > --- cfe/trunk/test/CodeGen/wchar-const.c (original) > +++ cfe/trunk/test/CodeGen/wchar-const.c Sun Mar 23 12:47:16 2014 > @@ -15,7 +15,7 @@ typedef __WCHAR_TYPE__ wchar_t; > > > // CHECK-DAR: private unnamed_addr constant [18 x i32] [i32 84, > -// CHECK-WIN: private unnamed_addr constant [18 x i16] [i16 84, > +// CHECK-WIN: linkonce_odr unnamed_addr constant [18 x i16] [i16 84, > extern void foo(const wchar_t* p); > int main (int argc, const char * argv[]) > { You seem to be very short on tests; did you forget to svn add a file?
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
