Thanks for the post-commit review! On Sun Mar 23 2014 at 11:34:25 AM, Richard Smith <[email protected]> wrote:
> On Sun, Mar 23, 2014 at 10:47 AM, David Majnemer <[email protected] > > wrote: > >> Author: majnemer >> Date: Sun Mar 23 12:47:16 2014 >> New Revision: 204562 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=204562&view=rev >> Log: >> MS ABI: Eliminate Duplicate Strings >> >> COFF doesn't have mergeable sections so LLVM/clang's normal tactics for >> string deduplication will not have any effect. >> >> To remedy this we place each string inside it's own section and mark >> the section as IMAGE_COMDAT_SELECT_ANY. However, we can only do this if >> the >> string has an external name that we can generate from it's contents. >> >> To be compatible with MSVC, we must use their scheme. Otherwise identical >> strings in translation units from clang may not be deduplicated with >> translation units in MSVC. >> >> This fixes PR18248. >> >> N.B. We will not attempt to do anything with a string literal which is >> not of >> type 'char' or 'wchar_t' because their compiler does not support unicode >> string literals as of this date. >> >> Modified: >> cfe/trunk/include/clang/AST/Mangle.h >> cfe/trunk/lib/AST/ItaniumMangle.cpp >> cfe/trunk/lib/AST/MicrosoftMangle.cpp >> cfe/trunk/lib/CodeGen/CodeGenModule.cpp >> cfe/trunk/lib/CodeGen/CodeGenModule.h >> cfe/trunk/test/CodeGen/wchar-const.c >> >> Modified: cfe/trunk/include/clang/AST/Mangle.h >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/include/clang/AST/Mangle.h?rev=204562&r1=204561&r2=204562&view=diff >> >> ============================================================================== >> --- cfe/trunk/include/clang/AST/Mangle.h (original) >> +++ cfe/trunk/include/clang/AST/Mangle.h Sun Mar 23 12:47:16 2014 >> @@ -31,9 +31,10 @@ namespace clang { >> class FunctionDecl; >> class NamedDecl; >> class ObjCMethodDecl; >> - class VarDecl; >> + class StringLiteral; >> struct ThisAdjustment; >> struct ThunkInfo; >> + class VarDecl; >> >> /// MangleBuffer - a convenient class for storing a name which is >> /// either the result of a mangling or is a constant string with >> @@ -117,6 +118,7 @@ public: >> >> bool shouldMangleDeclName(const NamedDecl *D); >> virtual bool shouldMangleCXXName(const NamedDecl *D) = 0; >> + virtual bool shouldMangleStringLiteral(const StringLiteral *SL) = 0; >> >> // FIXME: consider replacing raw_ostream & with something like >> SmallString &. >> void mangleName(const NamedDecl *D, raw_ostream &); >> @@ -135,6 +137,7 @@ public: >> raw_ostream &) = 0; >> virtual void mangleCXXDtor(const CXXDestructorDecl *D, CXXDtorType >> Type, >> raw_ostream &) = 0; >> + virtual void mangleStringLiteral(const StringLiteral *SL, raw_ostream >> &) = 0; >> >> void mangleGlobalBlock(const BlockDecl *BD, >> const NamedDecl *ID, >> >> Modified: cfe/trunk/lib/AST/ItaniumMangle.cpp >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/ItaniumMangle.cpp?rev=204562&r1=204561&r2=204562&view=diff >> >> ============================================================================== >> --- cfe/trunk/lib/AST/ItaniumMangle.cpp (original) >> +++ cfe/trunk/lib/AST/ItaniumMangle.cpp Sun Mar 23 12:47:16 2014 >> @@ -21,6 +21,7 @@ >> #include "clang/AST/DeclCXX.h" >> #include "clang/AST/DeclObjC.h" >> #include "clang/AST/DeclTemplate.h" >> +#include "clang/AST/Expr.h" >> #include "clang/AST/ExprCXX.h" >> #include "clang/AST/ExprObjC.h" >> #include "clang/AST/TypeLoc.h" >> @@ -126,6 +127,9 @@ public: >> /// @{ >> >> bool shouldMangleCXXName(const NamedDecl *D) override; >> + bool shouldMangleStringLiteral(const StringLiteral *) override { >> + return false; >> + } >> void mangleCXXName(const NamedDecl *D, raw_ostream &) override; >> void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, >> raw_ostream &) override; >> @@ -153,6 +157,8 @@ public: >> void mangleItaniumThreadLocalWrapper(const VarDecl *D, >> raw_ostream &) override; >> >> + void mangleStringLiteral(const StringLiteral *, raw_ostream &) >> override; >> + >> bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { >> // Lambda closure types are already numbered. >> if (isLambda(ND)) >> @@ -3774,6 +3780,10 @@ void ItaniumMangleContextImpl::mangleTyp >> mangleCXXRTTIName(Ty, Out); >> } >> >> +void ItaniumMangleContextImpl::mangleStringLiteral(const StringLiteral >> *, raw_ostream &) { >> + llvm_unreachable("Can't mangle string literals"); >> +} >> + >> ItaniumMangleContext * >> ItaniumMangleContext::create(ASTContext &Context, DiagnosticsEngine >> &Diags) { >> return new ItaniumMangleContextImpl(Context, Diags); >> >> Modified: cfe/trunk/lib/AST/MicrosoftMangle.cpp >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/AST/MicrosoftMangle.cpp?rev=204562&r1=204561&r2=204562&view=diff >> >> ============================================================================== >> --- cfe/trunk/lib/AST/MicrosoftMangle.cpp (original) >> +++ cfe/trunk/lib/AST/MicrosoftMangle.cpp Sun Mar 23 12:47:16 2014 >> @@ -20,6 +20,7 @@ >> #include "clang/AST/DeclCXX.h" >> #include "clang/AST/DeclObjC.h" >> #include "clang/AST/DeclTemplate.h" >> +#include "clang/AST/Expr.h" >> #include "clang/AST/ExprCXX.h" >> #include "clang/AST/VTableBuilder.h" >> #include "clang/Basic/ABI.h" >> @@ -27,6 +28,9 @@ >> #include "clang/Basic/TargetInfo.h" >> #include "llvm/ADT/StringExtras.h" >> #include "llvm/ADT/StringMap.h" >> +#include "llvm/Support/MathExtras.h" >> + >> +#include <algorithm> >> >> using namespace clang; >> >> @@ -93,6 +97,7 @@ public: >> MicrosoftMangleContextImpl(ASTContext &Context, DiagnosticsEngine >> &Diags) >> : MicrosoftMangleContext(Context, Diags) {} >> bool shouldMangleCXXName(const NamedDecl *D) override; >> + bool shouldMangleStringLiteral(const StringLiteral *SL) override; >> void mangleCXXName(const NamedDecl *D, raw_ostream &Out) override; >> void mangleVirtualMemPtrThunk(const CXXMethodDecl *MD, raw_ostream &) >> override; >> void mangleThunk(const CXXMethodDecl *MD, const ThunkInfo &Thunk, >> @@ -118,6 +123,7 @@ public: >> void mangleDynamicInitializer(const VarDecl *D, raw_ostream &Out) >> override; >> void mangleDynamicAtExitDestructor(const VarDecl *D, >> raw_ostream &Out) override; >> + void mangleStringLiteral(const StringLiteral *SL, raw_ostream &Out) >> override; >> bool getNextDiscriminator(const NamedDecl *ND, unsigned &disc) { >> // Lambda closure types are already numbered. >> if (isLambda(ND)) >> @@ -321,6 +327,13 @@ bool MicrosoftMangleContextImpl::shouldM >> return true; >> } >> >> +bool >> +MicrosoftMangleContextImpl::shouldMangleStringLiteral(const >> StringLiteral *SL) { >> + return SL->isAscii() || SL->isWide(); >> + // TODO: This needs to be updated when MSVC gains support for Unicode >> + // literals. >> +} >> + >> void MicrosoftCXXNameMangler::mangle(const NamedDecl *D, >> StringRef Prefix) { >> // MSVC doesn't mangle C++ names the same way it mangles extern "C" >> names. >> @@ -2315,6 +2328,162 @@ MicrosoftMangleContextImpl::mangleDynami >> mangleInitFiniStub(D, Out, 'F'); >> } >> >> +void MicrosoftMangleContextImpl::mangleStringLiteral(const StringLiteral >> *SL, >> + raw_ostream &Out) { >> + // <char-type> ::= 0 # char >> + // ::= 1 # wchar_t >> + // ::= ??? # char16_t/char32_t will need a mangling too... >> + // >> + // <literal-length> ::= <non-negative integer> # the length of the >> literal >> + // >> + // <encoded-crc> ::= <hex digit>+ @ # crc of the literal >> including >> + // # null-terminator >> + // >> + // <encoded-string> ::= <simple character> # uninteresting >> character >> + // ::= '?$' <hex digit> <hex digit> # these two >> nibbles >> + // # encode the byte >> for the >> + // # character >> + // ::= '?' [a-z] # \xe1 - \xfa >> + // ::= '?' [A-Z] # \xc1 - \xda >> + // ::= '?' [0-9] # [,/\:. \n\t'-] >> + // >> + // <literal> ::= '??_C@_' <char-type> <literal-length> <encoded-crc> >> + // <encoded-string> '@' >> + MicrosoftCXXNameMangler Mangler(*this, Out); >> + Mangler.getStream() << "\01??_C@_"; >> + >> + // <char-type>: The "kind" of string literal is encoded into the >> mangled name. >> + // TODO: This needs to be updated when MSVC gains support for unicode >> + // literals. >> + if (SL->isAscii()) >> + Mangler.getStream() << '0'; >> + else if (SL->isWide()) >> + Mangler.getStream() << '1'; >> + else >> + llvm_unreachable("unexpected string literal kind!"); >> + >> + // <literal-length>: The next part of the mangled name consists of the >> length >> + // of the string. >> + // The StringLiteral does not consider the NUL terminator byte(s) but >> the >> + // mangling does. >> + // N.B. The length is in terms of bytes, not characters. >> + Mangler.mangleNumber(SL->getByteLength() + SL->getCharByteWidth()); >> + >> + // We will use the "Rocksoft^tm Model CRC Algorithm" to describe the >> + // properties of our CRC: >> + // Width : 32 >> + // Poly : 04C11DB7 >> + // Init : FFFFFFFF >> + // RefIn : True >> + // RefOut : True >> + // XorOut : 00000000 >> + // Check : 340BC6D9 >> + uint32_t CRC = 0xFFFFFFFFU; >> + >> + auto UpdateCRC = [&CRC](char Byte) { >> + for (unsigned i = 0; i < 8; ++i) { >> + bool Bit = CRC & 0x80000000U; >> + if (Byte & (1U << i)) >> + Bit = !Bit; >> + CRC <<= 1; >> + if (Bit) >> + CRC ^= 0x04C11DB7U; >> + } >> + }; >> + >> + // CRC all the bytes of the StringLiteral. >> + for (char Byte : SL->getBytes()) >> + UpdateCRC(Byte); >> + >> + // The NUL terminator byte(s) were not present earlier, >> + // we need to manually process those bytes into the CRC. >> + for (unsigned NullTerminator = 0; NullTerminator < >> SL->getCharByteWidth(); >> + ++NullTerminator) >> + UpdateCRC('\x00'); >> + >> + // The literature refers to the process of reversing the bits in the >> final CRC >> + // output as "reflection". >> + CRC = llvm::reverseBits(CRC); >> + >> + // <encoded-crc>: The CRC is encoded utilizing the standard number >> mangling >> + // scheme. >> + Mangler.mangleNumber(CRC); >> + >> + // <encoded-crc>: The mangled name also contains the first 32 >> _characters_ >> + // (including null-terminator bytes) of the StringLiteral. >> + // Each character is encoded by splitting them into bytes and then >> encoding >> + // the constituent bytes. >> + auto MangleCharacter = [&Mangler](char Byte) { >> + // There are five different manglings for characters: >> + // - ?[0-9]: The set of [,/\:. \n\t'-]. >> + // - [a-zA-Z0-9_$]: A one-to-one mapping. >> + // - ?[a-z]: The range from \xe1 to \xfa. >> + // - ?[A-Z]: The range from \xc1 to \xda. >> + // - ?$XX: A fallback which maps nibbles. >> + static const char SpecialMap[] = {',', '/', '\\', ':', '.', >> + ' ', '\n', '\t', '\'', '-'}; >> + const char *SpecialMapI = >> + std::find(std::begin(SpecialMap), std::end(SpecialMap), Byte); >> > > Do we generate good code for this? Maybe move it down into the 'else' > block to avoid the linear scan in the common case? > I was hoping that we would turn this into a memchr or a simple vector operation, we aren't that lucky though :/ I've verified that the new formulation has an IR switch instruction. > > >> + if (SpecialMapI != std::end(SpecialMap)) { >> + Mangler.getStream() << '?' << SpecialMapI - SpecialMap; >> + } else if ((Byte >= 'a' && Byte <= 'z') || (Byte >= 'A' && Byte <= >> 'Z') || >> + (Byte >= '0' && Byte <= '9') || Byte == '_' || Byte == >> '$') { >> + Mangler.getStream() << Byte; >> + } else if (Byte >= '\xe1' && Byte <= '\xfa') { >> + Mangler.getStream() << '?' << (char)('a' + Byte - '\xe1'); >> + } else if (Byte >= '\xc1' && Byte <= '\xda') { >> + Mangler.getStream() << '?' << (char)('A' + Byte - '\xc1'); >> + } else { >> + Mangler.getStream() << "?$"; >> + Mangler.getStream() << (char)('A' + ((Byte >> 4) & 0xf)); >> + Mangler.getStream() << (char)('A' + (Byte & 0xf)); >> + } >> + }; >> + >> + // Enforce our 32 character max. >> + unsigned MaxBytes = 32 * SL->getCharByteWidth(); >> + StringRef Bytes = SL->getBytes().substr(0, MaxBytes); >> + size_t BytesLength = Bytes.size(); >> + >> + if (SL->isAscii()) { >> + // A character maps directly to a byte for ASCII StringLiterals. >> + for (char Byte : Bytes) >> + MangleCharacter(Byte); >> + } else if (SL->isWide()) { >> + // The ordering of bytes in a wide StringLiteral is like so: >> + // A B C D ... >> + // However, they are mangled in the following order: >> + // B A D C ... >> > > I don't believe you =) > > The ordering of bytes in a wide StringLiteral depends on the endianness of > the host machine. If the strings are mangled in big-endian order, we should > do that properly rather than assuming the host is little-endian. > Yep, sorry about that. On the bright side, I now recast this as an operation on code-points instead of byte which simplified the code a bit. > > >> + for (size_t i = 0; i != BytesLength;) { >> + char FirstByte = Bytes[i]; >> + ++i; >> + if (i != BytesLength) { >> + char SecondByte = Bytes[i]; >> + ++i; >> + MangleCharacter(SecondByte); >> + } >> + MangleCharacter(FirstByte); >> + } >> + } else { >> + llvm_unreachable("unexpected string literal kind!"); >> + // TODO: This needs to be updated when MSVC gains support for Unicode >> + // literals. >> + } >> + >> + // We should also encode the NUL terminator(s) if we encoded less than >> 32 >> + // characters. >> > > "less" should be "fewer" =) > I ended up replacing the offending sentence with something different. > > >> + if (BytesLength < MaxBytes) { >> + size_t PaddingBytes = SL->getCharByteWidth(); >> + size_t BytesLeft = MaxBytes - BytesLength; >> + if (BytesLeft < PaddingBytes) >> + PaddingBytes = BytesLeft; >> + for (unsigned i = 0; i < PaddingBytes; ++i) >> + MangleCharacter('\x00'); >> + } >> + >> + Mangler.getStream() << '@'; >> +} >> + >> MicrosoftMangleContext * >> MicrosoftMangleContext::create(ASTContext &Context, DiagnosticsEngine >> &Diags) { >> return new MicrosoftMangleContextImpl(Context, Diags); >> >> Modified: cfe/trunk/lib/CodeGen/CodeGenModule.cpp >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.cpp?rev=204562&r1=204561&r2=204562&view=diff >> >> ============================================================================== >> --- cfe/trunk/lib/CodeGen/CodeGenModule.cpp (original) >> +++ cfe/trunk/lib/CodeGen/CodeGenModule.cpp Sun Mar 23 12:47:16 2014 >> @@ -2572,25 +2572,67 @@ CodeGenModule::GetConstantArrayFromStrin >> llvm::Constant * >> CodeGenModule::GetAddrOfConstantStringFromLiteral(const StringLiteral >> *S) { >> CharUnits Align = >> getContext().getAlignOfGlobalVarInChars(S->getType()); >> - if (S->isAscii() || S->isUTF8()) { >> - SmallString<64> Str(S->getString()); >> - >> - // Resize the string to the right size, which is indicated by its >> type. >> - const ConstantArrayType *CAT = >> Context.getAsConstantArrayType(S->getType()); >> - Str.resize(CAT->getSize().getZExtValue()); >> - return GetAddrOfConstantString(Str, /*GlobalName*/ 0, >> Align.getQuantity()); >> + >> + llvm::StringMapEntry<llvm::GlobalVariable *> *Entry = nullptr; >> + llvm::GlobalVariable *GV = nullptr; >> + if (!LangOpts.WritableStrings) { >> + llvm::StringMap<llvm::GlobalVariable *> *ConstantStringMap = nullptr; >> + switch (S->getCharByteWidth()) { >> + case 1: >> + ConstantStringMap = &Constant1ByteStringMap; >> + break; >> + case 2: >> + ConstantStringMap = &Constant2ByteStringMap; >> + break; >> + case 4: >> + ConstantStringMap = &Constant4ByteStringMap; >> + break; >> + default: >> + llvm_unreachable("unhandled byte width!"); >> + } >> + Entry = &ConstantStringMap->GetOrCreateValue(S->getBytes()); >> + GV = Entry->getValue(); >> + } >> + >> + if (!GV) { >> + StringRef GlobalVariableName; >> + llvm::GlobalValue::LinkageTypes LT; >> + if (!LangOpts.WritableStrings && >> + getCXXABI().getMangleContext().shouldMangleStringLiteral(S)) { >> + LT = llvm::GlobalValue::LinkOnceODRLinkage; >> + >> + SmallString<256> Buffer; >> + llvm::raw_svector_ostream Out(Buffer); >> + getCXXABI().getMangleContext().mangleStringLiteral(S, Out); >> + Out.flush(); >> + >> + size_t Length = Buffer.size(); >> + char *Name = MangledNamesAllocator.Allocate<char>(Length); >> > > Do you really need an allocation and a copy here? Moving 'Buffer' outside > the 'if' would seem sufficient -- creating the GlobalVariable will make its > own copy. > I didn't, it's code that survived from a different way of phrasing the memoization. > > + std::copy(Buffer.begin(), Buffer.end(), Name); >> + GlobalVariableName = StringRef(Name, Length); >> + } else { >> + LT = llvm::GlobalValue::PrivateLinkage;; >> + GlobalVariableName = ".str"; >> + } >> + >> + // OpenCL v1.2 s6.5.3: a string literal is in the constant address >> space. >> + unsigned AddrSpace = 0; >> + if (getLangOpts().OpenCL) >> + AddrSpace = >> getContext().getTargetAddressSpace(LangAS::opencl_constant); >> + >> + llvm::Constant *C = GetConstantArrayFromStringLiteral(S); >> + GV = new llvm::GlobalVariable( >> + getModule(), C->getType(), !LangOpts.WritableStrings, LT, C, >> + GlobalVariableName, /*InsertBefore=*/nullptr, >> + llvm::GlobalVariable::NotThreadLocal, AddrSpace); >> + GV->setUnnamedAddr(true); >> + if (Entry) >> + Entry->setValue(GV); >> } >> >> - // FIXME: the following does not memoize wide strings. >> - llvm::Constant *C = GetConstantArrayFromStringLiteral(S); >> - llvm::GlobalVariable *GV = >> - new llvm::GlobalVariable(getModule(),C->getType(), >> - !LangOpts.WritableStrings, >> - llvm::GlobalValue::PrivateLinkage, >> - C,".str"); >> + if (Align.getQuantity() > GV->getAlignment()) >> + GV->setAlignment(Align.getQuantity()); >> >> - GV->setAlignment(Align.getQuantity()); >> - GV->setUnnamedAddr(true); >> return GV; >> } >> >> @@ -2615,7 +2657,7 @@ static llvm::GlobalVariable *GenerateStr >> llvm::Constant *C = >> llvm::ConstantDataArray::getString(CGM.getLLVMContext(), str, >> false); >> >> - // OpenCL v1.1 s6.5.3: a string literal is in the constant address >> space. >> + // OpenCL v1.2 s6.5.3: a string literal is in the constant address >> space. >> unsigned AddrSpace = 0; >> if (CGM.getLangOpts().OpenCL) >> AddrSpace = >> CGM.getContext().getTargetAddressSpace(LangAS::opencl_constant); >> @@ -2654,7 +2696,7 @@ llvm::Constant *CodeGenModule::GetAddrOf >> return GenerateStringLiteral(Str, false, *this, GlobalName, >> Alignment); >> >> llvm::StringMapEntry<llvm::GlobalVariable *> &Entry = >> - ConstantStringMap.GetOrCreateValue(Str); >> + Constant1ByteStringMap.GetOrCreateValue(Str); >> >> if (llvm::GlobalVariable *GV = Entry.getValue()) { >> if (Alignment > GV->getAlignment()) { >> >> Modified: cfe/trunk/lib/CodeGen/CodeGenModule.h >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/CodeGen/CodeGenModule.h?rev=204562&r1=204561&r2=204562&view=diff >> >> ============================================================================== >> --- cfe/trunk/lib/CodeGen/CodeGenModule.h (original) >> +++ cfe/trunk/lib/CodeGen/CodeGenModule.h Sun Mar 23 12:47:16 2014 >> @@ -319,7 +319,10 @@ class CodeGenModule : public CodeGenType >> llvm::StringMap<llvm::Constant*> AnnotationStrings; >> >> llvm::StringMap<llvm::Constant*> CFConstantStringMap; >> - llvm::StringMap<llvm::GlobalVariable*> ConstantStringMap; >> + >> + llvm::StringMap<llvm::GlobalVariable *> Constant1ByteStringMap; >> + llvm::StringMap<llvm::GlobalVariable *> Constant2ByteStringMap; >> + llvm::StringMap<llvm::GlobalVariable *> Constant4ByteStringMap; >> llvm::DenseMap<const Decl*, llvm::Constant *> StaticLocalDeclMap; >> llvm::DenseMap<const Decl*, llvm::GlobalVariable*> >> StaticLocalDeclGuardMap; >> llvm::DenseMap<const Expr*, llvm::Constant *> >> MaterializedGlobalTemporaryMap; >> >> Modified: cfe/trunk/test/CodeGen/wchar-const.c >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/test/CodeGen/wchar-const.c?rev=204562&r1=204561&r2=204562&view=diff >> >> ============================================================================== >> --- cfe/trunk/test/CodeGen/wchar-const.c (original) >> +++ cfe/trunk/test/CodeGen/wchar-const.c Sun Mar 23 12:47:16 2014 >> @@ -15,7 +15,7 @@ typedef __WCHAR_TYPE__ wchar_t; >> >> >> // CHECK-DAR: private unnamed_addr constant [18 x i32] [i32 84, >> -// CHECK-WIN: private unnamed_addr constant [18 x i16] [i16 84, >> +// CHECK-WIN: linkonce_odr unnamed_addr constant [18 x i16] [i16 84, >> extern void foo(const wchar_t* p); >> int main (int argc, const char * argv[]) >> { > > > You seem to be very short on tests; did you forget to svn add a file? > Yep, that was indeed the case. Sorry about that! These should all be addressed in r204586.
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
