riccibruno updated this revision to Diff 174032.
riccibruno added a comment.

Factored out some NFCs (no-functional-change cleanups), which I will submit separately.


Repository:
  rC Clang

https://reviews.llvm.org/D54166

Files:
  include/clang/AST/Expr.h
  include/clang/AST/Stmt.h
  lib/AST/Expr.cpp
  lib/Serialization/ASTReaderStmt.cpp
  lib/Serialization/ASTWriterStmt.cpp

Index: lib/Serialization/ASTWriterStmt.cpp
===================================================================
--- lib/Serialization/ASTWriterStmt.cpp
+++ lib/Serialization/ASTWriterStmt.cpp
@@ -518,17 +518,23 @@
 
 void ASTStmtWriter::VisitStringLiteral(StringLiteral *E) {
   VisitExpr(E);
-  Record.push_back(E->getByteLength());
+
+  // Store the various bits of data of StringLiteral.
   Record.push_back(E->getNumConcatenated());
+  Record.push_back(E->getLength());
+  Record.push_back(E->getCharByteWidth());
   Record.push_back(E->getKind());
   Record.push_back(E->isPascal());
-  // FIXME: String data should be stored as a blob at the end of the
-  // StringLiteral. However, we can't do so now because we have no
-  // provision for coping with abbreviations when we're jumping around
-  // the AST file during deserialization.
-  Record.append(E->getBytes().begin(), E->getBytes().end());
+
+  // Store the trailing array of SourceLocation.
   for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
     Record.AddSourceLocation(E->getStrTokenLoc(I));
+
+  // Store the trailing array of char holding the string data.
+  StringRef StrData = E->getBytes();
+  for (unsigned I = 0, N = E->getByteLength(); I != N; ++I)
+    Record.push_back(StrData[I]);
+
   Code = serialization::EXPR_STRING_LITERAL;
 }
 
Index: lib/Serialization/ASTReaderStmt.cpp
===================================================================
--- lib/Serialization/ASTReaderStmt.cpp
+++ lib/Serialization/ASTReaderStmt.cpp
@@ -595,22 +595,35 @@
 
 void ASTStmtReader::VisitStringLiteral(StringLiteral *E) {
   VisitExpr(E);
-  unsigned Len = Record.readInt();
-  assert(Record.peekInt() == E->getNumConcatenated() &&
-         "Wrong number of concatenated tokens!");
-  Record.skipInts(1);
-  auto kind = static_cast<StringLiteral::StringKind>(Record.readInt());
-  bool isPascal = Record.readInt();
 
-  // Read string data
-  auto B = &Record.peekInt();
-  SmallString<16> Str(B, B + Len);
-  E->setString(Record.getContext(), Str, kind, isPascal);
-  Record.skipInts(Len);
-
-  // Read source locations
-  for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
+  // NumConcatenated, Length and CharByteWidth are set by the empty
+  // ctor since they are needed to allocate storage for the trailing objects.
+  unsigned NumConcatenated = Record.readInt();
+  unsigned Length = Record.readInt();
+  unsigned CharByteWidth = Record.readInt();
+  assert((NumConcatenated == E->getNumConcatenated()) &&
+         "Wrong number of concatenated tokens!");
+  assert((Length == E->getLength()) && "Wrong Length!");
+  assert((CharByteWidth == E->getCharByteWidth()) && "Wrong character width!");
+  E->StringLiteralBits.Kind = Record.readInt();
+  E->StringLiteralBits.IsPascal = Record.readInt();
+
+  // The character width is originally computed via mapCharByteWidth.
+  // Check that the deserialized character width is consistent with the result
+  // of calling mapCharByteWidth.
+  assert((CharByteWidth ==
+          StringLiteral::mapCharByteWidth(Record.getContext().getTargetInfo(),
+                                          E->getKind())) &&
+         "Wrong character width!");
+
+  // Deserialize the trailing array of SourceLocation.
+  for (unsigned I = 0; I < NumConcatenated; ++I)
     E->setStrTokenLoc(I, ReadSourceLocation());
+
+  // Deserialize the trailing array of char holding the string data.
+  char *StrData = E->getStrDataAsChar();
+  for (unsigned I = 0; I < Length * CharByteWidth; ++I)
+    StrData[I] = Record.readInt();
 }
 
 void ASTStmtReader::VisitCharacterLiteral(CharacterLiteral *E) {
@@ -2423,8 +2436,11 @@
       break;
 
     case EXPR_STRING_LITERAL:
-      S = StringLiteral::CreateEmpty(Context,
-                                     Record[ASTStmtReader::NumExprFields + 1]);
+      S = StringLiteral::CreateEmpty(
+          Context,
+          /* NumConcatenated=*/Record[ASTStmtReader::NumExprFields + 0],
+          /* Length=*/Record[ASTStmtReader::NumExprFields + 1],
+          /* CharByteWidth=*/Record[ASTStmtReader::NumExprFields + 2]);
       break;
 
     case EXPR_CHARACTER_LITERAL:
Index: lib/AST/Expr.cpp
===================================================================
--- lib/AST/Expr.cpp
+++ lib/AST/Expr.cpp
@@ -912,42 +912,80 @@
   return CharByteWidth;
 }
 
-StringLiteral *StringLiteral::Create(const ASTContext &C, StringRef Str,
-                                     StringKind Kind, bool Pascal, QualType Ty,
-                                     const SourceLocation *Loc,
-                                     unsigned NumStrs) {
-  assert(C.getAsConstantArrayType(Ty) &&
+StringLiteral::StringLiteral(const ASTContext &Ctx, StringRef Str,
+                             StringKind Kind, bool Pascal, QualType Ty,
+                             const SourceLocation *Loc,
+                             unsigned NumConcatenated)
+    : Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary, false, false, false,
+           false) {
+  assert(Ctx.getAsConstantArrayType(Ty) &&
          "StringLiteral must be of constant array type!");
+  unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
+  unsigned ByteLength = Str.size();
+  assert((ByteLength % CharByteWidth == 0) &&
+         "The size of the data must be a multiple of CharByteWidth!");
+
+  // Avoid the expensive division. The compiler should be able to figure it
+  // out by itself. However as of clang 7, even with the appropriate
+  // llvm_unreachable added just here, it is not able to do so.
+  unsigned Length;
+  switch (CharByteWidth) {
+  case 1:
+    Length = ByteLength;
+    break;
+  case 2:
+    Length = ByteLength / 2;
+    break;
+  case 4:
+    Length = ByteLength / 4;
+    break;
+  default:
+    llvm_unreachable("Unsupported character width!");
+  }
 
-  // Allocate enough space for the StringLiteral plus an array of locations for
-  // any concatenated string tokens.
-  void *Mem =
-      C.Allocate(sizeof(StringLiteral) + sizeof(SourceLocation) * (NumStrs - 1),
-                 alignof(StringLiteral));
-  StringLiteral *SL = new (Mem) StringLiteral(Ty);
+  StringLiteralBits.Kind = Kind;
+  StringLiteralBits.CharByteWidth = CharByteWidth;
+  StringLiteralBits.IsPascal = Pascal;
+  StringLiteralBits.NumConcatenated = NumConcatenated;
+  *getTrailingObjects<unsigned>() = Length;
 
-  // OPTIMIZE: could allocate this appended to the StringLiteral.
-  SL->setString(C,Str,Kind,Pascal);
+  // Initialize the trailing array of SourceLocation.
+  // This is safe since SourceLocation is POD-like.
+  std::memcpy(getTrailingObjects<SourceLocation>(), Loc,
+              NumConcatenated * sizeof(SourceLocation));
 
-  SL->TokLocs[0] = Loc[0];
-  SL->NumConcatenated = NumStrs;
+  // Initialize the trailing array of char holding the string data.
+  std::memcpy(getTrailingObjects<char>(), Str.data(), ByteLength);
+}
 
-  if (NumStrs != 1)
-    memcpy(&SL->TokLocs[1], Loc+1, sizeof(SourceLocation)*(NumStrs-1));
-  return SL;
+StringLiteral::StringLiteral(EmptyShell Empty, unsigned NumConcatenated,
+                             unsigned Length, unsigned CharByteWidth)
+    : Expr(StringLiteralClass, Empty) {
+  StringLiteralBits.CharByteWidth = CharByteWidth;
+  StringLiteralBits.NumConcatenated = NumConcatenated;
+  *getTrailingObjects<unsigned>() = Length;
 }
 
-StringLiteral *StringLiteral::CreateEmpty(const ASTContext &C,
-                                          unsigned NumStrs) {
-  void *Mem =
-      C.Allocate(sizeof(StringLiteral) + sizeof(SourceLocation) * (NumStrs - 1),
-                 alignof(StringLiteral));
-  StringLiteral *SL =
-      new (Mem) StringLiteral(C.adjustStringLiteralBaseType(QualType()));
-  SL->CharByteWidth = 0;
-  SL->Length = 0;
-  SL->NumConcatenated = NumStrs;
-  return SL;
+StringLiteral *StringLiteral::Create(const ASTContext &Ctx, StringRef Str,
+                                     StringKind Kind, bool Pascal, QualType Ty,
+                                     const SourceLocation *Loc,
+                                     unsigned NumConcatenated) {
+  void *Mem = Ctx.Allocate(totalSizeToAlloc<unsigned, SourceLocation, char>(
+                               1, NumConcatenated, Str.size()),
+                           alignof(StringLiteral));
+  return new (Mem)
+      StringLiteral(Ctx, Str, Kind, Pascal, Ty, Loc, NumConcatenated);
+}
+
+StringLiteral *StringLiteral::CreateEmpty(const ASTContext &Ctx,
+                                          unsigned NumConcatenated,
+                                          unsigned Length,
+                                          unsigned CharByteWidth) {
+  void *Mem = Ctx.Allocate(totalSizeToAlloc<unsigned, SourceLocation, char>(
+                               1, NumConcatenated, Length * CharByteWidth),
+                           alignof(StringLiteral));
+  return new (Mem)
+      StringLiteral(EmptyShell(), NumConcatenated, Length, CharByteWidth);
 }
 
 void StringLiteral::outputString(raw_ostream &OS) const {
@@ -1046,42 +1084,6 @@
   OS << '"';
 }
 
-void StringLiteral::setString(const ASTContext &C, StringRef Str,
-                              StringKind Kind, bool IsPascal) {
-  //FIXME: we assume that the string data comes from a target that uses the same
-  // code unit size and endianness for the type of string.
-  this->Kind = Kind;
-  this->IsPascal = IsPascal;
-
-  CharByteWidth = mapCharByteWidth(C.getTargetInfo(),Kind);
-  assert((Str.size()%CharByteWidth == 0)
-         && "size of data must be multiple of CharByteWidth");
-  Length = Str.size()/CharByteWidth;
-
-  switch(CharByteWidth) {
-    case 1: {
-      char *AStrData = new (C) char[Length];
-      std::memcpy(AStrData,Str.data(),Length*sizeof(*AStrData));
-      StrData.asChar = AStrData;
-      break;
-    }
-    case 2: {
-      uint16_t *AStrData = new (C) uint16_t[Length];
-      std::memcpy(AStrData,Str.data(),Length*sizeof(*AStrData));
-      StrData.asUInt16 = AStrData;
-      break;
-    }
-    case 4: {
-      uint32_t *AStrData = new (C) uint32_t[Length];
-      std::memcpy(AStrData,Str.data(),Length*sizeof(*AStrData));
-      StrData.asUInt32 = AStrData;
-      break;
-    }
-    default:
-      llvm_unreachable("unsupported CharByteWidth");
-  }
-}
-
 /// getLocationOfByte - Return a source location that points to the specified
 /// byte of this string literal.
 ///
Index: include/clang/AST/Stmt.h
===================================================================
--- include/clang/AST/Stmt.h
+++ include/clang/AST/Stmt.h
@@ -366,6 +366,28 @@
     unsigned IsExact : 1;
   };
 
+  class StringLiteralBitfields {
+    friend class ASTStmtReader;
+    friend class StringLiteral;
+
+    unsigned : NumExprBits;
+
+    /// The kind of this string literal.
+    /// One of the enumeration values of StringLiteral::StringKind.
+    unsigned Kind : 3;
+
+    /// The width of a single character in bytes. Only values of 1, 2,
+    /// and 4 bytes are supported. StringLiteral::mapCharByteWidth maps
+    /// the target + string kind to the appropriate CharByteWidth.
+    unsigned CharByteWidth : 3;
+
+    unsigned IsPascal : 1;
+
+    /// The number of concatenated tokens this string is made of.
+    /// This is the number of trailing SourceLocation.
+    unsigned NumConcatenated;
+  };
+
   class CharacterLiteralBitfields {
     friend class CharacterLiteral;
 
@@ -563,6 +585,7 @@
     PredefinedExprBitfields PredefinedExprBits;
     DeclRefExprBitfields DeclRefExprBits;
     FloatingLiteralBitfields FloatingLiteralBits;
+    StringLiteralBitfields StringLiteralBits;
     CharacterLiteralBitfields CharacterLiteralBits;
     UnaryOperatorBitfields UnaryOperatorBits;
     UnaryExprOrTypeTraitExprBitfields UnaryExprOrTypeTraitExprBits;
Index: include/clang/AST/Expr.h
===================================================================
--- include/clang/AST/Expr.h
+++ include/clang/AST/Expr.h
@@ -1568,97 +1568,131 @@
 ///   char X[2] = "foobar";
 /// In this case, getByteLength() will return 6, but the string literal will
 /// have type "char[2]".
-class StringLiteral : public Expr {
+class StringLiteral final
+    : public Expr,
+      private llvm::TrailingObjects<StringLiteral, unsigned, SourceLocation,
+                                    char> {
+  friend class ASTStmtReader;
+  friend TrailingObjects;
+
+  /// StringLiteral is followed by several trailing objects. They are in order:
+  ///
+  /// * A single unsigned storing the length in characters of this string. The
+  ///   length in bytes is this length times the width of a single character.
+  ///   Always present and stored as a trailing object because storing it in
+  ///   StringLiteral would increase the size of StringLiteral by sizeof(void *)
+  ///   due to alignment requirements. If you add some data to StringLiteral,
+  ///   consider moving it inside StringLiteral.
+  ///
+  /// * An array of getNumConcatenated() SourceLocation, one for each of the
+  ///   tokens this string is made of.
+  ///
+  /// * An array of getByteLength() char used to store the string data.
+
 public:
   enum StringKind { Ascii, Wide, UTF8, UTF16, UTF32 };
 
 private:
-  friend class ASTStmtReader;
+  unsigned numTrailingObjects(OverloadToken<unsigned>) const { return 1; }
+  unsigned numTrailingObjects(OverloadToken<SourceLocation>) const {
+    return getNumConcatenated();
+  }
 
-  union {
-    const char *asChar;
-    const uint16_t *asUInt16;
-    const uint32_t *asUInt32;
-  } StrData;
-  unsigned Length;
-  unsigned CharByteWidth : 4;
-  unsigned Kind : 3;
-  unsigned IsPascal : 1;
-  unsigned NumConcatenated;
-  SourceLocation TokLocs[1];
-
-  StringLiteral(QualType Ty) :
-    Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary, false, false, false,
-         false) {}
-
-  static int mapCharByteWidth(TargetInfo const &target,StringKind k);
+  unsigned numTrailingObjects(OverloadToken<char>) const {
+    return getByteLength();
+  }
+
+  char *getStrDataAsChar() { return getTrailingObjects<char>(); }
+  const char *getStrDataAsChar() const { return getTrailingObjects<char>(); }
+
+  const uint16_t *getStrDataAsUInt16() const {
+    return reinterpret_cast<const uint16_t *>(getTrailingObjects<char>());
+  }
+
+  const uint32_t *getStrDataAsUInt32() const {
+    return reinterpret_cast<const uint32_t *>(getTrailingObjects<char>());
+  }
+
+  /// Build a string literal.
+  StringLiteral(const ASTContext &Ctx, StringRef Str, StringKind Kind,
+                bool Pascal, QualType Ty, const SourceLocation *Loc,
+                unsigned NumConcatenated);
+
+  /// Build an empty string literal.
+  StringLiteral(EmptyShell Empty, unsigned NumConcatenated, unsigned Length,
+                unsigned CharByteWidth);
+
+  /// Map a target and string kind to the appropriate character width.
+  static unsigned mapCharByteWidth(TargetInfo const &Target, StringKind SK);
+
+  /// Set the location of one of the string literal tokens.
+  void setStrTokenLoc(unsigned TokNum, SourceLocation L) {
+    assert(TokNum < getNumConcatenated() && "Invalid tok number");
+    getTrailingObjects<SourceLocation>()[TokNum] = L;
+  }
 
 public:
   /// This is the "fully general" constructor that allows representation of
   /// strings formed from multiple concatenated tokens.
-  static StringLiteral *Create(const ASTContext &C, StringRef Str,
+  static StringLiteral *Create(const ASTContext &Ctx, StringRef Str,
                                StringKind Kind, bool Pascal, QualType Ty,
-                               const SourceLocation *Loc, unsigned NumStrs);
+                               const SourceLocation *Loc,
+                               unsigned NumConcatenated);
 
   /// Simple constructor for string literals made from one token.
-  static StringLiteral *Create(const ASTContext &C, StringRef Str,
+  static StringLiteral *Create(const ASTContext &Ctx, StringRef Str,
                                StringKind Kind, bool Pascal, QualType Ty,
                                SourceLocation Loc) {
-    return Create(C, Str, Kind, Pascal, Ty, &Loc, 1);
+    return Create(Ctx, Str, Kind, Pascal, Ty, &Loc, 1);
   }
 
   /// Construct an empty string literal.
-  static StringLiteral *CreateEmpty(const ASTContext &C, unsigned NumStrs);
+  static StringLiteral *CreateEmpty(const ASTContext &Ctx,
+                                    unsigned NumConcatenated, unsigned Length,
+                                    unsigned CharByteWidth);
 
   StringRef getString() const {
-    assert(CharByteWidth==1
-           && "This function is used in places that assume strings use char");
-    return StringRef(StrData.asChar, getByteLength());
+    assert(getCharByteWidth() == 1 &&
+           "This function is used in places that assume strings use char");
+    return StringRef(getStrDataAsChar(), getByteLength());
   }
 
   /// Allow access to clients that need the byte representation, such as
   /// ASTWriterStmt::VisitStringLiteral().
   StringRef getBytes() const {
     // FIXME: StringRef may not be the right type to use as a result for this.
-    if (CharByteWidth == 1)
-      return StringRef(StrData.asChar, getByteLength());
-    if (CharByteWidth == 4)
-      return StringRef(reinterpret_cast<const char*>(StrData.asUInt32),
-                       getByteLength());
-    assert(CharByteWidth == 2 && "unsupported CharByteWidth");
-    return StringRef(reinterpret_cast<const char*>(StrData.asUInt16),
-                     getByteLength());
+    return StringRef(getStrDataAsChar(), getByteLength());
   }
 
   void outputString(raw_ostream &OS) const;
 
   uint32_t getCodeUnit(size_t i) const {
-    assert(i < Length && "out of bounds access");
-    if (CharByteWidth == 1)
-      return static_cast<unsigned char>(StrData.asChar[i]);
-    if (CharByteWidth == 4)
-      return StrData.asUInt32[i];
-    assert(CharByteWidth == 2 && "unsupported CharByteWidth");
-    return StrData.asUInt16[i];
+    assert(i < getLength() && "out of bounds access");
+    switch (getCharByteWidth()) {
+    case 1:
+      return static_cast<unsigned char>(getStrDataAsChar()[i]);
+    case 2:
+      return getStrDataAsUInt16()[i];
+    case 4:
+      return getStrDataAsUInt32()[i];
+    }
+    llvm_unreachable("Unsupported character width!");
   }
 
-  unsigned getByteLength() const { return CharByteWidth*Length; }
-  unsigned getLength() const { return Length; }
-  unsigned getCharByteWidth() const { return CharByteWidth; }
-
-  /// Sets the string data to the given string data.
-  void setString(const ASTContext &C, StringRef Str,
-                 StringKind Kind, bool IsPascal);
-
-  StringKind getKind() const { return static_cast<StringKind>(Kind); }
+  unsigned getByteLength() const { return getCharByteWidth() * getLength(); }
+  unsigned getLength() const { return *getTrailingObjects<unsigned>(); }
+  unsigned getCharByteWidth() const { return StringLiteralBits.CharByteWidth; }
 
+  StringKind getKind() const {
+    return static_cast<StringKind>(StringLiteralBits.Kind);
+  }
 
-  bool isAscii() const { return Kind == Ascii; }
-  bool isWide() const { return Kind == Wide; }
-  bool isUTF8() const { return Kind == UTF8; }
-  bool isUTF16() const { return Kind == UTF16; }
-  bool isUTF32() const { return Kind == UTF32; }
-  bool isPascal() const { return IsPascal; }
+  bool isAscii() const { return getKind() == Ascii; }
+  bool isWide() const { return getKind() == Wide; }
+  bool isUTF8() const { return getKind() == UTF8; }
+  bool isUTF16() const { return getKind() == UTF16; }
+  bool isUTF32() const { return getKind() == UTF32; }
+  bool isPascal() const { return StringLiteralBits.IsPascal; }
 
   bool containsNonAscii() const {
     for (auto c : getString())
@@ -1676,15 +1710,14 @@
 
   /// getNumConcatenated - Get the number of string literal tokens that were
   /// concatenated in translation phase #6 to form this string literal.
-  unsigned getNumConcatenated() const { return NumConcatenated; }
+  unsigned getNumConcatenated() const {
+    return StringLiteralBits.NumConcatenated;
+  }
 
+  /// Get the location of one of the string literal tokens.
   SourceLocation getStrTokenLoc(unsigned TokNum) const {
-    assert(TokNum < NumConcatenated && "Invalid tok number");
-    return TokLocs[TokNum];
-  }
-  void setStrTokenLoc(unsigned TokNum, SourceLocation L) {
-    assert(TokNum < NumConcatenated && "Invalid tok number");
-    TokLocs[TokNum] = L;
+    assert(TokNum < getNumConcatenated() && "Invalid tok number");
+    return getTrailingObjects<SourceLocation>()[TokNum];
   }
 
   /// getLocationOfByte - Return a source location that points to the specified
@@ -1701,14 +1734,18 @@
                     unsigned *StartTokenByteOffset = nullptr) const;
 
   typedef const SourceLocation *tokloc_iterator;
-  tokloc_iterator tokloc_begin() const { return TokLocs; }
-  tokloc_iterator tokloc_end() const { return TokLocs + NumConcatenated; }
 
-  SourceLocation getBeginLoc() const LLVM_READONLY { return TokLocs[0]; }
-  SourceLocation getEndLoc() const LLVM_READONLY {
-    return TokLocs[NumConcatenated - 1];
+  tokloc_iterator tokloc_begin() const {
+    return getTrailingObjects<SourceLocation>();
+  }
+
+  tokloc_iterator tokloc_end() const {
+    return getTrailingObjects<SourceLocation>() + getNumConcatenated();
   }
 
+  SourceLocation getBeginLoc() const LLVM_READONLY { return *tokloc_begin(); }
+  SourceLocation getEndLoc() const LLVM_READONLY { return *(tokloc_end() - 1); }
+
   static bool classof(const Stmt *T) {
     return T->getStmtClass() == StringLiteralClass;
   }
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to