[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-15 Thread Phabricator via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL346969: [AST] Store the string data in StringLiteral in a 
trailing array of chars (authored by brunoricci, committed by ).
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D54166?vs=174032=174228#toc

Repository:
  rL LLVM

https://reviews.llvm.org/D54166

Files:
  cfe/trunk/include/clang/AST/Expr.h
  cfe/trunk/include/clang/AST/Stmt.h
  cfe/trunk/lib/AST/Expr.cpp
  cfe/trunk/lib/Serialization/ASTReaderStmt.cpp
  cfe/trunk/lib/Serialization/ASTWriterStmt.cpp

Index: cfe/trunk/include/clang/AST/Expr.h
===
--- cfe/trunk/include/clang/AST/Expr.h
+++ cfe/trunk/include/clang/AST/Expr.h
@@ -1568,98 +1568,131 @@
 ///   char X[2] = "foobar";
 /// In this case, getByteLength() will return 6, but the string literal will
 /// have type "char[2]".
-class StringLiteral : public Expr {
+class StringLiteral final
+: public Expr,
+  private llvm::TrailingObjects {
+  friend class ASTStmtReader;
+  friend TrailingObjects;
+
+  /// StringLiteral is followed by several trailing objects. They are in order:
+  ///
+  /// * A single unsigned storing the length in characters of this string. The
+  ///   length in bytes is this length times the width of a single character.
+  ///   Always present and stored as a trailing objects because storing it in
+  ///   StringLiteral would increase the size of StringLiteral by sizeof(void *)
+  ///   due to alignment requirements. If you add some data to StringLiteral,
+  ///   consider moving it inside StringLiteral.
+  ///
+  /// * An array of getNumConcatenated() SourceLocation, one for each of the
+  ///   token this string is made of.
+  ///
+  /// * An array of getByteLength() char used to store the string data.
+
 public:
   enum StringKind { Ascii, Wide, UTF8, UTF16, UTF32 };
 
 private:
-  friend class ASTStmtReader;
+  unsigned numTrailingObjects(OverloadToken) const { return 1; }
+  unsigned numTrailingObjects(OverloadToken) const {
+return getNumConcatenated();
+  }
 
-  union {
-const char *asChar;
-const uint16_t *asUInt16;
-const uint32_t *asUInt32;
-  } StrData;
-  unsigned Length;
-  unsigned CharByteWidth : 4;
-  unsigned Kind : 3;
-  unsigned IsPascal : 1;
-  unsigned NumConcatenated;
-  SourceLocation TokLocs[1];
-
-  StringLiteral(QualType Ty) :
-Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary, false, false, false,
- false) {}
+  unsigned numTrailingObjects(OverloadToken) const {
+return getByteLength();
+  }
+
+  char *getStrDataAsChar() { return getTrailingObjects(); }
+  const char *getStrDataAsChar() const { return getTrailingObjects(); }
+
+  const uint16_t *getStrDataAsUInt16() const {
+return reinterpret_cast(getTrailingObjects());
+  }
+
+  const uint32_t *getStrDataAsUInt32() const {
+return reinterpret_cast(getTrailingObjects());
+  }
+
+  /// Build a string literal.
+  StringLiteral(const ASTContext , StringRef Str, StringKind Kind,
+bool Pascal, QualType Ty, const SourceLocation *Loc,
+unsigned NumConcatenated);
+
+  /// Build an empty string literal.
+  StringLiteral(EmptyShell Empty, unsigned NumConcatenated, unsigned Length,
+unsigned CharByteWidth);
 
   /// Map a target and string kind to the appropriate character width.
   static unsigned mapCharByteWidth(TargetInfo const , StringKind SK);
 
+  /// Set one of the string literal token.
+  void setStrTokenLoc(unsigned TokNum, SourceLocation L) {
+assert(TokNum < getNumConcatenated() && "Invalid tok number");
+getTrailingObjects()[TokNum] = L;
+  }
+
 public:
   /// This is the "fully general" constructor that allows representation of
   /// strings formed from multiple concatenated tokens.
-  static StringLiteral *Create(const ASTContext , StringRef Str,
+  static StringLiteral *Create(const ASTContext , StringRef Str,
StringKind Kind, bool Pascal, QualType Ty,
-   const SourceLocation *Loc, unsigned NumStrs);
+   const SourceLocation *Loc,
+   unsigned NumConcatenated);
 
   /// Simple constructor for string literals made from one token.
-  static StringLiteral *Create(const ASTContext , StringRef Str,
+  static StringLiteral *Create(const ASTContext , StringRef Str,
StringKind Kind, bool Pascal, QualType Ty,
SourceLocation Loc) {
-return Create(C, Str, Kind, Pascal, Ty, , 1);
+return Create(Ctx, Str, Kind, Pascal, Ty, , 1);
   }
 
   /// Construct an empty string literal.
-  static StringLiteral *CreateEmpty(const ASTContext , unsigned NumStrs);
+  static StringLiteral *CreateEmpty(const ASTContext ,
+unsigned NumConcatenated, unsigned Length,
+ 

[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-14 Thread Bruno Ricci via Phabricator via cfe-commits
riccibruno added a comment.

In https://reviews.llvm.org/D54166#1299230, @rjmccall wrote:

> In https://reviews.llvm.org/D54166#1299149, @riccibruno wrote:
>
> > In https://reviews.llvm.org/D54166#1298889, @rjmccall wrote:
> >
> > > IIRC, abbreviations just silently don't take effect if the record doesn't 
> > > conform; so things will appear to work, but the size on disk will be 
> > > bigger.
> >
> >
> > I looked at where the abbreviations are defined, and it seems that the only 
> > abbreviations for
> >  statements/expressions are for `DeclRefExpr`, `IntegerLiteral`, 
> > `CharacterLiteral`
> >  and `ImplicitCastExpr` (grep for `EmitAbbrev` in `Serialization/`,
> >  for some reasons they are emitted in `WriteDeclAbbrev()`...).
> >
> > And indeed changing the serialization format of `CharacterLiteral` triggers 
> > various assertions
> >  because of the abbreviation. Therefore unless I am missing something no 
> > other statement/expression
> >  has currently an abbreviation. I suspect therefore that someone could go 
> > wild and cut the on-disk
> >  size of the serialization format significantly here.
> >
> > I looked at the size of the generated pch for all of Boost, and I am only 
> > seeing an increase of
> >  about 8k, which is entirely attributable to the fact that I am adding one 
> > field to the serialization
> >  format. I can rework it to remove this additional field if needed.
>
>
> If you're generally interested in improving build times, working on the 
> serialization format would probably be really valuable.  We don't generally 
> insist on a zero-regressions policy in other commits, though.


I was planning to finish packing the other expression classes to minimize the 
in-memory footprint
of the AST first. I will try to take a look at serialization if I have some 
time but no promise for now.
Thanks for the review !


Repository:
  rC Clang

https://reviews.llvm.org/D54166



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-14 Thread John McCall via Phabricator via cfe-commits
rjmccall accepted this revision.
rjmccall added a comment.
This revision is now accepted and ready to land.

LGTM.


Repository:
  rC Clang

https://reviews.llvm.org/D54166



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-14 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

In https://reviews.llvm.org/D54166#1299149, @riccibruno wrote:

> In https://reviews.llvm.org/D54166#1298889, @rjmccall wrote:
>
> > IIRC, abbreviations just silently don't take effect if the record doesn't 
> > conform; so things will appear to work, but the size on disk will be bigger.
>
>
> I looked at where the abbreviations are defined, and it seems that the only 
> abbreviations for
>  statements/expressions are for `DeclRefExpr`, `IntegerLiteral`, 
> `CharacterLiteral`
>  and `ImplicitCastExpr` (grep for `EmitAbbrev` in `Serialization/`,
>  for some reasons they are emitted in `WriteDeclAbbrev()`...).
>
> And indeed changing the serialization format of `CharacterLiteral` triggers 
> various assertions
>  because of the abbreviation. Therefore unless I am missing something no 
> other statement/expression
>  has currently an abbreviation. I suspect therefore that someone could go 
> wild and cut the on-disk
>  size of the serialization format significantly here.
>
> I looked at the size of the generated pch for all of Boost, and I am only 
> seeing an increase of
>  about 8k, which is entirely attributable to the fact that I am adding one 
> field to the serialization
>  format. I can rework it to remove this additional field if needed.


If you're generally interested in improving build times, working on the 
serialization format would probably be really valuable.  We don't generally 
insist on a zero-regressions policy in other commits, though.


Repository:
  rC Clang

https://reviews.llvm.org/D54166



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-14 Thread Bruno Ricci via Phabricator via cfe-commits
riccibruno marked 2 inline comments as done.
riccibruno added a comment.

In https://reviews.llvm.org/D54166#1298889, @rjmccall wrote:

> IIRC, abbreviations just silently don't take effect if the record doesn't 
> conform; so things will appear to work, but the size on disk will be bigger.


I looked at where the abbreviations are defined, and it seems that the only 
abbreviations for
statements/expressions are for `DeclRefExpr`, `IntegerLiteral`, 
`CharacterLiteral`
and `ImplicitCastExpr` (grep for `EmitAbbrev` in `Serialization/`,
for some reasons they are emitted in `WriteDeclAbbrev()`...).

And indeed changing the serialization format of `CharacterLiteral` triggers 
various assertions
because of the abbreviation. Therefore unless I am missing something no other 
statement/expression
has currently an abbreviation. I suspect therefore that someone could go wild 
and cut the on-disk
size of the serialization format significantly here.

I looked at the size of the generated pch for all of Boost, and I am only 
seeing an increase of
about 8k, which is entirely attributable to the fact that I am adding one field 
to the serialization
format. I can rework it to remove this additional field if needed.


Repository:
  rC Clang

https://reviews.llvm.org/D54166



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-14 Thread John McCall via Phabricator via cfe-commits
rjmccall added a comment.

IIRC, abbreviations just silently don't take effect if the record doesn't 
conform; so things will appear to work, but the size on disk will be bigger.




Comment at: include/clang/AST/Expr.h:1615
+  }
+
+  /// Build a string literal.

riccibruno wrote:
> Note that the trailing array of chars is aligned to 4 bytes
> since it is after the array of `SourceLocation`.
> Therefore I believe that the `uint16_t *` and `uint32_t *`
> point to properly aligned memory. However I can add an
> assertion here if needed.
I think it's fine.


Repository:
  rC Clang

https://reviews.llvm.org/D54166



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-14 Thread Bruno Ricci via Phabricator via cfe-commits
riccibruno marked 2 inline comments as done.
riccibruno added inline comments.



Comment at: include/clang/AST/Expr.h:1615
+  }
+
+  /// Build a string literal.

Note that the trailing array of chars is aligned to 4 bytes
since it is after the array of `SourceLocation`.
Therefore I believe that the `uint16_t *` and `uint32_t *`
point to properly aligned memory. However I can add an
assertion here if needed.


Repository:
  rC Clang

https://reviews.llvm.org/D54166



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-14 Thread Bruno Ricci via Phabricator via cfe-commits
riccibruno updated this revision to Diff 174032.
riccibruno added a comment.

Factored out some NFSs which I will submit separately.


Repository:
  rC Clang

https://reviews.llvm.org/D54166

Files:
  include/clang/AST/Expr.h
  include/clang/AST/Stmt.h
  lib/AST/Expr.cpp
  lib/Serialization/ASTReaderStmt.cpp
  lib/Serialization/ASTWriterStmt.cpp

Index: lib/Serialization/ASTWriterStmt.cpp
===
--- lib/Serialization/ASTWriterStmt.cpp
+++ lib/Serialization/ASTWriterStmt.cpp
@@ -518,17 +518,23 @@
 
 void ASTStmtWriter::VisitStringLiteral(StringLiteral *E) {
   VisitExpr(E);
-  Record.push_back(E->getByteLength());
+
+  // Store the various bits of data of StringLiteral.
   Record.push_back(E->getNumConcatenated());
+  Record.push_back(E->getLength());
+  Record.push_back(E->getCharByteWidth());
   Record.push_back(E->getKind());
   Record.push_back(E->isPascal());
-  // FIXME: String data should be stored as a blob at the end of the
-  // StringLiteral. However, we can't do so now because we have no
-  // provision for coping with abbreviations when we're jumping around
-  // the AST file during deserialization.
-  Record.append(E->getBytes().begin(), E->getBytes().end());
+
+  // Store the trailing array of SourceLocation.
   for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
 Record.AddSourceLocation(E->getStrTokenLoc(I));
+
+  // Store the trailing array of char holding the string data.
+  StringRef StrData = E->getBytes();
+  for (unsigned I = 0, N = E->getByteLength(); I != N; ++I)
+Record.push_back(StrData[I]);
+
   Code = serialization::EXPR_STRING_LITERAL;
 }
 
Index: lib/Serialization/ASTReaderStmt.cpp
===
--- lib/Serialization/ASTReaderStmt.cpp
+++ lib/Serialization/ASTReaderStmt.cpp
@@ -595,22 +595,35 @@
 
 void ASTStmtReader::VisitStringLiteral(StringLiteral *E) {
   VisitExpr(E);
-  unsigned Len = Record.readInt();
-  assert(Record.peekInt() == E->getNumConcatenated() &&
- "Wrong number of concatenated tokens!");
-  Record.skipInts(1);
-  auto kind = static_cast(Record.readInt());
-  bool isPascal = Record.readInt();
 
-  // Read string data
-  auto B = ();
-  SmallString<16> Str(B, B + Len);
-  E->setString(Record.getContext(), Str, kind, isPascal);
-  Record.skipInts(Len);
-
-  // Read source locations
-  for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
+  // NumConcatenated, Length and CharByteWidth are set by the empty
+  // ctor since they are needed to allocate storage for the trailing objects.
+  unsigned NumConcatenated = Record.readInt();
+  unsigned Length = Record.readInt();
+  unsigned CharByteWidth = Record.readInt();
+  assert((NumConcatenated == E->getNumConcatenated()) &&
+ "Wrong number of concatenated tokens!");
+  assert((Length == E->getLength()) && "Wrong Length!");
+  assert((CharByteWidth == E->getCharByteWidth()) && "Wrong character width!");
+  E->StringLiteralBits.Kind = Record.readInt();
+  E->StringLiteralBits.IsPascal = Record.readInt();
+
+  // The character width is originally computed via mapCharByteWidth.
+  // Check that the deserialized character width is consistant with the result
+  // of calling mapCharByteWidth.
+  assert((CharByteWidth ==
+  StringLiteral::mapCharByteWidth(Record.getContext().getTargetInfo(),
+  E->getKind())) &&
+ "Wrong character width!");
+
+  // Deserialize the trailing array of SourceLocation.
+  for (unsigned I = 0; I < NumConcatenated; ++I)
 E->setStrTokenLoc(I, ReadSourceLocation());
+
+  // Deserialize the trailing array of char holding the string data.
+  char *StrData = E->getStrDataAsChar();
+  for (unsigned I = 0; I < Length * CharByteWidth; ++I)
+StrData[I] = Record.readInt();
 }
 
 void ASTStmtReader::VisitCharacterLiteral(CharacterLiteral *E) {
@@ -2423,8 +2436,11 @@
   break;
 
 case EXPR_STRING_LITERAL:
-  S = StringLiteral::CreateEmpty(Context,
- Record[ASTStmtReader::NumExprFields + 1]);
+  S = StringLiteral::CreateEmpty(
+  Context,
+  /* NumConcatenated=*/Record[ASTStmtReader::NumExprFields + 0],
+  /* Length=*/Record[ASTStmtReader::NumExprFields + 1],
+  /* CharByteWidth=*/Record[ASTStmtReader::NumExprFields + 2]);
   break;
 
 case EXPR_CHARACTER_LITERAL:
Index: lib/AST/Expr.cpp
===
--- lib/AST/Expr.cpp
+++ lib/AST/Expr.cpp
@@ -912,42 +912,80 @@
   return CharByteWidth;
 }
 
-StringLiteral *StringLiteral::Create(const ASTContext , StringRef Str,
- StringKind Kind, bool Pascal, QualType Ty,
- const SourceLocation *Loc,
- unsigned NumStrs) {
-  assert(C.getAsConstantArrayType(Ty) &&
+StringLiteral::StringLiteral(const 

[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-12 Thread David Blaikie via Phabricator via cfe-commits
dblaikie added a comment.

In https://reviews.llvm.org/D54166#1295730, @riccibruno wrote:

> @dblaikie Thanks for looking at this patch !
>
> I have a set of patches shrinking the other statements/expressions.
>  Can I add you to review some of these too ? Most of them consists of just 
> moving
>  some data. I added rsmith but I think that he is busy with the standard.


Sure thing! do pull out as many bits of cleanup, renaming, refactoring, etc, 
into separate patches (no worries if you have to also send those for review 
(not sure if you have commit access/what level of stuff you want to 
review-after-commit, etc) - still much easier to review in little pieces than 
all together)


Repository:
  rC Clang

https://reviews.llvm.org/D54166



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-12 Thread Bruno Ricci via Phabricator via cfe-commits
riccibruno added a comment.

@dblaikie Thanks for looking at this patch !

I have a set of patches shrinking the other statements/expressions.
Can I add you to review some of these too ? Most of them consists of just moving
some data. I added rsmith but I think that he is busy with the standard.




Comment at: include/clang/AST/Expr.h:1701
+for (auto c : getString())
+  if (!isASCII(c))
 return true;

They are not needed, I just could not resist.
I can factor these changes out of this patch and submit it as an NFC.


Repository:
  rC Clang

https://reviews.llvm.org/D54166



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-12 Thread David Blaikie via Phabricator via cfe-commits
dblaikie added inline comments.



Comment at: include/clang/AST/Expr.h:1700-1701
   bool containsNonAscii() const {
-StringRef Str = getString();
-for (unsigned i = 0, e = Str.size(); i != e; ++i)
-  if (!isASCII(Str[i]))
+for (auto c : getString())
+  if (!isASCII(c))
 return true;

Seems like changes like this are unrelated refactoring? Or are they in some way 
needed for this change? 

(also the isXXX functions above - might be neat/tidy to pull those out 
separately (as an NFC change) so that then this change (that modifies the 
implementation of getKind) is just that, without having to refactor all the 
other bits too? But not a big deal either way there, really)


Repository:
  rC Clang

https://reviews.llvm.org/D54166



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
http://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D54166: [AST] Store the string data in StringLiteral in a trailing array of chars

2018-11-06 Thread Bruno Ricci via Phabricator via cfe-commits
riccibruno created this revision.
riccibruno added a reviewer: rsmith.
riccibruno added a project: clang.
Herald added a subscriber: cfe-commits.

Use the newly available space in the bit-fields of `Stmt` and store the
string data in a trailing array of `char`s after the trailing array
of `SourceLocation`. This cuts the size of `StringLiteral` by 2 pointers.

Also refactor slightly `StringLiteral::Create` and `StringLiteral::CreateEmpty`
so that `StringLiteral::Create` is just responsible for the allocation, and the
constructor is responsible for doing all the initialization. This match what
is done for the other classes in general.

Two points I am wondering about:

The original version used type punning through an union. Here I am just
`reinterpret_cast`ing back and forth between `char *` and `uint16_t 
*`/`uint32_t *`.

There is a FIXME in `ASTWriterStmt.cpp` (see the end of the diff) which says 
that
storing the string past the `StringLiteral` would cause problems with 
abbreviations.
The note dates from 2009 and everything *seems* to be working just fine. However
I am not familiar with this and it is possible I am missing something here.


Repository:
  rC Clang

https://reviews.llvm.org/D54166

Files:
  include/clang/AST/Expr.h
  include/clang/AST/Stmt.h
  lib/AST/Expr.cpp
  lib/Serialization/ASTReaderStmt.cpp
  lib/Serialization/ASTWriterStmt.cpp

Index: lib/Serialization/ASTWriterStmt.cpp
===
--- lib/Serialization/ASTWriterStmt.cpp
+++ lib/Serialization/ASTWriterStmt.cpp
@@ -527,17 +527,23 @@
 
 void ASTStmtWriter::VisitStringLiteral(StringLiteral *E) {
   VisitExpr(E);
-  Record.push_back(E->getByteLength());
+
+  // Store the various bits of data of StringLiteral.
   Record.push_back(E->getNumConcatenated());
+  Record.push_back(E->getLength());
+  Record.push_back(E->getCharByteWidth());
   Record.push_back(E->getKind());
   Record.push_back(E->isPascal());
-  // FIXME: String data should be stored as a blob at the end of the
-  // StringLiteral. However, we can't do so now because we have no
-  // provision for coping with abbreviations when we're jumping around
-  // the AST file during deserialization.
-  Record.append(E->getBytes().begin(), E->getBytes().end());
+
+  // Store the trailing array of SourceLocation.
   for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
 Record.AddSourceLocation(E->getStrTokenLoc(I));
+
+  // Store the trailing array of char holding the string data.
+  StringRef StrData = E->getBytes();
+  for (unsigned I = 0, N = E->getByteLength(); I != N; ++I)
+Record.push_back(StrData[I]);
+
   Code = serialization::EXPR_STRING_LITERAL;
 }
 
Index: lib/Serialization/ASTReaderStmt.cpp
===
--- lib/Serialization/ASTReaderStmt.cpp
+++ lib/Serialization/ASTReaderStmt.cpp
@@ -602,22 +602,35 @@
 
 void ASTStmtReader::VisitStringLiteral(StringLiteral *E) {
   VisitExpr(E);
-  unsigned Len = Record.readInt();
-  assert(Record.peekInt() == E->getNumConcatenated() &&
- "Wrong number of concatenated tokens!");
-  Record.skipInts(1);
-  auto kind = static_cast(Record.readInt());
-  bool isPascal = Record.readInt();
 
-  // Read string data
-  auto B = ();
-  SmallString<16> Str(B, B + Len);
-  E->setString(Record.getContext(), Str, kind, isPascal);
-  Record.skipInts(Len);
-
-  // Read source locations
-  for (unsigned I = 0, N = E->getNumConcatenated(); I != N; ++I)
+  // NumConcatenated, Length and CharByteWidth are set by the empty
+  // ctor since they are needed to allocate storage for the trailing objects.
+  unsigned NumConcatenated = Record.readInt();
+  unsigned Length = Record.readInt();
+  unsigned CharByteWidth = Record.readInt();
+  assert((NumConcatenated == E->getNumConcatenated()) &&
+ "Wrong number of concatenated tokens!");
+  assert((Length == E->getLength()) && "Wrong Length!");
+  assert((CharByteWidth == E->getCharByteWidth()) && "Wrong character width!");
+  E->StringLiteralBits.Kind = Record.readInt();
+  E->StringLiteralBits.IsPascal = Record.readInt();
+
+  // The character width is originally computed via mapCharByteWidth.
+  // Check that the deserialized character width is consistant with the result
+  // of calling mapCharByteWidth.
+  assert((CharByteWidth ==
+  StringLiteral::mapCharByteWidth(Record.getContext().getTargetInfo(),
+  E->getKind())) &&
+ "Wrong character width!");
+
+  // Deserialize the trailing array of SourceLocation.
+  for (unsigned I = 0; I < NumConcatenated; ++I)
 E->setStrTokenLoc(I, ReadSourceLocation());
+
+  // Deserialize the trailing array of char holding the string data.
+  char *StrData = E->getStrDataAsChar();
+  for (unsigned I = 0; I < Length * CharByteWidth; ++I)
+StrData[I] = Record.readInt();
 }
 
 void ASTStmtReader::VisitCharacterLiteral(CharacterLiteral *E) {
@@ -2433,8 +2446,11 @@