--- C:/Users/Mr. Meredith/AppData/Local/Temp/LiteralSupport-0.2.h	Fri Jul 10 20:44:36 2009
+++ d:/OpenSource/Live/llvm/tools/clang/include/clang/Lex/LiteralSupport.h	Fri Jul 10 17:34:43 2009
@@ -146,7 +146,7 @@
   
   unsigned MaxTokenLength;
   unsigned SizeBound;
-  unsigned wchar_tByteWidth;
+  unsigned CodeUnitWidth;
   llvm::SmallString<512> ResultBuf;
   char *ResultPtr; // cursor
 public:
@@ -155,14 +155,13 @@
   bool hadError;
   bool AnyWide;
   bool Pascal;
+  bool PureNarrowString;
   
   const char *GetString() { return &ResultBuf[0]; }
   unsigned GetStringLength() const { return ResultPtr-&ResultBuf[0]; }
 
   unsigned GetNumStringChars() const {
-    if (AnyWide)
-      return GetStringLength() / wchar_tByteWidth;
-    return GetStringLength();
+    return GetStringLength() / CodeUnitWidth;  // Width is 1 for char strings.
   }  
   /// getOffsetOfStringByte - This function returns the offset of the
   /// specified byte of the string data represented by Token.  This handles
--- C:/Users/Mr. Meredith/AppData/Local/Temp/LiteralSupport-0.8.cpp	Fri Jul 10 21:08:19 2009
+++ d:/OpenSource/Live/llvm/tools/clang/lib/Lex/LiteralSupport.cpp	Fri Jul 10 20:59:34 2009
@@ -273,6 +273,17 @@
 ///       floating-constant: [C99 6.4.4.2]
 ///         TODO: add rules...
 ///
+/// [C++0x] user-defined-integer-literal:   [TODO]
+///           decimal-literal ud-suffix
+///           octal-literal ud-suffix
+///           hexadecimal-literal ud-suffix 
+/// [C++0x] user-defined-floating-literal:  [TODO]
+///           fractional-constant [exponent-part] ud-suffix
+///           digit-sequence exponent-part ud-suffix
+/// [C++0x] user-defined-integer-literal:   [TODO]
+///           integer-literal ud-suffix
+/// [C++0x] ud-suffix:                      [TODO]
+///           identifier
 NumericLiteralParser::
 NumericLiteralParser(const char *begin, const char *end,
                      SourceLocation TokLoc, Preprocessor &pp)
@@ -698,10 +709,59 @@
     Value = (signed char)Value;
 }
 
+/// Decodes the prefix before a string literal, consuming characters up to
+/// the opening quote character of the string. Possible prefixes are any in
+/// the set [L U u u8] followed by an optional R to indicate a raw string.
+/// The low two bits of the result rank the code unit type so that string
+/// concatenation can promote to the larger value.  As the size of wchar_t is
+/// platform dependent, and char16_t is never wider than char32_t, we assume:
+///    char (0) -> char16_t (1) -> wchar_t (2) -> char32_t (3)
+/// Raw string literals are flagged by adding 8 (the 4th bit).
+/// u8 literals are stored as char strings, but cannot promote; they yield
+/// 4 (the 3rd bit) with the low two bits clear.
+/// On return, the passed buffer pointer should be pointing at the initial
+/// '"' character; this is checked by an assert.
+static unsigned int DecodeStringLiteralPrefix(char const *& TokenBuf) {
+  unsigned int Result = 0;  // No prefix: plain char string.
+  switch (*TokenBuf) {
+  case 'L':
+    ++TokenBuf;
+    Result = 2;  // wchar_t
+    break;
+  case 'U':
+    ++TokenBuf;
+    Result = 3;  // char32_t
+    break;
+  case 'u':
+    ++TokenBuf;
+    if(*TokenBuf == '8') {
+      ++TokenBuf;
+      Result = 4;  // u8: char storage, flagged by the 3rd bit.
+    } else
+      Result = 1;  // char16_t
+    break;
+  }
+
+  if (*TokenBuf == 'R') {
+    ++TokenBuf;
+    Result += 8;  // Raw string flag (4th bit).
+  }
+
+  assert(*TokenBuf == '"' && "Invalid string literal prefix");
+  return Result;
+}
 
 ///       string-literal: [C99 6.4.5]
 ///          " [s-char-sequence] "
 ///         L" [s-char-sequence] "
+/// [C++0x]   u8" [s-char-sequence] "
+/// [C++0x]    u" [s-char-sequence] "
+/// [C++0x]    U" [s-char-sequence] "
+/// [C++0x]     R raw-string
+/// [C++0x]    LR raw-string
+/// [C++0x]   u8R raw-string
+/// [C++0x]    uR raw-string
+/// [C++0x]    UR raw-string
 ///       s-char-sequence:
 ///         s-char
 ///         s-char-sequence s-char
@@ -732,7 +792,29 @@
 ///         \U hex-quad hex-quad
 ///       hex-quad:
 ///         hex-digit hex-digit hex-digit hex-digit
-///
+/// [C++0x] raw-string:
+///           " [d-char-sequence] '[' [r-char-sequence] ']' [d-char-sequence] "
+/// [C++0x] r-char-sequence:
+///           r-char
+///           r-char-sequence r-char
+/// [C++0x] r-char:
+///           any member of the source character set, except
+///             (1), a backslash \ followed by a u or U, or
+///             (2), a right square bracket ] followed by the initial d-char-sequence
+///             (which may be empty) followed by a double quote ".
+///           universal-character-name
+/// [C++0x] d-char-sequence:
+///           d-char
+///           d-char-sequence d-char
+/// [C++0x] d-char:
+///           any member of the basic source character set except:
+///             space, the left square bracket [, the right square bracket ],
+///             and the control characters representing horizontal tab,
+///             vertical tab, form feed, and newline.
+/// [C++0x] user-defined-string-literal:
+///           string-literal ud-suffix
+/// [C++0x] ud-suffix:
+///           identifier
 StringLiteralParser::
 StringLiteralParser(const Token *StringToks, unsigned NumStringToks,
                     Preprocessor &pp) : PP(pp) {
@@ -740,12 +822,28 @@
   // computing a bound on the concatenated string length, and see whether any
   // piece is a wide-string.  If any of the string portions is a wide-string
   // literal, the result is a wide-string literal [C99 6.4.5p4].
-  MaxTokenLength = StringToks[0].getLength();
+  // Literal concatenation between strings of different unicode character
+  // types is conditionally supported [C++0x lex.string p12]. However, it is
+  // an error to attempt to concatenate a u8 string literal with a wide
+  // string literal.
+  assert((NumStringToks > 0) && "Assumes a non-empty set of tokens");
+
+  llvm::SmallString<512> TokenBuf;
+  TokenBuf.resize(StringToks[0].getLength());
+
+  const char *ThisTokBuf = &TokenBuf[0];
+  MaxTokenLength = PP.getSpelling(StringToks[0], ThisTokBuf);
   SizeBound = StringToks[0].getLength()-2;  // -2 for "".
-  AnyWide = StringToks[0].is(tok::wide_string_literal);
+  unsigned int PrefixCode = DecodeStringLiteralPrefix(ThisTokBuf);
+  unsigned int Representation = PrefixCode & 3;
   
   hadError = false;
 
+  SourceLocation Firstu8Loc;
+  bool Anyu8 = PrefixCode & 4;
+  if (Anyu8)
+    Firstu8Loc = StringToks[0].getLocation();
+
   // Implement Translation Phase #6: concatenation of string literals
   /// (C99 5.1.1.2p1).  The common case is only one string fragment.
   for (unsigned i = 1; i != NumStringToks; ++i) {
@@ -757,41 +855,78 @@
     if (StringToks[i].getLength() > MaxTokenLength) 
       MaxTokenLength = StringToks[i].getLength();
     
-    // Remember if we see any wide strings.
-    AnyWide |= StringToks[i].is(tok::wide_string_literal);
+    // TokenBuf was sized for token 0 only; grow it before spelling token i.
+    if (TokenBuf.size() < MaxTokenLength) TokenBuf.resize(MaxTokenLength);
+    ThisTokBuf = &TokenBuf[0];
+    PP.getSpelling(StringToks[i], ThisTokBuf);
+    PrefixCode = DecodeStringLiteralPrefix(ThisTokBuf);  // Keep the u8 bit.
+    if (Representation < (PrefixCode & 3)) Representation = PrefixCode & 3;
+    if ((PrefixCode & 4) && !Anyu8) {
+      Anyu8 = true;
+      Firstu8Loc = StringToks[i].getLocation();
+    }
+  }
+
+  // AnyWide flag is used for two different purposes and needs to be split.
+  // First it is used to determine the underlying data type, which can now
+  // be 4 different character types, and u8 and char literals are treated
+  // equivalently.  Secondly it looks for the special case where none
+  // of the literals had a prefix, as certain parts of the grammar require
+  // narrow string literals, and in this case raw and u8 literals should
+  // also be excluded.
+  AnyWide = (Representation != 0);
+
+  if (Anyu8 && Representation != 0) {
+    // u8 literals cannot concatenate with wide string literals, and we
+    // (currently) choose not to support the conditionally-supported
+    // concatenation with utf-16/32 literals either.
+    PP.Diag(Firstu8Loc, diag::err_invalid_u8_string_concatenation)
+      << SourceRange(StringToks[0].getLocation(),
+                     StringToks[NumStringToks-1].getLocation());
+    hadError = true;
+    // Returning early: initialize every remaining field so that clients see
+    // a valid empty string (length 0, one NUL byte) with the error flag set.
+    Pascal = PureNarrowString = false;
+    CodeUnitWidth = 1;             // GetNumStringChars() divides by this.
+    ResultBuf.resize(1, '\0');     // GetString() indexes ResultBuf[0].
+    ResultPtr = &ResultBuf[0];     // GetStringLength() then yields 0.
+    return;   //  Should we resume parsing and concatenate remaining tokens?
   }
 
   // Include space for the null terminator.
   ++SizeBound;
   
-  // TODO: K&R warning: "traditional C rejects string constant concatenation"
-  
-  // Get the width in bytes of wchar_t.  If no wchar_t strings are used, do not
-  // query the target.  As such, wchar_tByteWidth is only valid if AnyWide=true.
-  wchar_tByteWidth = ~0U;
-  if (AnyWide) {
-    wchar_tByteWidth = PP.getTargetInfo().getWCharWidth();
-    assert((wchar_tByteWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
-    wchar_tByteWidth /= 8;
+  // Determine the width of a single code unit of storage.
+  switch(Representation & 3) {
+  case 0:  // char
+    CodeUnitWidth = 1U;
+    break;
+  case 1:  // char16_t
+    CodeUnitWidth = 2U; // FIXME : lookup correct width of char16_t for target 
+    break;
+  case 2:  // wchar_t
+    CodeUnitWidth = PP.getTargetInfo().getWCharWidth();
+    assert((CodeUnitWidth & 7) == 0 && "Assumes wchar_t is byte multiple!");
+    CodeUnitWidth /= 8;
+    break;
+  case 3:  // char32_t
+    CodeUnitWidth = 4U; // FIXME : lookup correct width of char32_t for target 
+    break;
   }
   
   // The output buffer size needs to be large enough to hold wide characters.
   // This is a worst-case assumption which basically corresponds to L"" "long".
-  if (AnyWide)
-    SizeBound *= wchar_tByteWidth;
+  SizeBound *= CodeUnitWidth;
   
   // Size the temporary buffer to hold the result string data.
   ResultBuf.resize(SizeBound);
   
-  // Likewise, but for each string piece.
-  llvm::SmallString<512> TokenBuf;
-  TokenBuf.resize(MaxTokenLength);
-  
   // Loop over all the strings, getting their spelling, and expanding them to
   // wide strings as appropriate.
   ResultPtr = &ResultBuf[0];   // Next byte to fill in.
   
   Pascal = false;
+  PureNarrowString = true;
   
   for (unsigned i = 0, e = NumStringToks; i != e; ++i) {
     const char *ThisTokBuf = &TokenBuf[0];
@@ -800,19 +935,41 @@
     // and 'spelled' tokens can only shrink.
     unsigned ThisTokLen = PP.getSpelling(StringToks[i], ThisTokBuf);
     const char *ThisTokEnd = ThisTokBuf+ThisTokLen-1;  // Skip end quote.
+    assert(ThisTokEnd[0] == '"' && "Lexer should not yet support ud-suffix");
     
     // TODO: Input character set mapping support.
     
-    // Skip L marker for wide strings.
-    bool ThisIsWide = false;
-    if (ThisTokBuf[0] == 'L') {
-      ++ThisTokBuf;
-      ThisIsWide = true;
+    // Skip prefix for funky strings.
+    PrefixCode = DecodeStringLiteralPrefix(ThisTokBuf);
+    unsigned int RepKind = PrefixCode & 3;
+    bool IsRawString = PrefixCode & 8;
+    if (PrefixCode != 0)
+      PureNarrowString = false;
+
+    if (!PP.getLangOptions().CPlusPlus0x) {
+      if (RepKind & 1) // char16_t or char32_t, supported in C99 via TR19769
+        PP.Diag(StringToks[i].getLocation(), 
+          diag::warn_unicode_requires_cpp0x);
+      if (IsRawString) 
+        PP.Diag(StringToks[i].getLocation(), 
+          diag::warn_raw_strings_require_cpp0x);
     }
     
     assert(ThisTokBuf[0] == '"' && "Expected quote, lexer broken?");
     ++ThisTokBuf;
     
+    // Skip any raw string delimiter, already validated by lexer
+    if (IsRawString) {
+      while (ThisTokBuf[0] != '[') {
+        ++ThisTokBuf;
+        --ThisTokEnd;
+        assert(ThisTokBuf < ThisTokEnd && "Corrupted raw string delimiter");
+      }
+      ++ThisTokBuf;  // Step over '['.
+      --ThisTokEnd;  // Step back onto ']', the end of the raw contents.
+      assert(ThisTokEnd[0] == ']' && "Corrupted raw string delimiter");
+    }
+    
     // Check if this is a pascal string
     if (pp.getLangOptions().PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
         ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
@@ -834,6 +991,12 @@
           ++ThisTokBuf;
         } while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
         
+        // A raw-string backslash that starts no UCN is ordinary span text.
+        while (IsRawString && ThisTokBuf != ThisTokEnd &&
+               ThisTokBuf[1] != 'u' && ThisTokBuf[1] != 'U') {
+          do ++ThisTokBuf;
+          while (ThisTokBuf != ThisTokEnd && ThisTokBuf[0] != '\\');
+        }
         // Copy the character span over.
         unsigned Len = ThisTokBuf-InStart;
         if (!AnyWide) {
@@ -844,7 +1007,7 @@
           for (; Len; --Len, ++InStart) {
             *ResultPtr++ = InStart[0];
             // Add zeros at the end.
-            for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+            for (unsigned i = 1, e = CodeUnitWidth; i != e; ++i)
               *ResultPtr++ = 0;
           }
         }
@@ -853,19 +1016,19 @@
       // Is this a Universal Character Name escape?
       if (ThisTokBuf[1] == 'u' || ThisTokBuf[1] == 'U') {
         ProcessUCNEscape(ThisTokBuf, ThisTokEnd, ResultPtr, 
-                         hadError, StringToks[i].getLocation(), ThisIsWide, PP);
+                         hadError, StringToks[i].getLocation(), RepKind > 0, PP);
         continue;
       }
       // Otherwise, this is a non-UCN escape character.  Process it.
       unsigned ResultChar = ProcessCharEscape(ThisTokBuf, ThisTokEnd, hadError,
                                               StringToks[i].getLocation(),
-                                              ThisIsWide, PP);
+                                              RepKind > 0, PP);
       
       // Note: our internal rep of wide char tokens is always little-endian.
       *ResultPtr++ = ResultChar & 0xFF;
       
       if (AnyWide) {
-        for (unsigned i = 1, e = wchar_tByteWidth; i != e; ++i)
+        for (unsigned i = 1, e = CodeUnitWidth; i != e; ++i)
           *ResultPtr++ = ResultChar >> i*8;
       }
     }
--- C:/Users/Mr. Meredith/AppData/Local/Temp/Pragma-0.2.cpp	Fri Jul 10 21:10:50 2009
+++ d:/OpenSource/Live/llvm/tools/clang/lib/Lex/Pragma.cpp	Fri Jul 10 20:33:31 2009
@@ -379,7 +379,7 @@
 
     // Concatenate and parse the strings.
     StringLiteralParser Literal(&StrToks[0], StrToks.size(), *this);
-    assert(!Literal.AnyWide && "Didn't allow wide strings in");
+    assert(Literal.PureNarrowString && "Didn't allow wide strings in");
     if (Literal.hadError)
       return;
     if (Literal.Pascal) {
@@ -567,7 +567,7 @@
     
     // Concatenate and parse the strings.
     StringLiteralParser Literal(&StrToks[0], StrToks.size(), PP);
-    assert(!Literal.AnyWide && "Didn't allow wide strings in");
+    assert(Literal.PureNarrowString && "Didn't allow wide strings in");
     if (Literal.hadError)
       return;
     if (Literal.Pascal) {
--- C:/Users/Mr. Meredith/AppData/Local/Temp/PPDirectives-0.2.cpp	Fri Jul 10 21:14:22 2009
+++ d:/OpenSource/Live/llvm/tools/clang/lib/Lex/PPDirectives.cpp	Fri Jul 10 21:13:27 2009
@@ -695,7 +695,7 @@
   } else {
     // Parse and validate the string, converting it into a unique ID.
     StringLiteralParser Literal(&StrTok, 1, *this);
-    assert(!Literal.AnyWide && "Didn't allow wide strings in");
+    assert(Literal.PureNarrowString && "Didn't allow wide strings in");
     if (Literal.hadError)
       return DiscardUntilEndOfDirective();
     if (Literal.Pascal) {
@@ -826,7 +826,7 @@
   } else {
     // Parse and validate the string, converting it into a unique ID.
     StringLiteralParser Literal(&StrTok, 1, *this);
-    assert(!Literal.AnyWide && "Didn't allow wide strings in");
+    assert(Literal.PureNarrowString && "Didn't allow wide strings in");
     if (Literal.hadError)
       return DiscardUntilEndOfDirective();
     if (Literal.Pascal) {
--- C:/Users/Mr. Meredith/AppData/Local/Temp/DiagnosticLexKinds-0.td	Fri Jul 10 21:15:59 2009
+++ d:/OpenSource/Live/llvm/tools/clang/include/clang/Basic/DiagnosticLexKinds.td	Fri Jul 10 20:06:39 2009
@@ -88,6 +88,12 @@
 def err_pascal_string_too_long : Error<"Pascal string is too long">;
 def warn_octal_escape_too_large : ExtWarn<"octal escape sequence out of range">;
 def warn_hex_escape_too_large : ExtWarn<"hex escape sequence out of range">;
+def err_invalid_u8_string_concatenation : Error<
+  "u8 string literals cannot be combined with wider encodings">;
+def warn_unicode_requires_cpp0x : Extension<
+  "Unicode string literals are an extension introduced by C++0x">;
+def warn_raw_strings_require_cpp0x : Extension<
+  "Raw string literals are an extension introduced by C++0x">;
 
 //===----------------------------------------------------------------------===//
 // Preprocessor Diagnostics
