diff --git include/clang/Basic/ConvertUTF.h include/clang/Basic/ConvertUTF.h
index cdc4269..38956ee 100644
--- include/clang/Basic/ConvertUTF.h
+++ include/clang/Basic/ConvertUTF.h
@@ -154,7 +154,6 @@ ConversionResult ConvertUTF16toUTF32 (
 ConversionResult ConvertUTF32toUTF16 (
   const UTF32** sourceStart, const UTF32* sourceEnd,
   UTF16** targetStart, UTF16* targetEnd, ConversionFlags flags);
-#endif
 
 Boolean isLegalUTF8Sequence(const UTF8 *source, const UTF8 *sourceEnd);
 
@@ -162,6 +161,16 @@ Boolean isLegalUTF8String(const UTF8 **source, const UTF8 *sourceEnd);
 
 unsigned getNumBytesForUTF8(UTF8 firstByte);
 
+static inline ConversionResult
+convertUTF8Sequence(const UTF8 **source, const UTF8 *sourceEnd, UTF32 *target,
+                    ConversionFlags flags) {
+  /* Decode one sequence into *target; an empty range reads no input. */
+  int size = *source < sourceEnd ? (int)getNumBytesForUTF8(**source) : 1;
+  if (sourceEnd - *source < size) /* empty or truncated input */
+    return sourceExhausted;
+  return ConvertUTF8toUTF32(source, *source + size, &target, target + 1, flags);
+}
+
 #ifdef __cplusplus
 }
 
@@ -201,3 +210,5 @@ bool ConvertCodePointToUTF8(unsigned Source, char *&ResultPtr);
 #endif
 
 /* --------------------------------------------------------------------- */
+
+#endif
diff --git include/clang/Basic/DiagnosticLexKinds.td include/clang/Basic/DiagnosticLexKinds.td
index 3c036a3..fc4c079 100644
--- include/clang/Basic/DiagnosticLexKinds.td
+++ include/clang/Basic/DiagnosticLexKinds.td
@@ -93,7 +93,17 @@ def ext_multichar_character_literal : ExtWarn<
   "multi-character character constant">, InGroup<MultiChar>;
 def ext_four_char_character_literal : Extension<
   "multi-character character constant">, InGroup<FourByteMultiChar>;
-  
+
+// Unicode and UTF-8 in source text
+def err_invalid_utf8 : Error<
+  "source file is not valid UTF-8">;
+def err_non_ascii : Error<
+  "non-ASCII characters are not allowed outside of literals and identifiers">;
+def err_unicode_invalid_in_id : Error<
+  "invalid Unicode identifier character">;
+def ext_unicode_whitespace : ExtWarn<
+  "treating Unicode character as whitespace">,
+  InGroup<DiagGroup<"unicode-whitespace">>;
 
 // Literal
 def ext_nonstandard_escape : Extension<
diff --git include/clang/Lex/Lexer.h include/clang/Lex/Lexer.h
index d36189f..765f389 100644
--- include/clang/Lex/Lexer.h
+++ include/clang/Lex/Lexer.h
@@ -437,6 +437,11 @@ private:
   ///
   void LexTokenInternal(Token &Result);
 
+  /// Given that a token begins with the Unicode character \p C, whose
+  /// spelling ends just before \p CurPtr, figure out what kind of token it is
+  /// and dispatch to the appropriate lexing helper function.
+  void LexUnicode(Token &Result, uint32_t C, const char *CurPtr);
+
   /// FormTokenWithChars - When we lex a token, we have identified a span
   /// starting at BufferPtr, going to TokEnd that forms the token.  This method
   /// takes that range and assigns it to the token as its location and size.  In
@@ -579,6 +584,20 @@ private:
   void cutOffLexing() { BufferPtr = BufferEnd; }
 
   bool isHexaLiteral(const char *Start, const LangOptions &LangOpts);
+
+
+  /// Read a universal character name.
+  ///
+  /// \param CurPtr The position in the source buffer after the initial "\u"
+  ///               or "\U". Updated to point past each hex digit consumed;
+  ///               after a complete UCN it points just past the UCN.
+  /// \param Kind Either 'u' or 'U', selecting a four-digit or eight-digit
+  ///             universal character name.
+  /// \param Tok The token being formed; it is marked with Token::HasUCN.
+  ///
+  /// \return The Unicode codepoint specified by the UCN, or 0 if the UCN is
+  ///         invalid.
+  uint32_t readUCN(const char *&CurPtr, char Kind, Token &Tok);
 };
 
 
diff --git include/clang/Lex/Token.h include/clang/Lex/Token.h
index 06ff56e..e77264e 100644
--- include/clang/Lex/Token.h
+++ include/clang/Lex/Token.h
@@ -74,9 +74,10 @@ public:
     StartOfLine   = 0x01,  // At start of line or only after whitespace.
     LeadingSpace  = 0x02,  // Whitespace exists before this token.
     DisableExpand = 0x04,  // This identifier may never be macro expanded.
-    NeedsCleaning = 0x08,   // Contained an escaped newline or trigraph.
+    NeedsCleaning = 0x08,  // Contained an escaped newline or trigraph.
     LeadingEmptyMacro = 0x10, // Empty macro exists before this token.
-    HasUDSuffix = 0x20     // This string or character literal has a ud-suffix.
+    HasUDSuffix = 0x20,    // This string or character literal has a ud-suffix.
+    HasUCN = 0x40          // This identifier contains a UCN.
   };
 
   tok::TokenKind getKind() const { return (tok::TokenKind)Kind; }
diff --git lib/Lex/Lexer.cpp lib/Lex/Lexer.cpp
index 15b1061..cd49a8f 100644
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -25,6 +25,7 @@
 //===----------------------------------------------------------------------===//
 
 #include "clang/Lex/Lexer.h"
+#include "clang/Basic/ConvertUTF.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Lex/CodeCompletionHandler.h"
 #include "clang/Lex/LexDiagnostic.h"
@@ -371,10 +372,12 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
   if (Tok.is(tok::raw_identifier))
     TokStart = Tok.getRawIdentifierData();
-  else if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
-    // Just return the string from the identifier table, which is very quick.
-    Buffer = II->getNameStart();
-    return II->getLength();
+  else if (!(Tok.getFlags() & Token::HasUCN)) {
+    if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+      // Just return the string from the identifier table, which is very quick.
+      Buffer = II->getNameStart();
+      return II->getLength();
+    }
   }
 
   // NOTE: this can be checked even after testing for an IdentifierInfo.
@@ -1376,7 +1379,6 @@ SourceLocation Lexer::findLocationAfterToken(SourceLocation Loc,
 ///   2. If this is an escaped newline (potentially with whitespace between
 ///      the backslash and newline), implicitly skip the newline and return
 ///      the char after it.
-///   3. If this is a UCN, return it.  FIXME: C++ UCN's?
 ///
 /// This handles the slow/uncommon case of the getCharAndSize method.  Here we
 /// know that we can accumulate into Size, and that we have already incremented
@@ -1509,6 +1511,73 @@ void Lexer::SkipBytes(unsigned Bytes, bool StartOfLine) {
   IsAtStartOfLine = StartOfLine;
 }
 
+namespace {
+  struct UCNCharRange {  // An inclusive range of Unicode code points.
+    unsigned Lower;      // First code point in the range.
+    unsigned Upper;      // Last code point in the range.
+  };
+  // Keep this table sorted and non-overlapping: isAllowedIDChar bisects it.
+  // C11 D.1, C++11 [charname.allowed]
+  // FIXME: C99 and C++03 each have a different set of allowed UCNs.
+  const UCNCharRange UCNAllowedCharRanges[] = {
+    // 1
+    { 0x00A8, 0x00A8 }, { 0x00AA, 0x00AA }, { 0x00AD, 0x00AD },
+    { 0x00AF, 0x00AF }, { 0x00B2, 0x00B5 }, { 0x00B7, 0x00BA },
+    { 0x00BC, 0x00BE }, { 0x00C0, 0x00D6 }, { 0x00D8, 0x00F6 },
+    { 0x00F8, 0x00FF },
+    // 2
+    { 0x0100, 0x167F }, { 0x1681, 0x180D }, { 0x180F, 0x1FFF },
+    // 3
+    { 0x200B, 0x200D }, { 0x202A, 0x202E }, { 0x203F, 0x2040 },
+    { 0x2054, 0x2054 }, { 0x2060, 0x206F },
+    // 4
+    { 0x2070, 0x218F }, { 0x2460, 0x24FF }, { 0x2776, 0x2793 },
+    { 0x2C00, 0x2DFF }, { 0x2E80, 0x2FFF },
+    // 5
+    { 0x3004, 0x3007 }, { 0x3021, 0x302F }, { 0x3031, 0x303F },
+    // 6
+    { 0x3040, 0xD7FF },
+    // 7
+    { 0xF900, 0xFD3D }, { 0xFD40, 0xFDCF }, { 0xFDF0, 0xFE44 },
+    { 0xFE47, 0xFFFD },
+    // 8
+    { 0x10000, 0x1FFFD }, { 0x20000, 0x2FFFD }, { 0x30000, 0x3FFFD },
+    { 0x40000, 0x4FFFD }, { 0x50000, 0x5FFFD }, { 0x60000, 0x6FFFD },
+    { 0x70000, 0x7FFFD }, { 0x80000, 0x8FFFD }, { 0x90000, 0x9FFFD },
+    { 0xA0000, 0xAFFFD }, { 0xB0000, 0xBFFFD }, { 0xC0000, 0xCFFFD },
+    { 0xD0000, 0xDFFFD }, { 0xE0000, 0xEFFFD }
+  };
+}
+
+static bool isAllowedIDChar(unsigned c) {
+  // True if c falls inside one of the inclusive UCNAllowedCharRanges entries.
+  unsigned First = 0, Last = llvm::array_lengthof(UCNAllowedCharRanges);
+
+  // Bisect the table; [First, Last) holds the entries not yet ruled out.
+  while (First != Last) {
+    unsigned Mid = (First + Last) / 2;
+    const UCNCharRange &Probe = UCNAllowedCharRanges[Mid];
+    if (c > Probe.Upper)
+      First = Mid + 1;
+    else if (c < Probe.Lower)
+      Last = Mid;
+    else
+      return true;
+  }
+  return false;
+}
+
+static bool isAllowedInitiallyIDChar(unsigned c) {
+  // C11 D.2, C++11 [charname.disallowed]
+  // FIXME: C99 only forbids "digits", presumably as described in C99 Annex D.
+  // FIXME: C++03 does not forbid any initial characters.
+  // Only the four ranges below are rejected; nothing else is checked here.
+  if ((c >= 0x0300 && c <= 0x036F) || (c >= 0x1DC0 && c <= 0x1DFF))
+    return false;
+  return (c < 0x20D0 || c > 0x20FF) && (c < 0xFE20 || c > 0xFE2F);
+}
+
+
 void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
   // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
   unsigned Size;
@@ -1520,11 +1589,11 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
 
   // Fast path, no $,\,? in identifier found.  '\' might be an escaped newline
   // or UCN, and ? might be a trigraph for '\', an escaped newline or UCN.
-  // FIXME: UCNs.
   //
   // TODO: Could merge these checks into a CharInfo flag to make the comparison
   // cheaper
-  if (C != '\\' && C != '?' && (C != '$' || !LangOpts.DollarIdents)) {
+  if (isascii(C) && C != '\\' && C != '?' &&
+      (C != '$' || !LangOpts.DollarIdents)) {
 FinishIdentifier:
     const char *IdStart = BufferPtr;
     FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
@@ -1561,8 +1630,39 @@ FinishIdentifier:
       CurPtr = ConsumeChar(CurPtr, Size, Result);
       C = getCharAndSize(CurPtr, Size);
       continue;
-    } else if (!isIdentifierBody(C)) { // FIXME: UCNs.
-      // Found end of identifier.
+
+    } else if (C == '\\') {
+      unsigned NextSize;
+      C = getCharAndSize(CurPtr+Size, NextSize);
+
+      if (C != 'u' && C != 'U')
+        goto FinishIdentifier;
+
+      const char *UCNPtr = CurPtr;
+      UCNPtr = ConsumeChar(UCNPtr, Size, Result);
+      UCNPtr = ConsumeChar(UCNPtr, NextSize, Result);
+
+      uint32_t CodePoint = readUCN(UCNPtr, C, Result);
+      if (CodePoint == 0 || !isAllowedIDChar(CodePoint))
+        goto FinishIdentifier;
+
+      CurPtr = UCNPtr;
+      C = getCharAndSize(CurPtr, Size);
+      continue;
+    } else if (!isascii(C)) {
+      const char *UnicodePtr = CurPtr;
+      UTF32 CodePoint;
+      ConversionResult Result = convertUTF8Sequence((const UTF8 **)&UnicodePtr,
+                                                    (const UTF8 *)BufferEnd,
+                                                    &CodePoint,
+                                                    strictConversion);
+      if (Result != conversionOK || !isAllowedIDChar(CodePoint))
+        goto FinishIdentifier;
+
+      CurPtr = UnicodePtr;
+      C = getCharAndSize(CurPtr, Size);
+      continue;
+    } else if (!isIdentifierBody(C)) {
       goto FinishIdentifier;
     }
 
@@ -1570,7 +1670,7 @@ FinishIdentifier:
     CurPtr = ConsumeChar(CurPtr, Size, Result);
 
     C = getCharAndSize(CurPtr, Size);
-    while (isIdentifierBody(C)) { // FIXME: UCNs.
+    while (isIdentifierBody(C)) {
       CurPtr = ConsumeChar(CurPtr, Size, Result);
       C = getCharAndSize(CurPtr, Size);
     }
@@ -1595,7 +1695,7 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
   unsigned Size;
   char C = getCharAndSize(CurPtr, Size);
   char PrevCh = 0;
-  while (isNumberBody(C)) { // FIXME: UCNs in ud-suffix.
+  while (isNumberBody(C)) {
     CurPtr = ConsumeChar(CurPtr, Size, Result);
     PrevCh = C;
     C = getCharAndSize(CurPtr, Size);
@@ -2592,6 +2692,131 @@ bool Lexer::isCodeCompletionPoint(const char *CurPtr) const {
   return false;
 }
 
+// FIXME: Move to MathExtras.h
+static unsigned hexDigitValue(char C) {
+  // Callers must pass a hex digit; the assert rejects anything else.
+  bool IsDecimal = C >= '0' && C <= '9', IsLower = C >= 'a' && C <= 'f';
+  assert(IsDecimal || IsLower || (C >= 'A' && C <= 'F'));
+  return IsDecimal ? C - '0' : C - (IsLower ? 'a' : 'A') + 10;
+}
+
+/// Reads the hex digits that follow an already-consumed "\u" or "\U", marks
+/// \p Tok with Token::HasUCN and advances \p CurPtr past each digit it
+/// accepts. Returns the code point, or 0 if digits are missing or the value
+/// is one C99 6.4.3p2 forbids (surrogates are still accepted in C++03 mode).
+uint32_t Lexer::readUCN(const char *&CurPtr, char Kind, Token &Tok) {
+  assert(LangOpts.CPlusPlus || LangOpts.C99);
+  assert(Kind == 'u' || Kind == 'U');
+
+  Tok.setFlag(Token::HasUCN);
+
+  unsigned NumHexDigits = Kind == 'u' ? 4 : 8; // 'u': 4 digits, 'U': 8.
+
+  uint32_t CodePoint = 0;
+  for (unsigned i = 0; i < NumHexDigits; ++i) {
+    unsigned CharSize;
+    char C = getCharAndSize(CurPtr, CharSize);
+
+    // Cast first: isxdigit on a negative char (non-ASCII byte) is undefined.
+    if (!isxdigit(static_cast<unsigned char>(C))) {
+      if (!isLexingRawMode()) {
+        if (i == 0) {
+          // FIXME: The diagnostic message hard-codes "\u" (not "\U")
+          Diag(BufferPtr, diag::err_ucn_escape_no_digits);
+        } else {
+          // FIXME: if i == 4 and NumHexDigits == 8, suggest a fixit to \u.
+          Diag(BufferPtr, diag::err_ucn_escape_incomplete);
+        }
+      }
+
+      return 0;
+    }
+
+    CodePoint <<= 4;
+    CodePoint += hexDigitValue(C);
+
+    CurPtr = ConsumeChar(CurPtr, CharSize, Tok);
+  }
+
+  // C99 6.4.3p2: A universal character name shall not specify a character whose
+  //   short identifier is less than 00A0 other than 0024 ($), 0040 (@), or
+  //   0060 (`), nor one in the range D800 through DFFF inclusive.)
+  if (CodePoint < 0xA0) {
+    if (CodePoint == 0x24 || CodePoint == 0x40 || CodePoint == 0x60)
+      return CodePoint;
+
+    if (!isLexingRawMode()) {
+      if (iscntrl(CodePoint) || !isascii(CodePoint))
+        Diag(BufferPtr, diag::err_ucn_control_character);
+      else {
+        char C[] = {(char)CodePoint, '\0'};
+        Diag(BufferPtr, diag::err_ucn_escape_basic_scs) << C;
+      }
+    }
+
+    return 0;
+  } else if ((!LangOpts.CPlusPlus || LangOpts.CPlusPlus11) &&
+             (CodePoint >= 0xD800 && CodePoint <= 0xDFFF)) {
+    // C++03 allows UCNs representing surrogate characters. C99 and C++11 don't.
+    if (!isLexingRawMode())
+      Diag(BufferPtr, diag::err_ucn_escape_invalid);
+    return 0;
+  }
+
+  return CodePoint;
+}
+
+static bool isUnicodeWhitespace(uint32_t C) {
+  // Code points that LexUnicode treats as whitespace rather than as a token.
+  if ((C >= 0x2000 && C <= 0x200A) || C == 0x2028 || C == 0x2029) return true;
+  return C == 0x0085 || C == 0x00A0 || C == 0x1680 || C == 0x180E ||
+         C == 0x202F || C == 0x205F || C == 0x3000;
+}
+
+void Lexer::LexUnicode(Token &Result, uint32_t C, const char *CurPtr) {
+  // C is the decoded character and CurPtr points just past its spelling.
+  // Lex it as whitespace, an identifier, a dropped character, or unknown.
+  if (isUnicodeWhitespace(C)) {
+    // Lex it as whitespace, with an extension warning outside raw mode.
+    if (!isLexingRawMode()) {
+      CharSourceRange Blank = CharSourceRange::getCharRange(
+          getSourceLocation(), getSourceLocation(CurPtr));
+      Diag(BufferPtr, diag::ext_unicode_whitespace) << Blank;
+    }
+
+    Result.setFlag(Token::LeadingSpace);
+    if (!SkipWhitespace(Result, CurPtr))
+      LexTokenInternal(Result);
+    return; // SkipWhitespace returning true means KeepWhitespaceMode.
+  }
+
+  const bool IsIDChar = isAllowedIDChar(C);
+  if (IsIDChar && isAllowedInitiallyIDChar(C)) {
+    MIOpt.ReadToken();
+    return LexIdentifier(Result, CurPtr);
+  }
+
+  if (IsIDChar || isascii(*BufferPtr)) {
+    // We have an explicit UCN or a character that's unlikely to show up by
+    // accident, so let the parser see it as an unknown token.
+    MIOpt.ReadToken();
+    FormTokenWithChars(Result, CurPtr, tok::unknown);
+    return;
+  }
+
+  // Non-ASCII characters tend to creep into source code unintentionally.
+  // Instead of letting the parser complain about the unknown token,
+  // just drop the character.
+  if (!isLexingRawMode()) {
+    CharSourceRange Stray = CharSourceRange::getCharRange(
+        getSourceLocation(), getSourceLocation(CurPtr));
+    Diag(BufferPtr, diag::err_non_ascii) << FixItHint::CreateRemoval(Stray);
+  }
+
+  BufferPtr = CurPtr;
+  return LexTokenInternal(Result);
+}
+
 
 /// LexTokenInternal - This implements a simple C family lexer.  It is an
 /// extremely performance critical piece of code.  This assumes that the buffer
@@ -3243,12 +3468,45 @@ LexNextToken:
       Kind = tok::unknown;
     break;
 
+  // UCNs (C99 6.4.3, C++11 [lex.charset]p2)
   case '\\':
-    // FIXME: UCN's.
-    // FALL THROUGH.
-  default:
+    Char = getCharAndSize(CurPtr, SizeTmp);
+    if (Char == 'u' || Char == 'U') {
+      CurPtr = ConsumeChar(CurPtr, SizeTmp, Result);
+      if (uint32_t CodePoint = readUCN(CurPtr, Char, Result))
+        return LexUnicode(Result, CodePoint, CurPtr);
+    }
+
     Kind = tok::unknown;
     break;
+
+  default: {
+    if (isascii(Char)) {
+      Kind = tok::unknown;
+      break;
+    }
+
+    UTF32 CodePoint;
+
+    // We can't just reset CurPtr to BufferPtr because BufferPtr may point to
+    // an escaped newline.
+    --CurPtr;
+    ConversionResult Status = convertUTF8Sequence((const UTF8 **)&CurPtr,
+                                                  (const UTF8 *)BufferEnd,
+                                                  &CodePoint,
+                                                  strictConversion);
+    if (Status == conversionOK)
+      return LexUnicode(Result, CodePoint, CurPtr);
+    
+    // Non-ASCII characters tend to creep into source code unintentionally.
+    // Instead of letting the parser complain about the unknown token,
+    // just warn that we don't have valid UTF-8, then drop the character.
+    if (!isLexingRawMode())
+      Diag(CurPtr, diag::err_invalid_utf8);
+
+    BufferPtr = CurPtr+1;
+    goto LexNextToken;
+  }
   }
 
   // Notify MIOpt that we read a non-whitespace/non-comment token.
diff --git lib/Lex/Preprocessor.cpp lib/Lex/Preprocessor.cpp
index df2c98d..7e3a537 100644
--- lib/Lex/Preprocessor.cpp
+++ lib/Lex/Preprocessor.cpp
@@ -27,6 +27,7 @@
 
 #include "clang/Lex/Preprocessor.h"
 #include "MacroArgs.h"
+#include "clang/Basic/ConvertUTF.h"
 #include "clang/Basic/FileManager.h"
 #include "clang/Basic/SourceManager.h"
 #include "clang/Basic/TargetInfo.h"
@@ -43,6 +44,7 @@
 #include "clang/Lex/ScratchBuffer.h"
 #include "llvm/ADT/APFloat.h"
 #include "llvm/ADT/SmallString.h"
+#include "llvm/ADT/STLExtras.h"
 #include "llvm/Support/Capacity.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/raw_ostream.h"
@@ -399,7 +401,7 @@ StringRef Preprocessor::getSpelling(const Token &Tok,
                                           SmallVectorImpl<char> &Buffer,
                                           bool *Invalid) const {
   // NOTE: this has to be checked *before* testing for an IdentifierInfo.
-  if (Tok.isNot(tok::raw_identifier)) {
+  if (Tok.isNot(tok::raw_identifier) && !(Tok.getFlags() & Token::HasUCN)) {
     // Try the fast path.
     if (const IdentifierInfo *II = Tok.getIdentifierInfo())
       return II->getName();
@@ -497,6 +499,24 @@ void Preprocessor::EndSourceFile() {
 // Lexer Event Handling.
 //===----------------------------------------------------------------------===//
 
+// FIXME: Move to MathExtras.h
+static int hexDigitValue(char C) {
+  // Maps '0'-'9' to 0-9 and hex letters of either case to 10-15.
+  const bool Digit = '0' <= C && C <= '9', Lower = 'a' <= C && C <= 'f';
+  assert(Digit || Lower || ('A' <= C && C <= 'F'));
+  return Digit ? C - '0' : C - (Lower ? 'a' : 'A') + 10;
+}
+
+static void appendCodePoint(unsigned Codepoint,
+                            llvm::SmallVectorImpl<char> &Str) {
+  // Encode into a scratch buffer, then append the bytes produced to Str.
+  char Encoded[4], *EncodedEnd = Encoded;
+  bool Converted = ConvertCodePointToUTF8(Codepoint, EncodedEnd);
+  assert(Converted && "Unexpected conversion failure");
+  (void)Converted;
+  Str.append(Encoded, EncodedEnd);
+}
+
 /// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
 /// identifier information for the token and install it into the token,
 /// updating the token kind accordingly.
@@ -505,14 +525,51 @@ IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
 
   // Look up this token, see if it is a macro, or if it is a language keyword.
   IdentifierInfo *II;
-  if (!Identifier.needsCleaning()) {
+  if (!Identifier.needsCleaning() && !(Identifier.getFlags() & Token::HasUCN)) {
     // No cleaning needed, just use the characters from the lexed buffer.
     II = getIdentifierInfo(StringRef(Identifier.getRawIdentifierData(),
-                                           Identifier.getLength()));
+                                     Identifier.getLength()));
   } else {
     // Cleaning needed, alloca a buffer, clean into it, then use the buffer.
     SmallString<64> IdentifierBuffer;
     StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
+
+    SmallString<64> UCNIdentifierBuffer;
+    if (Identifier.getFlags() & Token::HasUCN) {
+      for (StringRef::iterator I = CleanedStr.begin(), E = CleanedStr.end();
+           I != E; ++I) {
+        if (*I != '\\') {
+          UCNIdentifierBuffer.push_back(*I);
+          continue;
+        }
+
+        ++I;
+        
+        unsigned NumHexDigits;
+        if (*I == 'u')
+          NumHexDigits = 4;
+        else
+          NumHexDigits = 8;
+
+        uint32_t CodePoint = 0;
+        for (++I; NumHexDigits != 0 && I != E; ++I, --NumHexDigits) {
+          if (!isxdigit(*I))
+            break;
+          
+          CodePoint <<= 4;
+          CodePoint += hexDigitValue(*I);
+        }
+
+        if (NumHexDigits != 0)
+          CodePoint = UNI_REPLACEMENT_CHAR;
+
+        appendCodePoint(CodePoint, UCNIdentifierBuffer);
+        --I;
+      }
+
+      CleanedStr = UCNIdentifierBuffer;
+    }
+
     II = getIdentifierInfo(CleanedStr);
   }
 
diff --git test/CXX/over/over.oper/over.literal/p8.cpp test/CXX/over/over.oper/over.literal/p8.cpp
index 6f63610..70a1843 100644
--- test/CXX/over/over.oper/over.literal/p8.cpp
+++ test/CXX/over/over.oper/over.literal/p8.cpp
@@ -7,8 +7,7 @@ namespace std {
 
 void operator "" _km(long double); // ok
 string operator "" _i18n(const char*, std::size_t); // ok
-// FIXME: This should be accepted once we support UCNs
-template<char...> int operator "" \u03C0(); // ok, UCN for lowercase pi // expected-error {{expected identifier}}
+template<char...> int operator "" \u03C0(); // ok, UCN for lowercase pi // expected-warning {{reserved}}
 float operator ""E(const char *); // expected-error {{invalid suffix on literal}} expected-warning {{reserved}}
 float operator " " B(const char *); // expected-error {{must be '""'}} expected-warning {{reserved}}
 string operator "" 5X(const char *, std::size_t); // expected-error {{expected identifier}}
diff --git test/FixIt/fixit-unicode.c test/FixIt/fixit-unicode.c
index 2af5e08..c45ba06 100644
--- test/FixIt/fixit-unicode.c
+++ test/FixIt/fixit-unicode.c
@@ -8,13 +8,15 @@ struct Foo {
 // PR13312
 void test1() {
   struct Foo foo;
-  (&foo)☃>bar = 42;
+  foo.bar = 42☃
+// CHECK: error: non-ASCII characters are not allowed outside of literals and identifiers
+// CHECK: {{^              \^}}
 // CHECK: error: expected ';' after expression
 // Make sure we emit the fixit right in front of the snowman.
-// CHECK: {{^        \^}}
-// CHECK: {{^        ;}}
+// CHECK: {{^              \^}}
+// CHECK: {{^              ;}}
 
-// CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{11:9-11:9}:";"
+// CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{[[@LINE-8]]:15-[[@LINE-8]]:15}:";"
 }
 
 
@@ -29,5 +31,5 @@ void test2() {
 // because different systems will render the delta differently (either as a
 // character, or as <U+2206>.) The fixit should line up with the %d regardless.
 
-// CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{23:16-23:18}:"%ld"
+// CHECK-MACHINE: fix-it:"{{.*}}fixit-unicode.c":{[[@LINE-9]]:16-[[@LINE-9]]:18}:"%ld"
 }
diff --git test/Preprocessor/ucn-pp-identifier.c test/Preprocessor/ucn-pp-identifier.c
new file mode 100644
index 0000000..0bc6e6c
--- /dev/null
+++ test/Preprocessor/ucn-pp-identifier.c
@@ -0,0 +1,70 @@
+// RUN: %clang_cc1 %s -fsyntax-only -std=c99 -pedantic -verify -Wundef
+// RUN: %clang_cc1 %s -fsyntax-only -x c++ -pedantic -verify -Wundef
+
+#define \u00FC
+#define a\u00FD() 0
+#ifndef \u00FC
+#error "This should never happen"
+#endif
+
+#if a\u00FD()
+#error "This should never happen"
+#endif
+
+#if a\U000000FD()
+#error "This should never happen"
+#endif
+
+#if \uarecool // expected-error{{incomplete universal character name}} expected-error {{invalid token at start of a preprocessor expression}}
+#endif
+#if \U0001000  // expected-error{{incomplete universal character name}} expected-error {{invalid token at start of a preprocessor expression}}
+#endif
+
+// Make sure we reject disallowed UCNs
+#define \ufffe // expected-error {{macro names must be identifiers}}
+#define \U10000000  // expected-error {{macro names must be identifiers}}
+#define \u0061  // expected-error {{character 'a' cannot be specified by a universal character name}} expected-error {{macro names must be identifiers}}
+
+// FIXME: Not clear what our behavior should be here; \u0024 is "$".
+#define a\u0024  // expected-warning {{whitespace}}
+
+#if \u0110 // expected-warning {{is not defined, evaluates to 0}}
+#endif
+
+
+#define \u0110 1 / 0
+#if \u0110 // expected-error {{division by zero in preprocessor expression}}
+#endif
+
+#define STRINGIZE(X) # X
+
+extern int check_size[sizeof(STRINGIZE(\u0112)) == 3 ? 1 : -1];
+
+
+#ifndef __cplusplus
+
+#define newline_1_\u00F\
+C 1
+#define newline_2_\u00\
+F\
+C 1
+#define newline_3_\u\
+00\
+FC 1
+#define newline_4_\\
+u00FC 1
+#define newline_5_\\
+u\
+\
+0\
+0\
+F\
+C 1
+
+#if (newline_1_\u00FC && newline_2_\u00FC && newline_3_\u00FC && \
+     newline_4_\u00FC && newline_5_\u00FC)
+#else
+#error "Line splicing failed to produce UCNs"
+#endif
+
+#endif
