[PATCH] D105759: [WIP] Implement P2361 Unevaluated string literals

Corentin Jabot via Phabricator via cfe-commits Sat, 10 Jul 2021 06:54:01 -0700

cor3ntin updated this revision to Diff 357714.
cor3ntin added a comment.

clang-format



Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D105759/new/

https://reviews.llvm.org/D105759

Files:
  clang/include/clang/AST/Expr.h
  clang/include/clang/Basic/DiagnosticLexKinds.td
  clang/include/clang/Lex/LiteralSupport.h
  clang/include/clang/Parse/Parser.h
  clang/include/clang/Sema/Sema.h
  clang/lib/AST/Expr.cpp
  clang/lib/Frontend/FrontendAction.cpp
  clang/lib/Lex/LiteralSupport.cpp
  clang/lib/Lex/PPDirectives.cpp
  clang/lib/Lex/PPMacroExpansion.cpp
  clang/lib/Lex/Pragma.cpp
  clang/lib/Parse/ParseDecl.cpp
  clang/lib/Parse/ParseDeclCXX.cpp
  clang/lib/Parse/ParseExpr.cpp
  clang/lib/Parse/Parser.cpp
  clang/lib/Sema/SemaDeclCXX.cpp
  clang/lib/Sema/SemaExpr.cpp
  clang/lib/Sema/SemaExprCXX.cpp
  clang/lib/Sema/SemaInit.cpp
  clang/lib/Sema/SemaStmtAsm.cpp
  clang/test/CXX/dcl.dcl/dcl.link/p2.cpp
  clang/test/CXX/dcl.dcl/p4-0x.cpp
  clang/test/FixIt/fixit-static-assert.cpp
  clang/test/Parser/asm.c
  clang/test/Parser/asm.cpp
  clang/test/Parser/attr-availability.c
  clang/test/Sema/asm.c
  clang/test/SemaCXX/static-assert.cpp

Index: clang/test/SemaCXX/static-assert.cpp
===================================================================
--- clang/test/SemaCXX/static-assert.cpp
+++ clang/test/SemaCXX/static-assert.cpp
@@ -28,12 +28,15 @@
 S<char> s1; // expected-note {{in instantiation of template class 'S<char>' requested here}}
 S<int> s2;
 
-static_assert(false, L"\xFFFFFFFF"); // expected-error {{static_assert failed L"\xFFFFFFFF"}}
-static_assert(false, u"\U000317FF"); // expected-error {{static_assert failed u"\U000317FF"}}
+static_assert(false, L"\xFFFFFFFF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} expected-error {{hex escape sequence out of range}}
+static_assert(false, u"\U000317FF"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
 // FIXME: render this as u8"\u03A9"
-static_assert(false, u8"Î©"); // expected-error {{static_assert failed u8"\316\251"}}
-static_assert(false, L"\u1234"); // expected-error {{static_assert failed L"\x1234"}}
-static_assert(false, L"\x1ff" "0\x123" "fx\xfffff" "goop"); // expected-error {{static_assert failed L"\x1FF""0\x123""fx\xFFFFFgoop"}}
+static_assert(false, u8"Î©");     // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+static_assert(false, L"\u1234"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+static_assert(false, L"\x1ff"
+                     "0\x123"
+                     "fx\xfffff"
+                     "goop"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}} expected-error 3{{hex escape sequence out of range}}
 
 template<typename T> struct AlwaysFails {
   // Only give one error here.
Index: clang/test/Sema/asm.c
===================================================================
--- clang/test/Sema/asm.c
+++ clang/test/Sema/asm.c
@@ -37,14 +37,19 @@
   asm ("nop" : "=c" (a) : "r" (no_clobber_conflict) : "%rcx"); // expected-error {{asm-specifier for input or output variable conflicts with asm clobber list}}
   asm ("nop" : "=r" (no_clobber_conflict) : "c" (c) : "%rcx"); // expected-error {{asm-specifier for input or output variable conflicts with asm clobber list}}
   asm ("nop" : "=r" (clobber_conflict) : "c" (c) : "%rcx"); // expected-error {{asm-specifier for input or output variable conflicts with asm clobber list}}
-  asm ("nop" : "=a" (a) : "b" (b) : "%rcx", "%rbx"); // expected-error {{asm-specifier for input or output variable conflicts with asm clobber list}} 
+  asm("nop"
+      : "=a"(a)
+      : "b"(b)
+      : "%rcx", "%rbx"); // expected-error {{asm-specifier for input or output variable conflicts with asm clobber list}}
 }
 
 // rdar://6094010
 void test3() {
   int x;
-  asm(L"foo" : "=r"(x)); // expected-error {{wide string}}
-  asm("foo" : L"=r"(x)); // expected-error {{wide string}}
+  asm(L"foo"
+      : "=r"(x)); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+  asm("foo"
+      : L"=r"(x)); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
 }
 
 // <rdar://problem/6156893>
Index: clang/test/Parser/attr-availability.c
===================================================================
--- clang/test/Parser/attr-availability.c
+++ clang/test/Parser/attr-availability.c
@@ -20,15 +20,18 @@
 
 void f7() __attribute__((availability(macosx,message=L"wide"))); // expected-error {{expected string literal for optional message in 'availability' attribute}}
 
-void f8() __attribute__((availability(macosx,message="a" L"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}}
+void f8() __attribute__((availability(macosx, message = "a"
+                                                        L"b"))); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
 
 void f9() __attribute__((availability(macosx,message=u8"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}}
 
-void f10() __attribute__((availability(macosx,message="a" u8"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}}
+void f10() __attribute__((availability(macosx, message = "a"
+                                                         u8"b"))); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
 
 void f11() __attribute__((availability(macosx,message=u"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}}
 
-void f12() __attribute__((availability(macosx,message="a" u"b"))); // expected-error {{expected string literal for optional message in 'availability' attribute}}
+void f12() __attribute__((availability(macosx, message = "a"
+                                                         u"b"))); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
 
 // rdar://10095131
 enum E{
Index: clang/test/Parser/asm.cpp
===================================================================
--- clang/test/Parser/asm.cpp
+++ clang/test/Parser/asm.cpp
@@ -1,9 +1,10 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -std=c++11 %s
 
 int foo1 asm ("bar1");
-int foo2 asm (L"bar2"); // expected-error {{cannot use wide string literal in 'asm'}}
-int foo3 asm (u8"bar3"); // expected-error {{cannot use unicode string literal in 'asm'}}
-int foo4 asm (u"bar4"); // expected-error {{cannot use unicode string literal in 'asm'}}
-int foo5 asm (U"bar5"); // expected-error {{cannot use unicode string literal in 'asm'}}
+int foo2 asm(L"bar2");   // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+int foo3 asm(u8"bar3");  // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+int foo4 asm(u"bar4");   // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+int foo5 asm(U"bar5");   // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
 int foo6 asm ("bar6"_x); // expected-error {{string literal with user-defined suffix cannot be used here}}
-int foo6 asm ("" L"bar7"); // expected-error {{cannot use wide string literal in 'asm'}}
+int foo6 asm(""
+             L"bar7"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
Index: clang/test/Parser/asm.c
===================================================================
--- clang/test/Parser/asm.c
+++ clang/test/Parser/asm.c
@@ -11,7 +11,9 @@
 
 void f2() {
   asm("foo" : "=r" (a)); // expected-error {{use of undeclared identifier 'a'}}
-  asm("foo" : : "r" (b)); // expected-error {{use of undeclared identifier 'b'}} 
+  asm("foo"
+      :
+      : "r"(b)); // expected-error {{use of undeclared identifier 'b'}}
 }
 
 void a() __asm__(""); // expected-error {{cannot use an empty string literal in 'asm'}}
@@ -23,7 +25,7 @@
 __asm ; // expected-error {{expected '(' after 'asm'}}
 
 // <rdar://problem/10465079> - Don't crash on wide string literals in 'asm'.
-int foo asm (L"bar"); // expected-error {{cannot use wide string literal in 'asm'}}
+int foo asm(L"bar"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
 
 asm() // expected-error {{expected string literal in 'asm'}}
 // expected-error@-1 {{expected ';' after top-level asm block}}
Index: clang/test/FixIt/fixit-static-assert.cpp
===================================================================
--- clang/test/FixIt/fixit-static-assert.cpp
+++ clang/test/FixIt/fixit-static-assert.cpp
@@ -11,8 +11,6 @@
 // String literal prefixes are good.
 static_assert(true && R"(RawString)");
 // CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:","
-static_assert(true && L"RawString");
-// CHECK-DAG: {[[@LINE-1]]:20-[[@LINE-1]]:22}:","
 
 static_assert(true);
 // CHECK-DAG: {[[@LINE-1]]:19-[[@LINE-1]]:19}:", \"\""
Index: clang/test/CXX/dcl.dcl/p4-0x.cpp
===================================================================
--- clang/test/CXX/dcl.dcl/p4-0x.cpp
+++ clang/test/CXX/dcl.dcl/p4-0x.cpp
@@ -18,4 +18,6 @@
 static_assert(T(), "");
 static_assert(U(), ""); // expected-error {{ambiguous}}
 
-static_assert(false, L"\x14hi" "!" R"x(")x"); // expected-error {{static_assert failed L"\024hi!\""}}
+static_assert(false, L"\x14hi"
+                     "!"
+                     R"x(")x"); // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
Index: clang/test/CXX/dcl.dcl/dcl.link/p2.cpp
===================================================================
--- clang/test/CXX/dcl.dcl/dcl.link/p2.cpp
+++ clang/test/CXX/dcl.dcl/dcl.link/p2.cpp
@@ -8,7 +8,7 @@
 extern "C" plusplus {
 }
 
-extern u8"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}}
-extern L"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}}
-extern u"C++" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}}
-extern U"C" {} // expected-error {{string literal in language linkage specifier cannot have an encoding-prefix}}
+extern u8"C" {}  // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+extern L"C" {}   // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+extern u"C++" {} // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
+extern U"C" {}   // expected-error {{an unevaluated string literal cannot have an encoding prefix}}
Index: clang/lib/Sema/SemaStmtAsm.cpp
===================================================================
--- clang/lib/Sema/SemaStmtAsm.cpp
+++ clang/lib/Sema/SemaStmtAsm.cpp
@@ -254,7 +254,7 @@
   SmallVector<TargetInfo::ConstraintInfo, 4> OutputConstraintInfos;
 
   // The parser verifies that there is a string literal here.
-  assert(AsmString->isAscii());
+  assert(AsmString->isUnevaluated());
 
   FunctionDecl *FD = dyn_cast<FunctionDecl>(getCurLexicalContext());
   llvm::StringMap<bool> FeatureMap;
@@ -262,7 +262,7 @@
 
   for (unsigned i = 0; i != NumOutputs; i++) {
     StringLiteral *Literal = Constraints[i];
-    assert(Literal->isAscii());
+    assert(Literal->isUnevaluated());
 
     StringRef OutputName;
     if (Names[i])
@@ -353,7 +353,7 @@
 
   for (unsigned i = NumOutputs, e = NumOutputs + NumInputs; i != e; i++) {
     StringLiteral *Literal = Constraints[i];
-    assert(Literal->isAscii());
+    assert(Literal->isUnevaluated());
 
     StringRef InputName;
     if (Names[i])
@@ -458,7 +458,7 @@
   // Check that the clobbers are valid.
   for (unsigned i = 0; i != NumClobbers; i++) {
     StringLiteral *Literal = Clobbers[i];
-    assert(Literal->isAscii());
+    assert(Literal->isUnevaluated());
 
     StringRef Clobber = Literal->getString();
 
Index: clang/lib/Sema/SemaInit.cpp
===================================================================
--- clang/lib/Sema/SemaInit.cpp
+++ clang/lib/Sema/SemaInit.cpp
@@ -129,6 +129,9 @@
     if (IsWideCharCompatible(ElemTy, Context))
       return SIF_IncompatWideStringIntoWideChar;
     return SIF_Other;
+  case StringLiteral::Unevaluated:
+    assert(false && "Unevaluated string literal in initialization");
+    break;
   }
 
   llvm_unreachable("missed a StringLiteral kind?");
Index: clang/lib/Sema/SemaExprCXX.cpp
===================================================================
--- clang/lib/Sema/SemaExprCXX.cpp
+++ clang/lib/Sema/SemaExprCXX.cpp
@@ -3960,6 +3960,9 @@
             case StringLiteral::Wide:
               return Context.typesAreCompatible(Context.getWideCharType(),
                                                 QualType(ToPointeeType, 0));
+            case StringLiteral::Unevaluated:
+              assert(false && "Unevaluated string literal in expression");
+              break;
           }
         }
       }
Index: clang/lib/Sema/SemaExpr.cpp
===================================================================
--- clang/lib/Sema/SemaExpr.cpp
+++ clang/lib/Sema/SemaExpr.cpp
@@ -1791,6 +1791,29 @@
   return S.BuildLiteralOperatorCall(R, OpNameInfo, Args, LitEndLoc);
 }
 
+ExprResult Sema::ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks) {
+  StringLiteralParser Literal(StringToks, PP, true);
+  if (Literal.hadError)
+    return ExprError();
+
+  SmallVector<SourceLocation, 4> StringTokLocs;
+  for (const Token &Tok : StringToks)
+    StringTokLocs.push_back(Tok.getLocation());
+
+  StringLiteral *Lit = StringLiteral::Create(
+      Context, Literal.GetString(), StringLiteral::Unevaluated, false, {},
+      &StringTokLocs[0], StringTokLocs.size());
+
+  if (!Literal.getUDSuffix().empty()) {
+    SourceLocation UDSuffixLoc =
+        getUDSuffixLoc(*this, StringTokLocs[Literal.getUDSuffixToken()],
+                       Literal.getUDSuffixOffset());
+    return ExprError(Diag(UDSuffixLoc, diag::err_invalid_string_udl));
+  }
+
+  return Lit;
+}
+
 /// ActOnStringLiteral - The specified tokens were lexed as pasted string
 /// fragments (e.g. "foo" "bar" L"baz").  The result string has to handle string
 /// concatenation ([C99 5.1.1.2, translation phase #6]), so it may come from
Index: clang/lib/Sema/SemaDeclCXX.cpp
===================================================================
--- clang/lib/Sema/SemaDeclCXX.cpp
+++ clang/lib/Sema/SemaDeclCXX.cpp
@@ -16043,11 +16043,7 @@
                                            Expr *LangStr,
                                            SourceLocation LBraceLoc) {
   StringLiteral *Lit = cast<StringLiteral>(LangStr);
-  if (!Lit->isAscii()) {
-    Diag(LangStr->getExprLoc(), diag::err_language_linkage_spec_not_ascii)
-      << LangStr->getSourceRange();
-    return nullptr;
-  }
+  assert(Lit->isUnevaluated() && "Unexpected string literal kind");
 
   StringRef Lang = Lit->getString();
   LinkageSpecDecl::LanguageIDs Language;
Index: clang/lib/Parse/Parser.cpp
===================================================================
--- clang/lib/Parse/Parser.cpp
+++ clang/lib/Parse/Parser.cpp
@@ -1527,15 +1527,9 @@
     return ExprError();
   }
 
-  ExprResult AsmString(ParseStringLiteralExpression());
+  ExprResult AsmString(ParseUnevaluatedStringLiteralExpression());
   if (!AsmString.isInvalid()) {
     const auto *SL = cast<StringLiteral>(AsmString.get());
-    if (!SL->isAscii()) {
-      Diag(Tok, diag::err_asm_operand_wide_string_literal)
-        << SL->isWide()
-        << SL->getSourceRange();
-      return ExprError();
-    }
     if (ForAsmLabel && SL->getString().empty()) {
       Diag(Tok, diag::err_asm_operand_wide_string_literal)
           << 2 /* an empty */ << SL->getSourceRange();
Index: clang/lib/Parse/ParseExpr.cpp
===================================================================
--- clang/lib/Parse/ParseExpr.cpp
+++ clang/lib/Parse/ParseExpr.cpp
@@ -3164,6 +3164,15 @@
 ///         string-literal
 /// \verbatim
 ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral) {
+  return ParseStringLiteralExpression(AllowUserDefinedLiteral, false);
+}
+
+ExprResult Parser::ParseUnevaluatedStringLiteralExpression() {
+  return ParseStringLiteralExpression(false, true);
+}
+
+ExprResult Parser::ParseStringLiteralExpression(bool AllowUserDefinedLiteral,
+                                                bool Unevaluated) {
   assert(isTokenStringLiteral() && "Not a string literal!");
 
   // String concat.  Note that keywords like __func__ and __FUNCTION__ are not
@@ -3175,6 +3184,11 @@
     ConsumeStringToken();
   } while (isTokenStringLiteral());
 
+  if (Unevaluated) {
+    assert(!AllowUserDefinedLiteral && "UDL are always evaluated");
+    return Actions.ActOnUnevaluatedStringLiteral(StringToks);
+  }
+
   // Pass the set of string tokens, ready for concatenation, to the actions.
   return Actions.ActOnStringLiteral(StringToks,
                                     AllowUserDefinedLiteral ? getCurScope()
Index: clang/lib/Parse/ParseDeclCXX.cpp
===================================================================
--- clang/lib/Parse/ParseDeclCXX.cpp
+++ clang/lib/Parse/ParseDeclCXX.cpp
@@ -334,7 +334,7 @@
 ///
 Decl *Parser::ParseLinkage(ParsingDeclSpec &DS, DeclaratorContext Context) {
   assert(isTokenStringLiteral() && "Not a string literal!");
-  ExprResult Lang = ParseStringLiteralExpression(false);
+  ExprResult Lang = ParseUnevaluatedStringLiteralExpression();
 
   ParseScope LinkageScope(this, Scope::DeclScope);
   Decl *LinkageSpec =
@@ -967,7 +967,7 @@
       return nullptr;
     }
 
-    AssertMessage = ParseStringLiteralExpression();
+    AssertMessage = ParseUnevaluatedStringLiteralExpression();
     if (AssertMessage.isInvalid()) {
       SkipMalformedDecl();
       return nullptr;
@@ -4510,7 +4510,7 @@
     Toks[0].setLiteralData(StrBuffer.data());
     Toks[0].setLength(StrBuffer.size());
     StringLiteral *UuidString =
-        cast<StringLiteral>(Actions.ActOnStringLiteral(Toks, nullptr).get());
+        cast<StringLiteral>(Actions.ActOnStringLiteral(Toks).get());
     ArgExprs.push_back(UuidString);
   }
 
Index: clang/lib/Parse/ParseDecl.cpp
===================================================================
--- clang/lib/Parse/ParseDecl.cpp
+++ clang/lib/Parse/ParseDecl.cpp
@@ -1137,20 +1137,9 @@
         return;
       }
       if (Keyword == Ident_message)
-        MessageExpr = ParseStringLiteralExpression();
+        MessageExpr = ParseUnevaluatedStringLiteralExpression();
       else
-        ReplacementExpr = ParseStringLiteralExpression();
-      // Also reject wide string literals.
-      if (StringLiteral *MessageStringLiteral =
-              cast_or_null<StringLiteral>(MessageExpr.get())) {
-        if (!MessageStringLiteral->isAscii()) {
-          Diag(MessageStringLiteral->getSourceRange().getBegin(),
-               diag::err_expected_string_literal)
-            << /*Source='availability attribute'*/ 2;
-          SkipUntil(tok::r_paren, StopAtSemi);
-          return;
-        }
-      }
+        ReplacementExpr = ParseUnevaluatedStringLiteralExpression();
       if (Keyword == Ident_message)
         break;
       else
@@ -1329,19 +1318,19 @@
       if (HadLanguage) {
         Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause)
             << Keyword;
-        ParseStringLiteralExpression();
+        ParseUnevaluatedStringLiteralExpression();
         continue;
       }
-      Language = ParseStringLiteralExpression();
+      Language = ParseUnevaluatedStringLiteralExpression();
     } else {
       assert(Keyword == Ident_defined_in && "Invalid clause keyword!");
       if (HadDefinedIn) {
         Diag(KeywordLoc, diag::err_external_source_symbol_duplicate_clause)
             << Keyword;
-        ParseStringLiteralExpression();
+        ParseUnevaluatedStringLiteralExpression();
         continue;
       }
-      DefinedInExpr = ParseStringLiteralExpression();
+      DefinedInExpr = ParseUnevaluatedStringLiteralExpression();
     }
   } while (TryConsumeToken(tok::comma));
 
Index: clang/lib/Lex/Pragma.cpp
===================================================================
--- clang/lib/Lex/Pragma.cpp
+++ clang/lib/Lex/Pragma.cpp
@@ -1082,7 +1082,7 @@
       if (DiagName.is(tok::eod))
         PP.getDiagnostics().dump();
       else if (DiagName.is(tok::string_literal) && !DiagName.hasUDSuffix()) {
-        StringLiteralParser Literal(DiagName, PP);
+        StringLiteralParser Literal(DiagName, PP, /*Unevaluated*/ true);
         if (Literal.hadError)
           return;
         PP.getDiagnostics().dump(Literal.GetString());
Index: clang/lib/Lex/PPMacroExpansion.cpp
===================================================================
--- clang/lib/Lex/PPMacroExpansion.cpp
+++ clang/lib/Lex/PPMacroExpansion.cpp
@@ -1815,7 +1815,7 @@
     if (!Tok.isAnnotation() && Tok.getIdentifierInfo())
       Tok.setKind(tok::identifier);
     else if (Tok.is(tok::string_literal) && !Tok.hasUDSuffix()) {
-      StringLiteralParser Literal(Tok, *this);
+      StringLiteralParser Literal(Tok, *this, /*Unevaluated*/ true);
       if (Literal.hadError)
         return;
 
Index: clang/lib/Lex/PPDirectives.cpp
===================================================================
--- clang/lib/Lex/PPDirectives.cpp
+++ clang/lib/Lex/PPDirectives.cpp
@@ -1280,17 +1280,11 @@
     return;
   } else {
     // Parse and validate the string, converting it into a unique ID.
-    StringLiteralParser Literal(StrTok, *this);
-    assert(Literal.isAscii() && "Didn't allow wide strings in");
+    StringLiteralParser Literal(StrTok, *this, /*Unevaluated*/ true);
     if (Literal.hadError) {
       DiscardUntilEndOfDirective();
       return;
     }
-    if (Literal.Pascal) {
-      Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
-      DiscardUntilEndOfDirective();
-      return;
-    }
     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
 
     // Verify that there is nothing after the string, other than EOD.  Because
@@ -1430,17 +1424,11 @@
     return;
   } else {
     // Parse and validate the string, converting it into a unique ID.
-    StringLiteralParser Literal(StrTok, *this);
-    assert(Literal.isAscii() && "Didn't allow wide strings in");
+    StringLiteralParser Literal(StrTok, *this, /*Unevaluated*/ true);
     if (Literal.hadError) {
       DiscardUntilEndOfDirective();
       return;
     }
-    if (Literal.Pascal) {
-      Diag(StrTok, diag::err_pp_linemarker_invalid_filename);
-      DiscardUntilEndOfDirective();
-      return;
-    }
     FilenameID = SourceMgr.getLineTableFilenameID(Literal.GetString());
 
     // If a filename was present, read any flags that are present.
Index: clang/lib/Lex/LiteralSupport.cpp
===================================================================
--- clang/lib/Lex/LiteralSupport.cpp
+++ clang/lib/Lex/LiteralSupport.cpp
@@ -86,14 +86,26 @@
     MakeCharSourceRange(Features, TokLoc, TokBegin, TokRangeBegin, TokRangeEnd);
 }
 
+static bool EscapeValidInUnevaluatedStringLiteral(char Escape) {
+  switch (Escape) {
+  case '\\':
+  case '\'':
+  case '"':
+  case '?':
+  case 'n':
+  case 't':
+    return true;
+  }
+  return false;
+}
+
 /// ProcessCharEscape - Parse a standard C escape sequence, which can occur in
 /// either a character or a string literal.
-static unsigned ProcessCharEscape(const char *ThisTokBegin,
-                                  const char *&ThisTokBuf,
-                                  const char *ThisTokEnd, bool &HadError,
-                                  FullSourceLoc Loc, unsigned CharWidth,
-                                  DiagnosticsEngine *Diags,
-                                  const LangOptions &Features) {
+static unsigned
+ProcessCharEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
+                  const char *ThisTokEnd, bool &HadError, FullSourceLoc Loc,
+                  unsigned CharWidth, DiagnosticsEngine *Diags,
+                  const LangOptions &Features, bool Unevaluated) {
   const char *EscapeBegin = ThisTokBuf;
 
   // Skip the '\' char.
@@ -224,6 +236,11 @@
     break;
   }
 
+  if (Unevaluated && !EscapeValidInUnevaluatedStringLiteral(*EscapeBegin)) {
+    Diag(Diags, Features, Loc, ThisTokBegin, EscapeBegin, ThisTokBuf,
+         diag::err_unevaluated_string_invalid_escape_sequence)
+        << *EscapeBegin;
+  }
   return ResultChar;
 }
 
@@ -1380,10 +1397,10 @@
       continue;
     }
     unsigned CharWidth = getCharWidth(Kind, PP.getTargetInfo());
-    uint64_t result =
-      ProcessCharEscape(TokBegin, begin, end, HadError,
-                        FullSourceLoc(Loc,PP.getSourceManager()),
-                        CharWidth, &PP.getDiagnostics(), PP.getLangOpts());
+    uint64_t result = ProcessCharEscape(
+        TokBegin, begin, end, HadError,
+        FullSourceLoc(Loc, PP.getSourceManager()), CharWidth,
+        &PP.getDiagnostics(), PP.getLangOpts(), /*Unevaluated*/ false);
     *buffer_begin++ = result;
   }
 
@@ -1491,13 +1508,15 @@
 ///         hex-digit hex-digit hex-digit hex-digit
 /// \endverbatim
 ///
-StringLiteralParser::
-StringLiteralParser(ArrayRef<Token> StringToks,
-                    Preprocessor &PP, bool Complain)
-  : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
-    Target(PP.getTargetInfo()), Diags(Complain ? &PP.getDiagnostics() :nullptr),
-    MaxTokenLength(0), SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
-    ResultPtr(ResultBuf.data()), hadError(false), Pascal(false) {
+StringLiteralParser::StringLiteralParser(ArrayRef<Token> StringToks,
+                                         Preprocessor &PP, bool Unevaluated,
+                                         bool Complain)
+    : SM(PP.getSourceManager()), Features(PP.getLangOpts()),
+      Target(PP.getTargetInfo()),
+      Diags(Complain ? &PP.getDiagnostics() : nullptr), MaxTokenLength(0),
+      SizeBound(0), CharByteWidth(0), Kind(tok::unknown),
+      ResultPtr(ResultBuf.data()), hadError(false), Pascal(false),
+      Unevaluated(Unevaluated) {
   init(StringToks);
 }
 
@@ -1515,11 +1534,30 @@
   MaxTokenLength = StringToks[0].getLength();
   assert(StringToks[0].getLength() >= 2 && "literal token is invalid!");
   SizeBound = StringToks[0].getLength()-2;  // -2 for "".
-  Kind = StringToks[0].getKind();
-
   hadError = false;
 
-  // Implement Translation Phase #6: concatenation of string literals
+  // Determines the kind of string from the prefix
+  Kind = tok::string_literal;
+  for (const auto &Tok : StringToks) {
+    // Unevaluated string literals can never have a prefix
+    if (Unevaluated && Tok.getKind() != tok::string_literal) {
+      if (Diags)
+        Diags->Report(Tok.getLocation(), diag::err_unevaluated_string_prefix);
+      hadError = true;
+      continue;
+    }
+    if (Tok.is(tok::string_literal))
+      continue;
+    if (Tok.is(Kind) || Kind == tok::string_literal) {
+      Kind = Tok.getKind();
+      continue;
+    }
+    if (Diags) {
+      Diags->Report(Tok.getLocation(), diag::err_unsupported_string_concat);
+      hadError = true;
+    }
+  }
+
   /// (C99 5.1.1.2p1).  The common case is only one string fragment.
   for (unsigned i = 1; i != StringToks.size(); ++i) {
     if (StringToks[i].getLength() < 2)
@@ -1533,19 +1571,6 @@
     // Remember maximum string piece length.
     if (StringToks[i].getLength() > MaxTokenLength)
       MaxTokenLength = StringToks[i].getLength();
-
-    // Remember if we see any wide or utf-8/16/32 strings.
-    // Also check for illegal concatenations.
-    if (StringToks[i].isNot(Kind) && StringToks[i].isNot(tok::string_literal)) {
-      if (isAscii()) {
-        Kind = StringToks[i].getKind();
-      } else {
-        if (Diags)
-          Diags->Report(StringToks[i].getLocation(),
-                        diag::err_unsupported_string_concat);
-        hadError = true;
-      }
-    }
   }
 
   // Include space for the null terminator.
@@ -1620,13 +1645,16 @@
         // result of a concatenation involving at least one user-defined-string-
         // literal, all the participating user-defined-string-literals shall
         // have the same ud-suffix.
-        if (UDSuffixBuf != UDSuffix) {
+        const bool UnevaluatedStringHasUDL = Unevaluated && !UDSuffix.empty();
+        if (UDSuffixBuf != UDSuffix || UnevaluatedStringHasUDL) {
           if (Diags) {
             SourceLocation TokLoc = StringToks[i].getLocation();
-            Diags->Report(TokLoc, diag::err_string_concat_mixed_suffix)
-              << UDSuffixBuf << UDSuffix
-              << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
-              << SourceRange(TokLoc, TokLoc);
+            Diags->Report(TokLoc, UnevaluatedStringHasUDL
+                                      ? diag::err_unevaluated_string_udl
+                                      : diag::err_string_concat_mixed_suffix)
+                << UDSuffixBuf << UDSuffix
+                << SourceRange(UDSuffixTokLoc, UDSuffixTokLoc)
+                << SourceRange(TokLoc, TokLoc);
           }
           hadError = true;
         }
@@ -1697,8 +1725,9 @@
       ++ThisTokBuf; // skip "
 
       // Check if this is a pascal string
-      if (Features.PascalStrings && ThisTokBuf + 1 != ThisTokEnd &&
-          ThisTokBuf[0] == '\\' && ThisTokBuf[1] == 'p') {
+      if (!Unevaluated && Features.PascalStrings &&
+          ThisTokBuf + 1 != ThisTokEnd && ThisTokBuf[0] == '\\' &&
+          ThisTokBuf[1] == 'p') {
 
         // If the \p sequence is found in the first token, we have a pascal string
         // Otherwise, if we already have a pascal string, ignore the first \p
@@ -1733,9 +1762,9 @@
         }
         // Otherwise, this is a non-UCN escape character.  Process it.
         unsigned ResultChar =
-          ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
-                            FullSourceLoc(StringToks[i].getLocation(), SM),
-                            CharByteWidth*8, Diags, Features);
+            ProcessCharEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, hadError,
+                              FullSourceLoc(StringToks[i].getLocation(), SM),
+                              CharByteWidth * 8, Diags, Features, Unevaluated);
 
         if (CharByteWidth == 4) {
           // FIXME: Make the type of the result buffer correct instead of
@@ -1757,6 +1786,7 @@
     }
   }
 
+  assert((!Pascal || !Unevaluated) && "Pascal string in unevaluated context");
   if (Pascal) {
     if (CharByteWidth == 4) {
       // FIXME: Make the type of the result buffer correct instead of
@@ -1929,8 +1959,8 @@
       ByteNo -= Len;
     } else {
       ProcessCharEscape(SpellingStart, SpellingPtr, SpellingEnd, HadError,
-                        FullSourceLoc(Tok.getLocation(), SM),
-                        CharByteWidth*8, Diags, Features);
+                        FullSourceLoc(Tok.getLocation(), SM), CharByteWidth * 8,
+                        Diags, Features, false);
       --ByteNo;
     }
     assert(!HadError && "This method isn't valid on erroneous strings");
Index: clang/lib/Frontend/FrontendAction.cpp
===================================================================
--- clang/lib/Frontend/FrontendAction.cpp
+++ clang/lib/Frontend/FrontendAction.cpp
@@ -265,7 +265,7 @@
   if (T.isAtStartOfLine() || T.getKind() != tok::string_literal)
     return SourceLocation();
 
-  StringLiteralParser Literal(T, CI.getPreprocessor());
+  StringLiteralParser Literal(T, CI.getPreprocessor(), /*Unevaluated*/ true);
   if (Literal.hadError)
     return SourceLocation();
   RawLexer->LexFromRawLexer(T);
Index: clang/lib/AST/Expr.cpp
===================================================================
--- clang/lib/AST/Expr.cpp
+++ clang/lib/AST/Expr.cpp
@@ -1056,6 +1056,9 @@
   case UTF32:
     CharByteWidth = Target.getChar32Width();
     break;
+  case Unevaluated:
+    assert(false && "Unevaluated strings have no byte width");
+    break;
   }
   assert((CharByteWidth & 7) == 0 && "Assumes character size is byte multiple");
   CharByteWidth /= 8;
@@ -1069,35 +1072,43 @@
                              const SourceLocation *Loc,
                              unsigned NumConcatenated)
     : Expr(StringLiteralClass, Ty, VK_LValue, OK_Ordinary) {
-  assert(Ctx.getAsConstantArrayType(Ty) &&
-         "StringLiteral must be of constant array type!");
-  unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
+
   unsigned ByteLength = Str.size();
-  assert((ByteLength % CharByteWidth == 0) &&
-         "The size of the data must be a multiple of CharByteWidth!");
-
-  // Avoid the expensive division. The compiler should be able to figure it
-  // out by itself. However as of clang 7, even with the appropriate
-  // llvm_unreachable added just here, it is not able to do so.
-  unsigned Length;
-  switch (CharByteWidth) {
-  case 1:
-    Length = ByteLength;
-    break;
-  case 2:
-    Length = ByteLength / 2;
-    break;
-  case 4:
-    Length = ByteLength / 4;
-    break;
-  default:
-    llvm_unreachable("Unsupported character width!");
-  }
+  unsigned Length = ByteLength;
 
   StringLiteralBits.Kind = Kind;
-  StringLiteralBits.CharByteWidth = CharByteWidth;
-  StringLiteralBits.IsPascal = Pascal;
   StringLiteralBits.NumConcatenated = NumConcatenated;
+  StringLiteralBits.CharByteWidth = 1;
+
+  if (Kind != StringKind::Unevaluated) {
+    assert(Ctx.getAsConstantArrayType(Ty) &&
+           "StringLiteral must be of constant array type!");
+    unsigned CharByteWidth = mapCharByteWidth(Ctx.getTargetInfo(), Kind);
+    unsigned ByteLength = Str.size();
+    assert((ByteLength % CharByteWidth == 0) &&
+           "The size of the data must be a multiple of CharByteWidth!");
+
+    // Avoid the expensive division. The compiler should be able to figure it
+    // out by itself. However as of clang 7, even with the appropriate
+    // llvm_unreachable added just here, it is not able to do so.
+    switch (CharByteWidth) {
+    case 1:
+      Length = ByteLength;
+      break;
+    case 2:
+      Length = ByteLength / 2;
+      break;
+    case 4:
+      Length = ByteLength / 4;
+      break;
+    default:
+      llvm_unreachable("Unsupported character width!");
+    }
+
+    StringLiteralBits.CharByteWidth = CharByteWidth;
+    StringLiteralBits.IsPascal = Pascal;
+  }
+
   *getTrailingObjects<unsigned>() = Length;
 
   // Initialize the trailing array of SourceLocation.
@@ -1143,6 +1154,8 @@
 
 void StringLiteral::outputString(raw_ostream &OS) const {
   switch (getKind()) {
+  case Unevaluated:
+    break;           // no prefic
   case Ascii: break; // no prefix.
   case Wide:  OS << 'L'; break;
   case UTF8:  OS << "u8"; break;
Index: clang/include/clang/Sema/Sema.h
===================================================================
--- clang/include/clang/Sema/Sema.h
+++ clang/include/clang/Sema/Sema.h
@@ -5242,6 +5242,8 @@
   ExprResult ActOnStringLiteral(ArrayRef<Token> StringToks,
                                 Scope *UDLScope = nullptr);
 
+  ExprResult ActOnUnevaluatedStringLiteral(ArrayRef<Token> StringToks);
+
   ExprResult ActOnGenericSelectionExpr(SourceLocation KeyLoc,
                                        SourceLocation DefaultLoc,
                                        SourceLocation RParenLoc,
Index: clang/include/clang/Parse/Parser.h
===================================================================
--- clang/include/clang/Parse/Parser.h
+++ clang/include/clang/Parse/Parser.h
@@ -1750,8 +1750,12 @@
                                   bool IsUnevaluated);
 
   ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral = false);
+  ExprResult ParseUnevaluatedStringLiteralExpression();
 
 private:
+  ExprResult ParseStringLiteralExpression(bool AllowUserDefinedLiteral,
+                                          bool Unevaluated);
+
   ExprResult ParseExpressionWithLeadingAt(SourceLocation AtLoc);
 
   ExprResult ParseExpressionWithLeadingExtension(SourceLocation ExtLoc);
Index: clang/include/clang/Lex/LiteralSupport.h
===================================================================
--- clang/include/clang/Lex/LiteralSupport.h
+++ clang/include/clang/Lex/LiteralSupport.h
@@ -217,14 +217,15 @@
   unsigned SizeBound;
   unsigned CharByteWidth;
   tok::TokenKind Kind;
+  SmallString<512> UnevaluatedBuf;
   SmallString<512> ResultBuf;
   char *ResultPtr; // cursor
   SmallString<32> UDSuffixBuf;
   unsigned UDSuffixToken;
   unsigned UDSuffixOffset;
 public:
-  StringLiteralParser(ArrayRef<Token> StringToks,
-                      Preprocessor &PP, bool Complain = true);
+  StringLiteralParser(ArrayRef<Token> StringToks, Preprocessor &PP,
+                      bool Unevaluated = false, bool Complain = true);
   StringLiteralParser(ArrayRef<Token> StringToks,
                       const SourceManager &sm, const LangOptions &features,
                       const TargetInfo &target,
@@ -238,6 +239,7 @@
 
   bool hadError;
   bool Pascal;
+  bool Unevaluated;
 
   StringRef GetString() const {
     return StringRef(ResultBuf.data(), GetStringLength());
@@ -261,6 +263,7 @@
   bool isUTF16() const { return Kind == tok::utf16_string_literal; }
   bool isUTF32() const { return Kind == tok::utf32_string_literal; }
   bool isPascal() const { return Pascal; }
+  bool isUnevaluated() const { return Unevaluated; }
 
   StringRef getUDSuffix() const { return UDSuffixBuf; }
 
Index: clang/include/clang/Basic/DiagnosticLexKinds.td
===================================================================
--- clang/include/clang/Basic/DiagnosticLexKinds.td
+++ clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -241,6 +241,12 @@
   "identifier">, InGroup<ReservedUserDefinedLiteral>;
 def err_unsupported_string_concat : Error<
   "unsupported non-standard concatenation of string literals">;
+def err_unevaluated_string_prefix : Error<
+  "an unevaluated string literal cannot have an encoding prefix">;
+def err_unevaluated_string_udl : Error<
+  "an unevaluated string literal cannot be a user defined literal">;
+def err_unevaluated_string_invalid_escape_sequence : Error<
+  "Invalid escape sequence '%0' in an unevaluated string literal">;
 def err_string_concat_mixed_suffix : Error<
   "differing user-defined suffixes ('%0' and '%1') in string literal "
   "concatenation">;
Index: clang/include/clang/AST/Expr.h
===================================================================
--- clang/include/clang/AST/Expr.h
+++ clang/include/clang/AST/Expr.h
@@ -1774,7 +1774,7 @@
   /// * An array of getByteLength() char used to store the string data.
 
 public:
-  enum StringKind { Ascii, Wide, UTF8, UTF16, UTF32 };
+  enum StringKind { Ascii, Wide, UTF8, UTF16, UTF32, Unevaluated };
 
 private:
   unsigned numTrailingObjects(OverloadToken<unsigned>) const { return 1; }
@@ -1836,8 +1836,9 @@
                                     unsigned CharByteWidth);
 
   StringRef getString() const {
-    assert(getCharByteWidth() == 1 &&
-           "This function is used in places that assume strings use char");
+    assert(isUnevaluated() ||
+           getCharByteWidth() == 1 &&
+               "This function is used in places that assume strings use char");
     return StringRef(getStrDataAsChar(), getByteLength());
   }
 
@@ -1876,6 +1877,7 @@
   bool isUTF8() const { return getKind() == UTF8; }
   bool isUTF16() const { return getKind() == UTF16; }
   bool isUTF32() const { return getKind() == UTF32; }
+  bool isUnevaluated() const { return getKind() == Unevaluated; }
   bool isPascal() const { return StringLiteralBits.IsPascal; }
 
   bool containsNonAscii() const {

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D105759: [WIP] Implement P2361 Unevaluated string literals

Reply via email to