https://github.com/cor3ntin updated 
https://github.com/llvm/llvm-project/pull/93216

>From 556c622275c630b74c0f9000c5c599ff665595e1 Mon Sep 17 00:00:00 2001
From: Corentin Jabot <corentinja...@gmail.com>
Date: Thu, 23 May 2024 18:45:58 +0200
Subject: [PATCH] [Clang] allow `` `@$ `` in raw string delimiters in C++26

And as an extension in older language modes.

Per https://eel.is/c++draft/lex.string#nt:d-char

Fixes #93130
---
 clang/docs/ReleaseNotes.rst                   |  1 +
 clang/include/clang/Basic/CharInfo.h          | 15 +++++++-------
 .../include/clang/Basic/DiagnosticLexKinds.td |  8 ++++++++
 clang/lib/Basic/CharInfo.cpp                  | 20 +++++++++----------
 clang/lib/Lex/Lexer.cpp                       | 11 +++++++++-
 clang/test/Lexer/cxx2c-raw-strings.cpp        | 12 +++++++++++
 6 files changed, 49 insertions(+), 18 deletions(-)
 create mode 100644 clang/test/Lexer/cxx2c-raw-strings.cpp

diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst
index 7bcdee96e213e..2e298cd9cdb82 100644
--- a/clang/docs/ReleaseNotes.rst
+++ b/clang/docs/ReleaseNotes.rst
@@ -771,6 +771,7 @@ Bug Fixes to C++ Support
   Fixes (#GH87210), (GH89541).
 - Clang no longer tries to check if an expression is immediate-escalating in 
an unevaluated context.
   Fixes (#GH91308).
+- Clang now allow ``@$``` in raw string literals. Fixes (#GH93130).
 
 Bug Fixes to AST Handling
 ^^^^^^^^^^^^^^^^^^^^^^^^^
diff --git a/clang/include/clang/Basic/CharInfo.h 
b/clang/include/clang/Basic/CharInfo.h
index d807955311828..4d90528f7992e 100644
--- a/clang/include/clang/Basic/CharInfo.h
+++ b/clang/include/clang/Basic/CharInfo.h
@@ -28,8 +28,7 @@ namespace charinfo {
     CHAR_LOWER    = 0x0040,  // a-z
     CHAR_UNDER    = 0x0080,  // _
     CHAR_PERIOD   = 0x0100,  // .
-    CHAR_RAWDEL   = 0x0200,  // {}[]#<>%:;?*+-/^&|~!=,"'
-    CHAR_PUNCT    = 0x0400   // `$@()
+    CHAR_PUNCT    = 0x0200,  // {}[]#<>%:;?*+-/^&|~!=,"'`$@()
   };
 
   enum {
@@ -152,7 +151,8 @@ LLVM_READONLY inline bool isHexDigit(unsigned char c) {
 /// Note that '_' is both a punctuation character and an identifier character!
 LLVM_READONLY inline bool isPunctuation(unsigned char c) {
   using namespace charinfo;
-  return (InfoTable[c] & (CHAR_UNDER|CHAR_PERIOD|CHAR_RAWDEL|CHAR_PUNCT)) != 0;
+  return (InfoTable[c] &
+          (CHAR_UNDER | CHAR_PERIOD | CHAR_PUNCT | CHAR_PUNCT)) != 0;
 }
 
 /// Return true if this character is an ASCII printable character; that is, a
@@ -160,8 +160,8 @@ LLVM_READONLY inline bool isPunctuation(unsigned char c) {
 /// terminal.
 LLVM_READONLY inline bool isPrintable(unsigned char c) {
   using namespace charinfo;
-  return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|CHAR_PUNCT|
-                          CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL|CHAR_SPACE)) != 0;
+  return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_PUNCT |
+                          CHAR_DIGIT | CHAR_UNDER | CHAR_SPACE)) != 0;
 }
 
 /// Return true if this is the body character of a C preprocessing number,
@@ -175,8 +175,9 @@ LLVM_READONLY inline bool 
isPreprocessingNumberBody(unsigned char c) {
 /// Return true if this is the body character of a C++ raw string delimiter.
 LLVM_READONLY inline bool isRawStringDelimBody(unsigned char c) {
   using namespace charinfo;
-  return (InfoTable[c] & (CHAR_UPPER|CHAR_LOWER|CHAR_PERIOD|
-                          CHAR_DIGIT|CHAR_UNDER|CHAR_RAWDEL)) != 0;
+  return (InfoTable[c] & (CHAR_UPPER | CHAR_LOWER | CHAR_PERIOD | CHAR_DIGIT |
+                          CHAR_UNDER | CHAR_PUNCT)) != 0 &&
+         c != '(' && c != ')';
 }
 
 enum class EscapeChar {
diff --git a/clang/include/clang/Basic/DiagnosticLexKinds.td 
b/clang/include/clang/Basic/DiagnosticLexKinds.td
index ad6bacfb118d4..8411842490c4e 100644
--- a/clang/include/clang/Basic/DiagnosticLexKinds.td
+++ b/clang/include/clang/Basic/DiagnosticLexKinds.td
@@ -111,6 +111,14 @@ def warn_cxx98_compat_raw_string_literal : Warning<
   "raw string literals are incompatible with C++98">,
   InGroup<CXX98Compat>, DefaultIgnore;
 
+def warn_cxx26_compat_raw_string_literal_character_set : Warning<
+  "'%0'in a raw string literal delimiter is incompatible "
+  "with standards before C++2c">,
+  InGroup<CXXPre26Compat>, DefaultIgnore;
+def ext_cxx26_raw_string_literal_character_set : Extension<
+  "'%0'in a raw string literal delimiter is a C++2c extension">,
+  InGroup<CXX26>, DefaultIgnore;
+
 def warn_multichar_character_literal : Warning<
   "multi-character character constant">, InGroup<MultiChar>;
 def warn_four_char_character_literal : Warning<
diff --git a/clang/lib/Basic/CharInfo.cpp b/clang/lib/Basic/CharInfo.cpp
index d02054c9718f5..26d693b8e9b94 100644
--- a/clang/lib/Basic/CharInfo.cpp
+++ b/clang/lib/Basic/CharInfo.cpp
@@ -31,20 +31,20 @@ const uint16_t clang::charinfo::InfoTable[256] = {
   0           , 0           , 0           , 0           ,
   //32 SP         33  !         34  "         35  #
   //36  $         37  %         38  &         39  '
-  CHAR_SPACE  , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
-  CHAR_PUNCT  , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
+  CHAR_SPACE  , CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PUNCT  ,
+  CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PUNCT  ,
   //40  (         41  )         42  *         43  +
   //44  ,         45  -         46  .         47  /
-  CHAR_PUNCT  , CHAR_PUNCT  , CHAR_RAWDEL , CHAR_RAWDEL ,
-  CHAR_RAWDEL , CHAR_RAWDEL , CHAR_PERIOD , CHAR_RAWDEL ,
+  CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PUNCT  ,
+  CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PERIOD , CHAR_PUNCT  ,
   //48  0         49  1         50  2         51  3
   //52  4         53  5         54  6         55  7
   CHAR_DIGIT  , CHAR_DIGIT  , CHAR_DIGIT  , CHAR_DIGIT  ,
   CHAR_DIGIT  , CHAR_DIGIT  , CHAR_DIGIT  , CHAR_DIGIT  ,
   //56  8         57  9         58  :         59  ;
   //60  <         61  =         62  >         63  ?
-  CHAR_DIGIT  , CHAR_DIGIT  , CHAR_RAWDEL , CHAR_RAWDEL ,
-  CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL ,
+  CHAR_DIGIT  , CHAR_DIGIT  , CHAR_PUNCT  , CHAR_PUNCT  ,
+  CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PUNCT  ,
   //64  @         65  A         66  B         67  C
   //68  D         69  E         70  F         71  G
   CHAR_PUNCT  , CHAR_XUPPER , CHAR_XUPPER , CHAR_XUPPER ,
@@ -59,8 +59,8 @@ const uint16_t clang::charinfo::InfoTable[256] = {
   CHAR_UPPER  , CHAR_UPPER  , CHAR_UPPER  , CHAR_UPPER  ,
   //88  X         89  Y         90  Z         91  [
   //92  \         93  ]         94  ^         95  _
-  CHAR_UPPER  , CHAR_UPPER  , CHAR_UPPER  , CHAR_RAWDEL ,
-  CHAR_PUNCT  , CHAR_RAWDEL , CHAR_RAWDEL , CHAR_UNDER  ,
+  CHAR_UPPER  , CHAR_UPPER  , CHAR_UPPER  , CHAR_PUNCT  ,
+  CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PUNCT  , CHAR_UNDER  ,
   //96  `         97  a         98  b         99  c
   //100  d       101  e        102  f        103  g
   CHAR_PUNCT  , CHAR_XLOWER , CHAR_XLOWER , CHAR_XLOWER ,
@@ -75,6 +75,6 @@ const uint16_t clang::charinfo::InfoTable[256] = {
   CHAR_LOWER  , CHAR_LOWER  , CHAR_LOWER  , CHAR_LOWER  ,
   //120  x       121  y        122  z        123  {
   //124  |       125  }        126  ~        127 DEL
-  CHAR_LOWER  , CHAR_LOWER  , CHAR_LOWER  , CHAR_RAWDEL ,
-  CHAR_RAWDEL , CHAR_RAWDEL , CHAR_RAWDEL , 0
+  CHAR_LOWER  , CHAR_LOWER  , CHAR_LOWER  , CHAR_PUNCT  ,
+  CHAR_PUNCT  , CHAR_PUNCT  , CHAR_PUNCT  , 0
 };
diff --git a/clang/lib/Lex/Lexer.cpp b/clang/lib/Lex/Lexer.cpp
index c98645993abe0..c7543a48c0b50 100644
--- a/clang/lib/Lex/Lexer.cpp
+++ b/clang/lib/Lex/Lexer.cpp
@@ -2261,8 +2261,17 @@ bool Lexer::LexRawStringLiteral(Token &Result, const 
char *CurPtr,
 
   unsigned PrefixLen = 0;
 
-  while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen]))
+  while (PrefixLen != 16 && isRawStringDelimBody(CurPtr[PrefixLen])) {
     ++PrefixLen;
+    if (!isLexingRawMode() &&
+        llvm::is_contained({'$', '@', '`'}, CurPtr[PrefixLen])) {
+      const char *Pos = &CurPtr[PrefixLen];
+      Diag(Pos, LangOpts.CPlusPlus26
+                    ? diag::warn_cxx26_compat_raw_string_literal_character_set
+                    : diag::ext_cxx26_raw_string_literal_character_set)
+          << StringRef(Pos, 1);
+    }
+  }
 
   // If the last character was not a '(', then we didn't lex a valid delimiter.
   if (CurPtr[PrefixLen] != '(') {
diff --git a/clang/test/Lexer/cxx2c-raw-strings.cpp 
b/clang/test/Lexer/cxx2c-raw-strings.cpp
new file mode 100644
index 0000000000000..9181cbc7cf8d4
--- /dev/null
+++ b/clang/test/Lexer/cxx2c-raw-strings.cpp
@@ -0,0 +1,12 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify -Wc++26-extensions %s
+// RUN: %clang_cc1 -std=c++2c -fsyntax-only -verify=cxx26 -Wpre-c++26-compat %s
+
+int main() {
+  (void) R"abc`@$(foobar)abc`@$";
+  //expected-warning@-1 {{'`'in a raw string literal delimiter is a C++2c 
extension}}
+  //expected-warning@-2 {{'@'in a raw string literal delimiter is a C++2c 
extension}}
+  //expected-warning@-3 {{'$'in a raw string literal delimiter is a C++2c 
extension}}
+  //cxx26-warning@-4 {{'`'in a raw string literal delimiter is incompatible 
with standards before C++2c}}
+  //cxx26-warning@-5 {{'@'in a raw string literal delimiter is incompatible 
with standards before C++2c}}
+  //cxx26-warning@-6 {{'$'in a raw string literal delimiter is incompatible 
with standards before C++2c}}
+}

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to