cor3ntin created this revision.
Herald added a project: All.
cor3ntin requested review of this revision.
Herald added a project: clang.
Herald added a subscriber: cfe-commits.

This implements
N2836 Identifier Syntax using Unicode Standard Annex 31.

The feature was already implemented for C++,
and the semantics are the same.

Unlike C++, there was, as far as I can tell, no decision to
backport the feature to older language modes,
so C17 and earlier are not modified and the
code point tables for these language modes are preserved.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D130416

Files:
  clang/docs/ReleaseNotes.rst
  clang/lib/Lex/Lexer.cpp
  clang/test/Lexer/unicode.c
  clang/www/c_status.html


Index: clang/www/c_status.html
===================================================================
--- clang/www/c_status.html
+++ clang/www/c_status.html
@@ -1024,7 +1024,7 @@
     <tr>
       <td>Identifier Syntax using Unicode Standard Annex 31</td>
       <td><a 
href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2836.pdf">N2836</a></td>
-      <td class="none" align="center">No</td>
+      <td class="unreleased" align="center">Clang 15</td>
     </tr>
     <tr>
       <td>No function declarators without prototypes</td>
Index: clang/test/Lexer/unicode.c
===================================================================
--- clang/test/Lexer/unicode.c
+++ clang/test/Lexer/unicode.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -x c -std=c11 %s
-// RUN: %clang_cc1 -fsyntax-only -verify -x c++ -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,c2x -x c -std=c2x %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ -std=c++11 %s
 // RUN: %clang_cc1 -std=c99 -E -DPP_ONLY=1 %s | FileCheck %s 
--strict-whitespace
 // RUN: %clang_cc1 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace
 
@@ -31,7 +32,7 @@
 extern int X\UAAAAAAAA; // expected-error {{not allowed in an identifier}}
 int Y = '\UAAAAAAAA'; // expected-error {{invalid universal character}}
 
-#ifdef __cplusplus
+#if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 
202000L)
 
 extern int ༀ;
 extern int 𑩐;
@@ -46,7 +47,8 @@
 
 
 // This character doesn't have the XID_Start property
-extern int  \U00016AC0; // TANGSA DIGIT ZERO  // expected-error {{expected 
unqualified-id}}
+extern int  \U00016AC0; // TANGSA DIGIT ZERO  // cxx-error {{expected 
unqualified-id}} \
+                                              // c2x-error {{expected 
identifier or '('}}
 
 extern int 🌹; // expected-error {{unexpected character <U+1F339>}} \
                   expected-warning {{declaration does not declare anything}}
Index: clang/lib/Lex/Lexer.cpp
===================================================================
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -1462,7 +1462,7 @@
     return false;
   } else if (LangOpts.DollarIdents && '$' == C) {
     return true;
-  } else if (LangOpts.CPlusPlus) {
+  } else if (LangOpts.CPlusPlus || LangOpts.C2x) {
     // A non-leading codepoint must have the XID_Continue property.
     // XIDContinueRanges doesn't contains characters also in XIDStartRanges,
     // so we need to check both tables.
@@ -1486,7 +1486,7 @@
   if (LangOpts.AsmPreprocessor) {
     return false;
   }
-  if (LangOpts.CPlusPlus) {
+  if (LangOpts.CPlusPlus || LangOpts.C2x) {
     static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges);
     // '_' doesn't have the XID_Start property but is allowed in C++.
     return C == '_' || XIDStartChars.contains(C);
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -469,6 +469,7 @@
   support for functions without prototypes, which no longer exist in C2x.
 - Implemented `WG14 N2841 No function declarators without prototypes 
<https://www9.open-std.org/jtc1/sc22/wg14/www/docs/n2841.htm>`_
   and `WG14 N2432 Remove support for function definitions with identifier 
lists <https://www9.open-std.org/jtc1/sc22/wg14/www/docs/n2432.pdf>`_.
+- Implemented `WG14 N2836 Identifier Syntax using Unicode Standard Annex 31 
<https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2836.pdf>`_.
 
 C++ Language Changes in Clang
 -----------------------------


Index: clang/www/c_status.html
===================================================================
--- clang/www/c_status.html
+++ clang/www/c_status.html
@@ -1024,7 +1024,7 @@
     <tr>
       <td>Identifier Syntax using Unicode Standard Annex 31</td>
       <td><a href="https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2836.pdf">N2836</a></td>
-      <td class="none" align="center">No</td>
+      <td class="unreleased" align="center">Clang 15</td>
     </tr>
     <tr>
       <td>No function declarators without prototypes</td>
Index: clang/test/Lexer/unicode.c
===================================================================
--- clang/test/Lexer/unicode.c
+++ clang/test/Lexer/unicode.c
@@ -1,5 +1,6 @@
 // RUN: %clang_cc1 -fsyntax-only -verify -x c -std=c11 %s
-// RUN: %clang_cc1 -fsyntax-only -verify -x c++ -std=c++11 %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,c2x -x c -std=c2x %s
+// RUN: %clang_cc1 -fsyntax-only -verify=expected,cxx -x c++ -std=c++11 %s
 // RUN: %clang_cc1 -std=c99 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace
 // RUN: %clang_cc1 -E -DPP_ONLY=1 %s | FileCheck %s --strict-whitespace
 
@@ -31,7 +32,7 @@
 extern int X\UAAAAAAAA; // expected-error {{not allowed in an identifier}}
 int Y = '\UAAAAAAAA'; // expected-error {{invalid universal character}}
 
-#ifdef __cplusplus
+#if defined(__cplusplus) || (defined(__STDC_VERSION__) && __STDC_VERSION__ >= 202000L)
 
 extern int ༀ;
 extern int 𑩐;
@@ -46,7 +47,8 @@
 
 
 // This character doesn't have the XID_Start property
-extern int  \U00016AC0; // TANGSA DIGIT ZERO  // expected-error {{expected unqualified-id}}
+extern int  \U00016AC0; // TANGSA DIGIT ZERO  // cxx-error {{expected unqualified-id}} \
+                                              // c2x-error {{expected identifier or '('}}
 
 extern int 🌹; // expected-error {{unexpected character <U+1F339>}} \
                   expected-warning {{declaration does not declare anything}}
Index: clang/lib/Lex/Lexer.cpp
===================================================================
--- clang/lib/Lex/Lexer.cpp
+++ clang/lib/Lex/Lexer.cpp
@@ -1462,7 +1462,7 @@
     return false;
   } else if (LangOpts.DollarIdents && '$' == C) {
     return true;
-  } else if (LangOpts.CPlusPlus) {
+  } else if (LangOpts.CPlusPlus || LangOpts.C2x) {
     // A non-leading codepoint must have the XID_Continue property.
     // XIDContinueRanges doesn't contains characters also in XIDStartRanges,
     // so we need to check both tables.
@@ -1486,7 +1486,7 @@
   if (LangOpts.AsmPreprocessor) {
     return false;
   }
-  if (LangOpts.CPlusPlus) {
+  if (LangOpts.CPlusPlus || LangOpts.C2x) {
     static const llvm::sys::UnicodeCharSet XIDStartChars(XIDStartRanges);
     // '_' doesn't have the XID_Start property but is allowed in C++.
     return C == '_' || XIDStartChars.contains(C);
Index: clang/docs/ReleaseNotes.rst
===================================================================
--- clang/docs/ReleaseNotes.rst
+++ clang/docs/ReleaseNotes.rst
@@ -469,6 +469,7 @@
   support for functions without prototypes, which no longer exist in C2x.
 - Implemented `WG14 N2841 No function declarators without prototypes <https://www9.open-std.org/jtc1/sc22/wg14/www/docs/n2841.htm>`_
   and `WG14 N2432 Remove support for function definitions with identifier lists <https://www9.open-std.org/jtc1/sc22/wg14/www/docs/n2432.pdf>`_.
+- Implemented `WG14 N2836 Identifier Syntax using Unicode Standard Annex 31 <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n2836.pdf>`_.
 
 C++ Language Changes in Clang
 -----------------------------
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to