https://gcc.gnu.org/bugzilla/show_bug.cgi?id=100977

--- Comment #3 from Jakub Jelinek <jakub at gcc dot gnu.org> ---
Incrementally, here is a makeucnid.c patch that also emits CXX23 and NXX23
flags (CXX23 for code points valid in a C++23 identifier, NXX23 for code
points valid in a C++23 identifier but not as its first character).  It does
not yet contain the changes needed to actually handle them on the libcpp side.

--- libcpp/makeucnid.c.jj       2021-08-04 17:35:35.995944075 +0200
+++ libcpp/makeucnid.c  2021-08-04 18:13:56.399062234 +0200
@@ -17,7 +17,7 @@ along with this program; see the file CO

 /* Run this program as
    ./makeucnid ucnid.tab UnicodeData.txt DerivedNormalizationProps.txt \
-       > ucnid.h
+      DerivedCoreProperties.txt > ucnid.h
 */

 #include <stdio.h>
@@ -32,10 +32,12 @@ enum {
   N99 = 4,
   C11 = 8,
   N11 = 16,
-  all_languages = C99 | CXX | C11,
-  not_NFC = 32,
-  not_NFKC = 64,
-  maybe_not_NFC = 128
+  CXX23 = 32,
+  NXX23 = 64,
+  all_languages = C99 | CXX | C11 | CXX23 | NXX23,
+  not_NFC = 128,
+  not_NFKC = 256,
+  maybe_not_NFC = 512
 };

 #define NUM_CODE_POINTS 0x110000
@@ -241,6 +243,74 @@ read_derived (const char *fname)
   fclose (f);
 }

+/* Read DerivedCoreProperties.txt from FNAME.  For every code point or
+   START..END range entry that is not wholly below 0x80, XID_Start sets
+   CXX23 in flags, and XID_Continue without XID_Start sets CXX23 | NXX23
+   (valid in a C++23 identifier, but not as its first character).  */
+
+static void
+read_derivedcore (const char *fname)
+{
+  FILE *f = fopen (fname, "r");
+
+  if (!f)
+    fail ("opening DerivedCoreProperties.txt");
+  for (;;)
+    {
+      char line[256];
+      unsigned long codepoint_start, codepoint_end;
+      char *l;
+
+      if (!fgets (line, sizeof (line), f))
+        break;
+      if (line[0] == '#' || line[0] == '\n' || line[0] == '\r')
+        continue;
+      codepoint_start = strtoul (line, &l, 16);
+      if (l == line)
+        fail ("parsing DerivedCoreProperties.txt, reading code point");
+      if (codepoint_start > MAX_CODE_POINT)
+        fail ("parsing DerivedCoreProperties.txt, code point too large");
+
+      if (*l == '.' && l[1] == '.')
+        {
+          char *l2 = l + 2;
+          codepoint_end = strtoul (l + 2, &l, 16);
+          if (l == l2 || codepoint_end < codepoint_start)
+            fail ("parsing DerivedCoreProperties.txt, reading code point");
+          if (codepoint_end > MAX_CODE_POINT)
+            fail ("parsing DerivedCoreProperties.txt, code point too large");
+        }
+      else
+        codepoint_end = codepoint_start;
+
+      while (*l == ' ')
+        l++;
+      if (*l++ != ';')
+        fail ("parsing DerivedCoreProperties.txt, reading code point");
+      while (*l == ' ')
+        l++;
+
+      if (codepoint_end < 0x80)
+        continue;
+
+      /* The order in which the two properties appear does not matter.  */
+      if (strncmp (l, "XID_Start ", 10) == 0)
+        {
+          for (; codepoint_start <= codepoint_end; codepoint_start++)
+            flags[codepoint_start] = (flags[codepoint_start] | CXX23) & ~NXX23;
+        }
+      else if (strncmp (l, "XID_Continue ", 13) == 0)
+        {
+          for (; codepoint_start <= codepoint_end; codepoint_start++)
+            if ((flags[codepoint_start] & CXX23) == 0)
+              flags[codepoint_start] |= CXX23 | NXX23;
+        }
+    }
+  if (ferror (f))
+    fail ("reading DerivedCoreProperties.txt");
+  fclose (f);
+}
+
 /* Write out the table.
    The table consists of two words per entry.  The first word is the flags
    for the unicode code points up to and including the second word.  */
@@ -261,12 +331,14 @@ write_table (void)
        || really_safe != (decomp[i][0] == 0)
        || combining_value[i] != last_combine)
       {
-       printf ("{ %s|%s|%s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n",
+       printf ("{ %s|%s|%s|%s|%s|%s|%s|%s|%s|%s|%s, %3d, %#06x },\n",
                last_flag & C99 ? "C99" : "  0",
                last_flag & N99 ? "N99" : "  0",
                last_flag & CXX ? "CXX" : "  0",
                last_flag & C11 ? "C11" : "  0",
                last_flag & N11 ? "N11" : "  0",
+               last_flag & CXX23 ? "CXX23" : "    0",
+               last_flag & NXX23 ? "NXX23" : "    0",
                really_safe ? "CID" : "  0",
                last_flag & not_NFC ? "  0" : "NFC",
                last_flag & not_NFKC ? "  0" : "NKC",
@@ -439,11 +511,12 @@ write_copyright (void)
 int
 main(int argc, char ** argv)
 {
-  if (argc != 4)
+  if (argc != 5)
     fail ("too few arguments to makeucn");
   read_ucnid (argv[1]);
   read_table (argv[2]);
   read_derived (argv[3]);
+  read_derivedcore (argv[4]);

   write_copyright ();
   write_table ();

Reply via email to