[PATCH] D108308: [WIP] Cleanup identifier parsing.

2021-08-25 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin marked 4 inline comments as done.
cor3ntin added inline comments.



Comment at: clang/include/clang/Lex/Lexer.h:702
   // Helper functions to lex the remainder of a token of the specific type.
-  bool LexIdentifier (Token &Result, const char *CurPtr);
+  bool LexIdentifierContinue(Token &Result, const char *CurPtr);
   bool LexNumericConstant(Token &Result, const char *CurPtr);

cor3ntin wrote:
> aaron.ballman wrote:
> > Should this be `LexUnicodeIdentifierContinue()`? If so, perhaps it can also 
> > be moved up to line 578 so it's near the "start" function?
> > 
> > Or does this function handle both Unicode and ASCII identifiers? If so, the 
> > comments could probably be updated.
> This handles all identifiers - after the first codepoint has been parsed - 
> Which comment are you referring to?
I kept the comment as is - because it applies to all functions underneath - but 
added a comment on the definition in the cpp file.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108308/new/

https://reviews.llvm.org/D108308

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108308: [WIP] Cleanup identifier parsing.

2021-08-25 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin updated this revision to Diff 368746.
cor3ntin added a comment.

Fix comments following Aaron's feedback, remove
braces deemed unnecessary by the guidelines


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108308/new/

https://reviews.llvm.org/D108308

Files:
  clang-tools-extra/clang-include-fixer/IncludeFixer.cpp
  clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp
  clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
  clang-tools-extra/clangd/CodeComplete.cpp
  clang-tools-extra/clangd/SourceCode.cpp
  clang-tools-extra/clangd/refactor/Rename.cpp
  clang/include/clang/Basic/CharInfo.h
  clang/include/clang/Lex/Lexer.h
  clang/lib/ARCMigrate/ObjCMT.cpp
  clang/lib/ARCMigrate/TransUnbridgedCasts.cpp
  clang/lib/AST/MicrosoftMangle.cpp
  clang/lib/Basic/Module.cpp
  clang/lib/Edit/EditedSource.cpp
  clang/lib/Frontend/LayoutOverrideSource.cpp
  clang/lib/Frontend/Rewrite/FrontendActions.cpp
  clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
  clang/lib/Lex/Lexer.cpp
  clang/lib/Lex/ModuleMap.cpp
  clang/lib/Sema/SemaAvailability.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaExprObjC.cpp
  clang/lib/Sema/SemaType.cpp
  clang/lib/Tooling/Transformer/Parsing.cpp
  clang/unittests/Basic/CharInfoTest.cpp

Index: clang/unittests/Basic/CharInfoTest.cpp
===
--- clang/unittests/Basic/CharInfoTest.cpp
+++ clang/unittests/Basic/CharInfoTest.cpp
@@ -50,44 +50,44 @@
   EXPECT_FALSE(isASCII('\xff'));
 }
 
-TEST(CharInfoTest, isIdentifierHead) {
-  EXPECT_TRUE(isIdentifierHead('a'));
-  EXPECT_TRUE(isIdentifierHead('A'));
-  EXPECT_TRUE(isIdentifierHead('z'));
-  EXPECT_TRUE(isIdentifierHead('Z'));
-  EXPECT_TRUE(isIdentifierHead('_'));
-
-  EXPECT_FALSE(isIdentifierHead('0'));
-  EXPECT_FALSE(isIdentifierHead('.'));
-  EXPECT_FALSE(isIdentifierHead('`'));
-  EXPECT_FALSE(isIdentifierHead('\0'));
-
-  EXPECT_FALSE(isIdentifierHead('$'));
-  EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true));
-
-  EXPECT_FALSE(isIdentifierHead('\x80'));
-  EXPECT_FALSE(isIdentifierHead('\xc2'));
-  EXPECT_FALSE(isIdentifierHead('\xff'));
+TEST(CharInfoTest, isAsciiIdentifierStart) {
+  EXPECT_TRUE(isAsciiIdentifierStart('a'));
+  EXPECT_TRUE(isAsciiIdentifierStart('A'));
+  EXPECT_TRUE(isAsciiIdentifierStart('z'));
+  EXPECT_TRUE(isAsciiIdentifierStart('Z'));
+  EXPECT_TRUE(isAsciiIdentifierStart('_'));
+
+  EXPECT_FALSE(isAsciiIdentifierStart('0'));
+  EXPECT_FALSE(isAsciiIdentifierStart('.'));
+  EXPECT_FALSE(isAsciiIdentifierStart('`'));
+  EXPECT_FALSE(isAsciiIdentifierStart('\0'));
+
+  EXPECT_FALSE(isAsciiIdentifierStart('$'));
+  EXPECT_TRUE(isAsciiIdentifierStart('$', /*AllowDollar=*/true));
+
+  EXPECT_FALSE(isAsciiIdentifierStart('\x80'));
+  EXPECT_FALSE(isAsciiIdentifierStart('\xc2'));
+  EXPECT_FALSE(isAsciiIdentifierStart('\xff'));
 }
 
-TEST(CharInfoTest, isIdentifierBody) {
-  EXPECT_TRUE(isIdentifierBody('a'));
-  EXPECT_TRUE(isIdentifierBody('A'));
-  EXPECT_TRUE(isIdentifierBody('z'));
-  EXPECT_TRUE(isIdentifierBody('Z'));
-  EXPECT_TRUE(isIdentifierBody('_'));
+TEST(CharInfoTest, isAsciiIdentifierContinue) {
+  EXPECT_TRUE(isAsciiIdentifierContinue('a'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('A'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('z'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('Z'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('_'));
 
-  EXPECT_TRUE(isIdentifierBody('0'));
-  EXPECT_FALSE(isIdentifierBody('.'));
-  EXPECT_FALSE(isIdentifierBody('`'));
-  EXPECT_FALSE(isIdentifierBody('\0'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('0'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('.'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('`'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\0'));
 
-  EXPECT_FALSE(isIdentifierBody('$'));
-  EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true));
+  EXPECT_FALSE(isAsciiIdentifierContinue('$'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('$', /*AllowDollar=*/true));
 
-  EXPECT_FALSE(isIdentifierBody('\x80'));
-  EXPECT_FALSE(isIdentifierBody('\xc2'));
-  EXPECT_FALSE(isIdentifierBody('\xff'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\x80'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\xc2'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\xff'));
 }
 
 TEST(CharInfoTest, isHorizontalWhitespace) {
@@ -413,91 +413,91 @@
   EXPECT_EQ('\0', toUppercase('\0'));
 }
 
-TEST(CharInfoTest, isValidIdentifier) {
-  EXPECT_FALSE(isValidIdentifier(""));
+TEST(CharInfoTest, isValidAsciiIdentifier) {
+  EXPECT_FALSE(isValidAsciiIdentifier(""));
 
   // 1 character
-  EXPECT_FALSE(isValidIdentifier("."));
-  EXPECT_FALSE(isValidIdentifier("\n"));
-  EXPECT_FALSE(isValidIdentifier(" "));
-  EXPECT_FALSE(isValidIdentifier("\x80"));
-  EXPECT_FALSE(isValidIdentifier("\xc2"));
-  EXPECT_FALSE(isValidIdentifier("\xff"));
-  EXPECT_FALSE(isValidIdentifier("$"));
-  EXPECT_FALSE(isValidIdentifier("1"));
-
-  

[PATCH] D108308: [WIP] Cleanup identifier parsing.

2021-08-25 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added inline comments.



Comment at: clang/include/clang/Lex/Lexer.h:701
 
   // Helper functions to lex the remainder of a token of the specific type.
+  bool LexIdentifierContinue(Token &Result, const char *CurPtr);

Something like this then?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108308/new/

https://reviews.llvm.org/D108308

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108308: [WIP] Cleanup identifier parsing.

2021-08-25 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added inline comments.



Comment at: clang/include/clang/Lex/Lexer.h:702
   // Helper functions to lex the remainder of a token of the specific type.
-  bool LexIdentifier (Token &Result, const char *CurPtr);
+  bool LexIdentifierContinue(Token &Result, const char *CurPtr);
   bool LexNumericConstant(Token &Result, const char *CurPtr);

aaron.ballman wrote:
> Should this be `LexUnicodeIdentifierContinue()`? If so, perhaps it can also 
> be moved up to line 578 so it's near the "start" function?
> 
> Or does this function handle both Unicode and ASCII identifiers? If so, the 
> comments could probably be updated.
This handles all identifiers - after the first codepoint has been parsed - 
Which comment are you referring to?



Comment at: clang/lib/Lex/Lexer.cpp:1758
+bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
+  // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
+  unsigned Size;

aaron.ballman wrote:
> Is the comment here still accurate? Might be worth rewriting in prose rather 
> than regex?
I don't think the comment was accurate before, I'll find something better!


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108308/new/

https://reviews.llvm.org/D108308

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108308: [WIP] Cleanup identifier parsing.

2021-08-25 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added a reviewer: rsmith.
aaron.ballman added a subscriber: rsmith.
aaron.ballman added a comment.

In general, I'm in favor of these changes. They help identify (pun *totally* 
intended) where we're improperly expecting ASCII identifiers in places, which 
can hopefully be addressed in follow-up work. @rsmith, do you have any concerns 
with this direction?

Can you remove the [WIP] from the title so it's clear that this is no longer in 
progress? Also, I'd recommend slapping an NFC in the title somewhere to make it 
clear there's no functional changes intended.




Comment at: clang/include/clang/Lex/Lexer.h:702
   // Helper functions to lex the remainder of a token of the specific type.
-  bool LexIdentifier (Token &Result, const char *CurPtr);
+  bool LexIdentifierContinue(Token &Result, const char *CurPtr);
   bool LexNumericConstant(Token &Result, const char *CurPtr);

Should this be `LexUnicodeIdentifierContinue()`? If so, perhaps it can also be 
moved up to line 578 so it's near the "start" function?

Or does this function handle both Unicode and ASCII identifiers? If so, the 
comments could probably be updated.



Comment at: clang/lib/Lex/Lexer.cpp:1758
+bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) {
+  // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$]
+  unsigned Size;

Is the comment here still accurate? Might be worth rewriting in prose rather 
than regex?



Comment at: clang/lib/Lex/Lexer.cpp:1762
+unsigned char C = *CurPtr;
+// Fast path
+if (isAsciiIdentifierContinue(C)) {





Comment at: clang/lib/Lex/Lexer.cpp:1767
+}
+// Slow path: handle trigraph, unicode codepoints, UCNs
+C = getCharAndSize(CurPtr, Size);





Comment at: clang/lib/Lex/Lexer.cpp:1783-1788
+if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) {
   continue;
-} else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
-  C = getCharAndSize(CurPtr, Size);
+}
+if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) {
   continue;
 }





Comment at: clang/lib/Lex/Lexer.cpp:1789
 }
+// Neither an expected unicode codepoint nor a UCN
+break;




Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108308/new/

https://reviews.llvm.org/D108308

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108308: [WIP] Cleanup identifier parsing.

2021-08-18 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin added a comment.

@aaron.ballman Let me know what you think.
The PR does not contain new behavior, it only renames and refactors the function 
lexing identifiers. I ran the build a few times and did not measure performance 
differences on my system. The code should behave exactly the same except with 
one loop instead of 3.
I also moved the 2 identifier lexing functions near one another to make it 
easier to understand.

This makes it apparent that some places in tools, and maybe header name or module 
parsing too, only check for ASCII identifiers when they may want to check for 
Unicode. This is not addressed here.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108308/new/

https://reviews.llvm.org/D108308

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D108308: [WIP] Cleanup identifier parsing.

2021-08-18 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin updated this revision to Diff 367326.
cor3ntin added a comment.

Remove file committed accidentally


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108308/new/

https://reviews.llvm.org/D108308

Files:
  clang-tools-extra/clang-include-fixer/IncludeFixer.cpp
  clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp
  clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
  clang-tools-extra/clangd/CodeComplete.cpp
  clang-tools-extra/clangd/SourceCode.cpp
  clang-tools-extra/clangd/refactor/Rename.cpp
  clang/include/clang/Basic/CharInfo.h
  clang/include/clang/Lex/Lexer.h
  clang/lib/ARCMigrate/ObjCMT.cpp
  clang/lib/ARCMigrate/TransUnbridgedCasts.cpp
  clang/lib/AST/MicrosoftMangle.cpp
  clang/lib/Basic/Module.cpp
  clang/lib/Edit/EditedSource.cpp
  clang/lib/Frontend/LayoutOverrideSource.cpp
  clang/lib/Frontend/Rewrite/FrontendActions.cpp
  clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
  clang/lib/Lex/Lexer.cpp
  clang/lib/Lex/ModuleMap.cpp
  clang/lib/Sema/SemaAvailability.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaExprObjC.cpp
  clang/lib/Sema/SemaType.cpp
  clang/lib/Tooling/Transformer/Parsing.cpp
  clang/unittests/Basic/CharInfoTest.cpp

Index: clang/unittests/Basic/CharInfoTest.cpp
===
--- clang/unittests/Basic/CharInfoTest.cpp
+++ clang/unittests/Basic/CharInfoTest.cpp
@@ -50,44 +50,44 @@
   EXPECT_FALSE(isASCII('\xff'));
 }
 
-TEST(CharInfoTest, isIdentifierHead) {
-  EXPECT_TRUE(isIdentifierHead('a'));
-  EXPECT_TRUE(isIdentifierHead('A'));
-  EXPECT_TRUE(isIdentifierHead('z'));
-  EXPECT_TRUE(isIdentifierHead('Z'));
-  EXPECT_TRUE(isIdentifierHead('_'));
-
-  EXPECT_FALSE(isIdentifierHead('0'));
-  EXPECT_FALSE(isIdentifierHead('.'));
-  EXPECT_FALSE(isIdentifierHead('`'));
-  EXPECT_FALSE(isIdentifierHead('\0'));
-
-  EXPECT_FALSE(isIdentifierHead('$'));
-  EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true));
-
-  EXPECT_FALSE(isIdentifierHead('\x80'));
-  EXPECT_FALSE(isIdentifierHead('\xc2'));
-  EXPECT_FALSE(isIdentifierHead('\xff'));
+TEST(CharInfoTest, isAsciiIdentifierStart) {
+  EXPECT_TRUE(isAsciiIdentifierStart('a'));
+  EXPECT_TRUE(isAsciiIdentifierStart('A'));
+  EXPECT_TRUE(isAsciiIdentifierStart('z'));
+  EXPECT_TRUE(isAsciiIdentifierStart('Z'));
+  EXPECT_TRUE(isAsciiIdentifierStart('_'));
+
+  EXPECT_FALSE(isAsciiIdentifierStart('0'));
+  EXPECT_FALSE(isAsciiIdentifierStart('.'));
+  EXPECT_FALSE(isAsciiIdentifierStart('`'));
+  EXPECT_FALSE(isAsciiIdentifierStart('\0'));
+
+  EXPECT_FALSE(isAsciiIdentifierStart('$'));
+  EXPECT_TRUE(isAsciiIdentifierStart('$', /*AllowDollar=*/true));
+
+  EXPECT_FALSE(isAsciiIdentifierStart('\x80'));
+  EXPECT_FALSE(isAsciiIdentifierStart('\xc2'));
+  EXPECT_FALSE(isAsciiIdentifierStart('\xff'));
 }
 
-TEST(CharInfoTest, isIdentifierBody) {
-  EXPECT_TRUE(isIdentifierBody('a'));
-  EXPECT_TRUE(isIdentifierBody('A'));
-  EXPECT_TRUE(isIdentifierBody('z'));
-  EXPECT_TRUE(isIdentifierBody('Z'));
-  EXPECT_TRUE(isIdentifierBody('_'));
+TEST(CharInfoTest, isAsciiIdentifierContinue) {
+  EXPECT_TRUE(isAsciiIdentifierContinue('a'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('A'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('z'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('Z'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('_'));
 
-  EXPECT_TRUE(isIdentifierBody('0'));
-  EXPECT_FALSE(isIdentifierBody('.'));
-  EXPECT_FALSE(isIdentifierBody('`'));
-  EXPECT_FALSE(isIdentifierBody('\0'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('0'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('.'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('`'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\0'));
 
-  EXPECT_FALSE(isIdentifierBody('$'));
-  EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true));
+  EXPECT_FALSE(isAsciiIdentifierContinue('$'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('$', /*AllowDollar=*/true));
 
-  EXPECT_FALSE(isIdentifierBody('\x80'));
-  EXPECT_FALSE(isIdentifierBody('\xc2'));
-  EXPECT_FALSE(isIdentifierBody('\xff'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\x80'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\xc2'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\xff'));
 }
 
 TEST(CharInfoTest, isHorizontalWhitespace) {
@@ -413,91 +413,91 @@
   EXPECT_EQ('\0', toUppercase('\0'));
 }
 
-TEST(CharInfoTest, isValidIdentifier) {
-  EXPECT_FALSE(isValidIdentifier(""));
+TEST(CharInfoTest, isValidAsciiIdentifier) {
+  EXPECT_FALSE(isValidAsciiIdentifier(""));
 
   // 1 character
-  EXPECT_FALSE(isValidIdentifier("."));
-  EXPECT_FALSE(isValidIdentifier("\n"));
-  EXPECT_FALSE(isValidIdentifier(" "));
-  EXPECT_FALSE(isValidIdentifier("\x80"));
-  EXPECT_FALSE(isValidIdentifier("\xc2"));
-  EXPECT_FALSE(isValidIdentifier("\xff"));
-  EXPECT_FALSE(isValidIdentifier("$"));
-  EXPECT_FALSE(isValidIdentifier("1"));
-
-  EXPECT_TRUE(isValidIdentifier("_"));
-  EXPECT_TRUE(isValidIdentifier("a"));
-  

[PATCH] D108308: [WIP] Cleanup identifier parsing.

2021-08-18 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin updated this revision to Diff 367325.
cor3ntin added a comment.

Looks better in lower case after all


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108308/new/

https://reviews.llvm.org/D108308

Files:
  clang-tools-extra/clang-include-fixer/IncludeFixer.cpp
  clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp
  clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
  clang-tools-extra/clangd/CodeComplete.cpp
  clang-tools-extra/clangd/SourceCode.cpp
  clang-tools-extra/clangd/refactor/Rename.cpp
  clang/include/clang/Basic/CharInfo.h
  clang/include/clang/Lex/Lexer.h
  clang/lib/ARCMigrate/ObjCMT.cpp
  clang/lib/ARCMigrate/TransUnbridgedCasts.cpp
  clang/lib/AST/MicrosoftMangle.cpp
  clang/lib/Basic/Module.cpp
  clang/lib/Edit/EditedSource.cpp
  clang/lib/Frontend/LayoutOverrideSource.cpp
  clang/lib/Frontend/Rewrite/FrontendActions.cpp
  clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
  clang/lib/Lex/Lexer.cpp
  clang/lib/Lex/ModuleMap.cpp
  clang/lib/Sema/SemaAvailability.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaExprObjC.cpp
  clang/lib/Sema/SemaType.cpp
  clang/lib/Tooling/Transformer/Parsing.cpp
  clang/unittests/Basic/CharInfoTest.cpp
  llvm/cmake/modules/CheckCompilerVersion.cmake

Index: llvm/cmake/modules/CheckCompilerVersion.cmake
===
--- llvm/cmake/modules/CheckCompilerVersion.cmake
+++ llvm/cmake/modules/CheckCompilerVersion.cmake
@@ -94,7 +94,7 @@
 "
   LLVM_LIBSTDCXX_MIN)
 if(NOT LLVM_LIBSTDCXX_MIN)
-  message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.")
+ # message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.")
 endif()
 # Test for libstdc++ version of at least 5.1 by checking for std::iostream_category().
 # Note: We should check _GLIBCXX_RELEASE when possible (i.e., for GCC 7.1 and up).
Index: clang/unittests/Basic/CharInfoTest.cpp
===
--- clang/unittests/Basic/CharInfoTest.cpp
+++ clang/unittests/Basic/CharInfoTest.cpp
@@ -50,44 +50,44 @@
   EXPECT_FALSE(isASCII('\xff'));
 }
 
-TEST(CharInfoTest, isIdentifierHead) {
-  EXPECT_TRUE(isIdentifierHead('a'));
-  EXPECT_TRUE(isIdentifierHead('A'));
-  EXPECT_TRUE(isIdentifierHead('z'));
-  EXPECT_TRUE(isIdentifierHead('Z'));
-  EXPECT_TRUE(isIdentifierHead('_'));
-
-  EXPECT_FALSE(isIdentifierHead('0'));
-  EXPECT_FALSE(isIdentifierHead('.'));
-  EXPECT_FALSE(isIdentifierHead('`'));
-  EXPECT_FALSE(isIdentifierHead('\0'));
-
-  EXPECT_FALSE(isIdentifierHead('$'));
-  EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true));
-
-  EXPECT_FALSE(isIdentifierHead('\x80'));
-  EXPECT_FALSE(isIdentifierHead('\xc2'));
-  EXPECT_FALSE(isIdentifierHead('\xff'));
+TEST(CharInfoTest, isAsciiIdentifierStart) {
+  EXPECT_TRUE(isAsciiIdentifierStart('a'));
+  EXPECT_TRUE(isAsciiIdentifierStart('A'));
+  EXPECT_TRUE(isAsciiIdentifierStart('z'));
+  EXPECT_TRUE(isAsciiIdentifierStart('Z'));
+  EXPECT_TRUE(isAsciiIdentifierStart('_'));
+
+  EXPECT_FALSE(isAsciiIdentifierStart('0'));
+  EXPECT_FALSE(isAsciiIdentifierStart('.'));
+  EXPECT_FALSE(isAsciiIdentifierStart('`'));
+  EXPECT_FALSE(isAsciiIdentifierStart('\0'));
+
+  EXPECT_FALSE(isAsciiIdentifierStart('$'));
+  EXPECT_TRUE(isAsciiIdentifierStart('$', /*AllowDollar=*/true));
+
+  EXPECT_FALSE(isAsciiIdentifierStart('\x80'));
+  EXPECT_FALSE(isAsciiIdentifierStart('\xc2'));
+  EXPECT_FALSE(isAsciiIdentifierStart('\xff'));
 }
 
-TEST(CharInfoTest, isIdentifierBody) {
-  EXPECT_TRUE(isIdentifierBody('a'));
-  EXPECT_TRUE(isIdentifierBody('A'));
-  EXPECT_TRUE(isIdentifierBody('z'));
-  EXPECT_TRUE(isIdentifierBody('Z'));
-  EXPECT_TRUE(isIdentifierBody('_'));
+TEST(CharInfoTest, isAsciiIdentifierContinue) {
+  EXPECT_TRUE(isAsciiIdentifierContinue('a'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('A'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('z'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('Z'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('_'));
 
-  EXPECT_TRUE(isIdentifierBody('0'));
-  EXPECT_FALSE(isIdentifierBody('.'));
-  EXPECT_FALSE(isIdentifierBody('`'));
-  EXPECT_FALSE(isIdentifierBody('\0'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('0'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('.'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('`'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\0'));
 
-  EXPECT_FALSE(isIdentifierBody('$'));
-  EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true));
+  EXPECT_FALSE(isAsciiIdentifierContinue('$'));
+  EXPECT_TRUE(isAsciiIdentifierContinue('$', /*AllowDollar=*/true));
 
-  EXPECT_FALSE(isIdentifierBody('\x80'));
-  EXPECT_FALSE(isIdentifierBody('\xc2'));
-  EXPECT_FALSE(isIdentifierBody('\xff'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\x80'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\xc2'));
+  EXPECT_FALSE(isAsciiIdentifierContinue('\xff'));
 }
 
 TEST(CharInfoTest, 

[PATCH] D108308: [WIP] Cleanup identifier parsing.

2021-08-18 Thread Corentin Jabot via Phabricator via cfe-commits
cor3ntin updated this revision to Diff 367322.
cor3ntin added a comment.
Herald added subscribers: llvm-commits, mgorny.
Herald added a project: LLVM.

Spell ASCII in upper case


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D108308/new/

https://reviews.llvm.org/D108308

Files:
  clang-tools-extra/clang-include-fixer/IncludeFixer.cpp
  clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp
  clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp
  clang-tools-extra/clangd/CodeComplete.cpp
  clang-tools-extra/clangd/SourceCode.cpp
  clang-tools-extra/clangd/refactor/Rename.cpp
  clang/include/clang/Basic/CharInfo.h
  clang/include/clang/Lex/Lexer.h
  clang/lib/ARCMigrate/ObjCMT.cpp
  clang/lib/ARCMigrate/TransUnbridgedCasts.cpp
  clang/lib/AST/MicrosoftMangle.cpp
  clang/lib/Basic/Module.cpp
  clang/lib/Edit/EditedSource.cpp
  clang/lib/Frontend/LayoutOverrideSource.cpp
  clang/lib/Frontend/Rewrite/FrontendActions.cpp
  clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
  clang/lib/Lex/Lexer.cpp
  clang/lib/Lex/ModuleMap.cpp
  clang/lib/Sema/SemaAvailability.cpp
  clang/lib/Sema/SemaDeclAttr.cpp
  clang/lib/Sema/SemaExprObjC.cpp
  clang/lib/Sema/SemaType.cpp
  clang/lib/Tooling/Transformer/Parsing.cpp
  clang/unittests/Basic/CharInfoTest.cpp
  llvm/cmake/modules/CheckCompilerVersion.cmake

Index: llvm/cmake/modules/CheckCompilerVersion.cmake
===
--- llvm/cmake/modules/CheckCompilerVersion.cmake
+++ llvm/cmake/modules/CheckCompilerVersion.cmake
@@ -94,7 +94,7 @@
 "
   LLVM_LIBSTDCXX_MIN)
 if(NOT LLVM_LIBSTDCXX_MIN)
-  message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.")
+ # message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.")
 endif()
 # Test for libstdc++ version of at least 5.1 by checking for std::iostream_category().
 # Note: We should check _GLIBCXX_RELEASE when possible (i.e., for GCC 7.1 and up).
Index: clang/unittests/Basic/CharInfoTest.cpp
===
--- clang/unittests/Basic/CharInfoTest.cpp
+++ clang/unittests/Basic/CharInfoTest.cpp
@@ -50,44 +50,44 @@
   EXPECT_FALSE(isASCII('\xff'));
 }
 
-TEST(CharInfoTest, isIdentifierHead) {
-  EXPECT_TRUE(isIdentifierHead('a'));
-  EXPECT_TRUE(isIdentifierHead('A'));
-  EXPECT_TRUE(isIdentifierHead('z'));
-  EXPECT_TRUE(isIdentifierHead('Z'));
-  EXPECT_TRUE(isIdentifierHead('_'));
-
-  EXPECT_FALSE(isIdentifierHead('0'));
-  EXPECT_FALSE(isIdentifierHead('.'));
-  EXPECT_FALSE(isIdentifierHead('`'));
-  EXPECT_FALSE(isIdentifierHead('\0'));
-
-  EXPECT_FALSE(isIdentifierHead('$'));
-  EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true));
-
-  EXPECT_FALSE(isIdentifierHead('\x80'));
-  EXPECT_FALSE(isIdentifierHead('\xc2'));
-  EXPECT_FALSE(isIdentifierHead('\xff'));
+TEST(CharInfoTest, isASCIIIdentifierStart) {
+  EXPECT_TRUE(isASCIIIdentifierStart('a'));
+  EXPECT_TRUE(isASCIIIdentifierStart('A'));
+  EXPECT_TRUE(isASCIIIdentifierStart('z'));
+  EXPECT_TRUE(isASCIIIdentifierStart('Z'));
+  EXPECT_TRUE(isASCIIIdentifierStart('_'));
+
+  EXPECT_FALSE(isASCIIIdentifierStart('0'));
+  EXPECT_FALSE(isASCIIIdentifierStart('.'));
+  EXPECT_FALSE(isASCIIIdentifierStart('`'));
+  EXPECT_FALSE(isASCIIIdentifierStart('\0'));
+
+  EXPECT_FALSE(isASCIIIdentifierStart('$'));
+  EXPECT_TRUE(isASCIIIdentifierStart('$', /*AllowDollar=*/true));
+
+  EXPECT_FALSE(isASCIIIdentifierStart('\x80'));
+  EXPECT_FALSE(isASCIIIdentifierStart('\xc2'));
+  EXPECT_FALSE(isASCIIIdentifierStart('\xff'));
 }
 
-TEST(CharInfoTest, isIdentifierBody) {
-  EXPECT_TRUE(isIdentifierBody('a'));
-  EXPECT_TRUE(isIdentifierBody('A'));
-  EXPECT_TRUE(isIdentifierBody('z'));
-  EXPECT_TRUE(isIdentifierBody('Z'));
-  EXPECT_TRUE(isIdentifierBody('_'));
+TEST(CharInfoTest, isASCIIIdentifierContinue) {
+  EXPECT_TRUE(isASCIIIdentifierContinue('a'));
+  EXPECT_TRUE(isASCIIIdentifierContinue('A'));
+  EXPECT_TRUE(isASCIIIdentifierContinue('z'));
+  EXPECT_TRUE(isASCIIIdentifierContinue('Z'));
+  EXPECT_TRUE(isASCIIIdentifierContinue('_'));
 
-  EXPECT_TRUE(isIdentifierBody('0'));
-  EXPECT_FALSE(isIdentifierBody('.'));
-  EXPECT_FALSE(isIdentifierBody('`'));
-  EXPECT_FALSE(isIdentifierBody('\0'));
+  EXPECT_TRUE(isASCIIIdentifierContinue('0'));
+  EXPECT_FALSE(isASCIIIdentifierContinue('.'));
+  EXPECT_FALSE(isASCIIIdentifierContinue('`'));
+  EXPECT_FALSE(isASCIIIdentifierContinue('\0'));
 
-  EXPECT_FALSE(isIdentifierBody('$'));
-  EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true));
+  EXPECT_FALSE(isASCIIIdentifierContinue('$'));
+  EXPECT_TRUE(isASCIIIdentifierContinue('$', /*AllowDollar=*/true));
 
-  EXPECT_FALSE(isIdentifierBody('\x80'));
-  EXPECT_FALSE(isIdentifierBody('\xc2'));
-  EXPECT_FALSE(isIdentifierBody('\xff'));
+  EXPECT_FALSE(isASCIIIdentifierContinue('\x80'));
+  EXPECT_FALSE(isASCIIIdentifierContinue('\xc2'));
+