[PATCH] D108308: [WIP] Cleanup identifier parsing.
cor3ntin marked 4 inline comments as done. cor3ntin added inline comments. Comment at: clang/include/clang/Lex/Lexer.h:702 // Helper functions to lex the remainder of a token of the specific type. - bool LexIdentifier (Token &Result, const char *CurPtr); + bool LexIdentifierContinue(Token &Result, const char *CurPtr); bool LexNumericConstant(Token &Result, const char *CurPtr); cor3ntin wrote: > aaron.ballman wrote: > > Should this be `LexUnicodeIdentifierContinue()`? If so, perhaps it can also > > be moved up to line 578 so it's near the "start" function? > > > > Or does this function handle both Unicode and ASCII identifiers? If so, the > > comments could probably be updated. > This handles all identifiers - after the first codepoint has been parsed - > Which comment are you referring to? I kept the comment as is, because it applies to all the functions underneath, but added a comment at the definition in the .cpp file. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D108308/new/ https://reviews.llvm.org/D108308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D108308: [WIP] Cleanup identifier parsing.
cor3ntin updated this revision to Diff 368746. cor3ntin added a comment. Fix comments following Aaron's feedback, remove braces deemed unnecessary by the guidelines Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D108308/new/ https://reviews.llvm.org/D108308 Files: clang-tools-extra/clang-include-fixer/IncludeFixer.cpp clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp clang-tools-extra/clangd/CodeComplete.cpp clang-tools-extra/clangd/SourceCode.cpp clang-tools-extra/clangd/refactor/Rename.cpp clang/include/clang/Basic/CharInfo.h clang/include/clang/Lex/Lexer.h clang/lib/ARCMigrate/ObjCMT.cpp clang/lib/ARCMigrate/TransUnbridgedCasts.cpp clang/lib/AST/MicrosoftMangle.cpp clang/lib/Basic/Module.cpp clang/lib/Edit/EditedSource.cpp clang/lib/Frontend/LayoutOverrideSource.cpp clang/lib/Frontend/Rewrite/FrontendActions.cpp clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp clang/lib/Lex/Lexer.cpp clang/lib/Lex/ModuleMap.cpp clang/lib/Sema/SemaAvailability.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaExprObjC.cpp clang/lib/Sema/SemaType.cpp clang/lib/Tooling/Transformer/Parsing.cpp clang/unittests/Basic/CharInfoTest.cpp Index: clang/unittests/Basic/CharInfoTest.cpp === --- clang/unittests/Basic/CharInfoTest.cpp +++ clang/unittests/Basic/CharInfoTest.cpp @@ -50,44 +50,44 @@ EXPECT_FALSE(isASCII('\xff')); } -TEST(CharInfoTest, isIdentifierHead) { - EXPECT_TRUE(isIdentifierHead('a')); - EXPECT_TRUE(isIdentifierHead('A')); - EXPECT_TRUE(isIdentifierHead('z')); - EXPECT_TRUE(isIdentifierHead('Z')); - EXPECT_TRUE(isIdentifierHead('_')); - - EXPECT_FALSE(isIdentifierHead('0')); - EXPECT_FALSE(isIdentifierHead('.')); - EXPECT_FALSE(isIdentifierHead('`')); - EXPECT_FALSE(isIdentifierHead('\0')); - - EXPECT_FALSE(isIdentifierHead('$')); - EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true)); - - EXPECT_FALSE(isIdentifierHead('\x80')); - 
EXPECT_FALSE(isIdentifierHead('\xc2')); - EXPECT_FALSE(isIdentifierHead('\xff')); +TEST(CharInfoTest, isAsciiIdentifierStart) { + EXPECT_TRUE(isAsciiIdentifierStart('a')); + EXPECT_TRUE(isAsciiIdentifierStart('A')); + EXPECT_TRUE(isAsciiIdentifierStart('z')); + EXPECT_TRUE(isAsciiIdentifierStart('Z')); + EXPECT_TRUE(isAsciiIdentifierStart('_')); + + EXPECT_FALSE(isAsciiIdentifierStart('0')); + EXPECT_FALSE(isAsciiIdentifierStart('.')); + EXPECT_FALSE(isAsciiIdentifierStart('`')); + EXPECT_FALSE(isAsciiIdentifierStart('\0')); + + EXPECT_FALSE(isAsciiIdentifierStart('$')); + EXPECT_TRUE(isAsciiIdentifierStart('$', /*AllowDollar=*/true)); + + EXPECT_FALSE(isAsciiIdentifierStart('\x80')); + EXPECT_FALSE(isAsciiIdentifierStart('\xc2')); + EXPECT_FALSE(isAsciiIdentifierStart('\xff')); } -TEST(CharInfoTest, isIdentifierBody) { - EXPECT_TRUE(isIdentifierBody('a')); - EXPECT_TRUE(isIdentifierBody('A')); - EXPECT_TRUE(isIdentifierBody('z')); - EXPECT_TRUE(isIdentifierBody('Z')); - EXPECT_TRUE(isIdentifierBody('_')); +TEST(CharInfoTest, isAsciiIdentifierContinue) { + EXPECT_TRUE(isAsciiIdentifierContinue('a')); + EXPECT_TRUE(isAsciiIdentifierContinue('A')); + EXPECT_TRUE(isAsciiIdentifierContinue('z')); + EXPECT_TRUE(isAsciiIdentifierContinue('Z')); + EXPECT_TRUE(isAsciiIdentifierContinue('_')); - EXPECT_TRUE(isIdentifierBody('0')); - EXPECT_FALSE(isIdentifierBody('.')); - EXPECT_FALSE(isIdentifierBody('`')); - EXPECT_FALSE(isIdentifierBody('\0')); + EXPECT_TRUE(isAsciiIdentifierContinue('0')); + EXPECT_FALSE(isAsciiIdentifierContinue('.')); + EXPECT_FALSE(isAsciiIdentifierContinue('`')); + EXPECT_FALSE(isAsciiIdentifierContinue('\0')); - EXPECT_FALSE(isIdentifierBody('$')); - EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true)); + EXPECT_FALSE(isAsciiIdentifierContinue('$')); + EXPECT_TRUE(isAsciiIdentifierContinue('$', /*AllowDollar=*/true)); - EXPECT_FALSE(isIdentifierBody('\x80')); - EXPECT_FALSE(isIdentifierBody('\xc2')); - EXPECT_FALSE(isIdentifierBody('\xff')); + 
EXPECT_FALSE(isAsciiIdentifierContinue('\x80')); + EXPECT_FALSE(isAsciiIdentifierContinue('\xc2')); + EXPECT_FALSE(isAsciiIdentifierContinue('\xff')); } TEST(CharInfoTest, isHorizontalWhitespace) { @@ -413,91 +413,91 @@ EXPECT_EQ('\0', toUppercase('\0')); } -TEST(CharInfoTest, isValidIdentifier) { - EXPECT_FALSE(isValidIdentifier("")); +TEST(CharInfoTest, isValidAsciiIdentifier) { + EXPECT_FALSE(isValidAsciiIdentifier("")); // 1 character - EXPECT_FALSE(isValidIdentifier(".")); - EXPECT_FALSE(isValidIdentifier("\n")); - EXPECT_FALSE(isValidIdentifier(" ")); - EXPECT_FALSE(isValidIdentifier("\x80")); - EXPECT_FALSE(isValidIdentifier("\xc2")); - EXPECT_FALSE(isValidIdentifier("\xff")); - EXPECT_FALSE(isValidIdentifier("$")); - EXPECT_FALSE(isValidIdentifier("1")); - -
[PATCH] D108308: [WIP] Cleanup identifier parsing.
aaron.ballman added inline comments. Comment at: clang/include/clang/Lex/Lexer.h:701 // Helper functions to lex the remainder of a token of the specific type. + bool LexIdentifierContinue(Token &Result, const char *CurPtr); Something like this then? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D108308/new/ https://reviews.llvm.org/D108308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D108308: [WIP] Cleanup identifier parsing.
cor3ntin added inline comments. Comment at: clang/include/clang/Lex/Lexer.h:702 // Helper functions to lex the remainder of a token of the specific type. - bool LexIdentifier (Token &Result, const char *CurPtr); + bool LexIdentifierContinue(Token &Result, const char *CurPtr); bool LexNumericConstant(Token &Result, const char *CurPtr); aaron.ballman wrote: > Should this be `LexUnicodeIdentifierContinue()`? If so, perhaps it can also > be moved up to line 578 so it's near the "start" function? > > Or does this function handle both Unicode and ASCII identifiers? If so, the > comments could probably be updated. This handles all identifiers - after the first codepoint has been parsed - Which comment are you referring to? Comment at: clang/lib/Lex/Lexer.cpp:1758 +bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { + // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$] + unsigned Size; aaron.ballman wrote: > Is the comment here still accurate? Might be worth rewriting in prose rather > than regex? I don't think the comment was accurate before, I'll find something better! Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D108308/new/ https://reviews.llvm.org/D108308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D108308: [WIP] Cleanup identifier parsing.
aaron.ballman added a reviewer: rsmith. aaron.ballman added a subscriber: rsmith. aaron.ballman added a comment. In general, I'm in favor of these changes. They help identify (pun *totally* intended) where we're improperly expecting ASCII identifiers in places, which can hopefully be addressed in follow-up work. @rsmith, do you have any concerns with this direction? Can you remove the [WIP] from the title so it's clear that this is no longer in progress? Also, I'd recommend slapping an NFC in the title somewhere to make it clear there are no functional changes intended. Comment at: clang/include/clang/Lex/Lexer.h:702 // Helper functions to lex the remainder of a token of the specific type. - bool LexIdentifier (Token &Result, const char *CurPtr); + bool LexIdentifierContinue(Token &Result, const char *CurPtr); bool LexNumericConstant(Token &Result, const char *CurPtr); Should this be `LexUnicodeIdentifierContinue()`? If so, perhaps it can also be moved up to line 578 so it's near the "start" function? Or does this function handle both Unicode and ASCII identifiers? If so, the comments could probably be updated. Comment at: clang/lib/Lex/Lexer.cpp:1758 +bool Lexer::LexIdentifierContinue(Token &Result, const char *CurPtr) { + // Match [_A-Za-z0-9]*, we have already matched [_A-Za-z$] + unsigned Size; Is the comment here still accurate? Might be worth rewriting in prose rather than regex? 
Comment at: clang/lib/Lex/Lexer.cpp:1762 +unsigned char C = *CurPtr; +// Fast path +if (isAsciiIdentifierContinue(C)) { Comment at: clang/lib/Lex/Lexer.cpp:1767 +} +// Slow path: handle trigraph, unicode codepoints, UCNs +C = getCharAndSize(CurPtr, Size); Comment at: clang/lib/Lex/Lexer.cpp:1783-1788 +if (C == '\\' && tryConsumeIdentifierUCN(CurPtr, Size, Result)) { continue; -} else if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) { - C = getCharAndSize(CurPtr, Size); +} +if (!isASCII(C) && tryConsumeIdentifierUTF8Char(CurPtr)) { continue; } Comment at: clang/lib/Lex/Lexer.cpp:1789 } +// Neither an expected unicode codepoint nor a UCN +break; Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D108308/new/ https://reviews.llvm.org/D108308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D108308: [WIP] Cleanup identifier parsing.
cor3ntin added a comment. @aaron.ballman Let me know what you think. The PR does not contain new behavior; it only renames things and refactors the functions that lex identifiers. I ran the build a few times and did not measure performance differences on my system. The code should behave exactly the same, except with one loop instead of 3. I also moved the 2 identifier lexing functions near one another to make them easier to understand. This makes it apparent that some places in tools, and maybe header-name or module parsing too, only check for ASCII identifiers when they may want to check for Unicode. This is not addressed here. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D108308/new/ https://reviews.llvm.org/D108308 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D108308: [WIP] Cleanup identifier parsing.
cor3ntin updated this revision to Diff 367326. cor3ntin added a comment. Remove file committed accidentally Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D108308/new/ https://reviews.llvm.org/D108308 Files: clang-tools-extra/clang-include-fixer/IncludeFixer.cpp clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp clang-tools-extra/clangd/CodeComplete.cpp clang-tools-extra/clangd/SourceCode.cpp clang-tools-extra/clangd/refactor/Rename.cpp clang/include/clang/Basic/CharInfo.h clang/include/clang/Lex/Lexer.h clang/lib/ARCMigrate/ObjCMT.cpp clang/lib/ARCMigrate/TransUnbridgedCasts.cpp clang/lib/AST/MicrosoftMangle.cpp clang/lib/Basic/Module.cpp clang/lib/Edit/EditedSource.cpp clang/lib/Frontend/LayoutOverrideSource.cpp clang/lib/Frontend/Rewrite/FrontendActions.cpp clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp clang/lib/Lex/Lexer.cpp clang/lib/Lex/ModuleMap.cpp clang/lib/Sema/SemaAvailability.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaExprObjC.cpp clang/lib/Sema/SemaType.cpp clang/lib/Tooling/Transformer/Parsing.cpp clang/unittests/Basic/CharInfoTest.cpp Index: clang/unittests/Basic/CharInfoTest.cpp === --- clang/unittests/Basic/CharInfoTest.cpp +++ clang/unittests/Basic/CharInfoTest.cpp @@ -50,44 +50,44 @@ EXPECT_FALSE(isASCII('\xff')); } -TEST(CharInfoTest, isIdentifierHead) { - EXPECT_TRUE(isIdentifierHead('a')); - EXPECT_TRUE(isIdentifierHead('A')); - EXPECT_TRUE(isIdentifierHead('z')); - EXPECT_TRUE(isIdentifierHead('Z')); - EXPECT_TRUE(isIdentifierHead('_')); - - EXPECT_FALSE(isIdentifierHead('0')); - EXPECT_FALSE(isIdentifierHead('.')); - EXPECT_FALSE(isIdentifierHead('`')); - EXPECT_FALSE(isIdentifierHead('\0')); - - EXPECT_FALSE(isIdentifierHead('$')); - EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true)); - - EXPECT_FALSE(isIdentifierHead('\x80')); - EXPECT_FALSE(isIdentifierHead('\xc2')); - 
EXPECT_FALSE(isIdentifierHead('\xff')); +TEST(CharInfoTest, isAsciiIdentifierStart) { + EXPECT_TRUE(isAsciiIdentifierStart('a')); + EXPECT_TRUE(isAsciiIdentifierStart('A')); + EXPECT_TRUE(isAsciiIdentifierStart('z')); + EXPECT_TRUE(isAsciiIdentifierStart('Z')); + EXPECT_TRUE(isAsciiIdentifierStart('_')); + + EXPECT_FALSE(isAsciiIdentifierStart('0')); + EXPECT_FALSE(isAsciiIdentifierStart('.')); + EXPECT_FALSE(isAsciiIdentifierStart('`')); + EXPECT_FALSE(isAsciiIdentifierStart('\0')); + + EXPECT_FALSE(isAsciiIdentifierStart('$')); + EXPECT_TRUE(isAsciiIdentifierStart('$', /*AllowDollar=*/true)); + + EXPECT_FALSE(isAsciiIdentifierStart('\x80')); + EXPECT_FALSE(isAsciiIdentifierStart('\xc2')); + EXPECT_FALSE(isAsciiIdentifierStart('\xff')); } -TEST(CharInfoTest, isIdentifierBody) { - EXPECT_TRUE(isIdentifierBody('a')); - EXPECT_TRUE(isIdentifierBody('A')); - EXPECT_TRUE(isIdentifierBody('z')); - EXPECT_TRUE(isIdentifierBody('Z')); - EXPECT_TRUE(isIdentifierBody('_')); +TEST(CharInfoTest, isAsciiIdentifierContinue) { + EXPECT_TRUE(isAsciiIdentifierContinue('a')); + EXPECT_TRUE(isAsciiIdentifierContinue('A')); + EXPECT_TRUE(isAsciiIdentifierContinue('z')); + EXPECT_TRUE(isAsciiIdentifierContinue('Z')); + EXPECT_TRUE(isAsciiIdentifierContinue('_')); - EXPECT_TRUE(isIdentifierBody('0')); - EXPECT_FALSE(isIdentifierBody('.')); - EXPECT_FALSE(isIdentifierBody('`')); - EXPECT_FALSE(isIdentifierBody('\0')); + EXPECT_TRUE(isAsciiIdentifierContinue('0')); + EXPECT_FALSE(isAsciiIdentifierContinue('.')); + EXPECT_FALSE(isAsciiIdentifierContinue('`')); + EXPECT_FALSE(isAsciiIdentifierContinue('\0')); - EXPECT_FALSE(isIdentifierBody('$')); - EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true)); + EXPECT_FALSE(isAsciiIdentifierContinue('$')); + EXPECT_TRUE(isAsciiIdentifierContinue('$', /*AllowDollar=*/true)); - EXPECT_FALSE(isIdentifierBody('\x80')); - EXPECT_FALSE(isIdentifierBody('\xc2')); - EXPECT_FALSE(isIdentifierBody('\xff')); + 
EXPECT_FALSE(isAsciiIdentifierContinue('\x80')); + EXPECT_FALSE(isAsciiIdentifierContinue('\xc2')); + EXPECT_FALSE(isAsciiIdentifierContinue('\xff')); } TEST(CharInfoTest, isHorizontalWhitespace) { @@ -413,91 +413,91 @@ EXPECT_EQ('\0', toUppercase('\0')); } -TEST(CharInfoTest, isValidIdentifier) { - EXPECT_FALSE(isValidIdentifier("")); +TEST(CharInfoTest, isValidAsciiIdentifier) { + EXPECT_FALSE(isValidAsciiIdentifier("")); // 1 character - EXPECT_FALSE(isValidIdentifier(".")); - EXPECT_FALSE(isValidIdentifier("\n")); - EXPECT_FALSE(isValidIdentifier(" ")); - EXPECT_FALSE(isValidIdentifier("\x80")); - EXPECT_FALSE(isValidIdentifier("\xc2")); - EXPECT_FALSE(isValidIdentifier("\xff")); - EXPECT_FALSE(isValidIdentifier("$")); - EXPECT_FALSE(isValidIdentifier("1")); - - EXPECT_TRUE(isValidIdentifier("_")); - EXPECT_TRUE(isValidIdentifier("a")); -
[PATCH] D108308: [WIP] Cleanup identifier parsing.
cor3ntin updated this revision to Diff 367325. cor3ntin added a comment. Looks better in lower case after all Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D108308/new/ https://reviews.llvm.org/D108308 Files: clang-tools-extra/clang-include-fixer/IncludeFixer.cpp clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp clang-tools-extra/clangd/CodeComplete.cpp clang-tools-extra/clangd/SourceCode.cpp clang-tools-extra/clangd/refactor/Rename.cpp clang/include/clang/Basic/CharInfo.h clang/include/clang/Lex/Lexer.h clang/lib/ARCMigrate/ObjCMT.cpp clang/lib/ARCMigrate/TransUnbridgedCasts.cpp clang/lib/AST/MicrosoftMangle.cpp clang/lib/Basic/Module.cpp clang/lib/Edit/EditedSource.cpp clang/lib/Frontend/LayoutOverrideSource.cpp clang/lib/Frontend/Rewrite/FrontendActions.cpp clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp clang/lib/Lex/Lexer.cpp clang/lib/Lex/ModuleMap.cpp clang/lib/Sema/SemaAvailability.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaExprObjC.cpp clang/lib/Sema/SemaType.cpp clang/lib/Tooling/Transformer/Parsing.cpp clang/unittests/Basic/CharInfoTest.cpp llvm/cmake/modules/CheckCompilerVersion.cmake Index: llvm/cmake/modules/CheckCompilerVersion.cmake === --- llvm/cmake/modules/CheckCompilerVersion.cmake +++ llvm/cmake/modules/CheckCompilerVersion.cmake @@ -94,7 +94,7 @@ " LLVM_LIBSTDCXX_MIN) if(NOT LLVM_LIBSTDCXX_MIN) - message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.") + # message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.") endif() # Test for libstdc++ version of at least 5.1 by checking for std::iostream_category(). # Note: We should check _GLIBCXX_RELEASE when possible (i.e., for GCC 7.1 and up). 
Index: clang/unittests/Basic/CharInfoTest.cpp === --- clang/unittests/Basic/CharInfoTest.cpp +++ clang/unittests/Basic/CharInfoTest.cpp @@ -50,44 +50,44 @@ EXPECT_FALSE(isASCII('\xff')); } -TEST(CharInfoTest, isIdentifierHead) { - EXPECT_TRUE(isIdentifierHead('a')); - EXPECT_TRUE(isIdentifierHead('A')); - EXPECT_TRUE(isIdentifierHead('z')); - EXPECT_TRUE(isIdentifierHead('Z')); - EXPECT_TRUE(isIdentifierHead('_')); - - EXPECT_FALSE(isIdentifierHead('0')); - EXPECT_FALSE(isIdentifierHead('.')); - EXPECT_FALSE(isIdentifierHead('`')); - EXPECT_FALSE(isIdentifierHead('\0')); - - EXPECT_FALSE(isIdentifierHead('$')); - EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true)); - - EXPECT_FALSE(isIdentifierHead('\x80')); - EXPECT_FALSE(isIdentifierHead('\xc2')); - EXPECT_FALSE(isIdentifierHead('\xff')); +TEST(CharInfoTest, isAsciiIdentifierStart) { + EXPECT_TRUE(isAsciiIdentifierStart('a')); + EXPECT_TRUE(isAsciiIdentifierStart('A')); + EXPECT_TRUE(isAsciiIdentifierStart('z')); + EXPECT_TRUE(isAsciiIdentifierStart('Z')); + EXPECT_TRUE(isAsciiIdentifierStart('_')); + + EXPECT_FALSE(isAsciiIdentifierStart('0')); + EXPECT_FALSE(isAsciiIdentifierStart('.')); + EXPECT_FALSE(isAsciiIdentifierStart('`')); + EXPECT_FALSE(isAsciiIdentifierStart('\0')); + + EXPECT_FALSE(isAsciiIdentifierStart('$')); + EXPECT_TRUE(isAsciiIdentifierStart('$', /*AllowDollar=*/true)); + + EXPECT_FALSE(isAsciiIdentifierStart('\x80')); + EXPECT_FALSE(isAsciiIdentifierStart('\xc2')); + EXPECT_FALSE(isAsciiIdentifierStart('\xff')); } -TEST(CharInfoTest, isIdentifierBody) { - EXPECT_TRUE(isIdentifierBody('a')); - EXPECT_TRUE(isIdentifierBody('A')); - EXPECT_TRUE(isIdentifierBody('z')); - EXPECT_TRUE(isIdentifierBody('Z')); - EXPECT_TRUE(isIdentifierBody('_')); +TEST(CharInfoTest, isAsciiIdentifierContinue) { + EXPECT_TRUE(isAsciiIdentifierContinue('a')); + EXPECT_TRUE(isAsciiIdentifierContinue('A')); + EXPECT_TRUE(isAsciiIdentifierContinue('z')); + EXPECT_TRUE(isAsciiIdentifierContinue('Z')); + 
EXPECT_TRUE(isAsciiIdentifierContinue('_')); - EXPECT_TRUE(isIdentifierBody('0')); - EXPECT_FALSE(isIdentifierBody('.')); - EXPECT_FALSE(isIdentifierBody('`')); - EXPECT_FALSE(isIdentifierBody('\0')); + EXPECT_TRUE(isAsciiIdentifierContinue('0')); + EXPECT_FALSE(isAsciiIdentifierContinue('.')); + EXPECT_FALSE(isAsciiIdentifierContinue('`')); + EXPECT_FALSE(isAsciiIdentifierContinue('\0')); - EXPECT_FALSE(isIdentifierBody('$')); - EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true)); + EXPECT_FALSE(isAsciiIdentifierContinue('$')); + EXPECT_TRUE(isAsciiIdentifierContinue('$', /*AllowDollar=*/true)); - EXPECT_FALSE(isIdentifierBody('\x80')); - EXPECT_FALSE(isIdentifierBody('\xc2')); - EXPECT_FALSE(isIdentifierBody('\xff')); + EXPECT_FALSE(isAsciiIdentifierContinue('\x80')); + EXPECT_FALSE(isAsciiIdentifierContinue('\xc2')); + EXPECT_FALSE(isAsciiIdentifierContinue('\xff')); } TEST(CharInfoTest,
[PATCH] D108308: [WIP] Cleanup identifier parsing.
cor3ntin updated this revision to Diff 367322. cor3ntin added a comment. Herald added subscribers: llvm-commits, mgorny. Herald added a project: LLVM. Spell ASCII in upper case Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D108308/new/ https://reviews.llvm.org/D108308 Files: clang-tools-extra/clang-include-fixer/IncludeFixer.cpp clang-tools-extra/clang-tidy/google/IntegerTypesCheck.cpp clang-tools-extra/clang-tidy/utils/RenamerClangTidyCheck.cpp clang-tools-extra/clangd/CodeComplete.cpp clang-tools-extra/clangd/SourceCode.cpp clang-tools-extra/clangd/refactor/Rename.cpp clang/include/clang/Basic/CharInfo.h clang/include/clang/Lex/Lexer.h clang/lib/ARCMigrate/ObjCMT.cpp clang/lib/ARCMigrate/TransUnbridgedCasts.cpp clang/lib/AST/MicrosoftMangle.cpp clang/lib/Basic/Module.cpp clang/lib/Edit/EditedSource.cpp clang/lib/Frontend/LayoutOverrideSource.cpp clang/lib/Frontend/Rewrite/FrontendActions.cpp clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp clang/lib/Lex/Lexer.cpp clang/lib/Lex/ModuleMap.cpp clang/lib/Sema/SemaAvailability.cpp clang/lib/Sema/SemaDeclAttr.cpp clang/lib/Sema/SemaExprObjC.cpp clang/lib/Sema/SemaType.cpp clang/lib/Tooling/Transformer/Parsing.cpp clang/unittests/Basic/CharInfoTest.cpp llvm/cmake/modules/CheckCompilerVersion.cmake Index: llvm/cmake/modules/CheckCompilerVersion.cmake === --- llvm/cmake/modules/CheckCompilerVersion.cmake +++ llvm/cmake/modules/CheckCompilerVersion.cmake @@ -94,7 +94,7 @@ " LLVM_LIBSTDCXX_MIN) if(NOT LLVM_LIBSTDCXX_MIN) - message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.") + # message(FATAL_ERROR "libstdc++ version must be at least ${GCC_MIN}.") endif() # Test for libstdc++ version of at least 5.1 by checking for std::iostream_category(). # Note: We should check _GLIBCXX_RELEASE when possible (i.e., for GCC 7.1 and up). 
Index: clang/unittests/Basic/CharInfoTest.cpp === --- clang/unittests/Basic/CharInfoTest.cpp +++ clang/unittests/Basic/CharInfoTest.cpp @@ -50,44 +50,44 @@ EXPECT_FALSE(isASCII('\xff')); } -TEST(CharInfoTest, isIdentifierHead) { - EXPECT_TRUE(isIdentifierHead('a')); - EXPECT_TRUE(isIdentifierHead('A')); - EXPECT_TRUE(isIdentifierHead('z')); - EXPECT_TRUE(isIdentifierHead('Z')); - EXPECT_TRUE(isIdentifierHead('_')); - - EXPECT_FALSE(isIdentifierHead('0')); - EXPECT_FALSE(isIdentifierHead('.')); - EXPECT_FALSE(isIdentifierHead('`')); - EXPECT_FALSE(isIdentifierHead('\0')); - - EXPECT_FALSE(isIdentifierHead('$')); - EXPECT_TRUE(isIdentifierHead('$', /*AllowDollar=*/true)); - - EXPECT_FALSE(isIdentifierHead('\x80')); - EXPECT_FALSE(isIdentifierHead('\xc2')); - EXPECT_FALSE(isIdentifierHead('\xff')); +TEST(CharInfoTest, isASCIIIdentifierStart) { + EXPECT_TRUE(isASCIIIdentifierStart('a')); + EXPECT_TRUE(isASCIIIdentifierStart('A')); + EXPECT_TRUE(isASCIIIdentifierStart('z')); + EXPECT_TRUE(isASCIIIdentifierStart('Z')); + EXPECT_TRUE(isASCIIIdentifierStart('_')); + + EXPECT_FALSE(isASCIIIdentifierStart('0')); + EXPECT_FALSE(isASCIIIdentifierStart('.')); + EXPECT_FALSE(isASCIIIdentifierStart('`')); + EXPECT_FALSE(isASCIIIdentifierStart('\0')); + + EXPECT_FALSE(isASCIIIdentifierStart('$')); + EXPECT_TRUE(isASCIIIdentifierStart('$', /*AllowDollar=*/true)); + + EXPECT_FALSE(isASCIIIdentifierStart('\x80')); + EXPECT_FALSE(isASCIIIdentifierStart('\xc2')); + EXPECT_FALSE(isASCIIIdentifierStart('\xff')); } -TEST(CharInfoTest, isIdentifierBody) { - EXPECT_TRUE(isIdentifierBody('a')); - EXPECT_TRUE(isIdentifierBody('A')); - EXPECT_TRUE(isIdentifierBody('z')); - EXPECT_TRUE(isIdentifierBody('Z')); - EXPECT_TRUE(isIdentifierBody('_')); +TEST(CharInfoTest, isASCIIIdentifierContinue) { + EXPECT_TRUE(isASCIIIdentifierContinue('a')); + EXPECT_TRUE(isASCIIIdentifierContinue('A')); + EXPECT_TRUE(isASCIIIdentifierContinue('z')); + EXPECT_TRUE(isASCIIIdentifierContinue('Z')); + 
EXPECT_TRUE(isASCIIIdentifierContinue('_')); - EXPECT_TRUE(isIdentifierBody('0')); - EXPECT_FALSE(isIdentifierBody('.')); - EXPECT_FALSE(isIdentifierBody('`')); - EXPECT_FALSE(isIdentifierBody('\0')); + EXPECT_TRUE(isASCIIIdentifierContinue('0')); + EXPECT_FALSE(isASCIIIdentifierContinue('.')); + EXPECT_FALSE(isASCIIIdentifierContinue('`')); + EXPECT_FALSE(isASCIIIdentifierContinue('\0')); - EXPECT_FALSE(isIdentifierBody('$')); - EXPECT_TRUE(isIdentifierBody('$', /*AllowDollar=*/true)); + EXPECT_FALSE(isASCIIIdentifierContinue('$')); + EXPECT_TRUE(isASCIIIdentifierContinue('$', /*AllowDollar=*/true)); - EXPECT_FALSE(isIdentifierBody('\x80')); - EXPECT_FALSE(isIdentifierBody('\xc2')); - EXPECT_FALSE(isIdentifierBody('\xff')); + EXPECT_FALSE(isASCIIIdentifierContinue('\x80')); + EXPECT_FALSE(isASCIIIdentifierContinue('\xc2')); +