[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix
This revision was automatically updated to reflect the committed changes. Closed by commit rL365700: [clang-scan-deps] Dependency directives source minimizer: (authored by arphaman, committed by ). Herald added a project: LLVM. Herald added a subscriber: llvm-commits. Changed prior to commit: https://reviews.llvm.org/D64525?vs=209035=209074#toc Repository: rL LLVM CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64525/new/ https://reviews.llvm.org/D64525 Files: cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp Index: cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp === --- cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -262,7 +262,14 @@ if (Start == Cur) return false; // The previous character must be a valid PP number character. - if (!isPreprocessingNumberBody(*(Cur - 1))) + // Make sure that the L, u, U, u8 prefixes don't get marked as a + // separator though. + char Prev = *(Cur - 1); + if (Prev == 'L' || Prev == 'U' || Prev == 'u') +return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +return false; + if (!isPreprocessingNumberBody(Prev)) return false; // The next character should be a valid identifier body character. return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); Index: cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp === --- cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -507,4 +507,41 @@ EXPECT_STREQ("#include \n#include \n", Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixL) { + SmallVector Out; + + StringRef Source = R"(L'P' +#if DEBUG +// ' +#endif +#include +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixU) { + SmallVector Out; + + StringRef Source = R"(int x = U'P'; +#include +// ' +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixu) { + SmallVector Out; + + StringRef Source = R"(int x = u'b'; +int y = u8'a'; +int z = 128'78; +#include +// ' +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + } // end anonymous namespace Index: cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp === --- cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -262,7 +262,14 @@ if (Start == Cur) return false; // The previous character must be a valid PP number character. - if (!isPreprocessingNumberBody(*(Cur - 1))) + // Make sure that the L, u, U, u8 prefixes don't get marked as a + // separator though. + char Prev = *(Cur - 1); + if (Prev == 'L' || Prev == 'U' || Prev == 'u') +return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +return false; + if (!isPreprocessingNumberBody(Prev)) return false; // The next character should be a valid identifier body character. return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); Index: cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp === --- cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -507,4 +507,41 @@ EXPECT_STREQ("#include \n#include \n", Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixL) { + SmallVector Out; + + StringRef Source = R"(L'P' +#if DEBUG +// ' +#endif +#include +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixU) { + SmallVector Out; + + StringRef Source = R"(int x = U'P'; +#include +// ' +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixu) { + SmallVector Out; + + StringRef Source = R"(int x = u'b'; +int y = u8'a'; +int z = 128'78; +#include +// ' +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + } // end anonymous namespace ___ cfe-commits mailing
[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix
Bigcheese accepted this revision. Bigcheese added inline comments. Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:270 +return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +return false; arphaman wrote: > Bigcheese wrote: > > Are we sure at this point that it's always safe to jump back 2? > It should be, because otherwise `Start` would've been equals to `Cur - 1` > which we check for right before the dereference. Oh, obviously. I missed that check. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64525/new/ https://reviews.llvm.org/D64525 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix
arphaman marked an inline comment as done. arphaman added inline comments. Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:270 +return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +return false; Bigcheese wrote: > Are we sure at this point that it's always safe to jump back 2? It should be, because otherwise `Start` would've been equals to `Cur - 1` which we check for right before the dereference. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64525/new/ https://reviews.llvm.org/D64525 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix
arphaman marked an inline comment as done. arphaman added inline comments. Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:265-272 + // Make sure that the L, u, U, u8 prefixes don't get marked as a + // separator though. + char Prev = *(Cur - 1); + if (Prev == 'L' || Prev == 'U' || Prev == 'u') +return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +return false; dexonsmith wrote: > I wonder if this would be easier to identify walking forward from `Start` > rather than working backwards from `Cur`. Yes, that's a possible option. We can scan scanning from start, until we reach non-whitespace right before Cur, and then identify the token. In such ambiguous cases it might be a good idea to raw lex the line using the Lexer from Start to End. Then we'll match the behavior of Lexer when there's an actual error as well. I'll see if I can setup a fallback like this in a follow-up patch. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64525/new/ https://reviews.llvm.org/D64525 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix
Bigcheese added inline comments. Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:270 +return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +return false; Are we sure at this point that it's always safe to jump back 2? Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64525/new/ https://reviews.llvm.org/D64525 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix
dexonsmith accepted this revision. dexonsmith added a comment. This revision is now accepted and ready to land. LGTM, but I have a suggestion inline for another approach. Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:265-272 + // Make sure that the L, u, U, u8 prefixes don't get marked as a + // separator though. + char Prev = *(Cur - 1); + if (Prev == 'L' || Prev == 'U' || Prev == 'u') +return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +return false; I wonder if this would be easier to identify walking forward from `Start` rather than working backwards from `Cur`. Repository: rC Clang CHANGES SINCE LAST ACTION https://reviews.llvm.org/D64525/new/ https://reviews.llvm.org/D64525 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix
arphaman created this revision. arphaman added a reviewer: Bigcheese. Herald added subscribers: tschuett, dexonsmith, jkorous. Herald added a project: clang. The single quote character can act as a c++ digit separator. However, the minimizer shouldn't treat it as such when it's actually following a valid character literal prefix, like `L`, `U`, `u`, or `u8`. Repository: rC Clang https://reviews.llvm.org/D64525 Files: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp Index: clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp === --- clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -507,4 +507,41 @@ EXPECT_STREQ("#include \n#include \n", Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixL) { + SmallVector Out; + + StringRef Source = R"(L'P' +#if DEBUG +// ' +#endif +#include +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixU) { + SmallVector Out; + + StringRef Source = R"(int x = U'P'; +#include +// ' +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixu) { + SmallVector Out; + + StringRef Source = R"(int x = u'b'; +int y = u8'a'; +int z = 128'78; +#include +// ' +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + } // end anonymous namespace Index: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp === --- clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -262,7 +262,14 @@ if (Start == Cur) return false; // The previous character must be a valid PP number character. - if (!isPreprocessingNumberBody(*(Cur - 1))) + // Make sure that the L, u, U, u8 prefixes don't get marked as a + // separator though. + char Prev = *(Cur - 1); + if (Prev == 'L' || Prev == 'U' || Prev == 'u') +return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +return false; + if (!isPreprocessingNumberBody(Prev)) return false; // The next character should be a valid identifier body character. return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); Index: clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp === --- clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp +++ clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp @@ -507,4 +507,41 @@ EXPECT_STREQ("#include \n#include \n", Out.data()); } +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixL) { + SmallVector Out; + + StringRef Source = R"(L'P' +#if DEBUG +// ' +#endif +#include +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixU) { + SmallVector Out; + + StringRef Source = R"(int x = U'P'; +#include +// ' +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + +TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixu) { + SmallVector Out; + + StringRef Source = R"(int x = u'b'; +int y = u8'a'; +int z = 128'78; +#include +// ' +)"; + ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out)); + EXPECT_STREQ("#include \n", Out.data()); +} + } // end anonymous namespace Index: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp === --- clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp +++ clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp @@ -262,7 +262,14 @@ if (Start == Cur) return false; // The previous character must be a valid PP number character. - if (!isPreprocessingNumberBody(*(Cur - 1))) + // Make sure that the L, u, U, u8 prefixes don't get marked as a + // separator though. + char Prev = *(Cur - 1); + if (Prev == 'L' || Prev == 'U' || Prev == 'u') +return false; + if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u') +return false; + if (!isPreprocessingNumberBody(Prev)) return false; // The next character should be a valid identifier body character. return (Cur + 1) < End && isIdentifierBody(*(Cur + 1)); ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits