[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix

2019-07-10 Thread Alex Lorenz via Phabricator via cfe-commits
This revision was automatically updated to reflect the committed changes.
Closed by commit rL365700: [clang-scan-deps] Dependency directives source 
minimizer: (authored by arphaman, committed by ).
Herald added a project: LLVM.
Herald added a subscriber: llvm-commits.

Changed prior to commit:
  https://reviews.llvm.org/D64525?vs=209035=209074#toc

Repository:
  rL LLVM

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64525/new/

https://reviews.llvm.org/D64525

Files:
  cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
  cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp


Index: cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
===
--- cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -262,7 +262,14 @@
   if (Start == Cur)
 return false;
   // The previous character must be a valid PP number character.
-  if (!isPreprocessingNumberBody(*(Cur - 1)))
+  // Make sure that the L, u, U, u8 prefixes don't get marked as a
+  // separator though.
+  char Prev = *(Cur - 1);
+  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
+return false;
+  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+return false;
+  if (!isPreprocessingNumberBody(Prev))
 return false;
   // The next character should be a valid identifier body character.
   return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
Index: cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
===
--- cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
+++ cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
@@ -507,4 +507,41 @@
   EXPECT_STREQ("#include \n#include \n", Out.data());
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixL) {
+  SmallVector Out;
+
+  StringRef Source = R"(L'P'
+#if DEBUG
+// '
+#endif
+#include 
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixU) {
+  SmallVector Out;
+
+  StringRef Source = R"(int x = U'P';
+#include 
+// '
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixu) {
+  SmallVector Out;
+
+  StringRef Source = R"(int x = u'b';
+int y = u8'a';
+int z = 128'78;
+#include 
+// '
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
 } // end anonymous namespace


Index: cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
===
--- cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ cfe/trunk/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -262,7 +262,14 @@
   if (Start == Cur)
 return false;
   // The previous character must be a valid PP number character.
-  if (!isPreprocessingNumberBody(*(Cur - 1)))
+  // Make sure that the L, u, U, u8 prefixes don't get marked as a
+  // separator though.
+  char Prev = *(Cur - 1);
+  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
+return false;
+  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+return false;
+  if (!isPreprocessingNumberBody(Prev))
 return false;
   // The next character should be a valid identifier body character.
   return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
Index: cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
===
--- cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
+++ cfe/trunk/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
@@ -507,4 +507,41 @@
   EXPECT_STREQ("#include \n#include \n", Out.data());
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixL) {
+  SmallVector Out;
+
+  StringRef Source = R"(L'P'
+#if DEBUG
+// '
+#endif
+#include 
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixU) {
+  SmallVector Out;
+
+  StringRef Source = R"(int x = U'P';
+#include 
+// '
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixu) {
+  SmallVector Out;
+
+  StringRef Source = R"(int x = u'b';
+int y = u8'a';
+int z = 128'78;
+#include 
+// '
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
 } // end anonymous namespace
___
cfe-commits mailing 

[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix

2019-07-10 Thread Michael Spencer via Phabricator via cfe-commits
Bigcheese accepted this revision.
Bigcheese added inline comments.



Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:270
+return false;
+  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+return false;

arphaman wrote:
> Bigcheese wrote:
> > Are we sure at this point that it's always safe to jump back 2?
> It should be, because otherwise `Start` would've been equals to `Cur - 1` 
> which we check for right before the dereference.
Oh, obviously.  I missed that check.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64525/new/

https://reviews.llvm.org/D64525



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix

2019-07-10 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman marked an inline comment as done.
arphaman added inline comments.



Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:270
+return false;
+  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+return false;

Bigcheese wrote:
> Are we sure at this point that it's always safe to jump back 2?
It should be, because otherwise `Start` would've been equals to `Cur - 1` which 
we check for right before the dereference.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64525/new/

https://reviews.llvm.org/D64525



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix

2019-07-10 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman marked an inline comment as done.
arphaman added inline comments.



Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:265-272
+  // Make sure that the L, u, U, u8 prefixes don't get marked as a
+  // separator though.
+  char Prev = *(Cur - 1);
+  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
+return false;
+  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+return false;

dexonsmith wrote:
> I wonder if this would be easier to identify walking forward from `Start` 
> rather than working backwards from `Cur`.
Yes, that's a possible option. We can scan scanning from start, until we reach 
non-whitespace right before Cur, and then identify the token. 

In such ambiguous cases it might be a good idea to raw lex the line using the 
Lexer from Start to End. Then we'll match the behavior of Lexer when there's an 
actual error as well. I'll see if I can setup a fallback like this in a 
follow-up patch.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64525/new/

https://reviews.llvm.org/D64525



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix

2019-07-10 Thread Michael Spencer via Phabricator via cfe-commits
Bigcheese added inline comments.



Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:270
+return false;
+  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+return false;

Are we sure at this point that it's always safe to jump back 2?


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64525/new/

https://reviews.llvm.org/D64525



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix

2019-07-10 Thread Duncan P. N. Exon Smith via Phabricator via cfe-commits
dexonsmith accepted this revision.
dexonsmith added a comment.
This revision is now accepted and ready to land.

LGTM, but I have a suggestion inline for another approach.




Comment at: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp:265-272
+  // Make sure that the L, u, U, u8 prefixes don't get marked as a
+  // separator though.
+  char Prev = *(Cur - 1);
+  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
+return false;
+  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+return false;

I wonder if this would be easier to identify walking forward from `Start` 
rather than working backwards from `Cur`.


Repository:
  rC Clang

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D64525/new/

https://reviews.llvm.org/D64525



___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D64525: [clang-scan-deps] Dependency directives source minimizer: single quotes are not digit separators after a valid character literal prefix

2019-07-10 Thread Alex Lorenz via Phabricator via cfe-commits
arphaman created this revision.
arphaman added a reviewer: Bigcheese.
Herald added subscribers: tschuett, dexonsmith, jkorous.
Herald added a project: clang.

The single quote character can act as a c++ digit separator. However, the 
minimizer shouldn't treat it as such when it's actually following a valid 
character literal prefix, like `L`, `U`, `u`, or `u8`.


Repository:
  rC Clang

https://reviews.llvm.org/D64525

Files:
  clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
  clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp


Index: clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
===
--- clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
+++ clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
@@ -507,4 +507,41 @@
   EXPECT_STREQ("#include \n#include \n", Out.data());
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixL) {
+  SmallVector Out;
+
+  StringRef Source = R"(L'P'
+#if DEBUG
+// '
+#endif
+#include 
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixU) {
+  SmallVector Out;
+
+  StringRef Source = R"(int x = U'P';
+#include 
+// '
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixu) {
+  SmallVector Out;
+
+  StringRef Source = R"(int x = u'b';
+int y = u8'a';
+int z = 128'78;
+#include 
+// '
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
 } // end anonymous namespace
Index: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
===
--- clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -262,7 +262,14 @@
   if (Start == Cur)
 return false;
   // The previous character must be a valid PP number character.
-  if (!isPreprocessingNumberBody(*(Cur - 1)))
+  // Make sure that the L, u, U, u8 prefixes don't get marked as a
+  // separator though.
+  char Prev = *(Cur - 1);
+  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
+return false;
+  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+return false;
+  if (!isPreprocessingNumberBody(Prev))
 return false;
   // The next character should be a valid identifier body character.
   return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));


Index: clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
===
--- clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
+++ clang/unittests/Lex/DependencyDirectivesSourceMinimizerTest.cpp
@@ -507,4 +507,41 @@
   EXPECT_STREQ("#include \n#include \n", Out.data());
 }
 
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixL) {
+  SmallVector Out;
+
+  StringRef Source = R"(L'P'
+#if DEBUG
+// '
+#endif
+#include 
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixU) {
+  SmallVector Out;
+
+  StringRef Source = R"(int x = U'P';
+#include 
+// '
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
+TEST(MinimizeSourceToDependencyDirectivesTest, CharacterLiteralPrefixu) {
+  SmallVector Out;
+
+  StringRef Source = R"(int x = u'b';
+int y = u8'a';
+int z = 128'78;
+#include 
+// '
+)";
+  ASSERT_FALSE(minimizeSourceToDependencyDirectives(Source, Out));
+  EXPECT_STREQ("#include \n", Out.data());
+}
+
 } // end anonymous namespace
Index: clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
===
--- clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
+++ clang/lib/Lex/DependencyDirectivesSourceMinimizer.cpp
@@ -262,7 +262,14 @@
   if (Start == Cur)
 return false;
   // The previous character must be a valid PP number character.
-  if (!isPreprocessingNumberBody(*(Cur - 1)))
+  // Make sure that the L, u, U, u8 prefixes don't get marked as a
+  // separator though.
+  char Prev = *(Cur - 1);
+  if (Prev == 'L' || Prev == 'U' || Prev == 'u')
+return false;
+  if (Prev == '8' && (Cur - 1 != Start) && *(Cur - 2) == 'u')
+return false;
+  if (!isPreprocessingNumberBody(Prev))
 return false;
   // The next character should be a valid identifier body character.
   return (Cur + 1) < End && isIdentifierBody(*(Cur + 1));
___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits