[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-06-01 Thread Richard via Phabricator via cfe-commits
This revision was landed with ongoing or failed builds.
This revision was automatically updated to reflect the committed changes.
Closed by commit rGb418ef5cb90b: [clang-tidy] Reject invalid enum initializers 
in C files (authored by LegalizeAdulthood).

Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

Files:
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
  clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp
  clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp

Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
===
--- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
+++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
@@ -36,177 +36,241 @@
   return Tokens;
 }
 
-static bool matchText(const char *Text) {
+static bool matchText(const char *Text, bool AllowComma) {
   std::vector Tokens{tokenify(Text)};
-  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens);
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma);
 
   return Matcher.match();
 }
 
+static modernize::LiteralSize sizeText(const char *Text) {
+  std::vector Tokens{tokenify(Text)};
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true);
+  if (Matcher.match())
+return Matcher.largestLiteralSize();
+  return modernize::LiteralSize::Unknown;
+}
+
+static const char *toString(modernize::LiteralSize Value) {
+  switch (Value) {
+  case modernize::LiteralSize::Int:
+return "Int";
+  case modernize::LiteralSize::UnsignedInt:
+return "UnsignedInt";
+  case modernize::LiteralSize::Long:
+return "Long";
+  case modernize::LiteralSize::UnsignedLong:
+return "UnsignedLong";
+  case modernize::LiteralSize::LongLong:
+return "LongLong";
+  case modernize::LiteralSize::UnsignedLongLong:
+return "UnsignedLongLong";
+  default:
+return "Unknown";
+  }
+}
+
 namespace {
 
-struct Param {
+struct MatchParam {
+  bool AllowComma;
   bool Matched;
   const char *Text;
 
-  friend std::ostream <<(std::ostream , const Param ) {
-return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '"
-   << Value.Text << "'";
+  friend std::ostream <<(std::ostream , const MatchParam ) {
+return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma
+   << ", Matched: " << std::boolalpha << Value.Matched
+   << ", Text: '" << Value.Text << '\'';
   }
 };
 
-class MatcherTest : public ::testing::TestWithParam {};
+struct SizeParam {
+  modernize::LiteralSize Size;
+  const char *Text;
+
+  friend std::ostream <<(std::ostream , const SizeParam ) {
+return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\'';
+  }
+};
+
+class MatcherTest : public ::testing::TestWithParam {};
+
+class SizeTest : public ::testing::TestWithParam {};
 
 } // namespace
 
-static const Param Params[] = {
+static const MatchParam MatchParams[] = {
 // Accept integral literals.
-{true, "1"},
-{true, "0177"},
-{true, "0xdeadbeef"},
-{true, "0b1011"},
-{true, "'c'"},
+{true, true, "1"},
+{true, true, "0177"},
+{true, true, "0xdeadbeef"},
+{true, true, "0b1011"},
+{true, true, "'c'"},
 // Reject non-integral literals.
-{false, "1.23"},
-{false, "0x1p3"},
-{false, R"("string")"},
-{false, "1i"},
+{true, false, "1.23"},
+{true, false, "0x1p3"},
+{true, false, R"("string")"},
+{true, false, "1i"},
 
 // Accept literals with these unary operators.
-{true, "-1"},
-{true, "+1"},
-{true, "~1"},
-{true, "!1"},
+{true, true, "-1"},
+{true, true, "+1"},
+{true, true, "~1"},
+{true, true, "!1"},
 // Reject invalid unary operators.
-{false, "1-"},
-{false, "1+"},
-{false, "1~"},
-{false, "1!"},
+{true, false, "1-"},
+{true, false, "1+"},
+{true, false, "1~"},
+{true, false, "1!"},
 
 // Accept valid binary operators.
-{true, "1+1"},
-{true, "1-1"},
-{true, "1*1"},
-{true, "1/1"},
-{true, "1%2"},
-{true, "1<<1"},
-{true, "1>>1"},
-{true, "1<=>1"},
-{true, "1<1"},
-{true, "1>1"},
-{true, "1<=1"},
-{true, "1>=1"},
-{true, "1==1"},
-{true, "1!=1"},
-{true, "1&1"},
-{true, "1^1"},
-{true, "1|1"},
-{true, "1&&1"},
-{true, "1||1"},
-{true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --.
-{true, "1- -1"},
-{true, "1,1"},
+{true, true, "1+1"},
+{true, true, "1-1"},
+{true, true, "1*1"},
+{true, true, "1/1"},
+{true, 

[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-06-01 Thread Richard via Phabricator via cfe-commits
LegalizeAdulthood updated this revision to Diff 433648.
LegalizeAdulthood added a comment.

Update from review comments


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

Files:
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
  clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp
  clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp

Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
===
--- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
+++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
@@ -36,177 +36,241 @@
   return Tokens;
 }
 
-static bool matchText(const char *Text) {
+static bool matchText(const char *Text, bool AllowComma) {
   std::vector Tokens{tokenify(Text)};
-  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens);
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma);
 
   return Matcher.match();
 }
 
+static modernize::LiteralSize sizeText(const char *Text) {
+  std::vector Tokens{tokenify(Text)};
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true);
+  if (Matcher.match())
+return Matcher.largestLiteralSize();
+  return modernize::LiteralSize::Unknown;
+}
+
+static const char *toString(modernize::LiteralSize Value) {
+  switch (Value) {
+  case modernize::LiteralSize::Int:
+return "Int";
+  case modernize::LiteralSize::UnsignedInt:
+return "UnsignedInt";
+  case modernize::LiteralSize::Long:
+return "Long";
+  case modernize::LiteralSize::UnsignedLong:
+return "UnsignedLong";
+  case modernize::LiteralSize::LongLong:
+return "LongLong";
+  case modernize::LiteralSize::UnsignedLongLong:
+return "UnsignedLongLong";
+  default:
+return "Unknown";
+  }
+}
+
 namespace {
 
-struct Param {
+struct MatchParam {
+  bool AllowComma;
   bool Matched;
   const char *Text;
 
-  friend std::ostream <<(std::ostream , const Param ) {
-return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '"
-   << Value.Text << "'";
+  friend std::ostream <<(std::ostream , const MatchParam ) {
+return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma
+   << ", Matched: " << std::boolalpha << Value.Matched
+   << ", Text: '" << Value.Text << '\'';
   }
 };
 
-class MatcherTest : public ::testing::TestWithParam {};
+struct SizeParam {
+  modernize::LiteralSize Size;
+  const char *Text;
+
+  friend std::ostream <<(std::ostream , const SizeParam ) {
+return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\'';
+  }
+};
+
+class MatcherTest : public ::testing::TestWithParam {};
+
+class SizeTest : public ::testing::TestWithParam {};
 
 } // namespace
 
-static const Param Params[] = {
+static const MatchParam MatchParams[] = {
 // Accept integral literals.
-{true, "1"},
-{true, "0177"},
-{true, "0xdeadbeef"},
-{true, "0b1011"},
-{true, "'c'"},
+{true, true, "1"},
+{true, true, "0177"},
+{true, true, "0xdeadbeef"},
+{true, true, "0b1011"},
+{true, true, "'c'"},
 // Reject non-integral literals.
-{false, "1.23"},
-{false, "0x1p3"},
-{false, R"("string")"},
-{false, "1i"},
+{true, false, "1.23"},
+{true, false, "0x1p3"},
+{true, false, R"("string")"},
+{true, false, "1i"},
 
 // Accept literals with these unary operators.
-{true, "-1"},
-{true, "+1"},
-{true, "~1"},
-{true, "!1"},
+{true, true, "-1"},
+{true, true, "+1"},
+{true, true, "~1"},
+{true, true, "!1"},
 // Reject invalid unary operators.
-{false, "1-"},
-{false, "1+"},
-{false, "1~"},
-{false, "1!"},
+{true, false, "1-"},
+{true, false, "1+"},
+{true, false, "1~"},
+{true, false, "1!"},
 
 // Accept valid binary operators.
-{true, "1+1"},
-{true, "1-1"},
-{true, "1*1"},
-{true, "1/1"},
-{true, "1%2"},
-{true, "1<<1"},
-{true, "1>>1"},
-{true, "1<=>1"},
-{true, "1<1"},
-{true, "1>1"},
-{true, "1<=1"},
-{true, "1>=1"},
-{true, "1==1"},
-{true, "1!=1"},
-{true, "1&1"},
-{true, "1^1"},
-{true, "1|1"},
-{true, "1&&1"},
-{true, "1||1"},
-{true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --.
-{true, "1- -1"},
-{true, "1,1"},
+{true, true, "1+1"},
+{true, true, "1-1"},
+{true, true, "1*1"},
+{true, true, "1/1"},
+{true, true, "1%2"},
+{true, true, "1<<1"},
+{true, true, "1>>1"},
+{true, true, "1<=>1"},
+{true, true, "1<1"},
+{true, true, "1>1"},
+{true, true, 

[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-06-01 Thread Richard via Phabricator via cfe-commits
LegalizeAdulthood marked 2 inline comments as done.
LegalizeAdulthood added inline comments.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:3-7
+// C requires enum values to fit into an int.
+#define TOO_BIG1 1L
+#define TOO_BIG2 1UL
+#define TOO_BIG3 1LL
+#define TOO_BIG4 1ULL

aaron.ballman wrote:
> LegalizeAdulthood wrote:
> > aaron.ballman wrote:
> > > How do we want to handle the fact that Clang has an extension to allow 
> > > this? Perhaps we want a config option for pedantic vs non-pedantic fixes?
> > I was trying to find a way to make it fail on gcc/clang on compiler 
> > explorer and I couldn't get it to fail in either compiler
> > 
> > Where is the extension documented?  It appears to be on by default in both 
> > compilers.
> I don't think we have any documentation for it (we basically don't bother to 
> document our implementation-defined behavior, which drives me mildly nuts -- 
> we fall back on "the source code is the definitive documentation", which is 
> not user-friendly at all).
> 
> That's why I was wondering if we wanted a pedantic vs non-pedantic option 
> here. Pedantically, the behavior you have today is correct. However, because 
> this is a super common extension to compilers, we might want to allow it to 
> be transformed despite being pedantically an extension.
> 
> We can handle that as a follow-up though.
Is there a way to force it to fail with some compiler flag?  If you could show 
me on compiler explorer, that would be great.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-31 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman accepted this revision.
aaron.ballman added a comment.
This revision is now accepted and ready to land.

LGTM with a few comments that can be addressed when you commit.




Comment at: 
clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp:87-89
+  if (Length <= 1) {
+  return LiteralSize::Int;
+  }





Comment at: 
clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp:91-93
+  bool SeenUnsigned{};
+  bool SeenLong{};
+  bool SeenLongLong{};

I (personally) think this is a more clear form of initialization to most folks, 
but don't insist.



Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:326
+  bool Matched = Matcher.match();
+  if (LangOpts.C99 &&
+  (Matcher.largestLiteralSize() != LiteralSize::Int &&

LegalizeAdulthood wrote:
> aaron.ballman wrote:
> > And C89?
> `C99` was the earliest dialect of C I could find in `LangOptions.def`.  If 
> you can show me an earlier version for C, I'm happy to switch it.
This keeps catching folks out, I may add a helper method to make this far more 
clear. `!LangOpts.CPlusPlus` means "in a C mode" for us currently.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:3-7
+// C requires enum values to fit into an int.
+#define TOO_BIG1 1L
+#define TOO_BIG2 1UL
+#define TOO_BIG3 1LL
+#define TOO_BIG4 1ULL

LegalizeAdulthood wrote:
> aaron.ballman wrote:
> > How do we want to handle the fact that Clang has an extension to allow 
> > this? Perhaps we want a config option for pedantic vs non-pedantic fixes?
> I was trying to find a way to make it fail on gcc/clang on compiler explorer 
> and I couldn't get it to fail in either compiler
> 
> Where is the extension documented?  It appears to be on by default in both 
> compilers.
I don't think we have any documentation for it (we basically don't bother to 
document our implementation-defined behavior, which drives me mildly nuts -- we 
fall back on "the source code is the definitive documentation", which is not 
user-friendly at all).

That's why I was wondering if we wanted a pedantic vs non-pedantic option here. 
Pedantically, the behavior you have today is correct. However, because this is 
a super common extension to compilers, we might want to allow it to be 
transformed despite being pedantically an extension.

We can handle that as a follow-up though.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-27 Thread Richard via Phabricator via cfe-commits
LegalizeAdulthood updated this revision to Diff 432676.
LegalizeAdulthood added a comment.

- Add C++ test case for acceptable `operator,` initializer


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

Files:
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
  clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp
  clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp

Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
===
--- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
+++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
@@ -36,177 +36,241 @@
   return Tokens;
 }
 
-static bool matchText(const char *Text) {
+static bool matchText(const char *Text, bool AllowComma) {
   std::vector Tokens{tokenify(Text)};
-  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens);
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma);
 
   return Matcher.match();
 }
 
+static modernize::LiteralSize sizeText(const char *Text) {
+  std::vector Tokens{tokenify(Text)};
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true);
+  if (Matcher.match())
+return Matcher.largestLiteralSize();
+  return modernize::LiteralSize::Unknown;
+}
+
+static const char *toString(modernize::LiteralSize Value) {
+  switch (Value) {
+  case modernize::LiteralSize::Int:
+return "Int";
+  case modernize::LiteralSize::UnsignedInt:
+return "UnsignedInt";
+  case modernize::LiteralSize::Long:
+return "Long";
+  case modernize::LiteralSize::UnsignedLong:
+return "UnsignedLong";
+  case modernize::LiteralSize::LongLong:
+return "LongLong";
+  case modernize::LiteralSize::UnsignedLongLong:
+return "UnsignedLongLong";
+  default:
+return "Unknown";
+  }
+}
+
 namespace {
 
-struct Param {
+struct MatchParam {
+  bool AllowComma;
   bool Matched;
   const char *Text;
 
-  friend std::ostream <<(std::ostream , const Param ) {
-return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '"
-   << Value.Text << "'";
+  friend std::ostream <<(std::ostream , const MatchParam ) {
+return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma
+   << ", Matched: " << std::boolalpha << Value.Matched
+   << ", Text: '" << Value.Text << '\'';
   }
 };
 
-class MatcherTest : public ::testing::TestWithParam {};
+struct SizeParam {
+  modernize::LiteralSize Size;
+  const char *Text;
+
+  friend std::ostream <<(std::ostream , const SizeParam ) {
+return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\'';
+  }
+};
+
+class MatcherTest : public ::testing::TestWithParam {};
+
+class SizeTest : public ::testing::TestWithParam {};
 
 } // namespace
 
-static const Param Params[] = {
+static const MatchParam MatchParams[] = {
 // Accept integral literals.
-{true, "1"},
-{true, "0177"},
-{true, "0xdeadbeef"},
-{true, "0b1011"},
-{true, "'c'"},
+{true, true, "1"},
+{true, true, "0177"},
+{true, true, "0xdeadbeef"},
+{true, true, "0b1011"},
+{true, true, "'c'"},
 // Reject non-integral literals.
-{false, "1.23"},
-{false, "0x1p3"},
-{false, R"("string")"},
-{false, "1i"},
+{true, false, "1.23"},
+{true, false, "0x1p3"},
+{true, false, R"("string")"},
+{true, false, "1i"},
 
 // Accept literals with these unary operators.
-{true, "-1"},
-{true, "+1"},
-{true, "~1"},
-{true, "!1"},
+{true, true, "-1"},
+{true, true, "+1"},
+{true, true, "~1"},
+{true, true, "!1"},
 // Reject invalid unary operators.
-{false, "1-"},
-{false, "1+"},
-{false, "1~"},
-{false, "1!"},
+{true, false, "1-"},
+{true, false, "1+"},
+{true, false, "1~"},
+{true, false, "1!"},
 
 // Accept valid binary operators.
-{true, "1+1"},
-{true, "1-1"},
-{true, "1*1"},
-{true, "1/1"},
-{true, "1%2"},
-{true, "1<<1"},
-{true, "1>>1"},
-{true, "1<=>1"},
-{true, "1<1"},
-{true, "1>1"},
-{true, "1<=1"},
-{true, "1>=1"},
-{true, "1==1"},
-{true, "1!=1"},
-{true, "1&1"},
-{true, "1^1"},
-{true, "1|1"},
-{true, "1&&1"},
-{true, "1||1"},
-{true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --.
-{true, "1- -1"},
-{true, "1,1"},
+{true, true, "1+1"},
+{true, true, "1-1"},
+{true, true, "1*1"},
+{true, true, "1/1"},
+{true, true, "1%2"},
+{true, true, "1<<1"},
+{true, true, "1>>1"},
+{true, true, "1<=>1"},
+{true, true, "1<1"},
+{true, true, 

[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-27 Thread Richard via Phabricator via cfe-commits
LegalizeAdulthood marked 5 inline comments as done.
LegalizeAdulthood added inline comments.



Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:324
 {
-  IntegralLiteralExpressionMatcher Matcher(MacroTokens);
-  return Matcher.match();
+  IntegralLiteralExpressionMatcher Matcher(MacroTokens, LangOpts.C99 == 0);
+  bool Matched = Matcher.match();

aaron.ballman wrote:
> Huh? Comma wasn't allowed in C89 either... In fact, there's no language mode 
> which allows top-level commas in either C or C++ -- the issue with the comma 
> operator is a parsing one. You can't tell the difference between the comma 
> being part of the initializer expression or the comma being the separator 
> between enumerators.
The most recent diff handles the issue with `operator,` but the previous 
version was only handling the overly-big literal



Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:326
+  bool Matched = Matcher.match();
+  if (LangOpts.C99 &&
+  (Matcher.largestLiteralSize() != LiteralSize::Int &&

aaron.ballman wrote:
> And C89?
`C99` was the earliest dialect of C I could find in `LangOptions.def`.  If you 
can show me an earlier version for C, I'm happy to switch it.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:3-7
+// C requires enum values to fit into an int.
+#define TOO_BIG1 1L
+#define TOO_BIG2 1UL
+#define TOO_BIG3 1LL
+#define TOO_BIG4 1ULL

aaron.ballman wrote:
> How do we want to handle the fact that Clang has an extension to allow this? 
> Perhaps we want a config option for pedantic vs non-pedantic fixes?
I was trying to find a way to make it fail on gcc/clang on compiler explorer 
and I couldn't get it to fail in either compiler

Where is the extension documented?  It appears to be on by default in both 
compilers.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:10
+// C forbids comma operator in initializing expressions.
+#define BAD_OP 1, 2
+

aaron.ballman wrote:
> LegalizeAdulthood wrote:
> > aaron.ballman wrote:
> > > Can you add a test:
> > > ```
> > > #define GOOD_OP (1, 2)
> > > ```
> > > to make sure it still gets converted to an enum?
> > Yeah, another one I was thinking of is when someone does [[ 
> > https://godbolt.org/z/c677je8s1 | something as disgusting as this. ]]
> > 
> > ```
> > #define ARGS 1, 2
> > #define OTHER_ARGS (1, 2)
> > 
> > int f(int x, int y) { return x + y; }
> > 
> > int main()
> > {
> > return f(ARGS) + f OTHER_ARGS;
> > }
> > ```
> > 
> > However, the only real way to handle avoiding conversion of the 2nd case is 
> > to examine the context of macro expansion.  This is another edge case that 
> > will have to be handled subsequently.
> > 
> > This gets tricky because AFAIK there is no way to select expressions in the 
> > AST that result from macro expansion.  You have to match the macro 
> > expansion locations against AST nodes to identify the node(s) that match 
> > the expansion location yourself.
> That's a good test case (for some definition of good)! :-)
> 
> At this point, there's a few options:
> 
> 1) Go back to the way things were -- disallow comma expressions, even in 
> parens. Document it as a limitation.
> 2) Accept comma expressions in parens, ignore the problem case like you 
> described above. Document it as a limitation?
> 3) Try to solve every awful code construct we can ever imagine. Document the 
> check is perfect in every way, but probably not land it for several years.
> 
> I think I'd be happy with #2 but I could also live with #1
In the most recent diff, I'm supporting `operator,` inside parens for C++ and 
never allowing it in C.

I plan to do a subsequent enhancement that examines the expansion locations of 
macros and rejects any macro whose expansion doesn't encompass a single 
expression.  In other words, if the tokens in the macro somehow get 
incorporated into some syntactic element that isn't completely encased in a 
single expression, then reject the macro.


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-27 Thread Richard via Phabricator via cfe-commits
LegalizeAdulthood updated this revision to Diff 432675.
LegalizeAdulthood marked an inline comment as done.
LegalizeAdulthood added a comment.

- Update from review comments
- Disallow operator, unless inside parentheses


CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

Files:
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
  clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp
  clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp

Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
===
--- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
+++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
@@ -36,177 +36,241 @@
   return Tokens;
 }
 
-static bool matchText(const char *Text) {
+static bool matchText(const char *Text, bool AllowComma) {
   std::vector Tokens{tokenify(Text)};
-  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens);
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma);
 
   return Matcher.match();
 }
 
+static modernize::LiteralSize sizeText(const char *Text) {
+  std::vector Tokens{tokenify(Text)};
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true);
+  if (Matcher.match())
+return Matcher.largestLiteralSize();
+  return modernize::LiteralSize::Unknown;
+}
+
+static const char *toString(modernize::LiteralSize Value) {
+  switch (Value) {
+  case modernize::LiteralSize::Int:
+return "Int";
+  case modernize::LiteralSize::UnsignedInt:
+return "UnsignedInt";
+  case modernize::LiteralSize::Long:
+return "Long";
+  case modernize::LiteralSize::UnsignedLong:
+return "UnsignedLong";
+  case modernize::LiteralSize::LongLong:
+return "LongLong";
+  case modernize::LiteralSize::UnsignedLongLong:
+return "UnsignedLongLong";
+  default:
+return "Unknown";
+  }
+}
+
 namespace {
 
-struct Param {
+struct MatchParam {
+  bool AllowComma;
   bool Matched;
   const char *Text;
 
-  friend std::ostream <<(std::ostream , const Param ) {
-return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '"
-   << Value.Text << "'";
+  friend std::ostream <<(std::ostream , const MatchParam ) {
+return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma
+   << ", Matched: " << std::boolalpha << Value.Matched
+   << ", Text: '" << Value.Text << '\'';
   }
 };
 
-class MatcherTest : public ::testing::TestWithParam {};
+struct SizeParam {
+  modernize::LiteralSize Size;
+  const char *Text;
+
+  friend std::ostream <<(std::ostream , const SizeParam ) {
+return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\'';
+  }
+};
+
+class MatcherTest : public ::testing::TestWithParam {};
+
+class SizeTest : public ::testing::TestWithParam {};
 
 } // namespace
 
-static const Param Params[] = {
+static const MatchParam MatchParams[] = {
 // Accept integral literals.
-{true, "1"},
-{true, "0177"},
-{true, "0xdeadbeef"},
-{true, "0b1011"},
-{true, "'c'"},
+{true, true, "1"},
+{true, true, "0177"},
+{true, true, "0xdeadbeef"},
+{true, true, "0b1011"},
+{true, true, "'c'"},
 // Reject non-integral literals.
-{false, "1.23"},
-{false, "0x1p3"},
-{false, R"("string")"},
-{false, "1i"},
+{true, false, "1.23"},
+{true, false, "0x1p3"},
+{true, false, R"("string")"},
+{true, false, "1i"},
 
 // Accept literals with these unary operators.
-{true, "-1"},
-{true, "+1"},
-{true, "~1"},
-{true, "!1"},
+{true, true, "-1"},
+{true, true, "+1"},
+{true, true, "~1"},
+{true, true, "!1"},
 // Reject invalid unary operators.
-{false, "1-"},
-{false, "1+"},
-{false, "1~"},
-{false, "1!"},
+{true, false, "1-"},
+{true, false, "1+"},
+{true, false, "1~"},
+{true, false, "1!"},
 
 // Accept valid binary operators.
-{true, "1+1"},
-{true, "1-1"},
-{true, "1*1"},
-{true, "1/1"},
-{true, "1%2"},
-{true, "1<<1"},
-{true, "1>>1"},
-{true, "1<=>1"},
-{true, "1<1"},
-{true, "1>1"},
-{true, "1<=1"},
-{true, "1>=1"},
-{true, "1==1"},
-{true, "1!=1"},
-{true, "1&1"},
-{true, "1^1"},
-{true, "1|1"},
-{true, "1&&1"},
-{true, "1||1"},
-{true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --.
-{true, "1- -1"},
-{true, "1,1"},
+{true, true, "1+1"},
+{true, true, "1-1"},
+{true, true, "1*1"},
+{true, true, "1/1"},
+{true, true, "1%2"},
+{true, true, "1<<1"},
+{true, true, "1>>1"},
+

[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-24 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added inline comments.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:10
+// C forbids comma operator in initializing expressions.
+#define BAD_OP 1, 2
+

LegalizeAdulthood wrote:
> aaron.ballman wrote:
> > Can you add a test:
> > ```
> > #define GOOD_OP (1, 2)
> > ```
> > to make sure it still gets converted to an enum?
> Yeah, another one I was thinking of is when someone does [[ 
> https://godbolt.org/z/c677je8s1 | something as disgusting as this. ]]
> 
> ```
> #define ARGS 1, 2
> #define OTHER_ARGS (1, 2)
> 
> int f(int x, int y) { return x + y; }
> 
> int main()
> {
> return f(ARGS) + f OTHER_ARGS;
> }
> ```
> 
> However, the only real way to handle avoiding conversion of the 2nd case is 
> to examine the context of macro expansion.  This is another edge case that 
> will have to be handled subsequently.
> 
> This gets tricky because AFAIK there is no way to select expressions in the 
> AST that result from macro expansion.  You have to match the macro expansion 
> locations against AST nodes to identify the node(s) that match the expansion 
> location yourself.
That's a good test case (for some definition of good)! :-)

At this point, there's a few options:

1) Go back to the way things were -- disallow comma expressions, even in 
parens. Document it as a limitation.
2) Accept comma expressions in parens, ignore the problem case like you 
described above. Document it as a limitation?
3) Try to solve every awful code construct we can ever imagine. Document the 
check is perfect in every way, but probably not land it for several years.

I think I'd be happy with #2 but I could also live with #1


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-16 Thread Richard via Phabricator via cfe-commits
LegalizeAdulthood marked 2 inline comments as done.
LegalizeAdulthood added inline comments.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:9
+
+// C forbids comma operator in initializing expressions.
+#define BAD_OP 1, 2

aaron.ballman wrote:
> LegalizeAdulthood wrote:
> > aaron.ballman wrote:
> > > It's also forbidden in C++.
> > C++ just talks about a constant expression and doesn't explicitly disallow 
> > `operator,` in such expressions that I could find.  Can you point me to 
> > where it is disallowed?
> > 
> > If it is disallowed universally, then I don't see why you asked me to parse 
> > it `:)`
> > C++ just talks about a constant expression and doesn't explicitly disallow 
> > operator, in such expressions that I could find. Can you point me to where 
> > it is disallowed?
> 
> It falls out from the grammar:
> 
> http://eel.is/c++draft/enum#nt:enumerator-definition
> http://eel.is/c++draft/expr.const#nt:constant-expression
> http://eel.is/c++draft/expr.cond#nt:conditional-expression
> 
> > If it is disallowed universally, then I don't see why you asked me to parse 
> > it :)
> 
> It can show up in paren expressions and the interface is generally about 
> integer literal expressions.
OK, yeah, I was looking at the PDF and it doesn't have those nice navigable 
links, I'll have to keep that site in mind.

So we have to disallow top-level comma but allow it as a parenthesized 
expression.  Tricky!



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:10
+// C forbids comma operator in initializing expressions.
+#define BAD_OP 1, 2
+

aaron.ballman wrote:
> Can you add a test:
> ```
> #define GOOD_OP (1, 2)
> ```
> to make sure it still gets converted to an enum?
Yeah, another one I was thinking of is when someone does [[ 
https://godbolt.org/z/c677je8s1 | something as disgusting as this. ]]

```
#define ARGS 1, 2
#define OTHER_ARGS (1, 2)

int f(int x, int y) { return x + y; }

int main()
{
return f(ARGS) + f OTHER_ARGS;
}
```

However, the only real way to handle avoiding conversion of the 2nd case is to 
examine the context of macro expansion.  This is another edge case that will 
have to be handled subsequently.

This gets tricky because AFAIK there is no way to select expressions in the AST 
that result from macro expansion.  You have to match the macro expansion 
locations against AST nodes to identify the node(s) that match the expansion 
location yourself.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-16 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added inline comments.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:9
+
+// C forbids comma operator in initializing expressions.
+#define BAD_OP 1, 2

LegalizeAdulthood wrote:
> aaron.ballman wrote:
> > It's also forbidden in C++.
> C++ just talks about a constant expression and doesn't explicitly disallow 
> `operator,` in such expressions that I could find.  Can you point me to where 
> it is disallowed?
> 
> If it is disallowed universally, then I don't see why you asked me to parse 
> it `:)`
> C++ just talks about a constant expression and doesn't explicitly disallow 
> operator, in such expressions that I could find. Can you point me to where it 
> is disallowed?

It falls out from the grammar:

http://eel.is/c++draft/enum#nt:enumerator-definition
http://eel.is/c++draft/expr.const#nt:constant-expression
http://eel.is/c++draft/expr.cond#nt:conditional-expression

> If it is disallowed universally, then I don't see why you asked me to parse 
> it :)

It can show up in paren expressions and the interface is generally about 
integer literal expressions.


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-16 Thread Richard via Phabricator via cfe-commits
LegalizeAdulthood added inline comments.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:9
+
+// C forbids comma operator in initializing expressions.
+#define BAD_OP 1, 2

aaron.ballman wrote:
> It's also forbidden in C++.
C++ just talks about a constant expression and doesn't explicitly disallow 
`operator,` in such expressions that I could find.  Can you point me to where 
it is disallowed?

If it is disallowed universally, then I don't see why you asked me to parse it 
`:)`


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-16 Thread Aaron Ballman via Phabricator via cfe-commits
aaron.ballman added a comment.

Note, I'm in C standards committee meetings all week this week, so I expect I 
won't be doing many reviews this week and I'll be catching up as best I can 
starting next week.




Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:324
 {
-  IntegralLiteralExpressionMatcher Matcher(MacroTokens);
-  return Matcher.match();
+  IntegralLiteralExpressionMatcher Matcher(MacroTokens, LangOpts.C99 == 0);
+  bool Matched = Matcher.match();

Huh? Comma wasn't allowed in C89 either... In fact, there's no language mode 
which allows top-level commas in either C or C++ -- the issue with the comma 
operator is a parsing one. You can't tell the difference between the comma 
being part of the initializer expression or the comma being the separator 
between enumerators.



Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:326
+  bool Matched = Matcher.match();
+  if (LangOpts.C99 &&
+  (Matcher.largestLiteralSize() != LiteralSize::Int &&

And C89?



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:1
+// RUN: %check_clang_tidy %s modernize-macro-to-enum %t
+

It'd be useful to run this test in C++ mode as well to demonstrate the 
behavioral differences.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:3-7
+// C requires enum values to fit into an int.
+#define TOO_BIG1 1L
+#define TOO_BIG2 1UL
+#define TOO_BIG3 1LL
+#define TOO_BIG4 1ULL

How do we want to handle the fact that Clang has an extension to allow this? 
Perhaps we want a config option for pedantic vs non-pedantic fixes?



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:9
+
+// C forbids comma operator in initializing expressions.
+#define BAD_OP 1, 2

It's also forbidden in C++.



Comment at: 
clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:10
+// C forbids comma operator in initializing expressions.
+#define BAD_OP 1, 2
+

Can you add a test:
```
#define GOOD_OP (1, 2)
```
to make sure it still gets converted to an enum?


Repository:
  rG LLVM Github Monorepo

CHANGES SINCE LAST ACTION
  https://reviews.llvm.org/D125622/new/

https://reviews.llvm.org/D125622

___
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits


[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

2022-05-14 Thread Richard via Phabricator via cfe-commits
LegalizeAdulthood created this revision.
LegalizeAdulthood added reviewers: aaron.ballman, njames93.
LegalizeAdulthood added a project: clang-tools-extra.
Herald added subscribers: carlosgalvezp, xazax.hun.
Herald added a project: All.
LegalizeAdulthood requested review of this revision.

C requires that enum values fit into an int.  Scan the macro tokens
present in an initializing expression and reject macros that contain
tokens that have suffixes making them larger than int.

C forbids the comma operator in enum initializing expressions, so
optionally reject comma operator.

Fixes #55467


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125622

Files:
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
  clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c
  clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp

Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
===
--- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
+++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
@@ -36,177 +36,237 @@
   return Tokens;
 }
 
-static bool matchText(const char *Text) {
+static bool matchText(const char *Text, bool AllowComma) {
   std::vector Tokens{tokenify(Text)};
-  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens);
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma);
 
   return Matcher.match();
 }
 
+static modernize::LiteralSize sizeText(const char *Text) {
+  std::vector Tokens{tokenify(Text)};
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true);
+  if (Matcher.match())
+return Matcher.largestLiteralSize();
+  return modernize::LiteralSize::Unknown;
+}
+
+static const char *toString(modernize::LiteralSize Value) {
+  switch (Value) {
+  case modernize::LiteralSize::Int:
+return "Int";
+  case modernize::LiteralSize::UnsignedInt:
+return "UnsignedInt";
+  case modernize::LiteralSize::Long:
+return "Long";
+  case modernize::LiteralSize::UnsignedLong:
+return "UnsignedLong";
+  case modernize::LiteralSize::LongLong:
+return "LongLong";
+  case modernize::LiteralSize::UnsignedLongLong:
+return "UnsignedLongLong";
+  default:
+return "Unknown";
+  }
+}
+
 namespace {
 
-struct Param {
+struct MatchParam {
+  bool AllowComma;
   bool Matched;
   const char *Text;
 
-  friend std::ostream <<(std::ostream , const Param ) {
-return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '"
-   << Value.Text << "'";
+  friend std::ostream <<(std::ostream , const MatchParam ) {
+return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma
+   << ", Matched: " << std::boolalpha << Value.Matched
+   << ", Text: '" << Value.Text << '\'';
   }
 };
 
-class MatcherTest : public ::testing::TestWithParam {};
+struct SizeParam {
+  modernize::LiteralSize Size;
+  const char *Text;
+
+  friend std::ostream <<(std::ostream , const SizeParam ) {
+return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\'';
+  }
+};
+
+class MatcherTest : public ::testing::TestWithParam {};
+
+class SizeTest : public ::testing::TestWithParam {};
 
 } // namespace
 
-static const Param Params[] = {
+static const MatchParam MatchParams[] = {
 // Accept integral literals.
-{true, "1"},
-{true, "0177"},
-{true, "0xdeadbeef"},
-{true, "0b1011"},
-{true, "'c'"},
+{true, true, "1"},
+{true, true, "0177"},
+{true, true, "0xdeadbeef"},
+{true, true, "0b1011"},
+{true, true, "'c'"},
 // Reject non-integral literals.
-{false, "1.23"},
-{false, "0x1p3"},
-{false, R"("string")"},
-{false, "1i"},
+{true, false, "1.23"},
+{true, false, "0x1p3"},
+{true, false, R"("string")"},
+{true, false, "1i"},
 
 // Accept literals with these unary operators.
-{true, "-1"},
-{true, "+1"},
-{true, "~1"},
-{true, "!1"},
+{true, true, "-1"},
+{true, true, "+1"},
+{true, true, "~1"},
+{true, true, "!1"},
 // Reject invalid unary operators.
-{false, "1-"},
-{false, "1+"},
-{false, "1~"},
-{false, "1!"},
+{true, false, "1-"},
+{true, false, "1+"},
+{true, false, "1~"},
+{true, false, "1!"},
 
 // Accept valid binary operators.
-{true, "1+1"},
-{true, "1-1"},
-{true, "1*1"},
-{true, "1/1"},
-{true, "1%2"},
-{true, "1<<1"},
-{true, "1>>1"},
-{true, "1<=>1"},
-{true, "1<1"},
-{true, "1>1"},
-{true, "1<=1"},
-{true, "1>=1"},
-{true, "1==1"},
-{true, "1!=1"},
-{true, "1&1"},
-{true, "1^1"},
-{true, "1|1"},
-{true, "1&&1"},
-{true, "1||1"},
-{true, "1+ +1"}, // A space is