[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

Richard via Phabricator via cfe-commits Sat, 14 May 2022 19:49:31 -0700

LegalizeAdulthood created this revision.
LegalizeAdulthood added reviewers: aaron.ballman, njames93.
LegalizeAdulthood added a project: clang-tools-extra.
Herald added subscribers: carlosgalvezp, xazax.hun.
Herald added a project: All.
LegalizeAdulthood requested review of this revision.


C requires that enum values fit into an int.  Scan the macro tokens
present in an initializing expression and reject macros that contain
tokens that have suffixes making them larger than int.

C forbids the comma operator in enum initializing expressions, so
optionally reject comma operator.

Fixes #55467


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D125622

Files:
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp
  clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
  clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
  clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c
  clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp

Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
===================================================================
--- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
+++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp
@@ -36,177 +36,237 @@
   return Tokens;
 }
 
-static bool matchText(const char *Text) {
+static bool matchText(const char *Text, bool AllowComma) {
   std::vector<Token> Tokens{tokenify(Text)};
-  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens);
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma);
 
   return Matcher.match();
 }
 
+static modernize::LiteralSize sizeText(const char *Text) {
+  std::vector<Token> Tokens{tokenify(Text)};
+  modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true);
+  if (Matcher.match())
+    return Matcher.largestLiteralSize();
+  return modernize::LiteralSize::Unknown;
+}
+
+static const char *toString(modernize::LiteralSize Value) {
+  switch (Value) {
+  case modernize::LiteralSize::Int:
+    return "Int";
+  case modernize::LiteralSize::UnsignedInt:
+    return "UnsignedInt";
+  case modernize::LiteralSize::Long:
+    return "Long";
+  case modernize::LiteralSize::UnsignedLong:
+    return "UnsignedLong";
+  case modernize::LiteralSize::LongLong:
+    return "LongLong";
+  case modernize::LiteralSize::UnsignedLongLong:
+    return "UnsignedLongLong";
+  default:
+    return "Unknown";
+  }
+}
+
 namespace {
 
-struct Param {
+struct MatchParam {
+  bool AllowComma;
   bool Matched;
   const char *Text;
 
-  friend std::ostream &operator<<(std::ostream &Str, const Param &Value) {
-    return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '"
-               << Value.Text << "'";
+  friend std::ostream &operator<<(std::ostream &Str, const MatchParam &Value) {
+    return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma
+               << ", Matched: " << std::boolalpha << Value.Matched
+               << ", Text: '" << Value.Text << '\'';
   }
 };
 
-class MatcherTest : public ::testing::TestWithParam<Param> {};
+struct SizeParam {
+  modernize::LiteralSize Size;
+  const char *Text;
+
+  friend std::ostream &operator<<(std::ostream &Str, const SizeParam &Value) {
+    return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\'';
+  }
+};
+
+class MatcherTest : public ::testing::TestWithParam<MatchParam> {};
+
+class SizeTest : public ::testing::TestWithParam<SizeParam> {};
 
 } // namespace
 
-static const Param Params[] = {
+static const MatchParam MatchParams[] = {
     // Accept integral literals.
-    {true, "1"},
-    {true, "0177"},
-    {true, "0xdeadbeef"},
-    {true, "0b1011"},
-    {true, "'c'"},
+    {true, true, "1"},
+    {true, true, "0177"},
+    {true, true, "0xdeadbeef"},
+    {true, true, "0b1011"},
+    {true, true, "'c'"},
     // Reject non-integral literals.
-    {false, "1.23"},
-    {false, "0x1p3"},
-    {false, R"("string")"},
-    {false, "1i"},
+    {true, false, "1.23"},
+    {true, false, "0x1p3"},
+    {true, false, R"("string")"},
+    {true, false, "1i"},
 
     // Accept literals with these unary operators.
-    {true, "-1"},
-    {true, "+1"},
-    {true, "~1"},
-    {true, "!1"},
+    {true, true, "-1"},
+    {true, true, "+1"},
+    {true, true, "~1"},
+    {true, true, "!1"},
     // Reject invalid unary operators.
-    {false, "1-"},
-    {false, "1+"},
-    {false, "1~"},
-    {false, "1!"},
+    {true, false, "1-"},
+    {true, false, "1+"},
+    {true, false, "1~"},
+    {true, false, "1!"},
 
     // Accept valid binary operators.
-    {true, "1+1"},
-    {true, "1-1"},
-    {true, "1*1"},
-    {true, "1/1"},
-    {true, "1%2"},
-    {true, "1<<1"},
-    {true, "1>>1"},
-    {true, "1<=>1"},
-    {true, "1<1"},
-    {true, "1>1"},
-    {true, "1<=1"},
-    {true, "1>=1"},
-    {true, "1==1"},
-    {true, "1!=1"},
-    {true, "1&1"},
-    {true, "1^1"},
-    {true, "1|1"},
-    {true, "1&&1"},
-    {true, "1||1"},
-    {true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --.
-    {true, "1- -1"},
-    {true, "1,1"},
+    {true, true, "1+1"},
+    {true, true, "1-1"},
+    {true, true, "1*1"},
+    {true, true, "1/1"},
+    {true, true, "1%2"},
+    {true, true, "1<<1"},
+    {true, true, "1>>1"},
+    {true, true, "1<=>1"},
+    {true, true, "1<1"},
+    {true, true, "1>1"},
+    {true, true, "1<=1"},
+    {true, true, "1>=1"},
+    {true, true, "1==1"},
+    {true, true, "1!=1"},
+    {true, true, "1&1"},
+    {true, true, "1^1"},
+    {true, true, "1|1"},
+    {true, true, "1&&1"},
+    {true, true, "1||1"},
+    {true, true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --.
+    {true, true, "1- -1"},
+    {true, true, "1,1"},
     // Reject invalid binary operators.
-    {false, "1+"},
-    {false, "1-"},
-    {false, "1*"},
-    {false, "1/"},
-    {false, "1%"},
-    {false, "1<<"},
-    {false, "1>>"},
-    {false, "1<=>"},
-    {false, "1<"},
-    {false, "1>"},
-    {false, "1<="},
-    {false, "1>="},
-    {false, "1=="},
-    {false, "1!="},
-    {false, "1&"},
-    {false, "1^"},
-    {false, "1|"},
-    {false, "1&&"},
-    {false, "1||"},
-    {false, "1,"},
-    {false, ",1"},
+    {true, false, "1+"},
+    {true, false, "1-"},
+    {true, false, "1*"},
+    {true, false, "1/"},
+    {true, false, "1%"},
+    {true, false, "1<<"},
+    {true, false, "1>>"},
+    {true, false, "1<=>"},
+    {true, false, "1<"},
+    {true, false, "1>"},
+    {true, false, "1<="},
+    {true, false, "1>="},
+    {true, false, "1=="},
+    {true, false, "1!="},
+    {true, false, "1&"},
+    {true, false, "1^"},
+    {true, false, "1|"},
+    {true, false, "1&&"},
+    {true, false, "1||"},
+    {true, false, "1,"},
+    {true, false, ",1"},
 
     // Accept valid ternary operators.
-    {true, "1?1:1"},
-    {true, "1?:1"}, // A gcc extension treats x ? : y as x ? x : y.
+    {true, true, "1?1:1"},
+    {true, true, "1?:1"}, // A gcc extension treats x ? : y as x ? x : y.
     // Reject invalid ternary operators.
-    {false, "?"},
-    {false, "?1"},
-    {false, "?:"},
-    {false, "?:1"},
-    {false, "?1:"},
-    {false, "?1:1"},
-    {false, "1?"},
-    {false, "1?1"},
-    {false, "1?:"},
-    {false, "1?1:"},
+    {true, false, "?"},
+    {true, false, "?1"},
+    {true, false, "?:"},
+    {true, false, "?:1"},
+    {true, false, "?1:"},
+    {true, false, "?1:1"},
+    {true, false, "1?"},
+    {true, false, "1?1"},
+    {true, false, "1?:"},
+    {true, false, "1?1:"},
 
     // Accept parenthesized expressions.
-    {true, "(1)"},
-    {true, "((+1))"},
-    {true, "((+(1)))"},
-    {true, "(-1)"},
-    {true, "-(1)"},
-    {true, "(+1)"},
-    {true, "((+1))"},
-    {true, "+(1)"},
-    {true, "(~1)"},
-    {true, "~(1)"},
-    {true, "(!1)"},
-    {true, "!(1)"},
-    {true, "(1+1)"},
-    {true, "(1-1)"},
-    {true, "(1*1)"},
-    {true, "(1/1)"},
-    {true, "(1%2)"},
-    {true, "(1<<1)"},
-    {true, "(1>>1)"},
-    {true, "(1<=>1)"},
-    {true, "(1<1)"},
-    {true, "(1>1)"},
-    {true, "(1<=1)"},
-    {true, "(1>=1)"},
-    {true, "(1==1)"},
-    {true, "(1!=1)"},
-    {true, "(1&1)"},
-    {true, "(1^1)"},
-    {true, "(1|1)"},
-    {true, "(1&&1)"},
-    {true, "(1||1)"},
-    {true, "(1?1:1)"},
+    {true, true, "(1)"},
+    {true, true, "((+1))"},
+    {true, true, "((+(1)))"},
+    {true, true, "(-1)"},
+    {true, true, "-(1)"},
+    {true, true, "(+1)"},
+    {true, true, "((+1))"},
+    {true, true, "+(1)"},
+    {true, true, "(~1)"},
+    {true, true, "~(1)"},
+    {true, true, "(!1)"},
+    {true, true, "!(1)"},
+    {true, true, "(1+1)"},
+    {true, true, "(1-1)"},
+    {true, true, "(1*1)"},
+    {true, true, "(1/1)"},
+    {true, true, "(1%2)"},
+    {true, true, "(1<<1)"},
+    {true, true, "(1>>1)"},
+    {true, true, "(1<=>1)"},
+    {true, true, "(1<1)"},
+    {true, true, "(1>1)"},
+    {true, true, "(1<=1)"},
+    {true, true, "(1>=1)"},
+    {true, true, "(1==1)"},
+    {true, true, "(1!=1)"},
+    {true, true, "(1&1)"},
+    {true, true, "(1^1)"},
+    {true, true, "(1|1)"},
+    {true, true, "(1&&1)"},
+    {true, true, "(1||1)"},
+    {true, true, "(1?1:1)"},
 
     // Accept more complicated "chained" expressions.
-    {true, "1+1+1"},
-    {true, "1+1+1+1"},
-    {true, "1+1+1+1+1"},
-    {true, "1*1*1"},
-    {true, "1*1*1*1"},
-    {true, "1*1*1*1*1"},
-    {true, "1<<1<<1"},
-    {true, "4U>>1>>1"},
-    {true, "1<1<1"},
-    {true, "1>1>1"},
-    {true, "1<=1<=1"},
-    {true, "1>=1>=1"},
-    {true, "1==1==1"},
-    {true, "1!=1!=1"},
-    {true, "1&1&1"},
-    {true, "1^1^1"},
-    {true, "1|1|1"},
-    {true, "1&&1&&1"},
-    {true, "1||1||1"},
-    {true, "1,1,1"},
+    {true, true, "1+1+1"},
+    {true, true, "1+1+1+1"},
+    {true, true, "1+1+1+1+1"},
+    {true, true, "1*1*1"},
+    {true, true, "1*1*1*1"},
+    {true, true, "1*1*1*1*1"},
+    {true, true, "1<<1<<1"},
+    {true, true, "4U>>1>>1"},
+    {true, true, "1<1<1"},
+    {true, true, "1>1>1"},
+    {true, true, "1<=1<=1"},
+    {true, true, "1>=1>=1"},
+    {true, true, "1==1==1"},
+    {true, true, "1!=1!=1"},
+    {true, true, "1&1&1"},
+    {true, true, "1^1^1"},
+    {true, true, "1|1|1"},
+    {true, true, "1&&1&&1"},
+    {true, true, "1||1||1"},
+    {true, true, "1,1,1"},
+
+    // Optionally reject comma operator
+    {false, false, "1,1"}
 };
 
 TEST_P(MatcherTest, MatchResult) {
-  EXPECT_TRUE(matchText(GetParam().Text) == GetParam().Matched);
+  EXPECT_TRUE(matchText(GetParam().Text, GetParam().AllowComma) == GetParam().Matched);
 }
 
-INSTANTIATE_TEST_SUITE_P(TokenExpressionParserTests, MatcherTest,
-                         ::testing::ValuesIn(Params));
+INSTANTIATE_TEST_SUITE_P(IntegralLiteralExpressionMatcherTests, MatcherTest,
+                         ::testing::ValuesIn(MatchParams));
+
+static const SizeParam SizeParams[] = {
+    {modernize::LiteralSize::Int, "1"},
+    {modernize::LiteralSize::UnsignedInt, "1U"},
+    {modernize::LiteralSize::Long, "1L"},
+    {modernize::LiteralSize::UnsignedLong, "1UL"},
+    {modernize::LiteralSize::UnsignedLong, "1LU"},
+    {modernize::LiteralSize::LongLong, "1LL"},
+    {modernize::LiteralSize::UnsignedLongLong, "1ULL"},
+    {modernize::LiteralSize::UnsignedLongLong, "1LLU"}};
+
+TEST_P(SizeTest, TokenSize) {
+  EXPECT_EQ(sizeText(GetParam().Text), GetParam().Size);
+};
+
+INSTANTIATE_TEST_SUITE_P(IntegralLiteralExpressionMatcherTests, SizeTest,
+                         ::testing::ValuesIn(SizeParams));
 
 } // namespace test
 } // namespace tidy
Index: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c
===================================================================
--- /dev/null
+++ clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c
@@ -0,0 +1,16 @@
+// RUN: %check_clang_tidy %s modernize-macro-to-enum %t
+
+// C requires enum values to fit into an int.
+#define TOO_BIG1 1L
+#define TOO_BIG2 1UL
+#define TOO_BIG3 1LL
+#define TOO_BIG4 1ULL
+
+// C forbids comma operator in initializing expressions.
+#define BAD_OP 1, 2
+
+#define SIZE_OK1 1
+#define SIZE_OK2 1U
+// CHECK-MESSAGES: :[[@LINE-2]]:1: warning: replace macro with enum [modernize-macro-to-enum]
+// CHECK-MESSAGES: :[[@LINE-3]]:9: warning: macro 'SIZE_OK1' defines an integral constant; prefer an enum instead
+// CHECK-MESSAGES: :[[@LINE-3]]:9: warning: macro 'SIZE_OK2' defines an integral constant; prefer an enum instead
Index: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
===================================================================
--- clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
+++ clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp
@@ -321,8 +321,14 @@
 
 bool MacroToEnumCallbacks::isInitializer(ArrayRef<Token> MacroTokens)
 {
-  IntegralLiteralExpressionMatcher Matcher(MacroTokens);
-  return Matcher.match();
+  IntegralLiteralExpressionMatcher Matcher(MacroTokens, LangOpts.C99 == 0);
+  bool Matched = Matcher.match();
+  if (LangOpts.C99 &&
+      (Matcher.largestLiteralSize() != LiteralSize::Int &&
+       Matcher.largestLiteralSize() != LiteralSize::UnsignedInt))
+    return false;
+
+  return Matched;
 }
 
 
Index: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
===================================================================
--- clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
+++ clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h
@@ -16,15 +16,27 @@
 namespace tidy {
 namespace modernize {
 
+enum class LiteralSize {
+  Unknown = 0,
+  Int,
+  UnsignedInt,
+  Long,
+  UnsignedLong,
+  LongLong,
+  UnsignedLongLong
+};
+
 // Parses an array of tokens and returns true if they conform to the rules of
 // C++ for whole expressions involving integral literals.  Follows the operator
-// precedence rules of C++.
+// precedence rules of C++.  Optionally exclude comma operator expressions.
 class IntegralLiteralExpressionMatcher {
 public:
-  IntegralLiteralExpressionMatcher(ArrayRef<Token> Tokens)
-      : Current(Tokens.begin()), End(Tokens.end()) {}
+  IntegralLiteralExpressionMatcher(ArrayRef<Token> Tokens, bool CommaAllowed)
+      : Current(Tokens.begin()), End(Tokens.end()), CommaAllowed(CommaAllowed) {
+  }
 
   bool match();
+  LiteralSize largestLiteralSize() const;
 
 private:
   bool advance();
@@ -64,6 +76,8 @@
 
   ArrayRef<Token>::iterator Current;
   ArrayRef<Token>::iterator End;
+  LiteralSize LargestSize{LiteralSize::Unknown};
+  bool CommaAllowed;
 };
 
 } // namespace modernize
Index: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp
===================================================================
--- clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp
+++ clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp
@@ -8,6 +8,7 @@
 
 #include "IntegralLiteralExpressionMatcher.h"
 
+#include <algorithm>
 #include <cctype>
 #include <stdexcept>
 
@@ -81,6 +82,51 @@
   return true;
 }
 
+static LiteralSize literalTokenSize(const Token &Tok) {
+  unsigned int Length = Tok.getLength();
+  if (Length <= 1) {
+      return LiteralSize::Int;
+  }
+
+  bool SeenUnsigned{};
+  bool SeenLong{};
+  bool SeenLongLong{};
+  const char *Text = Tok.getLiteralData();
+  for (unsigned int End = Length - 1; End > 0; --End) {
+    if (std::isdigit(Text[End]))
+      break;
+
+    if (std::toupper(Text[End]) == 'U')
+      SeenUnsigned = true;
+    else if (std::toupper(Text[End]) == 'L') {
+      if (SeenLong)
+        SeenLongLong = true;
+      SeenLong = true;
+    }
+  }
+
+  if (SeenLongLong) {
+    if (SeenUnsigned)
+      return LiteralSize::UnsignedLongLong;
+
+    return LiteralSize::LongLong;
+  }
+  if (SeenLong) {
+    if (SeenUnsigned)
+      return LiteralSize::UnsignedLong;
+
+    return LiteralSize::Long;
+  }
+  if (SeenUnsigned)
+    return LiteralSize::UnsignedInt;
+
+  return LiteralSize::Int;
+}
+
+static bool operator<(LiteralSize LHS, LiteralSize RHS) {
+  return static_cast<int>(LHS) < static_cast<int>(RHS);
+}
+
 bool IntegralLiteralExpressionMatcher::unaryExpr() {
   if (!unaryOperator())
     return false;
@@ -102,7 +148,10 @@
       !isIntegralConstant(*Current)) {
     return false;
   }
+
+  LargestSize = std::max(LargestSize, literalTokenSize(*Current));
   ++Current;
+
   return true;
 }
 
@@ -217,8 +266,12 @@
 }
 
 bool IntegralLiteralExpressionMatcher::commaExpr() {
-  return nonTerminalChainedExpr<tok::TokenKind::comma>(
-      &IntegralLiteralExpressionMatcher::conditionalExpr);
+  auto Pred = CommaAllowed
+                  ? std::function<bool(Token)>(
+                        [](Token Tok) { return Tok.is(tok::TokenKind::comma); })
+                  : std::function<bool(Token)>([](Token) { return false; });
+  return nonTerminalChainedExpr(
+      &IntegralLiteralExpressionMatcher::conditionalExpr, Pred);
 }
 
 bool IntegralLiteralExpressionMatcher::expr() { return commaExpr(); }
@@ -227,6 +280,10 @@
   return expr() && Current == End;
 }
 
+LiteralSize IntegralLiteralExpressionMatcher::largestLiteralSize() const {
+  return LargestSize;
+}
+
 } // namespace modernize
 } // namespace tidy
 } // namespace clang

_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files

Reply via email to