[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
This revision was landed with ongoing or failed builds. This revision was automatically updated to reflect the committed changes. Closed by commit rGb418ef5cb90b: [clang-tidy] Reject invalid enum initializers in C files (authored by LegalizeAdulthood). Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 Files: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp === --- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp +++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp @@ -36,177 +36,241 @@ return Tokens; } -static bool matchText(const char *Text) { +static bool matchText(const char *Text, bool AllowComma) { std::vector Tokens{tokenify(Text)}; - modernize::IntegralLiteralExpressionMatcher Matcher(Tokens); + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma); return Matcher.match(); } +static modernize::LiteralSize sizeText(const char *Text) { + std::vector Tokens{tokenify(Text)}; + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true); + if (Matcher.match()) +return Matcher.largestLiteralSize(); + return modernize::LiteralSize::Unknown; +} + +static const char *toString(modernize::LiteralSize Value) { + switch (Value) { + case modernize::LiteralSize::Int: +return "Int"; + case modernize::LiteralSize::UnsignedInt: +return "UnsignedInt"; + case modernize::LiteralSize::Long: +return "Long"; + case modernize::LiteralSize::UnsignedLong: +return "UnsignedLong"; + case modernize::LiteralSize::LongLong: +return "LongLong"; + case modernize::LiteralSize::UnsignedLongLong: +return "UnsignedLongLong"; + default: +return "Unknown"; + } +} + namespace { -struct Param { +struct MatchParam { + bool AllowComma; bool Matched; const char *Text; - friend std::ostream <<(std::ostream , const Param ) { -return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '" - << Value.Text << "'"; + friend std::ostream <<(std::ostream , const MatchParam ) { +return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma + << ", Matched: " << std::boolalpha << Value.Matched + << ", Text: '" << Value.Text << '\''; } }; -class MatcherTest : public ::testing::TestWithParam {}; +struct SizeParam { + modernize::LiteralSize Size; + const char *Text; + + friend std::ostream <<(std::ostream , const SizeParam ) { +return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\''; + } +}; + +class MatcherTest : public ::testing::TestWithParam {}; + +class SizeTest : public ::testing::TestWithParam {}; } // namespace -static const Param Params[] = { +static const MatchParam MatchParams[] = { // Accept integral literals. -{true, "1"}, -{true, "0177"}, -{true, "0xdeadbeef"}, -{true, "0b1011"}, -{true, "'c'"}, +{true, true, "1"}, +{true, true, "0177"}, +{true, true, "0xdeadbeef"}, +{true, true, "0b1011"}, +{true, true, "'c'"}, // Reject non-integral literals. -{false, "1.23"}, -{false, "0x1p3"}, -{false, R"("string")"}, -{false, "1i"}, +{true, false, "1.23"}, +{true, false, "0x1p3"}, +{true, false, R"("string")"}, +{true, false, "1i"}, // Accept literals with these unary operators. -{true, "-1"}, -{true, "+1"}, -{true, "~1"}, -{true, "!1"}, +{true, true, "-1"}, +{true, true, "+1"}, +{true, true, "~1"}, +{true, true, "!1"}, // Reject invalid unary operators. -{false, "1-"}, -{false, "1+"}, -{false, "1~"}, -{false, "1!"}, +{true, false, "1-"}, +{true, false, "1+"}, +{true, false, "1~"}, +{true, false, "1!"}, // Accept valid binary operators. -{true, "1+1"}, -{true, "1-1"}, -{true, "1*1"}, -{true, "1/1"}, -{true, "1%2"}, -{true, "1<<1"}, -{true, "1>>1"}, -{true, "1<=>1"}, -{true, "1<1"}, -{true, "1>1"}, -{true, "1<=1"}, -{true, "1>=1"}, -{true, "1==1"}, -{true, "1!=1"}, -{true, "1&1"}, -{true, "1^1"}, -{true, "1|1"}, -{true, "1&&1"}, -{true, "1||1"}, -{true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --. -{true, "1- -1"}, -{true, "1,1"}, +{true, true, "1+1"}, +{true, true, "1-1"}, +{true, true, "1*1"}, +{true, true, "1/1"}, +{true,
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
LegalizeAdulthood updated this revision to Diff 433648. LegalizeAdulthood added a comment. Update from review comments CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 Files: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp === --- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp +++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp @@ -36,177 +36,241 @@ return Tokens; } -static bool matchText(const char *Text) { +static bool matchText(const char *Text, bool AllowComma) { std::vector Tokens{tokenify(Text)}; - modernize::IntegralLiteralExpressionMatcher Matcher(Tokens); + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma); return Matcher.match(); } +static modernize::LiteralSize sizeText(const char *Text) { + std::vector Tokens{tokenify(Text)}; + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true); + if (Matcher.match()) +return Matcher.largestLiteralSize(); + return modernize::LiteralSize::Unknown; +} + +static const char *toString(modernize::LiteralSize Value) { + switch (Value) { + case modernize::LiteralSize::Int: +return "Int"; + case modernize::LiteralSize::UnsignedInt: +return "UnsignedInt"; + case modernize::LiteralSize::Long: +return "Long"; + case modernize::LiteralSize::UnsignedLong: +return "UnsignedLong"; + case modernize::LiteralSize::LongLong: +return "LongLong"; + case modernize::LiteralSize::UnsignedLongLong: +return "UnsignedLongLong"; + default: +return "Unknown"; + } +} + namespace { -struct Param { +struct MatchParam { + bool AllowComma; bool Matched; const char *Text; - friend std::ostream <<(std::ostream , const Param ) { -return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '" - << Value.Text << "'"; + friend std::ostream <<(std::ostream , const MatchParam ) { +return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma + << ", Matched: " << std::boolalpha << Value.Matched + << ", Text: '" << Value.Text << '\''; } }; -class MatcherTest : public ::testing::TestWithParam {}; +struct SizeParam { + modernize::LiteralSize Size; + const char *Text; + + friend std::ostream <<(std::ostream , const SizeParam ) { +return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\''; + } +}; + +class MatcherTest : public ::testing::TestWithParam {}; + +class SizeTest : public ::testing::TestWithParam {}; } // namespace -static const Param Params[] = { +static const MatchParam MatchParams[] = { // Accept integral literals. -{true, "1"}, -{true, "0177"}, -{true, "0xdeadbeef"}, -{true, "0b1011"}, -{true, "'c'"}, +{true, true, "1"}, +{true, true, "0177"}, +{true, true, "0xdeadbeef"}, +{true, true, "0b1011"}, +{true, true, "'c'"}, // Reject non-integral literals. -{false, "1.23"}, -{false, "0x1p3"}, -{false, R"("string")"}, -{false, "1i"}, +{true, false, "1.23"}, +{true, false, "0x1p3"}, +{true, false, R"("string")"}, +{true, false, "1i"}, // Accept literals with these unary operators. -{true, "-1"}, -{true, "+1"}, -{true, "~1"}, -{true, "!1"}, +{true, true, "-1"}, +{true, true, "+1"}, +{true, true, "~1"}, +{true, true, "!1"}, // Reject invalid unary operators. -{false, "1-"}, -{false, "1+"}, -{false, "1~"}, -{false, "1!"}, +{true, false, "1-"}, +{true, false, "1+"}, +{true, false, "1~"}, +{true, false, "1!"}, // Accept valid binary operators. -{true, "1+1"}, -{true, "1-1"}, -{true, "1*1"}, -{true, "1/1"}, -{true, "1%2"}, -{true, "1<<1"}, -{true, "1>>1"}, -{true, "1<=>1"}, -{true, "1<1"}, -{true, "1>1"}, -{true, "1<=1"}, -{true, "1>=1"}, -{true, "1==1"}, -{true, "1!=1"}, -{true, "1&1"}, -{true, "1^1"}, -{true, "1|1"}, -{true, "1&&1"}, -{true, "1||1"}, -{true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --. -{true, "1- -1"}, -{true, "1,1"}, +{true, true, "1+1"}, +{true, true, "1-1"}, +{true, true, "1*1"}, +{true, true, "1/1"}, +{true, true, "1%2"}, +{true, true, "1<<1"}, +{true, true, "1>>1"}, +{true, true, "1<=>1"}, +{true, true, "1<1"}, +{true, true, "1>1"}, +{true, true,
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
LegalizeAdulthood marked 2 inline comments as done. LegalizeAdulthood added inline comments. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:3-7 +// C requires enum values to fit into an int. +#define TOO_BIG1 1L +#define TOO_BIG2 1UL +#define TOO_BIG3 1LL +#define TOO_BIG4 1ULL aaron.ballman wrote: > LegalizeAdulthood wrote: > > aaron.ballman wrote: > > > How do we want to handle the fact that Clang has an extension to allow > > > this? Perhaps we want a config option for pedantic vs non-pedantic fixes? > > I was trying to find a way to make it fail on gcc/clang on compiler > > explorer and I couldn't get it to fail in either compiler > > > > Where is the extension documented? It appears to be on by default in both > > compilers. > I don't think we have any documentation for it (we basically don't bother to > document our implementation-defined behavior, which drives me mildly nuts -- > we fall back on "the source code is the definitive documentation", which is > not user-friendly at all). > > That's why I was wondering if we wanted a pedantic vs non-pedantic option > here. Pedantically, the behavior you have today is correct. However, because > this is a super common extension to compilers, we might want to allow it to > be transformed despite being pedantically an extension. > > We can handle that as a follow-up though. Is there a way to force it to fail with some compiler flag? If you could show me on compiler explorer, that would be great. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
aaron.ballman accepted this revision. aaron.ballman added a comment. This revision is now accepted and ready to land. LGTM with a few comments that can be addressed when you commit. Comment at: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp:87-89 + if (Length <= 1) { + return LiteralSize::Int; + } Comment at: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp:91-93 + bool SeenUnsigned{}; + bool SeenLong{}; + bool SeenLongLong{}; I (personally) think this is a more clear form of initialization to most folks, but don't insist. Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:326 + bool Matched = Matcher.match(); + if (LangOpts.C99 && + (Matcher.largestLiteralSize() != LiteralSize::Int && LegalizeAdulthood wrote: > aaron.ballman wrote: > > And C89? > `C99` was the earliest dialect of C I could find in `LangOptions.def`. If > you can show me an earlier version for C, I'm happy to switch it. This keeps catching folks out, I may add a helper method to make this far more clear. `!LangOpts.CPlusPlus` means "in a C mode" for us currently. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:3-7 +// C requires enum values to fit into an int. +#define TOO_BIG1 1L +#define TOO_BIG2 1UL +#define TOO_BIG3 1LL +#define TOO_BIG4 1ULL LegalizeAdulthood wrote: > aaron.ballman wrote: > > How do we want to handle the fact that Clang has an extension to allow > > this? Perhaps we want a config option for pedantic vs non-pedantic fixes? > I was trying to find a way to make it fail on gcc/clang on compiler explorer > and I couldn't get it to fail in either compiler > > Where is the extension documented? It appears to be on by default in both > compilers. I don't think we have any documentation for it (we basically don't bother to document our implementation-defined behavior, which drives me mildly nuts -- we fall back on "the source code is the definitive documentation", which is not user-friendly at all). That's why I was wondering if we wanted a pedantic vs non-pedantic option here. Pedantically, the behavior you have today is correct. However, because this is a super common extension to compilers, we might want to allow it to be transformed despite being pedantically an extension. We can handle that as a follow-up though. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
LegalizeAdulthood updated this revision to Diff 432676. LegalizeAdulthood added a comment. - Add C++ test case for acceptable `operator,` initializer CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 Files: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp === --- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp +++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp @@ -36,177 +36,241 @@ return Tokens; } -static bool matchText(const char *Text) { +static bool matchText(const char *Text, bool AllowComma) { std::vector Tokens{tokenify(Text)}; - modernize::IntegralLiteralExpressionMatcher Matcher(Tokens); + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma); return Matcher.match(); } +static modernize::LiteralSize sizeText(const char *Text) { + std::vector Tokens{tokenify(Text)}; + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true); + if (Matcher.match()) +return Matcher.largestLiteralSize(); + return modernize::LiteralSize::Unknown; +} + +static const char *toString(modernize::LiteralSize Value) { + switch (Value) { + case modernize::LiteralSize::Int: +return "Int"; + case modernize::LiteralSize::UnsignedInt: +return "UnsignedInt"; + case modernize::LiteralSize::Long: +return "Long"; + case modernize::LiteralSize::UnsignedLong: +return "UnsignedLong"; + case modernize::LiteralSize::LongLong: +return "LongLong"; + case modernize::LiteralSize::UnsignedLongLong: +return "UnsignedLongLong"; + default: +return "Unknown"; + } +} + namespace { -struct Param { +struct MatchParam { + bool AllowComma; bool Matched; const char *Text; - friend std::ostream <<(std::ostream , const Param ) { -return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '" - << Value.Text << "'"; + friend std::ostream <<(std::ostream , const MatchParam ) { +return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma + << ", Matched: " << std::boolalpha << Value.Matched + << ", Text: '" << Value.Text << '\''; } }; -class MatcherTest : public ::testing::TestWithParam {}; +struct SizeParam { + modernize::LiteralSize Size; + const char *Text; + + friend std::ostream <<(std::ostream , const SizeParam ) { +return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\''; + } +}; + +class MatcherTest : public ::testing::TestWithParam {}; + +class SizeTest : public ::testing::TestWithParam {}; } // namespace -static const Param Params[] = { +static const MatchParam MatchParams[] = { // Accept integral literals. -{true, "1"}, -{true, "0177"}, -{true, "0xdeadbeef"}, -{true, "0b1011"}, -{true, "'c'"}, +{true, true, "1"}, +{true, true, "0177"}, +{true, true, "0xdeadbeef"}, +{true, true, "0b1011"}, +{true, true, "'c'"}, // Reject non-integral literals. -{false, "1.23"}, -{false, "0x1p3"}, -{false, R"("string")"}, -{false, "1i"}, +{true, false, "1.23"}, +{true, false, "0x1p3"}, +{true, false, R"("string")"}, +{true, false, "1i"}, // Accept literals with these unary operators. -{true, "-1"}, -{true, "+1"}, -{true, "~1"}, -{true, "!1"}, +{true, true, "-1"}, +{true, true, "+1"}, +{true, true, "~1"}, +{true, true, "!1"}, // Reject invalid unary operators. -{false, "1-"}, -{false, "1+"}, -{false, "1~"}, -{false, "1!"}, +{true, false, "1-"}, +{true, false, "1+"}, +{true, false, "1~"}, +{true, false, "1!"}, // Accept valid binary operators. -{true, "1+1"}, -{true, "1-1"}, -{true, "1*1"}, -{true, "1/1"}, -{true, "1%2"}, -{true, "1<<1"}, -{true, "1>>1"}, -{true, "1<=>1"}, -{true, "1<1"}, -{true, "1>1"}, -{true, "1<=1"}, -{true, "1>=1"}, -{true, "1==1"}, -{true, "1!=1"}, -{true, "1&1"}, -{true, "1^1"}, -{true, "1|1"}, -{true, "1&&1"}, -{true, "1||1"}, -{true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --. -{true, "1- -1"}, -{true, "1,1"}, +{true, true, "1+1"}, +{true, true, "1-1"}, +{true, true, "1*1"}, +{true, true, "1/1"}, +{true, true, "1%2"}, +{true, true, "1<<1"}, +{true, true, "1>>1"}, +{true, true, "1<=>1"}, +{true, true, "1<1"}, +{true, true,
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
LegalizeAdulthood marked 5 inline comments as done. LegalizeAdulthood added inline comments. Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:324 { - IntegralLiteralExpressionMatcher Matcher(MacroTokens); - return Matcher.match(); + IntegralLiteralExpressionMatcher Matcher(MacroTokens, LangOpts.C99 == 0); + bool Matched = Matcher.match(); aaron.ballman wrote: > Huh? Comma wasn't allowed in C89 either... In fact, there's no language mode > which allows top-level commas in either C or C++ -- the issue with the comma > operator is a parsing one. You can't tell the difference between the comma > being part of the initializer expression or the comma being the separator > between enumerators. The most recent diff handles the issue with `operator,` but the previous version was only handling the overly-big literal Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:326 + bool Matched = Matcher.match(); + if (LangOpts.C99 && + (Matcher.largestLiteralSize() != LiteralSize::Int && aaron.ballman wrote: > And C89? `C99` was the earliest dialect of C I could find in `LangOptions.def`. If you can show me an earlier version for C, I'm happy to switch it. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:3-7 +// C requires enum values to fit into an int. +#define TOO_BIG1 1L +#define TOO_BIG2 1UL +#define TOO_BIG3 1LL +#define TOO_BIG4 1ULL aaron.ballman wrote: > How do we want to handle the fact that Clang has an extension to allow this? > Perhaps we want a config option for pedantic vs non-pedantic fixes? I was trying to find a way to make it fail on gcc/clang on compiler explorer and I couldn't get it to fail in either compiler Where is the extension documented? It appears to be on by default in both compilers. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:10 +// C forbids comma operator in initializing expressions. +#define BAD_OP 1, 2 + aaron.ballman wrote: > LegalizeAdulthood wrote: > > aaron.ballman wrote: > > > Can you add a test: > > > ``` > > > #define GOOD_OP (1, 2) > > > ``` > > > to make sure it still gets converted to an enum? > > Yeah, another one I was thinking of is when someone does [[ > > https://godbolt.org/z/c677je8s1 | something as disgusting as this. ]] > > > > ``` > > #define ARGS 1, 2 > > #define OTHER_ARGS (1, 2) > > > > int f(int x, int y) { return x + y; } > > > > int main() > > { > > return f(ARGS) + f OTHER_ARGS; > > } > > ``` > > > > However, the only real way to handle avoiding conversion of the 2nd case is > > to examine the context of macro expansion. This is another edge case that > > will have to be handled subsequently. > > > > This gets tricky because AFAIK there is no way to select expressions in the > > AST that result from macro expansion. You have to match the macro > > expansion locations against AST nodes to identify the node(s) that match > > the expansion location yourself. > That's a good test case (for some definition of good)! :-) > > At this point, there's a few options: > > 1) Go back to the way things were -- disallow comma expressions, even in > parens. Document it as a limitation. > 2) Accept comma expressions in parens, ignore the problem case like you > described above. Document it as a limitation? > 3) Try to solve every awful code construct we can ever imagine. Document the > check is perfect in every way, but probably not land it for several years. > > I think I'd be happy with #2 but I could also live with #1 In the most recent diff, I'm supporting `operator,` inside parens for C++ and never allowing it in C. I plan to do a subsequent enhancement that examines the expansion locations of macros and rejects any macro whose expansion doesn't encompass a single expression. In other words, if the tokens in the macro somehow get incorporated into some syntactic element that isn't completely encased in a single expression, then reject the macro. CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
LegalizeAdulthood updated this revision to Diff 432675. LegalizeAdulthood marked an inline comment as done. LegalizeAdulthood added a comment. - Update from review comments - Disallow operator, unless inside parentheses CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 Files: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.cpp clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp === --- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp +++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp @@ -36,177 +36,241 @@ return Tokens; } -static bool matchText(const char *Text) { +static bool matchText(const char *Text, bool AllowComma) { std::vector Tokens{tokenify(Text)}; - modernize::IntegralLiteralExpressionMatcher Matcher(Tokens); + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma); return Matcher.match(); } +static modernize::LiteralSize sizeText(const char *Text) { + std::vector Tokens{tokenify(Text)}; + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true); + if (Matcher.match()) +return Matcher.largestLiteralSize(); + return modernize::LiteralSize::Unknown; +} + +static const char *toString(modernize::LiteralSize Value) { + switch (Value) { + case modernize::LiteralSize::Int: +return "Int"; + case modernize::LiteralSize::UnsignedInt: +return "UnsignedInt"; + case modernize::LiteralSize::Long: +return "Long"; + case modernize::LiteralSize::UnsignedLong: +return "UnsignedLong"; + case modernize::LiteralSize::LongLong: +return "LongLong"; + case modernize::LiteralSize::UnsignedLongLong: +return "UnsignedLongLong"; + default: +return "Unknown"; + } +} + namespace { -struct Param { +struct MatchParam { + bool AllowComma; bool Matched; const char *Text; - friend std::ostream <<(std::ostream , const Param ) { -return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '" - << Value.Text << "'"; + friend std::ostream <<(std::ostream , const MatchParam ) { +return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma + << ", Matched: " << std::boolalpha << Value.Matched + << ", Text: '" << Value.Text << '\''; } }; -class MatcherTest : public ::testing::TestWithParam {}; +struct SizeParam { + modernize::LiteralSize Size; + const char *Text; + + friend std::ostream <<(std::ostream , const SizeParam ) { +return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\''; + } +}; + +class MatcherTest : public ::testing::TestWithParam {}; + +class SizeTest : public ::testing::TestWithParam {}; } // namespace -static const Param Params[] = { +static const MatchParam MatchParams[] = { // Accept integral literals. -{true, "1"}, -{true, "0177"}, -{true, "0xdeadbeef"}, -{true, "0b1011"}, -{true, "'c'"}, +{true, true, "1"}, +{true, true, "0177"}, +{true, true, "0xdeadbeef"}, +{true, true, "0b1011"}, +{true, true, "'c'"}, // Reject non-integral literals. -{false, "1.23"}, -{false, "0x1p3"}, -{false, R"("string")"}, -{false, "1i"}, +{true, false, "1.23"}, +{true, false, "0x1p3"}, +{true, false, R"("string")"}, +{true, false, "1i"}, // Accept literals with these unary operators. -{true, "-1"}, -{true, "+1"}, -{true, "~1"}, -{true, "!1"}, +{true, true, "-1"}, +{true, true, "+1"}, +{true, true, "~1"}, +{true, true, "!1"}, // Reject invalid unary operators. -{false, "1-"}, -{false, "1+"}, -{false, "1~"}, -{false, "1!"}, +{true, false, "1-"}, +{true, false, "1+"}, +{true, false, "1~"}, +{true, false, "1!"}, // Accept valid binary operators. -{true, "1+1"}, -{true, "1-1"}, -{true, "1*1"}, -{true, "1/1"}, -{true, "1%2"}, -{true, "1<<1"}, -{true, "1>>1"}, -{true, "1<=>1"}, -{true, "1<1"}, -{true, "1>1"}, -{true, "1<=1"}, -{true, "1>=1"}, -{true, "1==1"}, -{true, "1!=1"}, -{true, "1&1"}, -{true, "1^1"}, -{true, "1|1"}, -{true, "1&&1"}, -{true, "1||1"}, -{true, "1+ +1"}, // A space is needed to avoid being tokenized as ++ or --. -{true, "1- -1"}, -{true, "1,1"}, +{true, true, "1+1"}, +{true, true, "1-1"}, +{true, true, "1*1"}, +{true, true, "1/1"}, +{true, true, "1%2"}, +{true, true, "1<<1"}, +{true, true, "1>>1"}, +
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
aaron.ballman added inline comments. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:10 +// C forbids comma operator in initializing expressions. +#define BAD_OP 1, 2 + LegalizeAdulthood wrote: > aaron.ballman wrote: > > Can you add a test: > > ``` > > #define GOOD_OP (1, 2) > > ``` > > to make sure it still gets converted to an enum? > Yeah, another one I was thinking of is when someone does [[ > https://godbolt.org/z/c677je8s1 | something as disgusting as this. ]] > > ``` > #define ARGS 1, 2 > #define OTHER_ARGS (1, 2) > > int f(int x, int y) { return x + y; } > > int main() > { > return f(ARGS) + f OTHER_ARGS; > } > ``` > > However, the only real way to handle avoiding conversion of the 2nd case is > to examine the context of macro expansion. This is another edge case that > will have to be handled subsequently. > > This gets tricky because AFAIK there is no way to select expressions in the > AST that result from macro expansion. You have to match the macro expansion > locations against AST nodes to identify the node(s) that match the expansion > location yourself. That's a good test case (for some definition of good)! :-) At this point, there's a few options: 1) Go back to the way things were -- disallow comma expressions, even in parens. Document it as a limitation. 2) Accept comma expressions in parens, ignore the problem case like you described above. Document it as a limitation? 3) Try to solve every awful code construct we can ever imagine. Document the check is perfect in every way, but probably not land it for several years. I think I'd be happy with #2 but I could also live with #1 Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
LegalizeAdulthood marked 2 inline comments as done. LegalizeAdulthood added inline comments. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:9 + +// C forbids comma operator in initializing expressions. +#define BAD_OP 1, 2 aaron.ballman wrote: > LegalizeAdulthood wrote: > > aaron.ballman wrote: > > > It's also forbidden in C++. > > C++ just talks about a constant expression and doesn't explicitly disallow > > `operator,` in such expressions that I could find. Can you point me to > > where it is disallowed? > > > > If it is disallowed universally, then I don't see why you asked me to parse > > it `:)` > > C++ just talks about a constant expression and doesn't explicitly disallow > > operator, in such expressions that I could find. Can you point me to where > > it is disallowed? > > It falls out from the grammar: > > http://eel.is/c++draft/enum#nt:enumerator-definition > http://eel.is/c++draft/expr.const#nt:constant-expression > http://eel.is/c++draft/expr.cond#nt:conditional-expression > > > If it is disallowed universally, then I don't see why you asked me to parse > > it :) > > It can show up in paren expressions and the interface is generally about > integer literal expressions. OK, yeah, I was looking at the PDF and it doesn't have those nice navigable links, I'll have to keep that site in mind. So we have to disallow top-level comma but allow it as a parenthesized expression. Tricky! Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:10 +// C forbids comma operator in initializing expressions. +#define BAD_OP 1, 2 + aaron.ballman wrote: > Can you add a test: > ``` > #define GOOD_OP (1, 2) > ``` > to make sure it still gets converted to an enum? Yeah, another one I was thinking of is when someone does [[ https://godbolt.org/z/c677je8s1 | something as disgusting as this. ]] ``` #define ARGS 1, 2 #define OTHER_ARGS (1, 2) int f(int x, int y) { return x + y; } int main() { return f(ARGS) + f OTHER_ARGS; } ``` However, the only real way to handle avoiding conversion of the 2nd case is to examine the context of macro expansion. This is another edge case that will have to be handled subsequently. This gets tricky because AFAIK there is no way to select expressions in the AST that result from macro expansion. You have to match the macro expansion locations against AST nodes to identify the node(s) that match the expansion location yourself. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
aaron.ballman added inline comments. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:9 + +// C forbids comma operator in initializing expressions. +#define BAD_OP 1, 2 LegalizeAdulthood wrote: > aaron.ballman wrote: > > It's also forbidden in C++. > C++ just talks about a constant expression and doesn't explicitly disallow > `operator,` in such expressions that I could find. Can you point me to where > it is disallowed? > > If it is disallowed universally, then I don't see why you asked me to parse > it `:)` > C++ just talks about a constant expression and doesn't explicitly disallow > operator, in such expressions that I could find. Can you point me to where it > is disallowed? It falls out from the grammar: http://eel.is/c++draft/enum#nt:enumerator-definition http://eel.is/c++draft/expr.const#nt:constant-expression http://eel.is/c++draft/expr.cond#nt:conditional-expression > If it is disallowed universally, then I don't see why you asked me to parse > it :) It can show up in paren expressions and the interface is generally about integer literal expressions. Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
LegalizeAdulthood added inline comments. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:9 + +// C forbids comma operator in initializing expressions. +#define BAD_OP 1, 2 aaron.ballman wrote: > It's also forbidden in C++. C++ just talks about a constant expression and doesn't explicitly disallow `operator,` in such expressions that I could find. Can you point me to where it is disallowed? If it is disallowed universally, then I don't see why you asked me to parse it `:)` Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
aaron.ballman added a comment. Note, I'm in C standards committee meetings all week this week, so I expect I won't be doing many reviews this week and I'll be catching up as best I can starting next week. Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:324 { - IntegralLiteralExpressionMatcher Matcher(MacroTokens); - return Matcher.match(); + IntegralLiteralExpressionMatcher Matcher(MacroTokens, LangOpts.C99 == 0); + bool Matched = Matcher.match(); Huh? Comma wasn't allowed in C89 either... In fact, there's no language mode which allows top-level commas in either C or C++ -- the issue with the comma operator is a parsing one. You can't tell the difference between the comma being part of the initializer expression or the comma being the separator between enumerators. Comment at: clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp:326 + bool Matched = Matcher.match(); + if (LangOpts.C99 && + (Matcher.largestLiteralSize() != LiteralSize::Int && And C89? Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:1 +// RUN: %check_clang_tidy %s modernize-macro-to-enum %t + It'd be useful to run this test in C++ mode as well to demonstrate the behavioral differences. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:3-7 +// C requires enum values to fit into an int. +#define TOO_BIG1 1L +#define TOO_BIG2 1UL +#define TOO_BIG3 1LL +#define TOO_BIG4 1ULL How do we want to handle the fact that Clang has an extension to allow this? Perhaps we want a config option for pedantic vs non-pedantic fixes? Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:9 + +// C forbids comma operator in initializing expressions. +#define BAD_OP 1, 2 It's also forbidden in C++. Comment at: clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c:10 +// C forbids comma operator in initializing expressions. +#define BAD_OP 1, 2 + Can you add a test: ``` #define GOOD_OP (1, 2) ``` to make sure it still gets converted to an enum? Repository: rG LLVM Github Monorepo CHANGES SINCE LAST ACTION https://reviews.llvm.org/D125622/new/ https://reviews.llvm.org/D125622 ___ cfe-commits mailing list cfe-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits
[PATCH] D125622: [clang-tidy] Reject invalid enum initializers in C files
LegalizeAdulthood created this revision. LegalizeAdulthood added reviewers: aaron.ballman, njames93. LegalizeAdulthood added a project: clang-tools-extra. Herald added subscribers: carlosgalvezp, xazax.hun. Herald added a project: All. LegalizeAdulthood requested review of this revision. C requires that enum values fit into an int. Scan the macro tokens present in an initializing expression and reject macros that contain tokens that have suffixes making them larger than int. C forbids the comma operator in enum initializing expressions, so optionally reject comma operator. Fixes #55467 Repository: rG LLVM Github Monorepo https://reviews.llvm.org/D125622 Files: clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.cpp clang-tools-extra/clang-tidy/modernize/IntegralLiteralExpressionMatcher.h clang-tools-extra/clang-tidy/modernize/MacroToEnumCheck.cpp clang-tools-extra/test/clang-tidy/checkers/modernize-macro-to-enum.c clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp Index: clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp === --- clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp +++ clang-tools-extra/unittests/clang-tidy/ModernizeModuleTest.cpp @@ -36,177 +36,237 @@ return Tokens; } -static bool matchText(const char *Text) { +static bool matchText(const char *Text, bool AllowComma) { std::vector Tokens{tokenify(Text)}; - modernize::IntegralLiteralExpressionMatcher Matcher(Tokens); + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, AllowComma); return Matcher.match(); } +static modernize::LiteralSize sizeText(const char *Text) { + std::vector Tokens{tokenify(Text)}; + modernize::IntegralLiteralExpressionMatcher Matcher(Tokens, true); + if (Matcher.match()) +return Matcher.largestLiteralSize(); + return modernize::LiteralSize::Unknown; +} + +static const char *toString(modernize::LiteralSize Value) { + switch (Value) { + case modernize::LiteralSize::Int: +return "Int"; + case modernize::LiteralSize::UnsignedInt: +return "UnsignedInt"; + case modernize::LiteralSize::Long: +return "Long"; + case modernize::LiteralSize::UnsignedLong: +return "UnsignedLong"; + case modernize::LiteralSize::LongLong: +return "LongLong"; + case modernize::LiteralSize::UnsignedLongLong: +return "UnsignedLongLong"; + default: +return "Unknown"; + } +} + namespace { -struct Param { +struct MatchParam { + bool AllowComma; bool Matched; const char *Text; - friend std::ostream <<(std::ostream , const Param ) { -return Str << "Matched: " << std::boolalpha << Value.Matched << ", Text: '" - << Value.Text << "'"; + friend std::ostream <<(std::ostream , const MatchParam ) { +return Str << "Allow operator,: " << std::boolalpha << Value.AllowComma + << ", Matched: " << std::boolalpha << Value.Matched + << ", Text: '" << Value.Text << '\''; } }; -class MatcherTest : public ::testing::TestWithParam {}; +struct SizeParam { + modernize::LiteralSize Size; + const char *Text; + + friend std::ostream <<(std::ostream , const SizeParam ) { +return Str << "Size: " << toString(Value.Size) << ", Text: '" << Value.Text << '\''; + } +}; + +class MatcherTest : public ::testing::TestWithParam {}; + +class SizeTest : public ::testing::TestWithParam {}; } // namespace -static const Param Params[] = { +static const MatchParam MatchParams[] = { // Accept integral literals. -{true, "1"}, -{true, "0177"}, -{true, "0xdeadbeef"}, -{true, "0b1011"}, -{true, "'c'"}, +{true, true, "1"}, +{true, true, "0177"}, +{true, true, "0xdeadbeef"}, +{true, true, "0b1011"}, +{true, true, "'c'"}, // Reject non-integral literals. -{false, "1.23"}, -{false, "0x1p3"}, -{false, R"("string")"}, -{false, "1i"}, +{true, false, "1.23"}, +{true, false, "0x1p3"}, +{true, false, R"("string")"}, +{true, false, "1i"}, // Accept literals with these unary operators. -{true, "-1"}, -{true, "+1"}, -{true, "~1"}, -{true, "!1"}, +{true, true, "-1"}, +{true, true, "+1"}, +{true, true, "~1"}, +{true, true, "!1"}, // Reject invalid unary operators. -{false, "1-"}, -{false, "1+"}, -{false, "1~"}, -{false, "1!"}, +{true, false, "1-"}, +{true, false, "1+"}, +{true, false, "1~"}, +{true, false, "1!"}, // Accept valid binary operators. -{true, "1+1"}, -{true, "1-1"}, -{true, "1*1"}, -{true, "1/1"}, -{true, "1%2"}, -{true, "1<<1"}, -{true, "1>>1"}, -{true, "1<=>1"}, -{true, "1<1"}, -{true, "1>1"}, -{true, "1<=1"}, -{true, "1>=1"}, -{true, "1==1"}, -{true, "1!=1"}, -{true, "1&1"}, -{true, "1^1"}, -{true, "1|1"}, -{true, "1&&1"}, -{true, "1||1"}, -{true, "1+ +1"}, // A space is