https://github.com/owenca created https://github.com/llvm/llvm-project/pull/128996
Backports ffc61dc393e4 0968df9c3a55 2d585ccecc45 Fixes #105482 >From 678c9eda153fcaebbf1b1bf34c9253c40f3564fc Mon Sep 17 00:00:00 2001 From: Owen Pan <owenpi...@gmail.com> Date: Fri, 21 Feb 2025 20:46:43 -0800 Subject: [PATCH] [clang-format] Fix a bug that changes keyword `or` to an identifier (#128410) Backports ffc61dc393e4 0968df9c3a55 2d585ccecc45 Fixes #105482 --- clang/docs/ClangFormatStyleOptions.rst | 13 +++++- clang/docs/ReleaseNotes.rst | 4 ++ clang/include/clang/Format/Format.h | 17 ++++++-- clang/lib/Format/Format.cpp | 43 ++++++++++++++++++- clang/lib/Format/FormatToken.cpp | 10 ++--- clang/lib/Format/FormatToken.h | 23 ---------- clang/lib/Format/TokenAnnotator.cpp | 4 +- clang/lib/Format/TokenAnnotator.h | 2 +- clang/lib/Format/UnwrappedLineParser.cpp | 8 +--- clang/unittests/Format/FormatTest.cpp | 19 +++++++- clang/unittests/Format/TokenAnnotatorTest.cpp | 11 ++++- 11 files changed, 106 insertions(+), 48 deletions(-) diff --git a/clang/docs/ClangFormatStyleOptions.rst b/clang/docs/ClangFormatStyleOptions.rst index bbb912eb10e94..4b4c412a13323 100644 --- a/clang/docs/ClangFormatStyleOptions.rst +++ b/clang/docs/ClangFormatStyleOptions.rst @@ -4735,15 +4735,24 @@ the configuration (without a prefix: ``Auto``). .. _Language: **Language** (``LanguageKind``) :versionbadge:`clang-format 3.5` :ref:`¶ <Language>` - Language, this format style is targeted at. + The language that this format style targets. + + .. note:: + + You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h`` + files by adding a ``// clang-format Language:`` line before the first + non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``. Possible values: * ``LK_None`` (in configuration: ``None``) Do not use. + * ``LK_C`` (in configuration: ``C``) + Should be used for C. + * ``LK_Cpp`` (in configuration: ``Cpp``) - Should be used for C, C++. + Should be used for C++. * ``LK_CSharp`` (in configuration: ``CSharp``) Should be used for C#. 
diff --git a/clang/docs/ReleaseNotes.rst b/clang/docs/ReleaseNotes.rst index 153afdb3d59e3..57a567509a068 100644 --- a/clang/docs/ReleaseNotes.rst +++ b/clang/docs/ReleaseNotes.rst @@ -1358,6 +1358,10 @@ clang-format - Adds ``WrapNamespaceBodyWithEmptyLines`` option. - Adds the ``IndentExportBlock`` option. - Adds ``PenaltyBreakBeforeMemberAccess`` option. +- Add the C language instead of treating it like C++. +- Allow specifying the language (C, C++, or Objective-C) for a ``.h`` file by + adding a special comment (e.g. ``// clang-format Language: ObjC``) near the + top of the file. libclang -------- diff --git a/clang/include/clang/Format/Format.h b/clang/include/clang/Format/Format.h index 6f432d1d50315..abab543518222 100644 --- a/clang/include/clang/Format/Format.h +++ b/clang/include/clang/Format/Format.h @@ -3275,7 +3275,9 @@ struct FormatStyle { enum LanguageKind : int8_t { /// Do not use. LK_None, - /// Should be used for C, C++. + /// Should be used for C. + LK_C, + /// Should be used for C++. LK_Cpp, /// Should be used for C#. LK_CSharp, @@ -3300,7 +3302,9 @@ struct FormatStyle { /// https://sci-hub.st/10.1109/IEEESTD.2018.8299595 LK_Verilog }; - bool isCpp() const { return Language == LK_Cpp || Language == LK_ObjC; } + bool isCpp() const { + return Language == LK_Cpp || Language == LK_C || Language == LK_ObjC; + } bool isCSharp() const { return Language == LK_CSharp; } bool isJson() const { return Language == LK_Json; } bool isJavaScript() const { return Language == LK_JavaScript; } @@ -3310,7 +3314,12 @@ struct FormatStyle { } bool isTableGen() const { return Language == LK_TableGen; } - /// Language, this format style is targeted at. + /// The language that this format style targets. + /// \note + /// You can specify the language (``C``, ``Cpp``, or ``ObjC``) for ``.h`` + /// files by adding a ``// clang-format Language:`` line before the first + /// non-comment (and non-empty) line, e.g. ``// clang-format Language: Cpp``. 
+ /// \endnote /// \version 3.5 LanguageKind Language; @@ -5665,6 +5674,8 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code); // Returns a string representation of ``Language``. inline StringRef getLanguageName(FormatStyle::LanguageKind Language) { switch (Language) { + case FormatStyle::LK_C: + return "C"; case FormatStyle::LK_Cpp: return "C++"; case FormatStyle::LK_CSharp: diff --git a/clang/lib/Format/Format.cpp b/clang/lib/Format/Format.cpp index f02bf95cfeed7..0bb8545884442 100644 --- a/clang/lib/Format/Format.cpp +++ b/clang/lib/Format/Format.cpp @@ -401,6 +401,7 @@ template <> struct MappingTraits<FormatStyle::KeepEmptyLinesStyle> { template <> struct ScalarEnumerationTraits<FormatStyle::LanguageKind> { static void enumeration(IO &IO, FormatStyle::LanguageKind &Value) { + IO.enumCase(Value, "C", FormatStyle::LK_C); IO.enumCase(Value, "Cpp", FormatStyle::LK_Cpp); IO.enumCase(Value, "Java", FormatStyle::LK_Java); IO.enumCase(Value, "JavaScript", FormatStyle::LK_JavaScript); @@ -3952,7 +3953,12 @@ LangOptions getFormattingLangOpts(const FormatStyle &Style) { LangOpts.Digraphs = LexingStd >= FormatStyle::LS_Cpp11; LangOpts.LineComment = 1; - LangOpts.CXXOperatorNames = Style.isCpp(); + + const auto Language = Style.Language; + LangOpts.C17 = Language == FormatStyle::LK_C; + LangOpts.CXXOperatorNames = + Language == FormatStyle::LK_Cpp || Language == FormatStyle::LK_ObjC; + LangOpts.Bool = 1; LangOpts.ObjC = 1; LangOpts.MicrosoftExt = 1; // To get kw___try, kw___finally. 
@@ -3977,6 +3983,8 @@ const char *StyleOptionHelpDescription = " --style=\"{BasedOnStyle: llvm, IndentWidth: 8}\""; static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { + if (FileName.ends_with(".c")) + return FormatStyle::LK_C; if (FileName.ends_with(".java")) return FormatStyle::LK_Java; if (FileName.ends_with_insensitive(".js") || @@ -4016,6 +4024,35 @@ static FormatStyle::LanguageKind getLanguageByFileName(StringRef FileName) { return FormatStyle::LK_Cpp; } +static FormatStyle::LanguageKind getLanguageByComment(const Environment &Env) { + const auto ID = Env.getFileID(); + const auto &SourceMgr = Env.getSourceManager(); + + LangOptions LangOpts; + LangOpts.CPlusPlus = 1; + LangOpts.LineComment = 1; + + Lexer Lex(ID, SourceMgr.getBufferOrFake(ID), SourceMgr, LangOpts); + Lex.SetCommentRetentionState(true); + + for (Token Tok; !Lex.LexFromRawLexer(Tok) && Tok.is(tok::comment);) { + auto Text = StringRef(SourceMgr.getCharacterData(Tok.getLocation()), + Tok.getLength()); + if (!Text.consume_front("// clang-format Language:")) + continue; + + Text = Text.trim(); + if (Text == "C") + return FormatStyle::LK_C; + if (Text == "Cpp") + return FormatStyle::LK_Cpp; + if (Text == "ObjC") + return FormatStyle::LK_ObjC; + } + + return FormatStyle::LK_None; +} + FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) { const auto GuessedLanguage = getLanguageByFileName(FileName); if (GuessedLanguage == FormatStyle::LK_Cpp) { @@ -4025,6 +4062,10 @@ FormatStyle::LanguageKind guessLanguage(StringRef FileName, StringRef Code) { if (!Code.empty() && (Extension.empty() || Extension == ".h")) { auto NonEmptyFileName = FileName.empty() ? 
"guess.h" : FileName; Environment Env(Code, NonEmptyFileName, /*Ranges=*/{}); + if (const auto Language = getLanguageByComment(Env); + Language != FormatStyle::LK_None) { + return Language; + } ObjCHeaderStyleGuesser Guesser(Env, getLLVMStyle()); Guesser.process(); if (Guesser.isObjC()) diff --git a/clang/lib/Format/FormatToken.cpp b/clang/lib/Format/FormatToken.cpp index 963e8f87793fa..60e428123d26d 100644 --- a/clang/lib/Format/FormatToken.cpp +++ b/clang/lib/Format/FormatToken.cpp @@ -42,11 +42,11 @@ static SmallVector<StringRef> CppNonKeywordTypes = { }; bool FormatToken::isTypeName(const LangOptions &LangOpts) const { - const bool IsCpp = LangOpts.CXXOperatorNames; - return is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts) || - (IsCpp && is(tok::identifier) && - std::binary_search(CppNonKeywordTypes.begin(), - CppNonKeywordTypes.end(), TokenText)); + if (is(TT_TypeName) || Tok.isSimpleTypeSpecifier(LangOpts)) + return true; + return (LangOpts.CXXOperatorNames || LangOpts.C17) && is(tok::identifier) && + std::binary_search(CppNonKeywordTypes.begin(), + CppNonKeywordTypes.end(), TokenText); } bool FormatToken::isTypeOrIdentifier(const LangOptions &LangOpts) const { diff --git a/clang/lib/Format/FormatToken.h b/clang/lib/Format/FormatToken.h index 29aba281ae103..02429970599c0 100644 --- a/clang/lib/Format/FormatToken.h +++ b/clang/lib/Format/FormatToken.h @@ -743,29 +743,6 @@ struct FormatToken { return isOneOf(tok::star, tok::amp, tok::ampamp); } - bool isCppAlternativeOperatorKeyword() const { - assert(!TokenText.empty()); - if (!isalpha(TokenText[0])) - return false; - - switch (Tok.getKind()) { - case tok::ampamp: - case tok::ampequal: - case tok::amp: - case tok::pipe: - case tok::tilde: - case tok::exclaim: - case tok::exclaimequal: - case tok::pipepipe: - case tok::pipeequal: - case tok::caret: - case tok::caretequal: - return true; - default: - return false; - } - } - bool isUnaryOperator() const { switch (Tok.getKind()) { case tok::plus: diff --git 
a/clang/lib/Format/TokenAnnotator.cpp b/clang/lib/Format/TokenAnnotator.cpp index ac5b25d52ce84..976c4d888e1fd 100644 --- a/clang/lib/Format/TokenAnnotator.cpp +++ b/clang/lib/Format/TokenAnnotator.cpp @@ -129,7 +129,7 @@ class AnnotatingParser { : Style(Style), Line(Line), CurrentToken(Line.First), AutoFound(false), IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords), Scopes(Scopes), TemplateDeclarationDepth(0) { - assert(IsCpp == LangOpts.CXXOperatorNames); + assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17)); Contexts.push_back(Context(tok::unknown, 1, /*IsExpression=*/false)); resetTokenMetadata(); } @@ -3820,7 +3820,7 @@ static bool isFunctionDeclarationName(const LangOptions &LangOpts, }; const auto *Next = Current.Next; - const bool IsCpp = LangOpts.CXXOperatorNames; + const bool IsCpp = LangOpts.CXXOperatorNames || LangOpts.C17; // Find parentheses of parameter list. if (Current.is(tok::kw_operator)) { diff --git a/clang/lib/Format/TokenAnnotator.h b/clang/lib/Format/TokenAnnotator.h index 6aea310a56d69..c0c13941ef4f7 100644 --- a/clang/lib/Format/TokenAnnotator.h +++ b/clang/lib/Format/TokenAnnotator.h @@ -225,7 +225,7 @@ class TokenAnnotator { TokenAnnotator(const FormatStyle &Style, const AdditionalKeywords &Keywords) : Style(Style), IsCpp(Style.isCpp()), LangOpts(getFormattingLangOpts(Style)), Keywords(Keywords) { - assert(IsCpp == LangOpts.CXXOperatorNames); + assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17)); } /// Adapts the indent levels of comment lines to the indent of the diff --git a/clang/lib/Format/UnwrappedLineParser.cpp b/clang/lib/Format/UnwrappedLineParser.cpp index 1411197e32554..9b4257fdd8c8f 100644 --- a/clang/lib/Format/UnwrappedLineParser.cpp +++ b/clang/lib/Format/UnwrappedLineParser.cpp @@ -168,7 +168,7 @@ UnwrappedLineParser::UnwrappedLineParser( : IG_Inited), IncludeGuardToken(nullptr), FirstStartColumn(FirstStartColumn), Macros(Style.Macros, SourceMgr, Style, Allocator, 
IdentTable) { - assert(IsCpp == LangOpts.CXXOperatorNames); + assert(IsCpp == (LangOpts.CXXOperatorNames || LangOpts.C17)); } void UnwrappedLineParser::reset() { @@ -1712,12 +1712,6 @@ void UnwrappedLineParser::parseStructuralElement( OpeningBrace && OpeningBrace->isOneOf(TT_RequiresExpressionLBrace, TT_CompoundRequirementLBrace); !eof();) { - if (IsCpp && FormatTok->isCppAlternativeOperatorKeyword()) { - if (auto *Next = Tokens->peekNextToken(/*SkipComment=*/true); - Next && Next->isBinaryOperator()) { - FormatTok->Tok.setKind(tok::identifier); - } - } const FormatToken *Previous = FormatTok->Previous; switch (FormatTok->Tok.getKind()) { case tok::at: diff --git a/clang/unittests/Format/FormatTest.cpp b/clang/unittests/Format/FormatTest.cpp index 3b7856d6ee150..d1e96e0fa544a 100644 --- a/clang/unittests/Format/FormatTest.cpp +++ b/clang/unittests/Format/FormatTest.cpp @@ -17784,9 +17784,11 @@ TEST_F(FormatTest, ConfigurableSpaceBeforeAssignmentOperators) { verifyFormat("int a = 5;"); verifyFormat("a += 42;"); verifyFormat("a or_eq 8;"); - verifyFormat("xor = foo;"); - FormatStyle Spaces = getLLVMStyle(); + auto Spaces = getLLVMStyle(FormatStyle::LK_C); + verifyFormat("xor = foo;", Spaces); + + Spaces.Language = FormatStyle::LK_Cpp; Spaces.SpaceBeforeAssignmentOperators = false; verifyFormat("int a= 5;", Spaces); verifyFormat("a+= 42;", Spaces); @@ -24683,6 +24685,7 @@ TEST_F(FormatTest, StructuredBindings) { } TEST_F(FormatTest, FileAndCode) { + EXPECT_EQ(FormatStyle::LK_C, guessLanguage("foo.c", "")); EXPECT_EQ(FormatStyle::LK_Cpp, guessLanguage("foo.cc", "")); EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.m", "")); EXPECT_EQ(FormatStyle::LK_ObjC, guessLanguage("foo.mm", "")); @@ -24848,6 +24851,18 @@ TEST_F(FormatTest, GuessLanguageWithChildLines) { guessLanguage("foo.h", "#define FOO ({ foo(); ({ NSString *s; }) })")); } +TEST_F(FormatTest, GetLanguageByComment) { + EXPECT_EQ(FormatStyle::LK_C, + guessLanguage("foo.h", "// clang-format Language: C\n" + 
"int i;")); + EXPECT_EQ(FormatStyle::LK_Cpp, + guessLanguage("foo.h", "// clang-format Language: Cpp\n" + "int DoStuff(CGRect rect);")); + EXPECT_EQ(FormatStyle::LK_ObjC, + guessLanguage("foo.h", "// clang-format Language: ObjC\n" + "int i;")); +} + TEST_F(FormatTest, TypenameMacros) { std::vector<std::string> TypenameMacros = {"STACK_OF", "LIST", "TAILQ_ENTRY"}; diff --git a/clang/unittests/Format/TokenAnnotatorTest.cpp b/clang/unittests/Format/TokenAnnotatorTest.cpp index dffb07c89bacc..f1a6999cfdfb8 100644 --- a/clang/unittests/Format/TokenAnnotatorTest.cpp +++ b/clang/unittests/Format/TokenAnnotatorTest.cpp @@ -3646,6 +3646,11 @@ TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) { ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::pipepipe, TT_BinaryOperator); + Tokens = annotate("return segment < *this or *this < segment;"); + ASSERT_EQ(Tokens.size(), 12u) << Tokens; + EXPECT_TOKEN(Tokens[5], tok::pipepipe, TT_BinaryOperator); + EXPECT_TOKEN(Tokens[6], tok::star, TT_UnaryOperator); + Tokens = annotate("a = b or_eq c;"); ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::pipeequal, TT_BinaryOperator); @@ -3658,11 +3663,13 @@ TEST_F(TokenAnnotatorTest, CppAltOperatorKeywords) { ASSERT_EQ(Tokens.size(), 7u) << Tokens; EXPECT_TOKEN(Tokens[3], tok::caretequal, TT_BinaryOperator); - Tokens = annotate("xor = foo;"); + const auto StyleC = getLLVMStyle(FormatStyle::LK_C); + + Tokens = annotate("xor = foo;", StyleC); ASSERT_EQ(Tokens.size(), 5u) << Tokens; EXPECT_TOKEN(Tokens[0], tok::identifier, TT_Unknown); - Tokens = annotate("int xor = foo;"); + Tokens = annotate("int xor = foo;", StyleC); ASSERT_EQ(Tokens.size(), 6u) << Tokens; EXPECT_TOKEN(Tokens[1], tok::identifier, TT_StartOfName); } _______________________________________________ llvm-branch-commits mailing list llvm-branch-commits@lists.llvm.org https://lists.llvm.org/cgi-bin/mailman/listinfo/llvm-branch-commits