JakeMerdichAMD created this revision.
Herald added subscribers: cfe-commits, kristof.beyls.
Herald added a project: clang.
JakeMerdichAMD added reviewers: MyDeveloperDay, curdeius, sammccall, jbcoe.
JakeMerdichAMD added a project: clang-format.

https://bugs.llvm.org/show_bug.cgi?id=46383

When the c preprocessor stringizes tokens, the generated string literals
are affected by the whitespace. This means clang-format can affect
codegen silently, adding spaces and newlines to strings.  Practically
speaking, the vast majority of cases will be harmless, only affecting
single identifiers or debug macros.

In the interest of doing no harm in other cases though, this introduces
a blacklist option 'WhitespaceSensitiveMacros', which contains a list of
names of function-like macros whose contents should not be touched by
clang-format, period. Clang-format can't automatically detect these
without a real compile context, so users will have to specify it
explicitly (it still beats clang-format off'ing at every invocation).

I added one default, "STRINGIZE", but am not particularly attached to
it, nor have I given much thought to defaults.


Repository:
  rG LLVM Github Monorepo

https://reviews.llvm.org/D82620

Files:
  clang/docs/ClangFormatStyleOptions.rst
  clang/include/clang/Format/Format.h
  clang/lib/Format/Format.cpp
  clang/lib/Format/FormatToken.h
  clang/lib/Format/FormatTokenLexer.cpp
  clang/lib/Format/TokenAnnotator.cpp
  clang/unittests/Format/FormatTest.cpp

Index: clang/unittests/Format/FormatTest.cpp
===================================================================
--- clang/unittests/Format/FormatTest.cpp
+++ clang/unittests/Format/FormatTest.cpp
@@ -13957,6 +13957,13 @@
   CHECK_PARSE("NamespaceMacros: [TESTSUITE, SUITE]", NamespaceMacros,
               std::vector<std::string>({"TESTSUITE", "SUITE"}));
 
+  Style.WhitespaceSensitiveMacros.clear();
+  CHECK_PARSE("WhitespaceSensitiveMacros: [STRINGIZE]",
+              WhitespaceSensitiveMacros, std::vector<std::string>{"STRINGIZE"});
+  CHECK_PARSE("WhitespaceSensitiveMacros: [STRINGIZE, ASSERT]",
+              WhitespaceSensitiveMacros,
+              std::vector<std::string>({"STRINGIZE", "ASSERT"}));
+
   Style.IncludeStyle.IncludeCategories.clear();
   std::vector<tooling::IncludeStyle::IncludeCategory> ExpectedCategories = {
       {"abc/.*", 2, 0}, {".*", 1, 0}};
@@ -16466,6 +16473,33 @@
   verifyFormat("foo(operator, , -42);", Style);
 }
 
+TEST_F(FormatTest, WhitespaceSensitiveMacros) {
+  FormatStyle Style = getLLVMStyle();
+  Style.WhitespaceSensitiveMacros.push_back("FOO");
+
+  // Don't use the helpers here, since 'mess up' will change the whitespace
+  // and these are all whitespace sensitive by definition
+  EXPECT_EQ("FOO(String-ized&Messy+But(: :Still)=Intentional);",
+            format("FOO(String-ized&Messy+But(: :Still)=Intentional);", Style));
+  EXPECT_EQ("FOO(String-ized&Messy+But,: :Still=Intentional);",
+            format("FOO(String-ized&Messy+But,: :Still=Intentional);", Style));
+  EXPECT_EQ("FOO(String-ized&Messy+But,: :\n"
+            "       Still=Intentional);",
+            format("FOO(String-ized&Messy+But,: :\n"
+                   "       Still=Intentional);",
+                   Style));
+  Style.AlignConsecutiveAssignments = true;
+  EXPECT_EQ("FOO(String-ized=&Messy+But,: :\n"
+            "       Still=Intentional);",
+            format("FOO(String-ized=&Messy+But,: :\n"
+                   "       Still=Intentional);",
+                   Style));
+
+  Style.ColumnLimit = 21;
+  EXPECT_EQ("FOO(String-ized&Messy+But: :Still=Intentional);",
+            format("FOO(String-ized&Messy+But: :Still=Intentional);", Style));
+}
+
 TEST_F(FormatTest, VeryLongNamespaceCommentSplit) {
   // These tests are not in NamespaceFixer because that doesn't
   // test its interaction with line wrapping
Index: clang/lib/Format/TokenAnnotator.cpp
===================================================================
--- clang/lib/Format/TokenAnnotator.cpp
+++ clang/lib/Format/TokenAnnotator.cpp
@@ -160,6 +160,27 @@
     return false;
   }
 
+  bool parseUntouchableParens() {
+    while (CurrentToken) {
+      CurrentToken->Finalized = true;
+      switch (CurrentToken->Tok.getKind()) {
+      case tok::l_paren:
+        next();
+        if (!parseUntouchableParens())
+          return false;
+        continue;
+      case tok::r_paren:
+        next();
+        return true;
+      default:
+        // no-op
+        break;
+      }
+      next();
+    }
+    return false;
+  }
+
   bool parseParens(bool LookForDecls = false) {
     if (!CurrentToken)
       return false;
@@ -171,6 +192,11 @@
     Contexts.back().ColonIsForRangeExpr =
         Contexts.size() == 2 && Contexts[0].ColonIsForRangeExpr;
 
+    if (Left->Previous && Left->Previous->is(TT_UntouchableMacroFunc)) {
+      Left->Finalized = true;
+      return parseUntouchableParens();
+    }
+
     bool StartsObjCMethodExpr = false;
     if (FormatToken *MaybeSel = Left->Previous) {
       // @selector( starts a selector.
@@ -1311,7 +1337,7 @@
             TT_TypenameMacro, TT_FunctionLBrace, TT_ImplicitStringLiteral,
             TT_InlineASMBrace, TT_JsFatArrow, TT_LambdaArrow, TT_NamespaceMacro,
             TT_OverloadedOperator, TT_RegexLiteral, TT_TemplateString,
-            TT_ObjCStringLiteral))
+            TT_ObjCStringLiteral, TT_UntouchableMacroFunc))
       CurrentToken->setType(TT_Unknown);
     CurrentToken->Role.reset();
     CurrentToken->MatchingParen = nullptr;
@@ -3970,7 +3996,7 @@
                  << " C=" << Tok->CanBreakBefore
                  << " T=" << getTokenTypeName(Tok->getType())
                  << " S=" << Tok->SpacesRequiredBefore
-                 << " B=" << Tok->BlockParameterCount
+                 << " F=" << Tok->Finalized << " B=" << Tok->BlockParameterCount
                  << " BK=" << Tok->BlockKind << " P=" << Tok->SplitPenalty
                  << " Name=" << Tok->Tok.getName() << " L=" << Tok->TotalLength
                  << " PPK=" << Tok->PackingKind << " FakeLParens=";
Index: clang/lib/Format/FormatTokenLexer.cpp
===================================================================
--- clang/lib/Format/FormatTokenLexer.cpp
+++ clang/lib/Format/FormatTokenLexer.cpp
@@ -43,6 +43,10 @@
     Macros.insert({&IdentTable.get(TypenameMacro), TT_TypenameMacro});
   for (const std::string &NamespaceMacro : Style.NamespaceMacros)
     Macros.insert({&IdentTable.get(NamespaceMacro), TT_NamespaceMacro});
+  for (const std::string &WhitespaceSensitiveMacro :
+       Style.WhitespaceSensitiveMacros)
+    Macros.insert(
+        {&IdentTable.get(WhitespaceSensitiveMacro), TT_UntouchableMacroFunc});
 }
 
 ArrayRef<FormatToken *> FormatTokenLexer::lex() {
Index: clang/lib/Format/FormatToken.h
===================================================================
--- clang/lib/Format/FormatToken.h
+++ clang/lib/Format/FormatToken.h
@@ -102,6 +102,7 @@
   TYPE(TrailingUnaryOperator)                                                  \
   TYPE(TypenameMacro)                                                          \
   TYPE(UnaryOperator)                                                          \
+  TYPE(UntouchableMacroFunc)                                                   \
   TYPE(CSharpStringLiteral)                                                    \
   TYPE(CSharpNamedArgumentColon)                                               \
   TYPE(CSharpNullable)                                                         \
Index: clang/lib/Format/Format.cpp
===================================================================
--- clang/lib/Format/Format.cpp
+++ clang/lib/Format/Format.cpp
@@ -599,6 +599,8 @@
     IO.mapOptional("TypenameMacros", Style.TypenameMacros);
     IO.mapOptional("UseCRLF", Style.UseCRLF);
     IO.mapOptional("UseTab", Style.UseTab);
+    IO.mapOptional("WhitespaceSensitiveMacros",
+                   Style.WhitespaceSensitiveMacros);
   }
 };
 
@@ -933,6 +935,7 @@
   LLVMStyle.SortUsingDeclarations = true;
   LLVMStyle.StatementMacros.push_back("Q_UNUSED");
   LLVMStyle.StatementMacros.push_back("QT_REQUIRE_VERSION");
+  LLVMStyle.WhitespaceSensitiveMacros.push_back("STRINGIZE");
 
   // Defaults that differ when not C++.
   if (Language == FormatStyle::LK_TableGen) {
Index: clang/include/clang/Format/Format.h
===================================================================
--- clang/include/clang/Format/Format.h
+++ clang/include/clang/Format/Format.h
@@ -1425,6 +1425,17 @@
   /// For example: TESTSUITE
   std::vector<std::string> NamespaceMacros;
 
+  /// A vector of macros which are whitespace-sensitive and shouldn't be
+  /// touched.
+  ///
+  /// These are expected to be macros of the form:
+  /// \code
+  ///   STRINGIZE(...)
+  /// \endcode
+  ///
+  /// For example: STRINGIZE
+  std::vector<std::string> WhitespaceSensitiveMacros;
+
   tooling::IncludeStyle IncludeStyle;
 
   /// Indent case labels one level from the switch statement.
Index: clang/docs/ClangFormatStyleOptions.rst
===================================================================
--- clang/docs/ClangFormatStyleOptions.rst
+++ clang/docs/ClangFormatStyleOptions.rst
@@ -2694,6 +2694,17 @@
     Use tabs whenever we need to fill whitespace that spans at least from
     one tab stop to the next one.
 
+**WhitespaceSensitiveMacros** (``std::vector<std::string>``)
+  A vector of macros which are whitespace-sensitive and should not be touched.
+
+  These are expected to be macros of the form:
+
+  .. code-block:: c++
+
+    STRINGIZE(...)
+
+  For example: STRINGIZE
+
 
 
 .. END_FORMAT_STYLE_OPTIONS
_______________________________________________
cfe-commits mailing list
cfe-commits@lists.llvm.org
https://lists.llvm.org/cgi-bin/mailman/listinfo/cfe-commits

Reply via email to