[PATCH] Better macro error messages involving initializer lists

Richard Trieu Fri, 14 Jun 2013 19:28:55 -0700

When an initializer list is used within a marco argument, each comma is the 
list is treated as a macro argument separator.  When this happens, the only 
error message is "too many macro arguments provided".  This patch changes the 
handling of function-like macros to give additional notes detailing the 
problems.


For a macro FOO(vector<int>{1,2,3}, 3), it will suggest parenthesis to preserve 
the initializer list with the corrected code as FOO((vector<int>{1,2,3}), 3).

For a macro such as FOO( {1,2,3} ), it will provide a different note saying 
that initializer lists cannot be used as macro arguments.  This is because 
adding parentheses around the braces would make it no longer an initializer 
list.

http://llvm-reviews.chandlerc.com/D986

Files:
  lib/Lex/PPMacroExpansion.cpp
  test/Preprocessor/macro_with_initializer_list.cpp
  include/clang/Basic/DiagnosticLexKinds.td
  include/clang/Lex/Preprocessor.h

Index: lib/Lex/PPMacroExpansion.cpp
===================================================================
--- lib/Lex/PPMacroExpansion.cpp
+++ lib/Lex/PPMacroExpansion.cpp
@@ -397,89 +397,235 @@
 MacroArgs *Preprocessor::ReadFunctionLikeMacroArgs(Token &MacroName,
                                                    MacroInfo *MI,
                                                    SourceLocation &MacroEnd) {
-  // The number of fixed arguments to parse.
-  unsigned NumFixedArgsLeft = MI->getNumArgs();
-  bool isVariadic = MI->isVariadic();
-
-  // Outer loop, while there are more arguments, keep reading them.
   Token Tok;
 
   // Read arguments as unexpanded tokens.  This avoids issues, e.g., where
   // an argument value in a macro could expand to ',' or '(' or ')'.
   LexUnexpandedToken(Tok);
   assert(Tok.is(tok::l_paren) && "Error computing l-paren-ness?");
 
+  SmallVector<Token, 64> MacroTokens;
+  MacroTokens.push_back(Tok);
+  unsigned Parens = 1;  // from the paren already lexed
+  bool HasCodeCompletion = false;
+  while (Tok.isNot(tok::r_paren) || Parens != 0) {
+    LexUnexpandedToken(Tok);
+    if (Tok.is(tok::eof) || Tok.is(tok::eod)) {
+      MacroTokens.push_back(Tok);
+      break;
+    } else if (Tok.is(tok::l_paren)) {
+      ++Parens;
+    } else if (Tok.is(tok::r_paren)) {
+      --Parens;
+    } else if (Tok.is(tok::code_completion)) {
+      HasCodeCompletion = true;
+    } else if (IdentifierInfo *II = Tok.getIdentifierInfo()) {
+      // Reading macro arguments can cause macros that we are currently
+      // expanding from to be popped off the expansion stack.  Doing so causes
+      // them to be reenabled for expansion.  Here we record whether any
+      // identifiers we lex as macro arguments correspond to disabled macros.
+      // If so, we mark the token as noexpand.  This is a subtle aspect of
+      // C99 6.10.3.4p2.
+      if (MacroInfo *MI = getMacroInfo(II))
+        if (!MI->isEnabled())
+          Tok.setFlag(Token::DisableExpand);
+    }
+    MacroTokens.push_back(Tok);
+  }
+
+  if (Tok.is(tok::r_paren))
+    MacroEnd = Tok.getLocation();
+
+  MacroArgs *MA = ProcessFunctionLikeMacroArgTokens(MacroTokens, MacroName, MI,
+                                                    /*EmitErrors*/true);
+
+  if (MA) return MA;
+  if (HasCodeCompletion) return 0;
+  if (MacroTokens.back().isNot(tok::r_paren)) return 0;
+
+  // Checks that braces and parens are properly nested.  If not, return.
+  SmallVector<bool, 8> IsParen;
+  for (SmallVector<Token, 64>::iterator TokenIterator = MacroTokens.begin(),
+                                        TokenEnd = MacroTokens.end();
+       TokenIterator != TokenEnd; ++TokenIterator) {
+    if (TokenIterator->is(tok::l_paren)) {
+      IsParen.push_back(true);
+    } else if (TokenIterator->is(tok::r_paren)) {
+      if (IsParen.empty() || !IsParen.back())
+        return 0;
+      IsParen.pop_back();
+    } else if (TokenIterator->is(tok::l_brace)) {
+      IsParen.push_back(false);
+    } else if (TokenIterator->is(tok::r_brace)) {
+      if (IsParen.empty() || IsParen.back())
+        return 0;
+      IsParen.pop_back();
+    }
+  }
+  if (!IsParen.empty()) return 0;
+
+  Parens = 0;
+  unsigned Braces = 0;
+  bool FoundComma = false;
+  bool FoundBrace = false;
+
+  // Attempt braced list correction.
+  SmallVector<Token, 64> CorrectedMacroTokens;
+  SmallVector<SourceRange, 4> ParenHints;
+  SmallVector<SourceRange, 4> InitLists;
+  for (SmallVector<Token, 64>::iterator TokenIterator = MacroTokens.begin(),
+                                        TokenEnd = MacroTokens.end(),
+                                        ArgStart;
+       TokenIterator != TokenEnd; ++TokenIterator) {
+
+    // Update brace and paren counts.
+    if (TokenIterator->is(tok::l_paren)) {
+      ++Parens;
+    } else if (TokenIterator->is(tok::r_paren)) {
+      --Parens;
+    } else if (TokenIterator->is(tok::l_brace)) {
+      ++Braces;
+      if (Parens == 1)
+        FoundBrace = true;
+    } else if (TokenIterator->is(tok::r_brace)) {
+      --Braces;
+    } else if (TokenIterator->is(tok::comma)) {
+      if (Braces > 0 && Parens == 1)
+        FoundComma = true;
+    }
+
+    if ((Braces == 0 && Parens == 1 && TokenIterator->is(tok::comma)) ||
+        (Braces == 0 && Parens == 0 && TokenIterator->is(tok::r_paren))) {
+      // End of argument.
+      if (!FoundComma || !FoundBrace) {
+        CorrectedMacroTokens.insert(CorrectedMacroTokens.end(),
+                                    ArgStart, TokenIterator);
+      } else if (ArgStart->is(tok::l_brace) &&
+                 (TokenIterator - 1)->is(tok::r_brace)) {
+        InitLists.push_back(SourceRange(ArgStart->getLocation(),
+                                        (TokenIterator - 1)->getLocation()));
+      } else {
+        ParenHints.push_back(SourceRange(ArgStart->getLocation(),
+                                         TokenIterator->getLocation()));
+        CorrectedMacroTokens.push_back(MacroTokens.front());
+        CorrectedMacroTokens.insert(CorrectedMacroTokens.end(),
+                                    ArgStart, TokenIterator);
+        CorrectedMacroTokens.push_back(MacroTokens.back());
+      }
+    }
+
+    if (Braces == 0 && Parens == 1 &&
+        (TokenIterator->is(tok::comma) || TokenIterator->is(tok::l_paren))) {
+      // Start of argument.
+      ArgStart = TokenIterator + 1;
+      FoundComma = false;
+      FoundBrace = false;
+      CorrectedMacroTokens.push_back(*TokenIterator);
+    }
+  }
+  CorrectedMacroTokens.push_back(MacroTokens.back());
+
+  if (!InitLists.empty()) {
+    DiagnosticBuilder DB =
+        Diag(MacroName, diag::note_cannot_use_init_list_as_macro_argument);
+    for (SmallVector<SourceRange, 4>::iterator
+             Range = InitLists.begin(), RangeEnd = InitLists.end();
+             Range != RangeEnd; ++Range)
+      DB << *Range;
+
+    return 0;
+  }
+
+  if (ParenHints.empty()) return 0;
+  MA = ProcessFunctionLikeMacroArgTokens(CorrectedMacroTokens, MacroName, MI,
+                                         /*EmitErrors*/false);
+
+  if (!MA) return 0;
+
+  DiagnosticBuilder DB = Diag(MacroName, diag::note_suggest_parens_for_macro);
+  for (SmallVector<SourceRange, 4>::iterator
+           ParenLocation = ParenHints.begin(), ParenEnd = ParenHints.end();
+       ParenLocation != ParenEnd; ++ParenLocation) {
+    DB << FixItHint::CreateInsertion(ParenLocation->getBegin(), "(")
+       << FixItHint::CreateInsertion(ParenLocation->getEnd(), ")");
+  }
+
+  return MA;
+}
+
+MacroArgs *Preprocessor::ProcessFunctionLikeMacroArgTokens(
+    const SmallVector<Token, 64> &MacroTokens, Token &MacroName,
+    MacroInfo *MI, bool EmitErrors) {
+
+  // The number of fixed arguments to parse.
+  unsigned NumFixedArgsLeft = MI->getNumArgs();
+  bool isVariadic = MI->isVariadic();
+
   // ArgTokens - Build up a list of tokens that make up each argument.  Each
   // argument is separated by an EOF token.  Use a SmallVector so we can avoid
   // heap allocations in the common case.
   SmallVector<Token, 64> ArgTokens;
   bool ContainsCodeCompletionTok = false;
 
   unsigned NumActuals = 0;
-  while (Tok.isNot(tok::r_paren)) {
-    if (ContainsCodeCompletionTok && (Tok.is(tok::eof) || Tok.is(tok::eod)))
+  SmallVector<Token, 64>::const_iterator TokIter = MacroTokens.begin();
+  while (TokIter->isNot(tok::r_paren)) {
+    if (ContainsCodeCompletionTok &&
+        (TokIter->is(tok::eof) || TokIter->is(tok::eod)))
       break;
 
-    assert((Tok.is(tok::l_paren) || Tok.is(tok::comma)) &&
+    assert((TokIter->is(tok::l_paren) || TokIter->is(tok::comma)) &&
            "only expect argument separators here");
 
     unsigned ArgTokenStart = ArgTokens.size();
-    SourceLocation ArgStartLoc = Tok.getLocation();
+    SourceLocation ArgStartLoc = TokIter->getLocation();
 
     // C99 6.10.3p11: Keep track of the number of l_parens we have seen.  Note
     // that we already consumed the first one.
     unsigned NumParens = 0;
 
     while (1) {
       // Read arguments as unexpanded tokens.  This avoids issues, e.g., where
       // an argument value in a macro could expand to ',' or '(' or ')'.
-      LexUnexpandedToken(Tok);
+      ++TokIter;
 
-      if (Tok.is(tok::eof) || Tok.is(tok::eod)) { // "#if f(<eof>" & "#if f(\n"
+      if (TokIter->is(tok::eof) || TokIter->is(tok::eod)) {
+        // "#if f(<eof>" & "#if f(\n"
         if (!ContainsCodeCompletionTok) {
-          Diag(MacroName, diag::err_unterm_macro_invoc);
-          Diag(MI->getDefinitionLoc(), diag::note_macro_here)
-            << MacroName.getIdentifierInfo();
+          if (EmitErrors) {
+            Diag(MacroName, diag::err_unterm_macro_invoc);
+            Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+                << MacroName.getIdentifierInfo();
+          }
           // Do not lose the EOF/EOD.  Return it to the client.
-          MacroName = Tok;
+          MacroName = *TokIter;
           return 0;
         } else {
           // Do not lose the EOF/EOD.
           Token *Toks = new Token[1];
-          Toks[0] = Tok;
+          Toks[0] = *TokIter;
           EnterTokenStream(Toks, 1, true, true);
           break;
         }
-      } else if (Tok.is(tok::r_paren)) {
+      } else if (TokIter->is(tok::r_paren)) {
         // If we found the ) token, the macro arg list is done.
         if (NumParens-- == 0) {
-          MacroEnd = Tok.getLocation();
           break;
         }
-      } else if (Tok.is(tok::l_paren)) {
+      } else if (TokIter->is(tok::l_paren)) {
         ++NumParens;
-      } else if (Tok.is(tok::comma) && NumParens == 0) {
+      } else if (TokIter->is(tok::comma) && NumParens == 0) {
         // Comma ends this argument if there are more fixed arguments expected.
         // However, if this is a variadic macro, and this is part of the
         // variadic part, then the comma is just an argument token.
         if (!isVariadic) break;
         if (NumFixedArgsLeft > 1)
           break;
-      } else if (Tok.is(tok::comment) && !KeepMacroComments) {
+      } else if (TokIter->is(tok::comment) && !KeepMacroComments) {
         // If this is a comment token in the argument list and we're just in
         // -C mode (not -CC mode), discard the comment.
         continue;
-      } else if (Tok.getIdentifierInfo() != 0) {
-        // Reading macro arguments can cause macros that we are currently
-        // expanding from to be popped off the expansion stack.  Doing so causes
-        // them to be reenabled for expansion.  Here we record whether any
-        // identifiers we lex as macro arguments correspond to disabled macros.
-        // If so, we mark the token as noexpand.  This is a subtle aspect of
-        // C99 6.10.3.4p2.
-        if (MacroInfo *MI = getMacroInfo(Tok.getIdentifierInfo()))
-          if (!MI->isEnabled())
-            Tok.setFlag(Token::DisableExpand);
-      } else if (Tok.is(tok::code_completion)) {
+      } else if (TokIter->is(tok::code_completion)) {
         ContainsCodeCompletionTok = true;
         if (CodeComplete)
           CodeComplete->CodeCompleteMacroArgument(MacroName.getIdentifierInfo(),
@@ -489,12 +635,12 @@
         // code-completion callback.
       }
 
-      ArgTokens.push_back(Tok);
+      ArgTokens.push_back(*TokIter);
     }
 
     // If this was an empty argument list foo(), don't add this as an empty
     // argument.
-    if (ArgTokens.empty() && Tok.getKind() == tok::r_paren)
+    if (ArgTokens.empty() && TokIter->getKind() == tok::r_paren)
       break;
 
     // If this is not a variadic macro, and too many args were specified, emit
@@ -504,27 +650,30 @@
         ArgStartLoc = ArgTokens[ArgTokenStart].getLocation();
 
       if (!ContainsCodeCompletionTok) {
-        // Emit the diagnostic at the macro name in case there is a missing ).
-        // Emitting it at the , could be far away from the macro name.
-        Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc);
-        Diag(MI->getDefinitionLoc(), diag::note_macro_here)
-          << MacroName.getIdentifierInfo();
+        if (EmitErrors) {
+          // Emit the diagnostic at the macro name in case there is a missing ).
+          // Emitting it at the , could be far away from the macro name.
+          Diag(ArgStartLoc, diag::err_too_many_args_in_macro_invoc);
+          Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+              << MacroName.getIdentifierInfo();
+        }
         return 0;
       }
     }
 
-    // Empty arguments are standard in C99 and C++0x, and are supported as an extension in
-    // other modes.
+    // Empty arguments are standard in C99 and C++0x, and are supported as an
+    // extension in other modes.
     if (ArgTokens.size() == ArgTokenStart && !LangOpts.C99)
-      Diag(Tok, LangOpts.CPlusPlus11 ?
-           diag::warn_cxx98_compat_empty_fnmacro_arg :
-           diag::ext_empty_fnmacro_arg);
+      if (EmitErrors)
+        Diag(*TokIter, LangOpts.CPlusPlus11 ?
+             diag::warn_cxx98_compat_empty_fnmacro_arg :
+             diag::ext_empty_fnmacro_arg);
 
     // Add a marker EOF token to the end of the token list for this argument.
     Token EOFTok;
     EOFTok.startToken();
     EOFTok.setKind(tok::eof);
-    EOFTok.setLocation(Tok.getLocation());
+    EOFTok.setLocation(TokIter->getLocation());
     EOFTok.setLength(0);
     ArgTokens.push_back(EOFTok);
     ++NumActuals;
@@ -546,7 +695,7 @@
     Token EOFTok;
     EOFTok.startToken();
     EOFTok.setKind(tok::eof);
-    EOFTok.setLocation(Tok.getLocation());
+    EOFTok.setLocation(TokIter->getLocation());
     EOFTok.setLength(0);
     for (; NumActuals < MinArgsExpected; ++NumActuals)
       ArgTokens.push_back(EOFTok);
@@ -571,9 +720,11 @@
       // If the macro contains the comma pasting extension, the diagnostic
       // is suppressed; we know we'll get another diagnostic later.
       if (!MI->hasCommaPasting()) {
-        Diag(Tok, diag::ext_missing_varargs_arg);
-        Diag(MI->getDefinitionLoc(), diag::note_macro_here)
-          << MacroName.getIdentifierInfo();
+        if (EmitErrors) {
+          Diag(*TokIter, diag::ext_missing_varargs_arg);
+          Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+              << MacroName.getIdentifierInfo();
+        }
       }
 
       // Remember this occurred, allowing us to elide the comma when used for
@@ -584,15 +735,18 @@
       //  A(x) B(x) C()
       isVarargsElided = true;
     } else if (!ContainsCodeCompletionTok) {
-      // Otherwise, emit the error.
-      Diag(Tok, diag::err_too_few_args_in_macro_invoc);
-      Diag(MI->getDefinitionLoc(), diag::note_macro_here)
-        << MacroName.getIdentifierInfo();
+      if (EmitErrors) {
+        // Otherwise, emit the error.
+        Diag(*TokIter, diag::err_too_few_args_in_macro_invoc);
+        Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+            << MacroName.getIdentifierInfo();
+      }
       return 0;
     }
 
     // Add a marker EOF token to the end of the token list for this argument.
-    SourceLocation EndLoc = Tok.getLocation();
+    Token Tok = *TokIter;
+    SourceLocation EndLoc = TokIter->getLocation();
     Tok.startToken();
     Tok.setKind(tok::eof);
     Tok.setLocation(EndLoc);
@@ -605,11 +759,13 @@
 
   } else if (NumActuals > MinArgsExpected && !MI->isVariadic() &&
              !ContainsCodeCompletionTok) {
-    // Emit the diagnostic at the macro name in case there is a missing ).
-    // Emitting it at the , could be far away from the macro name.
-    Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
-    Diag(MI->getDefinitionLoc(), diag::note_macro_here)
-      << MacroName.getIdentifierInfo();
+    if (EmitErrors) {
+      // Emit the diagnostic at the macro name in case there is a missing ).
+      // Emitting it at the , could be far away from the macro name.
+      Diag(MacroName, diag::err_too_many_args_in_macro_invoc);
+      Diag(MI->getDefinitionLoc(), diag::note_macro_here)
+          << MacroName.getIdentifierInfo();
+    }
     return 0;
   }
 
Index: test/Preprocessor/macro_with_initializer_list.cpp
===================================================================
--- test/Preprocessor/macro_with_initializer_list.cpp
+++ test/Preprocessor/macro_with_initializer_list.cpp
@@ -0,0 +1,85 @@
+// RUN: %clang_cc1 -std=c++11 -fsyntax-only -verify %s
+
+namespace std {
+  template <class X>
+  class initializer_list {
+    public:
+    initializer_list();
+  };
+}
+
+class Foo {
+public:
+  Foo();
+  Foo(std::initializer_list<int>);
+  bool operator==(const Foo);
+};
+
+#define EQ(x,y) (void)(x == y)  // expected-note 3{{defined here}}
+void test_EQ() {
+  Foo F;
+  F = Foo{1,2};
+
+  EQ(F,F);
+  EQ(F,Foo());
+  EQ(F,Foo({1,2,3}));
+  EQ(Foo({1,2,3}),F);
+  EQ((Foo{1,2,3}),(Foo{1,2,3}));
+
+  EQ(F,Foo{1,2,3});
+  // expected-error@-1 {{too many arguments provided}}
+  // expected-note@-2 {{require parenthesis}}
+  EQ(Foo{1,2,3},F);
+  // expected-error@-1 {{too many arguments provided}}
+  // expected-note@-2 {{require parenthesis}}
+  EQ(Foo{1,2,3},Foo{1,2,3});
+  // expected-error@-1 {{too many arguments provided}}
+  // expected-note@-2 {{require parenthesis}}
+}
+
+#define NE(x,y) (void)(x != y)  // expected-note 3{{defined here}}
+// Operator != isn't defined.  This tests that the corrected macro arguments
+// are used in the macro expansion.
+void test_NE() {
+  Foo F;
+
+  NE(F,F);
+  // expected-error@-1 {{invalid operands}}
+  NE(F,Foo());
+  // expected-error@-1 {{invalid operands}}
+  NE(F,Foo({1,2,3}));
+  // expected-error@-1 {{invalid operands}}
+  NE((Foo{1,2,3}),(Foo{1,2,3}));
+  // expected-error@-1 {{invalid operands}}
+
+  NE(F,Foo{1,2,3});
+  // expected-error@-1 {{too many arguments provided}}
+  // expected-note@-2 {{require parenthesis}}
+  // expected-error@-3 {{invalid operands}}
+  NE(Foo{1,2,3},F);
+  // expected-error@-1 {{too many arguments provided}}
+  // expected-note@-2 {{require parenthesis}}
+  // expected-error@-3 {{invalid operands}}
+  NE(Foo{1,2,3},Foo{1,2,3});
+  // expected-error@-1 {{too many arguments provided}}
+  // expected-note@-2 {{require parenthesis}}
+  // expected-error@-3 {{invalid operands}}
+}
+
+#define INIT(var, init) Foo var = init; // expected-note 2{{defined here}}
+// Can't use an initializer list as a macro argument.  The commas in the list
+// will be interpretted as argument separaters and adding parenthesis will
+// make it no longer an initializer list.
+void test() {
+  INIT(a, Foo());
+  INIT(b, Foo({1, 2, 3}));
+  INIT(c, Foo());
+
+  INIT(d, Foo{1, 2, 3});
+  // expected-error@-1 {{too many arguments provided}}
+  // expected-note@-2 {{require parenthesis}}
+  INIT(e, {1, 2, 3});
+  // expected-error@-1 {{too many arguments provided}}
+  // expected-note@-2 {{cannot use initializer list as macro argument}}
+  // expected-error@-3 {{use of undeclared identifier}}
+}
Index: include/clang/Basic/DiagnosticLexKinds.td
===================================================================
--- include/clang/Basic/DiagnosticLexKinds.td
+++ include/clang/Basic/DiagnosticLexKinds.td
@@ -460,6 +460,10 @@
   "unterminated function-like macro invocation">;
 def err_too_many_args_in_macro_invoc : Error<
   "too many arguments provided to function-like macro invocation">;
+def note_suggest_parens_for_macro : Note<
+  "braced lists require parenthesis to be recognized inside macros">;
+def note_cannot_use_init_list_as_macro_argument : Note<
+  "cannot use initializer list as macro argument">;
 def err_too_few_args_in_macro_invoc : Error<
   "too few arguments provided to function-like macro invocation">;
 def err_pp_bad_paste : Error<
Index: include/clang/Lex/Preprocessor.h
===================================================================
--- include/clang/Lex/Preprocessor.h
+++ include/clang/Lex/Preprocessor.h
@@ -1364,6 +1364,14 @@
   MacroArgs *ReadFunctionLikeMacroArgs(Token &MacroName, MacroInfo *MI,
                                        SourceLocation &ExpansionEnd);
 
+  /// ProcessFunctionLikeMacroArgTokens - Helper function to
+  /// ReadFunctionLikeMacroArgs.  Thie does the actual processing of the tokens,
+  /// and can be used to validate correctness of tokens added to the
+  /// macro arguments.
+  MacroArgs *ProcessFunctionLikeMacroArgTokens(
+      const SmallVector<Token, 64> &MacroTokens, Token &MacroName,
+      MacroInfo *MI, bool EmitErrors);
+
   /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
   /// as a builtin macro, handle it and return the next token as 'Tok'.
   void ExpandBuiltinMacro(Token &Tok);

_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

[PATCH] Better macro error messages involving initializer lists

Reply via email to