Hi doug.gregor,

Added tests that show the problems:
1. isAt(Start|End)OfMacroExpansion did not work correctly for nested macro
   calls; consider
   #define M(x) x
   #define N(x) x
   N(f(M(i)))
   Here getSourceText would return "M(f(M(i)))" for the token location of 'i',
   as makeFileCharRange would consider 'i' to be at the start of the macro.
   The problem is that getDecomposedLoc in this case returns the offset from
   the inner call to 'M', and I have not found a way to get the offset from
   'N' easily, as this information seems to be incrementally overwritten during
   preprocessing.
   The solution is to instead take the location before / after the examined
   location and check whether its expansion location is the same as the
   expansion location of the examined location. If this is the case, the
   location before / after is not the first / last token of the macro expansion.

2. makeFileCharRange only considered the outermost layer of macro expansion,
   thus not returning the widest possible range if the outermost expansion
   was not fully describing the range.
   After fixing (1), this would lead to getSourceText returning an empty
   string instead of "M(i)".

Note that I'm still not sure I'm entirely happy with this implementation.
So far I have not found a good way to figure out whether the location
is good to call getLocWithOffset(-1) or getLocWithOffset(tokLen + 1) on.
Especially for the latter, the check for SM.isLocalSourceLocation(...) seems
to work, but doesn't seem like the right solution. Any hints for a better
solution would be highly welcome.

http://llvm-reviews.chandlerc.com/D767

Files:
  lib/Lex/Lexer.cpp
  unittests/Lex/LexerTest.cpp
Index: lib/Lex/Lexer.cpp
===================================================================
--- lib/Lex/Lexer.cpp
+++ lib/Lex/Lexer.cpp
@@ -798,14 +798,22 @@
                                       SourceLocation *MacroBegin) {
   assert(loc.isValid() && loc.isMacroID() && "Expected a valid macro loc");
 
-  std::pair<FileID, unsigned> infoLoc = SM.getDecomposedLoc(loc);
-  // FIXME: If the token comes from the macro token paste operator ('##')
-  // this function will always return false;
-  if (infoLoc.second > 0)
-    return false; // Does not point at the start of token.
-
+  FileID FID = SM.getFileID(loc);
   SourceLocation expansionLoc =
-    SM.getSLocEntry(infoLoc.first).getExpansion().getExpansionLocStart();
+    SM.getSLocEntry(FID).getExpansion().getExpansionLocStart();
+
+  SourceLocation beforeLoc = loc.getLocWithOffset(-1);
+  if (beforeLoc.isValid()) {
+    FileID BeforeFID = SM.getFileID(beforeLoc);
+    SrcMgr::SLocEntry SE = SM.getSLocEntry(BeforeFID);
+    if (SE.isExpansion()) {
+      SourceLocation beforeExpansionLoc =
+          SE.getExpansion().getExpansionLocStart();
+      if (expansionLoc == beforeExpansionLoc)
+        return false;
+    }
+  }
+
   if (expansionLoc.isFileID()) {
     // No other macro expansions, this is the first.
     if (MacroBegin)
@@ -830,15 +838,20 @@
     return false;
 
   FileID FID = SM.getFileID(loc);
-  SourceLocation afterLoc = loc.getLocWithOffset(tokLen+1);
-  if (SM.isInFileID(afterLoc, FID))
-    return false; // Still in the same FileID, does not point to the last token.
-
-  // FIXME: If the token comes from the macro token paste operator ('##')
-  // or the stringify operator ('#') this function will always return false;
-
   SourceLocation expansionLoc =
     SM.getSLocEntry(FID).getExpansion().getExpansionLocEnd();
+
+  SourceLocation afterLoc = loc.getLocWithOffset(tokLen + 1);
+  if (afterLoc.isValid() && SM.isLocalSourceLocation(afterLoc)) {
+    FileID AfterFID = SM.getFileID(afterLoc);
+    SrcMgr::SLocEntry SE = SM.getSLocEntry(AfterFID);
+    if (SE.isExpansion()) {
+      SourceLocation afterExpansionLoc = SE.getExpansion().getExpansionLocEnd();
+      if (expansionLoc == afterExpansionLoc)
+        return false;
+    }
+  }
+
   if (expansionLoc.isFileID()) {
     // No other macro expansions.
     if (MacroEnd)
@@ -905,15 +918,20 @@
   }
 
   assert(Begin.isMacroID() && End.isMacroID());
-  SourceLocation MacroBegin, MacroEnd;
-  if (isAtStartOfMacroExpansion(Begin, SM, LangOpts, &MacroBegin) &&
-      ((Range.isTokenRange() && isAtEndOfMacroExpansion(End, SM, LangOpts,
-                                                        &MacroEnd)) ||
-       (Range.isCharRange() && isAtStartOfMacroExpansion(End, SM, LangOpts,
-                                                         &MacroEnd)))) {
-    Range.setBegin(MacroBegin);
-    Range.setEnd(MacroEnd);
-    return makeRangeFromFileLocs(Range, SM, LangOpts);
+  SourceLocation InnermostBegin = Begin, InnermostEnd = End;
+  while (InnermostBegin.isMacroID() && InnermostEnd.isMacroID()) {
+    SourceLocation MacroBegin, MacroEnd;
+    if (isAtStartOfMacroExpansion(InnermostBegin, SM, LangOpts, &MacroBegin) &&
+        ((Range.isTokenRange() &&
+          isAtEndOfMacroExpansion(InnermostEnd, SM, LangOpts, &MacroEnd)) ||
+         (Range.isCharRange() &&
+          isAtStartOfMacroExpansion(InnermostEnd, SM, LangOpts, &MacroEnd)))) {
+      Range.setBegin(MacroBegin);
+      Range.setEnd(MacroEnd);
+      return makeRangeFromFileLocs(Range, SM, LangOpts);
+    }
+    InnermostBegin = SM.getImmediateSpellingLoc(InnermostBegin);
+    InnermostEnd = SM.getImmediateSpellingLoc(InnermostEnd);
   }
 
   FileID FID;
Index: unittests/Lex/LexerTest.cpp
===================================================================
--- unittests/Lex/LexerTest.cpp
+++ unittests/Lex/LexerTest.cpp
@@ -28,6 +28,20 @@
 
 namespace {
 
+class VoidModuleLoader : public ModuleLoader {
+  virtual ModuleLoadResult loadModule(SourceLocation ImportLoc, 
+                                      ModuleIdPath Path,
+                                      Module::NameVisibilityKind Visibility,
+                                      bool IsInclusionDirective) {
+    return ModuleLoadResult();
+  }
+
+  virtual void makeModuleVisible(Module *Mod,
+                                 Module::NameVisibilityKind Visibility,
+                                 SourceLocation ImportLoc,
+                                 bool Complain) { }
+};
+
 // The test fixture.
 class LexerTest : public ::testing::Test {
 protected:
@@ -42,6 +56,43 @@
     Target = TargetInfo::CreateTargetInfo(Diags, &*TargetOpts);
   }
 
+  std::vector<Token> CheckLex(StringRef Source,
+                              ArrayRef<tok::TokenKind> ExpectedTokens) {
+    MemoryBuffer *buf = MemoryBuffer::getMemBuffer(Source);
+    (void) SourceMgr.createMainFileIDForMemBuffer(buf);
+
+    VoidModuleLoader ModLoader;
+    HeaderSearch HeaderInfo(new HeaderSearchOptions, FileMgr, Diags, LangOpts,
+                            Target.getPtr());
+    Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, Target.getPtr(),
+                    SourceMgr, HeaderInfo, ModLoader, /*IILookup =*/ 0,
+                    /*OwnsHeaderSearch =*/ false,
+                    /*DelayInitialization =*/ false);
+    PP.EnterMainSourceFile();
+
+    std::vector<Token> toks;
+    while (1) {
+      Token tok;
+      PP.Lex(tok);
+      if (tok.is(tok::eof))
+        break;
+      toks.push_back(tok);
+    }
+
+    EXPECT_EQ(ExpectedTokens.size(), toks.size());
+    for (unsigned i = 0, e = ExpectedTokens.size(); i != e; ++i) {
+      EXPECT_EQ(ExpectedTokens[i], toks[i].getKind());
+    }
+
+    return toks;
+  }
+
+  std::string getSourceText(Token Begin, Token End) {
+    return Lexer::getSourceText(CharSourceRange::getTokenRange(SourceRange(
+                                    Begin.getLocation(), End.getLocation())),
+                                SourceMgr, LangOpts);
+  }
+
   FileSystemOptions FileMgrOpts;
   FileManager FileMgr;
   IntrusiveRefCntPtr<DiagnosticIDs> DiagID;
@@ -52,65 +103,139 @@
   IntrusiveRefCntPtr<TargetInfo> Target;
 };
 
-class VoidModuleLoader : public ModuleLoader {
-  virtual ModuleLoadResult loadModule(SourceLocation ImportLoc, 
-                                      ModuleIdPath Path,
-                                      Module::NameVisibilityKind Visibility,
-                                      bool IsInclusionDirective) {
-    return ModuleLoadResult();
-  }
+TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgument) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::l_paren);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::r_paren);
 
-  virtual void makeModuleVisible(Module *Mod,
-                                 Module::NameVisibilityKind Visibility,
-                                 SourceLocation ImportLoc,
-                                 bool Complain) { }
-};
+  std::vector<Token> toks = CheckLex("#define M(x) x\n"
+                                     "M(f(M(i)))",
+                                     ExpectedTokens);
+
+  EXPECT_EQ("M(i)", getSourceText(toks[2], toks[2]));
+}
+
+TEST_F(LexerTest, GetSourceTextExpandsToMaximumInMacroArgumentForEndOfMacro) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::identifier);
+
+  std::vector<Token> toks = CheckLex("#define M(x) x\n"
+                                     "M(M(i) c)",
+                                     ExpectedTokens);
+
+  EXPECT_EQ("M(i)", getSourceText(toks[0], toks[0]));
+}
+
+TEST_F(LexerTest, GetSourceTextWorksAcrossTokenPastes) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::l_paren);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::r_paren);
+
+  std::vector<Token> toks = CheckLex("#define M(x) x\n"
+                                     "#define C(x) M(x##c)\n"
+                                     "M(f(C(i)))",
+                                     ExpectedTokens);
+
+  EXPECT_EQ("C(i)", getSourceText(toks[2], toks[2]));
+}
+
+TEST_F(LexerTest, GetSourceTextExpandsAcrossMultipleMacroCalls) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::l_paren);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::r_paren);
+
+  std::vector<Token> toks = CheckLex("#define M(x) x\n"
+                                     "f(M(M(i)))",
+                                     ExpectedTokens);
+  EXPECT_EQ("M(M(i))", getSourceText(toks[2], toks[2]));
+}
+
+TEST_F(LexerTest, GetSourceTextInMiddleOfMacroArgument) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::l_paren);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::r_paren);
+
+  std::vector<Token> toks = CheckLex("#define M(x) x\n"
+                                     "M(f(i))",
+                                     ExpectedTokens);
+  EXPECT_EQ("i", getSourceText(toks[2], toks[2]));
+}
+
+TEST_F(LexerTest, GetSourceTextExpandsAroundDifferentMacroCalls) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::l_paren);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::r_paren);
+
+  std::vector<Token> toks = CheckLex("#define M(x) x\n"
+                                     "#define C(x) x\n"
+                                     "f(C(M(i)))",
+                                     ExpectedTokens);
+  EXPECT_EQ("C(M(i))", getSourceText(toks[2], toks[2]));
+}
+
+TEST_F(LexerTest, GetSourceTextOnlyExpandsIfFirstTokenInMacro) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::l_paren);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::r_paren);
+
+  std::vector<Token> toks = CheckLex("#define M(x) x\n"
+                                     "#define C(x) c x\n"
+                                     "f(C(M(i)))",
+                                     ExpectedTokens);
+  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
+}
+
+TEST_F(LexerTest, GetSourceTextExpandsRecursively) {
+  std::vector<tok::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::l_paren);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::r_paren);
+
+  std::vector<Token> toks = CheckLex("#define M(x) x\n"
+                                     "#define C(x) c M(x)\n"
+                                     "C(f(M(i)))",
+                                     ExpectedTokens);
+  EXPECT_EQ("M(i)", getSourceText(toks[3], toks[3]));
+}
 
 TEST_F(LexerTest, LexAPI) {
-  const char *source =
-    "#define M(x) [x]\n"
-    "#define N(x) x\n"
-    "#define INN(x) x\n"
-    "#define NOF1 INN(val)\n"
-    "#define NOF2 val\n"
-    "M(foo) N([bar])\n"
-    "N(INN(val)) N(NOF1) N(NOF2) N(val)";
-
-  MemoryBuffer *buf = MemoryBuffer::getMemBuffer(source);
-  (void)SourceMgr.createMainFileIDForMemBuffer(buf);
-
-  VoidModuleLoader ModLoader;
-  HeaderSearch HeaderInfo(new HeaderSearchOptions, FileMgr, Diags, LangOpts, 
-                          Target.getPtr());
-  Preprocessor PP(new PreprocessorOptions(), Diags, LangOpts, Target.getPtr(),
-                  SourceMgr, HeaderInfo, ModLoader,
-                  /*IILookup =*/ 0,
-                  /*OwnsHeaderSearch =*/false,
-                  /*DelayInitialization =*/ false);
-  PP.EnterMainSourceFile();
-
-  std::vector<Token> toks;
-  while (1) {
-    Token tok;
-    PP.Lex(tok);
-    if (tok.is(tok::eof))
-      break;
-    toks.push_back(tok);
-  }
+  std::vector<tok::TokenKind> ExpectedTokens;
+  ExpectedTokens.push_back(tok::l_square);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::r_square);
+  ExpectedTokens.push_back(tok::l_square);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::r_square);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::identifier);
+  ExpectedTokens.push_back(tok::identifier);
+
+  std::vector<Token> toks = CheckLex("#define M(x) [x]\n"
+                                     "#define N(x) x\n"
+                                     "#define INN(x) x\n"
+                                     "#define NOF1 INN(val)\n"
+                                     "#define NOF2 val\n"
+                                     "M(foo) N([bar])\n"
+                                     "N(INN(val)) N(NOF1) N(NOF2) N(val)",
+                                     ExpectedTokens);
 
-  // Make sure we got the tokens that we expected.
-  ASSERT_EQ(10U, toks.size());
-  ASSERT_EQ(tok::l_square, toks[0].getKind());
-  ASSERT_EQ(tok::identifier, toks[1].getKind());
-  ASSERT_EQ(tok::r_square, toks[2].getKind());
-  ASSERT_EQ(tok::l_square, toks[3].getKind());
-  ASSERT_EQ(tok::identifier, toks[4].getKind());
-  ASSERT_EQ(tok::r_square, toks[5].getKind());
-  ASSERT_EQ(tok::identifier, toks[6].getKind());
-  ASSERT_EQ(tok::identifier, toks[7].getKind());
-  ASSERT_EQ(tok::identifier, toks[8].getKind());
-  ASSERT_EQ(tok::identifier, toks[9].getKind());
-  
   SourceLocation lsqrLoc = toks[0].getLocation();
   SourceLocation idLoc = toks[1].getLocation();
   SourceLocation rsqrLoc = toks[2].getLocation();
_______________________________________________
cfe-commits mailing list
[email protected]
http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits

Reply via email to