Re: r271883 - [clang-format] make header guard identification stricter (with Lexer).

2016-06-11 Thread Eric Liu via cfe-commits
Hi Mehdi,

No, it is not intended... thanks for pointing that out! I have fixed it at
rL272465.

Thanks,
Eric

On Fri, Jun 10, 2016 at 10:42 PM Mehdi Amini  wrote:

> Hi Eric,
>
> > On Jun 6, 2016, at 4:00 AM, Eric Liu via cfe-commits <
> cfe-commits@lists.llvm.org> wrote:
> >
> > Author: ioeric
> > Date: Mon Jun  6 06:00:13 2016
> > New Revision: 271883
> >
> > URL: http://llvm.org/viewvc/llvm-project?rev=271883=rev
> > Log:
> > [clang-format] make header guard identification stricter (with Lexer).
> >
> > Summary: make header guard identification stricter with Lexer.
> >
> > Reviewers: djasper
> >
> > Subscribers: klimek, cfe-commits
> >
> > Differential Revision: http://reviews.llvm.org/D20959
> >
> > Modified:
> >cfe/trunk/lib/Format/Format.cpp
> >cfe/trunk/unittests/Format/CleanupTest.cpp
> >
> > Modified: cfe/trunk/lib/Format/Format.cpp
> > URL:
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=271883=271882=271883=diff
> >
> ==
> > --- cfe/trunk/lib/Format/Format.cpp (original)
> > +++ cfe/trunk/lib/Format/Format.cpp Mon Jun  6 06:00:13 2016
> > @@ -1436,6 +1436,49 @@ inline bool isHeaderInsertion(const tool
> >
> llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText());
> > }
> >
> > +void skipComments(Lexer , Token ) {
> > +  while (Tok.is(tok::comment))
> > +if (Lex.LexFromRawLexer(Tok))
> > +  return;
> > +}
> > +
> > +// Check if a sequence of tokens is like "# ". If
> it is,
> > +// \p Tok will be the token after this directive; otherwise, it can be
> any token
> > +// after the given \p Tok (including \p Tok).
> > +bool checkAndConsumeDirectiveWithName(Lexer , StringRef Name, Token
> ) {
> > +  bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
> > + Tok.is(tok::raw_identifier) &&
> > + Tok.getRawIdentifier() == Name &&
> !Lex.LexFromRawLexer(Tok) &&
> > + Tok.is(tok::raw_identifier);
> > +  if (Matched)
> > +Lex.LexFromRawLexer(Tok);
> > +  return Matched;
> > +}
> > +
> > +unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
> > +   StringRef Code,
> > +   FormatStyle Style) {
>
>
> FormatStyle is 360B, did you really intended to pass it by value here?
>
> --
> Mehdi
>
>
> > +  std::unique_ptr Env =
> > +  Environment::CreateVirtualEnvironment(Code, FileName,
> /*Ranges=*/{});
> > +  const SourceManager  = Env->getSourceManager();
> > +  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()),
> SourceMgr,
> > +getFormattingLangOpts(Style));
> > +  Token Tok;
> > +  // Get the first token.
> > +  Lex.LexFromRawLexer(Tok);
> > +  skipComments(Lex, Tok);
> > +  unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation());
> > +  if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
> > +skipComments(Lex, Tok);
> > +if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
> > +  return SourceMgr.getFileOffset(Tok.getLocation());
> > +  }
> > +  return AfterComments;
> > +}
> > +
> > +// FIXME: we also need to insert a '\n' at the end of the code if we
> have an
> > +// insertion with offset Code.size(), and there is no '\n' at the end
> of the
> > +// code.
> > // FIXME: do not insert headers into conditional #include blocks, e.g.
> #includes
> > // surrounded by compile condition "#if...".
> > // FIXME: do not insert existing headers.
> > @@ -1469,20 +1512,6 @@ fixCppIncludeInsertions(StringRef Code,
> >   StringRef FileName = Replaces.begin()->getFilePath();
> >   IncludeCategoryManager Categories(Style, FileName);
> >
> > -  std::unique_ptr Env =
> > -  Environment::CreateVirtualEnvironment(Code, FileName,
> /*Ranges=*/{});
> > -  const SourceManager  = Env->getSourceManager();
> > -  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()),
> SourceMgr,
> > -getFormattingLangOpts(Style));
> > -  Token Tok;
> > -  // All new headers should be inserted after this offset.
> > -  int MinInsertOffset = Code.size();
> > -  while (!Lex.LexFromRawLexer(Tok)) {
> > -if (Tok.isNot(tok::comment)) {
> > -  MinInsertOffset = SourceMgr.getFileOffset(Tok.getLocation());
> > -  break;
> > -}
> > -  }
> >   // Record the offset of the end of the last include in each category.
> >   std::map CategoryEndOffsets;
> >   // All possible priorities.
> > @@ -1491,26 +1520,25 @@ fixCppIncludeInsertions(StringRef Code,
> >   for (const auto  : Style.IncludeCategories)
> > Priorities.insert(Category.Priority);
> >   int FirstIncludeOffset = -1;
> > -  bool HeaderGuardFound = false;
> > +  // All new headers should be inserted after this offset.
> > +  unsigned MinInsertOffset =
> > +  getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
> >   StringRef TrimmedCode = 

Re: r271883 - [clang-format] make header guard identification stricter (with Lexer).

2016-06-10 Thread Mehdi Amini via cfe-commits
Hi Eric,

> On Jun 6, 2016, at 4:00 AM, Eric Liu via cfe-commits 
>  wrote:
> 
> Author: ioeric
> Date: Mon Jun  6 06:00:13 2016
> New Revision: 271883
> 
> URL: http://llvm.org/viewvc/llvm-project?rev=271883=rev
> Log:
> [clang-format] make header guard identification stricter (with Lexer).
> 
> Summary: make header guard identification stricter with Lexer.
> 
> Reviewers: djasper
> 
> Subscribers: klimek, cfe-commits
> 
> Differential Revision: http://reviews.llvm.org/D20959
> 
> Modified:
>cfe/trunk/lib/Format/Format.cpp
>cfe/trunk/unittests/Format/CleanupTest.cpp
> 
> Modified: cfe/trunk/lib/Format/Format.cpp
> URL: 
> http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=271883=271882=271883=diff
> ==
> --- cfe/trunk/lib/Format/Format.cpp (original)
> +++ cfe/trunk/lib/Format/Format.cpp Mon Jun  6 06:00:13 2016
> @@ -1436,6 +1436,49 @@ inline bool isHeaderInsertion(const tool
>  llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText());
> }
> 
> +void skipComments(Lexer , Token ) {
> +  while (Tok.is(tok::comment))
> +if (Lex.LexFromRawLexer(Tok))
> +  return;
> +}
> +
> +// Check if a sequence of tokens is like "# ". If it 
> is,
> +// \p Tok will be the token after this directive; otherwise, it can be any 
> token
> +// after the given \p Tok (including \p Tok).
> +bool checkAndConsumeDirectiveWithName(Lexer , StringRef Name, Token 
> ) {
> +  bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
> + Tok.is(tok::raw_identifier) &&
> + Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) 
> &&
> + Tok.is(tok::raw_identifier);
> +  if (Matched)
> +Lex.LexFromRawLexer(Tok);
> +  return Matched;
> +}
> +
> +unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
> +   StringRef Code,
> +   FormatStyle Style) {


FormatStyle is 360B, did you really intended to pass it by value here?

-- 
Mehdi


> +  std::unique_ptr Env =
> +  Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
> +  const SourceManager  = Env->getSourceManager();
> +  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), 
> SourceMgr,
> +getFormattingLangOpts(Style));
> +  Token Tok;
> +  // Get the first token.
> +  Lex.LexFromRawLexer(Tok);
> +  skipComments(Lex, Tok);
> +  unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation());
> +  if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
> +skipComments(Lex, Tok);
> +if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
> +  return SourceMgr.getFileOffset(Tok.getLocation());
> +  }
> +  return AfterComments;
> +}
> +
> +// FIXME: we also need to insert a '\n' at the end of the code if we have an
> +// insertion with offset Code.size(), and there is no '\n' at the end of the
> +// code.
> // FIXME: do not insert headers into conditional #include blocks, e.g. 
> #includes
> // surrounded by compile condition "#if...".
> // FIXME: do not insert existing headers.
> @@ -1469,20 +1512,6 @@ fixCppIncludeInsertions(StringRef Code,
>   StringRef FileName = Replaces.begin()->getFilePath();
>   IncludeCategoryManager Categories(Style, FileName);
> 
> -  std::unique_ptr Env =
> -  Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
> -  const SourceManager  = Env->getSourceManager();
> -  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), 
> SourceMgr,
> -getFormattingLangOpts(Style));
> -  Token Tok;
> -  // All new headers should be inserted after this offset.
> -  int MinInsertOffset = Code.size();
> -  while (!Lex.LexFromRawLexer(Tok)) {
> -if (Tok.isNot(tok::comment)) {
> -  MinInsertOffset = SourceMgr.getFileOffset(Tok.getLocation());
> -  break;
> -}
> -  }
>   // Record the offset of the end of the last include in each category.
>   std::map CategoryEndOffsets;
>   // All possible priorities.
> @@ -1491,26 +1520,25 @@ fixCppIncludeInsertions(StringRef Code,
>   for (const auto  : Style.IncludeCategories)
> Priorities.insert(Category.Priority);
>   int FirstIncludeOffset = -1;
> -  bool HeaderGuardFound = false;
> +  // All new headers should be inserted after this offset.
> +  unsigned MinInsertOffset =
> +  getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
>   StringRef TrimmedCode = Code.drop_front(MinInsertOffset);
>   SmallVector Lines;
>   TrimmedCode.split(Lines, '\n');
> -  int Offset = MinInsertOffset;
> +  unsigned Offset = MinInsertOffset;
> +  unsigned NextLineOffset;
>   for (auto Line : Lines) {
> +NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
> if (IncludeRegex.match(Line, )) {
>   StringRef IncludeName = Matches[2];
>   int 

r271883 - [clang-format] make header guard identification stricter (with Lexer).

2016-06-06 Thread Eric Liu via cfe-commits
Author: ioeric
Date: Mon Jun  6 06:00:13 2016
New Revision: 271883

URL: http://llvm.org/viewvc/llvm-project?rev=271883=rev
Log:
[clang-format] make header guard identification stricter (with Lexer).

Summary: make header guard identification stricter with Lexer.

Reviewers: djasper

Subscribers: klimek, cfe-commits

Differential Revision: http://reviews.llvm.org/D20959

Modified:
cfe/trunk/lib/Format/Format.cpp
cfe/trunk/unittests/Format/CleanupTest.cpp

Modified: cfe/trunk/lib/Format/Format.cpp
URL: 
http://llvm.org/viewvc/llvm-project/cfe/trunk/lib/Format/Format.cpp?rev=271883=271882=271883=diff
==
--- cfe/trunk/lib/Format/Format.cpp (original)
+++ cfe/trunk/lib/Format/Format.cpp Mon Jun  6 06:00:13 2016
@@ -1436,6 +1436,49 @@ inline bool isHeaderInsertion(const tool
  llvm::Regex(IncludeRegexPattern).match(Replace.getReplacementText());
 }
 
+void skipComments(Lexer , Token ) {
+  while (Tok.is(tok::comment))
+if (Lex.LexFromRawLexer(Tok))
+  return;
+}
+
+// Check if a sequence of tokens is like "# ". If it is,
+// \p Tok will be the token after this directive; otherwise, it can be any 
token
+// after the given \p Tok (including \p Tok).
+bool checkAndConsumeDirectiveWithName(Lexer , StringRef Name, Token ) {
+  bool Matched = Tok.is(tok::hash) && !Lex.LexFromRawLexer(Tok) &&
+ Tok.is(tok::raw_identifier) &&
+ Tok.getRawIdentifier() == Name && !Lex.LexFromRawLexer(Tok) &&
+ Tok.is(tok::raw_identifier);
+  if (Matched)
+Lex.LexFromRawLexer(Tok);
+  return Matched;
+}
+
+unsigned getOffsetAfterHeaderGuardsAndComments(StringRef FileName,
+   StringRef Code,
+   FormatStyle Style) {
+  std::unique_ptr Env =
+  Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
+  const SourceManager  = Env->getSourceManager();
+  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
+getFormattingLangOpts(Style));
+  Token Tok;
+  // Get the first token.
+  Lex.LexFromRawLexer(Tok);
+  skipComments(Lex, Tok);
+  unsigned AfterComments = SourceMgr.getFileOffset(Tok.getLocation());
+  if (checkAndConsumeDirectiveWithName(Lex, "ifndef", Tok)) {
+skipComments(Lex, Tok);
+if (checkAndConsumeDirectiveWithName(Lex, "define", Tok))
+  return SourceMgr.getFileOffset(Tok.getLocation());
+  }
+  return AfterComments;
+}
+
+// FIXME: we also need to insert a '\n' at the end of the code if we have an
+// insertion with offset Code.size(), and there is no '\n' at the end of the
+// code.
 // FIXME: do not insert headers into conditional #include blocks, e.g. 
#includes
 // surrounded by compile condition "#if...".
 // FIXME: do not insert existing headers.
@@ -1469,20 +1512,6 @@ fixCppIncludeInsertions(StringRef Code,
   StringRef FileName = Replaces.begin()->getFilePath();
   IncludeCategoryManager Categories(Style, FileName);
 
-  std::unique_ptr Env =
-  Environment::CreateVirtualEnvironment(Code, FileName, /*Ranges=*/{});
-  const SourceManager  = Env->getSourceManager();
-  Lexer Lex(Env->getFileID(), SourceMgr.getBuffer(Env->getFileID()), SourceMgr,
-getFormattingLangOpts(Style));
-  Token Tok;
-  // All new headers should be inserted after this offset.
-  int MinInsertOffset = Code.size();
-  while (!Lex.LexFromRawLexer(Tok)) {
-if (Tok.isNot(tok::comment)) {
-  MinInsertOffset = SourceMgr.getFileOffset(Tok.getLocation());
-  break;
-}
-  }
   // Record the offset of the end of the last include in each category.
   std::map CategoryEndOffsets;
   // All possible priorities.
@@ -1491,26 +1520,25 @@ fixCppIncludeInsertions(StringRef Code,
   for (const auto  : Style.IncludeCategories)
 Priorities.insert(Category.Priority);
   int FirstIncludeOffset = -1;
-  bool HeaderGuardFound = false;
+  // All new headers should be inserted after this offset.
+  unsigned MinInsertOffset =
+  getOffsetAfterHeaderGuardsAndComments(FileName, Code, Style);
   StringRef TrimmedCode = Code.drop_front(MinInsertOffset);
   SmallVector Lines;
   TrimmedCode.split(Lines, '\n');
-  int Offset = MinInsertOffset;
+  unsigned Offset = MinInsertOffset;
+  unsigned NextLineOffset;
   for (auto Line : Lines) {
+NextLineOffset = std::min(Code.size(), Offset + Line.size() + 1);
 if (IncludeRegex.match(Line, )) {
   StringRef IncludeName = Matches[2];
   int Category = Categories.getIncludePriority(
   IncludeName, /*CheckMainHeader=*/FirstIncludeOffset < 0);
-  CategoryEndOffsets[Category] = Offset + Line.size() + 1;
+  CategoryEndOffsets[Category] = NextLineOffset;
   if (FirstIncludeOffset < 0)
 FirstIncludeOffset = Offset;
 }
-Offset += Line.size() + 1;
-// FIXME: make header guard matching stricter, e.g. consider #ifndef.
-if