This is an automated email from the ASF dual-hosted git repository.

leaves12138 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/paimon-cpp.git


The following commit(s) were added to refs/heads/main by this push:
     new 478887b  chore: add fix_includes.py for include cleanup workflow (#25)
478887b is described below

commit 478887ba48218b45c237da995b5ff2646d9894aa
Author: Yonghao Fang <[email protected]>
AuthorDate: Fri May 29 10:39:23 2026 +0800

    chore: add fix_includes.py for include cleanup workflow (#25)
    
    Squash merge PR #25.
---
 LICENSE                       |    2 +
 build_support/fix_includes.py | 2498 +++++++++++++++++++++++++++++++++++++++++
 2 files changed, 2500 insertions(+)

diff --git a/LICENSE b/LICENSE
index 976412a..51438f3 100644
--- a/LICENSE
+++ b/LICENSE
@@ -298,6 +298,8 @@ This product includes code from include-what-you-use.
 
 * IWYU driver utility:
   * build_support/iwyu/iwyu_tool.py
+* IWYU include rewrite utility:
+   * build_support/fix_includes.py
 
 Copyright: 2003-2010 University of Illinois at Urbana-Champaign.
 License: University of Illinois/NCSA Open Source License.
diff --git a/build_support/fix_includes.py b/build_support/fix_includes.py
new file mode 100644
index 0000000..534a9d1
--- /dev/null
+++ b/build_support/fix_includes.py
@@ -0,0 +1,2498 @@
+#!/usr/bin/env python3
+
+##===--- fix_includes.py - rewrite source files based on iwyu output ------===##
+#
+#                     The LLVM Compiler Infrastructure
+#
+# This file is distributed under the University of Illinois Open Source
+# License. See LICENSE.TXT for details.
+#
+##===----------------------------------------------------------------------===##
+
+from __future__ import print_function
+
+"""Update files with the 'correct' #include and forward-declare lines.
+
+Given the output of include_what_you_use on stdin -- when run at the
+(default) --v=1 verbosity level or higher -- modify the files
+mentioned in the output, removing their old #include lines and
+replacing them with the lines given by the include_what_you_use
+script.
+
+This script runs in four stages.  In the first, it groups physical
+lines together to form 'move spans'.  A 'move span' is the atomic unit
+for moving or deleting code.  A move span is either a) an #include
+line, along with any comment lines immediately preceding it; b) a
+forward-declare line -- or more if it's a multi-line forward declare
+-- along with preceding comments; c) any other single line.  Example:
+
+   // I really am glad I'm forward-declaring this class!
+   // If I didn't, I'd have to #include the entire world.
+   template<typename A, typename B, typename C, typename D>
+   class MyClass;
+
+Then, it groups move spans together into 'reorder spans'.  These are
+spans of code that consist entirely of #includes and forward-declares,
+maybe separated by blank lines and comments.  We assume that we can
+arbitrarily reorder #includes and forward-declares within a reorder
+span, without affecting correctness.  Things like #ifdefs, #defines,
+namespace declarations, static variable declarations, class
+definitions, etc -- just about anything -- break up reorder spans.
+
+In stage 3 it deletes all #include and forward-declare lines that iwyu
+says to delete.  iwyu includes line numbers for deletion, making this
+part easy.  If this step results in "empty" #ifdefs or namespaces
+(#ifdefs or namespaces with no code inside them), we delete those as
+well.  We recalculate the reorder spans, which may have gotten bigger
+due to the deleted code.
+
+In stage 4 it adds new iwyu-dictated #includes and forward-declares
+after the last existing #includes and forward-declares.  Then it
+reorders the #includes and forward-declares to match the order
+specified by iwyu.  It follows iwyu's instructions as much as
+possible, modulo the constraint that an #include or forward-declare
+cannot leave its current reorder span.
+
+All this moving messes up the blank lines, which we then need to fix
+up.  Then we're done!
+"""
+
+__author__ = '[email protected] (Craig Silverstein)'
+
+import difflib
+import argparse
+import os
+import re
+import sys
+from collections import OrderedDict
+
+_EPILOG = """\
+Reads the output from include-what-you-use on stdin -- run with --v=1 (default)
+verbosity level or above -- and, unless --sort_only or --dry_run is specified,
+modifies the files mentioned in the output, removing their old #include lines
+and replacing them with the lines given by include-what-you-use.  It also sorts
+the #include and forward-declare lines.
+
+All files mentioned in include-what-you-use output are modified, unless
+filenames are specified on the commandline, in which case only those files are
+modified.
+
+The exit code is non-zero if a critical error occurs, otherwise zero.
+"""
+
+_COMMENT_RE = re.compile(r'\s*//.*')
+
+# These are the types of lines a file can have.  These are matched
+# using re.match(), so don't need a leading ^.
+_C_COMMENT_START_RE = re.compile(r'\s*/\*')
+_C_COMMENT_END_RE = re.compile(r'.*\*/\s*(.*)$')
+_COMMENT_LINE_RE = re.compile(r'\s*//')
+_PRAGMA_ONCE_LINE_RE = re.compile(r'\s*#\s*pragma\s+once')
+_PRAGMA_PUSH_LINE_RE = re.compile(r'\s*#\s*pragma.*push.*')
+_PRAGMA_POP_LINE_RE = re.compile(r'\s*#\s*pragma.*pop.*')
+_BLANK_LINE_RE = re.compile(r'\s*$')
+_IF_RE = re.compile(r'\s*#\s*if')               # compiles #if/ifdef/ifndef
+_ELSE_RE = re.compile(r'\s*#\s*(else|elif)\b')  # compiles #else/elif
+_ENDIF_RE = re.compile(r'\s*#\s*endif\b')
+# This is used to delete 'empty' namespaces after fwd-decls are removed.
+# Some third-party libraries use macros to start/end namespaces.
+_NAMESPACE_START_RE = re.compile(r'\s*(namespace\b[^{]*{\s*)+(//.*)?$|'
+                                 r'\s*(U_NAMESPACE_BEGIN)|'
+                                 r'\s*(HASH_NAMESPACE_DECLARATION_START)')
+# Also detect Allman and mixed style namespaces.  Use a continue regex for
+# validation and to correctly set the line info.
+_NAMESPACE_START_ALLMAN_RE = re.compile(r'\s*(namespace\b[^{=]*)+(//.*)?$')
+_NAMESPACE_START_MIXED_RE = re.compile(
+  r'\s*(namespace\b[^{]*{\s*)+(namespace\b[^{]*)+(//.*)?$')
+_NAMESPACE_CONTINUE_ALLMAN_MIXED_RE = re.compile(r'\s*{\s*(//.*)?$')
+_NAMESPACE_END_RE = re.compile(r'\s*(})|'
+                               r'\s*(U_NAMESPACE_END)|'
+                               r'\s*(HASH_NAMESPACE_DECLARATION_END)')
+# The group (in parens) holds the unique 'key' identifying this #include.
+_INCLUDE_RE = re.compile(r'\s*#\s*include\s+([<"][^">]+[>"])')
+# We don't need this to actually match forward-declare lines (we get
+# that information from the iwyu input), but we do need an RE here to
+# serve as an index to _LINE_TYPES.  So we use an RE that never matches.
+_FORWARD_DECLARE_RE = re.compile(r'$.FORWARD_DECLARE_RE')
+# Likewise, used to mark an '#ifdef' line of a header guard, or other
+# #ifdef that covers an entire file.
+_HEADER_GUARD_RE = re.compile(r'$.HEADER_GUARD_RE')
+# Marks the '#define' line that comes after a header guard.  Since we
+# know the previous line was a header-guard line, we're not that picky
+# about this one.
+_HEADER_GUARD_DEFINE_RE = re.compile(r'\s*#\s*define\s+')
+# Pragma to mark the associated header (for use when it cannot be deduced from
+# the filename)
+_IWYU_PRAGMA_ASSOCIATED_RE = re.compile(r'IWYU\s*pragma:\s*associated')
+
+# We annotate every line in the source file by the re it matches, or None.
+# Note that not all of the above RE's are represented here; for instance,
+# we fold _C_COMMENT_START_RE and _C_COMMENT_END_RE into _COMMENT_LINE_RE.
+# The _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE is also set on lines when Allman
+# and mixed namespaces are detected but the RE is too easy to match to add
+# under normal circumstances (must always be preceded by Allman/mixed).
+_LINE_TYPES = [_COMMENT_LINE_RE, _BLANK_LINE_RE,
+               _NAMESPACE_START_RE, _NAMESPACE_START_ALLMAN_RE,
+               _NAMESPACE_START_MIXED_RE, _NAMESPACE_END_RE,
+               _IF_RE, _ELSE_RE, _ENDIF_RE,
+               _INCLUDE_RE, _FORWARD_DECLARE_RE,
+               _HEADER_GUARD_RE, _HEADER_GUARD_DEFINE_RE,
+               _PRAGMA_ONCE_LINE_RE,
+               _PRAGMA_PUSH_LINE_RE, _PRAGMA_POP_LINE_RE,
+              ]
+
+# A regexp matching #include lines that should be a barrier for
+# sorting -- that is, we should never reorganize the code so an
+# #include that used to come before this line now comes after, or vice
+# versa.  This can be used for 'fragile' #includes that require other
+# #includes to happen before them to function properly.
+# (Note that the barrier has no effect on where new #includes are
+# added; it just affects the reordering of existing #includes.)
+_BARRIER_INCLUDES = re.compile(r'^\s*#\s*include\s+(<linux/)')
+
+# A list of all known extensions for C++ source files, used to
+# guess if a filename is a source file or a header.
+# Please keep this in sync with source_extensions in iwyu_path_util.cc.
+_SOURCE_EXTENSIONS = [".c", ".C", ".cc", ".CC", ".cxx", ".CXX",
+                      ".cpp", ".CPP", ".c++", ".C++", ".cp"]
+
+
+# Adapt Python 2 iterators to Python 3 syntax
+if sys.version_info[0] < 3:
+  def next(i):
+    return i.next()
+
+
+class OrderedSet(object):
+  """ Sometimes sets affect order of outputs, which hinders testing. This
+  (naive) set implementation preserves order to avoid that problem. """
+  def __init__(self, iterable=None):
+    iterable = iterable or []
+    self.storage = OrderedDict((a, None) for a in iterable)
+
+  def add(self, value):
+    self.storage[value] = None
+
+  def intersection_update(self, other):
+    self.storage = OrderedDict(
+        (k, None) for k in self.storage if k in other.storage)
+
+  def update(self, other):
+    self.storage.update(other.storage)
+
+  def difference(self, other):
+    diff_values = (v for v in self if v not in other)
+    return OrderedSet(diff_values)
+
+  def __iter__(self):
+    return self.storage.keys().__iter__()
+
+  def __contains__(self, value):
+    return value in self.storage
+
+  def __len__(self):
+    return len(self.storage)
+
+
+def _MayBeHeaderFile(filename):
+  """Tries to figure out if filename is a C++ header file.  Defaults to yes."""
+  # Header files have all sorts of extensions: .h, .hpp, .hxx, or no
+  # extension at all.  So we say everything is a header file unless it
+  # has a known extension that's not.
+  extension = os.path.splitext(filename)[1]
+  return extension not in _SOURCE_EXTENSIONS
+
+
+class FixIncludesError(Exception):
+  pass
+
+
+class IWYUOutputRecord(object):
+  """Information that the iwyu output file has about one source file."""
+
+  def __init__(self, filename):
+    self.filename = filename
+
+    # A set of integers.
+    self.lines_to_delete = set()
+
+    # A set of integer line-numbers, for each #include iwyu saw that
+    # is marked with a line number.  This is usually not an exhaustive
+    # list of include-lines, but that's ok because we only use this
+    # data structure for sanity checking: we double-check with our own
+    # analysis that these lines are all # #include lines.  If not, we
+    # know the iwyu data is likely out of date, and we complain.  So
+    # more data here is always welcome, but not essential.
+    self.some_include_lines = set()
+
+    # A set of integer line-number spans [start_line, end_line), for
+    # each forward-declare iwyu saw.  iwyu reports line numbers for
+    # every forward-declare it sees in the source code.  (It won't
+    # report, though, forward-declares inside '#if 0' or similar.)
+    self.seen_forward_declare_lines = set()
+
+    # Those spans which pertain to nested forward declarations (i.e. of nested
+    # classes).  This set should be a subset of self.seen_forward_declare_lines.
+    self.nested_forward_declare_lines = set()
+
+    # A set of each line in the iwyu 'add' section.
+    self.includes_and_forward_declares_to_add = OrderedSet()
+
+    # A map from the include filename (including ""s or <>s) to the
+    # full line as given by iwyu, which includes comments that iwyu
+    # has put next to the #include.  This holds both 'to-add' and
+    # 'to-keep' #includes.  If flags.comments is False, the comments
+    # are removed before adding to this list.
+    self.full_include_lines = OrderedDict()
+
+  def Merge(self, other):
+    """Merges other with this one.  They must share a filename.
+
+    This function is intended to be used when we see two iwyu records
+    in the input, both for the same file.  We can merge the two together.
+    We are conservative: we union the lines to add, and intersect the
+    lines to delete.
+
+    Arguments:
+      other: an IWYUOutputRecord to merge into this one.
+        It must have the same value for filename that self does.
+    """
+    assert self.filename == other.filename, "Can't merge distinct files"
+    self.lines_to_delete.intersection_update(other.lines_to_delete)
+    self.some_include_lines.update(other.some_include_lines)
+    self.seen_forward_declare_lines.update(other.seen_forward_declare_lines)
+    self.nested_forward_declare_lines.update(other.nested_forward_declare_lines)
+    self.includes_and_forward_declares_to_add.update(
+        other.includes_and_forward_declares_to_add)
+    self.full_include_lines.update(other.full_include_lines)
+
+  def HasContentfulChanges(self):
+    """Returns true iff this record has at least one add or delete."""
+    return (self.includes_and_forward_declares_to_add or
+            self.lines_to_delete)
+
+  def __str__(self):
+    return ('--- iwyu record ---\n  FILENAME: %s\n  LINES TO DELETE: %s\n'
+            '  (SOME) INCLUDE LINES: %s\n  (SOME) FWD-DECL LINES: %s\n'
+            '  TO ADD: %s\n  ALL INCLUDES: %s\n---\n'
+            % (self.filename, self.lines_to_delete,
+               self.some_include_lines, self.seen_forward_declare_lines,
+               self.includes_and_forward_declares_to_add,
+               self.full_include_lines))
+
+
+class IWYUOutputParser(object):
+  """Parses the lines in iwyu output corresponding to one source file."""
+
+  # iwyu adds this comment to some lines to map them to the source file.
+  _LINE_NUMBERS_COMMENT_RE = re.compile(r'\s*// lines ([0-9]+)-([0-9]+)')
+
+  # The output of include-what-you-use has sections that indicate what
+  # #includes and forward-declares should be added to the output file,
+  # what should be removed, and what the end result is.  The first line
+  # of each section also has the filename.
+  _ADD_SECTION_RE = re.compile(r'^(.*) should add these lines:$')
+  _REMOVE_SECTION_RE = re.compile(r'^(.*) should remove these lines:$')
+  _TOTAL_SECTION_RE = re.compile(r'^The full include-list for (.*):$')
+  _SECTION_END_RE = re.compile(r'^---$')
+
+  # Alternately, if a file does not need any iwyu modifications (though
+  # it still may need its #includes sorted), iwyu will emit this:
+  _NO_EDITS_RE = re.compile(r'^\((.*) has correct #includes/fwd-decls\)$')
+
+  _RE_TO_NAME = {_ADD_SECTION_RE: 'add',
+                 _REMOVE_SECTION_RE: 'remove',
+                 _TOTAL_SECTION_RE: 'total',
+                 _SECTION_END_RE: 'end',
+                 _NO_EDITS_RE: 'no_edits',
+                }
+  # A small state-transition machine.  key==None indicates the start
+  # state.  value==None means that the key is an end state (that is,
+  # its presence indicates the record is finished).
+  _EXPECTED_NEXT_RE = {
+      None:               frozenset([_ADD_SECTION_RE, _NO_EDITS_RE]),
+      _ADD_SECTION_RE:    frozenset([_REMOVE_SECTION_RE]),
+      _REMOVE_SECTION_RE: frozenset([_TOTAL_SECTION_RE]),
+      _TOTAL_SECTION_RE:  frozenset([_SECTION_END_RE]),
+      _SECTION_END_RE:    None,
+      _NO_EDITS_RE:       None,
+  }
+
+  def __init__(self):
+    # This is set to one of the 'section' REs above.  None is the start-state.
+    self.current_section = None
+    self.filename = '<unknown file>'
+    self.lines_by_section = {}     # key is an RE, value is a list of lines
+
+  def _ProcessOneLine(self, line, basedir=None):
+    """Reads one line of input, updates self, and returns False at EORecord.
+
+    If the line matches one of the hard-coded section names, updates
+    self.filename and self.current_section.  Otherwise, the line is
+    taken to be a member of the currently active section, and is added
+    to self.lines_by_section.
+
+    Arguments:
+      line: one line from the iwyu input file.
+
+    Returns:
+      False if the line is the end-of-section marker, True otherwise.
+
+    Raises:
+      FixIncludesError: if there is an out-of-order section or
+      mismatched filename.
+    """
+    line = line.rstrip()     # don't worry about line endings
+    if not line:             # just ignore blank lines
+      return True
+
+    for (section_re, section_name) in self._RE_TO_NAME.items():
+      m = section_re.search(line)
+      if m:
+        # Check or set the filename (if the re has a group, it's for filename).
+        if section_re.groups >= 1:
+          this_filename = NormalizeFilePath(basedir, m.group(1))
+
+          if (self.current_section is not None and
+              this_filename != self.filename):
+            raise FixIncludesError('"%s" section for %s comes after "%s" for %s'
+                                   % (section_name, this_filename,
+                                      self._RE_TO_NAME[self.current_section],
+                                      self.filename))
+          self.filename = this_filename
+
+        # Check and set the new section we're entering.
+        if section_re not in self._EXPECTED_NEXT_RE[self.current_section]:
+          if self.current_section is None:
+            raise FixIncludesError('%s: "%s" section unexpectedly comes first'
+                                   % (self.filename, section_name))
+          else:
+            raise FixIncludesError('%s: "%s" section unexpectedly follows "%s"'
+                                   % (self.filename, section_name,
+                                      self._RE_TO_NAME[self.current_section]))
+        self.current_section = section_re
+        # We're done parsing this record if this section has nothing after it.
+        return self._EXPECTED_NEXT_RE[self.current_section] is not None
+
+    # We're not starting a new section, so just add to the current section.
+    # We ignore lines before section-start, they're probably things like
+    # compiler messages ("Compiling file foo").
+    if self.current_section is not None:
+      self.lines_by_section.setdefault(self.current_section, []).append(line)
+    return True
+
+  def ParseOneRecord(self, iwyu_output, flags):
+    """Given a file object with output from an iwyu run, return per file info.
+
+    For each source file that iwyu_output mentions (because iwyu was run on
+    it), we return a structure holding the information in IWYUOutputRecord:
+    1) What file these changes apply to
+    2) What line numbers hold includes/fwd-declares to remove
+    3) What includes/fwd-declares to add
+    4) Ordering information for includes and fwd-declares
+
+    Arguments:
+      iwyu_output: a File object returning lines from an iwyu run
+      flags: commandline flags, as parsed by argparse.  We use
+         flags.comments, which controls whether we output comments
+         generated by iwyu.
+    Returns:
+       An IWYUOutputRecord object, or None at EOF.
+
+    Raises:
+       FixIncludesError: for malformed-looking lines in the iwyu output.
+    """
+    for line in iwyu_output:
+      if not self._ProcessOneLine(line, flags.basedir):
+        # returns False at end-of-record
+        break
+    else:                                  # for/else
+      return None                          # at EOF
+
+    # Now set up all the fields in an IWYUOutputRecord.
+    # IWYUOutputRecord.filename
+    retval = IWYUOutputRecord(self.filename)
+
+    # IWYUOutputRecord.lines_to_delete
+    for line in self.lines_by_section.get(self._REMOVE_SECTION_RE, []):
+      m = self._LINE_NUMBERS_COMMENT_RE.search(line)
+      if not m:
+        raise FixIncludesError('line "%s" (for %s) has no line number'
+                               % (line, self.filename))
+      # The RE is of the form [start_line, end_line], inclusive.
+      for line_number in range(int(m.group(1)), int(m.group(2)) + 1):
+        retval.lines_to_delete.add(line_number)
+
+    # IWYUOutputRecord.some_include_lines
+    for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
+                 self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
+      if not _INCLUDE_RE.match(line):
+        continue
+      m = self._LINE_NUMBERS_COMMENT_RE.search(line)
+      if not m:
+        continue   # not all #include lines have line numbers, but some do
+      for line_number in range(int(m.group(1)), int(m.group(2)) + 1):
+        retval.some_include_lines.add(line_number)
+
+    # IWYUOutputRecord.seen_forward_declare_lines
+    for line in (self.lines_by_section.get(self._REMOVE_SECTION_RE, []) +
+                 self.lines_by_section.get(self._TOTAL_SECTION_RE, [])):
+      # Everything that's not an #include is a forward-declare.
+      if line.startswith('- '):    # the 'remove' lines all start with '- '.
+        line = line[len('- '):]
+      if _INCLUDE_RE.match(line):
+        continue
+      m = self._LINE_NUMBERS_COMMENT_RE.search(line)
+      if m:
+        line_range = (int(m.group(1)), int(m.group(2))+1)
+        retval.seen_forward_declare_lines.add(line_range)
+        if '::' in line:
+            retval.nested_forward_declare_lines.add(line_range)
+
+    # IWYUOutputRecord.includes_and_forward_declares_to_add
+    for line in self.lines_by_section.get(self._ADD_SECTION_RE, []):
+      line = _COMMENT_RE.sub('', line)
+      retval.includes_and_forward_declares_to_add.add(line)
+
+    # IWYUOutputRecord.full_include_lines
+    for line in self.lines_by_section.get(self._TOTAL_SECTION_RE, []):
+      m = _INCLUDE_RE.match(line)
+      if m:
+        if not flags.comments:
+          line = _COMMENT_RE.sub('', line)  # pretend there were no comments
+        else:
+          # Just remove '// line XX': that's iwyu metadata, not a real comment
+          line = self._LINE_NUMBERS_COMMENT_RE.sub('', line)
+        retval.full_include_lines[m.group(1)] = line
+
+    return retval
+
+
+class LineInfo(object):
+  """Information about a single line of a source file."""
+
+  def __init__(self, line):
+    """Initializes the content of the line, but no ancillary fields."""
+    # The content of the line in the input file
+    self.line = line
+
+    # The 'type' of the line.  The 'type' is one of the regular
+    # expression objects in _LINE_TYPES, or None for any line that
+    # does not match any regular expression in _LINE_TYPES.
+    self.type = None
+
+    # True if no lines processed before this one have the same type
+    # as this line.
+    self.is_first_line_of_this_type = False
+
+    # Set to true if we want to delete/ignore this line in the output
+    # (for instance, because iwyu says to delete this line).  At the
+    # start, the only line to delete is the 'dummy' line 0.
+    self.deleted = self.line is None
+
+    # If this line is an #include or a forward-declare, gives a
+    # [begin,end) pair saying the 'span' this line is part of.  We do
+    # this for two types of span: the move span (an #include or
+    # forward declare, along with any preceding comments) and the
+    # reorder span (a continguous block of move-spans, connected only
+    # by blank lines and comments).  For lines that are not an
+    # #include or forward-declare, these may have an arbitrary value.
+    self.move_span = None
+    self.reorder_span = None
+
+    # If this line is an #include or a forward-declare, gives the
+    # 'key' of the line.  For #includes it is the filename included,
+    # including the ""s or <>s.  For a forward-declare it's the name
+    # of the class/struct.  For other types of lines, this is None.
+    self.key = None
+
+    # If this is a forward-declaration of a nested class, then this will be
+    # True.
+    self.is_nested_forward_declaration = False
+
+  def __str__(self):
+    if self.deleted:
+      line = 'XX-%s-XX' % self.line
+    else:
+      line = '>>>%s<<<' % self.line
+    if self.type is None:
+      type_id = None
+    else:
+      type_id = _LINE_TYPES.index(self.type)
+    return ('%s\n  -- type: %s (key: %s).  move_span: %s.  reorder_span: %s'
+            % (line, type_id, self.key, self.move_span, self.reorder_span))
+
+
+class FileInfo(object):
+  """ Details about a file's storage encoding  """
+  DEFAULT_LINESEP = os.linesep
+  DEFAULT_ENCODING = 'utf-8'
+
+  def __init__(self, linesep, encoding):
+    self.linesep = linesep
+    self.encoding = encoding
+
+  @staticmethod
+  def parse(filename):
+    """ Return a FileInfo object describing file encoding details. """
+    with open(filename, 'rb') as f:
+      content = f.read()
+
+    linesep = FileInfo.guess_linesep(content)
+    encoding = FileInfo.guess_encoding(content)
+    return FileInfo(linesep, encoding)
+
+  @staticmethod
+  def guess_linesep(bytebuf):
+    """ Return most frequent line separator of buffer. """
+    win = bytebuf.count(b'\r\n')
+    unix = bytebuf.count(b'\n') - win
+    if win > unix:
+      return '\r\n'
+    elif unix > win:
+      return '\n'
+
+    return FileInfo.DEFAULT_LINESEP
+
+  @staticmethod
+  def guess_encoding(bytebuf):
+    """ Return approximate encoding for buffer.
+
+    This is heavily heuristic, and will return any supported encoding that can
+    describe the file without losing information, not necessarily the *right*
+    encoding. This is usually OK, because IWYU typically only adds ASCII content
+    (or content pulled from the file itself).
+    """
+    def try_decode(buf, encoding):
+      try:
+        buf.decode(encoding, errors='strict')
+      except UnicodeError:
+        return False
+      return True
+
+    # Special-case UTF-8 BOM
+    if bytebuf[0:3] == b'\xef\xbb\xbf':
+      if try_decode(bytebuf, 'utf-8-sig'):
+        return 'utf-8-sig'
+
+    encodings = ['ascii', 'utf-8', 'windows-1250', 'windows-1252']
+    for encoding in encodings:
+      if try_decode(bytebuf, encoding):
+        return encoding
+
+    return FileInfo.DEFAULT_ENCODING
+
+
+def _ReadFile(filename, fileinfo):
+  """Read from filename and return a list of file lines."""
+  try:
+    with open(filename, 'rb') as f:
+      content = f.read()
+      # Call splitlines with True to keep the original line
+      # endings.  Later in WriteFile, they will be used as-is.
+      # This will reduce spurious changes to the original files.
+      # The lines we add will have the linesep determined by
+      # FileInfo.
+      return content.decode(fileinfo.encoding).splitlines(True)
+  except (IOError, OSError) as why:
+    print("Skipping '%s': %s" % (filename, why))
+  return None
+
+
+def _WriteFile(filename, fileinfo, file_lines):
+  """Write the given file-lines to the file."""
+  try:
+    with open(filename, 'wb') as f:
+      # file_lines already have line endings, so join with ''.
+      content = ''.join(file_lines)
+      content = content.encode(fileinfo.encoding)
+      f.write(content)
+  except (IOError, OSError) as why:
+    print("Error writing '%s': %s" % (filename, why))
+
+
+def PrintFileDiff(old_file_contents, new_file_contents):
+  """Print a unified diff between files, specified as lists of lines."""
+  diff = difflib.unified_diff(old_file_contents, new_file_contents)
+  # skip the '--- <filename>/+++ <filename>' lines at the start
+  try:
+    next(diff)
+    next(diff)
+    print('\n'.join(l.rstrip() for l in diff))
+  except StopIteration:
+    pass
+
+
+def _MarkHeaderGuardIfPresent(file_lines):
+  """If any line in file_lines is a header-guard, mark it in file_lines.
+
+  We define a header-guard as follows: an #ifdef where there is
+  nothing contentful before or after the #ifdef.  Also, the #ifdef
+  should have no #elif in it (though we don't currently test that).
+  This catches the common case of an 'ifdef guard' in .h file, such
+  as '#ifndef FOO_H\n#define FOO_H\n...contents...\n#endif', but it
+  can also catch other whole-program #ifdefs, such as
+  '#ifdef __linux\n...\n#endif'.  The issue here is that if an #ifdef
+  encloses the entire file, then we are willing to put new
+  #includes/fwd-declares inside the #ifdef (which normally we
+  wouldn't do).  So we want to mark such #ifdefs with a special label.
+
+  If we find such an #ifdef line -- and a single file can have at most
+  one -- we change its type to a special type for header guards.
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .type filled in.
+  """
+  # Pass over blank lines, pragmas and comments at the top of the file.
+  i = 0
+  for i in range(len(file_lines)):
+    if (not file_lines[i].deleted and
+        file_lines[i].type not in [_COMMENT_LINE_RE, _BLANK_LINE_RE,
+                                   _PRAGMA_ONCE_LINE_RE]):
+      break
+  else:     # for/else: got to EOF without finding any non-blank/comment lines
+    return
+
+  # This next line is the candidate header guard-line.
+  ifdef_start = i
+  if file_lines[ifdef_start].type != _IF_RE:
+    # Not a header guard, just return without doing anything.
+    return
+
+  # Find the end of this ifdef, to see if it's really a header guard..
+  ifdef_depth = 0
+  for ifdef_end in range(ifdef_start, len(file_lines)):
+    if file_lines[ifdef_end].deleted:
+      continue
+    if file_lines[ifdef_end].type == _IF_RE:
+      ifdef_depth += 1
+    elif file_lines[ifdef_end].type == _ENDIF_RE:
+      ifdef_depth -= 1
+      if ifdef_depth == 0:   # The end of our #ifdef!
+        break
+  else:                      # for/else
+    return False             # Weird: never found a close to this #ifdef
+
+  # Finally, all the lines after the end of the ifdef must be blank or comments.
+  for i in range(ifdef_end + 1, len(file_lines)):
+    if (not file_lines[i].deleted and
+        file_lines[i].type not in [_COMMENT_LINE_RE, _BLANK_LINE_RE]):
+      return
+
+  # We passed the gauntlet!
+  file_lines[ifdef_start].type = _HEADER_GUARD_RE
+
+  # And the line after the header guard #ifdef is the '#define' (usually).
+  if _HEADER_GUARD_DEFINE_RE.match(file_lines[ifdef_start + 1].line):
+    file_lines[ifdef_start+1].type = _HEADER_GUARD_DEFINE_RE
+
+
+def _CalculateLineTypesAndKeys(file_lines, iwyu_record):
+  """Fills file_line's type and key fields, where the 'type' is a regexp object.
+
+  We match each line (line_info.line) against every regexp in
+  _LINE_TYPES, and assign the first that matches, or None if none
+  does.  We also use iwyu_record's some_include_lines and
+  seen_forward_declare_lines to identify those lines.  In fact,
+  that's the only data source we use for forward-declare lines.
+
+  Sets file_line.type and file_line.is_first_line_of_this_type for
+  each file_line in file_lines.
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .line fields filled in.
+    iwyu_record: the IWYUOutputRecord struct for this source file.
+
+  Raises:
+    FixIncludesError: if iwyu_record's line-number information is
+      is inconsistent with what we see in the file.  (For instance,
+      it says line 12 is an #include, but we say it's a blank line,
+      or the file only has 11 lines.)
+  """
+  seen_types = set()
+  in_c_style_comment = False
+  in_allman_or_mixed_namespace = False
+  for line_info in file_lines:
+    if line_info.line is None:
+      line_info.type = None
+    elif _C_COMMENT_START_RE.match(line_info.line):
+      # Note: _C_COMMENT_START_RE only matches a comment at the start
+      # of a line.  Comments in the middle of a line are ignored.
+      # This can cause problems with multi-line comments that start
+      # in the middle of the line, but that's hopefully quite rare.
+      # TODO(csilvers): check for that case.
+      m = _C_COMMENT_END_RE.match(line_info.line)
+      if not m:             # comment continues onto future lines
+        line_info.type = _COMMENT_LINE_RE
+        in_c_style_comment = True
+      elif not m.group(1):  # comment extends across entire line (only)
+        line_info.type = _COMMENT_LINE_RE
+      else:                 # comment takes only part of line, treat as content
+        # TODO(csilvers): this mis-diagnoses lines like '/*comment*/class Foo;'
+        line_info.type = None
+    elif in_c_style_comment and _C_COMMENT_END_RE.match(line_info.line):
+      line_info.type = _COMMENT_LINE_RE
+      in_c_style_comment = False
+    elif in_c_style_comment:
+      line_info.type = _COMMENT_LINE_RE
+    elif (in_allman_or_mixed_namespace and
+          _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE.match(line_info.line)):
+      in_allman_or_mixed_namespace = False
+      line_info.type = _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE
+    else:
+      for type_re in _LINE_TYPES:
+        # header-guard-define-re has a two-part decision criterion: it
+        # matches the RE, *and* it comes after a header guard line.
+        # That's too complex to figure out now, so we skip over it now
+        # and fix it up later in _MarkHeaderGuardIfPresent().
+        if type_re in (_HEADER_GUARD_DEFINE_RE,):
+          continue
+        m = type_re.match(line_info.line)
+        if m:
+          line_info.type = type_re
+          if type_re == _INCLUDE_RE:
+            line_info.key = m.group(1)   # get the 'key' for the #include.
+          elif type_re in (_NAMESPACE_START_ALLMAN_RE,
+                           _NAMESPACE_START_MIXED_RE):
+            # set in_allman_or_mixed_namespace to true to find the next {
+            in_allman_or_mixed_namespace = True
+          break
+      else:    # for/else
+        line_info.type = None   # means we didn't match any re
+
+    line_info.is_first_line_of_this_type = (line_info.type not in seen_types)
+    seen_types.add(line_info.type)
+
+  # Now double-check against iwyu that we got all the #include lines right.
+  for line_number in iwyu_record.some_include_lines:
+    if file_lines[line_number].type != _INCLUDE_RE:
+      raise FixIncludesError('iwyu line number %s:%d (%s) is not an #include'
+                             % (iwyu_record.filename, line_number,
+                                file_lines[line_number].line))
+
+  # We depend entirely on the iwyu_record for the forward-declare lines.
+  for (start_line, end_line) in iwyu_record.seen_forward_declare_lines:
+    for line_number in range(start_line, end_line):
+      if line_number >= len(file_lines):
+        raise FixIncludesError('iwyu line number %s:%d is past file-end'
+                               % (iwyu_record.filename, line_number))
+      file_lines[line_number].type = _FORWARD_DECLARE_RE
+
+  for (start_line, end_line) in iwyu_record.nested_forward_declare_lines:
+    for line_number in range(start_line, end_line):
+      if line_number >= len(file_lines):
+        raise FixIncludesError('iwyu line number %s:%d is past file-end'
+                               % (iwyu_record.filename, line_number))
+      file_lines[line_number].is_nested_forward_declaration = True
+
+  # While we're at it, let's do a bit more sanity checking on iwyu_record.
+  for line_number in iwyu_record.lines_to_delete:
+    if line_number >= len(file_lines):
+      raise FixIncludesError('iwyu line number %s:%d is past file-end'
+                             % (iwyu_record.filename, line_number))
+    elif file_lines[line_number].type not in (_INCLUDE_RE,
+                                              _FORWARD_DECLARE_RE):
+      raise FixIncludesError('iwyu line number %s:%d (%s) is not'
+                             ' an #include or forward declare'
+                             % (iwyu_record.filename, line_number,
+                                file_lines[line_number].line))
+
+  # Check if this file has a header guard, which for our purposes is
+  # an #ifdef (or #if) that covers an entire source file.  Usually
+  # this will be a standard .h header-guard, but it could be something
+  # like '#if __linux/#endif'.  The point here is that if an #ifdef
+  # encloses the entire file, then we are willing to put new
+  # #includes/fwd-declares inside the #ifdef (which normally we
+  # wouldn't do).  So we mark such #ifdefs with a special label.
+  _MarkHeaderGuardIfPresent(file_lines)
+
+
+def _PreviousNondeletedLine(file_lines, line_number):
+  """Finds the closest line above line_number that is not deleted.
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .deleted filled in.
+    line_number: an index into file_lines; the search starts just above it.
+
+  Returns:
+    The index of the nearest earlier line whose .deleted is false, or
+    None if every earlier line (down to index 0) is deleted.
+  """
+  candidate = line_number - 1
+  while candidate >= 0:
+    if not file_lines[candidate].deleted:
+      return candidate
+    candidate -= 1
+  return None
+
+
+def _NextNondeletedLine(file_lines, line_number):
+  """Finds the closest line below line_number that is not deleted.
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .deleted filled in.
+    line_number: an index into file_lines; the search starts just below it.
+
+  Returns:
+    The index of the nearest later line whose .deleted is false, or
+    None if every later line (up to end-of-file) is deleted.
+  """
+  later_indices = range(line_number + 1, len(file_lines))
+  return next((n for n in later_indices if not file_lines[n].deleted), None)
+
+
+def _LineNumberStartingPrecedingComments(file_lines, line_number):
+  """Returns where the block of comments right above line_number begins.
+
+  Walks upward from line_number for as long as the preceding line is
+  a comment line.  If there are no such lines, line_number itself is
+  the answer.
+
+  Comments that run all the way up to the first line of the file
+  (line 1) get special treatment: we assume they are not associated
+  with any source code line, so we ignore them and return
+  line_number unchanged.
+
+  Arguments:
+    file_lines: an array of LineInfo objects, with .type fields filled in.
+    line_number: an index into file_lines.
+
+  Returns:
+    The line number of the first comment line in the run directly
+      above line_number, or line_number if there is no such run or
+      the run reaches the top of the file.
+  """
+  first_comment = line_number
+  while (first_comment > 0 and
+         file_lines[first_comment - 1].type == _COMMENT_LINE_RE):
+    first_comment -= 1
+  # Reaching line 1 (or the dummy line 0) means a top-of-file comment
+  # block, which is not treated as belonging to line_number.
+  return line_number if first_comment <= 1 else first_comment
+
+
+def _CalculateMoveSpans(file_lines, forward_declare_spans):
+  """Assigns a move_span to every line belonging to a movable unit.
+
+  A 'move span' is the range of lines made up of one #include or
+  forward-declare together with the comment lines directly above it.
+  It is what we move (or delete) as a unit when we decide to move (or
+  delete) that #include or forward-declare.
+
+  Every line inside such a range gets its move_span set to the pair
+  [start_of_span, end_of_span).  Lines outside any range are left
+  untouched by this function.
+
+  Arguments:
+    file_lines: an array of LineInfo objects, with .type fields filled in.
+    forward_declare_spans: a set of line-number pairs
+       [start_line, end_line), each representing a single namespace.
+       In practice this comes from iwyu_record.seen_forward_declare_lines.
+  """
+  # #includes: each one is a single line plus its leading comments.
+  for include_line, line_info in enumerate(file_lines):
+    if line_info.type != _INCLUDE_RE:
+      continue
+    first = _LineNumberStartingPrecedingComments(file_lines, include_line)
+    include_span = (first, include_line + 1)
+    for covered in range(first, include_line + 1):
+      file_lines[covered].move_span = include_span
+
+  # Forward-declares: the caller supplies the extents; we only widen
+  # each one upward over its leading comments.
+  for (declare_begin, declare_end) in forward_declare_spans:
+    first = _LineNumberStartingPrecedingComments(file_lines, declare_begin)
+    declare_span = (first, declare_end)
+    for covered in range(first, declare_end):
+      file_lines[covered].move_span = declare_span
+
+
+def _ContainsBarrierInclude(file_lines, line_range):
+  """Returns true iff a non-deleted line in line_range is a BARRIER.
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .line and .deleted set.
+    line_range: the arguments to range() selecting the lines to test,
+       normally a [start_line, end_line) pair.
+  """
+  return any(
+      not file_lines[n].deleted and _BARRIER_INCLUDES.search(file_lines[n].line)
+      for n in range(*line_range))
+
+
+def _LinesAreAllBlank(file_lines, start_line, end_line):
+  """Returns true iff [start_line, end_line) holds only blank/deleted lines.
+
+  An empty range counts as all-blank.
+  """
+  return all(
+      file_lines[n].deleted or file_lines[n].type == _BLANK_LINE_RE
+      for n in range(start_line, end_line))
+
+
+def _CalculateReorderSpans(file_lines):
+  """Assigns a reorder_span to every line inside a reorderable region.
+
+  A 'reorder span' is a range of lines (from file_lines) that only has
+  #includes and forward-declares in it (and maybe blank lines, and
+  comments associated with #includes or forward-declares).  It holds
+  no "real code" at all, which is why we are willing to reorder
+  #includes and namespaces freely inside it.
+
+  We build reorder spans by gluing together neighbouring move-spans
+  that are separated by nothing but blank (or deleted) lines.
+
+  The exception is a move-span that matches the _BARRIER_INCLUDES
+  regexp.  Nothing may be reordered from one side of such a
+  move-span to the other (it is used for #includes that need other
+  #includes to come before them), so a barrier move-span always gets
+  a reorder span of its own.
+
+  Each line within a reorder span has its reorder_span field set to
+  the pair [start_of_span, end_of_span).  Other lines are not written
+  by this function.
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .type and .move_span
+       fields filled in.
+  """
+  # Move spans are disjoint, so sorting the distinct ones puts them in
+  # file order.
+  spans = sorted({info.move_span for info in file_lines
+                  if info.move_span is not None})
+  num_spans = len(spans)
+
+  index = 0
+  while index < num_spans:
+    region_start = spans[index][0]
+
+    # A barrier span stands alone.  Any other span absorbs each
+    # following span that is reachable across blank lines only and is
+    # not itself a barrier.
+    if not _ContainsBarrierInclude(file_lines, spans[index]):
+      while (index + 1 < num_spans and
+             _LinesAreAllBlank(file_lines,
+                               spans[index][1], spans[index + 1][0]) and
+             not _ContainsBarrierInclude(file_lines, spans[index + 1])):
+        index += 1
+
+    region = (region_start, spans[index][1])
+    # Every line in the region records the region's full extent.
+    for line_number in range(*region):
+      file_lines[line_number].reorder_span = region
+    index += 1
+
+
+def ParseOneFile(f, iwyu_record):
+  """Reads the lines of a file and classifies each of them.
+
+  Every line becomes a LineInfo object, which is then annotated with
+  its 'type' and 'key', its move span and its reorder span.
+
+  Arguments:
+    f: an iterable object returning lines from a file.
+    iwyu_record: the IWYUOutputRecord struct for this source file.
+
+  Returns:
+     An array of LineInfo objects.  The first element is always a dummy
+     element, so the first line of the file is at retval[1], matching
+     the way iwyu counts line numbers.
+  """
+  # Index 0 is a placeholder so that list indices equal line numbers.
+  parsed_lines = [LineInfo(None)]
+  parsed_lines.extend(LineInfo(text) for text in f)
+
+  # Order matters: move spans need the line types, and reorder spans
+  # need the move spans.
+  _CalculateLineTypesAndKeys(parsed_lines, iwyu_record)
+  _CalculateMoveSpans(parsed_lines, iwyu_record.seen_forward_declare_lines)
+  _CalculateReorderSpans(parsed_lines)
+  return parsed_lines
+
+
+def _DeleteEmptyNamespaces(file_lines):
+  """Delete namespaces with nothing in them.
+
+  Empty namespaces could be caused by transformations that removed
+  forward-declarations:
+        namespace foo {
+        class Myclass;
+        }
+     ->
+        namespace foo {
+        }
+  We want to get rid of the 'empty' namespace in this case.
+
+  A namespace counts as empty when only deleted lines, comments, blank
+  lines and (nested) namespace start/continue/end lines appear between
+  its start and the line that closes it.  The comment lines directly
+  above the namespace, and the blank lines above those, are deleted
+  along with it.
+
+  This routine 'deletes' lines by setting their 'deleted' field to True.
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .type fields filled in.
+
+  Returns:
+    The number of namespaces deleted.
+  """
+  num_namespaces_deleted = 0
+  start_line = 0
+  while start_line < len(file_lines):
+    line_info = file_lines[start_line]
+    # Skip ahead until we are on a live namespace-start line.
+    if (line_info.deleted or
+        (line_info.type != _NAMESPACE_START_RE and
+         line_info.type != _NAMESPACE_START_ALLMAN_RE and
+         line_info.type != _NAMESPACE_START_MIXED_RE)):
+      start_line += 1
+      continue
+    if line_info.type in (_NAMESPACE_START_RE, _NAMESPACE_START_MIXED_RE):
+      # Because multiple namespaces can be on one line
+      # ("namespace foo { namespace bar { ..."), we need to count.
+      # We use the max because line may have 0 '{'s if it's a macro.
+      # TODO(csilvers): ignore { in comments.
+      namespace_depth = max(line_info.line.count('{'), 1)
+    elif line_info.type == _NAMESPACE_START_ALLMAN_RE:
+      # For Allman namespaces, keep the start line and increment
+      # the namespace depths when the actual brace is encountered.
+      namespace_depth = 0
+    else:
+      # We should have handled all the namespace styles above!
+      assert False, ('unknown namespace type',
+                     _LINE_TYPES.index(line_info.type))
+    # Scan forward from the start line, tracking the brace depth, until
+    # the namespace closes or we hit a line that shows it is not empty.
+    end_line = start_line + 1
+    while end_line < len(file_lines):
+      line_info = file_lines[end_line]
+      if line_info.deleted:
+        end_line += 1
+      elif line_info.type in (_COMMENT_LINE_RE, _BLANK_LINE_RE):
+        end_line += 1                # ignore comments and blank lines
+      elif line_info.type == _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE:
+        # The brace line of an Allman/mixed namespace opens one level.
+        namespace_depth += 1
+        end_line += 1
+      elif line_info.type in (_NAMESPACE_START_RE, _NAMESPACE_START_MIXED_RE):
+        # nested namespace
+        namespace_depth += max(line_info.line.count('{'), 1)
+        end_line += 1
+      elif line_info.type == _NAMESPACE_START_ALLMAN_RE:
+        # nested Allman namespace
+        end_line += 1
+      elif line_info.type == _NAMESPACE_END_RE:
+        namespace_depth -= max(line_info.line.count('}'), 1)
+        end_line += 1
+        if namespace_depth <= 0:
+          # Delete any comments preceding this namespace as well.
+          start_line = _LineNumberStartingPrecedingComments(file_lines,
+                                                            start_line)
+          # And also blank lines.
+          while (start_line > 0 and
+                 file_lines[start_line-1].type == _BLANK_LINE_RE):
+            start_line -= 1
+          for line_number in range(start_line, end_line):
+            file_lines[line_number].deleted = True
+          num_namespaces_deleted += 1
+          break
+      else:   # bail: we're at a line indicating this isn't an empty namespace
+        end_line = start_line + 1  # rewind to try again with nested namespaces
+        break
+    # Resume the outer scan at end_line: just past what we deleted,
+    # just past the start line after a rewind, or at end-of-file.
+    start_line = end_line
+
+  return num_namespaces_deleted
+
+
+def _DeleteEmptyIfdefs(file_lines):
+  """Deletes ifdefs with nothing in them.
+
+  This could be caused by transformations that removed #includes:
+        #ifdef OS_WINDOWS
+        # include <windows.h>
+        #endif
+     ->
+        #ifdef OS_WINDOWS
+        #endif
+  We want to get rid of the 'empty' #ifdef in this case.
+  We also handle 'empty' #ifdefs with #else, if both sides of
+  the #else are empty.  We also handle #ifndef and #if.
+
+  The comment lines directly above the #ifdef, and the blank lines
+  above those, are deleted along with it.
+
+  This routine 'deletes' lines by setting their 'deleted' field to True.
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .type fields filled in.
+
+  Returns:
+    The number of ifdefs deleted.
+  """
+  num_ifdefs_deleted = 0
+  start_line = 0
+  while start_line < len(file_lines):
+    # Skip ahead until we are on an #if-like line (header guards included).
+    if file_lines[start_line].type not in (_IF_RE, _HEADER_GUARD_RE):
+      start_line += 1
+      continue
+    # Scan forward until the matching #endif, or until we hit a line
+    # that shows the #ifdef is not empty.
+    end_line = start_line + 1
+    while end_line < len(file_lines):
+      line_info = file_lines[end_line]
+      if line_info.deleted:
+        end_line += 1
+      elif line_info.type in (_ELSE_RE, _COMMENT_LINE_RE, _BLANK_LINE_RE):
+        end_line += 1                # ignore #else, comments and blank lines
+      elif line_info.type == _ENDIF_RE:
+        end_line += 1
+        # Delete any comments preceding this #ifdef as well.
+        start_line = _LineNumberStartingPrecedingComments(file_lines,
+                                                          start_line)
+        # And also blank lines.
+        while (start_line > 0 and
+               file_lines[start_line-1].type == _BLANK_LINE_RE):
+          start_line -= 1
+        for line_number in range(start_line, end_line):
+          file_lines[line_number].deleted = True
+        num_ifdefs_deleted += 1
+        break
+      else:   # bail: we're at a line indicating this isn't an empty ifdef
+        end_line = start_line + 1  # rewind to try again with nested #ifdefs
+        break
+    # Resume the outer scan at end_line: just past what we deleted,
+    # just past the #if line after a rewind, or at end-of-file.
+    start_line = end_line
+
+  return num_ifdefs_deleted
+
+
+def _DeleteDuplicateLines(file_lines, line_ranges):
+  """Marks repeated move-spans within line_ranges as deleted.
+
+  We walk the lines in line_ranges in order.  Whenever the text of a
+  move-span equals the text of a move-span we have already seen, all
+  of its lines get their deleted bit set to True.  line_ranges exists
+  so callers can leave out lines in #ifdefs and namespaces, which may
+  be identical syntactically but have different semantics.  Ideally,
+  line_ranges should include only 'top-level' lines.
+
+  Blank lines and comment-only lines are skipped, and end-of-line
+  comments are stripped before two spans are compared.
+  NOTE: Because our comment-finding RE is primitive, it's best if
+  line_ranges covers only #include and forward-declare lines.  In
+  particular, it should not cover lines that may have C literal
+  strings in them.
+
+  We only delete whole move_spans, not lines within them.
+
+  Arguments:
+    file_lines: an array of LineInfo objects.
+    line_ranges: a list of [start_line, end_line) pairs.
+  """
+  already_seen = set()
+  skipped_types = (_BLANK_LINE_RE, _COMMENT_LINE_RE)
+  for line_range in line_ranges:
+    for line_number in range(*line_range):
+      head = file_lines[line_number]
+      # Only a non-comment, non-blank line that opens its own move-span
+      # triggers a comparison.
+      if head.type in skipped_types or line_number != head.move_span[0]:
+        continue
+      members = [file_lines[n]
+                 for n in range(head.move_span[0], head.move_span[1])]
+      # The whole span, comments removed, joined into one comparable string.
+      normalized = ' '.join(_COMMENT_RE.sub('', member.line.strip())
+                            for member in members)
+      if normalized in already_seen:
+        for member in members:
+          member.deleted = True
+      elif not head.deleted:
+        already_seen.add(normalized)
+
+
+def _DeleteExtraneousBlankLines(file_lines, line_range):
+  """Deletes blank lines made redundant by deleting a span of code.
+
+  Here's an example file:
+     class Foo { ... };
+
+     class Bar;
+
+     class Baz { ... }
+
+  If we delete the "class Bar;" line and nothing else, two blank
+  lines are left between Foo and Baz, which looks bad.  When a
+  deleted span has whitespace on both sides, the whitespace on one
+  side is 'extraneous', so here we also delete the blank line below
+  'class Bar;'.  That leaves one blank line between Foo and Baz,
+  like people would expect.
+
+  We delete no more than the smaller of the two blank-line counts.
+  If 'class Bar' had one blank line before it and one hundred after
+  it, only one blank line would go away.  This matches users'
+  expectations.
+
+  Things get tricky when two deleted spans touch (the whitespace
+  between them may look safe to delete when it is not).  To be safe,
+  we do nothing unless every line in line_range is deleted.
+
+  Arguments:
+    file_lines: an array of LineInfo objects, with .type filled in.
+    line_range: a range [start_line, end_line).  It should correspond
+       to a reorder-span.
+  """
+  # Nothing to do unless the whole span is gone.
+  if not all(file_lines[n].deleted for n in range(*line_range)):
+    return
+
+  above = _PreviousNondeletedLine(file_lines, line_range[0])
+  below = _NextNondeletedLine(file_lines, line_range[1] - 1)
+  while above and below:
+    if (file_lines[above].type != _BLANK_LINE_RE or
+        file_lines[below].type != _BLANK_LINE_RE):
+      break
+    # Blank on both sides of the gap: keep the one above, drop the one
+    # below, then look one line further out in each direction.
+    file_lines[below].deleted = True
+    above = _PreviousNondeletedLine(file_lines, above)
+    below = _NextNondeletedLine(file_lines, below)
+
+
+def _ShouldInsertBlankLine(decorated_move_span, next_decorated_move_span,
+                           file_lines, flags):
+  """Decides whether a blank line belongs between two adjacent spans.
+
+  Both arguments are decorated move-spans, of the form
+     (reorder_range, kind, noncomment_lines, all_lines)
+
+  When the two spans share a reorder_range, the answer depends on
+  their kinds.  Going from an #include to a forward-declare (or back)
+  always gets a blank line.  Two spans of the same kind never do, nor
+  does the transition between C and C++ system #includes, nor the
+  end of the file.  Any other change of kind gets a blank line only
+  when flags.blank_lines is set.
+
+  When the spans are in different reorder_ranges, the first one is at
+  the end of its reorder range and another rule applies: we want a
+  blank line if the next line is contentful (eg 'static int x = 5;'),
+  a namespace start, or a pragma push, so that the move-span is set
+  apart from the block that follows.  Comments are skipped over when
+  looking for that next line.
+
+  Arguments:
+    decorated_move_span: a decorated_move_span we may want to put a blank
+       line after.
+    next_decorated_move_span: the next decorated_move_span, which may
+       be a sentinel decorated_move_span at end-of-file.
+    file_lines: an array of LineInfo objects with .deleted filled in.
+    flags: commandline flags, as parsed by argparse.  We use
+       flags.blank_lines, which controls whether we put blank
+       lines between different 'kinds' of #includes.
+
+  Returns:
+     A true value if we should insert a blank line after
+     decorated_move_span, and a false value (possibly None) otherwise.
+  """
+  this_range = decorated_move_span[0]
+
+  # Case 1: decorated_move_span closes its reorder range.
+  if this_range != next_decorated_move_span[0]:
+    num_lines = len(file_lines)
+    following = _NextNondeletedLine(file_lines, this_range[1] - 1)
+    # Step past comments to reach the line that decides the answer.
+    while (following and following < num_lines and
+           file_lines[following].type == _COMMENT_LINE_RE):
+      following += 1
+    wants_separation = (_NAMESPACE_START_RE,
+                        _NAMESPACE_START_ALLMAN_RE,
+                        _NAMESPACE_START_MIXED_RE,
+                        _PRAGMA_PUSH_LINE_RE,
+                        None)
+    return (following and following < num_lines and
+            file_lines[following].type in wants_separation)
+
+  # Case 2: both spans live in the same reorder range, so compare kinds.
+  this_kind = decorated_move_span[1]
+  next_kind = next_decorated_move_span[1]
+
+  # Same kind twice, or nothing but EOF ahead: no blank line.
+  if this_kind == next_kind or next_kind == _EOF_KIND:
+    return False
+
+  # C and C++ system #includes stay together whatever the flag says.
+  system_kinds = (_C_SYSTEM_INCLUDE_KIND, _CXX_SYSTEM_INCLUDE_KIND)
+  if this_kind in system_kinds and next_kind in system_kinds:
+    return False
+
+  # The kinds differ here, so at most one is a forward-declare; that
+  # makes this an #include <-> forward-declare boundary.
+  if _FORWARD_DECLARE_KIND in (this_kind, next_kind):
+    return True
+
+  # Every remaining change of kind is separated only on request.
+  return (this_kind != next_kind) if flags.blank_lines else False
+
+
+def _GetToplevelReorderSpans(file_lines):
+  """Returns the sorted reorder_spans that sit at the top level of the file.
+
+  A reorder span is dropped if any of its lines is inside an #ifdef,
+  inside a namespace, or marked as a nested forward declaration.  The
+  'header guard' ifdef that encapsulates an entire .h file does not
+  count as an #ifdef here.  Everything else is returned in sorted
+  order.
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .type and
+       .reorder_span filled in.
+
+  Returns:
+    A list of [start_line, end_line) reorder_spans.
+  """
+  num_lines = len(file_lines)
+
+  # Pass 1: flag the live lines that are inside an #if.  Only _IF_RE
+  # opens a level, so the header-guard ifdef is not counted.
+  inside_ifdef = [False] * num_lines
+  open_ifs = 0
+  for index, info in enumerate(file_lines):
+    if info.deleted:
+      continue
+    if info.type == _IF_RE:
+      open_ifs += 1
+    elif info.type == _ENDIF_RE:
+      open_ifs -= 1
+    inside_ifdef[index] = open_ifs > 0
+
+  # Pass 2: flag the live lines that are inside a namespace.  Telling
+  # whether a } ends a namespace or some other language construct is
+  # hard, so from the first 'contentful' line inside a namespace on,
+  # we treat the entire rest of the file as being in the namespace.
+  inside_namespace = [False] * num_lines
+  open_namespaces = 0
+  for index, info in enumerate(file_lines):
+    if info.deleted:
+      continue
+    if info.type in (_NAMESPACE_START_RE, _NAMESPACE_START_MIXED_RE):
+      # At least one level, since the namespace-re may be a macro.
+      open_namespaces += max(info.line.count('{'), 1)
+    elif info.type == _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE:
+      open_namespaces += 1
+    elif info.type == _NAMESPACE_END_RE:
+      open_namespaces -= max(info.line.count('}'), 1)
+    if open_namespaces <= 0:
+      continue
+    if info.type is None:
+      inside_namespace[index:] = [True] * (num_lines - index)
+      break
+    inside_namespace[index] = True
+
+  def _IsNested(index):
+    """True iff the line at index disqualifies its reorder span."""
+    return (inside_ifdef[index] or inside_namespace[index] or
+            file_lines[index].is_nested_forward_declaration)
+
+  # Pass 3: keep the spans none of whose lines is nested.
+  candidate_spans = sorted({info.reorder_span for info in file_lines
+                            if info.reorder_span})
+  return [span for span in candidate_spans
+          if not any(_IsNested(index) for index in range(*span))]
+
+
+def _GetNamespaceLevelReorderSpans(file_lines):
+  """Returns a list of reorder-spans inside namespaces, if it's easy to do.
+
+  This routine is meant to handle the simple case where code consists
+  of includes and forward-declares, and then a 'namespace
+  my_namespace'.  We return the reorder spans of the inside-namespace
+  forward-declares, which is a good place to insert new
+  inside-namespace forward-declares (rather than putting these new
+  forward-declares at the top level).
+
+  So it goes through the top of the file, stopping at the first
+  'contentful' line.  If that line has the form 'namespace <foo> {',
+  it then continues until it finds a forward-declare line, or a
+  non-namespace contentful line.  In the former case, it figures out
+  the reorder-span this forward-declare line is part of, while in the
+  latter case it creates a new reorder-span.  A list of these namespace
+  reorder spans are returned so they can all be checked.  These elements
+  are in the form (enclosing_namespace, reorder_span).
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .type and
+    .reorder_span filled in.
+
+  Returns:
+    [] if we could not find any namespace-level reorder-spans, or
+    [(enclosing_namespace, reorder_span), ...], where enclosing_namespace
+    is a string that looks like (for instance)
+    'namespace ns1 { namespace ns2 {', and reorder-span is a
+    [start_line, end_line) pair.
+  """
+
+  def _GetNamespaceNames(namespace_line):
+    """Returns a list of namespace names given a namespace line.  Anonymous
+    namespaces will return an empty string
+    """
+    namespace_re = re.compile(r'\s*namespace\b(.*)')
+    namespaces = []
+    namespace_line = namespace_line.split("/")[0] # remove C++ comments
+    namespace_line = namespace_line.split("{") # extract all namespaces
+    for namespace in namespace_line:
+      m = namespace_re.match(namespace)
+      if m:
+        namespaces.append(m.group(1).strip())
+
+    return namespaces
+
+  namespace_reorder_spans = {}
+  try:
+    namespace_prefixes = []
+    pending_namespace_prefix = ''
+    ifdef_depth = 0
+
+    for line_number, line_info in enumerate(file_lines):
+      if line_info.deleted:
+        continue
+
+      # If we're an empty line, just ignore us.  Likewise with #include
+      # lines, which aren't 'contentful' for our purposes, and the
+      # header guard, which is (by definition) the only kind of #ifdef
+      # that we can be inside and still considered at the "top level".
+      if line_info.type in (_COMMENT_LINE_RE,
+                            _BLANK_LINE_RE,
+                            _INCLUDE_RE,
+                            _HEADER_GUARD_RE,
+                            _HEADER_GUARD_DEFINE_RE,
+                            _PRAGMA_ONCE_LINE_RE):
+        continue
+
+      # If we're a 'contentful' line such as a (non-header-guard) #ifdef, add
+      # to the ifdef depth.  If we encounter #endif, reduce the ifdef depth.
+      # Only keep track of namespaces when ifdef depth is 0
+      elif line_info.type == _IF_RE:
+        ifdef_depth += 1
+
+      elif line_info.type == _ELSE_RE:
+        continue
+
+      elif line_info.type == _ENDIF_RE:
+        ifdef_depth -= 1
+
+      elif ifdef_depth != 0:
+        continue # skip lines until we're outside of an ifdef block
+
+      # Build the simplified namespace dictionary.  When any new namespace is
+      # encountered, add the namespace to the list using the next line to cover
+      # namespaces without forward declarations.  When a forward declare is
+      # found, update the dictionary using the existing namespace span that the
+      # forward declare contains.  Once a contentful line (None) has been found
+      # or any exception occurs, return the results that have been found.  Any
+      # forward declare that wasn't able to have a proper namespace name found
+      # will still propagate to the top of the file.
+      elif line_info.type == _NAMESPACE_START_RE:
+        for namespace in _GetNamespaceNames(line_info.line):
+          if not namespace:
+            namespace_prefixes.append('namespace {')
+          else:
+            namespace_prefixes.append('namespace %s {' % namespace)
+
+        namespace_reorder_spans[' '.join(namespace_prefixes)] = (
+          line_number+1, line_number+1)
+
+      elif line_info.type == _NAMESPACE_START_ALLMAN_RE:
+        pending_namespace_prefix = ''
+        namespaces = _GetNamespaceNames(line_info.line)
+        if len(namespaces) != 1:
+          raise FixIncludesError('Allman namespace found containing multiple '
+                                 'names: %s', line_info.line)
+        for namespace in namespaces:
+          if not namespace:
+            pending_namespace_prefix += 'namespace'
+          else:
+            pending_namespace_prefix += 'namespace %s' % namespace
+
+      elif line_info.type == _NAMESPACE_START_MIXED_RE:
+        # For mixed namespace styles, we need to append normalized prefixes
+        # using regular and Allman style.  Treat the first elements as
+        # normal and only treat the final element as Allman.  By the
+        # nature of mixed namespaces, there will always be more than
+        # one namespace so it is okay to assume that _GetNamespaceNames
+        # will always return multiple records.
+        pending_namespace_prefix = ''
+        namespaces = _GetNamespaceNames(line_info.line)
+        for namespace in namespaces[:-1]:
+          if not namespace:
+            namespace_prefixes.append('namespace {')
+          else:
+            namespace_prefixes.append('namespace %s {' % namespace)
+
+        if not namespaces[-1]:
+          pending_namespace_prefix += 'namespace'
+        else:
+          pending_namespace_prefix += 'namespace %s' % namespaces[-1]
+
+      elif line_info.type == _NAMESPACE_CONTINUE_ALLMAN_MIXED_RE:
+        # Append to the simplified allman namespace.
+        if pending_namespace_prefix == '':
+          raise FixIncludesError('Namespace bracket found without an 
associated '
+                                 'namespace name at line: %s', line_number)
+        pending_namespace_prefix += ' {'
+        namespace_prefixes.append(pending_namespace_prefix)
+        namespace_reorder_spans[' '.join(namespace_prefixes)] = (
+          line_number+1, line_number+1)
+
+      elif line_info.type == _NAMESPACE_END_RE:
+        # Remove C++ comments and count the ending brackets.
+        namespace_end_count = line_info.line.split("/")[0].count("}")
+        namespace_prefixes = namespace_prefixes[:-namespace_end_count]
+
+      elif line_info.type == _FORWARD_DECLARE_RE:
+        # If we're not in a namespace, keep going.  Otherwise, this is
+        # just the situation we're looking for!  Update the dictionary
+        # with the better reorder span
+        if len(namespace_prefixes) > 0:
+          namespace_reorder_spans[' '.join(namespace_prefixes)] = (
+            line_info.reorder_span)
+
+      elif line_info.type == None:
+        break
+
+      else:
+        # We should have handled all the cases above!
+        assert False, ('unknown line-info type',
+                       _LINE_TYPES.index(line_info.type))
+  except Exception as why:
+    # Namespace detection could be tricky so take what we have and return.
+    print('DEBUG: Namespace detection returned prematurely because of an '
+          'exception: %s' % (why))
+    pass
+
+  # return a reverse sorted list so longest matches are checked first
+  return sorted(namespace_reorder_spans.items(), reverse=True)
+
+
+# Candidate 'kind' arguments for _FirstReorderSpanWith.  The numeric values
+# double as the default sort position of each kind (lowest sorts first).
+_MAIN_CU_INCLUDE_KIND = 1         # #include "foo.h" while editing foo.cc
+_C_SYSTEM_INCLUDE_KIND = 2        # #include <stdio.h>
+_CXX_SYSTEM_INCLUDE_KIND = 3      # #include <vector>
+_NONSYSTEM_INCLUDE_KIND = 4       # #include "bar.h"
+_PROJECT_INCLUDE_KIND = 5         # #include "myproject/quux.h"
+_FORWARD_DECLARE_KIND = 6         # class Baz;
+_EOF_KIND = 7                     # sentinel placed at end-of-file
+
+# Default mode: each kind sorts at its own numeric value, so the table is
+# simply the identity mapping over all kinds.
+SORT_ORDER_DEFAULT = {
+  _MAIN_CU_INCLUDE_KIND: 1,
+  _C_SYSTEM_INCLUDE_KIND: 2,
+  _CXX_SYSTEM_INCLUDE_KIND: 3,
+  _NONSYSTEM_INCLUDE_KIND: 4,
+  _PROJECT_INCLUDE_KIND: 5,
+  _FORWARD_DECLARE_KIND: 6,
+  _EOF_KIND: 7,
+}
+
+# Quoted-first mode: every "..." include kind sorts ahead of the <...> kinds.
+# The tuple lists the kinds in the order they should appear in the output.
+SORT_ORDER_QUOTED_FIRST = {
+  kind: ordinal for ordinal, kind in enumerate((
+    _MAIN_CU_INCLUDE_KIND,
+    _NONSYSTEM_INCLUDE_KIND,
+    _PROJECT_INCLUDE_KIND,
+    _C_SYSTEM_INCLUDE_KIND,
+    _CXX_SYSTEM_INCLUDE_KIND,
+    _FORWARD_DECLARE_KIND,
+    _EOF_KIND,
+  ), 1)
+}
+
+
+def _IsSystemInclude(line_info):
+  """Tell whether line_info is an angle-bracket (<...>) #include line.
+
+  Arguments:
+    line_info: a LineInfo structure with .type and .key filled in.
+
+  Returns:
+    True if the line is an #include whose key starts with '<', False else.
+  """
+  if line_info.type != _INCLUDE_RE:
+    return False
+  # An #include key keeps its delimiters, so the first character tells us
+  # whether the include was written with <> or with "".
+  return line_info.key[0] == '<'
+
+
+def _IsMainCUInclude(line_info, filename):
+  """Decide whether line_info is a 'main-CU' #include for filename.
+
+  A 'main-CU' #include is the header that belongs to the file being
+  edited: when editing foo.cc, both foo.h and foo-inl.h qualify, and the
+  same is true when editing foo_test.cc.
+
+  An #include carrying the IWYU 'associated' pragma is always main-CU.
+  Otherwise both names are canonicalized and compared:
+     includee: drop the quotes and a trailing  -inl.h  .h  or  .hpp
+               (case-insensitively)
+     includer: drop the file extension, then a trailing
+               _unittest  _regtest  or  _test
+
+  Rule 1: If the canonical names match, directories included, the
+  header is a main-CU #include.  A header under /public/ is also
+  matched against an includer under /internal/.
+
+  Rule 2: If only the basenames of the canonical names match, the
+  header is a main-CU #include *if* it is the first #include seen.
+
+  Arguments:
+    line_info: a LineInfo structure with .type, .line,
+       .is_first_line_of_this_type, and .key filled in.
+    filename: the name of the file being edited.
+
+  Returns:
+    True if line_info is an #include of a main-CU file, False else.
+  """
+  is_quoted_include = (line_info.type == _INCLUDE_RE and
+                       not _IsSystemInclude(line_info))
+  if not is_quoted_include:
+    return False
+  if _IWYU_PRAGMA_ASSOCIATED_RE.search(line_info.line):
+    return True
+
+  # Canonical includee: no surrounding quotes, no header suffix.
+  includee = line_info.key.replace('"', '')
+  includee_stem = re.sub(r'(-inl\.h|\.h|\.hpp)$', '', includee, flags=re.I)
+  # Canonical includer: no extension, no Google-style test suffix.
+  includer_stem = os.path.splitext(filename)[0]
+  includer_stem = re.sub(r'(_unittest|_regtest|_test)$', '', includer_stem)
+  # A header in /public/ pairs with a source file in /internal/.
+  internal_stem = re.sub(r'/public/', '/internal/', includee_stem)
+
+  # Rule 1: full canonical names agree.
+  if includer_stem == includee_stem or includer_stem == internal_stem:
+    return True
+
+  # Rule 2: basenames agree, and this is the first #include in the file.
+  if not line_info.is_first_line_of_this_type:
+    return False
+  return os.path.basename(includer_stem) == os.path.basename(includee_stem)
+
+
+def _GetPathRoot(path):
+  """Return the first component of path, or None if it has no separator.
+
+  The native separator (os.path.sep) is looked for first.  Where the native
+  separator is not '/', a '/' is accepted as a fallback when the native one
+  is absent: forward slashes are common in portable codebases even on
+  Windows, and accepting them helps with testing.
+
+  Arguments:
+    path: the path string to inspect.
+
+  Returns:
+    Everything before the first separator (an empty string if path starts
+    with a separator), or None if path contains no separator.
+  """
+  sep_index = path.find(os.path.sep)
+  if sep_index < 0 and os.path.sep != '/':
+    sep_index = path.find('/')
+
+  return None if sep_index < 0 else path[:sep_index]
+
+
+def _IsSameProject(line_info, edited_file, project):
+  """Return true if included file and edited file are in the same project.
+
+  An included_file is in project 'project' if the project is a prefix of the
+  included_file.  'project' should end with /.
+
+  As a special case, if project is '<tld>', then the project is defined to
+  be the top-level directory of edited_file.
+
+  Arguments:
+    line_info: a LineInfo structure with .key containing the file that is
+      being included.
+    edited_file: the name of the file being edited.
+    project: if '<tld>', set the project path to be the top-level directory
+      name of the file being edited.  If not '<tld>', this value is used to
+      specify the project directory.
+
+  Returns:
+    True if line_info and filename belong in the same project, False otherwise.
+    The result is always a real bool, including when either path has no
+    top-level directory.
+  """
+  # Drop the opening delimiter of the key; the closing one does not matter
+  # for the prefix / first-component comparisons below.
+  included_file = line_info.key[1:]
+  if project != '<tld>':
+    return included_file.startswith(project)
+  included_root = _GetPathRoot(included_file)
+  edited_root = _GetPathRoot(edited_file)
+  # A missing (None) or empty root never counts as a shared project.  Wrap
+  # in bool() so callers get False rather than None or ''.
+  return bool(included_root and edited_root and included_root == edited_root)
+
+
+def _GetLineKind(file_line, filename, separate_project_includes):
+  """Given a file_line + file being edited, return best *_KIND value or None.
+
+  Arguments:
+    file_line: a LineInfo structure with .deleted, .line, .type and (for
+      #include lines) .key filled in.
+    filename: the name of the file being edited; used to recognize the
+      main-CU #include.
+    separate_project_includes: if false, project includes are not
+      distinguished from other quoted includes.  Otherwise it is passed to
+      _IsSameProject as the project.
+
+  Returns:
+    One of the *_INCLUDE_KIND values or _FORWARD_DECLARE_KIND, or None if
+    the line is deleted or is neither an #include nor a forward-declare.
+  """
+  # Check 'deleted' before touching file_line.line: a deleted entry may have
+  # a .line of None (FixOneFile filters such entries out), and None cannot be
+  # fed to a regexp.
+  if file_line.deleted:
+    return None
+  if _IsMainCUInclude(file_line, filename):
+    return _MAIN_CU_INCLUDE_KIND
+  if _IsSystemInclude(file_line):
+    # With comments stripped, a '.' left on the line (as in <stdio.h>) marks
+    # a C system header; no '.' (as in <vector>) marks a C++ one.
+    line_without_comments = _COMMENT_RE.sub('', file_line.line)
+    if '.' in line_without_comments:
+      return _C_SYSTEM_INCLUDE_KIND
+    return _CXX_SYSTEM_INCLUDE_KIND
+  if file_line.type == _INCLUDE_RE:
+    if (separate_project_includes and
+        _IsSameProject(file_line, filename, separate_project_includes)):
+      return _PROJECT_INCLUDE_KIND
+    return _NONSYSTEM_INCLUDE_KIND
+  if file_line.type == _FORWARD_DECLARE_RE:
+    return _FORWARD_DECLARE_KIND
+  return None
+
+
+def _FirstReorderSpanWith(file_lines, good_reorder_spans, kind, filename,
+                          flags):
+  """Returns [start_line,end_line) of 1st reorder_span with a line of kind kind.
+
+  This function iterates over all the reorder_spans in file_lines, and
+  calculates the first one that has a line of the given kind in it.
+  If no such reorder span is found, it takes the last span of 'lower'
+  kinds (main-cu kind is lowest, forward-declare is highest).  If no
+  such reorder span is found, it takes the first span of 'higher'
+  kind, but not considering the forward-declare kind (we don't want to
+  put an #include with the first forward-declare, because it may be
+  inside a class or something weird).  If there's *still* no match, we
+  return the first line past leading comments, whitespace, and #ifdef
+  guard lines.  If there's *still* no match, we just insert at
+  end-of-file.
+
+  As a special case, we never return a span for forward-declares that is
+  after 'contentful' code, even if other forward-declares are there.
+  For instance:
+     using Foo::Bar;
+     class Bang;
+  We want to make sure to put 'namespace Foo { class Bar; }'
+  *before* the using line!
+
+  kind is one of the following enums, with examples:
+     _MAIN_CU_INCLUDE_KIND:    #include "foo.h" when editing foo.cc
+     _C_SYSTEM_INCLUDE_KIND:   #include <stdio.h>
+     _CXX_SYSTEM_INCLUDE_KIND: #include <vector>
+     _NONSYSTEM_INCLUDE_KIND:  #include "bar.h"
+     _PROJECT_INCLUDE_KIND:    #include "myproject/quux.h"
+     _FORWARD_DECLARE_KIND:    class Baz;
+
+  Arguments:
+    file_lines: an array of LineInfo objects with .type and
+       .reorder_span filled in.
+    good_reorder_spans: a sorted list of reorder_spans to consider
+       (should not include reorder_spans inside #ifdefs or
+       namespaces).
+    kind: one of *_KIND values.
+    filename: the name of the file that file_lines comes from.
+       This is passed to _GetLineKind (are we a main-CU #include?)
+    flags: commandline flags, as parsed by argparse.  We use
+       flags.separate_project_includes to sort the #includes for the
+       current project separately from other #includes.
+
+  Returns:
+    A pair of line numbers, [start_line, end_line), that is the 'best'
+    reorder_span in file_lines for the given kind.
+  """
+  # _EOF_KIND is deliberately not accepted here: it is only a sort sentinel.
+  assert kind in (_MAIN_CU_INCLUDE_KIND, _C_SYSTEM_INCLUDE_KIND,
+                  _CXX_SYSTEM_INCLUDE_KIND, _NONSYSTEM_INCLUDE_KIND,
+                  _PROJECT_INCLUDE_KIND, _FORWARD_DECLARE_KIND), kind
+  # Figure out where the first 'contentful' line is (after the first
+  # 'good' span, so we skip past header guards and the like).  Basically,
+  # the first contentful line is a line not in any reorder span.
+  # Concretely: walk adjacent spans and stop at the first gap, i.e. the
+  # first place where one span's end is not the next span's start.
+  for i in range(len(good_reorder_spans) - 1):
+    if good_reorder_spans[i][1] != good_reorder_spans[i+1][0]:
+      first_contentful_line = good_reorder_spans[i][1]
+      break
+  else:     # got to the end of the file without finding a break in the spans
+    if good_reorder_spans:
+      first_contentful_line = good_reorder_spans[-1][1]
+    else:
+      first_contentful_line = 0
+
+  # Let's just find the first and last span for each kind.
+  # Both dicts map a *_KIND value to a (start_line, end_line) reorder span.
+  first_reorder_spans = {}
+  last_reorder_spans = {}
+  for reorder_span in good_reorder_spans:
+    for line_number in range(*reorder_span):
+      line_kind = _GetLineKind(file_lines[line_number], filename,
+                               flags.separate_project_includes)
+      # Ignore forward-declares that come after 'contentful' code; we
+      # never want to insert new forward-declares there.
+      if (line_kind == _FORWARD_DECLARE_KIND and
+          line_number > first_contentful_line):
+        continue
+      if line_kind is not None:
+        # setdefault keeps the earliest span; plain assignment keeps the latest.
+        first_reorder_spans.setdefault(line_kind, reorder_span)
+        last_reorder_spans[line_kind] = reorder_span
+
+  # Find the first span of our kind.
+  if kind in first_reorder_spans:
+    return first_reorder_spans[kind]
+
+  # Second choice: last span of the kinds above us:
+  # ('above' meaning numerically smaller kinds, tried nearest-first, down to
+  # and including _MAIN_CU_INCLUDE_KIND.)
+  for backup_kind in range(kind - 1, _MAIN_CU_INCLUDE_KIND - 1, -1):
+    if backup_kind in last_reorder_spans:
+      return last_reorder_spans[backup_kind]
+
+  # Third choice: first span of the kinds below us, but not counting
+  # _FORWARD_DECLARE_KIND.
+  # (The range's upper bound is exclusive, which is what leaves the
+  # forward-declare kind out.)
+  for backup_kind in range(kind + 1, _FORWARD_DECLARE_KIND):
+    if backup_kind in first_reorder_spans:
+      return first_reorder_spans[backup_kind]
+
+  # There are no reorder-spans at all, or they are only
+  # _FORWARD_DECLARE spans.  Return the first line past the leading
+  # comments, whitespace, and #ifdef guard lines, or the beginning
+  # of the _FORWARD_DECLARE span, whichever is smaller.
+  line_number = 0
+  seen_header_guard = False
+  while line_number < len(file_lines):
+    if file_lines[line_number].deleted:
+      line_number += 1
+    elif file_lines[line_number].type == _HEADER_GUARD_RE:
+      seen_header_guard = True
+      line_number += 2    # skip over the header guard
+    elif file_lines[line_number].type == _BLANK_LINE_RE:
+      line_number += 1
+    elif file_lines[line_number].type == _PRAGMA_ONCE_LINE_RE:
+      # '#pragma once' is treated like a header guard for comment placement.
+      seen_header_guard = True
+      line_number += 1
+    elif (file_lines[line_number].type == _COMMENT_LINE_RE
+          and not seen_header_guard):
+      # We put #includes after top-of-file comments.  But comments
+      # inside the header guard are no longer top-of-file comments;
+      # #includes go before them.
+      line_number += 1
+    else:
+      # If the "first line" we would return is inside the forward-declare
+      # reorder span, just return that span, rather than creating a new
+      # span inside the existing one.
+      if first_reorder_spans:
+        # Every include kind was ruled out by the three lookups above, so
+        # only forward-declare spans can remain at this point.
+        assert list(first_reorder_spans.keys()) == [_FORWARD_DECLARE_KIND], \
+            first_reorder_spans
+        if line_number >= first_reorder_spans[_FORWARD_DECLARE_KIND][0]:
+          return first_reorder_spans[_FORWARD_DECLARE_KIND]
+      # An empty span: insert here without replacing any existing line.
+      return (line_number, line_number)
+
+  # OK, I guess just insert at the end of the file
+  return (len(file_lines), len(file_lines))
+
+
+def _RemoveNamespacePrefix(fwd_decl_iwyu_line, namespace_prefix):
+  """Strip namespace_prefix, and its matching closing braces, from a line.
+
+  Given the iwyu line
+     namespace ns1 { namespace ns2 { namespace ns3 { foo } } }
+  and namespace_prefix = 'namespace ns1 { namespace ns2 {', the result is
+  'namespace ns3 { foo }': the prefix is dropped from the front and one
+  '}' per '{' in the prefix is dropped from the end.
+
+  Arguments:
+    fwd_decl_iwyu_line: a line from iwyu about a forward-declare line to add
+    namespace_prefix: a non-empty string of the form
+      namespace <ns1> { namespace <ns2> { [...]
+
+  Returns:
+    The input line without the namespaces named in namespace_prefix, or
+    None if the line does not start with the prefix or does not end with
+    enough closing braces.
+  """
+  assert namespace_prefix, "_RemoveNamespaces requires a non-empty prefix"
+  if not fwd_decl_iwyu_line.startswith(namespace_prefix):
+    return None
+
+  remainder = fwd_decl_iwyu_line[len(namespace_prefix):].lstrip()
+
+  # Each '{' opened by the prefix must be balanced by one '}' at the very
+  # end of the line; whitespace around those braces is tolerated.
+  brace_count = namespace_prefix.count('{')
+  closing = re.search(r'(\s*\}){%d}\s*$' % brace_count, remainder)
+  if closing is None:
+    return None
+
+  return remainder[:closing.start(0)]
+
+
+def _DecoratedMoveSpanLines(iwyu_record, file_lines, move_span_lines, flags):
+  """Given a span of lines from file_lines, returns a "decorated" result.
+
+  First, we construct the actual contents of the move-span, as a list
+  of strings (one per line).  If we see an #include in the move_span,
+  we replace its comments with the ones in iwyu_record, if present
+  (iwyu_record will never have any comments if flags.comments is
+  False).
+
+  Second, we construct a string, of the 'contentful' part of the
+  move_span -- that is, without the leading comments -- with
+  whitespace removed, and a few other changes made.  This is used for
+  sorting (we remove whitespace so '# include <foo>' compares properly
+  against '#include <bar>').
+
+  Third, we figure out the 'kind' of this span: system include,
+  main-cu include, etc.
+
+  We return all of these together in a tuple, along with the
+  reorder-span this move span is inside.  We pick the best
+  reorder-span if one isn't already present (because it's an
+  #include we're adding in, for instance.)  This allows us to sort
+  all the moveable content.
+
+  Arguments:
+    iwyu_record: the IWYUOutputRecord struct for this source file.
+    file_lines: a list of LineInfo objects holding the parsed output of
+      the file in iwyu_record.filename
+    move_span_lines: A list of LineInfo objects.  For #includes and
+      forward-declares already in the file, this will be a sub-list
+      of file_lines.  For #includes and forward-declares we're adding
+      in, it will be a newly created list.
+    flags: commandline flags, as parsed by argparse.  We use
+      flags.separate_project_includes to sort the #includes for the
+      current project separately from other #includes.
+
+  Returns:
+    A tuple (reorder_span, kind, sort_key, all_lines_as_list)
+    sort_key is the 'contentful' part of the move_span, with whitespace
+      removed, and -inl.h changed to _inl.h (so it sorts later).
+    all_lines_as_list is a list of strings, not of LineInfo objects.
+    Returns None if the move-span has been deleted, or for some other
+      reason lacks an #include or forward-declare line.
+  """
+  # Get to the first contentful line.
+  for i in range(len(move_span_lines)):
+    if (not move_span_lines[i].deleted and
+        move_span_lines[i].type in (_INCLUDE_RE, _FORWARD_DECLARE_RE)):
+      first_contentful_line = i
+      break
+  else:       # for/else
+    # No include or forward-declare line seen, must be a deleted span.
+    return None
+
+  firstline = move_span_lines[first_contentful_line]
+  m = _INCLUDE_RE.match(firstline.line)
+  if m:
+    # If we're an #include, the contentful lines are easy.  But we have
+    # to do the comment-replacing first.
+    sort_key = firstline.line
+    iwyu_version = iwyu_record.full_include_lines.get(m.group(1), '')
+    if _COMMENT_LINE_RE.search(iwyu_version):  # the iwyu version has comments
+      sort_key = iwyu_version                  # replace the comments
+    # Every line but the last is kept as-is; the last line of the span is
+    # replaced by the (possibly re-commented) #include line.
+    all_lines = ([li.line for li in move_span_lines[:-1] if not li.deleted] +
+                 [sort_key])
+  else:
+    # We're a forward-declare.  Also easy.
+    contentful_list = [li.line for li in move_span_lines[first_contentful_line:]
+                       if not li.deleted]
+    sort_key = ''.join(contentful_list)
+    all_lines = [li.line for li in move_span_lines if not li.deleted]
+
+  # Get rid of whitespace in the contentful_lines
+  sort_key = re.sub(r'\s+', '', sort_key)
+  # Replace -inl.h with _inl.h so foo-inl.h sorts after foo.h in #includes.
+  sort_key = sort_key.replace('-inl.h', '_inl.h')
+
+  # Next figure out the kind.
+  kind = _GetLineKind(firstline, iwyu_record.filename,
+                      flags.separate_project_includes)
+
+  # All we're left to do is the reorder-span we're in.  Hopefully it's easy.
+  reorder_span = firstline.reorder_span
+  if reorder_span is None:     # must be a new #include we're adding
+    # If we're a forward-declare inside a namespace, see if there's a
+    # reorder span inside the same namespace we can fit into.
+    if kind == _FORWARD_DECLARE_KIND:
+      namespace_reorder_spans = _GetNamespaceLevelReorderSpans(file_lines)
+      for namespace_prefix, possible_reorder_span in namespace_reorder_spans:
+        if (namespace_prefix and possible_reorder_span and
+            firstline.line.startswith(namespace_prefix)):
+          # Great, we can go into this reorder_span.  We also need to
+          # modify all-lines because this line doesn't need the
+          # namespace prefix anymore.  Make sure we can do that before
+          # succeeding.
+          new_firstline = _RemoveNamespacePrefix(firstline.line, namespace_prefix)
+          if new_firstline:
+            assert all_lines[first_contentful_line] == firstline.line
+            all_lines[first_contentful_line] = new_firstline
+            # The sort key must describe the line as it will be emitted.
+            sort_key = re.sub(r'\s+', '', new_firstline)
+            reorder_span = possible_reorder_span
+            break
+
+    # If that didn't work out, find a top-level reorder span to go into.
+    if reorder_span is None:
+      # TODO(csilvers): could make this more efficient by storing, per-kind.
+      toplevel_reorder_spans = _GetToplevelReorderSpans(file_lines)
+      reorder_span = _FirstReorderSpanWith(file_lines, toplevel_reorder_spans,
+                                           kind, iwyu_record.filename, flags)
+
+  return (reorder_span, kind, sort_key, all_lines)
+
+
+def _CommonPrefixLength(a, b):
+  """Return how many leading elements the lists a and b have in common.
+
+  Equivalently, this is the index of the first position at which the two
+  lists differ, or the length of the shorter list if one is a prefix of
+  the other.
+  """
+  for index, (left, right) in enumerate(zip(a, b)):
+    if left != right:
+      return index
+  # No mismatch found within the overlapping part.
+  return min(len(a), len(b))
+
+
+def _NormalizeNamespaceForwardDeclareLines(lines):
+  """'Normalize' namespace lines in a list of output lines and return new list.
+
+  When suggesting new forward-declares to insert, iwyu uses the following
+  format, putting each class on its own line with all namespaces:
+     namespace foo { namespace bar { class A; } }
+     namespace foo { namespace bar { class B; } }
+     namespace foo { namespace bang { class C; } }
+  We convert this to 'normalized' form, which puts namespaces on their
+  own line and collects classes together:
+     namespace foo {
+     namespace bar {
+     class A;
+     class B;
+     }  // namespace bar
+     namespace bang {
+     class C;
+     }  // namespace bang
+     }  // namespace foo
+
+  Non-namespace lines are left alone.  Only adjacent namespace lines
+  from the input are merged.
+
+  Arguments:
+    lines: a list of output-lines -- that is, lines that are ready to
+       be emitted as-is to the output file.
+
+  Returns:
+    A new version of lines, with namespace lines normalized as above.
+
+  Raises:
+    FixIncludesError: if a line opens a namespace in iwyu format but has
+      no '{ ... }' body to extract.
+  """
+  # iwyu input is very regular, which is nice.
+  iwyu_namespace_re = re.compile(r'namespace ([^{]*) { ')
+  iwyu_classname_re = re.compile(r'{ ([^{}]*) }')
+
+  retval = []
+  current_namespaces = []
+  # We append a blank line so the final namespace-closing happens "organically".
+  for line in lines + ['']:
+    namespaces_in_line = iwyu_namespace_re.findall(line)
+    # Namespaces shared with the previous line stay open; the rest of the
+    # previous nesting is closed innermost-first, then the new tail is opened.
+    differ_pos = _CommonPrefixLength(namespaces_in_line, current_namespaces)
+    namespaces_to_close = reversed(current_namespaces[differ_pos:])
+    namespaces_to_open = namespaces_in_line[differ_pos:]
+    retval.extend('}  // namespace %s' % ns for ns in namespaces_to_close)
+    retval.extend('namespace %s {' % ns for ns in namespaces_to_open)
+    current_namespaces = namespaces_in_line
+    # Now add the current line.  If we were a namespace line, it's the
+    # 'class' part of the line (everything but the 'namespace {'s).
+    if namespaces_in_line:
+      m = iwyu_classname_re.search(line)
+      if not m:
+        # Interpolate the line into the message; passing it as a second
+        # argument would leave the '%s' unformatted.
+        raise FixIncludesError('Malformed namespace line from iwyu: %s' % line)
+      retval.append(m.group(1))
+    else:
+      retval.append(line)
+
+  assert retval and retval[-1] == '', 'What happened to our sentinel line?'
+  return retval[:-1]
+
+
+def _DeleteLinesAccordingToIwyu(iwyu_record, file_lines):
+  """Mark every line iwyu_record wants gone as deleted, then tidy up.
+
+  Arguments:
+    iwyu_record: the IWYUOutputRecord for this file; its lines_to_delete
+      gives the line numbers to remove.
+    file_lines: a list of LineInfo objects; their .deleted flags are
+      updated in place.
+  """
+  for doomed_line in iwyu_record.lines_to_delete:
+    # The whole move-span goes: the line itself plus its preceding comments.
+    span_start, span_end = file_lines[doomed_line].move_span
+    for index in range(span_start, span_end):
+      file_lines[index].deleted = True
+
+  # Removing lines can leave namespaces and #ifdefs empty, and removing
+  # those can empty out their parents, so repeat until a full pass
+  # deletes nothing.
+  keep_going = True
+  while keep_going:
+    removed = _DeleteEmptyNamespaces(file_lines)
+    removed += _DeleteEmptyIfdefs(file_lines)
+    keep_going = removed != 0
+
+  # Duplicate lines in the input are dropped too.  To stay out of trouble
+  # (deleting inside an #ifdef by accident, say), only 'top-level'
+  # #includes and forward-declares are examined.
+  toplevel_spans = _GetToplevelReorderSpans(file_lines)
+  _DeleteDuplicateLines(file_lines, toplevel_spans)
+
+  # A reorder span that was deleted entirely may be left with surplus blank
+  # lines on both sides.  Blank lines inside #ifdefs and namespaces were
+  # already handled above, so the top-level spans are all that remain.
+  for span in toplevel_spans:
+    _DeleteExtraneousBlankLines(file_lines, span)
+
+
+def _GetSymbolNameFromForwardDeclareLine(line):
+  """Extract the qualified symbol name from an iwyu forward-declare line.
+
+  For a declaration wrapped in namespaces the result is foo::bar::sym;
+  without namespaces it is just sym.  An anonymous namespace contributes
+  the component '(anonymous namespace)'.
+  """
+  # Spell anonymous namespaces the way they appear in qualified names, so
+  # the namespace pattern below picks them up like any named namespace.
+  named_line = line.replace(
+    "namespace {", "namespace (anonymous namespace) {")
+  enclosing_namespaces = re.findall(r'namespace ([^{]*) { ', named_line)
+  # The declared symbol is the last identifier-like token on the line.
+  identifiers = re.findall(r'([A-Za-z0-9_]+)', line)
+  return '::'.join(enclosing_namespaces + [identifiers[-1]])
+
+
+def GetLineSortOrdinal(kind, quoted_includes_first):
+  """Return the sort position of kind under the selected ordering.
+
+  Arguments:
+    kind: one of the *_KIND values.
+    quoted_includes_first: if true, use the ordering in which quoted
+      includes sort before system includes; otherwise the default one.
+  """
+  ordering = (SORT_ORDER_QUOTED_FIRST if quoted_includes_first
+              else SORT_ORDER_DEFAULT)
+  return ordering[kind]
+
+
+def FixFileLines(iwyu_record, file_lines, flags, fileinfo):
+  """Applies one block of lines from the iwyu output script.
+
+  Called once we have read all the lines from the iwyu output script
+  pertaining to a single source file, and parsed them into an
+  iwyu_record.  At that point we edit the source file, remove the old
+  #includes and forward-declares, insert the #includes and
+  forward-declares, and reorder the lot, all as specified by the iwyu
+  output script.  The resulting source code lines are returned.
+
+  Arguments:
+    iwyu_record: an IWYUOutputRecord object holding the parsed output
+      of the include-what-you-use script (run at verbose level 1 or
+      higher) pertaining to a single source file.
+    file_lines: a list of LineInfo objects holding the parsed output of
+      the file in iwyu_record.filename
+    flags: commandline flags, as parsed by argparse.  We use
+       flags.safe_headers to turn off deleting lines, flags.reorder,
+       flags.quoted_includes_first and flags.keep_iwyu_namespace_format
+       directly, and the other flags indirectly (via calls to other
+       routines).
+    fileinfo: FileInfo for the current file.  Its linesep is appended to
+       every line emitted from a reorder span.
+
+  Returns:
+    An array of 'fixed' source code lines, after modifications as
+    specified by iwyu.
+  """
+  # First delete the includes and forward-declares that we should delete.
+  # This is easy since iwyu tells us the line numbers.
+  if not (flags.safe_headers and _MayBeHeaderFile(iwyu_record.filename)):
+    _DeleteLinesAccordingToIwyu(iwyu_record, file_lines)
+
+  # With these deletions, we may be able to merge together some
+  # reorder-spans.  Recalculate them to see.
+  _CalculateReorderSpans(file_lines)
+
+  # For every move-span in our file -- that's every #include and
+  # forward-declare we saw -- 'decorate' the move-range to allow us
+  # to sort them.
+  move_spans = OrderedSet([fl.move_span for fl in file_lines if fl.move_span])
+  decorated_move_spans = []
+  for (start_line, end_line) in move_spans:
+    decorated_span = _DecoratedMoveSpanLines(iwyu_record, file_lines,
+                                             file_lines[start_line:end_line],
+                                             flags)
+    # None means the span was deleted (no #include / forward-declare left).
+    if decorated_span:
+      decorated_move_spans.append(decorated_span)
+
+  # Now let's add in a decorated move-span for all the new #includes
+  # and forward-declares.
+  symbol_names_seen = set()
+  for line in iwyu_record.includes_and_forward_declares_to_add:
+    line_info = LineInfo(line)
+    m = _INCLUDE_RE.match(line)
+    if m:
+      line_info.type = _INCLUDE_RE
+      line_info.key = m.group(1)
+    else:
+      # Avoid duplicates that can arise if different template args
+      # were suggested by different iwyu analyses for this file.
+      symbol_name = _GetSymbolNameFromForwardDeclareLine(line)
+      if symbol_name in symbol_names_seen:
+        continue
+      symbol_names_seen.add(symbol_name)
+      line_info.type = _FORWARD_DECLARE_RE
+    decorated_span = _DecoratedMoveSpanLines(iwyu_record, file_lines,
+                                             [line_info], flags)
+    assert decorated_span, 'line to add is not an #include or fwd-decl?'
+    decorated_move_spans.append(decorated_span)
+
+  # Add a sentinel decorated move-span, to make life easy, and sort.
+  # The sentinel's span starts at len(file_lines), so the copy loop below
+  # runs through to the end of the file once the real spans are consumed.
+  decorated_move_spans.append(((len(file_lines), len(file_lines)),
+                               _EOF_KIND, '', []))
+
+  def key(decorated_span):
+    # Sort key for a decorated span: always by reorder span, then kind.
+    # Only with flags.reorder set do the sort_key text and the lines
+    # themselves take part in the ordering.
+    reorder_span, kind, sort_key, all_lines = decorated_span
+    kind_key = GetLineSortOrdinal(kind, flags.quoted_includes_first)
+    if flags.reorder:
+      return reorder_span, kind_key, sort_key, all_lines
+    else:
+      return reorder_span, kind_key
+
+  decorated_move_spans.sort(key=key)
+
+  # Now go through all the lines of the input file and construct the
+  # output file.  Before we get to the next reorder-span, we just
+  # copy lines over verbatim (ignoring deleted lines, of course).
+  # In a reorder-span, we just print the sorted content, introducing
+  # blank lines when appropriate.
+  output_lines = []
+  line_number = 0
+  while line_number < len(file_lines):
+    # The list is sorted, so its head is the next reorder span in the file.
+    current_reorder_span = decorated_move_spans[0][0]
+
+    # Just copy over all the lines until the next reorder span.
+    while line_number < current_reorder_span[0]:
+      if not file_lines[line_number].deleted:
+        output_lines.append(file_lines[line_number].line)
+      line_number += 1
+
+    # Now fill in the contents of the reorder-span from decorated_move_spans
+    new_lines = []
+    while (decorated_move_spans and
+           decorated_move_spans[0][0] == current_reorder_span):
+      new_lines.extend(decorated_move_spans[0][3])   # the full content
+      if (len(decorated_move_spans) > 1 and
+          _ShouldInsertBlankLine(decorated_move_spans[0],
+                                 decorated_move_spans[1], file_lines, flags)):
+        new_lines.append('')
+      decorated_move_spans = decorated_move_spans[1:]   # pop
+
+    if not flags.keep_iwyu_namespace_format:
+      # Now do the munging to convert namespace lines from the iwyu input
+      # format to the 'official style' format:
+      #    'namespace foo { class Bar; }\n' -> 'namespace foo {\nclass Bar;\n}'
+      # along with collecting multiple classes in the same namespace.
+      new_lines = _NormalizeNamespaceForwardDeclareLines(new_lines)
+
+    # Add line separators to the new lines.
+    # (rstrip() first drops any trailing whitespace, including an existing
+    # line ending, so each line ends in exactly one fileinfo.linesep.)
+    new_lines = [nl.rstrip() + fileinfo.linesep for nl in new_lines]
+
+    output_lines.extend(new_lines)
+    line_number = current_reorder_span[1]               # go to end of span
+
+  # Drop any entries whose text is None rather than emitting them.
+  return [line for line in output_lines if line is not None]
+
+
+def FixOneFile(iwyu_record, file_contents, flags, fileinfo):
+  """Apply the fixes described by iwyu_record to one file's contents.
+
+  Arguments:
+    iwyu_record: the record handed to ParseOneFile and FixFileLines.
+    file_contents: the file's contents, in the form ParseOneFile accepts.
+    flags: commandline flags, passed through to FixFileLines.
+    fileinfo: per-file information, passed through to FixFileLines.
+
+  Returns:
+    A pair (old_lines, fixed_lines): the text of the parsed lines as
+    it was before fixing, and the lines produced by FixFileLines.
+  """
+  parsed_lines = ParseOneFile(file_contents, iwyu_record)
+
+  # Snapshot the original text before FixFileLines gets to see the
+  # parsed lines.  Entries that are None, or whose text is None, are
+  # left out of the snapshot.
+  old_lines = []
+  for parsed in parsed_lines:
+    if parsed is None or parsed.line is None:
+      continue
+    old_lines.append(parsed.line)
+
+  return old_lines, FixFileLines(iwyu_record, parsed_lines, flags, fileinfo)
+
+
+def FixManyFiles(iwyu_records, flags):
+  """Given a list of iwyu_records, fix each file listed in the record.
+
+  For each iwyu record in the input, which lists the #includes and
+  forward-declares to add, remove, and re-sort, loads the file, makes
+  the fixes, and writes the fixed file to disk (or, when
+  flags.dry_run is set, only prints a diff).  The flags affect the
+  details of the fixing.
+
+  A file for which _ReadFile returns nothing (a falsy value) is
+  skipped silently.  A FixIncludesError raised while handling a file
+  is reported on stdout and that file is skipped; the remaining files
+  are still processed.
+
+  Arguments:
+    iwyu_records: a collection of IWYUOutputRecord objects, each
+      holding the parsed output of the include-what-you-use script
+      (run at verbose level 1 or higher) pertaining to a single
+      source file.  iwyu_record.filename indicates what file to edit.
+    flags: commandline flags, as parsed by argparse.
+
+  Returns:
+    The number of files fixed (as opposed to ones that needed no
+    fixing).  In dry_run mode, files that would have been fixed are
+    counted as well.
+  """
+  files_fixed = 0
+  for iwyu_record in iwyu_records:
+    try:
+      fileinfo = FileInfo.parse(iwyu_record.filename)
+
+      file_contents = _ReadFile(iwyu_record.filename, fileinfo)
+      if not file_contents:
+        continue
+
+      print(">>> Fixing #includes in '%s'" % iwyu_record.filename)
+      old_lines, fixed_lines = FixOneFile(iwyu_record, file_contents, flags,
+                                          fileinfo)
+      if old_lines == fixed_lines:
+        print("No changes in file %s" % iwyu_record.filename)
+        continue
+
+      if flags.dry_run:
+        PrintFileDiff(old_lines, fixed_lines)
+      else:
+        _WriteFile(iwyu_record.filename, fileinfo, fixed_lines)
+
+      # Counted in dry_run mode too: the file *would* have been edited.
+      files_fixed += 1
+    except FixIncludesError as why:
+      print('ERROR: %s - skipping file %s' % (why, iwyu_record.filename))
+
+  print('IWYU edited %d files on your behalf.\n' % files_fixed)
+  return files_fixed
+
+
+def ProcessIWYUOutput(f, files_to_process, flags, cwd):
+  """Fix the #include and forward-declare lines as directed by f.
+
+  Given a file object that has the output of the include_what_you_use
+  script, see every file to be edited and edit it, if appropriate.
+  Records that refer to the same (normalized) filename are merged
+  before any fixing is done.  A FixIncludesError raised while parsing
+  a record is reported on stdout and parsing resumes with a fresh
+  parser.
+
+  Arguments:
+    f: an iterable object that is the output of include_what_you_use.
+    files_to_process: A set of filenames, or None.  If not None, we
+       ignore files mentioned in f that are not in files_to_process.
+       These names are normalized relative to cwd.
+    flags: commandline flags, as parsed by argparse.  We read
+       flags.basedir, flags.ignore_re, flags.only_re and
+       flags.update_comments directly; we also pass the flags to
+       other routines.
+    cwd: the current working directory, externalized for testing.
+
+  Returns:
+    The number of files that had to be modified (because they weren't
+    already all correct).  In dry_run mode, returns the number of
+    files that would have been modified.
+  """
+  if files_to_process is not None:
+    # Keep the normalized names in a set: membership is tested once
+    # per iwyu record in the loop below.
+    files_to_process = {NormalizeFilePath(cwd, fname)
+                        for fname in files_to_process}
+
+  # First collect all the iwyu data from stdin.
+
+  # Maintain sort order by using OrderedDict instead of dict
+  iwyu_output_records = OrderedDict()  # IWYUOutputRecords keyed by filename
+  while True:
+    iwyu_output_parser = IWYUOutputParser()
+    try:
+      iwyu_record = iwyu_output_parser.ParseOneRecord(f, flags)
+      if not iwyu_record:
+        break
+    except FixIncludesError as why:
+      print('ERROR: %s' % why)
+      continue
+    filename = NormalizeFilePath(flags.basedir, iwyu_record.filename)
+    if files_to_process is not None and filename not in files_to_process:
+      print('(skipping %s: not listed on commandline)' % filename)
+      continue
+    if flags.ignore_re and re.search(flags.ignore_re, filename):
+      print('(skipping %s: it matches --ignore_re, which is %s)' % (
+          filename, flags.ignore_re))
+      continue
+    if flags.only_re and not re.search(flags.only_re, filename):
+      print('(skipping %s: it does not match --only_re, which is %s)' % (
+          filename, flags.only_re))
+      continue
+
+    if filename in iwyu_output_records:
+      iwyu_output_records[filename].Merge(iwyu_record)
+    else:
+      iwyu_output_records[filename] = iwyu_record
+
+  # Now ignore all the files that never had any contentful changes
+  # seen for them.  (We have to wait until we're all done, since a .h
+  # file may have a contentful change when #included from one .cc
+  # file, but not another, and we need to have merged them above.)
+  if not flags.update_comments:
+    # Only values of existing keys are replaced here, so the dict does
+    # not change size while it is being iterated.
+    for filename in iwyu_output_records:
+      if not iwyu_output_records[filename].HasContentfulChanges():
+        print('(skipping %s: iwyu reports no contentful changes)' % filename)
+        # Mark that we're skipping this file by setting the record to None
+        iwyu_output_records[filename] = None
+
+  # Now do all the fixing, and return the number of files modified
+  contentful_records = [ior for ior in iwyu_output_records.values() if ior]
+  return FixManyFiles(contentful_records, flags)
+
+
+def NormalizeFilePath(basedir, filename):
+  """Return filename in a form that can be compared with other paths.
+
+  A non-absolute filename is joined onto basedir and normalized,
+  provided basedir has a value.  An absolute filename, or any filename
+  when basedir is unset, is returned unchanged.
+  """
+  if not basedir or os.path.isabs(filename):
+    return filename
+  joined = os.path.join(basedir, filename)
+  return os.path.normpath(joined)
+
+
+def SortIncludesInFiles(files_to_process, flags):
+  """Sort the #include lines of every file in files_to_process.
+
+  Each file is handed to FixManyFiles together with an empty iwyu
+  record.  Such a record carries no adds or deletes, so its only
+  effect is to get the #include lines sorted; no #include is added or
+  removed.  Forward-declares are ignored as well: fix_includes learns
+  where forward-declare lines are from the iwyu input, and an empty
+  record provides none.
+
+  Arguments:
+    files_to_process: a list (or set) of filenames.
+    flags: commandline flags, as parsed by argparse.  flags.basedir is
+       used to normalize each filename; the flags are otherwise just
+       passed on to other routines.
+
+  Returns:
+    The number of files that had to be modified (because they weren't
+    already all correct, that is, already in sorted order).
+  """
+  empty_records = [
+      IWYUOutputRecord(NormalizeFilePath(flags.basedir, fname))
+      for fname in files_to_process]
+  return FixManyFiles(empty_records, flags)
+
+
+def main(argv):
+  """Parse the command line and run the requested fix-up mode.
+
+  With --sort_only, the files named on the command line have their
+  #includes sorted via SortIncludesInFiles.  Otherwise iwyu output is
+  read from stdin and applied via ProcessIWYUOutput, restricted to the
+  files named on the command line if any were given.
+
+  Arguments:
+    argv: the full argument vector; argv[0] is skipped.
+
+  Returns:
+    0.  (Exits through sys.exit with a message if --sort_only is given
+    without any filenames.)
+  """
+  # Parse the command line.
+  parser = argparse.ArgumentParser(
+    formatter_class=argparse.RawDescriptionHelpFormatter,
+    description='Update files based on include-what-you-use output',
+    epilog=_EPILOG)
+  parser.add_argument('-b', '--blank_lines', action='store_true', default=True,
+                      help=('Put a blank line between primary header file and'
+                            ' C/C++ system #includes, and another blank line'
+                            ' between system #includes and google #includes'
+                            ' [default]'))
+  parser.add_argument('--noblank_lines', action='store_false',
+                      dest='blank_lines')
+
+  parser.add_argument('--comments', action='store_true', default=False,
+                      help='Put comments after the #include lines')
+  parser.add_argument('--nocomments', action='store_false', dest='comments')
+
+  parser.add_argument('--update_comments', action='store_true', default=False,
+                      help=('Replace \'why\' comments with the ones provided'
+                            ' by IWYU'))
+  parser.add_argument('--noupdate_comments', action='store_false',
+                      dest='update_comments')
+
+  parser.add_argument('--safe_headers', action='store_true', default=True,
+                      help=('Do not remove unused #includes/fwd-declares from'
+                            ' header files; just add new ones [default]'))
+  parser.add_argument('--nosafe_headers', action='store_false',
+                      dest='safe_headers')
+
+  parser.add_argument('--reorder', action='store_true', default=False,
+                      help=('Re-order lines relative to other similar lines '
+                            '(e.g. headers relative to other headers)'))
+  parser.add_argument('--noreorder', action='store_false', dest='reorder',
+                      help=('Do not re-order lines relative to other similar '
+                            'lines.'))
+
+  parser.add_argument('-s', '--sort_only', action='store_true',
+                      help=('Just sort #includes of files listed on cmdline;'
+                            ' do not add or remove any #includes'))
+
+  # NOTE(review): this help text promises a non-zero return code when
+  # files would be modified, but the counts returned by
+  # SortIncludesInFiles/ProcessIWYUOutput below are discarded and main
+  # always returns 0 -- confirm which of the two is intended.
+  parser.add_argument('-n', '--dry_run', action='store_true', default=False,
+                      help=('Do not actually edit any files; just print diffs.'
+                            ' Return code is 0 if no changes are needed,'
+                            ' else min(the number of files that would be'
+                            ' modified, 100)'))
+
+  parser.add_argument('--ignore_re', default=None,
+                      help=('%(prog)s will skip editing any file whose name'
+                            ' matches this regular expression.'))
+
+  parser.add_argument('--only_re', default=None,
+                      help=('%(prog)s will skip editing any file whose name'
+                            ' does not match this regular expression.'))
+
+  parser.add_argument('--separate_project_includes', default=None,
+                      help=('Sort #includes for current project separately'
+                            ' from all other #includes.  This flag specifies'
+                            ' the root directory of the current project.'
+                            ' If the value is "<tld>", #includes that share'
+                            ' the same top-level directory are assumed to be'
+                            ' in the same project. If not specified, project'
+                            ' #includes will be sorted with other non-system'
+                            ' #includes.'))
+
+  parser.add_argument('-m', '--keep_iwyu_namespace_format',
+                      action='store_true', default=False,
+                      help=('Keep forward-declaration namespaces in IWYU'
+                            ' format, eg. namespace n1 { namespace n2 {'
+                            ' class c1; } }.'
+                            ' Do not convert to "normalized" Google format: '
+                            'namespace n1 {\\nnamespace n2 {\\n class c1;'
+                            '\\n}\\n}.'))
+  parser.add_argument('--nokeep_iwyu_namespace_format', action='store_false',
+                      dest='keep_iwyu_namespace_format')
+
+  parser.add_argument('--basedir', '-p', default=None,
+                      help=('Specify the base directory. fix_includes will '
+                            'interpret non-absolute filenames relative to '
+                            'this path.'))
+  parser.add_argument('--quoted_includes_first', action='store_true',
+                      default=False,
+                      help='When sorting includes, place quoted ones first')
+
+  parser.add_argument('files', nargs='*', metavar='FILES')
+
+  flags = parser.parse_args(argv[1:])
+  if flags.files:
+    files_to_modify = set(flags.files)
+  else:
+    files_to_modify = None
+
+  # Make sure a real project root ends in a path separator; the
+  # 'special' values such as "<tld>" are left alone.
+  if (flags.separate_project_includes and
+      not flags.separate_project_includes.startswith('<') and  # 'special' vals
+      not flags.separate_project_includes.endswith(os.path.sep) and
+      not flags.separate_project_includes.endswith('/')):
+    flags.separate_project_includes += os.path.sep
+
+  # --update_comments implies --comments.
+  if flags.update_comments:
+    flags.comments = True
+
+  if flags.sort_only:
+    if not files_to_modify:
+      sys.exit('FATAL ERROR: -s flag requires a list of filenames')
+    SortIncludesInFiles(files_to_modify, flags)
+  else:
+    ProcessIWYUOutput(sys.stdin, files_to_modify, flags, cwd=os.getcwd())
+
+  return 0
+
+
+if __name__ == '__main__':
+  sys.exit(main(sys.argv))

Reply via email to