I just had the chance to try this. One question below.
On Thu, May 23, 2013 at 10:53 AM, Daniel Jasper <[email protected]> wrote: > Author: djasper > Date: Thu May 23 12:53:42 2013 > New Revision: 182596 > > URL: http://llvm.org/viewvc/llvm-project?rev=182596&view=rev > Log: > clang-format integration for git. > > Put this somewhere on your path and use: > > git clang-format > > Awesome work by Mark Lodato. Many thanks! > > Added: > cfe/trunk/tools/clang-format/git-clang-format (with props) > > Added: cfe/trunk/tools/clang-format/git-clang-format > URL: > http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-format/git-clang-format?rev=182596&view=auto > > ============================================================================== > --- cfe/trunk/tools/clang-format/git-clang-format (added) > +++ cfe/trunk/tools/clang-format/git-clang-format Thu May 23 12:53:42 2013 > @@ -0,0 +1,513 @@ > +#!/usr/bin/python > +# > +#===- git-clang-format - ClangFormat Git Integration ---------*- python > -*--===# > +# > +# The LLVM Compiler Infrastructure > +# > +# This file is distributed under the University of Illinois Open Source > +# License. See LICENSE.TXT for details. > +# > > +#===------------------------------------------------------------------------===# > + > +r""" > +clang-format git integration > +============================ > + > +This file provides a clang-format integration for git. Put it somewhere > in your > +path and ensure that it is executable. Then, "git clang-format" will > invoke > +clang-format on the changes in current files or a specific commit. > + > +For further details, run: > +git clang-format -h > + > +Requires Python 2.7 > +""" > + > +import argparse > +import collections > +import contextlib > +import errno > +import os > +import re > +import subprocess > +import sys > + > +usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]' > + > +desc = ''' > +Run clang-format on all lines that differ between the working directory > +and <commit>, which defaults to HEAD. Changes are only applied to the > working > +directory. > + > +The following git-config settings set the default of the corresponding > option: > + clangFormat.binary > + clangFormat.commit > + clangFormat.extension > + clangFormat.style > +''' > + > +# Name of the temporary index file in which save the output of > clang-format. > +# This file is created within the .git directory. > +temp_index_basename = 'clang-format-index' > + > + > +Range = collections.namedtuple('Range', 'start, count') > + > + > +def main(): > + config = load_git_config() > + > + # In order to keep '--' yet allow options after positionals, we need to > + # check for '--' ourselves. (Setting nargs='*' throws away the '--', > while > + # nargs=argparse.REMAINDER disallows options after positionals.) > + argv = sys.argv[1:] > + try: > + idx = argv.index('--') > + except ValueError: > + dash_dash = [] > + else: > + dash_dash = argv[idx:] > + argv = argv[:idx] > + > + default_extensions = ','.join([ > + # From clang/lib/Frontend/FrontendOptions.cpp, all lower case > + 'c', 'h', # C > + 'm', # ObjC > + 'mm', # ObjC++ > + 'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp', # C++ > + ]) > + > + p = argparse.ArgumentParser( > + usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter, > + description=desc) > + p.add_argument('--binary', > + default=config.get('clangformat.binary', 'clang-format'), > + help='path to clang-format'), > + p.add_argument('--commit', > + default=config.get('clangformat.commit', 'HEAD'), > + help='default commit to use if none is specified'), > + p.add_argument('--diff', action='store_true', > + help='print a diff instead of applying the changes') > + p.add_argument('--extensions', > + default=config.get('clangformat.extensions', > + default_extensions), > + help=('comma-separated list of file extensions to > format, ' > + 'excluding the period and case-insensitive')), > + p.add_argument('-f', '--force', action='store_true', > + help='allow changes to unstaged files') > + p.add_argument('-p', '--patch', action='store_true', > + help='select hunks interactively') > + p.add_argument('-q', '--quiet', action='count', default=0, > + help='print less information') > + p.add_argument('--style', > + default=config.get('clangformat.style', None), > + help='passed to clang-format'), > + p.add_argument('-v', '--verbose', action='count', default=0, > + help='print extra information') > + # We gather all the remaining positional arguments into 'args' since we > need > + # to use some heuristics to determine whether or not <commit> was > present. > + # However, to print pretty messages, we make use of metavar and help. > + p.add_argument('args', nargs='*', metavar='<commit>', > + help='revision from which to compute the diff') > + p.add_argument('ignored', nargs='*', metavar='<file>...', > + help='if specified, only consider differences in these > files') > + opts = p.parse_args(argv) > + > + opts.verbose -= opts.quiet > + del opts.quiet > + > + commit, files = interpret_args(opts.args, dash_dash, opts.commit) > + changed_lines = compute_diff_and_extract_lines(commit, files) > + if opts.verbose >= 1: > + ignored_files = set(changed_lines) > + filter_by_extension(changed_lines, opts.extensions.lower().split(',')) > + if opts.verbose >= 1: > + ignored_files.difference_update(changed_lines) > + if ignored_files: > + print 'Ignoring changes in the following files (wrong extension):' > + for filename in ignored_files: > + print ' ', filename > + if changed_lines: > + print 'Running clang-format on the following files:' > + for filename in changed_lines: > + print ' ', filename > + if not changed_lines: > + print 'no modified files to format' > + return > + # The computed diff outputs absolute paths, so we must cd before > accessing > + # those files. > + cd_to_toplevel() > + changed_bytes = lines_to_bytes(changed_lines) > + old_tree = create_tree_from_workdir(changed_bytes) > + new_tree = run_clang_format_and_save_to_tree(changed_bytes, > + binary=opts.binary, > + style=opts.style) > + if opts.verbose >= 1: > + print 'old tree:', old_tree > + print 'new tree:', new_tree > + if old_tree == new_tree: > + if opts.verbose >= 0: > + print 'clang-format did not modify any files' > + elif opts.diff: > + print_diff(old_tree, new_tree) > + else: > + changed_files = apply_changes(old_tree, new_tree, force=opts.force, > + patch_mode=opts.patch) > + if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1: > + print 'changed files:' > + for filename in changed_files: > + print ' ', filename > + > + > +def load_git_config(non_string_options=None): > + """Return the git configuration as a dictionary. > + > + All options are assumed to be strings unless in `non_string_options`, > in which > + is a dictionary mapping option name (in lower case) to either "--bool" > or > + "--int".""" > + if non_string_options is None: > + non_string_options = {} > + out = {} > + for entry in run('git', 'config', '--list', '--null').split('\0'): > + if entry: > + name, value = entry.split('\n', 1) > + if name in non_string_options: > + value = run('git', 'config', non_string_options[name], name) > + out[name] = value > + return out > + > + > +def interpret_args(args, dash_dash, default_commit): > + """Interpret `args` as "[commit] [--] [files...]" and return (commit, > files). > + > + It is assumed that "--" and everything that follows has been removed > from > + args and placed in `dash_dash`. > + > + If "--" is present (i.e., `dash_dash` is non-empty), the argument to its > + left (if present) is taken as commit. Otherwise, the first argument is > + checked if it is a commit or a file. If commit is not given, > + `default_commit` is used.""" > + if dash_dash: > + if len(args) == 0: > + commit = default_commit > + elif len(args) > 1: > + die('at most one commit allowed; %d given' % len(args)) > + else: > + commit = args[0] > + object_type = get_object_type(commit) > + if object_type not in ('commit', 'tag'): > + if object_type is None: > + die("'%s' is not a commit" % commit) > + else: > + die("'%s' is a %s, but a commit was expected" % (commit, > object_type)) > + files = dash_dash[1:] > + elif args: > + if disambiguate_revision(args[0]): > + commit = args[0] > + files = args[1:] > + else: > + commit = default_commit > + files = args > + else: > + commit = default_commit > + files = [] > + return commit, files > + > + > +def disambiguate_revision(value): > + """Returns True if `value` is a revision, False if it is a file, or > dies.""" > + # If `value` is ambiguous (neither a commit nor a file), the following > + # command will die with an appropriate error message. > + run('git', 'rev-parse', value, verbose=False) > + object_type = get_object_type(value) > + if object_type is None: > + return False > + if object_type in ('commit', 'tag'): > + return True > + die('`%s` is a %s, but a commit or filename was expected' % > + (value, object_type)) > + > + > +def get_object_type(value): > + """Returns a string description of an object's type, or None if it is > not > + a valid git object.""" > + cmd = ['git', 'cat-file', '-t', value] > + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, > stderr=subprocess.PIPE) > + stdout, stderr = p.communicate() > + if p.returncode != 0: > + return None > + return stdout.strip() > + > + > +def compute_diff_and_extract_lines(commit, files): > + """Calls compute_diff() followed by extract_lines().""" > + diff_process = compute_diff(commit, files) > + changed_lines = extract_lines(diff_process.stdout) > + diff_process.stdout.close() > + diff_process.wait() > + if diff_process.returncode != 0: > + # Assume error was already printed to stderr. > + sys.exit(2) > + return changed_lines > + > + > +def compute_diff(commit, files): > + """Return a subprocess object producing the diff from `commit`. > + > + The return value's `stdin` file object will produce a patch with the > + differences between the working directory and `commit`, filtered on > `files` > + (if non-empty). Zero context lines are used in the patch.""" > + cmd = ['git', 'diff-index', '-p', '-U0', commit, '--'] > + cmd.extend(files) > + p = subprocess.Popen(cmd, stdin=subprocess.PIPE, stdout=subprocess.PIPE) > + p.stdin.close() > + return p > + > + > +def extract_lines(patch_file): > + """Extract the changed lines in `patch_file`. > + > + The input must have been produced with ``-U0``, meaning unidiff format > with > + zero lines of context. The return value is a dict mapping filename to a > + list of line `Range`s.""" > + matches = {} > + for line in patch_file: > + match = re.search(r'^\+\+\+\ [^/]+/(.*)', line) > + if match: > + filename = match.group(1).rstrip('\r\n') > + match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line) > + if match: > + start_line = int(match.group(1)) > + line_count = 1 > + if match.group(3): > + line_count = int(match.group(3)) > + if line_count > 0: > + matches.setdefault(filename, []).append(Range(start_line, > line_count)) > + return matches > + > + > +def filter_by_extension(dictionary, allowed_extensions): > + """Delete every key in `dictionary` that doesn't have an allowed > extension. > + > + `allowed_extensions` must be a collection of lowercase file extensions, > + excluding the period.""" > + allowed_extensions = frozenset(allowed_extensions) > + for filename in dictionary.keys(): > + base_ext = filename.rsplit('.', 1) > + if len(base_ext) == 1 or base_ext[1].lower() not in > allowed_extensions: > + del dictionary[filename] > + > + > +def cd_to_toplevel(): > + """Change to the top level of the git repository.""" > + toplevel = run('git', 'rev-parse', '--show-toplevel') > + os.chdir(toplevel) > + > + > +def lines_to_bytes(changed_lines): > + """Convert the mapping of changed line ranges to changed byte ranges. > + > + This function opens each file to compute the byte ranges.""" > + changed_bytes = {} > + for filename, line_ranges in changed_lines.iteritems(): > + with open(filename) as f: > + changed_bytes[filename] = lines_to_bytes_single_file(f, line_ranges) > + return changed_bytes > + > + > +def lines_to_bytes_single_file(file, line_ranges): > + byte_ranges = [] > + line_ranges_iter = iter(line_ranges + [None]) > + r = next(line_ranges_iter) > + linenum = 1 > + byte_idx = 0 > + byte_start = None > + byte_count = None > + for line in file: > + if r is None: > + break > + if linenum == r.start: > + byte_start = byte_idx > + if linenum == r.start + r.count: > + byte_ranges.append(Range(byte_start, byte_idx - byte_start)) > ^ Is this correct? With this, the --length that's passed to clang-format will include the trailing '\n', which apparently causes clang-format to format the next line as well. I'm not sure if that's a bug in this script, in clang-format, or both. Here's an example where this caused formatting on a line that wasn't touched by a patch: https://codereview.chromium.org/16917011/diff/1/chrome/browser/search_engines/template_url_prepopulate_data.cc#newcode622 > + r = next(line_ranges_iter) > + linenum += 1 > + byte_idx += len(line) > + if r is not None: > + # FIXME: Detect and warn if line ranges go past the end of file? > + byte_ranges.append(Range(byte_start, byte_idx - byte_start)) > + return byte_ranges > + > + > +def create_tree_from_workdir(filenames): > + """Create a new git tree with the given files from the working > directory. > + > + Returns the object ID (SHA-1) of the created tree.""" > + return create_tree(filenames, '--stdin') > + > + > +def run_clang_format_and_save_to_tree(changed_bytes, > binary='clang-format', > + style=None): > + """Run clang-format on each file and save the result to a git tree. > + > + Returns the object ID (SHA-1) of the created tree.""" > + def index_info_generator(): > + for filename, byte_ranges in changed_bytes.iteritems(): > + mode = oct(os.stat(filename).st_mode) > + blob_id = clang_format_to_blob(filename, byte_ranges, binary=binary, > + style=style) > + yield '%s %s\t%s' % (mode, blob_id, filename) > + return create_tree(index_info_generator(), '--index-info') > + > + > +def create_tree(input_lines, mode): > + """Create a tree object from the given input. > + > + If mode is '--stdin', it must be a list of filenames. If mode is > + '--index-info' is must be a list of values suitable for "git > update-index > + --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any > other mode > + is invalid.""" > + assert mode in ('--stdin', '--index-info') > + cmd = ['git', 'update-index', '--add', '-z', mode] > + with temporary_index_file(): > + p = subprocess.Popen(cmd, stdin=subprocess.PIPE) > + for line in input_lines: > + p.stdin.write('%s\0' % line) > + p.stdin.close() > + if p.wait() != 0: > + die('`%s` failed' % ' '.join(cmd)) > + tree_id = run('git', 'write-tree') > + return tree_id > + > + > +def clang_format_to_blob(filename, byte_ranges, binary='clang-format', > + style=None): > + """Run clang-format on the given file and save the result to a git blob. > + > + Returns the object ID (SHA-1) of the created blob.""" > + clang_format_cmd = [binary, filename] > + if style: > + clang_format_cmd.extend(['-style='+style]) > + for offset, length in byte_ranges: > + clang_format_cmd.extend(['-offset='+str(offset), > '-length='+str(length)]) > + try: > + clang_format = subprocess.Popen(clang_format_cmd, > stdin=subprocess.PIPE, > + stdout=subprocess.PIPE) > + except OSError as e: > + if e.errno == errno.ENOENT: > + die('cannot find executable "%s"' % binary) > + else: > + raise > + clang_format.stdin.close() > + hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, > '--stdin'] > + hash_object = subprocess.Popen(hash_object_cmd, > stdin=clang_format.stdout, > + stdout=subprocess.PIPE) > + clang_format.stdout.close() > + stdout = hash_object.communicate()[0] > + if hash_object.returncode != 0: > + die('`%s` failed' % ' '.join(hash_object_cmd)) > + if clang_format.wait() != 0: > + die('`%s` failed' % ' '.join(clang_format_cmd)) > + return stdout.rstrip('\r\n') > + > + > [email protected] > +def temporary_index_file(tree=None): > + """Context manager for setting GIT_INDEX_FILE to a temporary file and > deleting > + the file afterward.""" > + index_path = create_temporary_index(tree) > + old_index_path = os.environ.get('GIT_INDEX_FILE') > + os.environ['GIT_INDEX_FILE'] = index_path > + try: > + yield > + finally: > + if old_index_path is None: > + del os.environ['GIT_INDEX_FILE'] > + else: > + os.environ['GIT_INDEX_FILE'] = old_index_path > + os.remove(index_path) > + > + > +def create_temporary_index(tree=None): > + """Create a temporary index file and return the created file's path. > + > + If `tree` is not None, use that as the tree to read in. Otherwise, an > + empty index is created.""" > + gitdir = run('git', 'rev-parse', '--git-dir') > + path = os.path.join(gitdir, temp_index_basename) > + if tree is None: > + tree = '--empty' > + run('git', 'read-tree', '--index-output='+path, tree) > + return path > + > + > +def print_diff(old_tree, new_tree): > + """Print the diff between the two trees to stdout.""" > + # We use the porcelain 'diff' and not plumbing 'diff-tree' because the > output > + # is expected to be viewed by the user, and only the former does nice > things > + # like color and pagination. > + subprocess.check_call(['git', 'diff', old_tree, new_tree, '--']) > + > + > +def apply_changes(old_tree, new_tree, force=False, patch_mode=False): > + """Apply the changes in `new_tree` to the working directory. > + > + Bails if there are local changes in those files and not `force`. If > + `patch_mode`, runs `git checkout --patch` to select hunks > interactively.""" > + changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', > old_tree, > + new_tree).rstrip('\0').split('\0') > + if not force: > + unstaged_files = run('git', 'diff-files', '--name-status', > *changed_files) > + if unstaged_files: > + print >>sys.stderr, ('The following files would be modified but ' > + 'have unstaged changes:') > + print >>sys.stderr, unstaged_files > + print >>sys.stderr, 'Please commit, stage, or stash them first.' > + sys.exit(2) > + if patch_mode: > + # In patch mode, we could just as well create an index from the new > tree > + # and checkout from that, but then the user will be presented with a > + # message saying "Discard ... from worktree". Instead, we use the old > + # tree as the index and checkout from new_tree, which gives the > slightly > + # better message, "Apply ... to index and worktree". This is not > quite > + # right, since it won't be applied to the user's index, but oh well. > + with temporary_index_file(old_tree): > + subprocess.check_call(['git', 'checkout', '--patch', new_tree]) > + index_tree = old_tree > + else: > + with temporary_index_file(new_tree): > + run('git', 'checkout-index', '-a', '-f') > + return changed_files > + > + > +def run(*args, **kwargs): > + stdin = kwargs.pop('stdin', '') > + verbose = kwargs.pop('verbose', True) > + strip = kwargs.pop('strip', True) > + for name in kwargs: > + raise TypeError("run() got an unexpected keyword argument '%s'" % > name) > + p = subprocess.Popen(args, stdout=subprocess.PIPE, > stderr=subprocess.PIPE, > + stdin=subprocess.PIPE) > + stdout, stderr = p.communicate(input=stdin) > + if p.returncode == 0: > + if stderr: > + if verbose: > + print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args) > + print >>sys.stderr, stderr.rstrip() > + if strip: > + stdout = stdout.rstrip('\r\n') > + return stdout > + if verbose: > + print >>sys.stderr, '`%s` returned %s' % (' '.join(args), > p.returncode) > + if stderr: > + print >>sys.stderr, stderr.rstrip() > + sys.exit(2) > + > + > +def die(message): > + print >>sys.stderr, 'error:', message > + sys.exit(2) > + > + > +if __name__ == '__main__': > + main() > > Propchange: cfe/trunk/tools/clang-format/git-clang-format > > ------------------------------------------------------------------------------ > svn:executable = * > > > _______________________________________________ > cfe-commits mailing list > [email protected] > http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits >
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
