Fixed in r185423.
On Fri, Jun 21, 2013 at 11:00 PM, Nico Weber <[email protected]> wrote: > I just had the chance to try this. One question below. > > > On Thu, May 23, 2013 at 10:53 AM, Daniel Jasper <[email protected]>wrote: > >> Author: djasper >> Date: Thu May 23 12:53:42 2013 >> New Revision: 182596 >> >> URL: http://llvm.org/viewvc/llvm-project?rev=182596&view=rev >> Log: >> clang-format integration for git. >> >> Put this somewhere on your path and use: >> >> git clang-format >> >> Awesome work by Mark Lodato. Many thanks! >> >> Added: >> cfe/trunk/tools/clang-format/git-clang-format (with props) >> >> Added: cfe/trunk/tools/clang-format/git-clang-format >> URL: >> http://llvm.org/viewvc/llvm-project/cfe/trunk/tools/clang-format/git-clang-format?rev=182596&view=auto >> >> ============================================================================== >> --- cfe/trunk/tools/clang-format/git-clang-format (added) >> +++ cfe/trunk/tools/clang-format/git-clang-format Thu May 23 12:53:42 2013 >> @@ -0,0 +1,513 @@ >> +#!/usr/bin/python >> +# >> +#===- git-clang-format - ClangFormat Git Integration ---------*- python >> -*--===# >> +# >> +# The LLVM Compiler Infrastructure >> +# >> +# This file is distributed under the University of Illinois Open Source >> +# License. See LICENSE.TXT for details. >> +# >> >> +#===------------------------------------------------------------------------===# >> + >> +r""" >> +clang-format git integration >> +============================ >> + >> +This file provides a clang-format integration for git. Put it somewhere >> in your >> +path and ensure that it is executable. Then, "git clang-format" will >> invoke >> +clang-format on the changes in current files or a specific commit. >> + >> +For further details, run: >> +git clang-format -h >> + >> +Requires Python 2.7 >> +""" >> + >> +import argparse >> +import collections >> +import contextlib >> +import errno >> +import os >> +import re >> +import subprocess >> +import sys >> + >> +usage = 'git clang-format [OPTIONS] [<commit>] [--] [<file>...]' >> + >> +desc = ''' >> +Run clang-format on all lines that differ between the working directory >> +and <commit>, which defaults to HEAD. Changes are only applied to the >> working >> +directory. >> + >> +The following git-config settings set the default of the corresponding >> option: >> + clangFormat.binary >> + clangFormat.commit >> + clangFormat.extension >> + clangFormat.style >> +''' >> + >> +# Name of the temporary index file in which save the output of >> clang-format. >> +# This file is created within the .git directory. >> +temp_index_basename = 'clang-format-index' >> + >> + >> +Range = collections.namedtuple('Range', 'start, count') >> + >> + >> +def main(): >> + config = load_git_config() >> + >> + # In order to keep '--' yet allow options after positionals, we need to >> + # check for '--' ourselves. (Setting nargs='*' throws away the '--', >> while >> + # nargs=argparse.REMAINDER disallows options after positionals.) >> + argv = sys.argv[1:] >> + try: >> + idx = argv.index('--') >> + except ValueError: >> + dash_dash = [] >> + else: >> + dash_dash = argv[idx:] >> + argv = argv[:idx] >> + >> + default_extensions = ','.join([ >> + # From clang/lib/Frontend/FrontendOptions.cpp, all lower case >> + 'c', 'h', # C >> + 'm', # ObjC >> + 'mm', # ObjC++ >> + 'cc', 'cp', 'cpp', 'c++', 'cxx', 'hpp', # C++ >> + ]) >> + >> + p = argparse.ArgumentParser( >> + usage=usage, formatter_class=argparse.RawDescriptionHelpFormatter, >> + description=desc) >> + p.add_argument('--binary', >> + default=config.get('clangformat.binary', >> 'clang-format'), >> + help='path to clang-format'), >> + p.add_argument('--commit', >> + default=config.get('clangformat.commit', 'HEAD'), >> + help='default commit to use if none is specified'), >> + p.add_argument('--diff', action='store_true', >> + help='print a diff instead of applying the changes') >> + p.add_argument('--extensions', >> + default=config.get('clangformat.extensions', >> + default_extensions), >> + help=('comma-separated list of file extensions to >> format, ' >> + 'excluding the period and case-insensitive')), >> + p.add_argument('-f', '--force', action='store_true', >> + help='allow changes to unstaged files') >> + p.add_argument('-p', '--patch', action='store_true', >> + help='select hunks interactively') >> + p.add_argument('-q', '--quiet', action='count', default=0, >> + help='print less information') >> + p.add_argument('--style', >> + default=config.get('clangformat.style', None), >> + help='passed to clang-format'), >> + p.add_argument('-v', '--verbose', action='count', default=0, >> + help='print extra information') >> + # We gather all the remaining positional arguments into 'args' since >> we need >> + # to use some heuristics to determine whether or not <commit> was >> present. >> + # However, to print pretty messages, we make use of metavar and help. >> + p.add_argument('args', nargs='*', metavar='<commit>', >> + help='revision from which to compute the diff') >> + p.add_argument('ignored', nargs='*', metavar='<file>...', >> + help='if specified, only consider differences in these >> files') >> + opts = p.parse_args(argv) >> + >> + opts.verbose -= opts.quiet >> + del opts.quiet >> + >> + commit, files = interpret_args(opts.args, dash_dash, opts.commit) >> + changed_lines = compute_diff_and_extract_lines(commit, files) >> + if opts.verbose >= 1: >> + ignored_files = set(changed_lines) >> + filter_by_extension(changed_lines, opts.extensions.lower().split(',')) >> + if opts.verbose >= 1: >> + ignored_files.difference_update(changed_lines) >> + if ignored_files: >> + print 'Ignoring changes in the following files (wrong extension):' >> + for filename in ignored_files: >> + print ' ', filename >> + if changed_lines: >> + print 'Running clang-format on the following files:' >> + for filename in changed_lines: >> + print ' ', filename >> + if not changed_lines: >> + print 'no modified files to format' >> + return >> + # The computed diff outputs absolute paths, so we must cd before >> accessing >> + # those files. >> + cd_to_toplevel() >> + changed_bytes = lines_to_bytes(changed_lines) >> + old_tree = create_tree_from_workdir(changed_bytes) >> + new_tree = run_clang_format_and_save_to_tree(changed_bytes, >> + binary=opts.binary, >> + style=opts.style) >> + if opts.verbose >= 1: >> + print 'old tree:', old_tree >> + print 'new tree:', new_tree >> + if old_tree == new_tree: >> + if opts.verbose >= 0: >> + print 'clang-format did not modify any files' >> + elif opts.diff: >> + print_diff(old_tree, new_tree) >> + else: >> + changed_files = apply_changes(old_tree, new_tree, force=opts.force, >> + patch_mode=opts.patch) >> + if (opts.verbose >= 0 and not opts.patch) or opts.verbose >= 1: >> + print 'changed files:' >> + for filename in changed_files: >> + print ' ', filename >> + >> + >> +def load_git_config(non_string_options=None): >> + """Return the git configuration as a dictionary. >> + >> + All options are assumed to be strings unless in `non_string_options`, >> in which >> + is a dictionary mapping option name (in lower case) to either "--bool" >> or >> + "--int".""" >> + if non_string_options is None: >> + non_string_options = {} >> + out = {} >> + for entry in run('git', 'config', '--list', '--null').split('\0'): >> + if entry: >> + name, value = entry.split('\n', 1) >> + if name in non_string_options: >> + value = run('git', 'config', non_string_options[name], name) >> + out[name] = value >> + return out >> + >> + >> +def interpret_args(args, dash_dash, default_commit): >> + """Interpret `args` as "[commit] [--] [files...]" and return (commit, >> files). >> + >> + It is assumed that "--" and everything that follows has been removed >> from >> + args and placed in `dash_dash`. >> + >> + If "--" is present (i.e., `dash_dash` is non-empty), the argument to >> its >> + left (if present) is taken as commit. Otherwise, the first argument is >> + checked if it is a commit or a file. If commit is not given, >> + `default_commit` is used.""" >> + if dash_dash: >> + if len(args) == 0: >> + commit = default_commit >> + elif len(args) > 1: >> + die('at most one commit allowed; %d given' % len(args)) >> + else: >> + commit = args[0] >> + object_type = get_object_type(commit) >> + if object_type not in ('commit', 'tag'): >> + if object_type is None: >> + die("'%s' is not a commit" % commit) >> + else: >> + die("'%s' is a %s, but a commit was expected" % (commit, >> object_type)) >> + files = dash_dash[1:] >> + elif args: >> + if disambiguate_revision(args[0]): >> + commit = args[0] >> + files = args[1:] >> + else: >> + commit = default_commit >> + files = args >> + else: >> + commit = default_commit >> + files = [] >> + return commit, files >> + >> + >> +def disambiguate_revision(value): >> + """Returns True if `value` is a revision, False if it is a file, or >> dies.""" >> + # If `value` is ambiguous (neither a commit nor a file), the following >> + # command will die with an appropriate error message. >> + run('git', 'rev-parse', value, verbose=False) >> + object_type = get_object_type(value) >> + if object_type is None: >> + return False >> + if object_type in ('commit', 'tag'): >> + return True >> + die('`%s` is a %s, but a commit or filename was expected' % >> + (value, object_type)) >> + >> + >> +def get_object_type(value): >> + """Returns a string description of an object's type, or None if it is >> not >> + a valid git object.""" >> + cmd = ['git', 'cat-file', '-t', value] >> + p = subprocess.Popen(cmd, stdout=subprocess.PIPE, >> stderr=subprocess.PIPE) >> + stdout, stderr = p.communicate() >> + if p.returncode != 0: >> + return None >> + return stdout.strip() >> + >> + >> +def compute_diff_and_extract_lines(commit, files): >> + """Calls compute_diff() followed by extract_lines().""" >> + diff_process = compute_diff(commit, files) >> + changed_lines = extract_lines(diff_process.stdout) >> + diff_process.stdout.close() >> + diff_process.wait() >> + if diff_process.returncode != 0: >> + # Assume error was already printed to stderr. >> + sys.exit(2) >> + return changed_lines >> + >> + >> +def compute_diff(commit, files): >> + """Return a subprocess object producing the diff from `commit`. >> + >> + The return value's `stdin` file object will produce a patch with the >> + differences between the working directory and `commit`, filtered on >> `files` >> + (if non-empty). Zero context lines are used in the patch.""" >> + cmd = ['git', 'diff-index', '-p', '-U0', commit, '--'] >> + cmd.extend(files) >> + p = subprocess.Popen(cmd, stdin=subprocess.PIPE, >> stdout=subprocess.PIPE) >> + p.stdin.close() >> + return p >> + >> + >> +def extract_lines(patch_file): >> + """Extract the changed lines in `patch_file`. >> + >> + The input must have been produced with ``-U0``, meaning unidiff format >> with >> + zero lines of context. The return value is a dict mapping filename to >> a >> + list of line `Range`s.""" >> + matches = {} >> + for line in patch_file: >> + match = re.search(r'^\+\+\+\ [^/]+/(.*)', line) >> + if match: >> + filename = match.group(1).rstrip('\r\n') >> + match = re.search(r'^@@ -[0-9,]+ \+(\d+)(,(\d+))?', line) >> + if match: >> + start_line = int(match.group(1)) >> + line_count = 1 >> + if match.group(3): >> + line_count = int(match.group(3)) >> + if line_count > 0: >> + matches.setdefault(filename, []).append(Range(start_line, >> line_count)) >> + return matches >> + >> + >> +def filter_by_extension(dictionary, allowed_extensions): >> + """Delete every key in `dictionary` that doesn't have an allowed >> extension. >> + >> + `allowed_extensions` must be a collection of lowercase file extensions, >> + excluding the period.""" >> + allowed_extensions = frozenset(allowed_extensions) >> + for filename in dictionary.keys(): >> + base_ext = filename.rsplit('.', 1) >> + if len(base_ext) == 1 or base_ext[1].lower() not in >> allowed_extensions: >> + del dictionary[filename] >> + >> + >> +def cd_to_toplevel(): >> + """Change to the top level of the git repository.""" >> + toplevel = run('git', 'rev-parse', '--show-toplevel') >> + os.chdir(toplevel) >> + >> + >> +def lines_to_bytes(changed_lines): >> + """Convert the mapping of changed line ranges to changed byte ranges. >> + >> + This function opens each file to compute the byte ranges.""" >> + changed_bytes = {} >> + for filename, line_ranges in changed_lines.iteritems(): >> + with open(filename) as f: >> + changed_bytes[filename] = lines_to_bytes_single_file(f, >> line_ranges) >> + return changed_bytes >> + >> + >> +def lines_to_bytes_single_file(file, line_ranges): >> + byte_ranges = [] >> + line_ranges_iter = iter(line_ranges + [None]) >> + r = next(line_ranges_iter) >> + linenum = 1 >> + byte_idx = 0 >> + byte_start = None >> + byte_count = None >> + for line in file: >> + if r is None: >> + break >> + if linenum == r.start: >> + byte_start = byte_idx >> + if linenum == r.start + r.count: >> + byte_ranges.append(Range(byte_start, byte_idx - byte_start)) >> > > ^ Is this correct? With this, the --length that's passed to clang-format > will include the trailing '\n', which apparently causes clang-format to > format the next line as well. I'm not sure if that's a bug in this script, > in clang-format, or both. > > Here's an example where this caused formatting on a line that wasn't > touched by a patch: > https://codereview.chromium.org/16917011/diff/1/chrome/browser/search_engines/template_url_prepopulate_data.cc#newcode622 > > >> + r = next(line_ranges_iter) >> + linenum += 1 >> + byte_idx += len(line) >> + if r is not None: >> + # FIXME: Detect and warn if line ranges go past the end of file? >> + byte_ranges.append(Range(byte_start, byte_idx - byte_start)) >> + return byte_ranges >> + >> + >> +def create_tree_from_workdir(filenames): >> + """Create a new git tree with the given files from the working >> directory. >> + >> + Returns the object ID (SHA-1) of the created tree.""" >> + return create_tree(filenames, '--stdin') >> + >> + >> +def run_clang_format_and_save_to_tree(changed_bytes, >> binary='clang-format', >> + style=None): >> + """Run clang-format on each file and save the result to a git tree. >> + >> + Returns the object ID (SHA-1) of the created tree.""" >> + def index_info_generator(): >> + for filename, byte_ranges in changed_bytes.iteritems(): >> + mode = oct(os.stat(filename).st_mode) >> + blob_id = clang_format_to_blob(filename, byte_ranges, >> binary=binary, >> + style=style) >> + yield '%s %s\t%s' % (mode, blob_id, filename) >> + return create_tree(index_info_generator(), '--index-info') >> + >> + >> +def create_tree(input_lines, mode): >> + """Create a tree object from the given input. >> + >> + If mode is '--stdin', it must be a list of filenames. If mode is >> + '--index-info' is must be a list of values suitable for "git >> update-index >> + --index-info", such as "<mode> <SP> <sha1> <TAB> <filename>". Any >> other mode >> + is invalid.""" >> + assert mode in ('--stdin', '--index-info') >> + cmd = ['git', 'update-index', '--add', '-z', mode] >> + with temporary_index_file(): >> + p = subprocess.Popen(cmd, stdin=subprocess.PIPE) >> + for line in input_lines: >> + p.stdin.write('%s\0' % line) >> + p.stdin.close() >> + if p.wait() != 0: >> + die('`%s` failed' % ' '.join(cmd)) >> + tree_id = run('git', 'write-tree') >> + return tree_id >> + >> + >> +def clang_format_to_blob(filename, byte_ranges, binary='clang-format', >> + style=None): >> + """Run clang-format on the given file and save the result to a git >> blob. >> + >> + Returns the object ID (SHA-1) of the created blob.""" >> + clang_format_cmd = [binary, filename] >> + if style: >> + clang_format_cmd.extend(['-style='+style]) >> + for offset, length in byte_ranges: >> + clang_format_cmd.extend(['-offset='+str(offset), >> '-length='+str(length)]) >> + try: >> + clang_format = subprocess.Popen(clang_format_cmd, >> stdin=subprocess.PIPE, >> + stdout=subprocess.PIPE) >> + except OSError as e: >> + if e.errno == errno.ENOENT: >> + die('cannot find executable "%s"' % binary) >> + else: >> + raise >> + clang_format.stdin.close() >> + hash_object_cmd = ['git', 'hash-object', '-w', '--path='+filename, >> '--stdin'] >> + hash_object = subprocess.Popen(hash_object_cmd, >> stdin=clang_format.stdout, >> + stdout=subprocess.PIPE) >> + clang_format.stdout.close() >> + stdout = hash_object.communicate()[0] >> + if hash_object.returncode != 0: >> + die('`%s` failed' % ' '.join(hash_object_cmd)) >> + if clang_format.wait() != 0: >> + die('`%s` failed' % ' '.join(clang_format_cmd)) >> + return stdout.rstrip('\r\n') >> + >> + >> [email protected] >> +def temporary_index_file(tree=None): >> + """Context manager for setting GIT_INDEX_FILE to a temporary file and >> deleting >> + the file afterward.""" >> + index_path = create_temporary_index(tree) >> + old_index_path = os.environ.get('GIT_INDEX_FILE') >> + os.environ['GIT_INDEX_FILE'] = index_path >> + try: >> + yield >> + finally: >> + if old_index_path is None: >> + del os.environ['GIT_INDEX_FILE'] >> + else: >> + os.environ['GIT_INDEX_FILE'] = old_index_path >> + os.remove(index_path) >> + >> + >> +def create_temporary_index(tree=None): >> + """Create a temporary index file and return the created file's path. >> + >> + If `tree` is not None, use that as the tree to read in. Otherwise, an >> + empty index is created.""" >> + gitdir = run('git', 'rev-parse', '--git-dir') >> + path = os.path.join(gitdir, temp_index_basename) >> + if tree is None: >> + tree = '--empty' >> + run('git', 'read-tree', '--index-output='+path, tree) >> + return path >> + >> + >> +def print_diff(old_tree, new_tree): >> + """Print the diff between the two trees to stdout.""" >> + # We use the porcelain 'diff' and not plumbing 'diff-tree' because the >> output >> + # is expected to be viewed by the user, and only the former does nice >> things >> + # like color and pagination. >> + subprocess.check_call(['git', 'diff', old_tree, new_tree, '--']) >> + >> + >> +def apply_changes(old_tree, new_tree, force=False, patch_mode=False): >> + """Apply the changes in `new_tree` to the working directory. >> + >> + Bails if there are local changes in those files and not `force`. If >> + `patch_mode`, runs `git checkout --patch` to select hunks >> interactively.""" >> + changed_files = run('git', 'diff-tree', '-r', '-z', '--name-only', >> old_tree, >> + new_tree).rstrip('\0').split('\0') >> + if not force: >> + unstaged_files = run('git', 'diff-files', '--name-status', >> *changed_files) >> + if unstaged_files: >> + print >>sys.stderr, ('The following files would be modified but ' >> + 'have unstaged changes:') >> + print >>sys.stderr, unstaged_files >> + print >>sys.stderr, 'Please commit, stage, or stash them first.' >> + sys.exit(2) >> + if patch_mode: >> + # In patch mode, we could just as well create an index from the new >> tree >> + # and checkout from that, but then the user will be presented with a >> + # message saying "Discard ... from worktree". Instead, we use the >> old >> + # tree as the index and checkout from new_tree, which gives the >> slightly >> + # better message, "Apply ... to index and worktree". This is not >> quite >> + # right, since it won't be applied to the user's index, but oh well. >> + with temporary_index_file(old_tree): >> + subprocess.check_call(['git', 'checkout', '--patch', new_tree]) >> + index_tree = old_tree >> + else: >> + with temporary_index_file(new_tree): >> + run('git', 'checkout-index', '-a', '-f') >> + return changed_files >> + >> + >> +def run(*args, **kwargs): >> + stdin = kwargs.pop('stdin', '') >> + verbose = kwargs.pop('verbose', True) >> + strip = kwargs.pop('strip', True) >> + for name in kwargs: >> + raise TypeError("run() got an unexpected keyword argument '%s'" % >> name) >> + p = subprocess.Popen(args, stdout=subprocess.PIPE, >> stderr=subprocess.PIPE, >> + stdin=subprocess.PIPE) >> + stdout, stderr = p.communicate(input=stdin) >> + if p.returncode == 0: >> + if stderr: >> + if verbose: >> + print >>sys.stderr, '`%s` printed to stderr:' % ' '.join(args) >> + print >>sys.stderr, stderr.rstrip() >> + if strip: >> + stdout = stdout.rstrip('\r\n') >> + return stdout >> + if verbose: >> + print >>sys.stderr, '`%s` returned %s' % (' '.join(args), >> p.returncode) >> + if stderr: >> + print >>sys.stderr, stderr.rstrip() >> + sys.exit(2) >> + >> + >> +def die(message): >> + print >>sys.stderr, 'error:', message >> + sys.exit(2) >> + >> + >> +if __name__ == '__main__': >> + main() >> >> Propchange: cfe/trunk/tools/clang-format/git-clang-format >> >> ------------------------------------------------------------------------------ >> svn:executable = * >> >> >> _______________________________________________ >> cfe-commits mailing list >> [email protected] >> http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits >> > >
_______________________________________________ cfe-commits mailing list [email protected] http://lists.cs.uiuc.edu/mailman/listinfo/cfe-commits
