Hi,
On Wed, 3 Aug 2005, Linus Torvalds wrote:
On Wed, 3 Aug 2005, Johannes Schindelin wrote:
I try to write a "git annotate" based on the output of git-whatchanged.
You can't. It's fundamentally not doable, because you lose the merge
information.
That's why I said I need git-rev-tree. In the meantime I discovered that
git-rev-list has a "--parents" option which suits me just fine. Therefore
I'd say: kill git-rev-tree.
So you need to use a combination of git-rev-list _and_ git-whatchanged.
I tried to do without the ugly "script is calling script is calling
program" idiom. That is why my attempt at git-annotate (see attached) is
so slow.
I have been thinking of adding a "follow this file" mode to git-rev-list,
which just ignores all children of merges that used the file directly from
one of the other children. Exactly because then you could have a
git-rev-list that prunes based on filename, and would potentially be a lot
more efficient (not follow the "uninteresting" side of a merge).
Let's sit and see if people pick it up at all. If yes, I'd rather rewrite
the whole thing in C eventually (it is in perl now and uses hashes quite
extensively...).
P.S.: My only unsolved problem is that git-whatchanged sometimes shows
the diffs in the wrong order (clock-skew problem?)
Nope, this is a direct result of two branches modifying the same file in
parallel. There is no "right" or "wrong" order.
Exactly. But there is a "you probably meant that": the branch in which it
was modified last (not counting merges, of course).
Notes:
- You can either annotate by commit (this is the default), or show
other information with the "-f" flag: Try "-f author,commit:8".
- If you don't specify any files, it assumes you mean all files
(Attention: slow).
- You can start at a commit instead of the current state by specifying
"-c other_commit".
- The list of commits is traversed as output by git-rev-list, i.e.
chronologically. Each line is marked with the commit whose parent does not
contain that line.
- I am not at all sure if my handling of merges is sane. The logic is like
this: If the commit is parent to more than one commit (i.e. a merge), then
the touched lines are tentatively marked as changed in that commit, but
are possibly overridden at a later stage.
Ciao,
Dscho
#!/usr/bin/perl
use Getopt::Std;
# Print a short usage summary to STDERR and terminate with exit status 1.
#
# The message is built from double-quoted strings so that the program name
# is actually interpolated (the previous single-quoted literal printed the
# text '${\basename $0}' verbatim), and basename() is taken from
# File::Basename, which is loaded on demand here so the block does not rely
# on any file-level import.
sub usage() {
    require File::Basename;
    my $program = File::Basename::basename($0);
    print STDERR
        "Usage: $program [-s] [-f format] [-c commit] [files...]\n",
        "\t-h\t\tshow this help\n",
        "\t-s\t\tonly look at first parent in case of a merge\n",
        "\t-f format\trevision format (e.g. \"author,commit:8\")\n",
        "\t-c commit\tstart looking at this commit\n";
    exit(1);
}
# Parse the command line: -h and -s are flags, -f and -c take a value.
# An unknown option or an explicit -h shows the usage text (which exits).
usage() unless getopts("hsf:c:");
usage() if $opt_h;
# -s restricts history traversal to the first parent of each merge.
$first_parent_only = $opt_s;
# Read the text to annotate and reset all per-file global state.
#
# $file is handed to two-argument open() unchanged, so callers choose the
# mode themselves: "<path" reads a file, "command|" reads a pipe.
#
# On success the globals are set up as follows:
#   @lines            - the text, 1-based (index 0 stays unused)
#   $orig_line_count  - number of lines read
#   @mapping          - identity mapping 1..$orig_line_count
#   @line_handled, @revisions - emptied
#
# Returns 0 on success, 1 if the source could not be opened, 2 if it
# contained no lines.
sub read_file ($) {
    my $file = $_[0];

    # Parenthesised call plus low-precedence "or": with the former
    # "open IN, $file || return 1" the "||" bound to $file, so a failing
    # open() was never noticed.
    open(IN, $file) or return 1;

    $orig_line_count = 0;
    @lines           = ();
    @line_handled    = ();
    @revisions       = ();
    # Drop entries left over from a previously annotated (longer) file.
    @mapping         = ();

    while (defined(my $text = <IN>)) {
        $orig_line_count++;
        $lines[$orig_line_count] = $text;
    }
    close IN;

    return 2 unless $orig_line_count > 0;

    @mapping[1 .. $orig_line_count] = (1 .. $orig_line_count);
    return 0;
}
# Load the contents of $file that annotation starts from and reset the
# per-file version counter.
#
# With an empty $head the working-tree file is read, the revision label is
# preset to a row of '*' (one shorter than the revision column, followed by
# ';') and the output of git-diff-files is fed to handle_diff() with
# marking enabled.  Otherwise the blob id for $file is taken from the
# git-ls-tree output for $head and the blob is read via git-cat-file; if
# git-ls-tree yields no matching line, usage() is called.
#
# Returns 0 on success or the non-zero status reported by read_file().
sub init_file($$) {
    my ($file, $head) = @_;

    if ($head ne "") {
        if (`git-ls-tree $head $file` =~ /^\S+\s+\S+\s+(\S+)/) {
            $sha1 = $1;
            my $status = read_file("git-cat-file blob " . $sha1 . "|");
            return $status if $status != 0;
        }
        else {
            usage();
        }
    }
    else {
        # read current file
        my $status = read_file("<" . $file);
        return $status if $status != 0;
        $current_revision = ("*" x ($revision_string_length - 1)) . ";";
        handle_diff("git-diff-files -p " . $file . "|", 1);
    }

    $file_version = 0;
    return 0;
}
# Attribute every line that is still unaccounted for to the current
# revision (as returned by get_revision()).
#
# @mapping holds, for each line of the version currently being examined,
# the number of the corresponding line in the annotated text; slots may be
# undefined.  Each mapped line that is not yet flagged in @line_handled
# receives the current revision label in @revisions.
#
# $_[0] - true: additionally flag those lines as handled and, since nothing
#         is left to do afterwards, clear $orig_line_count and @mapping.
#         false: the attribution is tentative and may be overwritten later.
sub mark_all ($) {
    my $mark_lines_as_handled = $_[0];

    foreach my $line (@mapping) {
        # Skip empty mapping slots (index 0, removed lines).
        next unless defined $line && $line > 0;
        # Use the same @line_handled array that read_file() resets and
        # handle_diff() fills; the hash %line_handled consulted here before
        # was never reset and never saw handle_diff()'s marks.
        next if $line_handled[$line];

        $revisions[$line] = get_revision();
        $line_handled[$line] = 1 if $mark_lines_as_handled;
    }

    if ($mark_lines_as_handled) {
        $orig_line_count = 0;
        @mapping = ();
    }
}
# Walk one unified diff and update the line bookkeeping.
# (Only unified diffs are understood.)
#
# $_[0] - what to open, in two-argument open() form; the callers pass a
#         command followed by "|" so that the diff is read from a pipe.
# $_[1] - true if lines attributed here are final (flag them in
#         @line_handled and decrement $orig_line_count); false if the
#         attribution is only tentative.
#
# Lines that appear as "+" in the diff and still map to an unhandled line
# of the annotated text get the label returned by get_revision().  A new
# mapping, indexed by line numbers on the "-" side of the diff, is built in
# @new_mapping while the hunks are processed.
#
# NOTE(review): the mail archive replaced several statements in this sub by
# "[EMAIL PROTECTED]" (address obfuscation).  They are kept untouched below
# and flagged individually; the sub does not compile until they are
# restored from the original message.
sub handle_diff($$) {
my $diff=$_[0];
my $mark_lines_as_handled=$_[1];
# NOTE(review): the result of open is not checked.
open DIFF, $diff;
my @new_mapping=();
# Next line number still to be consumed on the "-" and "+" side.
my $current_line_nr_minus=1;
my $current_line_nr_plus=1;
while(<DIFF>) {
# Hunk header; lines that do not match (file headers etc.) are skipped.
if(/^@@ -(\d+),(\d+) \+(\d+),(\d+) @@/) {
$empty_diff=0;
$start_minus=$1;
$count_minus=$2;
$start_plus=$3;
$count_plus=$4;
# if the file was created here, we're finished
if($start_minus==0) {
mark_all($mark_lines_as_handled);
return;
}
# sanity check: both sides must have advanced by the same amount since the
# previous hunk
$start_minus-$current_line_nr_minus==$start_plus-$current_line_nr_plus
|| die "invalid diff:
$start_minus,$current_line_nr_minus,$start_plus,$current_line_nr_plus";
# Unchanged stretch in front of this hunk.
# NOTE(review): the right-hand side of the slice assignment below was lost
# to obfuscation; presumably it copied the matching slice of @mapping -
# confirm against the original message.
if($start_minus-$current_line_nr_minus>0) {
@new_mapping[$current_line_nr_minus..$start_minus]
[EMAIL PROTECTED];
$current_line_nr_minus=$start_minus;
$current_line_nr_plus=$start_plus;
}
# Consume the hunk body until both announced line counts are used up.
while($count_minus>0 || $count_plus>0) {
$_=<DIFF>;
if(/^-/) {
# Line exists only on the "-" side: it has no counterpart in the mapping.
$new_mapping[$current_line_nr_minus]=undef;
$current_line_nr_minus++;
$count_minus--;
} else {
if(/^\+/) {
# Line exists only on the "+" side: if it still maps to an unhandled line
# of the annotated text, label that line with the current revision.
$orig=$mapping[$current_line_nr_plus];
if($orig>0 &&
$line_handled[$orig]==undef) {
$revisions[$orig]=get_revision();
if($mark_lines_as_handled) {
$line_handled[$orig]=1;
$orig_line_count--;
# Every line has been attributed: nothing left to do.
# NOTE(review): the statement before "return" was lost to obfuscation;
# presumably an assignment to @mapping - confirm.
if($orig_line_count==0) {
@[EMAIL
PROTECTED];
return;
}
}
}
$current_line_nr_plus++;
$count_plus--;
} else {
# Context line: present on both sides, so carry the mapping over.
$orig_line_nr=$mapping[$current_line_nr_plus];
$new_mapping[$current_line_nr_minus]=$orig_line_nr;
if($orig_line_nr>0) {
# sanity check: the context line must equal the stored text; on mismatch
# print the surrounding lines and abort
if(substr($_,1) ne
$lines[$orig_line_nr]) {
print "--\n";
print
@lines[($orig_line_nr-3)..($orig_line_nr+3)];
print "--\n";
die "invalid
diff ($orig_line_nr:$current_line_nr_plus): ".substr($_,1)." is not
".$lines[$orig_line_nr];
}
# Same comparison again; the block above dies first on a mismatch.
substr($_,1) eq
$lines[$orig_line_nr] ||
die "invalid
diff ($diff): ".substr($_,1)." is not
".$lines[$orig_line_nr].$lines[1..$#lines];
}
$current_line_nr_minus++;
$current_line_nr_plus++;
$count_minus--;
$count_plus--;
}
}
}
}
}
close DIFF;
# Lines after the last hunk are unchanged.
# NOTE(review): the right-hand side of the slice assignment below was lost
# to obfuscation; presumably the remaining slice of @mapping - confirm.
$rest_lines=$#mapping-$current_line_nr_plus;
if($rest_lines>0) {
@new_mapping[$current_line_nr_minus..($current_line_nr_minus+$rest_lines)]
[EMAIL PROTECTED]($current_line_nr_plus+$rest_lines)];
}
$file_version++;
# NOTE(review): obfuscated statement; presumably installs @new_mapping as
# the current @mapping - confirm.
@[EMAIL PROTECTED];
return;
}
# Return the label used to annotate lines with the current commit.
#
# The label is cached in $current_revision; callers reset that variable to
# "" whenever they move on to another commit.  When the cache is empty the
# label is rebuilt: the header of $current_commit is read from
# "git-cat-file commit" (up to the first empty line) and the fields named
# in @revision_format - a flat list of (field name, width) pairs - are
# appended one after another, each cut or space-padded to its width and
# followed by ';'.
#
# Available fields (stored in %commit_values): commit, fileversion,
# parents, author, committer.
sub get_revision() {
    return $current_revision if $current_revision ne "";

    %commit_values = ('fileversion', 'V-' . $file_version);
    $commit_values{'commit'} = $current_commit;

    # List-form pipe open: no shell is involved, and a failure to start the
    # command simply leaves the header fields empty instead of reading from
    # an unopened handle.
    if (open(my $commit_fh, '-|', 'git-cat-file', 'commit', $current_commit)) {
        # A lexical line variable keeps the caller's $_ intact.
        while (defined(my $header = <$commit_fh>)) {
            last if $header =~ /^$/;    # empty line ends the header
            if ($header =~ /^parent ([0-9a-f]{40})/) {
                $commit_values{'parents'} .= $1 . " ";
            }
            elsif ($header =~ /^author (.*)/) {
                $commit_values{'author'} = $1;
            }
            elsif ($header =~ /^committer (.*)/) {
                $commit_values{'committer'} = $1;
            }
        }
        close $commit_fh;
    }

    # The index must be lexical: the script's main loop iterates with the
    # global $i, which this loop used to overwrite.
    for (my $pair = 0; $pair < $#revision_format; $pair += 2) {
        my $width = $revision_format[$pair + 1];
        my $value = $commit_values{ $revision_format[$pair] };
        $value = '' unless defined $value;

        my $length = length($value);
        if ($length > $width) {
            $value = substr($value, 0, $width);
        }
        elsif ($length < $width) {
            $value .= " " x ($width - $length);
        }
        $current_revision .= $value . ";";
    }

    return $current_revision;
}
# Print the annotated text: every line of @lines (1-based) prefixed with
# the label stored for it in @revisions.
#
# The loop variable is lexical on purpose: the script's main loop iterates
# over the files with the global $i, and overwriting it here ended that
# loop after the first file.
sub show_current {
    for my $line_nr (1 .. $#lines) {
        print $revisions[$line_nr] . $lines[$line_nr];
    }
}
# Annotate one file and print the result.
#
# $_[0] - name of the file
# $_[1] - commit to start from; passed on to init_file() unchanged
#
# After init_file() has loaded the text, the commits in @commits are
# visited in order.  For each step the diff between the following list
# entry and the current commit is fed to handle_diff(); once no lines are
# left ($orig_line_count is 0) or the current commit has a count of 0 in
# %children_count, all remaining lines are attributed via mark_all(1) and
# the result is printed.
#
# NOTE(review): the mail archive replaced several statements in this sub by
# "[EMAIL PROTECTED]" (address obfuscation), and two comments were wrapped
# so that their tails ("mapping", "next_commit)") ended up on lines of
# their own.  All of that is kept untouched below; the sub does not compile
# until it is restored from the original message.
sub annotate_file($$) {
my $file=$_[0];
my $head=$_[1];
# NOTE(review): the status returned by init_file() is ignored.
init_file($file,$head);
# NOTE(review): obfuscated; judging by the later "force copy" lines this
# presumably saves a copy of the line mapping for the first commit - confirm.
my @[EMAIL PROTECTED];
%commit_mappings=($commits[0]=>[EMAIL PROTECTED]);
my $index;
# The last list entry is never "current": it has no following entry to
# diff against.
for($index=0;$index<$#commits;$index++) {
$current_commit=$commits[$index];
# Empty the cached label so that get_revision() rebuilds it.
$current_revision="";
if($orig_line_count==0 || $children_count{$current_commit}==0) {
mark_all(1);
show_current();
return;
} else {
$next_commit=$commits[$index+1];
# The next list entry is not recorded under the current commit in
# %parent_tree: continue from the commit it is recorded under instead.
if($parent_tree{$next_commit} ne $current_commit) {
# current_commit has children, so save the line
mapping
my @[EMAIL PROTECTED]; # force copy
[EMAIL PROTECTED];
# get current_commit (which is the parent of
next_commit)
$current_commit=$parent_tree{$next_commit};
# A mapping must have been saved for that commit earlier.
if($commit_mappings{$current_commit}==undef) {
die "fatal";
}
# NOTE(review): obfuscated; presumably restores the saved mapping - confirm.
@[EMAIL PROTECTED];
# free memory
if($children_count{$current_commit}==1) {
$commit_mappings{$current_commit}=undef;
}
}
# Attributions are final only if the count for the current commit is 1.
handle_diff("git-diff-tree -p $next_commit
$current_commit $file|",$children_count{$current_commit}==1);
# if there are multiple children, save the line mapping
if($children_count{$next_commit}>1) {
my @[EMAIL PROTECTED]; # force copy
[EMAIL PROTECTED];
}
}
}
show_current();
}
# Translate the value of the -f option into the globals that drive label
# formatting.
#
# $_[0] is a comma separated list of "field" or "field:width" items.  The
# result is stored in @revision_format as a flat list of (field, width)
# pairs; items without an explicit width get 16.  The total label width -
# every field plus one separator character - is accumulated in
# $revision_string_length.  If the list turns out empty, the full 40
# character commit id is used.
sub parse_revision_format($) {
    my ($spec) = @_;

    @revision_format        = ();
    $revision_string_length = 0;

    for my $item (split(",", $spec)) {
        my ($field, $width) =
            $item =~ /^(.*):(.*)/ ? ($1, $2) : ($item, 16);
        push @revision_format, $field, $width;
        $revision_string_length += $revision_format[-1] + 1;
    }

    if (@revision_format < 2) {
        push @revision_format, "commit", 40;
        $revision_string_length = $revision_format[-1] + 1;
    }
}
# Read the history reachable from $_[0] (HEAD when empty) from
# "git-rev-list --parents" and fill the global lookup tables.
#
# Every output line consists of a commit id followed by the ids of its
# parents.  Afterwards
#   @commits         lists the commit ids in output order,
#   %parent_tree     maps each id from the parent columns to the commit on
#                    whose line it was seen last,
#   %children_count  maps each commit to the number of ids that followed it
#                    on its line.
#
# When $first_parent_only is set, only the first id after each commit is
# followed: output lines are skipped until one starts with that id.
sub get_parent_tree($) {
    my ($head) = @_;

    %parent_tree    = ();
    %children_count = ();
    @commits        = ();
    $next_commit    = "";

    my $start = $head eq "" ? "HEAD" : $head;
    open REVS, "git-rev-list --parents " . $start . "|";

    LOOP: while (<REVS>) {
        if ($first_parent_only && $next_commit ne "") {
            # Skip ahead to the line of the commit we are following.
            until (/^$next_commit/) {
                ($_ = <REVS>) or last LOOP;
            }
        }

        my @fields = split /[ \n]/;
        my $commit = $fields[0];
        push @commits, $commit;

        foreach my $parent (@fields[1 .. $#fields]) {
            $parent_tree{$parent} = $commit;
        }
        $children_count{$commit} = $#fields;

        $next_commit = $fields[1] if $first_parent_only;
    }
    close REVS;
}
# Main program: load the history, set up the label format and annotate
# every requested file.
get_parent_tree($opt_c);
parse_revision_format($opt_f);

# Without file arguments, annotate every file in the tree of the start
# commit (HEAD by default).
if (!@ARGV) {
    open FILES, 'git-ls-tree -r '.($opt_c eq ''?'HEAD':$opt_c).'|';
    while (<FILES>) {
        # The fourth whitespace separated column is the path.
        if (/^\S+\s+\S+\s+\S+\s+(.*)$/) {
            push @ARGV, $1;
        }
    }
    close FILES;
}

# Print a "File:" header as soon as there is more than one file; the
# previous test ($#ARGV > 1) only did so for three or more.
my $show_file_names = @ARGV > 1;

# Iterate with a lexical variable: the subs called below use the global $i
# as a scratch counter, which derailed the former index-based loop.
foreach my $file (@ARGV) {
    print "File: ".$file."\n" if $show_file_names;
    annotate_file($file, $opt_c);
}