[PATCH 00/10] [RFC] pickaxe for function names

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
This series introduces a --function-name=pattern option for git-log, intended
to search for commits which touch a function matching a certain pattern (a
feature we've seen requested and are interested in using ourselves).

This is our first attempt to patch git; we've tried to observe and follow the
community standards, but we would greatly appreciate feedback. We've been
working on this for a few weeks, and I just noticed that René Scharfe has done
conflicting (and better) refactoring work in diffcore-pickaxe.c a few days ago.
We'd be happy to rebase our changes and resolve the conflicts once René's
patches are committed to master, but we thought we may as well ask for comments
on the version we have working now.

Thanks for your time!

  .gitattributes: specify the language used
  diffcore-pickaxe.c: refactor regex compilation
  diffcore-pickaxe.c: Refactor pickaxe_fn signature
  diff.c/diff.h: expose userdiff_funcname
  diffcore-pickaxe.c: set up funcname pattern
  log: --function-name pickaxe
  xdiff: add XDL_EMIT_MOREFUNCNAMES to try harder
  xdiff: add XDL_EMIT_MOREHUNKHEADS to split hunks
  t4213: test --function-name option
  Documentation: Document --function-name usage

 .gitattributes |   2 +-
 Documentation/diff-options.txt |   9 +++
 Documentation/gitdiffcore.txt  |  17 -
 builtin/log.c  |   2 +-
 diff.c |  13 +++-
 diff.h |   3 +
 diffcore-pickaxe.c | 162 +++---
 revision.c |   3 +-
 t/t4213-log-function-name.sh   |  73 +
 xdiff/xdiff.h  |   2 +
 xdiff/xdiffi.c |   2 +-
 xdiff/xemit.c  |  99 ++--
 xdiff/xemit.h  |   4 +-
 13 files changed, 323 insertions(+), 68 deletions(-)
--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 09/10] t4213: test --function-name option

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan G. Lodha  David A. Dalrymple dad-...@mit.edu

This test builds a sample C file, adding and removing functions, and
checks that the right commits are filtered by --function-name matching.

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 t/t4213-log-function-name.sh | 73 
 1 file changed, 73 insertions(+)
 create mode 100755 t/t4213-log-function-name.sh

diff --git a/t/t4213-log-function-name.sh b/t/t4213-log-function-name.sh
new file mode 100755
index 000..1243ce5
--- /dev/null
+++ b/t/t4213-log-function-name.sh
@@ -0,0 +1,73 @@
+#!/bin/sh
+
+test_description='log --function-name'
+. ./test-lib.sh
+
+test_expect_success setup '
+   echo * diff=cpp  .gitattributes
+
+   file 
+   git add file 
+   test_tick 
+   git commit -m initial 
+
+   printf int main(){\n\treturn 0;\n}\n  file 
+   test_tick 
+   git commit -am second
+
+   printf void newfunc(){\n\treturn;\n}\n  file 
+   test_tick 
+   git commit -am third
+
+   printf void newfunc2(){\n\treturn;\n}\n | cat - file  temp 
+   mv temp file 
+   test_tick 
+   git commit -am fourth
+
+   printf void newfunc3(){\n\treturn;\n}\n | cat - file  temp 
+   mv temp file 
+   test_tick 
+   git commit -am fifth
+
+   sed -i -e s/void newfunc2/void newfunc4/ file 
+   test_tick 
+   git commit -am sixth
+'
+
+test_expect_success 'log --function-name=main' '
+   git log --function-name=main actual 
+   git log --grep=second expect 
+   test_cmp expect actual
+'
+
+test_expect_success 'log --function-name newfunc\W' '
+   git log --function-name newfunc\W actual 
+   git log --grep=third expect 
+   test_cmp expect actual
+'
+
+test_expect_success 'log --function-name newfunc2' '
+   git log --function-name newfunc2 actual 
+   git log -E --grep sixth|fourth expect 
+   test_cmp expect actual
+'
+
+test_expect_success 'log --function-name newfunc3' '
+   git log --function-name newfunc3 actual 
+   git log --grep=fifth expect 
+   test_cmp expect actual
+'
+
+test_expect_success 'log --function-name newfunc4' '
+   git log --function-name newfunc4 actual 
+   git log --grep=sixth expect 
+   test_cmp expect actual
+'
+
+test_expect_success 'log --function-name newfunc' '
+   git log --function-name newfunc actual 
+   git log -E --grep third|fourth|fifth|sixth expect 
+   test_cmp expect actual
+'
+
+test_done
-- 
1.7.12.4 (Apple Git-37)

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 05/10] diffcore-pickaxe.c: set up funcname pattern

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan G. Lodha  David A. Dalrymple dad-...@mit.edu

We use userdiff_funcname to make the filetype-dependent function name
pattern available to pickaxe functions.

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 diffcore-pickaxe.c | 9 +
 1 file changed, 9 insertions(+)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 7e65095..103fe6c 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -7,10 +7,12 @@
 #include diffcore.h
 #include xdiff-interface.h
 #include kwset.h
+#include userdiff.h
 
 struct fn_options {
regex_t *regex;
kwset_t kws;
+   const struct userdiff_funcname *funcname_pattern;
 };
 
 typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
@@ -224,6 +226,13 @@ static int pickaxe_match(struct diff_filepair *p, struct 
diff_options *o,
if (textconv_one == textconv_two  diff_unmodified_pair(p))
return 0;
 
+   const struct userdiff_funcname *funcname_pattern;
+   funcname_pattern = diff_funcname_pattern(p-one);
+   if (!funcname_pattern)
+   funcname_pattern = diff_funcname_pattern(p-two);
+
+   fno-funcname_pattern = funcname_pattern;
+
mf1.size = fill_textconv(textconv_one, p-one, mf1.ptr);
mf2.size = fill_textconv(textconv_two, p-two, mf2.ptr);
 
-- 
1.7.12.4 (Apple Git-37)

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 06/10] log: --function-name pickaxe

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan G. Lodha  David A. Dalrymple dad-...@mit.edu

This is similar to the pickaxe grep option (-G), but applies the
provided regex only to diff hunk headers, thereby showing only those
commits which affect a function with a definition line matching the
pattern. These are functions in the same sense as with
--function-context, i.e., they may be classes, structs, etc. depending
on the programming-language-specific pattern specified by the diff
attribute in .gitattributes.

builtin/log.c:
as with pickaxe, clear always_show_header when using --function-name
diff.c:
parse option; as with pickaxe, always set the RECURSIVE option
for --function-name
diff.h:
include funcname field in struct diff_options
diffcore-pickaxe.c:
implementation of --function-name filtering (diffcore_funcname), like
the existing diffcore_pickaxe_grep and diffcore_pickaxe_count
revision.c:
as with pickaxe, set revs->diff to always generate diffs when
using --function-name

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 builtin/log.c  |  2 +-
 diff.c |  8 +--
 diff.h |  1 +
 diffcore-pickaxe.c | 69 --
 revision.c |  3 ++-
 5 files changed, 77 insertions(+), 6 deletions(-)

diff --git a/builtin/log.c b/builtin/log.c
index b97373d..78694de 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -158,7 +158,7 @@ static void cmd_log_init_finish(int argc, const char 
**argv, const char *prefix,
if (rev-show_notes)
init_display_notes(rev-notes_opt);
 
-   if (rev-diffopt.pickaxe || rev-diffopt.filter)
+   if (rev-diffopt.pickaxe || rev-diffopt.filter || 
rev-diffopt.funcname)
rev-always_show_header = 0;
if (DIFF_OPT_TST(rev-diffopt, FOLLOW_RENAMES)) {
rev-always_show_header = 0;
diff --git a/diff.c b/diff.c
index f978ee7..2f6dbc1 100644
--- a/diff.c
+++ b/diff.c
@@ -3298,7 +3298,7 @@ void diff_setup_done(struct diff_options *options)
/*
 * Also pickaxe would not work very well if you do not say recursive
 */
-   if (options-pickaxe)
+   if (options-pickaxe || options-funcname)
DIFF_OPT_SET(options, RECURSIVE);
/*
 * When patches are generated, submodules diffed against the work tree
@@ -3821,6 +3821,10 @@ int diff_opt_parse(struct diff_options *options, const 
char **av, int ac)
options-orderfile = optarg;
return argcount;
}
+   else if ((argcount = parse_long_opt(function-name, av, optarg))) {
+   options-funcname = optarg;
+   return argcount;
+   }
else if ((argcount = parse_long_opt(diff-filter, av, optarg))) {
int offending = parse_diff_filter_opt(optarg, options);
if (offending)
@@ -4768,7 +4772,7 @@ void diffcore_std(struct diff_options *options)
if (options-break_opt != -1)
diffcore_merge_broken();
}
-   if (options-pickaxe)
+   if (options-pickaxe || options-funcname)
diffcore_pickaxe(options);
if (options-orderfile)
diffcore_order(options-orderfile);
diff --git a/diff.h b/diff.h
index 9e96fc9..0fd5f1d 100644
--- a/diff.h
+++ b/diff.h
@@ -107,6 +107,7 @@ enum diff_words_type {
 struct diff_options {
const char *orderfile;
const char *pickaxe;
+   const char *funcname;
const char *single_follow;
const char *a_prefix, *b_prefix;
unsigned flags;
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 103fe6c..259a8fa 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -67,6 +67,12 @@ struct diffgrep_cb {
int hit;
 };
 
+struct funcname_cb {
+   struct userdiff_funcname *pattern;
+   regex_t *regex;
+   int hit;
+};
+
 static void diffgrep_consume(void *priv, char *line, unsigned long len)
 {
struct diffgrep_cb *data = priv;
@@ -88,6 +94,20 @@ static void diffgrep_consume(void *priv, char *line, 
unsigned long len)
line[len] = hold;
 }
 
+static void match_funcname(void *priv, char *line, unsigned long len)
+{
+   regmatch_t regmatch;
+   int hold;
+   struct funcname_cb *data = priv;
+   hold = line[len];
+   line[len] = '\0';
+
+   if (line[0] == '@'  line[1] == '@')
+   if (!regexec(data-regex, line, 1, regmatch, 0))
+   data-hit = 1;
+   line[len] = hold;
+}
+
 static int diff_grep(mmfile_t *one, mmfile_t *two,
 struct diff_options *o,
 struct fn_options *fno)
@@ -117,6 +137,38 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
return ecbdata.hit;
 }
 
+static int diff_funcname_filter(mmfile_t *one, mmfile_t *two,
+   struct diff_options *o,
+   struct fn_options *fno

[PATCH 04/10] diff.c/diff.h: expose userdiff_funcname

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan G. Lodha  David A. Dalrymple dad-...@mit.edu

The functionality of diff_funcname_pattern (determining the language in use
for a given file and setting up patterns to match function names in
that language) is useful outside of diff.c, so here we remove its static
specifier and declare it in diff.h.

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 diff.c | 2 +-
 diff.h | 2 ++
 2 files changed, 3 insertions(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index e343191..f978ee7 100644
--- a/diff.c
+++ b/diff.c
@@ -2203,7 +2203,7 @@ int diff_filespec_is_binary(struct diff_filespec *one)
return one-is_binary;
 }
 
-static const struct userdiff_funcname *diff_funcname_pattern(struct 
diff_filespec *one)
+const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespec 
*one)
 {
diff_filespec_load_driver(one);
return one-driver-funcname.pattern ? one-driver-funcname : NULL;
diff --git a/diff.h b/diff.h
index a24a767..9e96fc9 100644
--- a/diff.h
+++ b/diff.h
@@ -349,4 +349,6 @@ extern int print_stat_summary(FILE *fp, int files,
  int insertions, int deletions);
 extern void setup_diff_pager(struct diff_options *);
 
+const struct userdiff_funcname *diff_funcname_pattern(struct diff_filespec *);
+
 #endif /* DIFF_H */
-- 
1.7.12.4 (Apple Git-37)

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 02/10] diffcore-pickaxe.c: refactor regex compilation

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan G. Lodha  David A. Dalrymple dad-...@mit.edu

In this file, two functions use identical blocks of code to call the
POSIX regex compiling function and handle a possible error. Here we
factor that block into its own function, in anticipation of using the
same code a third time.

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 diffcore-pickaxe.c | 33 -
 1 file changed, 16 insertions(+), 17 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 401eb72..0d36a3c 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -12,6 +12,8 @@ typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
  struct diff_options *o,
  regex_t *regexp, kwset_t kws);
 
+static void compile_regex(regex_t *r, const char *s, int cflags);
+
 static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
 regex_t *regexp, kwset_t kws, pickaxe_fn fn);
 
@@ -110,20 +112,13 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
 
 static void diffcore_pickaxe_grep(struct diff_options *o)
 {
-   int err;
regex_t regex;
int cflags = REG_EXTENDED | REG_NEWLINE;
 
if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE))
cflags |= REG_ICASE;
 
-   err = regcomp(regex, o-pickaxe, cflags);
-   if (err) {
-   char errbuf[1024];
-   regerror(err, regex, errbuf, 1024);
-   regfree(regex);
-   die(invalid regex: %s, errbuf);
-   }
+   compile_regex(regex, o-pickaxe, cflags);
 
pickaxe(diff_queued_diff, o, regex, NULL, diff_grep);
 
@@ -180,6 +175,18 @@ static int has_changes(mmfile_t *one, mmfile_t *two,
return one_contains != two_contains;
 }
 
+static void compile_regex(regex_t *r, const char *s, int cflags)
+{
+   int err;
+   err = regcomp(r, s, cflags);
+   if (err) {
+   char errbuf[1024];
+   regerror(err, r, errbuf, 1024);
+   regfree(r);
+   die(invalid regex: %s, errbuf);
+   }
+}
+
 static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
 regex_t *regexp, kwset_t kws, pickaxe_fn fn)
 {
@@ -236,15 +243,7 @@ static void diffcore_pickaxe_count(struct diff_options *o)
kwset_t kws = NULL;
 
if (opts  DIFF_PICKAXE_REGEX) {
-   int err;
-   err = regcomp(regex, needle, REG_EXTENDED | REG_NEWLINE);
-   if (err) {
-   /* The POSIX.2 people are surely sick */
-   char errbuf[1024];
-   regerror(err, regex, errbuf, 1024);
-   regfree(regex);
-   die(invalid regex: %s, errbuf);
-   }
+   compile_regex(regex, needle, REG_EXTENDED | REG_NEWLINE);
regexp = regex;
} else {
kws = kwsalloc(DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE)
-- 
1.7.12.4 (Apple Git-37)

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 03/10] diffcore-pickaxe.c: Refactor pickaxe_fn signature

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan G. Lodha  David A. Dalrymple dad-...@mit.edu

This function type previously accepted separate regex_t and kwset_t
parameters, which conceptually go together. Here we create a struct to
encapsulate them, in anticipation of adding a third field that
pickaxe_fn's may require.

This parallels the existing diffgrep_cb structure for passing possibly
relevant values through to the callbacks invoked by xdi_diff_outf.

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 diffcore-pickaxe.c | 50 ++
 1 file changed, 30 insertions(+), 20 deletions(-)

diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 0d36a3c..7e65095 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -8,17 +8,22 @@
 #include xdiff-interface.h
 #include kwset.h
 
+struct fn_options {
+   regex_t *regex;
+   kwset_t kws;
+};
+
 typedef int (*pickaxe_fn)(mmfile_t *one, mmfile_t *two,
  struct diff_options *o,
- regex_t *regexp, kwset_t kws);
+ struct fn_options *fno);
 
 static void compile_regex(regex_t *r, const char *s, int cflags);
 
 static int pickaxe_match(struct diff_filepair *p, struct diff_options *o,
-regex_t *regexp, kwset_t kws, pickaxe_fn fn);
+pickaxe_fn fn, struct fn_options *fno);
 
 static void pickaxe(struct diff_queue_struct *q, struct diff_options *o,
-   regex_t *regexp, kwset_t kws, pickaxe_fn fn)
+   pickaxe_fn fn, struct fn_options *fno)
 {
int i;
struct diff_queue_struct outq;
@@ -29,7 +34,7 @@ static void pickaxe(struct diff_queue_struct *q, struct 
diff_options *o,
/* Showing the whole changeset if needle exists */
for (i = 0; i  q-nr; i++) {
struct diff_filepair *p = q-queue[i];
-   if (pickaxe_match(p, o, regexp, kws, fn))
+   if (pickaxe_match(p, o, fn, fno))
return; /* do not munge the queue */
}
 
@@ -44,7 +49,7 @@ static void pickaxe(struct diff_queue_struct *q, struct 
diff_options *o,
/* Showing only the filepairs that has the needle */
for (i = 0; i  q-nr; i++) {
struct diff_filepair *p = q-queue[i];
-   if (pickaxe_match(p, o, regexp, kws, fn))
+   if (pickaxe_match(p, o, fn, fno))
diff_q(outq, p);
else
diff_free_filepair(p);
@@ -83,7 +88,7 @@ static void diffgrep_consume(void *priv, char *line, unsigned 
long len)
 
 static int diff_grep(mmfile_t *one, mmfile_t *two,
 struct diff_options *o,
-regex_t *regexp, kwset_t kws)
+struct fn_options *fno)
 {
regmatch_t regmatch;
struct diffgrep_cb ecbdata;
@@ -91,9 +96,9 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
xdemitconf_t xecfg;
 
if (!one)
-   return !regexec(regexp, two-ptr, 1, regmatch, 0);
+   return !regexec(fno-regex, two-ptr, 1, regmatch, 0);
if (!two)
-   return !regexec(regexp, one-ptr, 1, regmatch, 0);
+   return !regexec(fno-regex, one-ptr, 1, regmatch, 0);
 
/*
 * We have both sides; need to run textual diff and see if
@@ -101,7 +106,7 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
 */
memset(xpp, 0, sizeof(xpp));
memset(xecfg, 0, sizeof(xecfg));
-   ecbdata.regexp = regexp;
+   ecbdata.regexp = fno-regex;
ecbdata.hit = 0;
xecfg.ctxlen = o-context;
xecfg.interhunkctxlen = o-interhunkcontext;
@@ -113,6 +118,7 @@ static int diff_grep(mmfile_t *one, mmfile_t *two,
 static void diffcore_pickaxe_grep(struct diff_options *o)
 {
regex_t regex;
+   struct fn_options fno;
int cflags = REG_EXTENDED | REG_NEWLINE;
 
if (DIFF_OPT_TST(o, PICKAXE_IGNORE_CASE))
@@ -120,13 +126,14 @@ static void diffcore_pickaxe_grep(struct diff_options *o)
 
compile_regex(regex, o-pickaxe, cflags);
 
-   pickaxe(diff_queued_diff, o, regex, NULL, diff_grep);
+   fno.regex = regex;
+   pickaxe(diff_queued_diff, o, diff_grep, fno);
 
regfree(regex);
return;
 }
 
-static unsigned int contains(mmfile_t *mf, regex_t *regexp, kwset_t kws)
+static unsigned int contains(mmfile_t *mf, struct fn_options *fno)
 {
unsigned int cnt;
unsigned long sz;
@@ -136,12 +143,12 @@ static unsigned int contains(mmfile_t *mf, regex_t 
*regexp, kwset_t kws)
data = mf-ptr;
cnt = 0;
 
-   if (regexp) {
+   if (fno-regex) {
regmatch_t regmatch;
int flags = 0;
 
assert(data[sz] == '\0');
-   while (*data  !regexec(regexp, data, 1

[PATCH 10/10] Documentation: Document --function-name usage

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan Lodha  David A. Dalrymple dad-...@mit.edu

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 Documentation/diff-options.txt |  9 +
 Documentation/gitdiffcore.txt  | 17 ++---
 2 files changed, 23 insertions(+), 3 deletions(-)

diff --git a/Documentation/diff-options.txt b/Documentation/diff-options.txt
index 9b37b2a..a778dff 100644
--- a/Documentation/diff-options.txt
+++ b/Documentation/diff-options.txt
@@ -427,6 +427,15 @@ information.
 --pickaxe-regex::
Treat the string given to `-S` as an extended POSIX regular
expression to match.
+
+--function-name<regex>::
+   Look for differences whose patch text contains hunk headers that match
+   <regex>. This can be useful to locate changes to a particular function
+   or other semantic element in a program, since hunk headers are intended
+   to indicate the function context (in the sense of
+   `--function-context`) in which the particular change occurs. The
+   function context is determined by the diff driver corresponding to the
+   file in question; see linkgit:gitattributes[7] for details.
 endif::git-format-patch[]
 
 -Oorderfile::
diff --git a/Documentation/gitdiffcore.txt b/Documentation/gitdiffcore.txt
index c8b3e51..b8477ce 100644
--- a/Documentation/gitdiffcore.txt
+++ b/Documentation/gitdiffcore.txt
@@ -222,10 +222,11 @@ version prefixed with '+'.
 diffcore-pickaxe: For Detecting Addition/Deletion of Specified String
 -
 
-This transformation limits the set of filepairs to those that change
+This transformation limits the set of filepairs to those that involve
 specified strings between the preimage and the postimage in a certain
-way.  -S<block of text> and -G<regular expression> options are used to
-specify different ways these strings are sought.
+way.  -S<block of text>, -G<regular expression>, and
+--function-name<regular expression> options are used to specify
+different ways these strings are sought.
 
 -S<block of text> detects filepairs whose preimage and postimage
 have different number of occurrences of the specified block of text.
@@ -251,6 +252,16 @@ criterion in a changeset, the entire changeset is kept.  
This behavior
 is designed to make reviewing changes in the context of the whole
 changeset easier.
 
+--function-name<regular expression> detects filepairs whose textual
+diff contains a hunk header that matches the given regular expression.
+The hunk header is generated via the diff driver specified in
+`.gitattributes`, and is intended to reflect the function context
+(in the sense of `--function-context`) in which the change occurs,
+with programming-language-dependent heuristics. As of now, the
+programming language is not auto-detected in any way. Also note that
+hunks whose headers do not match the regular expression are not
+currently filtered out; this is only a filepair filter.
+
 diffcore-order: For Sorting the Output Based on Filenames
 -
 
-- 
1.7.12.4 (Apple Git-37)

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 01/10] .gitattributes: specify the language used

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan G. Lodha  David A. Dalrymple dad-...@mit.edu

Since git can intelligently emit diff hunk headers based on the
programming language of each file, assuming that the language is
specified in .gitattributes, it makes sense to specify our own
language (cpp) in our own .gitattributes file.

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 .gitattributes | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitattributes b/.gitattributes
index 5e98806..320e33c 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,3 @@
 * whitespace=!indent,trail,space
-*.[ch] whitespace=indent,trail,space
+*.[ch] whitespace=indent,trail,space diff=cpp
 *.sh whitespace=indent,trail,space
-- 
1.7.12.4 (Apple Git-37)

--
To unsubscribe from this list: send the line unsubscribe git in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html


[PATCH 07/10] xdiff: add XDL_EMIT_MOREFUNCNAMES

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan G. Lodha  David A. Dalrymple dad-...@mit.edu

For filtering commits by function name, it's useful to identify the
function name in cases such as adding a new function to a file (where
the default functionality will not emit a function name in the hunk
header, because it isn't part of the context).

This adds a flag asking xdiff to be more aggressive in finding function
names to emit, and turns the flag on when the --function-name option is
in use.

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 diff.c |  2 ++
 diffcore-pickaxe.c |  2 +-
 xdiff/xdiff.h  |  1 +
 xdiff/xemit.c  | 39 +++
 4 files changed, 27 insertions(+), 17 deletions(-)

diff --git a/diff.c b/diff.c
index 2f6dbc1..914b4a2 100644
--- a/diff.c
+++ b/diff.c
@@ -2380,6 +2380,8 @@ static void builtin_diff(const char *name_a,
xecfg.ctxlen = o-context;
xecfg.interhunkctxlen = o-interhunkcontext;
xecfg.flags = XDL_EMIT_FUNCNAMES;
+   if (o-funcname)
+   xecfg.flags |= XDL_EMIT_MOREFUNCNAMES;
if (DIFF_OPT_TST(o, FUNCCONTEXT))
xecfg.flags |= XDL_EMIT_FUNCCONTEXT;
if (pe)
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index 259a8fa..ab31c18 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -164,7 +164,7 @@ static int diff_funcname_filter(mmfile_t *one, mmfile_t 
*two,
xecfg.interhunkctxlen = o-interhunkcontext;
if (!(one  two))
xecfg.flags = XDL_EMIT_FUNCCONTEXT;
-   xecfg.flags |= XDL_EMIT_FUNCNAMES;
+   xecfg.flags |= XDL_EMIT_FUNCNAMES | XDL_EMIT_MOREFUNCNAMES;
xdi_diff_outf(one, two, match_funcname, ecbdata, xpp, xecfg);
return ecbdata.hit;
 }
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index c033991..469bded 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -44,6 +44,7 @@
 #define XDL_EMIT_FUNCNAMES (1  0)
 #define XDL_EMIT_COMMON (1  1)
 #define XDL_EMIT_FUNCCONTEXT (1  2)
+#define XDL_EMIT_MOREFUNCNAMES (1  3)
 
 #define XDL_MMB_READONLY (1  0)
 
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 4266ada..0ddb094 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -23,6 +23,10 @@
 #include xinclude.h
 
 
+struct func_line {
+   long len;
+   char buf[80];
+};
 
 
 static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec);
@@ -135,12 +139,7 @@ static int xdl_emit_common(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb,
return 0;
 }
 
-struct func_line {
-   long len;
-   char buf[80];
-};
-
-static long get_func_line(xdfenv_t *xe, xdemitconf_t const *xecfg,
+static long get_func_line(xdfile_t *xdf, xdemitconf_t const *xecfg,
  struct func_line *func_line, long start, long limit)
 {
find_func_t ff = xecfg-find_func ? xecfg-find_func : def_ff;
@@ -150,9 +149,9 @@ static long get_func_line(xdfenv_t *xe, xdemitconf_t const 
*xecfg,
buf = func_line ? func_line-buf : dummy;
size = func_line ? sizeof(func_line-buf) : sizeof(dummy);
 
-   for (l = start; l != limit  0 = l  l  xe-xdf1.nrec; l += step) {
+   for (l = start; l != limit  0 = l  l  xdf-nrec; l += step) {
const char *rec;
-   long reclen = xdl_get_rec(xe-xdf1, l, rec);
+   long reclen = xdl_get_rec(xdf, l, rec);
long len = ff(rec, reclen, buf, size, xecfg-find_func_priv);
if (len = 0) {
if (func_line)
@@ -167,7 +166,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb,
  xdemitconf_t const *xecfg) {
long s1, s2, e1, e2, lctx;
xdchange_t *xch, *xche;
-   long funclineprev = -1;
+   long funclineprev1 = -1, funclineprev2 = -1;
struct func_line func_line = { 0 };
 
if (xecfg-flags  XDL_EMIT_COMMON)
@@ -182,7 +181,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb,
s2 = XDL_MAX(xch-i2 - xecfg-ctxlen, 0);
 
if (xecfg-flags  XDL_EMIT_FUNCCONTEXT) {
-   long fs1 = get_func_line(xe, xecfg, NULL, xch-i1, -1);
+   long fs1 = get_func_line(xe-xdf1, xecfg, NULL, 
xch-i1, -1);
if (fs1  0)
fs1 = 0;
if (fs1  s1) {
@@ -200,7 +199,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb,
e2 = xche-i2 + xche-chg2 + lctx;
 
if (xecfg-flags  XDL_EMIT_FUNCCONTEXT) {
-   long fe1 = get_func_line(xe, xecfg, NULL,
+   long fe1 = get_func_line(xe-xdf1, xecfg, NULL,
 xche-i1 + xche-chg1,
 xe-xdf1.nrec);
if (fe1  0)
@@ -218,7 +217,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb

[PATCH 08/10] xdiff: add XDL_EMIT_MOREHUNKHEADS

2014-03-27 Thread David A. Dalrymple (and Bhushan G. Lodha)
From: Bhushan G. Lodha  David A. Dalrymple dad-...@mit.edu

For filtering by function names, it's useful to split hunks whenever a
function line is encountered, so that each function name being deleted
or inserted gets its own hunk header (which then can be easily detected
by the filter).

This adds a flag, XDL_EMIT_MOREHUNKHEADS, which triggers this
nonstandard behavior, and enables it only in case the --function-name
option is being used.

Signed-off-by: David Dalrymple (on zayin) davi...@alum.mit.edu
---
 diff.c |  3 ++-
 diffcore-pickaxe.c |  3 ++-
 xdiff/xdiff.h  |  1 +
 xdiff/xdiffi.c |  2 +-
 xdiff/xemit.c  | 60 --
 xdiff/xemit.h  |  4 +++-
 6 files changed, 67 insertions(+), 6 deletions(-)

diff --git a/diff.c b/diff.c
index 914b4a2..a86206c 100644
--- a/diff.c
+++ b/diff.c
@@ -2381,7 +2381,8 @@ static void builtin_diff(const char *name_a,
xecfg.interhunkctxlen = o-interhunkcontext;
xecfg.flags = XDL_EMIT_FUNCNAMES;
if (o-funcname)
-   xecfg.flags |= XDL_EMIT_MOREFUNCNAMES;
+   xecfg.flags |= XDL_EMIT_MOREFUNCNAMES
+   | XDL_EMIT_MOREHUNKHEADS;
if (DIFF_OPT_TST(o, FUNCCONTEXT))
xecfg.flags |= XDL_EMIT_FUNCCONTEXT;
if (pe)
diff --git a/diffcore-pickaxe.c b/diffcore-pickaxe.c
index ab31c18..d9f4c30 100644
--- a/diffcore-pickaxe.c
+++ b/diffcore-pickaxe.c
@@ -164,7 +164,8 @@ static int diff_funcname_filter(mmfile_t *one, mmfile_t 
*two,
xecfg.interhunkctxlen = o-interhunkcontext;
if (!(one  two))
xecfg.flags = XDL_EMIT_FUNCCONTEXT;
-   xecfg.flags |= XDL_EMIT_FUNCNAMES | XDL_EMIT_MOREFUNCNAMES;
+   xecfg.flags |= XDL_EMIT_FUNCNAMES | XDL_EMIT_MOREFUNCNAMES
+   | XDL_EMIT_MOREHUNKHEADS;
xdi_diff_outf(one, two, match_funcname, ecbdata, xpp, xecfg);
return ecbdata.hit;
 }
diff --git a/xdiff/xdiff.h b/xdiff/xdiff.h
index 469bded..787c376 100644
--- a/xdiff/xdiff.h
+++ b/xdiff/xdiff.h
@@ -45,6 +45,7 @@
 #define XDL_EMIT_COMMON (1  1)
 #define XDL_EMIT_FUNCCONTEXT (1  2)
 #define XDL_EMIT_MOREFUNCNAMES (1  3)
+#define XDL_EMIT_MOREHUNKHEADS (1  4)
 
 #define XDL_MMB_READONLY (1  0)
 
diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 2358a2d..c29804e 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -545,7 +545,7 @@ static int xdl_call_hunk_func(xdfenv_t *xe, xdchange_t 
*xscr, xdemitcb_t *ecb,
xdchange_t *xch, *xche;
 
for (xch = xscr; xch; xch = xche-next) {
-   xche = xdl_get_hunk(xch, xecfg);
+   xche = xdl_get_hunk(xe, xch, xecfg);
if (!xch)
break;
if (xecfg-hunk_func(xch-i1, xche-i1 + xche-chg1 - xch-i1,
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 0ddb094..f49eaaf 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -29,6 +29,9 @@ struct func_line {
 };
 
 
+static long get_func_line(xdfile_t *xdf, xdemitconf_t const *xecfg,
+ struct func_line *func_line, long start, long limit);
+
 static long xdl_get_rec(xdfile_t *xdf, long ri, char const **rec);
 static int xdl_emit_record(xdfile_t *xdf, long ri, char const *pre, xdemitcb_t 
*ecb);
 
@@ -62,7 +65,7 @@ static int xdl_emit_record(xdfile_t *xdf, long ri, char const 
*pre, xdemitcb_t *
  * inside the differential hunk according to the specified configuration.
  * Also advance xscr if the first changes must be discarded.
  */
-xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t const *xecfg)
+xdchange_t *xdl_get_hunk(xdfenv_t *xe, xdchange_t **xscr, xdemitconf_t const 
*xecfg)
 {
xdchange_t *xch, *xchp, *lxch;
long max_common = 2 * xecfg-ctxlen + xecfg-interhunkctxlen;
@@ -83,6 +86,59 @@ xdchange_t *xdl_get_hunk(xdchange_t **xscr, xdemitconf_t 
const *xecfg)
 
lxch = *xscr;
 
+   if (xecfg-flags  XDL_EMIT_MOREHUNKHEADS)
+   for (xch = *xscr; xch; xch=xch-next) {
+   /*
+* If a current change contains a func_line, end this
+* hunk immediately before and create a new hunk
+* starting from that line.
+*/
+   long fl_in_xch1 = get_func_line(xe-xdf1, xecfg, NULL,
+   xch-i1, xch-i1+xch-chg1);
+   long fl_in_xch2 = get_func_line(xe-xdf2, xecfg, NULL,
+   xch-i2, xch-i2+xch-chg2);
+   if (fl_in_xch1 = xch-i1  fl_in_xch2 = xch-i2) {
+   xdchange_t *new_next =
+   (xdchange_t 
*)xdl_malloc(sizeof(xdchange_t));
+   new_next-i1 = xch-i1+xch-chg1;
+   new_next-chg1 = 0