[RFC/PATCH 11/17] diff.c: convert emit_binary_diff_body to use emit_line_*

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This covers emit_binary_diff_body.

Signed-off-by: Stefan Beller 
---
 diff.c | 39 ++-
 1 file changed, 22 insertions(+), 17 deletions(-)

diff --git a/diff.c b/diff.c
index 1892c2b..ff1e829 100644
--- a/diff.c
+++ b/diff.c
@@ -2169,8 +2169,8 @@ static unsigned char *deflate_it(char *data,
return deflated;
 }
 
-static void emit_binary_diff_body(FILE *file, mmfile_t *one, mmfile_t *two,
- const char *prefix)
+static void emit_binary_diff_body(struct diff_options *o,
+ mmfile_t *one, mmfile_t *two)
 {
void *cp;
void *delta;
@@ -2199,13 +2199,12 @@ static void emit_binary_diff_body(FILE *file, mmfile_t 
*one, mmfile_t *two,
}
 
if (delta && delta_size < deflate_size) {
-   fprintf(file, "%sdelta %lu\n", prefix, orig_size);
+   emit_line_fmt(o, 0, 0, "delta %lu\n", orig_size);
free(deflated);
data = delta;
data_size = delta_size;
-   }
-   else {
-   fprintf(file, "%sliteral %lu\n", prefix, two->size);
+   } else {
+   emit_line_fmt(o, 0, 0, "literal %lu\n", two->size);
free(delta);
data = deflated;
data_size = deflate_size;
@@ -2214,8 +2213,9 @@ static void emit_binary_diff_body(FILE *file, mmfile_t 
*one, mmfile_t *two,
/* emit data encoded in base85 */
cp = data;
while (data_size) {
+   int len;
int bytes = (52 < data_size) ? 52 : data_size;
-   char line[70];
+   char line[71];
data_size -= bytes;
if (bytes <= 26)
line[0] = bytes + 'A' - 1;
@@ -2223,20 +2223,25 @@ static void emit_binary_diff_body(FILE *file, mmfile_t 
*one, mmfile_t *two,
line[0] = bytes - 26 + 'a' - 1;
encode_85(line + 1, cp, bytes);
cp = (char *) cp + bytes;
-   fprintf(file, "%s", prefix);
-   fputs(line, file);
-   fputc('\n', file);
+
+   len = strlen(line);
+   line[len++] = '\n';
+   line[len] = '\0';
+
+   emit_line(o, 0, 0, line, len);
}
-   fprintf(file, "%s\n", prefix);
+   emit_line(o, 0, 0, "\n", 1);
free(data);
 }
 
-static void emit_binary_diff(FILE *file, mmfile_t *one, mmfile_t *two,
-const char *prefix)
+static void emit_binary_diff(struct diff_options *o,
+mmfile_t *one, mmfile_t *two)
 {
-   fprintf(file, "%sGIT binary patch\n", prefix);
-   emit_binary_diff_body(file, one, two, prefix);
-   emit_binary_diff_body(file, two, one, prefix);
+   const char *s = "GIT binary patch\n";
+   const int len = strlen(s);
+   emit_line(o, 0, 0, s, len);
+   emit_binary_diff_body(o, one, two);
+   emit_binary_diff_body(o, two, one);
 }
 
 int diff_filespec_is_binary(struct diff_filespec *one)
@@ -2411,7 +2416,7 @@ static void builtin_diff(const char *name_a,
emit_line(o, 0, 0, header.buf, header.len);
strbuf_reset();
if (DIFF_OPT_TST(o, BINARY))
-   emit_binary_diff(o->file, , , line_prefix);
+   emit_binary_diff(o, , );
else
emit_line_fmt(o, 0, 0, "Binary files %s and %s 
differ\n",
  lbl[0], lbl[1]);
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 14/17] diff.c: convert diff_flush to use emit_line_*

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This covers diff_flush.

Signed-off-by: Stefan Beller 
---
 diff.c | 9 +
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/diff.c b/diff.c
index 3940f79..2b1ebcc 100644
--- a/diff.c
+++ b/diff.c
@@ -4737,12 +4737,13 @@ void diff_flush(struct diff_options *options)
 
if (output_format & DIFF_FORMAT_PATCH) {
if (separator) {
-   fprintf(options->file, "%s%c",
-   diff_line_prefix(options),
-   options->line_termination);
+   emit_line_0(options, NULL, NULL,
+   options->line_termination, "", 0);
if (options->stat_sep) {
/* attach patch instead of inline */
-   fputs(options->stat_sep, options->file);
+   emit_line_0(options, NULL, NULL, 0,
+   options->stat_sep,
+   strlen(options->stat_sep));
}
}
 
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 16/17] diff: buffer output in emit_line_0

2016-09-12 Thread Stefan Beller
emit_line_0 factors out the emission part into emit_line_emission,
and depending on the diff_options->use_buffer the emission
will be performed directly when calling emit_line_0 or after the
whole process is done, i.e. by buffering we have add the possibility
for a second pass over the whole output before doing the actual
output.

In 6440d34 (2012-03-14, diff: tweak a _copy_ of diff_options with
word-diff) we introduced a duplicate diff options struct for word
emissions as we may have different regex settings in there.

When buffering the output, we need to operate on just one buffer,
so we have to copy back the emissions of the word buffer into the main
buffer.

Unconditionally enable output via buffer in this patch as it yields
a great opportunity for testing, i.e. all the diff tests from the
test suite pass without having reordering issues (i.e. only parts
of the output got buffered, and we forgot to buffer other parts).
The test suite passes, which gives confidence that we converted all
functions to use emit_line_* for output.

Signed-off-by: Stefan Beller 
---
 diff.c | 151 +
 diff.h |  18 
 2 files changed, 142 insertions(+), 27 deletions(-)

diff --git a/diff.c b/diff.c
index 68da135..e240e89 100644
--- a/diff.c
+++ b/diff.c
@@ -449,42 +449,86 @@ static void check_blank_at_eof(mmfile_t *mf1, mmfile_t 
*mf2,
ecbdata->blank_at_eof_in_postimage = (at - l2) + 1;
 }
 
-static void emit_line_0(struct diff_options *o, const char *set, const char 
*reset,
-   int first, const char *line, int len)
+static void emit_line_emission(struct diff_options *o, struct line_emission *e)
 {
-   int has_trailing_newline, has_trailing_carriage_return;
-   int nofirst;
FILE *file = o->file;
 
fputs(diff_line_prefix(o), file);
 
+   if (e->len || e->first) {
+   if (e->set)
+   fputs(e->set, file);
+   if (e->first)
+   fputc(e->first, file);
+   if (e->whitespace_check) {
+   if (e->reset)
+   fputs(e->reset, file);
+   ws_check_emit(e->line, e->len, e->ws_rule,
+ file, e->set, e->reset, e->ws);
+   } else {
+   fwrite(e->line, e->len, 1, file);
+   if (e->reset)
+   fputs(e->reset, file);
+   }
+   }
+   if (e->has_trailing_carriage_return)
+   fputc('\r', file);
+   if (e->has_trailing_newline)
+   fputc('\n', file);
+}
+
+static void free_line_emission(struct line_emission *e)
+{
+   /*
+* No need to free set, reset, ws as they always point to the
+* diff_colors[] static array. We don't own that memory.
+*/
+   free((char*)e->line);
+}
+
+static void append_line_emission_to_buffer(struct diff_options *o,
+struct line_emission *e)
+{
+   struct line_emission *f;
+   ALLOC_GROW(o->line_buffer,
+  o->line_buffer_nr + 1,
+  o->line_buffer_alloc);
+   f = >line_buffer[o->line_buffer_nr++];
+   memcpy(f, e, sizeof(*e));
+
+   /* duplicate the line for now as it is not a stable pointer */
+   f->line = xmemdupz(e->line, e->len);
+}
+
+static void emit_line_0(struct diff_options *o, const char *set,
+   const char *reset, int first, const char *line, int len)
+{
+   int nofirst;
+   struct line_emission e = LINE_EMISSION_INIT;
+
if (len == 0) {
-   has_trailing_newline = (first == '\n');
-   has_trailing_carriage_return = (first == '\r');
-   nofirst = has_trailing_newline || has_trailing_carriage_return;
+   e.has_trailing_newline = (first == '\n');
+   e.has_trailing_carriage_return = (first == '\r');
+   nofirst = e.has_trailing_newline || 
e.has_trailing_carriage_return;
} else {
-   has_trailing_newline = (len > 0 && line[len-1] == '\n');
-   if (has_trailing_newline)
+   e.has_trailing_newline = (len > 0 && line[len-1] == '\n');
+   if (e.has_trailing_newline)
len--;
-   has_trailing_carriage_return = (len > 0 && line[len-1] == '\r');
-   if (has_trailing_carriage_return)
+   e.has_trailing_carriage_return = (len > 0 && line[len-1] == 
'\r');
+   if (e.has_trailing_carriage_return)
len--;
nofirst = 0;
}
 
-   if (len || !nofirst) {
-   if (set)
-   fputs(set, file);
-   if (!nofirst)
-   fputc(first, file);
-   fwrite(line, len, 1, file);
-   if (reset)
-   fputs(reset, 

[RFC/PATCH 15/17] diff.c: convert diff_summary to use emit_line_*

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This covers diff_summary.

Signed-off-by: Stefan Beller 
---
 diff.c | 62 +-
 1 file changed, 33 insertions(+), 29 deletions(-)

diff --git a/diff.c b/diff.c
index 2b1ebcc..68da135 100644
--- a/diff.c
+++ b/diff.c
@@ -4382,67 +4382,71 @@ static void flush_one_pair(struct diff_filepair *p, 
struct diff_options *opt)
}
 }
 
-static void show_file_mode_name(FILE *file, const char *newdelete, struct 
diff_filespec *fs)
+static void show_file_mode_name(struct diff_options *opt, const char 
*newdelete, struct diff_filespec *fs)
 {
+   struct strbuf sb = STRBUF_INIT;
if (fs->mode)
-   fprintf(file, " %s mode %06o ", newdelete, fs->mode);
+   strbuf_addf(, " %s mode %06o ", newdelete, fs->mode);
else
-   fprintf(file, " %s ", newdelete);
-   write_name_quoted(fs->path, file, '\n');
+   strbuf_addf(, " %s ", newdelete);
+
+   quote_c_style(fs->path, , NULL, 0);
+   strbuf_addch(, '\n');
+   emit_line(opt, NULL, NULL, sb.buf, sb.len);
+   strbuf_release();
 }
 
 
-static void show_mode_change(FILE *file, struct diff_filepair *p, int 
show_name,
-   const char *line_prefix)
+static void show_mode_change(struct diff_options *opt, struct diff_filepair *p,
+   int show_name)
 {
if (p->one->mode && p->two->mode && p->one->mode != p->two->mode) {
-   fprintf(file, "%s mode change %06o => %06o%c", line_prefix, 
p->one->mode,
-   p->two->mode, show_name ? ' ' : '\n');
+   struct strbuf sb = STRBUF_INIT;
if (show_name) {
-   write_name_quoted(p->two->path, file, '\n');
+   strbuf_addch(, ' ');
+   quote_c_style(p->two->path, , NULL, 0);
}
+   emit_line_fmt(opt, NULL, NULL,
+ " mode change %06o => %06o%s\n",
+ p->one->mode, p->two->mode,
+ show_name ? sb.buf : "");
+   strbuf_release();
}
 }
 
-static void show_rename_copy(FILE *file, const char *renamecopy, struct 
diff_filepair *p,
-   const char *line_prefix)
+static void show_rename_copy(struct diff_options *opt, const char *renamecopy,
+   struct diff_filepair *p)
 {
char *names = pprint_rename(p->one->path, p->two->path);
-
-   fprintf(file, " %s %s (%d%%)\n", renamecopy, names, 
similarity_index(p));
+   emit_line_fmt(opt, NULL, NULL, " %s %s (%d%%)\n", renamecopy, names, 
similarity_index(p));
free(names);
-   show_mode_change(file, p, 0, line_prefix);
+   show_mode_change(opt, p, 0);
 }
 
 static void diff_summary(struct diff_options *opt, struct diff_filepair *p)
 {
-   FILE *file = opt->file;
-   const char *line_prefix = diff_line_prefix(opt);
-
switch(p->status) {
case DIFF_STATUS_DELETED:
-   fputs(line_prefix, file);
-   show_file_mode_name(file, "delete", p->one);
+   show_file_mode_name(opt, "delete", p->one);
break;
case DIFF_STATUS_ADDED:
-   fputs(line_prefix, file);
-   show_file_mode_name(file, "create", p->two);
+   show_file_mode_name(opt, "create", p->two);
break;
case DIFF_STATUS_COPIED:
-   fputs(line_prefix, file);
-   show_rename_copy(file, "copy", p, line_prefix);
+   show_rename_copy(opt, "copy", p);
break;
case DIFF_STATUS_RENAMED:
-   fputs(line_prefix, file);
-   show_rename_copy(file, "rename", p, line_prefix);
+   show_rename_copy(opt, "rename", p);
break;
default:
if (p->score) {
-   fprintf(file, "%s rewrite ", line_prefix);
-   write_name_quoted(p->two->path, file, ' ');
-   fprintf(file, "(%d%%)\n", similarity_index(p));
+   struct strbuf sb = STRBUF_INIT;
+   strbuf_addstr(, " rewrite ");
+   quote_c_style(p->two->path, , NULL, 0);
+   strbuf_addf(, " (%d%%)\n", similarity_index(p));
+   emit_line(opt, NULL, NULL, sb.buf, sb.len);
}
-   show_mode_change(file, p, !p->score, line_prefix);
+   

[RFC/PATCH 10/17] submodule.c: convert show_submodule_summary to use emit_line_fmt

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This prepares the code for submodules to go through the
emit_line_0 function.

Signed-off-by: Stefan Beller 
---
 diff.c  |  5 ++---
 diff.h  |  3 +++
 submodule.c | 42 +-
 submodule.h |  3 +--
 4 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/diff.c b/diff.c
index 3aa99d1..1892c2b 100644
--- a/diff.c
+++ b/diff.c
@@ -493,7 +493,7 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
emit_line_0(o, set, reset, line[0], line+1, len-1);
 }
 
-static void emit_line_fmt(struct diff_options *o,
+void emit_line_fmt(struct diff_options *o,
  const char *set, const char *reset,
  const char *fmt, ...)
 {
@@ -2306,8 +2306,7 @@ static void builtin_diff(const char *name_a,
(!two->mode || S_ISGITLINK(two->mode))) {
const char *del = diff_get_color_opt(o, DIFF_FILE_OLD);
const char *add = diff_get_color_opt(o, DIFF_FILE_NEW);
-   show_submodule_summary(o->file, one->path ? one->path : 
two->path,
-   line_prefix,
+   show_submodule_summary(o, one->path ? one->path : two->path,
one->oid.hash, two->oid.hash,
two->dirty_submodule,
meta, del, add, reset);
diff --git a/diff.h b/diff.h
index 7883729..1699d9c 100644
--- a/diff.h
+++ b/diff.h
@@ -180,6 +180,9 @@ struct diff_options {
int diff_path_counter;
 };
 
+void emit_line_fmt(struct diff_options *o, const char *set, const char *reset,
+  const char *fmt, ...);
+
 enum color_diff {
DIFF_RESET = 0,
DIFF_CONTEXT = 1,
diff --git a/submodule.c b/submodule.c
index 1b5cdfb..c817b46 100644
--- a/submodule.c
+++ b/submodule.c
@@ -304,8 +304,8 @@ static int prepare_submodule_summary(struct rev_info *rev, 
const char *path,
return prepare_revision_walk(rev);
 }
 
-static void print_submodule_summary(struct rev_info *rev, FILE *f,
-   const char *line_prefix,
+static void print_submodule_summary(struct rev_info *rev,
+   struct diff_options *o,
const char *del, const char *add, const char *reset)
 {
static const char format[] = "  %m %s";
@@ -317,24 +317,16 @@ static void print_submodule_summary(struct rev_info *rev, 
FILE *f,
ctx.date_mode = rev->date_mode;
ctx.output_encoding = get_log_output_encoding();
strbuf_setlen(, 0);
-   strbuf_addstr(, line_prefix);
-   if (commit->object.flags & SYMMETRIC_LEFT) {
-   if (del)
-   strbuf_addstr(, del);
-   }
-   else if (add)
-   strbuf_addstr(, add);
format_commit_message(commit, format, , );
-   if (reset)
-   strbuf_addstr(, reset);
-   strbuf_addch(, '\n');
-   fprintf(f, "%s", sb.buf);
+   if (commit->object.flags & SYMMETRIC_LEFT)
+   emit_line_fmt(o, del, reset, "%s\n", sb.buf);
+   else if (add)
+   emit_line_fmt(o, add, reset, "%s\n", sb.buf);
}
strbuf_release();
 }
 
-void show_submodule_summary(FILE *f, const char *path,
-   const char *line_prefix,
+void show_submodule_summary(struct diff_options *o, const char *path,
unsigned char one[20], unsigned char two[20],
unsigned dirty_submodule, const char *meta,
const char *del, const char *add, const char *reset)
@@ -359,30 +351,30 @@ void show_submodule_summary(FILE *f, const char *path,
message = "(revision walker failed)";
 
if (dirty_submodule & DIRTY_SUBMODULE_UNTRACKED)
-   fprintf(f, "%sSubmodule %s contains untracked content\n",
-   line_prefix, path);
+   emit_line_fmt(o, 0, 0,
+ "Submodule %s contains untracked content\n", 
path);
if (dirty_submodule & DIRTY_SUBMODULE_MODIFIED)
-   fprintf(f, "%sSubmodule %s contains modified content\n",
-   line_prefix, path);
+   emit_line_fmt(o, 0, 0,
+ "Submodule %s contains modified content\n", path);
 
if (!hashcmp(one, two)) {

[RFC/PATCH 12/17] diff.c: convert show_stats to use emit_line_*

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

We call print_stat_summary from builtin/apply, so we still
need the version with a file pointer, so introduce
print_stat_summary_0 that uses emit_line_* machinery and
keep print_stat_summary with the same arguments around.

Signed-off-by: Stefan Beller 
---
 diff.c | 87 +-
 diff.h |  4 +--
 2 files changed, 51 insertions(+), 40 deletions(-)

diff --git a/diff.c b/diff.c
index ff1e829..00a5a4e 100644
--- a/diff.c
+++ b/diff.c
@@ -1465,20 +1465,19 @@ static int scale_linear(int it, int width, int 
max_change)
return 1 + (it * (width - 1) / max_change);
 }
 
-static void show_name(FILE *file,
+static void show_name(struct strbuf *out,
  const char *prefix, const char *name, int len)
 {
-   fprintf(file, " %s%-*s |", prefix, len, name);
+   strbuf_addf(out, " %s%-*s |", prefix, len, name);
 }
 
-static void show_graph(FILE *file, char ch, int cnt, const char *set, const 
char *reset)
+static void show_graph(struct strbuf *out, char ch, int cnt, const char *set, 
const char *reset)
 {
if (cnt <= 0)
return;
-   fprintf(file, "%s", set);
-   while (cnt--)
-   putc(ch, file);
-   fprintf(file, "%s", reset);
+   strbuf_addstr(out, set);
+   strbuf_addchars(out, ch, cnt);
+   strbuf_addstr(out, reset);
 }
 
 static void fill_print_name(struct diffstat_file *file)
@@ -1502,14 +1501,16 @@ static void fill_print_name(struct diffstat_file *file)
file->print_name = pname;
 }
 
-int print_stat_summary(FILE *fp, int files, int insertions, int deletions)
+void print_stat_summary_0(struct diff_options *options, int files,
+ int insertions, int deletions)
 {
struct strbuf sb = STRBUF_INIT;
-   int ret;
 
if (!files) {
assert(insertions == 0 && deletions == 0);
-   return fprintf(fp, "%s\n", " 0 files changed");
+   strbuf_addstr(, " 0 files changed");
+   emit_line_0(options, 0, 0, 0, sb.buf, sb.len);
+   return;
}
 
strbuf_addf(,
@@ -1536,9 +1537,17 @@ int print_stat_summary(FILE *fp, int files, int 
insertions, int deletions)
deletions);
}
strbuf_addch(, '\n');
-   ret = fputs(sb.buf, fp);
+   emit_line_0(options, 0, 0, 0, sb.buf, sb.len);
strbuf_release();
-   return ret;
+}
+
+void print_stat_summary(FILE *fp, int files,
+   int insertions, int deletions)
+{
+   struct diff_options o;
+   memset(, 0, sizeof(o));
+   o.file = fp;
+   print_stat_summary_0(, files, insertions, deletions);
 }
 
 static void show_stats(struct diffstat_t *data, struct diff_options *options)
@@ -1548,13 +1557,12 @@ static void show_stats(struct diffstat_t *data, struct 
diff_options *options)
int total_files = data->nr, count;
int width, name_width, graph_width, number_width = 0, bin_width = 0;
const char *reset, *add_c, *del_c;
-   const char *line_prefix = "";
int extra_shown = 0;
+   struct strbuf out = STRBUF_INIT;
 
if (data->nr == 0)
return;
 
-   line_prefix = diff_line_prefix(options);
count = options->stat_count ? options->stat_count : data->nr;
 
reset = diff_get_color_opt(options, DIFF_RESET);
@@ -1708,26 +1716,29 @@ static void show_stats(struct diffstat_t *data, struct 
diff_options *options)
}
 
if (file->is_binary) {
-   fprintf(options->file, "%s", line_prefix);
-   show_name(options->file, prefix, name, len);
-   fprintf(options->file, " %*s", number_width, "Bin");
+   show_name(, prefix, name, len);
+   strbuf_addf(, " %*s", number_width, "Bin");
if (!added && !deleted) {
-   putc('\n', options->file);
+   strbuf_addch(, '\n');
+   emit_line(options, 0, 0, out.buf, out.len);
+   strbuf_reset();
continue;
}
-   fprintf(options->file, " %s%"PRIuMAX"%s",
+   strbuf_addf(, " %s%"PRIuMAX"%s",
del_c, deleted, reset);
-   fprintf(options->file, " -> ");
-  

[RFC/PATCH 17/17] diff.c: color moved lines differently

2016-09-12 Thread Stefan Beller
When there is a lot of code moved around such as in 11979b9 (2005-11-18,
"http.c: reorder to avoid compilation failure.") for example, the review
process is quite hard, as it is not mentally challenging. It is rather a
tedious process, that gets boring quickly. However you still need to read
through all of the code to make sure the moved lines are there as supposed.

While it is trivial to color up a patch like the following

$ git show -p
commit 95b1fc60907fb9224bd785111ccd16e7e0aec4d1
Author: Stefan Beller 
Date:   Mon Sep 12 20:02:46 2016 -0700

move secure_foo from test.c to file2.c

diff --git a/file2.c b/file2.c
index 9163a0f..8e66dc0 100644
--- a/file2.c
+++ b/file2.c
@@ -3,13 +3,6 @@ void *xmemdupz(const void *data, size_t len)
return memcpy(xmallocz(len), data, len);
 }

-int secure_foo(struct user *u)
-{
-   if (!u->is_allowed_foo)
-   return;
-   foo(u);
-}
-
 char *xstrndup(const char *str, size_t len)
 {
char *p = memchr(str, '\0', len);
diff --git a/test.c b/test.c
index a95e6fe..81eb0eb 100644
--- a/test.c
+++ b/test.c
@@ -18,6 +18,13 @@ ssize_t pread_in_full(int fd, void *buf, size_t count, 
off_t offset)
return total;
 }

+int secure_foo(struct user *u)
+{
+   if (!u->is_allowed_foo)
+   return;
+   foo(u);
+}
+
 int xdup(int fd)
 {
int ret = dup(fd);

as in this patch all lines that add or remove lines
should be colored in the new color that indicates moved
lines.

However the intention of this patch is to aid reviewers
to spotting permutations in the moved code. So consider the
following malicious move:

diff --git a/file2.c b/file2.c
index 9163a0f..8e66dc0 100644
--- a/file2.c
+++ b/file2.c
@@ -3,13 +3,6 @@ void *xmemdupz(const void *data, size_t len)
return memcpy(xmallocz(len), data, len);
 }

-int secure_foo(struct user *u)
-{
-   if (!u->is_allowed_foo)
-   return;
-   foo(u);
-}
-
 char *xstrndup(const char *str, size_t len)
 {
char *p = memchr(str, '\0', len);
diff --git a/test.c b/test.c
index a95e6fe..a679c40 100644
--- a/test.c
+++ b/test.c
@@ -18,6 +18,13 @@ ssize_t pread_in_full(int fd, void *buf, size_t count, 
off_t offset)
return total;
 }

+int secure_foo(struct user *u)
+{
+   foo(u);
+   if (!u->is_allowed_foo)
+   return;
+}
+
 int xdup(int fd)
 {
int ret = dup(fd);

If the moved code is larger, it is easier to hide some permutation in the
code, which is why we would not want to color all lines as "moved" in this
case. So additionally to coloring lines differently that are added and
removed in the same diff, we need to tweak the algorithm a bit more.

As the reviewers attention should be brought to the places, where the
difference is introduced to the moved code, we will not color a moved
line if the previous line is a different moved line, i.e. in the code
above the line `foo()` is still colored as a regular added line, because
the line before the call to `foo` changed from `return` to `{`. The line
with the condition is also not colored as moved but as a new addition.

Signed-off-by: Stefan Beller 
---
 Documentation/config.txt   |  12 +-
 Documentation/diff-options.txt |   7 +
 contrib/completion/git-completion.bash |   2 +
 diff.c | 171 
 diff.h |  10 +-
 t/t4015-diff-whitespace.sh | 229 +
 6 files changed, 402 insertions(+), 29 deletions(-)

diff --git a/Documentation/config.txt b/Documentation/config.txt
index 0bcb679..e0c7b91 100644
--- a/Documentation/config.txt
+++ b/Documentation/config.txt
@@ -974,14 +974,22 @@ This does not affect linkgit:git-format-patch[1] or the
 'git-diff-{asterisk}' plumbing commands.  Can be overridden on the
 command line with the `--color[=]` option.
 
+color.moved::
+   A boolean value, whether a diff should color moved lines
+   differently. The moved lines are searched for in the diff only.
+   Duplicated lines from somewhere in the project that are not
+   part of the diff are not colored as moved.
+   Defaults to true.
+
 color.diff.::
Use customized color for diff colorization.  `` specifies
which part of the patch to use the specified color, and is one
of `context` (context text - `plain` is a historical synonym),
`meta` (metainformation), `frag`
(hunk header), 'func' (function in hunk header), `old` (removed lines),
-   `new` (added lines), `commit` (commit headers), or `whitespace`
-   (highlighting whitespace errors).
+   `new` (added 

[RFC/PATCH 13/17] diff.c: convert word diffing to use emit_line_*

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This covers all code related to diffing words.

Signed-off-by: Stefan Beller 
---
 diff.c | 56 
 1 file changed, 28 insertions(+), 28 deletions(-)

diff --git a/diff.c b/diff.c
index 00a5a4e..3940f79 100644
--- a/diff.c
+++ b/diff.c
@@ -828,37 +828,38 @@ struct diff_words_data {
struct diff_words_style *style;
 };
 
-static int fn_out_diff_words_write_helper(FILE *fp,
+static int fn_out_diff_words_write_helper(struct diff_options *o,
  struct diff_words_style_elem *st_el,
  const char *newline,
  size_t count, const char *buf,
  const char *line_prefix)
 {
-   int print = 0;
+   struct strbuf sb = STRBUF_INIT;
 
while (count) {
char *p = memchr(buf, '\n', count);
-   if (print)
-   fputs(line_prefix, fp);
+
if (p != buf) {
-   if (st_el->color && fputs(st_el->color, fp) < 0)
-   return -1;
-   if (fputs(st_el->prefix, fp) < 0 ||
-   fwrite(buf, p ? p - buf : count, 1, fp) != 1 ||
-   fputs(st_el->suffix, fp) < 0)
-   return -1;
-   if (st_el->color && *st_el->color
-   && fputs(GIT_COLOR_RESET, fp) < 0)
-   return -1;
+   if (st_el->color)
+   strbuf_addstr(, st_el->color);
+   strbuf_addstr(, st_el->prefix);
+   strbuf_add(, buf, p ? p - buf : count);
+   strbuf_addstr(, st_el->suffix);
+   if (st_el->color && *st_el->color)
+   strbuf_addstr(, GIT_COLOR_RESET);
}
if (!p)
-   return 0;
-   if (fputs(newline, fp) < 0)
-   return -1;
+   goto out;
+   strbuf_addstr(, newline);
+   emit_line_0(o, 0, 0, 0, sb.buf, sb.len);
+   strbuf_reset();
count -= p + 1 - buf;
buf = p + 1;
-   print = 1;
}
+out:
+   if (sb.len)
+   emit_line_0(o, 0, 0, 0, sb.buf, sb.len);
+   strbuf_release();
return 0;
 }
 
@@ -936,25 +937,25 @@ static void fn_out_diff_words_aux(void *priv, char *line, 
unsigned long len)
} else
plus_begin = plus_end = diff_words->plus.orig[plus_first].end;
 
-   if (color_words_output_graph_prefix(diff_words)) {
-   fputs(line_prefix, diff_words->opt->file);
-   }
+   if (color_words_output_graph_prefix(diff_words))
+   emit_line_0(diff_words->opt, 0, 0, 0, "", 0);
+
if (diff_words->current_plus != plus_begin) {
-   fn_out_diff_words_write_helper(diff_words->opt->file,
+   fn_out_diff_words_write_helper(diff_words->opt,
>ctx, style->newline,
plus_begin - diff_words->current_plus,
diff_words->current_plus, line_prefix);
if (*(plus_begin - 1) == '\n')
-   fputs(line_prefix, diff_words->opt->file);
+   emit_line_0(diff_words->opt, 0, 0, 0, "", 0);
}
if (minus_begin != minus_end) {
-   fn_out_diff_words_write_helper(diff_words->opt->file,
+   fn_out_diff_words_write_helper(diff_words->opt,
>old, style->newline,
minus_end - minus_begin, minus_begin,
line_prefix);
}
if (plus_begin != plus_end) {
-   fn_out_diff_words_write_helper(diff_words->opt->file,
+   fn_out_diff_words_write_helper(diff_words->opt,
>new, style->newline,
plus_end - plus_begin, plus_begin,
line_prefix);
@@ -1050,8 +1051,7 @@ static void diff_words_show(struct diff_words_data 
*diff_words)
 
/* special case: only removal */
if (!diff_words->plus.text.size) {
-   fputs(line_prefix, diff_words->opt->file);
-   

[RFC/PATCH 09/17] diff.c: convert emit_rewrite_lines to use emit_line_*

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This covers emit_rewrite_lines.

Signed-off-by: Stefan Beller 
---
 diff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index ee4c9d4..3aa99d1 100644
--- a/diff.c
+++ b/diff.c
@@ -690,7 +690,7 @@ static void emit_rewrite_lines(struct emit_callback *ecb,
if (!endp) {
const char *context = diff_get_color(ecb->color_diff,
 DIFF_CONTEXT);
-   putc('\n', ecb->opt->file);
+   emit_line_0(ecb->opt, 0, 0, 0, "\n", 1);
emit_line_0(ecb->opt, context, reset, '\\',
nneof, strlen(nneof));
}
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 05/17] diff.c: emit_line_0 can handle no color setting

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

In later patches we may pass lines that are not colored to
the central function emit_line_0, so we
need to emit the color only when it is non-NULL.

Signed-off-by: Stefan Beller 
---
 diff.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index b6a40ae..5d57130 100644
--- a/diff.c
+++ b/diff.c
@@ -473,11 +473,13 @@ static void emit_line_0(struct diff_options *o, const 
char *set, const char *res
}
 
if (len || !nofirst) {
-   fputs(set, file);
+   if (set)
+   fputs(set, file);
if (!nofirst)
fputc(first, file);
fwrite(line, len, 1, file);
-   fputs(reset, file);
+   if (reset)
+   fputs(reset, file);
}
if (has_trailing_carriage_return)
fputc('\r', file);
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 07/17] diff.c: convert builtin_diff to use emit_line_*

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This covers builtin_diff.

Signed-off-by: Stefan Beller 
---
 diff.c | 37 -
 1 file changed, 20 insertions(+), 17 deletions(-)

diff --git a/diff.c b/diff.c
index b4f4b75..cbd4ba7 100644
--- a/diff.c
+++ b/diff.c
@@ -1234,8 +1234,8 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
o->found_changes = 1;
 
if (ecbdata->header) {
-   fprintf(o->file, "%s", ecbdata->header->buf);
-   strbuf_reset(ecbdata->header);
+   emit_line(o, 0, 0, ecbdata->header->buf, ecbdata->header->len);
+   strbuf_release(ecbdata->header);
ecbdata->header = NULL;
}
 
@@ -2333,7 +2333,7 @@ static void builtin_diff(const char *name_a,
b_two = quote_two(b_prefix, name_b + (*name_b == '/'));
lbl[0] = DIFF_FILE_VALID(one) ? a_one : "/dev/null";
lbl[1] = DIFF_FILE_VALID(two) ? b_two : "/dev/null";
-   strbuf_addf(, "%s%sdiff --git %s %s%s\n", line_prefix, meta, 
a_one, b_two, reset);
+   strbuf_addf(, "%sdiff --git %s %s%s\n", meta, a_one, b_two, 
reset);
if (lbl[0][0] == '/') {
/* /dev/null */
strbuf_addf(, "%s%snew file mode %06o%s\n", line_prefix, 
meta, two->mode, reset);
@@ -2365,7 +2365,7 @@ static void builtin_diff(const char *name_a,
if (complete_rewrite &&
(textconv_one || !diff_filespec_is_binary(one)) &&
(textconv_two || !diff_filespec_is_binary(two))) {
-   fprintf(o->file, "%s", header.buf);
+   emit_line(o, 0, 0, header.buf, header.len);
strbuf_reset();
emit_rewrite_diff(name_a, name_b, one, two,
textconv_one, textconv_two, o);
@@ -2375,7 +2375,8 @@ static void builtin_diff(const char *name_a,
}
 
if (o->irreversible_delete && lbl[1][0] == '/') {
-   fprintf(o->file, "%s", header.buf);
+   if (header.len)
+   emit_line(o, 0, 0, header.buf, header.len);
strbuf_reset();
goto free_ab_and_return;
} else if (!DIFF_OPT_TST(o, TEXT) &&
@@ -2385,13 +2386,14 @@ static void builtin_diff(const char *name_a,
S_ISREG(one->mode) && S_ISREG(two->mode) &&
!DIFF_OPT_TST(o, BINARY)) {
if (!oidcmp(>oid, >oid)) {
-   if (must_show_header)
-   fprintf(o->file, "%s", header.buf);
+   if (must_show_header && header.len)
+   emit_line(o, 0, 0, header.buf, 
header.len);
goto free_ab_and_return;
}
-   fprintf(o->file, "%s", header.buf);
-   fprintf(o->file, "%sBinary files %s and %s differ\n",
-   line_prefix, lbl[0], lbl[1]);
+   if (header.len)
+   emit_line(o, 0, 0, header.buf, header.len);
+   emit_line_fmt(o, 0, 0, "Binary files %s and %s 
differ\n",
+ lbl[0], lbl[1]);
goto free_ab_and_return;
}
if (fill_mmfile(, one) < 0 || fill_mmfile(, two) < 0)
@@ -2399,17 +2401,18 @@ static void builtin_diff(const char *name_a,
/* Quite common confusing case */
if (mf1.size == mf2.size &&
!memcmp(mf1.ptr, mf2.ptr, mf1.size)) {
-   if (must_show_header)
-   fprintf(o->file, "%s", header.buf);
+   if (must_show_header && header.len)
+   emit_line(o, 0, 0, header.buf, header.len);
goto free_ab_and_return;
}
-   fprintf(o->file, "%s", header.buf);
+   if (header.len)
+   emit_line(o, 0, 0, header.buf, header.len);
strbuf_reset();
if (DIFF_OPT_TST(o, BINARY))
emit_binary_diff(o->file, , , line_prefix);
else
-   fprintf(o->file, "%sBinary files %s and %s differ\n",
-   line_prefix, lbl[0], lbl[1]);
+   emit_line_fmt(o, 0, 0, "Binary files %s and 

[RFC/PATCH 04/17] diff.c: factor out diff_flush_patch_all_file_pairs

2016-09-12 Thread Stefan Beller
In a later patch we want to do more things before and after all filepairs
are flushed. So factor flushing out all file pairs into its own function
that the new code can be plugged in easily.

Signed-off-by: Stefan Beller 
---
 diff.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/diff.c b/diff.c
index 9d2e704..b6a40ae 100644
--- a/diff.c
+++ b/diff.c
@@ -4608,6 +4608,17 @@ void diff_warn_rename_limit(const char *varname, int 
needed, int degraded_cc)
warning(rename_limit_advice, varname, needed);
 }
 
+static void diff_flush_patch_all_file_pairs(struct diff_options *o)
+{
+   int i;
+   struct diff_queue_struct *q = _queued_diff;
+   for (i = 0; i < q->nr; i++) {
+   struct diff_filepair *p = q->queue[i];
+   if (check_pair_status(p))
+   diff_flush_patch(p, o);
+   }
+}
+
 void diff_flush(struct diff_options *options)
 {
struct diff_queue_struct *q = _queued_diff;
@@ -4702,11 +4713,7 @@ void diff_flush(struct diff_options *options)
}
}
 
-   for (i = 0; i < q->nr; i++) {
-   struct diff_filepair *p = q->queue[i];
-   if (check_pair_status(p))
-   diff_flush_patch(p, options);
-   }
+   diff_flush_patch_all_file_pairs(options);
}
 
if (output_format & DIFF_FORMAT_CALLBACK)
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 06/17] diff.c: convert fn_out_consume to use emit_line_*

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This covers the remaining parts of fn_out_consume.

Signed-off-by: Stefan Beller 
---
 diff.c | 26 ++
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/diff.c b/diff.c
index 5d57130..b4f4b75 100644
--- a/diff.c
+++ b/diff.c
@@ -493,6 +493,19 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
emit_line_0(o, set, reset, line[0], line+1, len-1);
 }
 
+static void emit_line_fmt(struct diff_options *o,
+ const char *set, const char *reset,
+ const char *fmt, ...)
+{
+   struct strbuf sb = STRBUF_INIT;
+   va_list ap;
+   va_start(ap, fmt);
+   strbuf_vaddf(, fmt, ap);
+   va_end(ap);
+
+   emit_line(o, set, reset, sb.buf, sb.len);
+}
+
 static int new_blank_line_at_eof(struct emit_callback *ecbdata, const char 
*line, int len)
 {
if (!((ecbdata->ws_rule & WS_BLANK_AT_EOF) &&
@@ -1217,7 +1230,6 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
const char *context = diff_get_color(ecbdata->color_diff, DIFF_CONTEXT);
const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET);
struct diff_options *o = ecbdata->opt;
-   const char *line_prefix = diff_line_prefix(o);
 
o->found_changes = 1;
 
@@ -1229,14 +1241,12 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
 
if (ecbdata->label_path[0]) {
const char *name_a_tab, *name_b_tab;
-
name_a_tab = strchr(ecbdata->label_path[0], ' ') ? "\t" : "";
name_b_tab = strchr(ecbdata->label_path[1], ' ') ? "\t" : "";
-
-   fprintf(o->file, "%s%s--- %s%s%s\n",
-   line_prefix, meta, ecbdata->label_path[0], reset, 
name_a_tab);
-   fprintf(o->file, "%s%s+++ %s%s%s\n",
-   line_prefix, meta, ecbdata->label_path[1], reset, 
name_b_tab);
+   emit_line_fmt(o, meta, reset, "--- %s%s\n",
+ ecbdata->label_path[0], name_a_tab);
+   emit_line_fmt(o, meta, reset, "+++ %s%s\n",
+ ecbdata->label_path[1], name_b_tab);
ecbdata->label_path[0] = ecbdata->label_path[1] = NULL;
}
 
@@ -1277,7 +1287,7 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
diff_words_flush(ecbdata);
if (ecbdata->diff_words->type == DIFF_WORDS_PORCELAIN) {
emit_line(o, context, reset, line, len);
-   fputs("~\n", o->file);
+   emit_line(o, 0, 0, "~\n", 2);
} else {
/*
 * Skip the prefix character, if any.  With
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 08/17] diff.c: convert emit_rewrite_diff to use emit_line_*

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This covers emit_rewrite_diff.

Signed-off-by: Stefan Beller 
---
 diff.c | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/diff.c b/diff.c
index cbd4ba7..ee4c9d4 100644
--- a/diff.c
+++ b/diff.c
@@ -653,17 +653,17 @@ static void remove_tempfile(void)
}
 }
 
-static void print_line_count(FILE *file, int count)
+static void add_line_count(struct strbuf *out, int count)
 {
switch (count) {
case 0:
-   fprintf(file, "0,0");
+   strbuf_addstr(out, "0,0");
break;
case 1:
-   fprintf(file, "1");
+   strbuf_addstr(out, "1");
break;
default:
-   fprintf(file, "1,%d", count);
+   strbuf_addf(out, "1,%d", count);
break;
}
 }
@@ -714,7 +714,7 @@ static void emit_rewrite_diff(const char *name_a,
char *data_one, *data_two;
size_t size_one, size_two;
struct emit_callback ecbdata;
-   const char *line_prefix = diff_line_prefix(o);
+   struct strbuf out = STRBUF_INIT;
 
if (diff_mnemonic_prefix && DIFF_OPT_TST(o, REVERSE_DIFF)) {
a_prefix = o->b_prefix;
@@ -752,20 +752,23 @@ static void emit_rewrite_diff(const char *name_a,
ecbdata.lno_in_preimage = 1;
ecbdata.lno_in_postimage = 1;
 
+   emit_line_fmt(o, metainfo, reset, "--- %s%s\n", a_name.buf, name_a_tab);
+   emit_line_fmt(o, metainfo, reset, "+++ %s%s\n", b_name.buf, name_b_tab);
+
lc_a = count_lines(data_one, size_one);
lc_b = count_lines(data_two, size_two);
-   fprintf(o->file,
-   "%s%s--- %s%s%s\n%s%s+++ %s%s%s\n%s%s@@ -",
-   line_prefix, metainfo, a_name.buf, name_a_tab, reset,
-   line_prefix, metainfo, b_name.buf, name_b_tab, reset,
-   line_prefix, fraginfo);
+
+   strbuf_addstr(, "@@ -");
if (!o->irreversible_delete)
-   print_line_count(o->file, lc_a);
+   add_line_count(, lc_a);
else
-   fprintf(o->file, "?,?");
-   fprintf(o->file, " +");
-   print_line_count(o->file, lc_b);
-   fprintf(o->file, " @@%s\n", reset);
+   strbuf_addstr(, "?,?");
+   strbuf_addstr(, " +");
+   add_line_count(, lc_b);
+   strbuf_addstr(, " @@\n");
+   emit_line(o, fraginfo, reset, out.buf, out.len);
+   strbuf_release();
+
if (lc_a && !o->irreversible_delete)
emit_rewrite_lines(, '-', data_one, size_one);
if (lc_b)
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 01/17] diff: move line ending check into emit_hunk_header

2016-09-12 Thread Stefan Beller
In a later patch, I want to propose an option to detect
moved lines in a diff, which cannot be done in a one-pass over
the diff. Instead we need to go over the whole diff twice,
because we cannot detect the first line of the two corresponding
lines (+ and -) that got moved.

So to prepare the diff machinery for two pass algorithms
(i.e. buffer it all up and then operate on the result),
move all emissions to places, such that the only emitting
function is emit_line_0.

This patch moves code that is conceptually part of
emit_hunk_header, but was using output in fn_out_consume,
back to emit_hunk_header.

Signed-off-by: Stefan Beller 
---
 diff.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index cc8e812..aa50b2d 100644
--- a/diff.c
+++ b/diff.c
@@ -610,6 +610,9 @@ static void emit_hunk_header(struct emit_callback *ecbdata,
}
 
strbuf_add(, line + len, org_len - len);
+   if (line[org_len - 1] != '\n')
+   strbuf_addch(, '\n');
+
emit_line(ecbdata->opt, "", "", msgbuf.buf, msgbuf.len);
strbuf_release();
 }
@@ -1247,8 +1250,6 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
len = sane_truncate_line(ecbdata, line, len);
find_lno(line, ecbdata);
emit_hunk_header(ecbdata, line, len);
-   if (line[len-1] != '\n')
-   putc('\n', o->file);
return;
}
 
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 03/17] diff.c: drop tautologous condition in emit_line_0

2016-09-12 Thread Stefan Beller
When `first` equals '\r', then it cannot equal '\n', which makes
`!has_trailing_newline` always true if `first` is '\r'.
Drop that check.

Signed-off-by: Stefan Beller 
---
 diff.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 156c2aa..9d2e704 100644
--- a/diff.c
+++ b/diff.c
@@ -460,8 +460,7 @@ static void emit_line_0(struct diff_options *o, const char 
*set, const char *res
 
if (len == 0) {
has_trailing_newline = (first == '\n');
-   has_trailing_carriage_return = (!has_trailing_newline &&
-   (first == '\r'));
+   has_trailing_carriage_return = (first == '\r');
nofirst = has_trailing_newline || has_trailing_carriage_return;
} else {
has_trailing_newline = (len > 0 && line[len-1] == '\n');
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 02/17] diff: emit_{add, del, context}_line to increase {pre,post}image line count

2016-09-12 Thread Stefan Beller
At all call sites of emit_{add, del, context}_line we increment the line
count, so move it into the respective functions to make the code at the
calling site a bit clearer.

Signed-off-by: Stefan Beller 
---
 diff.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/diff.c b/diff.c
index aa50b2d..156c2aa 100644
--- a/diff.c
+++ b/diff.c
@@ -536,6 +536,7 @@ static void emit_add_line(const char *reset,
  struct emit_callback *ecbdata,
  const char *line, int len)
 {
+   ecbdata->lno_in_postimage++;
emit_line_checked(reset, ecbdata, line, len,
  DIFF_FILE_NEW, WSEH_NEW, '+');
 }
@@ -544,6 +545,7 @@ static void emit_del_line(const char *reset,
  struct emit_callback *ecbdata,
  const char *line, int len)
 {
+   ecbdata->lno_in_preimage++;
emit_line_checked(reset, ecbdata, line, len,
  DIFF_FILE_OLD, WSEH_OLD, '-');
 }
@@ -552,6 +554,8 @@ static void emit_context_line(const char *reset,
  struct emit_callback *ecbdata,
  const char *line, int len)
 {
+   ecbdata->lno_in_postimage++;
+   ecbdata->lno_in_preimage++;
emit_line_checked(reset, ecbdata, line, len,
  DIFF_CONTEXT, WSEH_CONTEXT, ' ');
 }
@@ -662,13 +666,10 @@ static void emit_rewrite_lines(struct emit_callback *ecb,
 
endp = memchr(data, '\n', size);
len = endp ? (endp - data + 1) : size;
-   if (prefix != '+') {
-   ecb->lno_in_preimage++;
+   if (prefix != '+')
emit_del_line(reset, ecb, data, len);
-   } else {
-   ecb->lno_in_postimage++;
+   else
emit_add_line(reset, ecb, data, len);
-   }
size -= len;
data += len;
}
@@ -1293,16 +1294,12 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
 
switch (line[0]) {
case '+':
-   ecbdata->lno_in_postimage++;
emit_add_line(reset, ecbdata, line + 1, len - 1);
break;
case '-':
-   ecbdata->lno_in_preimage++;
emit_del_line(reset, ecbdata, line + 1, len - 1);
break;
case ' ':
-   ecbdata->lno_in_postimage++;
-   ecbdata->lno_in_preimage++;
emit_context_line(reset, ecbdata, line + 1, len - 1);
break;
default:
-- 
2.10.0.21.g1da280f.dirty



[RFC/PATCH 00/17]

2016-09-12 Thread Stefan Beller
About a week ago, I started experimenting to add a new functionality to the 
diff machinery: We want to color moved lines differently! This will aid 
developers
to review patches such as the patch "Move libified code from builtin/apply.c to 
apply.{c,h}"
Or a smaller example:

git show 11979b9


In the original patch series I just used the xdl diff machinery multiple times
to obtain the output for a multi pass algorithm I need for this coloring.
This was one of Junios main concerns (We have to rely on the xdl stuff to 
yield the same output in the two passes).

This brings in another approach:
 * run the xdl stuff only once
 * In case we do not ask for coloring moves, put it out immediately
   (small detour: we do not use fprintf/fputs directly but we channel
   all output through the emit_line_* interface)
 * When asking for coloring moves: buffer all its output for now.
   Feed the line by line into the move detection algorithm.
 * then output it all at once using the information from the move detection
   to recolor moves while outputting.
   
While viewing the output of 11979b9 again, I notice a small bug in the proposal,
but I'd rather be asking for review of the whole design approach.
   
Thanks,
Stefan

Stefan Beller (17):
  diff: move line ending check into emit_hunk_header
  diff: emit_{add, del, context}_line to increase {pre,post}image line
count
  diff.c: drop tautologous condition in emit_line_0
  diff.c: factor out diff_flush_patch_all_file_pairs
  diff.c: emit_line_0 can handle no color setting
  diff.c: convert fn_out_consume to use emit_line_*
  diff.c: convert builtin_diff to use emit_line_*
  diff.c: convert emit_rewrite_diff to use emit_line_*
  diff.c: convert emit_rewrite_lines to use emit_line_*
  submodule.c: convert show_submodule_summary to use emit_line_fmt
  diff.c: convert emit_binary_diff_body to use emit_line_*
  diff.c: convert show_stats to use emit_line_*
  diff.c: convert word diffing to use emit_line_*
  diff.c: convert diff_flush to use emit_line_*
  diff.c: convert diff_summary to use emit_line_*
  diff: buffer output in emit_line_0
  diff.c: color moved lines differently

 Documentation/config.txt   |  12 +-
 Documentation/diff-options.txt |   7 +
 contrib/completion/git-completion.bash |   2 +
 diff.c | 666 +++--
 diff.h |  35 +-
 submodule.c|  42 +--
 submodule.h|   3 +-
 t/t4015-diff-whitespace.sh | 229 
 8 files changed, 760 insertions(+), 236 deletions(-)

-- 
2.10.0.21.g1da280f.dirty



Re: [PATCH v2] ls-files: adding support for submodules

2016-09-12 Thread Brandon Williams
>  static void write_name(const char *name)
>  {
> /*
> +* NEEDSWORK: To make this thread-safe, full_name would have to be 
> owned
> +* by the caller.
> +*
> +* full_name get reused across output lines to minimize the allocation
> +* churn.
> +*/
> +   static struct strbuf full_name = STRBUF_INIT;
> +   if (output_path_prefix != '\0') {

It was pointed out to me that this should be:
if (*output_path_prefix != '\0') {


> +   strbuf_reset(_name);
> +   strbuf_addstr(_name, output_path_prefix);
> +   strbuf_addstr(_name, name);
> +   name = full_name.buf;
> +   }


[PATCH 11/16] pager: handle early config

2016-09-12 Thread Jeff King
The pager code is often run early in the git.c startup,
before we have actually found the repository. When we ask
git_config() to look for values like core.pager, it doesn't
know where to find the repo-level config, and will blindly
examine ".git/config" if it exists. That's why t7006 shows
that many pager-related features happen to work from the
top-level of a repository, but not from a subdirectory.

This patch pulls that ".git/config" hack explicitly into the
pager code. There are two reasons for this:

  1. We'd like to clean up the git_config() behavior, as
 looking at ".git/config" when we do not have a
 configured repository is often the wrong thing to do.
 But we'd prefer not to break the pager config any worse
 than it already is.

  2. It's one very tiny step on the road to ultimately
 making the pager config work consistently. If we
 eventually get an equivalent of setup_git_directory()
 that _just_ finds the directory and doesn't chdir() or
 set up any global state, we could plug it in here
 (instead of blindly looking at ".git/config").

Signed-off-by: Jeff King 
---
 pager.c | 35 +--
 1 file changed, 33 insertions(+), 2 deletions(-)

diff --git a/pager.c b/pager.c
index 46cc411..ae79643 100644
--- a/pager.c
+++ b/pager.c
@@ -43,6 +43,37 @@ static int core_pager_config(const char *var, const char 
*value, void *data)
return 0;
 }
 
+static void read_early_config(config_fn_t cb, void *data)
+{
+   git_config_with_options(cb, data, NULL, 1);
+
+   /*
+* Note that this is a really dirty hack that does the wrong thing in
+* many cases. The crux of the problem is that we cannot run
+* setup_git_directory() early on in git's setup, so we have no idea if
+* we are in a repository or not, and therefore are not sure whether
+* and how to read repository-local config.
+*
+* So if we _aren't_ in a repository (or we are but we would reject its
+* core.repositoryformatversion), we'll read whatever is in .git/config
+* blindly. Similarly, if we _are_ in a repository, but not at the
+* root, we'll fail to find .git/config (because it's really
+* ../.git/config, etc). See t7006 for a complete set of failures.
+*
+* However, we have historically provided this hack because it does
+* work some of the time (namely when you are at the top-level of a
+* valid repository), and would rarely make things worse (i.e., you do
+* not generally have a .git/config file sitting around).
+*/
+   if (!startup_info->have_repository) {
+   struct git_config_source repo_config;
+
+   memset(_config, 0, sizeof(repo_config));
+   repo_config.file = ".git/config";
+   git_config_with_options(cb, data, _config, 1);
+   }
+}
+
 const char *git_pager(int stdout_is_tty)
 {
const char *pager;
@@ -53,7 +84,7 @@ const char *git_pager(int stdout_is_tty)
pager = getenv("GIT_PAGER");
if (!pager) {
if (!pager_program)
-   git_config(core_pager_config, NULL);
+   read_early_config(core_pager_config, NULL);
pager = pager_program;
}
if (!pager)
@@ -216,7 +247,7 @@ int check_pager_config(const char *cmd)
data.want = -1;
data.value = NULL;
 
-   git_config(pager_command_config, );
+   read_early_config(pager_command_config, );
 
if (data.value)
pager_program = data.value;
-- 
2.10.0.230.g6f8d04b



[PATCH 12/16] t1302: use "git -C"

2016-09-12 Thread Jeff King
This is shorter, and saves a subshell.

Signed-off-by: Jeff King 
---
 t/t1302-repo-version.sh | 30 ++
 1 file changed, 6 insertions(+), 24 deletions(-)

diff --git a/t/t1302-repo-version.sh b/t/t1302-repo-version.sh
index 9bcd349..f859809 100755
--- a/t/t1302-repo-version.sh
+++ b/t/t1302-repo-version.sh
@@ -25,10 +25,7 @@ test_expect_success 'setup' '
 test_expect_success 'gitdir selection on normal repos' '
echo 0 >expect &&
git config core.repositoryformatversion >actual &&
-   (
-   cd test &&
-   git config core.repositoryformatversion >../actual2
-   ) &&
+   git -C test config core.repositoryformatversion >actual2 &&
test_cmp expect actual &&
test_cmp expect actual2
 '
@@ -36,35 +33,20 @@ test_expect_success 'gitdir selection on normal repos' '
 test_expect_success 'gitdir selection on unsupported repo' '
# Make sure it would stop at test2, not trash
echo 99 >expect &&
-   (
-   cd test2 &&
-   git config core.repositoryformatversion >../actual
-   ) &&
+   git -C test2 config core.repositoryformatversion >actual &&
test_cmp expect actual
 '
 
 test_expect_success 'gitdir not required mode' '
git apply --stat test.patch &&
-   (
-   cd test &&
-   git apply --stat ../test.patch
-   ) &&
-   (
-   cd test2 &&
-   git apply --stat ../test.patch
-   )
+   git -C test apply --stat ../test.patch &&
+   git -C test2 apply --stat ../test.patch
 '
 
 test_expect_success 'gitdir required mode' '
git apply --check --index test.patch &&
-   (
-   cd test &&
-   git apply --check --index ../test.patch
-   ) &&
-   (
-   cd test2 &&
-   test_must_fail git apply --check --index ../test.patch
-   )
+   git -C test apply --check --index ../test.patch &&
+   test_must_fail git -C test2 apply --check --index ../test.patch
 '
 
 check_allow () {
-- 
2.10.0.230.g6f8d04b



[PATCH 13/16] test-config: setup git directory

2016-09-12 Thread Jeff King
The t1308 test script uses our test-config helper to read
repository-level config, but never actually sets up the
repository. This works because git_config() blindly reads
".git/config" even if we have not configured a repository.

This means that test-config won't work from a subdirectory,
though since it's just a helper for the test scripts, that's
not a big deal.

More important is that the behavior of git_config() is going
to change, and we want to make sure that t1308 continues to
work. We can just use setup_git_directory(), and not the
gentle form; there's no point in being flexible, as it's
just a helper for the tests.

Signed-off-by: Jeff King 
---
 t/helper/test-config.c | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/t/helper/test-config.c b/t/helper/test-config.c
index 3c6d08c..83a4f2a 100644
--- a/t/helper/test-config.c
+++ b/t/helper/test-config.c
@@ -72,6 +72,9 @@ int cmd_main(int argc, const char **argv)
const char *v;
const struct string_list *strptr;
struct config_set cs;
+
+   setup_git_directory();
+
git_configset_init();
 
if (argc < 2) {
-- 
2.10.0.230.g6f8d04b



[PATCH 15/16] init: expand comments explaining config trickery

2016-09-12 Thread Jeff King
git-init may copy "config" from the templates directory and
then re-read it. There are some comments explaining what's
going on here, but they are not grouped very well with the
matching code. Let's rearrange and expand them.

Signed-off-by: Jeff King 
---
 builtin/init-db.c | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/builtin/init-db.c b/builtin/init-db.c
index 3a45f0b..e935895 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -191,16 +191,19 @@ static int create_default_files(const char *template_path)
/* Just look for `init.templatedir` */
git_config(git_init_db_config, NULL);
 
-   /* First copy the templates -- we might have the default
+   /*
+* First copy the templates -- we might have the default
 * config file there, in which case we would want to read
 * from it after installing.
 */
copy_templates(template_path);
-
git_config(git_default_config, NULL);
-   is_bare_repository_cfg = init_is_bare_repository;
 
-   /* reading existing config may have overwrote it */
+   /*
+* We must make sure command-line options continue to override any
+* values we might have just re-read from the config.
+*/
+   is_bare_repository_cfg = init_is_bare_repository;
if (init_shared_repository != -1)
set_shared_repository(init_shared_repository);
 
-- 
2.10.0.230.g6f8d04b



[PATCH 14/16] config: only read .git/config from configured repos

2016-09-12 Thread Jeff King
When git_config() runs, it looks in the system, user-wide,
and repo-level config files. It gets the latter by calling
git_pathdup(), which in turn calls get_git_dir(). If we
haven't set up the git repository yet, this may simply
return ".git", and we will look at ".git/config".  This
seems like it would be helpful (presumably we haven't set up
the repository yet, so it tries to find it), but it turns
out to be a bad idea for a few reasons:

  - it's not sufficient, and therefore hides bugs in a
confusing way. Config will be respected if commands are
run from the top-level of the working tree, but not from
a subdirectory.

  - it's not always true that we haven't set up the
repository _yet_; we may not want to do it at all. For
instance, if you run "git init /some/path" from inside
another repository, it should not load config from the
existing repository.

  - there might be a path ".git/config", but it is not the
actual repository we would find via setup_git_directory().
This may happen, e.g., if you are storing a git
repository inside another git repository, but have
munged one of the files in such a way that the
inner repository is not valid (e.g., by removing HEAD).

We have at least two bugs of the second type in git-init,
introduced by ae5f677 (lazily load core.sharedrepository,
2016-03-11). It causes init to use git_configset(), which
loads all of the config, including values from the current
repo (if any).  This shows up in two ways:

  1. If we happen to be in an existing repository directory,
 we'll read and respect core.sharedrepository from it,
 even though it should have no bearing on the new
 repository. A new test in t1301 covers this.

  2. Similarly, if we're in an existing repo that sets
 core.logallrefupdates, that will cause init to fail to
 set it in a newly created repository (because it thinks
 that the user's templates already did so). A new test
 in t0001 covers this.

We also need to adjust an existing test in t1302, which
gives another example of why this patch is an improvement.

That test creates an embedded repository with a bogus
core.repositoryformatversion of "99". It wants to make sure
that we actually stop at the bogus repo rather than
continuing upward to find the outer repo. So it checks that
"git config core.repositoryformatversion" returns 99. But
that only works because we blindly read ".git/config", even
though we _know_ we're in a repository whose vintage we do
not understand.

After this patch, we avoid reading config from the unknown
vintage repository at all, which is a safer choice.  But we
need to tweak the test, since core.repositoryformatversion
will not return 99; it will claim that it could not find the
variable at all.

Signed-off-by: Jeff King 
---
 cache.h | 6 ++
 config.c| 2 +-
 environment.c   | 7 +++
 t/t0001-init.sh | 9 +
 t/t1301-shared-repo.sh  | 9 +
 t/t1302-repo-version.sh | 4 +---
 6 files changed, 33 insertions(+), 4 deletions(-)

diff --git a/cache.h b/cache.h
index 4f23952..e9592d3 100644
--- a/cache.h
+++ b/cache.h
@@ -453,6 +453,12 @@ static inline enum object_type object_type(unsigned int 
mode)
  */
 extern const char * const local_repo_env[];
 
+/*
+ * Returns true iff we have a configured git repository (either via
+ * setup_git_directory, or in the environment via $GIT_DIR).
+ */
+int have_git_dir(void);
+
 extern int is_bare_repository_cfg;
 extern int is_bare_repository(void);
 extern int is_inside_git_dir(void);
diff --git a/config.c b/config.c
index 8b28447..1e4b617 100644
--- a/config.c
+++ b/config.c
@@ -1286,7 +1286,7 @@ static int do_git_config_sequence(config_fn_t fn, void 
*data)
int ret = 0;
char *xdg_config = xdg_config_home("config");
char *user_config = expand_user_path("~/.gitconfig");
-   char *repo_config = git_pathdup("config");
+   char *repo_config = have_git_dir() ? git_pathdup("config") : NULL;
 
current_parsing_scope = CONFIG_SCOPE_SYSTEM;
if (git_config_system() && !access_or_die(git_etc_gitconfig(), R_OK, 0))
diff --git a/environment.c b/environment.c
index c266428..c0cce6b 100644
--- a/environment.c
+++ b/environment.c
@@ -195,6 +195,13 @@ int is_bare_repository(void)
return is_bare_repository_cfg && !get_git_work_tree();
 }
 
+int have_git_dir(void)
+{
+   return startup_info->have_repository
+   || git_dir
+   || getenv(GIT_DIR_ENVIRONMENT);
+}
+
 const char *get_git_dir(void)
 {
if (!git_dir)
diff --git a/t/t0001-init.sh b/t/t0001-init.sh
index a6fdd5e..8ffbbea 100755
--- a/t/t0001-init.sh
+++ b/t/t0001-init.sh
@@ -384,4 +384,13 @@ test_expect_success MINGW 'bare git dir not hidden' '
! is_hidden newdir
 '
 
+test_expect_success 'remote init from does not use config from cwd' '
+   rm -rf newdir &&
+   test_config core.logallrefupdates 

[PATCH 16/16] init: reset cached config when entering new repo

2016-09-12 Thread Jeff King
After we copy the templates into place, we re-read the
config in case we copied in a default config file. But since
git_config() is backed by a cache these days, it's possible
that the call will not actually touch the filesystem at all;
we need to tell it that something has changed behind the
scenes.

Note that we also need to reset the shared_repository
config. At first glance, it seems like this should probably
just be folded into git_config_clear(). But unfortunately
that is not quite right. The shared repository value may
come from config, _or_ it may have been set manually. So
only the caller who knows whether or not they set it is the
one who can clear it (and indeed, if you _do_ put it into
git_config_clear(), then many tests fail, as we have to
clear the config cache any time we set a new config
variable).

There are three tests here. The first two actually pass
already, though it's largely luck: they just don't happen to
actually read any config before we enter the new repo.

But the third one does fail without this patch; we look at
core.sharedrepository while creating the directory, but need
to make sure the value from the template config overrides
it.

Signed-off-by: Jeff King 
---
 builtin/init-db.c  |  6 ++
 cache.h|  7 +++
 environment.c  |  5 +
 t/t1301-shared-repo.sh | 32 
 4 files changed, 50 insertions(+)

diff --git a/builtin/init-db.c b/builtin/init-db.c
index e935895..cea4550 100644
--- a/builtin/init-db.c
+++ b/builtin/init-db.c
@@ -195,8 +195,14 @@ static int create_default_files(const char *template_path)
 * First copy the templates -- we might have the default
 * config file there, in which case we would want to read
 * from it after installing.
+*
+* Before reading that config, we also need to clear out any cached
+* values (since we've just potentially changed what's available on
+* disk).
 */
copy_templates(template_path);
+   git_config_clear();
+   reset_shared_repository();
git_config(git_default_config, NULL);
 
/*
diff --git a/cache.h b/cache.h
index e9592d3..4c8d85b 100644
--- a/cache.h
+++ b/cache.h
@@ -671,8 +671,15 @@ extern size_t delta_base_cache_limit;
 extern unsigned long big_file_threshold;
 extern unsigned long pack_size_limit_cfg;
 
+/*
+ * Accessors for the core.sharedrepository config which lazy-load the value
+ * from the config (if not already set). The "reset" function can be
+ * used to unset "set" or cached value, meaning that the value will be loaded
+ * fresh from the config file on the next call to get_shared_repository().
+ */
 void set_shared_repository(int value);
 int get_shared_repository(void);
+void reset_shared_repository(void);
 
 /*
  * Do replace refs need to be checked this run?  This variable is
diff --git a/environment.c b/environment.c
index c0cce6b..cd5aa57 100644
--- a/environment.c
+++ b/environment.c
@@ -351,3 +351,8 @@ int get_shared_repository(void)
}
return the_shared_repository;
 }
+
+void reset_shared_repository(void)
+{
+   need_shared_repository_from_config = 1;
+}
diff --git a/t/t1301-shared-repo.sh b/t/t1301-shared-repo.sh
index 7c28642..1312004 100755
--- a/t/t1301-shared-repo.sh
+++ b/t/t1301-shared-repo.sh
@@ -181,4 +181,36 @@ test_expect_success POSIXPERM 'remote init does not use 
config from cwd' '
test_cmp expect actual
 '
 
+test_expect_success POSIXPERM 're-init respects core.sharedrepository (local)' 
'
+   git config core.sharedrepository 0666 &&
+   umask 0022 &&
+   echo whatever >templates/foo &&
+   git init --template=templates &&
+   echo "-rw-rw-rw-" >expect &&
+   modebits .git/foo >actual &&
+   test_cmp expect actual
+'
+
+test_expect_success POSIXPERM 're-init respects core.sharedrepository 
(remote)' '
+   rm -rf child.git &&
+   umask 0022 &&
+   git init --bare --shared=0666 child.git &&
+   test_path_is_missing child.git/foo &&
+   git init --bare --template=../templates child.git &&
+   echo "-rw-rw-rw-" >expect &&
+   modebits child.git/foo >actual &&
+   test_cmp expect actual
+'
+
+test_expect_success POSIXPERM 'template can set core.sharedrepository' '
+   rm -rf child.git &&
+   umask 0022 &&
+   git config core.sharedrepository 0666 &&
+   cp .git/config templates/config &&
+   git init --bare --template=../templates child.git &&
+   echo "-rw-rw-rw-" >expect &&
+   modebits child.git/HEAD >actual &&
+   test_cmp expect actual
+'
+
 test_done
-- 
2.10.0.230.g6f8d04b


[PATCH 10/16] pager: use callbacks instead of configset

2016-09-12 Thread Jeff King
While the cached configset interface is more pleasant to
use, it is not appropriate for "early" config like pager
setup, which must sometimes do tricky things like reading
from ".git/config" even when we have not set up the
repository.

As a preparatory step to handling these cases better, let's
switch back to using the callback interface, which gives us
more control.

Note that this is essentially a revert of 586f414 (pager.c:
replace `git_config()` with `git_config_get_value()`,
2014-08-07), but with some minor style fixups and
modernizations.

Signed-off-by: Jeff King 
---
 pager.c | 47 +--
 1 file changed, 33 insertions(+), 14 deletions(-)

diff --git a/pager.c b/pager.c
index 8aa0a82..46cc411 100644
--- a/pager.c
+++ b/pager.c
@@ -183,23 +183,42 @@ int decimal_width(uintmax_t number)
return width;
 }
 
-/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */
-int check_pager_config(const char *cmd)
+struct pager_command_config_data {
+   const char *cmd;
+   int want;
+   char *value;
+};
+
+static int pager_command_config(const char *var, const char *value, void 
*vdata)
 {
-   int want = -1;
-   struct strbuf key = STRBUF_INIT;
-   const char *value = NULL;
-   strbuf_addf(, "pager.%s", cmd);
-   if (git_config_key_is_valid(key.buf) &&
-   !git_config_get_value(key.buf, )) {
-   int b = git_config_maybe_bool(key.buf, value);
+   struct pager_command_config_data *data = vdata;
+   const char *cmd;
+
+   if (skip_prefix(var, "pager.", ) && !strcmp(cmd, data->cmd)) {
+   int b = git_config_maybe_bool(var, value);
if (b >= 0)
-   want = b;
+   data->want = b;
else {
-   want = 1;
-   pager_program = xstrdup(value);
+   data->want = 1;
+   data->value = xstrdup(value);
}
}
-   strbuf_release();
-   return want;
+
+   return 0;
+}
+
+/* returns 0 for "no pager", 1 for "use pager", and -1 for "not specified" */
+int check_pager_config(const char *cmd)
+{
+   struct pager_command_config_data data;
+
+   data.cmd = cmd;
+   data.want = -1;
+   data.value = NULL;
+
+   git_config(pager_command_config, );
+
+   if (data.value)
+   pager_program = data.value;
+   return data.want;
 }
-- 
2.10.0.230.g6f8d04b



[PATCH 02/16] hash-object: always try to set up the git repository

2016-09-12 Thread Jeff King
When "hash-object" is run without "-w", we don't need to be
in a git repository at all; we can just hash the object and
write its sha1 to stdout. However, if we _are_ in a git
repository, we would want to know that so we can follow the
normal rules for respecting config, .gitattributes, etc.

This happens to work at the top-level of a git repository
because we blindly read ".git/config", but as the included
test shows, it does not work when you are in a subdirectory.

The solution is to just do a "gentle" setup in this case. We
already take care to use prefix_filename() on any filename
arguments we get (to handle the "-w" case), so we don't need
to do anything extra to handle the side effects of repo
setup.

An alternative would be to specify RUN_SETUP_GENTLY for this
command in git.c, and then die if "-w" is set but we are not
in a repository. However, the error messages generated at
the time of setup_git_directory() are more detailed, so it's
better to find out which mode we are in, and then call the
appropriate function.

Signed-off-by: Jeff King 
---
 builtin/hash-object.c  | 13 -
 t/t1007-hash-object.sh | 11 +++
 2 files changed, 19 insertions(+), 5 deletions(-)

diff --git a/builtin/hash-object.c b/builtin/hash-object.c
index f7d3567..9028e1f 100644
--- a/builtin/hash-object.c
+++ b/builtin/hash-object.c
@@ -87,6 +87,7 @@ int cmd_hash_object(int argc, const char **argv, const char 
*prefix)
int stdin_paths = 0;
int no_filters = 0;
int literally = 0;
+   int nongit = 0;
unsigned flags = HASH_FORMAT_CHECK;
const char *vpath = NULL;
const struct option hash_object_options[] = {
@@ -107,12 +108,14 @@ int cmd_hash_object(int argc, const char **argv, const 
char *prefix)
argc = parse_options(argc, argv, NULL, hash_object_options,
 hash_object_usage, 0);
 
-   if (flags & HASH_WRITE_OBJECT) {
+   if (flags & HASH_WRITE_OBJECT)
prefix = setup_git_directory();
-   prefix_length = prefix ? strlen(prefix) : 0;
-   if (vpath && prefix)
-   vpath = prefix_filename(prefix, prefix_length, vpath);
-   }
+   else
+   prefix = setup_git_directory_gently();
+
+   prefix_length = prefix ? strlen(prefix) : 0;
+   if (vpath && prefix)
+   vpath = prefix_filename(prefix, prefix_length, vpath);
 
git_config(git_default_config, NULL);
 
diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
index c89faa6..acca9ac 100755
--- a/t/t1007-hash-object.sh
+++ b/t/t1007-hash-object.sh
@@ -123,6 +123,17 @@ test_expect_success 'check that appropriate filter is 
invoke when --path is used
test "$file1_sha" = "$path1_sha"
 '
 
+test_expect_success 'gitattributes also work in a subdirectory' '
+   mkdir subdir &&
+   (
+   cd subdir &&
+   subdir_sha0=$(git hash-object ../file0) &&
+   subdir_sha1=$(git hash-object ../file1) &&
+   test "$file0_sha" = "$subdir_sha0" &&
+   test "$file1_sha" = "$subdir_sha1"
+   )
+'
+
 test_expect_success 'check that --no-filters option works' '
nofilters_file1=$(git hash-object --no-filters file1) &&
test "$file0_sha" = "$nofilters_file1" &&
-- 
2.10.0.230.g6f8d04b



[PATCH 09/16] pager: make pager_program a file-local static

2016-09-12 Thread Jeff King
This variable is only ever used by the routines in pager.c,
and other parts of the code should always use those routines
(like git_pager()) to make decisions about which pager to
use. Let's reduce its scope to prevent accidents.

Signed-off-by: Jeff King 
---
 cache.h   | 1 -
 environment.c | 1 -
 pager.c   | 1 +
 3 files changed, 1 insertion(+), 2 deletions(-)

diff --git a/cache.h b/cache.h
index 6738050..4f23952 100644
--- a/cache.h
+++ b/cache.h
@@ -1786,7 +1786,6 @@ extern void write_file(const char *path, const char *fmt, 
...);
 
 /* pager.c */
 extern void setup_pager(void);
-extern const char *pager_program;
 extern int pager_in_use(void);
 extern int pager_use_color;
 extern int term_columns(void);
diff --git a/environment.c b/environment.c
index ca72464..c266428 100644
--- a/environment.c
+++ b/environment.c
@@ -40,7 +40,6 @@ size_t packed_git_window_size = 
DEFAULT_PACKED_GIT_WINDOW_SIZE;
 size_t packed_git_limit = DEFAULT_PACKED_GIT_LIMIT;
 size_t delta_base_cache_limit = 96 * 1024 * 1024;
 unsigned long big_file_threshold = 512 * 1024 * 1024;
-const char *pager_program;
 int pager_use_color = 1;
 const char *editor_program;
 const char *askpass_program;
diff --git a/pager.c b/pager.c
index d747f50..8aa0a82 100644
--- a/pager.c
+++ b/pager.c
@@ -7,6 +7,7 @@
 #endif
 
 static struct child_process pager_process = CHILD_PROCESS_INIT;
+static const char *pager_program;
 
 static void wait_for_pager(int in_signal)
 {
-- 
2.10.0.230.g6f8d04b



[PATCH 06/16] diff: always try to set up the repository

2016-09-12 Thread Jeff King
If we see an explicit "--no-index", we do not bother calling
setup_git_directory_gently() at all. This means that we may
miss out on reading repo-specific config.

It's arguable whether this is correct or not. If we were
designing from scratch, making "git diff --no-index"
completely ignore the repository makes some sense. But we
are nowhere near scratch, so let's look at the existing
behavior:

  1. If you're in the top-level of a repository and run an
 explicit "diff --no-index", the config subsystem falls
 back to reading ".git/config", and we will respect repo
 config.

  2. If you're in a subdirectory of a repository, then we
 still try to read ".git/config", but it generally
 doesn't exist. So "diff --no-index" there does not
 respect repo config.

  3. If you have $GIT_DIR set in the environment, we read
 and respect $GIT_DIR/config,

  4. If you run "git diff /tmp/foo /tmp/bar" to get an
 implicit no-index, we _do_ run the repository setup,
 and set $GIT_DIR (or respect an existing $GIT_DIR
 variable). We find the repo config no matter where we
 started, and respect it.

So we already respect the repository config in a number of
common cases, and case (2) is the only one that does not.
And at least one of our tests, t4034, depends on case (1)
behaving as it does now (though it is just incidental, not
an explicit test for this behavior).

So let's bring case (2) in line with the others by always
running the repository setup, even with an explicit
"--no-index". We shouldn't need to change anything else, as the
implicit case already handles the prefix.

Signed-off-by: Jeff King 
---
 builtin/diff.c   |  4 ++--
 t/t4053-diff-no-index.sh | 20 
 2 files changed, 22 insertions(+), 2 deletions(-)

diff --git a/builtin/diff.c b/builtin/diff.c
index a31643c..7f91f6d 100644
--- a/builtin/diff.c
+++ b/builtin/diff.c
@@ -301,9 +301,9 @@ int cmd_diff(int argc, const char **argv, const char 
*prefix)
break;
}
 
-   if (!no_index) {
-   prefix = setup_git_directory_gently();
+   prefix = setup_git_directory_gently();
 
+   if (!no_index) {
/*
 * Treat git diff with at least one path outside of the
 * repo the same as if the command would have been executed
diff --git a/t/t4053-diff-no-index.sh b/t/t4053-diff-no-index.sh
index e60c951..453e6c3 100755
--- a/t/t4053-diff-no-index.sh
+++ b/t/t4053-diff-no-index.sh
@@ -107,4 +107,24 @@ test_expect_success 'diff from repo subdir shows real 
paths (implicit)' '
test_cmp expect actual.head
 '
 
+test_expect_success 'diff --no-index from repo subdir respects config 
(explicit)' '
+   echo "diff --git ../../non/git/a ../../non/git/b" >expect &&
+   test_config -C repo diff.noprefix true &&
+   test_expect_code 1 \
+   git -C repo/sub \
+   diff --no-index ../../non/git/a ../../non/git/b >actual &&
+   head -n 1 actual.head &&
+   test_cmp expect actual.head
+'
+
+test_expect_success 'diff --no-index from repo subdir respects config 
(implicit)' '
+   echo "diff --git ../../non/git/a ../../non/git/b" >expect &&
+   test_config -C repo diff.noprefix true &&
+   test_expect_code 1 \
+   git -C repo/sub \
+   diff ../../non/git/a ../../non/git/b >actual &&
+   head -n 1 actual.head &&
+   test_cmp expect actual.head
+'
+
 test_done
-- 
2.10.0.230.g6f8d04b



[PATCH 08/16] pager: stop loading git_default_config()

2016-09-12 Thread Jeff King
In git_pager(), we really only care about getting the value
of core.pager. But to do so, we use the git_default_config()
callback, which loads many other values. Ordinarily it
isn't a big deal to load this config an extra time, as it
simply overwrites the values from the previous run. But it's
a bad idea here, for two reasons:

  1. The pager setup may be called very early in the
 program, before we have found the git repository. As a
 result, we may fail to read the correct repo-level
 config file.  This is a problem for core.pager, too,
 but we should at least try to minimize the pollution to
 other configured values.

  2. Because we call setup_pager() from git.c, basically
 every builtin command _may_ end up reading this config
 and getting an implicit git_default_config() setup.

 Which doesn't sound like a terrible thing, except that
 we don't do it consistently; it triggers only when
 stdout is a tty. So if a command forgets to load the
 default config itself (but depends on it anyway), it
 may appear to work, and then mysteriously fail when the
 pager is not in use.

We can improve this by loading _just_ the core.pager config
from git_pager().

Signed-off-by: Jeff King 
---
 config.c | 3 ---
 pager.c  | 9 -
 2 files changed, 8 insertions(+), 4 deletions(-)

diff --git a/config.c b/config.c
index 0dfed68..8b28447 100644
--- a/config.c
+++ b/config.c
@@ -927,9 +927,6 @@ static int git_default_core_config(const char *var, const 
char *value)
return 0;
}
 
-   if (!strcmp(var, "core.pager"))
-   return git_config_string(_program, var, value);
-
if (!strcmp(var, "core.editor"))
return git_config_string(_program, var, value);
 
diff --git a/pager.c b/pager.c
index ae46da9..d747f50 100644
--- a/pager.c
+++ b/pager.c
@@ -35,6 +35,13 @@ static void wait_for_pager_signal(int signo)
raise(signo);
 }
 
+static int core_pager_config(const char *var, const char *value, void *data)
+{
+   if (!strcmp(var, "core.pager"))
+   return git_config_string(_program, var, value);
+   return 0;
+}
+
 const char *git_pager(int stdout_is_tty)
 {
const char *pager;
@@ -45,7 +52,7 @@ const char *git_pager(int stdout_is_tty)
pager = getenv("GIT_PAGER");
if (!pager) {
if (!pager_program)
-   git_config(git_default_config, NULL);
+   git_config(core_pager_config, NULL);
pager = pager_program;
}
if (!pager)
-- 
2.10.0.230.g6f8d04b



[PATCH 03/16] patch-id: use RUN_SETUP_GENTLY

2016-09-12 Thread Jeff King
Patch-id does not require a repository because it is just
processing the incoming diff on stdin, but it may look at
git config for keys like patchid.stable.

Even though we do not setup_git_directory(), this works from
the top-level of a repository because we blindly look at
".git/config" in this case. But as the included test
demonstrates, it does not work from a subdirectory.

We can fix it by using RUN_SETUP_GENTLY. We do not take any
filenames from the user on the command line, so there's no
need to adjust them via prefix_filename().

Signed-off-by: Jeff King 
---
 git.c   |  2 +-
 t/t4204-patch-id.sh | 14 ++
 2 files changed, 15 insertions(+), 1 deletion(-)

diff --git a/git.c b/git.c
index 1c61151..ab5c99c 100644
--- a/git.c
+++ b/git.c
@@ -444,7 +444,7 @@ static struct cmd_struct commands[] = {
{ "pack-objects", cmd_pack_objects, RUN_SETUP },
{ "pack-redundant", cmd_pack_redundant, RUN_SETUP },
{ "pack-refs", cmd_pack_refs, RUN_SETUP },
-   { "patch-id", cmd_patch_id },
+   { "patch-id", cmd_patch_id, RUN_SETUP_GENTLY },
{ "pickaxe", cmd_blame, RUN_SETUP },
{ "prune", cmd_prune, RUN_SETUP },
{ "prune-packed", cmd_prune_packed, RUN_SETUP },
diff --git a/t/t4204-patch-id.sh b/t/t4204-patch-id.sh
index 84a8096..0288c17 100755
--- a/t/t4204-patch-id.sh
+++ b/t/t4204-patch-id.sh
@@ -143,6 +143,20 @@ test_expect_success 'patch-id supports git-format-patch 
MIME output' '
test_cmp patch-id_master patch-id_same
 '
 
+test_expect_success 'patch-id respects config from subdir' '
+   test_config patchid.stable true &&
+   mkdir subdir &&
+
+   # copy these because test_patch_id() looks for them in
+   # the current directory
+   cp bar-then-foo foo-then-bar subdir &&
+
+   (
+   cd subdir &&
+   test_patch_id irrelevant patchid.stable=true
+   )
+'
+
 cat >nonl <<\EOF
 diff --git i/a w/a
 index e69de29..2e65efe 100644
-- 
2.10.0.230.g6f8d04b



[PATCH 04/16] diff: skip implicit no-index check when given --no-index

2016-09-12 Thread Jeff King
We can invoke no-index mode in two ways: by an explicit
request from the user, or implicitly by noticing that we
have two paths, and at least one is outside the repository.

If the user already told us --no-index, there is no need for
us to do the implicit test at all.  However, we currently
do, and downgrade our "explicit" to DIFF_NO_INDEX_IMPLICIT.

This doesn't have any user-visible behavior, though it's not
immediately obvious why. We only trigger the implicit check
when we have exactly two non-option arguments. And the only
code that cares about implicit versus explicit is an error
message that we show when we _don't_ have two non-option
arguments.

However, it's worth fixing anyway. Besides being slightly
more efficient, it makes the code easier to follow, which
will help when we modify it in future patches.

Signed-off-by: Jeff King 
---
 builtin/diff.c | 25 +
 1 file changed, 13 insertions(+), 12 deletions(-)

diff --git a/builtin/diff.c b/builtin/diff.c
index b7a9405..a31643c 100644
--- a/builtin/diff.c
+++ b/builtin/diff.c
@@ -301,20 +301,21 @@ int cmd_diff(int argc, const char **argv, const char 
*prefix)
break;
}
 
-   if (!no_index)
+   if (!no_index) {
prefix = setup_git_directory_gently();
 
-   /*
-* Treat git diff with at least one path outside of the
-* repo the same as if the command would have been executed
-* outside of a git repository.  In this case it behaves
-* the same way as "git diff --no-index  ", which acts
-* as a colourful "diff" replacement.
-*/
-   if (nongit || ((argc == i + 2) &&
-  (!path_inside_repo(prefix, argv[i]) ||
-   !path_inside_repo(prefix, argv[i + 1]
-   no_index = DIFF_NO_INDEX_IMPLICIT;
+   /*
+* Treat git diff with at least one path outside of the
+* repo the same as if the command would have been executed
+* outside of a git repository.  In this case it behaves
+* the same way as "git diff --no-index  ", which acts
+* as a colourful "diff" replacement.
+*/
+   if (nongit || ((argc == i + 2) &&
+  (!path_inside_repo(prefix, argv[i]) ||
+   !path_inside_repo(prefix, argv[i + 1]
+   no_index = DIFF_NO_INDEX_IMPLICIT;
+   }
 
if (!no_index)
gitmodules_config();
-- 
2.10.0.230.g6f8d04b



[PATCH 07/16] pager: remove obsolete comment

2016-09-12 Thread Jeff King
The comment at the top of pager.c claims that we've split
the code out so that Windows can do something different.
This dates back to f67b45f (Introduce trivial new pager.c
helper infrastructure, 2006-02-28), because the original
implementation used fork(). Later, we ended up sticking the
Windows #ifdefs into this file anyway. And then even later,
in ea27a18 (spawn pager via run_command interface,
2008-07-22) we unified the implementations.

So these days this comment is really saying nothing at all.
Let's drop it.

Signed-off-by: Jeff King 
---
 pager.c | 5 -
 1 file changed, 5 deletions(-)

diff --git a/pager.c b/pager.c
index 6470b81..ae46da9 100644
--- a/pager.c
+++ b/pager.c
@@ -6,11 +6,6 @@
 #define DEFAULT_PAGER "less"
 #endif
 
-/*
- * This is split up from the rest of git so that we can do
- * something different on Windows.
- */
-
 static struct child_process pager_process = CHILD_PROCESS_INIT;
 
 static void wait_for_pager(int in_signal)
-- 
2.10.0.230.g6f8d04b



[PATCH 05/16] diff: handle --no-index prefixes consistently

2016-09-12 Thread Jeff King
If we see an explicit "git diff --no-index ../foo ../bar",
then we do not set up the git repository at all (we already
know we are in --no-index mode, so do not have to check "are
we in a repository?"), and hence have no "prefix" within the
repository. A patch generated by this command will have the
filenames "a/../foo" and "b/../bar", no matter which
directory we are in with respect to any repository.

However, in the implicit case, where we notice that the
files are outside the repository, we will have chdir()'d to
the top-level of the repository. We then feed the prefix
back to the diff machinery. As a result, running the same
diff from a subdirectory will result in paths that look like
"a/subdir/../../foo".

Besides being unnecessarily long, this may also be confusing
to the user: they don't care about the subdir or the
repository at all; it's just where they happened to be when
running the command. We should treat this the same as the
explicit --no-index case.

One way to address this would be to chdir() back to the
original path before running our diff. However, that's a bit
hacky, as we would also need to adjust $GIT_DIR, which could
be a relative path from our top-level.

Instead, we can reuse the diff machinery's RELATIVE_NAME
option, which automatically strips off the prefix. Note that
this _also_ restricts the diff to this relative prefix, but
that's OK for our purposes: we queue our own diff pairs
manually, and do not rely on that part of the diff code.

Signed-off-by: Jeff King 
---
 diff-no-index.c  |  3 +++
 t/t4053-diff-no-index.sh | 18 ++
 2 files changed, 21 insertions(+)

diff --git a/diff-no-index.c b/diff-no-index.c
index 1f8999b..f420786 100644
--- a/diff-no-index.c
+++ b/diff-no-index.c
@@ -281,6 +281,9 @@ void diff_no_index(struct rev_info *revs,
 
DIFF_OPT_SET(>diffopt, NO_INDEX);
 
+   DIFF_OPT_SET(>diffopt, RELATIVE_NAME);
+   revs->diffopt.prefix = prefix;
+
revs->max_count = -2;
diff_setup_done(>diffopt);
 
diff --git a/t/t4053-diff-no-index.sh b/t/t4053-diff-no-index.sh
index 6eb8321..e60c951 100755
--- a/t/t4053-diff-no-index.sh
+++ b/t/t4053-diff-no-index.sh
@@ -89,4 +89,22 @@ test_expect_success 'turning a file into a directory' '
)
 '
 
+test_expect_success 'diff from repo subdir shows real paths (explicit)' '
+   echo "diff --git a/../../non/git/a b/../../non/git/b" >expect &&
+   test_expect_code 1 \
+   git -C repo/sub \
+   diff --no-index ../../non/git/a ../../non/git/b >actual &&
+   head -n 1 actual.head &&
+   test_cmp expect actual.head
+'
+
+test_expect_success 'diff from repo subdir shows real paths (implicit)' '
+   echo "diff --git a/../../non/git/a b/../../non/git/b" >expect &&
+   test_expect_code 1 \
+   git -C repo/sub \
+   diff ../../non/git/a ../../non/git/b >actual &&
+   head -n 1 actual.head &&
+   test_cmp expect actual.head
+'
+
 test_done
-- 
2.10.0.230.g6f8d04b



[PATCH 0/16] fix config-reading in non-repos

2016-09-12 Thread Jeff King
The motivation for this series is to fix the regression in v2.9 where
core.logallrefupdates is sometimes not set properly in a newly
initialized repository, as described in this thread:

  http://public-inbox.org/git/c46d36ef-3c2e-374f-0f2e-ffe31104e...@gmx.de/T/#u

The root of the problem is that we are overly eager to read and use
config from ".git/config", even when we have not established that it is
part of a repository. This is especially bad for git-init, which would
not want to read anything until we've created the new repo.

So the two interesting parts of the fix are:

  1. We stop blindly reading ".git/config" when we don't know there's an
 actual git directory. This is in patch 14, and is actually enough
 to fix the v2.9 regression.

  2. We are more thorough about dropping any cached config values when
 we move into the new repository in git-init (patch 16).

 I didn't dig into when this was broken, but it was probably when we
 switched git_config() to use cached values in the v2.2.0
 time-frame.

Doing (1) required fixing up some builtins that depended on the blind
.git/config thing, as the tests demonstrated. But I think this is a sign
that we are moving in the right direction, because each one of those
programs could easily be demonstrated to be broken in scenarios only
slightly more exotic than the test scripts (e.g., see patch 3 for one of
the simplest cases).

So I think notwithstanding their use as prep for patch 14, patches 1-13
fix useful bugs.

I won't be surprised if there are other fallouts that were not caught by
the test suite (i.e., programs that expect to read config, don't do
RUN_SETUP, but aren't covered well by tests). I poked around the list of
builtins in git.c that do not use RUN_SETUP, and they seem to correctly
end up in setup_git_directory_gently() before reading config. But it's
possible I missed a case.

So this is definitely a bit larger than I'd hope for a regression-fix to
maint. But anything that doesn't address this issue at the config layer
is going to end up as a bit of a hack, and I'd rather not pile up hacks
if we can avoid it.

I have a few patches on top that go even further and disallow the
auto-fallback of looking in ".git" at all for non-repositories. I think
that's the right thing to do, and after years of chipping away at the
setup code, I think we're finally at a point to make that change (with a
few fixes of course). But that's an even riskier change and not fixing
an immediate regression. So I'll hold that back for now, and hopefully
it would become "master" material once this is sorted out.

I've cc'd Dennis, who helped investigate solutions in the thread
mentioned above, and Duy, because historically he has been the one most
willing and able to battle the dragon of our setup code. :)

  [01/16]: t1007: factor out repeated setup
  [02/16]: hash-object: always try to set up the git repository
  [03/16]: patch-id: use RUN_SETUP_GENTLY
  [04/16]: diff: skip implicit no-index check when given --no-index
  [05/16]: diff: handle --no-index prefixes consistently
  [06/16]: diff: always try to set up the repository
  [07/16]: pager: remove obsolete comment
  [08/16]: pager: stop loading git_default_config()
  [09/16]: pager: make pager_program a file-local static
  [10/16]: pager: use callbacks instead of configset
  [11/16]: pager: handle early config
  [12/16]: t1302: use "git -C"
  [13/16]: test-config: setup git directory
  [14/16]: config: only read .git/config from configured repos
  [15/16]: init: expand comments explaining config trickery
  [16/16]: init: reset cached config when entering new repo

-Peff


[PATCH 01/16] t1007: factor out repeated setup

2016-09-12 Thread Jeff King
We have a series of 3 CRLF tests that do exactly the same
(long) setup sequence. Let's pull it out into a common setup
test, which is shorter, more efficient, and will make it
easier to add new tests.

Note that we don't have to worry about cleaning up any of
the setup which was previously per-test; we call pop_repo
after the CRLF tests, which cleans up everything.

Signed-off-by: Jeff King 
---
 t/t1007-hash-object.sh | 32 
 1 file changed, 8 insertions(+), 24 deletions(-)

diff --git a/t/t1007-hash-object.sh b/t/t1007-hash-object.sh
index 7d2baa1..c89faa6 100755
--- a/t/t1007-hash-object.sh
+++ b/t/t1007-hash-object.sh
@@ -101,7 +101,7 @@ test_expect_success 'git hash-object --stdin file1 file0 &&
cp file0 file1 &&
echo "file0 -crlf" >.gitattributes &&
@@ -109,7 +109,10 @@ test_expect_success 'check that appropriate filter is 
invoke when --path is used
git config core.autocrlf true &&
file0_sha=$(git hash-object file0) &&
file1_sha=$(git hash-object file1) &&
-   test "$file0_sha" != "$file1_sha" &&
+   test "$file0_sha" != "$file1_sha"
+'
+
+test_expect_success 'check that appropriate filter is invoke when --path is 
used' '
path1_sha=$(git hash-object --path=file1 file0) &&
path0_sha=$(git hash-object --path=file0 file1) &&
test "$file0_sha" = "$path0_sha" &&
@@ -117,38 +120,19 @@ test_expect_success 'check that appropriate filter is 
invoke when --path is used
path1_sha=$(cat file0 | git hash-object --path=file1 --stdin) &&
path0_sha=$(cat file1 | git hash-object --path=file0 --stdin) &&
test "$file0_sha" = "$path0_sha" &&
-   test "$file1_sha" = "$path1_sha" &&
-   git config --unset core.autocrlf
+   test "$file1_sha" = "$path1_sha"
 '
 
 test_expect_success 'check that --no-filters option works' '
-   echo fooQ | tr Q "\\015" >file0 &&
-   cp file0 file1 &&
-   echo "file0 -crlf" >.gitattributes &&
-   echo "file1 crlf" >>.gitattributes &&
-   git config core.autocrlf true &&
-   file0_sha=$(git hash-object file0) &&
-   file1_sha=$(git hash-object file1) &&
-   test "$file0_sha" != "$file1_sha" &&
nofilters_file1=$(git hash-object --no-filters file1) &&
test "$file0_sha" = "$nofilters_file1" &&
nofilters_file1=$(cat file1 | git hash-object --stdin) &&
-   test "$file0_sha" = "$nofilters_file1" &&
-   git config --unset core.autocrlf
+   test "$file0_sha" = "$nofilters_file1"
 '
 
 test_expect_success 'check that --no-filters option works with --stdin-paths' '
-   echo fooQ | tr Q "\\015" >file0 &&
-   cp file0 file1 &&
-   echo "file0 -crlf" >.gitattributes &&
-   echo "file1 crlf" >>.gitattributes &&
-   git config core.autocrlf true &&
-   file0_sha=$(git hash-object file0) &&
-   file1_sha=$(git hash-object file1) &&
-   test "$file0_sha" != "$file1_sha" &&
nofilters_file1=$(echo "file1" | git hash-object --stdin-paths 
--no-filters) &&
-   test "$file0_sha" = "$nofilters_file1" &&
-   git config --unset core.autocrlf
+   test "$file0_sha" = "$nofilters_file1"
 '
 
 pop_repo
-- 
2.10.0.230.g6f8d04b



Re: [PATCH 07/10] diff.c: convert fn_out_consume to use emit_line_*

2016-09-12 Thread Stefan Beller
On Mon, Sep 12, 2016 at 5:25 PM, Junio C Hamano  wrote:
> Stefan Beller  writes:
>
>> diff --git a/diff.c b/diff.c
>> index 2aefd0f..7dcef73 100644
>> --- a/diff.c
>> +++ b/diff.c
>> @@ -493,6 +493,19 @@ static void emit_line(struct diff_options *o, const 
>> char *set, const char *reset
>>   emit_line_0(o, set, reset, line[0], line+1, len-1);
>>  }
>>
>> +static void emit_line_fmt(struct diff_options *o,
>> +   const char *set, const char *reset,
>> +   const char *fmt, ...)
>> +{
>> + struct strbuf sb = STRBUF_INIT;
>> + va_list ap;
>> + va_start(ap, fmt);
>> + strbuf_vaddf(, fmt, ap);
>> + va_end(ap);
>> +
>> + emit_line(o, set, reset, sb.buf, sb.len);
>> +}
>> +
>>  static int new_blank_line_at_eof(struct emit_callback *ecbdata, const char 
>> *line, int len)
>>  {
>>   if (!((ecbdata->ws_rule & WS_BLANK_AT_EOF) &&
>> @@ -1217,7 +1230,6 @@ static void fn_out_consume(void *priv, char *line, 
>> unsigned long len)
>>   const char *context = diff_get_color(ecbdata->color_diff, 
>> DIFF_CONTEXT);
>>   const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET);
>>   struct diff_options *o = ecbdata->opt;
>> - const char *line_prefix = diff_line_prefix(o);
>>
>>   o->found_changes = 1;
>>
>> @@ -1233,10 +1245,12 @@ static void fn_out_consume(void *priv, char *line, 
>> unsigned long len)
>>   name_a_tab = strchr(ecbdata->label_path[0], ' ') ? "\t" : "";
>>   name_b_tab = strchr(ecbdata->label_path[1], ' ') ? "\t" : "";
>>
>> - fprintf(o->file, "%s%s--- %s%s%s\n",
>> - line_prefix, meta, ecbdata->label_path[0], reset, 
>> name_a_tab);
>> - fprintf(o->file, "%s%s+++ %s%s%s\n",
>> - line_prefix, meta, ecbdata->label_path[1], reset, 
>> name_b_tab);
>> + emit_line_fmt(o, meta, reset, "--- %s%s\n",
>> +   ecbdata->label_path[0], name_a_tab);
>> +
>> + emit_line_fmt(o, meta, reset, "+++ %s%s\n",
>> +   ecbdata->label_path[1], name_b_tab);
>
> Hmph, the original showed the following for the name-a line:
>
> diff_line_prefix(o) META "--- " label_path RESET name_a_tab LF
>
> The updated one calls emit_line_fmt() with o, meta, reset, fmt and
> args, and then
>
>  * strbuf_vaddf(, "--- %s%s\n", label_path, name_a_tab) creates
>a string "--- " + label_path + LF
>
>  * emit_line() is called on the whole thing with META and RESET

Oh right, that is a real issue, i.e. name_b_tab is not colored. I'll
have to think about that.

>
>  * which is emit_line_0() that encloses the whole thing between META
>and RESET but knows the trailing LF should come after RESET.
>
> So the coloring seems to be correct, but I am not sure where the
> line-prefix went.

emit_line_0 puts the line_prefix by default in front, i.e.
it emits

line_prefix (SET) (first char) line (RESET) (CR/LF)

with things in parenthesis optional.

As said in 6/10 I think the emit_line_0 function is a great starting point
to build up a buffer when we do a two passes output. For that I want to channel
all output through emit_line_*.


---
I am done converting all diff output to emit_line for a rought first series,
and now all tests pass when asking for a buffered output.

So I started building the --color-moved on top of that, which seems
to work as well. The diff stat is  ~700 insertions and ~300 deletions
compared to 2.10, however we call out to the xdl stuff only once.

Comparing that to the original quick-hack-patch
https://public-inbox.org/git/20160906070151.15163-1-stefanbel...@gmail.com/
which has just 280 additions, 10 deletions;
we seem to need about ~350 refactor lines and
slightly slower emissions in the non-color-moved case
(all the calls to emit_line instead of directly using fprintf/fputs/fputc)


[PATCH v2] ls-files: adding support for submodules

2016-09-12 Thread Brandon Williams
Allow ls-files to recognize submodules in order to retrieve a list of
files from a repository's submodules.  This is done by forking off a
process to recursively call ls-files on all submodules. Also added an
output-path-prefix command in order to prepend paths to child processes.

Signed-off-by: Brandon Williams 
---
 Documentation/git-ls-files.txt | 11 +++-
 builtin/ls-files.c | 60 +
 t/t3007-ls-files-recurse-submodules.sh | 99 ++
 3 files changed, 169 insertions(+), 1 deletion(-)
 create mode 100755 t/t3007-ls-files-recurse-submodules.sh

diff --git a/Documentation/git-ls-files.txt b/Documentation/git-ls-files.txt
index 0d933ac..a623ebf 100644
--- a/Documentation/git-ls-files.txt
+++ b/Documentation/git-ls-files.txt
@@ -18,7 +18,9 @@ SYNOPSIS
[--exclude-per-directory=]
[--exclude-standard]
[--error-unmatch] [--with-tree=]
-   [--full-name] [--abbrev] [--] [...]
+   [--full-name] [--recurse-submodules]
+   [--output-path-prefix=]
+   [--abbrev] [--] [...]
 
 DESCRIPTION
 ---
@@ -137,6 +139,13 @@ a space) at the start of each line:
option forces paths to be output relative to the project
top directory.
 
+--recurse-submodules::
+   Recursively calls ls-files on each submodule in the repository.
+   Currently there is only support for the --cached mode.
+
+--output-path-prefix=::
+   Prepend the provided path to the output of each file
+
 --abbrev[=]::
Instead of showing the full 40-byte hexadecimal object
lines, show only a partial prefix.
diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 00ea91a..c0bce00 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -14,6 +14,7 @@
 #include "resolve-undo.h"
 #include "string-list.h"
 #include "pathspec.h"
+#include "run-command.h"
 
 static int abbrev;
 static int show_deleted;
@@ -28,6 +29,8 @@ static int show_valid_bit;
 static int line_terminator = '\n';
 static int debug_mode;
 static int show_eol;
+static const char *output_path_prefix = "";
+static int recurse_submodules;
 
 static const char *prefix;
 static int max_prefix_len;
@@ -68,6 +71,21 @@ static void write_eolinfo(const struct cache_entry *ce, 
const char *path)
 static void write_name(const char *name)
 {
/*
+* NEEDSWORK: To make this thread-safe, full_name would have to be owned
+* by the caller.
+*
+* full_name get reused across output lines to minimize the allocation
+* churn.
+*/
+   static struct strbuf full_name = STRBUF_INIT;
+   if (output_path_prefix != '\0') {
+   strbuf_reset(_name);
+   strbuf_addstr(_name, output_path_prefix);
+   strbuf_addstr(_name, name);
+   name = full_name.buf;
+   }
+
+   /*
 * With "--full-name", prefix_len=0; this caller needs to pass
 * an empty string in that case (a NULL is good for "").
 */
@@ -152,6 +170,25 @@ static void show_killed_files(struct dir_struct *dir)
}
 }
 
+/**
+ * Recursively call ls-files on a submodule
+ */
+static void show_gitlink(const struct cache_entry *ce)
+{
+   struct child_process cp = CHILD_PROCESS_INIT;
+   int status;
+
+   argv_array_push(, "ls-files");
+   argv_array_push(, "--recurse-submodules");
+   argv_array_pushf(, "--output-path-prefix=%s%s/",
+output_path_prefix, ce->name);
+   cp.git_cmd = 1;
+   cp.dir = ce->name;
+   status = run_command();
+   if (status)
+   exit(status);
+}
+
 static void show_ce_entry(const char *tag, const struct cache_entry *ce)
 {
int len = max_prefix_len;
@@ -163,6 +200,10 @@ static void show_ce_entry(const char *tag, const struct 
cache_entry *ce)
len, ps_matched,
S_ISDIR(ce->ce_mode) || S_ISGITLINK(ce->ce_mode)))
return;
+   if (recurse_submodules && S_ISGITLINK(ce->ce_mode)) {
+   show_gitlink(ce);
+   return;
+   }
 
if (tag && *tag && show_valid_bit &&
(ce->ce_flags & CE_VALID)) {
@@ -468,6 +509,10 @@ int cmd_ls_files(int argc, const char **argv, const char 
*cmd_prefix)
{ OPTION_SET_INT, 0, "full-name", _len, NULL,
N_("make the output relative to the project top 
directory"),
PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL },
+   OPT_STRING(0, "output-path-prefix", _path_prefix,
+   N_("path"), N_("prepend  to each file")),
+   OPT_BOOL(0, "recurse-submodules", _submodules,
+   N_("recurse through submodules")),
OPT_BOOL(0, "error-unmatch", _unmatch,
N_("if any  is not in the index, treat this as an 
error")),
 

Re: Potential Same Name Bug

2016-09-12 Thread Andrew Ardill
Hi Kevin,

On 13 September 2016 at 09:29, Kevin Smith  wrote:
> So when I move from master to develop that status would come up.  If I
> ran "git reset --hard" I would no longer have that message.  I also
> saw that when I do a git clone and specify to clone the develop branch
> that I would not see the git status above.  Is this an issue where if
> one branch has two files of the same name where one gets removed that
> it will remove both instances of that file in another branch when you
> switch to it?  I fixed this issue in our repo by removing the
> "file_NAME.png" file in the master branch, but it seems like this
> should be handled better in the case I described.

Just to clarify, is the machine you are cloning to on a
case-insensitive file system, or have you set core.ignorecase=true?

If so, I would imagine the behaviour would have been _interesting_ in
that repository before you removed one of the versions - that may be
why you removed it in the first place.

For future reference, the typical way I have seen this situation dealt
with is to git mv one file to a different filename, or using git rm.
There may be better ways to do it, and it's possible git should deal
with the specific situation you mentioned better, but I'm not able to
comment on that.

This situation commonly arises when you set core.ignorecase=false on a
case insensitive file system and then rename a file. When you commit
that change, git doesn't notice that the old filename has been
'deleted' (git sees a rename as a delete+create with the same file
contents) and so now thinks that you have two files with very similar
names in your working directory. To avoid this, make sure
core.ignorecase is set correctly, and use git mv -f to rename files on
case insensitive file systems.

Details at 
https://stackoverflow.com/questions/17683458/how-do-i-commit-case-sensitive-only-filename-changes-in-git

Regards,

Andrew Ardill


[RFC 2/3] http: consolidate #ifdefs for curl_multi_remove_handle

2016-09-12 Thread Eric Wong
I find #ifdefs makes code difficult-to-follow.

An early version of this patch had error checking for
curl_multi_remove_handle calls, but caused some tests (e.g.
t5541) to fail under curl 7.26.0 on old Debian wheezy.

Signed-off-by: Eric Wong 
---
 http.c | 17 ++---
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/http.c b/http.c
index cac5db9..a7eaf7b 100644
--- a/http.c
+++ b/http.c
@@ -201,6 +201,13 @@ static void finish_active_slot(struct active_request_slot 
*slot)
slot->callback_func(slot->callback_data);
 }
 
+static void xmulti_remove_handle(struct active_request_slot *slot)
+{
+#ifdef USE_CURL_MULTI
+   curl_multi_remove_handle(curlm, slot->curl);
+#endif
+}
+
 #ifdef USE_CURL_MULTI
 static void process_curl_messages(void)
 {
@@ -216,7 +223,7 @@ static void process_curl_messages(void)
   slot->curl != curl_message->easy_handle)
slot = slot->next;
if (slot != NULL) {
-   curl_multi_remove_handle(curlm, slot->curl);
+   xmulti_remove_handle(slot);
slot->curl_result = curl_result;
finish_active_slot(slot);
} else {
@@ -881,9 +888,7 @@ void http_cleanup(void)
while (slot != NULL) {
struct active_request_slot *next = slot->next;
if (slot->curl != NULL) {
-#ifdef USE_CURL_MULTI
-   curl_multi_remove_handle(curlm, slot->curl);
-#endif
+   xmulti_remove_handle(slot);
curl_easy_cleanup(slot->curl);
}
free(slot);
@@ -1164,9 +1169,7 @@ static void release_active_slot(struct 
active_request_slot *slot)
 {
closedown_active_slot(slot);
if (slot->curl && curl_session_count > min_curl_sessions) {
-#ifdef USE_CURL_MULTI
-   curl_multi_remove_handle(curlm, slot->curl);
-#endif
+   xmulti_remove_handle(slot);
curl_easy_cleanup(slot->curl);
slot->curl = NULL;
curl_session_count--;
-- 
EW



[RFC 3/3] http: always remove curl easy from curlm session on release

2016-09-12 Thread Eric Wong
We must call curl_multi_remove_handle when releasing the slot to
prevent subsequent calls to curl_multi_add_handle from failing
with CURLM_ADDED_ALREADY (in curl 7.32.1+; older versions
returned CURLM_BAD_EASY_HANDLE)

Signed-off-by: Eric Wong 
---
 http.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/http.c b/http.c
index a7eaf7b..3c4c3f5 100644
--- a/http.c
+++ b/http.c
@@ -1168,11 +1168,13 @@ void run_active_slot(struct active_request_slot *slot)
 static void release_active_slot(struct active_request_slot *slot)
 {
closedown_active_slot(slot);
-   if (slot->curl && curl_session_count > min_curl_sessions) {
+   if (slot->curl) {
xmulti_remove_handle(slot);
-   curl_easy_cleanup(slot->curl);
-   slot->curl = NULL;
-   curl_session_count--;
+   if (curl_session_count > min_curl_sessions) {
+   curl_easy_cleanup(slot->curl);
+   slot->curl = NULL;
+   curl_session_count--;
+   }
}
 #ifdef USE_CURL_MULTI
fill_active_slots();
-- 
EW



[RFC 0/3] http: avoid repeatedly adding curl easy to curlm

2016-09-12 Thread Eric Wong
(I have some hours online today, so I decided to work on this)

The key patch here is 3/3 which seems like an obvious fix to
adding the problem of adding a curl easy handle to a curl multi
handle repeatedly.

What is unclear to me is how only Yaroslav's repository seems to
trigger this bug after all these years...

However, I am fairly sure this fixes the bug Yaroslav
encountered.  This patch series is also needed for 2.9.3 and
perhaps older maintenance tracks for distros.

In PATCH 2/3, I originally had error checking, but removed it
after noticing it was causing failures on wheezy.

I will investigate those failures in a week or two when I regain
regular computer access.

These patches are needed for 2.9.x (and probably earlier), too.

The following changes since commit cda1bbd474805e653dda8a71d4ea3790e2a66cbb:

  Sync with maint (2016-09-08 22:00:53 -0700)

are available in the git repository at:

  git://bogomips.org/git-svn.git dumb-http-release

for you to fetch changes up to 3f561d0f0f78fd841708b5e81122e9d825919fd3:

  http: always remove curl easy from curlm session on release (2016-09-12 
23:59:35 +)


Eric Wong (3):
  http: warn on curl_multi_add_handle failures
  http: consolidate #ifdefs for curl_multi_remove_handle
  http: always remove curl easy from curlm session on release

 http.c | 29 ++---
 1 file changed, 18 insertions(+), 11 deletions(-)

-- 
EW


[RFC 1/3] http: warn on curl_multi_add_handle failures

2016-09-12 Thread Eric Wong
This will be useful for tracking down curl usage errors.

Signed-off-by: Eric Wong 
---
 http.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/http.c b/http.c
index cd40b01..cac5db9 100644
--- a/http.c
+++ b/http.c
@@ -1022,6 +1022,8 @@ int start_active_slot(struct active_request_slot *slot)
 
if (curlm_result != CURLM_OK &&
curlm_result != CURLM_CALL_MULTI_PERFORM) {
+   warning("curl_multi_add_handle failed: %s",
+   curl_multi_strerror(curlm_result));
active_requests--;
slot->in_use = 0;
return 0;
-- 
EW



Re: [PATCH 07/10] diff.c: convert fn_out_consume to use emit_line_*

2016-09-12 Thread Junio C Hamano
Stefan Beller  writes:

> diff --git a/diff.c b/diff.c
> index 2aefd0f..7dcef73 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -493,6 +493,19 @@ static void emit_line(struct diff_options *o, const char 
> *set, const char *reset
>   emit_line_0(o, set, reset, line[0], line+1, len-1);
>  }
>  
> +static void emit_line_fmt(struct diff_options *o,
> +   const char *set, const char *reset,
> +   const char *fmt, ...)
> +{
> + struct strbuf sb = STRBUF_INIT;
> + va_list ap;
> + va_start(ap, fmt);
> + strbuf_vaddf(, fmt, ap);
> + va_end(ap);
> +
> + emit_line(o, set, reset, sb.buf, sb.len);
> +}
> +
>  static int new_blank_line_at_eof(struct emit_callback *ecbdata, const char 
> *line, int len)
>  {
>   if (!((ecbdata->ws_rule & WS_BLANK_AT_EOF) &&
> @@ -1217,7 +1230,6 @@ static void fn_out_consume(void *priv, char *line, 
> unsigned long len)
>   const char *context = diff_get_color(ecbdata->color_diff, DIFF_CONTEXT);
>   const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET);
>   struct diff_options *o = ecbdata->opt;
> - const char *line_prefix = diff_line_prefix(o);
>  
>   o->found_changes = 1;
>  
> @@ -1233,10 +1245,12 @@ static void fn_out_consume(void *priv, char *line, 
> unsigned long len)
>   name_a_tab = strchr(ecbdata->label_path[0], ' ') ? "\t" : "";
>   name_b_tab = strchr(ecbdata->label_path[1], ' ') ? "\t" : "";
>  
> - fprintf(o->file, "%s%s--- %s%s%s\n",
> - line_prefix, meta, ecbdata->label_path[0], reset, 
> name_a_tab);
> - fprintf(o->file, "%s%s+++ %s%s%s\n",
> - line_prefix, meta, ecbdata->label_path[1], reset, 
> name_b_tab);
> + emit_line_fmt(o, meta, reset, "--- %s%s\n",
> +   ecbdata->label_path[0], name_a_tab);
> +
> + emit_line_fmt(o, meta, reset, "+++ %s%s\n",
> +   ecbdata->label_path[1], name_b_tab);

Hmph, the original showed the following for the name-a line:

diff_line_prefix(o) META "--- " label_path RESET name_a_tab LF

The updated one calls emit_line_fmt() with o, meta, reset, fmt and
args, and then

 * strbuf_vaddf(, "--- %s%s\n", label_path, name_a_tab) creates
   a string "--- " + label_path + LF

 * emit_line() is called on the whole thing with META and RESET

 * which is emit_line_0() that encloses the whole thing between META
   and RESET but knows the trailing LF should come after RESET.

So the coloring seems to be correct, but I am not sure where the
line-prefix went.





Re: [PATCH 06/10] diff.c: emit_line_0 can handle no color

2016-09-12 Thread Stefan Beller
On Mon, Sep 12, 2016 at 5:11 PM, Junio C Hamano  wrote:
> Stefan Beller  writes:
>
>> From: Stefan Beller 
>>
>> ---
>
> "X can do Y" can be taken as a statement of fact (to which "so
> what?"  is an appropriate response), a desire (to which "then please
> say 'make X do Y' instead" is an appropriate response), or a report
> of a bug (to which "please explain why X should be forbidden from
> doing Y" is an appropriate response).
>
> This is way under-explained.  I think this is "make X do Y" kind,
> and if so, please say so and possibly why it is a good idea to teach
> X how to do Y.
>
> Thanks.

Ok, I see the general pattern of your answers: Add more explanations.

Answering for
patch 01/10 as well as this one here:

I want to propose an option to detect moved lines in a patch,
which cannot be done in a one-pass over the diff. Instead we need to go
over the whole diff twice, because we cannot detect the first line of a
line pair that got moved in the first pass. So I aim for
* collecting all output into a buffer as a first pass,
* as the second pass output the buffer.

So in a later patch I will split up the emit_line_* machinery to either
emitting to options->file or buffering if we do the 2 pass thing.

To make sure the 2 passes work correctly, we need to make sure all output
is routed through the emit_line functions, and there will be no direct writes.

Now that we will be using the emit_lines functions for non colored
output as well,
we want to pass in "no color" which I think is best done via NULL and then not
calling the output of the color writes.


Re: [PATCH 06/10] diff.c: emit_line_0 can handle no color

2016-09-12 Thread Junio C Hamano
Stefan Beller  writes:

> From: Stefan Beller 
>
> ---

"X can do Y" can be taken as a statement of fact (to which "so
what?"  is an appropriate response), a desire (to which "then please
say 'make X do Y' instead" is an appropriate response), or a report
of a bug (to which "please explain why X should be forbidden from
doing Y" is an appropriate response).

This is way under-explained.  I think this is "make X do Y" kind,
and if so, please say so and possibly why it is a good idea to teach
X how to do Y.

Thanks.



>  diff.c | 6 --
>  1 file changed, 4 insertions(+), 2 deletions(-)
>
> diff --git a/diff.c b/diff.c
> index 87b1bb2..2aefd0f 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -473,11 +473,13 @@ static void emit_line_0(struct diff_options *o, const 
> char *set, const char *res
>   }
>  
>   if (len || !nofirst) {
> - fputs(set, file);
> + if (set)
> + fputs(set, file);
>   if (!nofirst)
>   fputc(first, file);
>   fwrite(line, len, 1, file);
> - fputs(reset, file);
> + if (reset)
> + fputs(reset, file);
>   }
>   if (has_trailing_carriage_return)
>   fputc('\r', file);


Re: [PATCH 05/10] diff.c: reintroduce diff_flush_patch for all files

2016-09-12 Thread Junio C Hamano
Stefan Beller  writes:

> From: Stefan Beller 
>
> ---

This shows why 04/10 should have had the overall plan for these two
steps.  We want a short-and-sweet name "diff-flush-patch" to mean
"flush all the queued diff elements" so rename the singleton one
from diff-flush-patch to diff-flush-patch-filepair to make room in
04/10 and then introduce the "diff-flush-patch-all" in 05/10.

I just said with the above explanation the changes in 04/10 and
05/10 become undertandable, which is a bit different from being
justifiable.  "diff_flush_raw()", "diff_flush_stat()", etc. are
_all_ about a single filepair.  I'd rather see diff_flush_patch()
to be also about a single filepair.

It may be helpful to have a helper that calls diff_flush_patch() for
all filepairs in the queue, but can't that function get a new name
instead?  By definition, it will be called much less often than a
pair-wise one, so it can afford to have a longer name.

diff_flush_patch_queue() or something, perhaps?  I am not sure if it
should be diff_flush_queue_patch(), or even

diff_flush_queue(struct diff_options *o, diff_flush_fn fn);

where diff_flush_fn is 

typedef void (*diff_flush_fn)(struct diff_filepair *p,
struct diff_options *o, void *other_data)

that can be used to flush the queue by calling any of these
filepair-wise flush functions like diff_flush_{raw,stat,checkdiff,patch}.

This last approach might be overkill, but if you want to try it,
you'd need a preparatory step to give an unused "void *other" to
diff_flush_{raw,patch,checkdiff} as diff_flush_stat() is an oddball
that needs an extra "accumulator" pointer.  I actually wonder if
that "diffstat" pointer should become part of "struct diff_options",
though.  Anyway.

>  diff.c | 17 -
>  1 file changed, 12 insertions(+), 5 deletions(-)
>
> diff --git a/diff.c b/diff.c
> index 85fb887..87b1bb2 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -4608,6 +4608,17 @@ void diff_warn_rename_limit(const char *varname, int 
> needed, int degraded_cc)
>   warning(rename_limit_advice, varname, needed);
>  }
>  
> +static void diff_flush_patch(struct diff_options *o)
> +{
> + int i;
> + struct diff_queue_struct *q = _queued_diff;
> + for (i = 0; i < q->nr; i++) {
> + struct diff_filepair *p = q->queue[i];
> + if (check_pair_status(p))
> + diff_flush_patch_filepair(p, o);
> + }
> +}
> +
>  void diff_flush(struct diff_options *options)
>  {
>   struct diff_queue_struct *q = _queued_diff;
> @@ -4702,11 +4713,7 @@ void diff_flush(struct diff_options *options)
>   }
>   }
>  
> - for (i = 0; i < q->nr; i++) {
> - struct diff_filepair *p = q->queue[i];
> - if (check_pair_status(p))
> - diff_flush_patch_filepair(p, options);
> - }
> + diff_flush_patch(options);
>   }
>  
>   if (output_format & DIFF_FORMAT_CALLBACK)


Re: [PATCH 04/10] diff.c: rename diff_flush_patch to diff_flush_patch_filepair

2016-09-12 Thread Junio C Hamano
Stefan Beller  writes:

> From: Stefan Beller 
>
> Signed-off-by: Stefan Beller 
> ---

The reason being...?



Re: [PATCH 03/10] diff.c: drop tautologous condition in emit_line_0

2016-09-12 Thread Junio C Hamano
Stefan Beller  writes:

> diff --git a/diff.c b/diff.c
> index 156c2aa..9d2e704 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -460,8 +460,7 @@ static void emit_line_0(struct diff_options *o, const 
> char *set, const char *res
>  
>   if (len == 0) {
>   has_trailing_newline = (first == '\n');
> - has_trailing_carriage_return = (!has_trailing_newline &&
> - (first == '\r'));
> + has_trailing_carriage_return = (first == '\r');
>   nofirst = has_trailing_newline || has_trailing_carriage_return;
>   } else {
>   has_trailing_newline = (len > 0 && line[len-1] == '\n');

Interesting.

This may be a mis-conversion at 250f7993 ("diff.c: split emit_line()
from the first char and the rest of the line", 2009-09-14), I
suspect.  The original took line[] with length and peeked for '\n',
and when it saw one, it decremented length before checking
line[len-1] for '\r'.

But of course if there is only one byte on the line (i.e. len == 0
after first is stripped off), it cannot be both '\n' or '\r' at the
same time.

Thanks for spotting.






Re: [PATCH 02/10] diff: emit_{add, del, context}_line to increase {pre,post}image line count

2016-09-12 Thread Junio C Hamano
Stefan Beller  writes:

> From: Stefan Beller 
>
> At all call sites of emit_{add, del, context}_line we increment the line
> count, so move it into the respective functions to make the code at the
> calling site a bit clearer.
>
> Signed-off-by: Stefan Beller 
> ---
>  diff.c | 15 ++-
>  1 file changed, 6 insertions(+), 9 deletions(-)

I am mostly in favor of this change, as the calls to these three
functions are always preceded by increment of these fields, but it
is "mostly" exactly because the reverse is not true.  Namely ...

> @@ -1293,16 +1294,12 @@ static void fn_out_consume(void *priv, char *line, 
> unsigned long len)
>  
>   switch (line[0]) {
>   case '+':
> - ecbdata->lno_in_postimage++;
>   emit_add_line(reset, ecbdata, line + 1, len - 1);
>   break;
>   case '-':
> - ecbdata->lno_in_preimage++;
>   emit_del_line(reset, ecbdata, line + 1, len - 1);
>   break;
>   case ' ':
> - ecbdata->lno_in_postimage++;
> - ecbdata->lno_in_preimage++;
>   emit_context_line(reset, ecbdata, line + 1, len - 1);
>   break;
>   default:

... there still needs an increment in the context lines, not shown
in the patch, just after this "default:".  I think the patch is OK
as the comment after this "default:" (also not shown in the patch)
makes it clear what is going on.

Thanks.




Re: [PATCH 01/10] diff: move line ending check into emit_hunk_header

2016-09-12 Thread Junio C Hamano
Stefan Beller  writes:

> From: Stefan Beller 
>
> Signed-off-by: Stefan Beller 
> ---

The reason being...?

"Doing this would not change any behaviour and would not break
anything" may be true, but that is not a reason to do a change.

Hopefully it will become clear why this is needed once we look at a
later patch in the series.

>  diff.c | 5 +++--
>  1 file changed, 3 insertions(+), 2 deletions(-)
>
> diff --git a/diff.c b/diff.c
> index cc8e812..aa50b2d 100644
> --- a/diff.c
> +++ b/diff.c
> @@ -610,6 +610,9 @@ static void emit_hunk_header(struct emit_callback 
> *ecbdata,
>   }
>  
>   strbuf_add(, line + len, org_len - len);
> + if (line[org_len - 1] != '\n')
> + strbuf_addch(, '\n');
> +
>   emit_line(ecbdata->opt, "", "", msgbuf.buf, msgbuf.len);
>   strbuf_release();
>  }
> @@ -1247,8 +1250,6 @@ static void fn_out_consume(void *priv, char *line, 
> unsigned long len)
>   len = sane_truncate_line(ecbdata, line, len);
>   find_lno(line, ecbdata);
>   emit_hunk_header(ecbdata, line, len);
> - if (line[len-1] != '\n')
> - putc('\n', o->file);
>   return;
>   }


Re: [PATCH v7 05/10] pkt-line: add packet_write_gently()

2016-09-12 Thread Junio C Hamano
larsxschnei...@gmail.com writes:

> +int packet_write_gently(const int fd_out, const char *buf, size_t size)
> +{
> + static char packet_write_buffer[LARGE_PACKET_MAX];
> +
> + if (size > sizeof(packet_write_buffer) - 4) {
> + error("packet write failed");
> + return -1;
> + }
> + packet_trace(buf, size, 1);
> + size += 4;
> + set_packet_header(packet_write_buffer, size);
> + memcpy(packet_write_buffer + 4, buf, size - 4);
> + if (write_in_full(fd_out, packet_write_buffer, size) == size)
> + return 0;
> +
> + error("packet write failed");
> + return -1;
> +}

The same comment as 4/10 applies here.


Potential Same Name Bug

2016-09-12 Thread Kevin Smith
I hit an issue in Git today that seemed to be a bug.  Basically what
happened is in our master branch we had two files, one named something
like "file_NAME.png" and another named "file_name.png" in the same
folder.  In the develop branch in the same repo we had removed the
"file_NAME.png" file so that only the "file_name.png" file was left.
If I clone the repo so I get master and then do "git checkout develop"
I would see when running "git status" that I would have this message:

On branch develop
Your branch is up-to-date with 'origin/develop'.
Changes not staged for commit:
  (use "git add/rm ..." to update what will be committed)
  (use "git checkout -- ..." to discard changes in working directory)

deleted:file_name.png

no changes added to commit (use "git add" and/or "git commit -a")

So when I move from master to develop that status would come up.  If I
ran "git reset --hard" I would no longer have that message.  I also
saw that when I do a git clone and specify to clone the develop branch
that I would not see the git status above.  Is this an issue where if
one branch has two files of the same name where one gets removed that
it will remove both instances of that file in another branch when you
switch to it?  I fixed this issue in our repo by removing the
"file_NAME.png" file in the master branch, but it seems like this
should be handled better in the case I described.


Re: [PATCH v7 04/10] pkt-line: add packet_flush_gently()

2016-09-12 Thread Junio C Hamano
larsxschnei...@gmail.com writes:

> From: Lars Schneider 
>
> packet_flush() would die in case of a write error even though for some
> callers an error would be acceptable. Add packet_flush_gently() which
> writes a pkt-line flush packet and returns `0` for success and `-1` for
> failure.
> ...
> +int packet_flush_gently(int fd)
> +{
> + packet_trace("", 4, 1);
> + if (write_in_full(fd, "", 4) == 4)
> + return 0;
> + error("flush packet write failed");
> + return -1;

It is more idiomatic to do

return error(...);

but more importantly, does the caller even want an error message
unconditionally printed here?

I suspect that it is a strong sign that the caller wants to be in
control of when and what error message is produced; otherwise it
wouldn't be calling the _gently() variant, no?

Of course, if you have written callers to this function in later
patches in this series, they would be responsible for reporting (or
choosing not to report) this failure, but I think making this
function silent is a better course in the longer term.


Re: Git Miniconference at Plumbers

2016-09-12 Thread Lars Schneider

> On 12 Sep 2016, at 21:11, Junio C Hamano  wrote:
> 
> [..]
> properly; supporting "huge objects" better in the object layer,
> without having to resort to ugly hacks like GitLFS that will never
> be part of the core Git. [...]

I agree with you that GitLFS is an ugly hack.

Some applications have test data, image assets, and other data sets that
need to be versioned along with the source code.

How would you deal with these kind of "huge objects" _today_?

Thanks,
Lars


Re: [PATCH] t/perf/run: Don't forget to copy config.mak.autogen & friends to build area

2016-09-12 Thread Junio C Hamano
Junio C Hamano  writes:

> In other words, something along this line, perhaps.
> ...

Not quite.  There is no guanratee that the user is using autoconf at
all.  It should be more like this, I think.

 t/perf/run | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/t/perf/run b/t/perf/run
index aa383c2..7ec3734 100755
--- a/t/perf/run
+++ b/t/perf/run
@@ -30,7 +30,13 @@ unpack_git_rev () {
 }
 build_git_rev () {
rev=$1
-   cp -t build/$rev ../../{config.mak,config.mak.autogen,config.status}
+   for config in config.mak config.mak.autogen config.status
+   do
+   if test -f "../../$config"
+   then
+   cp "../../$config" "build/$rev/"
+   fi
+   done
(cd build/$rev && make $GIT_PERF_MAKE_OPTS) ||
die "failed to build revision '$mydir'"
 }




Re: build issues on AIX - aka non-GNU environment, no gnu grep, no gcc

2016-09-12 Thread Michael Felt

Try again, reply all this time...


On 12-Sep-16 22:24, Junio C Hamano wrote:

Michael Felt  writes:


I had a couple of issues when packaging git for AIX
a) option -Wall by default - works fine with gcc I am sure, but not so
well when gcc is not your compiler

That is expected.  As you said, it is merely "by default" and there
are mechanisms like config.mak provided for people to override them.


b) needs a special (GNU) grep argument (-a from memory). This I
resolved by downloading and packaging GNU grep. However, I hope this
has not introduced a new dependency.

Read the Makefile and find SANE_TEXT_GREP, perhaps?
Ah - read the manual heh?. Generally, I just download, unpack, and run 
configure.

I do not mind needing an extra tool - as long as it is only for the build.

And, since you have something about it in your (still unread) install 
Notes - it has not slipped in by accident.


Thanks for helping to spread Git to minority platforms.

The link is: http://www.aixtools.net/index.php/git
I do not use it much (I prefer to package "distributed" src tarballs, 
but when testing a potential fix - git can be extremely useful!


All in all, thanks for an easy to port to a "minority" platform!



Re: [GIT PULL] l10n updates for 2.10.0 maint branch

2016-09-12 Thread Junio C Hamano
Jiang Xin  writes:

> There are some l10n updates for Git 2.10.0, please merge them to the
> maint branch.

Thanks.


Re: [PATCH v3 4/4] add: modify already added files when --chmod is given

2016-09-12 Thread Junio C Hamano
Thomas Gummerer  writes:

> When the chmod option was added to git add, it was hooked up to the diff
> machinery, meaning that it only works when the version in the index
> differs from the version on disk.
>
> As the option was supposed to mirror the chmod option in update-index,
> which always changes the mode in the index, regardless of the status of
> the file, make sure the option behaves the same way in git add.
>
> Signed-off-by: Thomas Gummerer 
> ---
>  builtin/add.c  | 36 +---
>  builtin/checkout.c |  2 +-
>  builtin/commit.c   |  2 +-
>  cache.h| 10 +-
>  read-cache.c   | 14 ++
>  t/t3700-add.sh | 21 +
>  6 files changed, 59 insertions(+), 26 deletions(-)

The change looks quite large but this in essense reverts what Edward
did in the first round by hooking into "we add only modified files
here" and "we add new files here", both of which are made unnecessary,
because this adds the final "now we finished adding things, let's
fix modes of paths that match the pathspec".

Which makes sense.

> +static void chmod_pathspec(struct pathspec *pathspec, int force_mode)
> +{
> + int i;
> + 
> + for (i = 0; i < active_nr; i++) {
> + struct cache_entry *ce = active_cache[i];
> +
> + if (pathspec && !ce_path_match(ce, pathspec, NULL))
> + continue;
> +
> + if (chmod_cache_entry(ce, force_mode) < 0)
> + fprintf(stderr, "cannot chmod '%s'", ce->name);
> + }
> +}

If pathspec is NULL, this will chmod all paths in the index, which
is probably not very useful and quite risky operation at the same
time.

However ...

> + if (force_mode)
> + chmod_pathspec(, force_mode);

... the caller never passes a NULL as pathspec.

In any case, I somehow expected to see

if (force_mode && pathspec.nr)
chmod_pathspec(, force_mode);

because it would make it very easy to see in the caller that

git add --chmod=+x  ;# no pathspec
cd subdir && git add --chmod=+x ;# no pathspec

will be a no-op, which is what we want, if I am not mistaken.  Of
course, if somebody really wants to drop executable bit from
everything, she can do

git add --chmod=-x .

pretty easily.

Above three may want to be added as new tests.  The first two should
be a no-op, while the last one should drop executable bits from
everywhere.

Thanks.





[PATCH] [git-p4.py] Add --checkpoint-period option to sync/clone

2016-09-12 Thread Ori Rawlings
Importing a long history from Perforce into git using the git-p4 tool
can be especially challenging. The `git p4 clone` operation is based
on an all-or-nothing transactionality guarantee. Under real-world
conditions like network unreliability or a busy Perforce server,
`git p4 clone` and  `git p4 sync` operations can easily fail, forcing a
user to restart the import process from the beginning. The longer the
history being imported, the more likely a fault occurs during the
process. Long enough imports thus become statistically unlikely to ever
succeed.

The underlying git fast-import protocol supports an explicit checkpoint
command. The idea here is to optionally allow the user to force an
explicit checkpoint every  seconds. If the sync/clone operation fails
branches are left updated at the appropriate commit available during the
latest checkpoint. This allows a user to resume importing Perforce
history while only having to repeat at most approximately  seconds
worth of import activity.

Signed-off-by: Ori Rawlings 
---
 git-p4.py | 8 
 1 file changed, 8 insertions(+)

diff --git a/git-p4.py b/git-p4.py
index fd5ca52..40cb64f 100755
--- a/git-p4.py
+++ b/git-p4.py
@@ -2244,6 +2244,7 @@ class P4Sync(Command, P4UserMap):
 optparse.make_option("-/", dest="cloneExclude",
  action="append", type="string",
  help="exclude depot path"),
+optparse.make_option("--checkpoint-period", 
dest="checkpointPeriod", type="int", help="Period in seconds between explict 
git fast-import checkpoints (by default, no explicit checkpoints are 
performed)"),
 ]
 self.description = """Imports from Perforce into a git repository.\n
 example:
@@ -2276,6 +2277,7 @@ class P4Sync(Command, P4UserMap):
 self.tempBranches = []
 self.tempBranchLocation = "refs/git-p4-tmp"
 self.largeFileSystem = None
+self.checkpointPeriod = -1
 
 if gitConfig('git-p4.largeFileSystem'):
 largeFileSystemConstructor = 
globals()[gitConfig('git-p4.largeFileSystem')]
@@ -3031,6 +3033,8 @@ class P4Sync(Command, P4UserMap):
 
 def importChanges(self, changes):
 cnt = 1
+if self.checkpointPeriod > -1:
+self.lastCheckpointTime = time.time()
 for change in changes:
 description = p4_describe(change)
 self.updateOptionDict(description)
@@ -3107,6 +3111,10 @@ class P4Sync(Command, P4UserMap):
 self.initialParent)
 # only needed once, to connect to the previous commit
 self.initialParent = ""
+
+if self.checkpointPeriod > -1 and time.time() - 
self.lastCheckpointTime > self.checkpointPeriod:
+self.checkpoint()
+self.lastCheckpointTime = time.time()
 except IOError:
 print self.gitError.read()
 sys.exit(1)
-- 
2.7.4 (Apple Git-66)



[PATCH] [git-p4.py] Add --checkpoint-period option to sync/clone

2016-09-12 Thread Ori Rawlings
Importing a long history from Perforce into git using the git-p4 tool
can be especially challenging. The `git p4 clone` operation is based
on an all-or-nothing transactionality guarantee. Under real-world
conditions like network unreliability or a busy Perforce server,
`git p4 clone` and  `git p4 sync` operations can easily fail, forcing a
user to restart the import process from the beginning. The longer the
history being imported, the more likely a fault occurs during the
process. Long enough imports thus become statistically unlikely to ever
succeed.

I'm looking for feedback on a potential approach for addressing the
problem. My idea was to leverage the checkpoint feature of git 
fast-import. I've included a patch which exposes a new option to the 
sync/clone commands in the git-p4 tool. The option enables explict 
checkpoints on a periodic basis (approximately every x seconds).

If the sync/clone command fails during processing of Perforce changes, 
the user can craft a new git p4 sync command that will identify 
changes that have already been imported and proceed with importing 
only changes more recent than the last successful checkpoint.

Assuming this approach makes sense, there are a few questions/items I
have:

  1. To add tests for this option, I'm thinking I'd need to simulate a 
 Perforce server or client that exits abnormally after first 
 processing some operations successfully. I'm looking for 
 suggestions on sane approaches for implementing that.
  2. From a usability perspective, I think it makes sense to print 
 out a message upon clone/sync failure if the user has enabled the 
 option. This message would describe how long ago the last 
 successful checkpoint was completed and document what command/s 
 to execute to continue importing Perforce changes. Ideally, the 
 commmand to continue would be exactly the same as the command 
 which failed, but today, clone will ignore any commits already 
 imported to git. There are some lingering TODO comments in 
 git-p4.py suggesting that clone should try to avoid reimporting
 changes. I don't mind taking a stab at addressing the TODO, but 
 am worried I'll quickly encounter edge cases in the clone/sync 
 features I don't understand.
  3. This is my first attempt at a git contribution, so I'm definitely 
 looking for feedback on commit messages, etc.


Cheers!

Ori Rawlings (1):
  [git-p4.py] Add --checkpoint-period option to sync/clone

 git-p4.py | 8 
 1 file changed, 8 insertions(+)

-- 
2.7.4 (Apple Git-66)



Re: [PATCH v3 2/4] update-index: use the same structure for chmod as add

2016-09-12 Thread Junio C Hamano
Thomas Gummerer  writes:

> + char flip = force_mode == 0777 ? '+' : '-';
>  
>   pos = cache_name_pos(path, strlen(path));
>   if (pos < 0)
> @@ -432,17 +433,11 @@ static void chmod_path(int flip, const char *path)
>   mode = ce->ce_mode;
>   if (!S_ISREG(mode))
>   goto fail;
> - switch (flip) {
> - case '+':
> - ce->ce_mode |= 0111; break;
> - case '-':
> - ce->ce_mode &= ~0111; break;
> - default:
> - goto fail;
> - }
> + ce->ce_mode = create_ce_mode(force_mode);

create_ce_mode() is supposed to take the st_mode taken from a
"struct stat", but here force_mode is 0777 or something else.  The
above to feed only 0777 or 0666 may happen to work with the way how
create_ce_mode() is implemented now, but it is a time-bomb waiting
to go off.

Instead of using force_mode that is unsigned, keep the 'flip' as
argument, and do:

if (!S_ISREG(mode))
goto fail;
+   if (flip == '+')
+   mode |= 0111;
+   else
+   mode &= ~0111;
ce->ce_mode = create_ce_mode(mode);

perhaps, as you do not need and are not using the full 9 bits in
force_mode anyway.

That would mean chmod_index_entry() introduced in 3/4 and its user
in 4/4 need to be updated to pass '+' or '-' down the callchain, but
I think that is a good change for the same reason.  We do not allow
you to set full 9 bits; let's not pretend as if we do so by passing
just a single bit '+' or '-' around.

I think 3/4 needs to be fixed where it calls create_ce_mode() with
only the 0666/0777 in chmod_index_entry() anyway, regardless of the
above suggested change.

> diff --git a/t/t2107-update-index-basic.sh b/t/t2107-update-index-basic.sh
> index dfe02f4..32ac6e0 100755
> --- a/t/t2107-update-index-basic.sh
> +++ b/t/t2107-update-index-basic.sh
> @@ -80,4 +80,17 @@ test_expect_success '.lock files cleaned up' '
>   )
>  '
>  
> +test_expect_success '--chmod=+x and chmod=-x in the same argument list' '
> + >A &&
> + >B &&
> + git add A B &&
> + git update-index --chmod=+x A --chmod=-x B &&
> + cat >expect <<-\EOF &&
> + 100755 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   A
> + 100644 e69de29bb2d1d6434b8b29ae775ad8c2e48c5391 0   B
> + EOF
> + git ls-files --stage A B >actual &&
> + test_cmp expect actual
> +'

Thanks for adding this test.  We may want to cross the b/c bridge in
a later version bump, but until then it is good to make sure we know
what the currently expected behaviour is.


Re: What's cooking in git.git (Sep 2016, #03; Fri, 9)

2016-09-12 Thread Jeff King
On Mon, Sep 12, 2016 at 12:10:13PM -0700, Junio C Hamano wrote:

> Jeff King  writes:
> 
> > I happened to notice today that this topic needs a minor tweak:
> >
> > -- >8 --
> > Subject: [PATCH] add_delta_base_cache: use list_for_each_safe
> >
> > We may remove elements from the list while we are iterating,
> > which requires using a second temporary pointer. Otherwise
> > stepping to the next element of the list might involve
> > looking at freed memory (which generally works in practice,
> > as we _just_ freed it, but of course is wrong to rely on;
> > valgrind notices it).
> 
> I failed to notice it, too.  Thanks.

After staring at this, I was wondering how the _original_ ever worked.
Because the problem is in the linked-list code, which I did not really
change (I switched it to LIST_HEAD(), but the code is equivalent).

The answer is that in the original, there was no free() in the original
code when we released an entry; it just went back to the (static) pool.

So the bug is in the conversion to hashmap, where we start allocating
(and freeing) the entries individually.

-Peff


Re: [PATCH v2 17/25] sequencer: support amending commits

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> This teaches the sequencer_commit() function to take an argument that
> will allow us to implement "todo" commands that need to amend the commit
> messages ("fixup", "squash" and "reword").
>
> Signed-off-by: Johannes Schindelin 
> ---
>  sequencer.c | 6 --
>  sequencer.h | 2 +-
>  2 files changed, 5 insertions(+), 3 deletions(-)
>
> diff --git a/sequencer.c b/sequencer.c
> index 6e9732c..60b522e 100644
> --- a/sequencer.c
> +++ b/sequencer.c
> @@ -485,7 +485,7 @@ static char **read_author_script(void)
>   * author metadata.
>   */
>  int sequencer_commit(const char *defmsg, struct replay_opts *opts,
> -   int allow_empty, int edit)
> +   int allow_empty, int edit, int amend)
>  {
>   char **env = NULL;
>   struct argv_array array;
> @@ -514,6 +514,8 @@ int sequencer_commit(const char *defmsg, struct 
> replay_opts *opts,
>   argv_array_push(, "commit");
>   argv_array_push(, "-n");
>  
> + if (amend)
> + argv_array_push(, "--amend");
>   if (opts->gpg_sign)
>   argv_array_pushf(, "-S%s", opts->gpg_sign);
>   if (opts->signoff)
> @@ -786,7 +788,7 @@ static int do_pick_commit(enum todo_command command, 
> struct commit *commit,
>   }
>   if (!opts->no_commit)
>   res = sequencer_commit(opts->edit ? NULL : git_path_merge_msg(),
> - opts, allow, opts->edit);
> + opts, allow, opts->edit, 0);
>  
>  leave:
>   free_message(commit, );

Hmm, this is more about a comment on 18/25, but I suspect that
"amend" or any opportunity given to the user to futz with the
contents in the editor invites a wish for the result to be treated
with stripspace.

No existing callers use "amend" to call this function, so there is
no change in behaviour, but at the same time, we do not have enough
information to see if 'amend' should by default toggle cleanup.


> diff --git a/sequencer.h b/sequencer.h
> index 7f5222f..c45f5c4 100644
> --- a/sequencer.h
> +++ b/sequencer.h
> @@ -54,7 +54,7 @@ int sequencer_rollback(struct replay_opts *opts);
>  int sequencer_remove_state(struct replay_opts *opts);
>  
>  int sequencer_commit(const char *defmsg, struct replay_opts *opts,
> -   int allow_empty, int edit);
> +   int allow_empty, int edit, int amend);
>  
>  extern const char sign_off_header[];


Re: [PATCH v2 18/25] sequencer: support cleaning up commit messages

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> @@ -788,7 +792,7 @@ static int do_pick_commit(enum todo_command command, 
> struct commit *commit,
>   }
>   if (!opts->no_commit)
>   res = sequencer_commit(opts->edit ? NULL : git_path_merge_msg(),
> - opts, allow, opts->edit, 0);
> + opts, allow, opts->edit, 0, 0);
>  
>  leave:
>   free_message(commit, );

So the existing caller did not ask to cleanup


>  int sequencer_commit(const char *defmsg, struct replay_opts *opts,
> -   int allow_empty, int edit, int amend)
> +   int allow_empty, int edit, int amend,
> +   int cleanup_commit_message)
>  {
>   char **env = NULL;
>   struct argv_array array;
> @@ -522,9 +523,12 @@ int sequencer_commit(const char *defmsg, struct 
> replay_opts *opts,
>   argv_array_push(, "-s");
>   if (defmsg)
>   argv_array_pushl(, "-F", defmsg, NULL);
> + if (cleanup_commit_message)
> + argv_array_push(, "--cleanup=strip");
>   if (edit)
>   argv_array_push(, "-e");
> - else if (!opts->signoff && !opts->record_origin &&
> + else if (!cleanup_commit_message &&
> +  !opts->signoff && !opts->record_origin &&
>git_config_get_value("commit.cleanup", ))
>   argv_array_push(, "--cleanup=verbatim");

Which is consistent with this change.  This is a no-op enhancement
for existing callers and makes new callers to ask for cleaning up.

We will see if a hardcoded "--cleanup=strip" is sufficient when we
see new callers (we _know_ it would be sufficient for the first new
caller by definition ;-).

Makes sense.  Thanks.


Re: [PATCH v2 19/25] sequencer: remember do_recursive_merge()'s return value

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> The return value of do_recursive_merge() may be positive (indicating merge
> conflicts), so let's OR later error conditions so as not to overwrite them
> with 0.

Are the untold assumptions as follows?

 - The caller wants to act on positive (not quite an error), zero
   (success) or negative (error);

 - do_recursive_merge() can return positive (success with
   reservation), zero or negative, and the call to it would return
   immediately if it got negative;

 - all other functions that come later may return either zero or negative, and 
   never positive;

 - Hence the caller can be assured that "res" being positive can
   only mean do_recursive_merge() succeeded with reservation and
   everything else succeeded.

This can be extended if the only thing the caller cares about is
positive/zero/negative and it does not care what exact positive
value it gets--in such a case, we can loosen the condition on the
return values from other functions whose return values are OR'ed
together; they may also return positive to signal the same "not
quite an error", i.e. updating the latter two points to

 - all other functions that come later can return positive (success
   with reservation), zero or negative.

 - Hence the caller can be assured that "res" being positive can
   mean nobody failed with negative return, but it is not an
   unconditional success, which is signalled by value "res" being
   0.

I cannot quite tell which is the case, especially what is planned in
the future.  The proposed log message is a good place to explain the
future course this code will take.

> This is not yet a problem, but preparing for the patches to come: we will
> teach the sequencer to do rebase -i's job.

Thanks.


Re: Git Miniconference at Plumbers

2016-09-12 Thread Jakub Narębski
W dniu 12.09.2016 o 20:09, Jon Loeliger napisał:
> So, like, David Bainbridge said:

>> Does anyone know whether the sessions will be recorded in any way?
> 
> I am uncertain about outright recording (digital video/audio),
> but there will be at least summarizing notes taken and posted.
> Anyone wishing to record the talks/discussions is likely welcome
> to do so.

I think LWN.net usually covers Linux Plumbers Conference, see e.g.
https://lwn.net/Archives/ConferenceByYear/#2015-Linux_Plumbers_Conference

Hopefully they would cover it this year too.

HTH
-- 
Jakub Narębski



Re: git commit -p with file arguments

2016-09-12 Thread Jakub Narębski
W dniu 12.09.2016 o 03:57, Junio C Hamano pisze:
> Jacob Keller  writes:
> 
>> Yes, I'm actually confused by "git commit " *not* usinng what's
>> in the index already, so I think that isn't intuitive as is.
> 
> You are excused ;-)
> 
> In ancient days, "git commit " was to add the contents
> from working tree files that match  to what is already in
> the index and create a commit from that state.

That is, "git commit " meant --include, being equivalent to
"git commit --include ".

>   This ran against the
> intuition of many users who knew older systems (e.g. cvs) and we had
> to migrate it to the current behaviour by breaking backward
> compatibility.

That is, "git commit " means --only, being equivalent to
"git commit --only ".

But it was always about working tree version of .

And of course older version control systems didn't have the index...
-- 
Jakub Narębski



[PATCH v3 4/4] add: modify already added files when --chmod is given

2016-09-12 Thread Thomas Gummerer
When the chmod option was added to git add, it was hooked up to the diff
machinery, meaning that it only works when the version in the index
differs from the version on disk.

As the option was supposed to mirror the chmod option in update-index,
which always changes the mode in the index, regardless of the status of
the file, make sure the option behaves the same way in git add.

Signed-off-by: Thomas Gummerer 
---
 builtin/add.c  | 36 +---
 builtin/checkout.c |  2 +-
 builtin/commit.c   |  2 +-
 cache.h| 10 +-
 read-cache.c   | 14 ++
 t/t3700-add.sh | 21 +
 6 files changed, 59 insertions(+), 26 deletions(-)

diff --git a/builtin/add.c b/builtin/add.c
index b1dddb4..892198a 100644
--- a/builtin/add.c
+++ b/builtin/add.c
@@ -26,10 +26,25 @@ static int patch_interactive, add_interactive, 
edit_interactive;
 static int take_worktree_changes;
 
 struct update_callback_data {
-   int flags, force_mode;
+   int flags;
int add_errors;
 };
 
+static void chmod_pathspec(struct pathspec *pathspec, int force_mode)
+{
+   int i;
+   
+   for (i = 0; i < active_nr; i++) {
+   struct cache_entry *ce = active_cache[i];
+
+   if (pathspec && !ce_path_match(ce, pathspec, NULL))
+   continue;
+
+   if (chmod_cache_entry(ce, force_mode) < 0)
+   fprintf(stderr, "cannot chmod '%s'", ce->name);
+   }
+}
+
 static int fix_unmerged_status(struct diff_filepair *p,
   struct update_callback_data *data)
 {
@@ -65,8 +80,7 @@ static void update_callback(struct diff_queue_struct *q,
die(_("unexpected diff status %c"), p->status);
case DIFF_STATUS_MODIFIED:
case DIFF_STATUS_TYPE_CHANGED:
-   if (add_file_to_index(_index, path,
-   data->flags, data->force_mode)) {
+   if (add_file_to_index(_index, path, data->flags)) {
if (!(data->flags & ADD_CACHE_IGNORE_ERRORS))
die(_("updating files failed"));
data->add_errors++;
@@ -84,15 +98,14 @@ static void update_callback(struct diff_queue_struct *q,
}
 }
 
-int add_files_to_cache(const char *prefix, const struct pathspec *pathspec,
-   int flags, int force_mode)
+int add_files_to_cache(const char *prefix,
+  const struct pathspec *pathspec, int flags)
 {
struct update_callback_data data;
struct rev_info rev;
 
memset(, 0, sizeof(data));
data.flags = flags;
-   data.force_mode = force_mode;
 
init_revisions(, prefix);
setup_revisions(0, NULL, , NULL);
@@ -281,7 +294,7 @@ static int add_config(const char *var, const char *value, 
void *cb)
return git_default_config(var, value, cb);
 }
 
-static int add_files(struct dir_struct *dir, int flags, int force_mode)
+static int add_files(struct dir_struct *dir, int flags)
 {
int i, exit_status = 0;
 
@@ -294,8 +307,7 @@ static int add_files(struct dir_struct *dir, int flags, int 
force_mode)
}
 
for (i = 0; i < dir->nr; i++)
-   if (add_file_to_index(_index, dir->entries[i]->name,
-   flags, force_mode)) {
+   if (add_file_to_index(_index, dir->entries[i]->name, 
flags)) {
if (!ignore_add_errors)
die(_("adding files failed"));
exit_status = 1;
@@ -441,11 +453,13 @@ int cmd_add(int argc, const char **argv, const char 
*prefix)
 
plug_bulk_checkin();
 
-   exit_status |= add_files_to_cache(prefix, , flags, force_mode);
+   exit_status |= add_files_to_cache(prefix, , flags);
 
if (add_new_files)
-   exit_status |= add_files(, flags, force_mode);
+   exit_status |= add_files(, flags);
 
+   if (force_mode)
+   chmod_pathspec(, force_mode);
unplug_bulk_checkin();
 
 finish:
diff --git a/builtin/checkout.c b/builtin/checkout.c
index 8672d07..a83c78f 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -548,7 +548,7 @@ static int merge_working_tree(const struct checkout_opts 
*opts,
 * entries in the index.
 */
 
-   add_files_to_cache(NULL, NULL, 0, 0);
+   add_files_to_cache(NULL, NULL, 0);
/*
 * NEEDSWORK: carrying over local changes
 * when branches have different end-of-line
diff --git a/builtin/commit.c b/builtin/commit.c
index 77e3dc8..7a1ade0 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -387,7 +387,7 @@ static const char *prepare_index(int argc, const char 
**argv, const char 

[PATCH v3 2/4] update-index: use the same structure for chmod as add

2016-09-12 Thread Thomas Gummerer
While the chmod options for update-index and the add have the same
functionality, they are using different ways to parse and handle the
option internally.  Unify these modes in order to make further
refactoring simpler.

Signed-off-by: Thomas Gummerer 
---
 builtin/update-index.c| 39 +++
 t/t2107-update-index-basic.sh | 13 +
 2 files changed, 32 insertions(+), 20 deletions(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index ba04b19..6d6cddd 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -419,11 +419,12 @@ static int add_cacheinfo(unsigned int mode, const 
unsigned char *sha1,
return 0;
 }
 
-static void chmod_path(int flip, const char *path)
+static void chmod_path(int force_mode, const char *path)
 {
int pos;
struct cache_entry *ce;
unsigned int mode;
+   char flip = force_mode == 0777 ? '+' : '-';
 
pos = cache_name_pos(path, strlen(path));
if (pos < 0)
@@ -432,17 +433,11 @@ static void chmod_path(int flip, const char *path)
mode = ce->ce_mode;
if (!S_ISREG(mode))
goto fail;
-   switch (flip) {
-   case '+':
-   ce->ce_mode |= 0111; break;
-   case '-':
-   ce->ce_mode &= ~0111; break;
-   default:
-   goto fail;
-   }
+   ce->ce_mode = create_ce_mode(force_mode);
cache_tree_invalidate_path(_index, path);
ce->ce_flags |= CE_UPDATE_IN_BASE;
active_cache_changed |= CE_ENTRY_CHANGED;
+
report("chmod %cx '%s'", flip, path);
return;
  fail:
@@ -789,12 +784,16 @@ static int really_refresh_callback(const struct option 
*opt,
 }
 
 static int chmod_callback(const struct option *opt,
-   const char *arg, int unset)
+ const char *arg, int unset)
 {
-   char *flip = opt->value;
-   if ((arg[0] != '-' && arg[0] != '+') || arg[1] != 'x' || arg[2])
-   return error("option 'chmod' expects \"+x\" or \"-x\"");
-   *flip = arg[0];
+   int *force_mode = opt->value;
+   if (!strcmp(arg, "-x"))
+   *force_mode = 0666;
+   else if (!strcmp(arg, "+x"))
+   *force_mode = 0777;
+   else
+   die(_("option 'chmod' expects \"+x\" or \"-x\""));
+
return 0;
 }
 
@@ -917,7 +916,7 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
int read_from_stdin = 0;
int prefix_length = prefix ? strlen(prefix) : 0;
int preferred_index_format = 0;
-   char set_executable_bit = 0;
+   int force_mode = 0;
struct refresh_params refresh_args = {0, _errors};
int lock_error = 0;
int split_index = -1;
@@ -955,7 +954,7 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
PARSE_OPT_NOARG | /* disallow --cacheinfo= form */
PARSE_OPT_NONEG | PARSE_OPT_LITERAL_ARGHELP,
(parse_opt_cb *) cacheinfo_callback},
-   {OPTION_CALLBACK, 0, "chmod", _executable_bit, N_("(+/-)x"),
+   {OPTION_CALLBACK, 0, "chmod", _mode, N_("(+/-)x"),
N_("override the executable bit of the listed files"),
PARSE_OPT_NONEG | PARSE_OPT_LITERAL_ARGHELP,
chmod_callback},
@@ -1055,8 +1054,8 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
setup_work_tree();
p = prefix_path(prefix, prefix_length, path);
update_one(p);
-   if (set_executable_bit)
-   chmod_path(set_executable_bit, p);
+   if (force_mode)
+   chmod_path(force_mode, p);
free(p);
ctx.argc--;
ctx.argv++;
@@ -1100,8 +1099,8 @@ int cmd_update_index(int argc, const char **argv, const 
char *prefix)
}
p = prefix_path(prefix, prefix_length, buf.buf);
update_one(p);
-   if (set_executable_bit)
-   chmod_path(set_executable_bit, p);
+   if (force_mode)
+   chmod_path(force_mode, p);
free(p);
}
strbuf_release();
diff --git a/t/t2107-update-index-basic.sh b/t/t2107-update-index-basic.sh
index dfe02f4..32ac6e0 100755
--- a/t/t2107-update-index-basic.sh
+++ b/t/t2107-update-index-basic.sh
@@ -80,4 +80,17 @@ test_expect_success '.lock files cleaned up' '
)
 '
 
+test_expect_success '--chmod=+x and chmod=-x in the same argument list' '
+   >A &&
+   >B &&
+   git add A B &&
+   git update-index --chmod=+x A --chmod=-x B &&
+   cat >expect 

[PATCH v3 3/4] read-cache: introduce chmod_index_entry

2016-09-12 Thread Thomas Gummerer
As there are chmod options for both add and update-index, introduce a
new chmod_index_entry function to do the work.  Use it in update-index,
while it will be used in add in the next patch.

Signed-off-by: Thomas Gummerer 
---
 builtin/update-index.c |  8 +---
 cache.h|  2 ++
 read-cache.c   | 19 +++
 3 files changed, 22 insertions(+), 7 deletions(-)

diff --git a/builtin/update-index.c b/builtin/update-index.c
index 6d6cddd..809ce97 100644
--- a/builtin/update-index.c
+++ b/builtin/update-index.c
@@ -423,20 +423,14 @@ static void chmod_path(int force_mode, const char *path)
 {
int pos;
struct cache_entry *ce;
-   unsigned int mode;
char flip = force_mode == 0777 ? '+' : '-';
 
pos = cache_name_pos(path, strlen(path));
if (pos < 0)
goto fail;
ce = active_cache[pos];
-   mode = ce->ce_mode;
-   if (!S_ISREG(mode))
+   if (chmod_cache_entry(ce, force_mode) < 0)
goto fail;
-   ce->ce_mode = create_ce_mode(force_mode);
-   cache_tree_invalidate_path(_index, path);
-   ce->ce_flags |= CE_UPDATE_IN_BASE;
-   active_cache_changed |= CE_ENTRY_CHANGED;
 
report("chmod %cx '%s'", flip, path);
return;
diff --git a/cache.h b/cache.h
index b780a91..44a4f76 100644
--- a/cache.h
+++ b/cache.h
@@ -369,6 +369,7 @@ extern void free_name_hash(struct index_state *istate);
 #define remove_file_from_cache(path) remove_file_from_index(_index, (path))
 #define add_to_cache(path, st, flags) add_to_index(_index, (path), (st), 
(flags), 0)
 #define add_file_to_cache(path, flags) add_file_to_index(_index, (path), 
(flags), 0)
+#define chmod_cache_entry(ce, force_mode) chmod_index_entry(_index, (ce), 
(force_mode))
 #define refresh_cache(flags) refresh_index(_index, (flags), NULL, NULL, 
NULL)
 #define ce_match_stat(ce, st, options) ie_match_stat(_index, (ce), (st), 
(options))
 #define ce_modified(ce, st, options) ie_modified(_index, (ce), (st), 
(options))
@@ -584,6 +585,7 @@ extern int remove_file_from_index(struct index_state *, 
const char *path);
 extern int add_to_index(struct index_state *, const char *path, struct stat *, 
int flags, int force_mode);
 extern int add_file_to_index(struct index_state *, const char *path, int 
flags, int force_mode);
 extern struct cache_entry *make_cache_entry(unsigned int mode, const unsigned 
char *sha1, const char *path, int stage, unsigned int refresh_options);
+extern int chmod_index_entry(struct index_state *, struct cache_entry *ce, int 
force_mode);
 extern int ce_same_name(const struct cache_entry *a, const struct cache_entry 
*b);
 extern void set_object_name_for_intent_to_add_entry(struct cache_entry *ce);
 extern int index_name_is_other(const struct index_state *, const char *, int);
diff --git a/read-cache.c b/read-cache.c
index 491e52d..367be57 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -756,6 +756,25 @@ struct cache_entry *make_cache_entry(unsigned int mode,
return ret;
 }
 
+/*
+ * Change the mode of an index entry to force_mode, where force_mode can
+ * either be 0777 or 0666.
+ * Returns -1 if the chmod for the particular cache entry failed (if it's
+ * not a regular file), 0 otherwise.
+ */
+int chmod_index_entry(struct index_state *istate, struct cache_entry *ce,
+  int force_mode)
+{
+   if (!S_ISREG(ce->ce_mode))
+   return -1;
+   ce->ce_mode = create_ce_mode(force_mode);
+   cache_tree_invalidate_path(istate, ce->name);
+   ce->ce_flags |= CE_UPDATE_IN_BASE;
+   istate->cache_changed |= CE_ENTRY_CHANGED;
+
+   return 0;
+}
+
 int ce_same_name(const struct cache_entry *a, const struct cache_entry *b)
 {
int len = ce_namelen(a);
-- 
2.10.0.304.gf2ff484



[PATCH v3 1/4] add: document the chmod option

2016-09-12 Thread Thomas Gummerer
The git add --chmod option was introduced in 4e55ed3 ("add: add
--chmod=+x / --chmod=-x options", 2016-05-31), but was never
documented.  Document the feature.

Signed-off-by: Thomas Gummerer 
---
 Documentation/git-add.txt | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/Documentation/git-add.txt b/Documentation/git-add.txt
index 6a96a66..7ed63dc 100644
--- a/Documentation/git-add.txt
+++ b/Documentation/git-add.txt
@@ -11,7 +11,7 @@ SYNOPSIS
 'git add' [--verbose | -v] [--dry-run | -n] [--force | -f] [--interactive | 
-i] [--patch | -p]
  [--edit | -e] [--[no-]all | --[no-]ignore-removal | [--update | -u]]
  [--intent-to-add | -N] [--refresh] [--ignore-errors] 
[--ignore-missing]
- [--] [...]
+ [--chmod=(+|-)x] [--] [...]
 
 DESCRIPTION
 ---
@@ -165,6 +165,11 @@ for "git add --no-all ...", i.e. ignored removed 
files.
be ignored, no matter if they are already present in the work
tree or not.
 
+--chmod=(+|-)x::
+   Override the executable bit of the added files.  The executable
+   bit is only changed in the index, the files on disk are left
+   unchanged.
+
 \--::
This option can be used to separate command-line options from
the list of files, (useful when filenames might be mistaken
-- 
2.10.0.304.gf2ff484



[PATCH v3 0/4] git add --chmod: always change the file

2016-09-12 Thread Thomas Gummerer
Thanks to Junio on setting me straight on the change of behaviour in
the previous round.

This round includes a similar change, which does however not change
the behaviour of update-index with repeated arguments.  I still think
the unification of the way git add and git update-index handle chmod
is useful, when we can keep the behaviour with multiple arguments in
update-index the same.

Thomas Gummerer (4):
  add: document the chmod option
  update-index: use the same structure for chmod as add
  read-cache: introduce chmod_index_entry
  add: modify already added files when --chmod is given

 Documentation/git-add.txt |  7 ++-
 builtin/add.c | 36 +++---
 builtin/checkout.c|  2 +-
 builtin/commit.c  |  2 +-
 builtin/update-index.c| 45 ++-
 cache.h   | 12 +++-
 read-cache.c  | 33 +++
 t/t2107-update-index-basic.sh | 13 +
 t/t3700-add.sh| 21 
 9 files changed, 118 insertions(+), 53 deletions(-)

-- 
2.10.0.304.gf2ff484



Re: [PATCH v2 12/14] i18n: show-branch: mark error messages for translation

2016-09-12 Thread Junio C Hamano
Jean-Noël AVILA  writes:

> ..., and 
> plural forms can be quite different depending on its value.

AHHhhh, of course you are right.


Re: [PATCH v3 0/2] patch-id for merges

2016-09-12 Thread Junio C Hamano
Jeff King  writes:

> On Mon, Sep 12, 2016 at 10:18:33AM -0700, Junio C Hamano wrote:
>
>> > +static int patch_id_defined(struct commit *commit)
>> > +{
>> > +  /* must be 0 or 1 parents */
>> > +  return !commit->parents || !commit->parents->next;
>> > +}
>> 
>> If we make the first hunk begin like so:
>> 
>> > +  if (commit->parents) {
>> > +  if (!patch_id_defined(commit))
>> > +  return -1;
>> 
>> I wonder if the compiler gives us the same code.
>
> Good idea. I actually put the "patch_id_defined" check outside the "if"
> block you've quoted (otherwise we're making assumptions about the
> contents of patch_id_defined).

Facepalm.  I was mis-reading the condition in the helper function.
Of course, guarding up-front makes more sense.



Re: [PATCH v2] checkout: eliminate unnecessary merge for trivial checkout

2016-09-12 Thread Junio C Hamano
"Ben Peart"  writes:

> I completely agree that optimizing within merge_working_tree would provide 
> more opportunities for optimization.  I can certainly move the test into
> that function as a first step.

Note that "optimizing more" was not the primary point of my
response.

Quite honestly, I'd rather see us speed up _ONLY_ obviously correct
and commonly used cases, while leaving most cases that _MAY_ turn
out to be optimizable (if we did careful analysis) unoptimized, and
instead have them handled by generic but known to be correct
codepath, if it means we do NOT to have to spend mental bandwidth to
analyze not-common case--that is a much better tradeoff.

The suggestion to move the check one level down in the callchain was
primarily to avoid the proposed optimization from being overly eager
and ending up skipping necessary parts of what merge_working_tree()
does (e.g. like I suspected in the review that the proposed patch
skips the check for "you have unmerged entries" situation).


Re: build issues on AIX - aka non-GNU environment, no gnu grep, no gcc

2016-09-12 Thread Junio C Hamano
Michael Felt  writes:

> I had a couple of issues when packaging git for AIX
> a) option -Wall by default - works fine with gcc I am sure, but not so
> well when gcc is not your compiler

That is expected.  As you said, it is merely "by default" and there
are mechanisms like config.mak provided for people to override them.

> b) needs a special (GNU) grep argument (-a from memory). This I
> resolved by downloading and packaging GNU grep. However, I hope this
> has not introduced a new dependency.

Read the Makefile and find SANE_TEXT_GREP, perhaps?

Thanks for helping to spread Git to minority platforms.


Re: Git Miniconference at Plumbers

2016-09-12 Thread Junio C Hamano
Jon Loeliger  writes:

> So, like, David Bainbridge said:
>> Hi,
>> 
>> The subject matter of the conference looks really interesting but I am
>> unlikely to be able to attend, unfortunately.
>> 
>> The subjects being covered like the current State of Git and the
>> Future of Git, for example, deserve much wider exposure, and I would
>> certainly appreciate hearing the thoughts of Junio and others.
>
> Indeed.

You do not need to go to NM to _hear_ that.  Basically, I want us
not to have "big" plans that come from the top.

Now, you heard it ;-)

There are areas that we as Git community would want to address for
some audience that were discovered over the years, and that "some
audience" might even be a large population of Git users, but if that
does not have overlap with Kernel Plumbers, the Plumbers mini-conf
may not be a suitable venue for even mentioning them.  E.g. the
enhancement of the submodule subsystem to allow more end-user facing
commands to recurse into them; rearchitecting the index and redoing
the "sparse checkout" hack so that we can do narrow clones more
properly; supporting "huge objects" better in the object layer,
without having to resort to ugly hacks like GitLFS that will never
be part of the core Git.  These things may all be worth talking
about in wider Git setting, but some of them may be waste of time to
bring up in the Plumbers' venue.

The future of Git is shaped largely by end-user itches.  From my
point of view, Git people are going there primarily to find what
Kernel Plubmbers' itches are, and help assessing the workflow
improvements around Git the Plumbers are wishing for or designing
themselves by being there, because we are at the best position to
tell what kind of enhancement to Git is feasible and what is
unlikely to happen in the near term.


[ANNOUNCE] Git User's Survey 2016

2016-09-12 Thread Jakub Narębski
Hello all,

We would like to ask you a few questions about your use of the Git
version control system. This survey is mainly to understand who is
using Git, how and why.

The results will be published to the Git wiki on the GitSurvey2016
page (https://git.wiki.kernel.org/index.php/GitSurvey2016) and
discussed on the git mailing list.


The survey would be open from 12 September to 20 October 2016.


Please devote a few minutes of your time to fill this simple
questionnaire, it will help a lot the git community to understand your
needs, what you like of Git, and of course what you don't like.

The survey can be found here:
  https://tinyurl.com/GitSurvey2016
  https://survs.com/survey/lmo7ed3439

There is also alternate version which does not require cookies,
but it doesn't allow one to go back to response and edit it.
  https://tinyurl.com/GitSurvey2016-anon
  https://survs.com/survey/naeec8kwd8


P.S. At request I can open a separate channel in survey, with a separate
survey URL, so that responses from particular site or organization could
be separated out.

Please send me a email with name of channel, and I will return with
a separate survey URL to use.  Note that the name of the channel would
be visible to others.

P.P.S. Different announcements use different URLs (different channels)
to better track where one got information about this survey.

Thanks in advance for taking time to answer the survey,
-- 
Jakub Narębski
on behalf of
Git Development Community


Re: [PATCH v2 04/25] sequencer: future-proof remove_sequencer_state()

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> +static const char *get_dir(const struct replay_opts *opts)
> +{
> + return git_path_seq_dir();
> +}

Presumably this is what "In a couple of commits" meant, i.e. opts
will be used soonish.

> -static void remove_sequencer_state(void)
> +static void remove_sequencer_state(const struct replay_opts *opts)
>  {
> - struct strbuf seq_dir = STRBUF_INIT;
> + struct strbuf dir = STRBUF_INIT;
>  
> - strbuf_addstr(_dir, git_path_seq_dir());
> - remove_dir_recursively(_dir, 0);
> - strbuf_release(_dir);
> + strbuf_addf(, "%s", get_dir(opts));
> + remove_dir_recursively(, 0);
> + strbuf_release();
>  }

As long as "who called the sequencer" is the only thing that
determines where the state is kept, this should work fine, I would
think.  I wondered that it would introduce a chicken-and-egg problem
if we had to support "git sequencer --clear-state" command ;-) but
that is not the case.

Good.


Re: [PATCH v2 03/25] sequencer: avoid unnecessary indirection

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> -static int read_populate_opts(struct replay_opts **opts)
> +static int read_populate_opts(struct replay_opts *opts)
>  {
>   if (!file_exists(git_path_opts_file()))
>   return 0;
> @@ -823,7 +823,7 @@ static int read_populate_opts(struct replay_opts **opts)
>* about this case, though, because we wrote that file ourselves, so we
>* are pretty certain that it is syntactically correct.
>*/
> - if (git_config_from_file(populate_opts_cb, git_path_opts_file(), *opts) 
> < 0)
> + if (git_config_from_file(populate_opts_cb, git_path_opts_file(), opts) 
> < 0)
>   return error(_("Malformed options sheet: %s"),
>   git_path_opts_file());
>   return 0;
> @@ -1054,7 +1054,7 @@ static int sequencer_continue(struct replay_opts *opts)
>  
>   if (!file_exists(git_path_todo_file()))
>   return continue_single_pick();
> - if (read_populate_opts() ||
> + if (read_populate_opts(opts) ||
>   read_populate_todo(_list, opts))
>   return -1;

Good!


Re: [PATCH v2 02/25] sequencer: use memoized sequencer directory path

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> Signed-off-by: Johannes Schindelin 
> ---
>  builtin/commit.c |  2 +-
>  sequencer.c  | 11 ++-
>  sequencer.h  |  5 +
>  3 files changed, 8 insertions(+), 10 deletions(-)

OK.  It's nice to see code reduction.

> -#define SEQ_DIR  "sequencer"
> -#define SEQ_HEAD_FILE"sequencer/head"
> -#define SEQ_TODO_FILE"sequencer/todo"
> -#define SEQ_OPTS_FILE"sequencer/opts"
> +const char *git_path_seq_dir(void);



Re: [PATCH v2 01/25] sequencer: use static initializers for replay_opts

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> This change is not completely faithful: instead of initializing all fields
> to 0, we choose to initialize command and subcommand to -1 (instead of
> defaulting to REPLAY_REVERT and REPLAY_NONE, respectively). Practically,
> it makes no difference at all, but future-proofs the code to require
> explicit assignments for both fields.
>
> Signed-off-by: Johannes Schindelin 
> ---

OK.


Re: [PATCH v2 05/25] sequencer: allow the sequencer to take custody of malloc()ed data

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> The sequencer is our attempt to lib-ify cherry-pick. Yet it behaves
> like a one-shot command when it reads its configuration: memory is
> allocated and released only when the command exits.
>
> This is kind of okay for git-cherry-pick, which *is* a one-shot
> command. All the work to make the sequencer its work horse was
> done to allow using the functionality as a library function, though,
> including proper clean-up after use.
>
> This patch introduces an API to pass the responsibility of releasing
> certain memory to the sequencer. Example:
>
>   const char *label =
>   sequencer_entrust(opts, xstrfmt("From: %s", email));

I thought we (not just me) were already pretty clear during the last
round of review that we will not want this entrust() thing.





Re: Gitattributes file is not respected when switching between branches

2016-09-12 Thread Torsten Bögershausen
On 12.09.16 21:35, Torsten Bögershausen wrote:
> On 12.09.16 14:55, Виталий Ищенко wrote:
>> Good day
>>
>> I faced following issue with gitattributes file (at least eol setting)
>> when was trying to force `lf` mode on windows.
>>
>> We have 2 branches: master & dev. With master set as HEAD in repository
>>
>> I've added `.gitattributes` with following content to `dev` branch
>>
>> ```
>> * text eol=lf
>> ```
>>
>> Now when you clone this repo on other machine and checkout dev branch,
>> eol setting is not respected.
>> As a workaround you can rm all files except .git folder and do hard reset.
>>
>> Issue is reproducible on windows & unix versions. Test repo can be
>> found on github
>> https://github.com/betalb/gitattributes-issue
>>
>> master branch - one file without gitattributes
>> feature-branch - .gitattributes added with eol=lf
>> unix-feature-branch - .gitattributes added with eol=crlf
>>
>> Thanks,
>> Vitalii
> Some more information may be needed, to help to debug.
>
> Which version of Git are you using ?
> What does
>
> git ls-files --eol
>
> say ?
Obs, All information was in the email.

tb@xxx:/tmp/gitattributes-issue> git ls-files --eol
i/lfw/lfattr/   testfile-crlf.txt
tb@xxx:/tmp/gitattributes-issue> ls -al
total 8
drwxr-xr-x   4 tbwheel  136 Sep 12 21:38 .
drwxrwxrwt  19 root  wheel  646 Sep 12 21:38 ..
drwxr-xr-x  13 tbwheel  442 Sep 12 21:38 .git
-rw-r--r--   1 tbwheel   60 Sep 12 21:38 testfile-crlf.txt
tb@xxx:/tmp/gitattributes-issue>

Could it be that you didn't commit the file ".gitattributes" ?
This could help:
git add .gitattributes && git commit -m "Add .gitattributes"









Re: [PATCH v2 2/4] update-index: use the same structure for chmod as add

2016-09-12 Thread Thomas Gummerer
On 09/11, Junio C Hamano wrote:
> Thomas Gummerer  writes:
> 
> > @@ -955,10 +941,8 @@ int cmd_update_index(int argc, const char **argv, 
> > const char *prefix)
> > PARSE_OPT_NOARG | /* disallow --cacheinfo= form */
> > PARSE_OPT_NONEG | PARSE_OPT_LITERAL_ARGHELP,
> > (parse_opt_cb *) cacheinfo_callback},
> > -   {OPTION_CALLBACK, 0, "chmod", _executable_bit, N_("(+/-)x"),
> > -   N_("override the executable bit of the listed files"),
> > -   PARSE_OPT_NONEG | PARSE_OPT_LITERAL_ARGHELP,
> > -   chmod_callback},
> > +   OPT_STRING( 0, "chmod", _arg, N_("(+/-)x"),
> > +   N_("override the executable bit of the listed files")),
> > {OPTION_SET_INT, 0, "assume-unchanged", _valid_only, NULL,
> > N_("mark files as \"not changing\""),
> > PARSE_OPT_NOARG | PARSE_OPT_NONEG, NULL, MARK_FLAG},
> > @@ -1018,6 +1002,15 @@ int cmd_update_index(int argc, const char **argv, 
> > const char *prefix)
> > if (argc == 2 && !strcmp(argv[1], "-h"))
> > usage_with_options(update_index_usage, options);
> >  
> > +   if (!chmod_arg)
> > +   force_mode = 0;
> > +   else if (!strcmp(chmod_arg, "-x"))
> > +   force_mode = 0666;
> > +   else if (!strcmp(chmod_arg, "+x"))
> > +   force_mode = 0777;
> > +   else
> > +   die(_("option 'chmod' expects \"+x\" or \"-x\""));
> > +
> 
> I am afraid that this changes the behaviour drastically.
> 
> "git update-index" is an oddball command that takes options and then
> processes them immediately, exactly because it was designed to take
> 
>   git update-index --chmod=-x A --chmod=+x B --add C
> 
> and say things like "A and B are not in the index and you are
> attempting to add them before giving me --add option".
> 
>   git update-index --add --chmod=-x A --chmod=+x B C
> 
> is expected to add A as non-executable, and B and C as executable.
> Many exotic parse-options callback mechanisms used in this command
> were invented exactly to support its quirky way of not doing "get a
> list of options and use the last one".  And this patch breaks it for
> only one option without changing the others.
> 
> If we were willing to take such a big backward compatiblity hit in
> the upcoming release (which I personally won't be affected, but old
> scripts by others need to be audited and adjusted, which I won't
> volunteer to do myself), we should make such a change consistently,
> e.g. "git update-index A --add --remove B" should no longer error
> out when it sees A and it is not yet in the index because "--add"
> hasn't been given yet, or A is in the index but is missing from the
> working tree because "--remove" hasn't been given yet.  Then it may
> be more justifiable if "update-index --chmod=-x A --chmod=+x B"
> added A as an executable.  With the current form of this patch, it
> is not.

Thanks for the explanation, this change in backwards compatibility is
certainly not what I intended, but rather something I missed while
cooking up this patch.

> Can we do this "fix" without this change?

Yeah, let me see what I can come up with in a re-roll.

Thanks,
Thomas


Re: [PATCH v2 0/5] Pull out require_clean_work_tree() functionality from builtin/pull.c

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> Johannes Schindelin (5):
>   pull: drop confusing prefix parameter of die_on_unclean_work_tree()
>   pull: make code more similar to the shell script again
>   Make the require_clean_work_tree() function truly reusable
>   Export also the has_un{staged,committed}_changed() functions
>   wt-status: teach has_{unstaged,uncommitted}_changes() about submodules

Other than two minor things I've already mentioned, this round looks
ready to be queued.  Thanks.


Re: Gitattributes file is not respected when switching between branches

2016-09-12 Thread Torsten Bögershausen
On 12.09.16 14:55, Виталий Ищенко wrote:
> Good day
>
> I faced following issue with gitattributes file (at least eol setting)
> when was trying to force `lf` mode on windows.
>
> We have 2 branches: master & dev. With master set as HEAD in repository
>
> I've added `.gitattributes` with following content to `dev` branch
>
> ```
> * text eol=lf
> ```
>
> Now when you clone this repo on other machine and checkout dev branch,
> eol setting is not respected.
> As a workaround you can rm all files except .git folder and do hard reset.
>
> Issue is reproducible on windows & unix versions. Test repo can be
> found on github
> https://github.com/betalb/gitattributes-issue
>
> master branch - one file without gitattributes
> feature-branch - .gitattributes added with eol=lf
> unix-feature-branch - .gitattributes added with eol=crlf
>
> Thanks,
> Vitalii
Some more information may be needed, to help to debug.

Which version of Git are you using ?
What does

git ls-files --eol

say ?




Re: [PATCH 2/2 v7] pack-objects: use reachability bitmap index when generating non-stdout pack

2016-09-12 Thread Junio C Hamano
Kirill Smelkov  writes:

>> This is v7, but as I understand your numbering, it goes with v5 of patch
>> 1/2 that I just reviewed (usually we just increment the version number
>> on the whole series and treat it as a unit, even if some patches didn't
>> change from version to version).
>
> The reason those patches are having their own numbers is that they are
> orthogonal to each other and can be applied / rejected independently.

In such a case, we wouldn't label them 1/2 and 2/2, which tells the
readers that these are two pieces that are to be applied together to
form a single unit of change.  That was what these numbered patches
with different version numbers confusing.

> But ok, since now we have them considered both together, their next
> versions posted will be uniform v8.

OK.  Thanks for clarifying.


Re: [PATCH] t/perf/run: Don't forget to copy config.mak.autogen & friends to build area

2016-09-12 Thread Junio C Hamano
Junio C Hamano  writes:

>>  build_git_rev () {
>>  rev=$1
>> -cp ../../config.mak build/$rev/config.mak
>> +cp -t build/$rev ../../{config.mak,config.mak.autogen,config.status}
>
> That unfortunately is a GNUism -t with a bash-ism {a,b,c}; just keep
> it simple and stupid to make sure it is portable.
>
> This is not even a part that we measure the runtime for anyway.

In other words, something along this line, perhaps.

 t/perf/run | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/t/perf/run b/t/perf/run
index aa383c2..69a4714 100755
--- a/t/perf/run
+++ b/t/perf/run
@@ -30,7 +30,10 @@ unpack_git_rev () {
 }
 build_git_rev () {
rev=$1
-   cp -t build/$rev ../../{config.mak,config.mak.autogen,config.status}
+   for config in config.mak config.mak.autogen config.status
+   do
+   cp "../../$config" "build/$rev/"
+   done
(cd build/$rev && make $GIT_PERF_MAKE_OPTS) ||
die "failed to build revision '$mydir'"
 }
-- 
2.10.0-342-gc678130



Re: [PATCH] t/perf/run: Don't forget to copy config.mak.autogen & friends to build area

2016-09-12 Thread Junio C Hamano
Kirill Smelkov  writes:

> Otherwise for people who use autotools-based configure in main worktree,
> the performance testing results will be inconsistent as work and build
> trees could be using e.g. different optimization levels.
>
> See e.g.
>
>   
> http://public-inbox.org/git/20160818175222.bmm3ivjheokf2...@sigill.intra.peff.net/
>
> for example.
>
> NOTE config.status has to be copied because otherwise without it the build
> would want to run reconfigure this way loosing just copied config.mak.autogen.
>
> Signed-off-by: Kirill Smelkov 
> ---
>  ( Resending as separate patch-mail, just in case )
>
>  t/perf/run | 2 +-
>  1 file changed, 1 insertion(+), 1 deletion(-)
>
> diff --git a/t/perf/run b/t/perf/run
> index cfd7012..aa383c2 100755
> --- a/t/perf/run
> +++ b/t/perf/run
> @@ -30,7 +30,7 @@ unpack_git_rev () {
>  }
>  build_git_rev () {
>   rev=$1
> - cp ../../config.mak build/$rev/config.mak
> + cp -t build/$rev ../../{config.mak,config.mak.autogen,config.status}

That unfortunately is a GNUism -t with a bash-ism {a,b,c}; just keep
it simple and stupid to make sure it is portable.

This is not even a part that we measure the runtime for anyway.

>   (cd build/$rev && make $GIT_PERF_MAKE_OPTS) ||
>   die "failed to build revision '$mydir'"
>  }


Re: What's cooking in git.git (Sep 2016, #03; Fri, 9)

2016-09-12 Thread Junio C Hamano
Jeff King  writes:

> I happened to notice today that this topic needs a minor tweak:
>
> -- >8 --
> Subject: [PATCH] add_delta_base_cache: use list_for_each_safe
>
> We may remove elements from the list while we are iterating,
> which requires using a second temporary pointer. Otherwise
> stepping to the next element of the list might involve
> looking at freed memory (which generally works in practice,
> as we _just_ freed it, but of course is wrong to rely on;
> valgrind notices it).

I failed to notice it, too.  Thanks.

> Signed-off-by: Jeff King 
> ---
>  sha1_file.c | 4 ++--
>  1 file changed, 2 insertions(+), 2 deletions(-)
>
> diff --git a/sha1_file.c b/sha1_file.c
> index a57b71d..132c861 100644
> --- a/sha1_file.c
> +++ b/sha1_file.c
> @@ -2187,11 +2187,11 @@ static void add_delta_base_cache(struct packed_git 
> *p, off_t base_offset,
>   void *base, unsigned long base_size, enum object_type type)
>  {
>   struct delta_base_cache_entry *ent = xmalloc(sizeof(*ent));
> - struct list_head *lru;
> + struct list_head *lru, *tmp;
>  
>   delta_base_cached += base_size;
>  
> - list_for_each(lru, _base_cache_lru) {
> + list_for_each_safe(lru, tmp, _base_cache_lru) {
>   struct delta_base_cache_entry *f =
>   list_entry(lru, struct delta_base_cache_entry, lru);
>   if (delta_base_cached <= delta_base_cache_limit)


Re: [PATCH v2 3/5] Make the require_clean_work_tree() function truly reusable

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> It is remarkable that libgit.a did not sport this function yet... Let's
> move it into a more prominent (and into an actually reusable) spot:
> wt-status.[ch].
>
> Signed-off-by: Johannes Schindelin 
> ---

I do not think "truly" is needed at all.

It was not reusable.  It is somewhat reusable after this patch.  It
may or may not be "truly" reusable depending on the need of future
patches, which we do not know yet.

I agree wt-status.[ch] is a good home for this feature.

Thanks.


Re: [PATCH v2 2/5] pull: make code more similar to the shell script again

2016-09-12 Thread Junio C Hamano
Johannes Schindelin  writes:

> When converting the pull command to a builtin, the
> require_clean_work_tree() function was renamed and the pull-specific
> parts hard-coded.
>
> This makes it impossible to reuse the code, so let's modify the code to
> make it more similar to the original shell script again.
>
> Signed-off-by: Johannes Schindelin 
> ---
>  builtin/pull.c | 29 +++--
>  1 file changed, 19 insertions(+), 10 deletions(-)
>
> diff --git a/builtin/pull.c b/builtin/pull.c
> index d4bd635..a3ed054 100644
> --- a/builtin/pull.c
> +++ b/builtin/pull.c
> @@ -365,10 +365,11 @@ static int has_uncommitted_changes(void)
>   * If the work tree has unstaged or uncommitted changes, dies with the
>   * appropriate message.
>   */
> -static void die_on_unclean_work_tree(void)
> +static int require_clean_work_tree(const char *action, const char *hint,
> + int gently)
>  {
>   struct lock_file *lock_file = xcalloc(1, sizeof(*lock_file));
> - int do_die = 0;
> + int err = 0;
>  
>   hold_locked_index(lock_file, 0);
>   refresh_cache(REFRESH_QUIET);
> @@ -376,20 +377,27 @@ static void die_on_unclean_work_tree(void)
>   rollback_lock_file(lock_file);
>  
>   if (has_unstaged_changes()) {
> - error(_("Cannot pull with rebase: You have unstaged changes."));
> - do_die = 1;
> + error(_("Cannot %s: You have unstaged changes."), _(action));
> + err = 1;
>   }
> ...
> + error(_("Cannot %s: Your index contains uncommitted 
> changes."),
> +   _(action));
> + err = 1;

These are much better than the one in v1.

Depending on the target language, the translators may have to phrase
these not like "Cannot :" but "Cannot perform :" where
the "" is for "the act of doing ", if the "cannot" part
in their language needs to change shape depending on the verb.
Hence, I think the translators need a /* TRANSLATORS: ... */ comment
that tells them what is interpolated here are their translations for
phrases like "pull with rebase".  You do not have to be exhausitive
in the comment; a representative example would help the translators
see the message in context.

Other than that (and the need to further clean-up error() and die()
to begin with lower-case to match the modern practice in a separate
follow-up series), this looks ready to be queued.

Thanks.



RE: [PATCH v2] checkout: eliminate unnecessary merge for trivial checkout

2016-09-12 Thread Ben Peart


> -Original Message-
> From: Junio C Hamano [mailto:gits...@pobox.com]
> Sent: Friday, September 9, 2016 5:55 PM
> To: Ben Peart 
> Cc: git@vger.kernel.org; pclo...@gmail.com; Ben Peart
> 
> Subject: Re: [PATCH v2] checkout: eliminate unnecessary merge for trivial
> checkout
> 
> Ben Peart  writes:
> 
> > @@ -802,6 +806,87 @@ static void orphaned_commit_warning(struct
> commit *old, struct commit *new)
> > free(refs.objects);
> >  }
> >
> > +static int needs_working_tree_merge(const struct checkout_opts *opts,
> > +   const struct branch_info *old,
> > +   const struct branch_info *new)
> > +{
> > +   /*
> > +* We must do the merge if we are actually moving to a new
> > +* commit tree.
> > +*/
> > +   if (!old->commit || !new->commit ||
> > +   oidcmp(>commit->tree->object.oid, >commit-
> >tree->object.oid))
> > +   return 1;
> 
> A huge helper function helps it somewhat, compared with the earlier
> unreadable mess ;-).
> 
> Are we certain that at this point the commit objects are both parsed and
> their tree->object.oid are both valid?
> 
> > +   /*
> > +* Honor the explicit request for a three-way merge or to throw away
> > +* local changes
> > +*/
> > +   if (opts->merge || opts->force)
> > +   return 1;
> 
> Hmph, "git checkout -m HEAD" wouldn't have to do anything wrt the index
> status, no?
> 
> For that matter, neither "git checkout -f HEAD".  Unless we rely on
> unpack_trees() to write over the working tree files.
> 
> ... me goes and looks, and finds that merge_working_tree()
> indeed does have a logic to do quite different thing when
> "--force" is given.
> 
> This makes me wonder if the "merge_working_tree() is expensive, so
> selectively skip calling it" approach is working at a wrong level.
> Wouldn't the merge_working_tree() function itself a better place to do
this
> kind of "we may not have to do the full two-way merge"
> optimization?  It already looks at opts and does things differently (e.g.
when
> running with "--force", it does not even call unpack).
> If you can optimize even more by looking at other fields in opts to avoid
> unpack, that would fit better with the structure of the code that we
already
> have.
> 

I completely agree that optimizing within merge_working_tree would provide 
more opportunities for optimization.  I can certainly move the test into
that 
function as a first step.  I've looked into it a little but came to the
conclusion
that it will be non-trivial to determine how to ensure the minimal work is 
done for any arbitrary set of options passed in without breaking something.


While I'd love to see that work done, I just don't have the time to pursue
further 
optimizations that may be available at this point in time.  There are other
things 
(like speeding up status on large repos) I need to work on first.

> > +   /*
> > +* Checking out the requested commit may require updating the
> working
> > +* directory and index, let the merge handle it.
> > +*/
> > +   if (opts->force_detach)
> > +   return 1;
> 
> This does not make much sense to me.  After "git branch -f foo HEAD",
there
> is no difference in what is done to the index and the working directory
> between "git checkout --detach HEAD" and "git checkout foo", is there?
> 

I'm attempting to optimize for a single, common path where checkout is 
just creating a new branch (ie "git checkout -b foo") to minimize the 
possibility that I broke some other path I didn't fully understand.  

It is quite possible that there are cases where the index and/or working
directory do not need to be updated or where a merge won't actually 
change anything that this test is not optimized for.  Perhaps I should 
emphasize the "*may* require updating the working directory" in my 
comment.  Because it *could* happen, I let the code fall back to the
old behavior.

> > +   /*
> > +* opts->writeout_stage cannot be used with switching branches so is
> > +* not tested here
> > +*/
> > +
> > +/*
> > + * Honor the explicit ignore requests
> > + */
> > +   if (!opts->overwrite_ignore || opts->ignore_skipworktree
> > +   || opts->ignore_other_worktrees)
> > +   return 1;
> 
> Style.  I think you earlier had
> 
>   if (a || b ||
> c)
> 
> and here you are doing
> 
>   if (a || b
> || c)
> 
> Please pick one and stick to it (I'd pick the former).

Done

> 
> > +/*
> > +* If we're not creating a new branch, by definition we're changing
> > +* the existing one so need to do the merge
> > +*/
> > +   if (!opts->new_branch)
> > +   return 1;
> 
> Sorry, but I fail to follow that line of thought.  Starting from a state
where
> your HEAD points at commit A,
> 
>  - switching to a detached HEAD pointing at a commit A,
>  - switching to an existing branch that already points at the same
>commit 

RE: Git Miniconference at Plumbers

2016-09-12 Thread David Bainbridge
Hi,

The subject matter of the conference looks really interesting but I am unlikely 
to be able to attend, unfortunately.

The subjects being covered like the current State of Git and the Future of Git, 
for example, deserve much wider exposure, and I would certainly appreciate 
hearing the thoughts of Junio and others.

Does anyone know whether the sessions will be recorded in any way?

Thanks

David

DAVID BAINBRIDGE
Product Manager SW Development

Ericsson
8500 Decarie
Montreal, H4P 2N2, Canada
david.bainbri...@ericsson.com
www.ericsson.com

-Original Message-
From: git-ow...@vger.kernel.org [mailto:git-ow...@vger.kernel.org] On Behalf Of 
Jon Loeliger
Sent: Monday, September 12, 2016 09:33
To: Jeff King 
Cc: git@vger.kernel.org
Subject: Re: Git Miniconference at Plumbers

So, like, Jeff King said:
> On Tue, Sep 06, 2016 at 12:42:04PM -0500, Jon Loeliger wrote:
> 
> > I have recently been enlisted by folks at the Linux Foundation to 
> > help run a Miniconference on Git at the Plumbers Conference [*] this 
> > fall.
> 
> I see the conference runs for 4 days; I assume the Git portion will 
> just be one day.

Yes, and yes.  Likely even "a half day".

> Do you know yet which day?

No.  Sorry.

> -Peff

jdl


Re: Git Miniconference at Plumbers

2016-09-12 Thread Jon Loeliger
So, like, David Bainbridge said:
> Hi,
> 
> The subject matter of the conference looks really interesting but I am
> unlikely to be able to attend, unfortunately.
> 
> The subjects being covered like the current State of Git and the
> Future of Git, for example, deserve much wider exposure, and I would
> certainly appreciate hearing the thoughts of Junio and others.

Indeed.

> Does anyone know whether the sessions will be recorded in any way?

I am uncertain about outright recording (digital video/audio),
but there will be at least summarizing notes taken and posted.
Anyone wishing to record the talks/discussions is likely welcome
to do so.

HTH,
jdl


Re: [PATCH v3 0/2] patch-id for merges

2016-09-12 Thread Jeff King
On Mon, Sep 12, 2016 at 10:18:33AM -0700, Junio C Hamano wrote:

> > +static int patch_id_defined(struct commit *commit)
> > +{
> > +   /* must be 0 or 1 parents */
> > +   return !commit->parents || !commit->parents->next;
> > +}
> 
> If we make the first hunk begin like so:
> 
> > +   if (commit->parents) {
> > +   if (!patch_id_defined(commit))
> > +   return -1;
> 
> I wonder if the compiler gives us the same code.

Good idea. I actually put the "patch_id_defined" check outside the "if"
block you've quoted (otherwise we're making assumptions about the
contents of patch_id_defined).

I didn't check that the compiler generates the same code, but I'm
willing to blindly put faith in it. Either it will inline
patch_id_defined and optimize out the double-conditional, or it probably
doesn't matter in practice. Either way, the compiler is probably smarter
than me, and we should shoot for readability and not repeating
ourselves.

> > I'd probably do a preparatory patch to drop the return value from
> > add_commit_patch_id(). No callers actually look at it.

I decided against this. Technically add_commit_patch_id() can return an
error via the header-only diff_flush_patch_id(), and we'd be shutting
that down. Of course no callers actually _care_ about that right now, so
it doesn't matter at this point. But I'd prefer to punt it down the line
for when somebody does (and the solution may be to distinguish between
those two return codes, or it may be for the caller to have access to
patch_id_defined(); we won't know until we see the code).

So here's the replacement for 2/2:

-- >8 --
Subject: [PATCH] patch-ids: refuse to compute patch-id for merge commit

The patch-id code which powers "log --cherry-pick" doesn't
look at whether each commit is a merge or not. It just feeds
the commit's first parent to the diff, and ignores any
additional parents.

In theory, this might be useful if you wanted to find
equivalence between, say, a merge commit and a squash-merge
that does the same thing.  But it also promotes a false
equivalence between distinct merges. For example, every
"merge -s ours" would look identical to an empty commit
(which is true in a sense, but presumably there was a value
in merging in the discarded history). Since patch-ids are
meant for throwing away duplicates, we should err on the
side of _not_ matching such merges.

Moreover, we may spend a lot of extra time computing these
merge diffs. In the case that inspired this patch, a "git
format-patch --cherry-pick" dropped from over 3 minutes to
less than 3 seconds.

This seems pretty drastic, but is easily explained. The
command was invoked by a "git rebase" of an older topic
branch; there had been tens of thousands of commits on the
upstream branch in the meantime. In addition, this project
used a topic-branch workflow with occasional "back-merges"
from "master" to each topic (to resolve conflicts on the
topics rather than in the merge commits). So there were not
only extra merges, but the diffs for these back-merges were
generally quite large (because they represented _everything_
that had been merged to master since the topic branched).

This patch treats a merge fed to commit_patch_id() or
add_commit_patch_id() as an error, and a lookup for such a
merge via has_commit_patch_id() will always return NULL.
An earlier version of the patch tried to distinguish between
"error" and "patch id for merges not defined", but that
becomes unnecessarily complicated. The only callers are:

  1. revision traversals which want to do --cherry-pick;
 they call add_commit_patch_id(), but do not care if it
 fails. They only want to add what we can, look it up
 later with has_commit_patch_id(), and err on the side
 of not-matching.

  2. format-patch --base, which calls commit_patch_id().
 This _does_ notice errors, but should never feed a
 merge in the first place (and if it were to do so
 accidentally, then this patch is a strict improvement;
 we notice the bug rather than generating a bogus
 patch-id).

So in both cases, this does the right thing.

Helped-by: Johannes Schindelin 
Signed-off-by: Jeff King 
---
 patch-ids.c | 15 +++
 1 file changed, 15 insertions(+)

diff --git a/patch-ids.c b/patch-ids.c
index 77e4663..ce285c2 100644
--- a/patch-ids.c
+++ b/patch-ids.c
@@ -4,9 +4,18 @@
 #include "sha1-lookup.h"
 #include "patch-ids.h"
 
+static int patch_id_defined(struct commit *commit)
+{
+   /* must be 0 or 1 parents */
+   return !commit->parents || !commit->parents->next;
+}
+
 int commit_patch_id(struct commit *commit, struct diff_options *options,
unsigned char *sha1, int diff_header_only)
 {
+   if (!patch_id_defined(commit))
+   return -1;
+
if (commit->parents)
diff_tree_sha1(commit->parents->item->object.oid.hash,
   commit->object.oid.hash, "", 

Re: [RFC/PATCH] ls-files: adding support for submodules

2016-09-12 Thread Brandon Williams
Thanks for all the comments.  What it sounds like is that using ls-files as
a means to power a recursive git-grep may not be like the best approach (I
assumed that would be the case but thought it a decent place to start).

I agree that not all operating modes would be useful for a recursive
ls-files, which is why I initially don't have support for them.  I guess
the question would be which modes would be worth supporting in a recursive
case?


Re: [PATCH 1/2 v5] pack-objects: respect --local/--honor-pack-keep/--incremental when bitmap is in use

2016-09-12 Thread Junio C Hamano
Kirill Smelkov  writes:

> On Thu, Aug 18, 2016 at 01:52:22PM -0400, Jeff King wrote:
> > 
> > Good to know there is no regression. It is curious that there is a
> > slight _improvement_ across the board. Do we have an explanation for
> > that? It seems odd that noise would be so consistent.
>
> Yes, I too thought it and it turned out to be t/perf/run does not copy
> config.mak.autogen & friends to build/ and I'm using autoconf with
> CFLAGS="-march=native -O3 ..."
>
> Junio, I could not resist to the following:
> ...
> With corrected t/perf/run the timings are more realistic - e.g. 3
> consecutive runs of `./run 56dfeb62 . ./p5310-pack-bitmaps.sh`:

Wow, that's what I call an exchange with quality during a review ;-)

Thanks for the curiosity and digging it to the root cause of the
anomaly.  Some GNUism/bashism in the way copying is spelled in the
patch bothers me, but that is easily fixable.

Thanks.


  1   2   >