[PATCH 07/10] diff.c: convert fn_out_consume to use emit_line_*

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

In the coming patches we want to eliminate writes that do not go
though emit_line_* as we later want to hook into the emit_line_*
functions to add a buffered output. To make the conversion easy,
add a function that takes a format string.

Signed-off-by: Stefan Beller 
---
 diff.c | 24 +++-
 1 file changed, 19 insertions(+), 5 deletions(-)

diff --git a/diff.c b/diff.c
index 2aefd0f..7dcef73 100644
--- a/diff.c
+++ b/diff.c
@@ -493,6 +493,19 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
emit_line_0(o, set, reset, line[0], line+1, len-1);
 }
 
+static void emit_line_fmt(struct diff_options *o,
+ const char *set, const char *reset,
+ const char *fmt, ...)
+{
+   struct strbuf sb = STRBUF_INIT;
+   va_list ap;
+   va_start(ap, fmt);
+   strbuf_vaddf(, fmt, ap);
+   va_end(ap);
+
+   emit_line(o, set, reset, sb.buf, sb.len);
+}
+
 static int new_blank_line_at_eof(struct emit_callback *ecbdata, const char 
*line, int len)
 {
if (!((ecbdata->ws_rule & WS_BLANK_AT_EOF) &&
@@ -1217,7 +1230,6 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
const char *context = diff_get_color(ecbdata->color_diff, DIFF_CONTEXT);
const char *reset = diff_get_color(ecbdata->color_diff, DIFF_RESET);
struct diff_options *o = ecbdata->opt;
-   const char *line_prefix = diff_line_prefix(o);
 
o->found_changes = 1;
 
@@ -1233,10 +1245,12 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
name_a_tab = strchr(ecbdata->label_path[0], ' ') ? "\t" : "";
name_b_tab = strchr(ecbdata->label_path[1], ' ') ? "\t" : "";
 
-   fprintf(o->file, "%s%s--- %s%s%s\n",
-   line_prefix, meta, ecbdata->label_path[0], reset, 
name_a_tab);
-   fprintf(o->file, "%s%s+++ %s%s%s\n",
-   line_prefix, meta, ecbdata->label_path[1], reset, 
name_b_tab);
+   emit_line_fmt(o, meta, reset, "--- %s%s\n",
+ ecbdata->label_path[0], name_a_tab);
+
+   emit_line_fmt(o, meta, reset, "+++ %s%s\n",
+ ecbdata->label_path[1], name_b_tab);
+
ecbdata->label_path[0] = ecbdata->label_path[1] = NULL;
}
 
-- 
2.7.4



[PATCH 02/10] diff: emit_{add, del, context}_line to increase {pre,post}image line count

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

At all call sites of emit_{add, del, context}_line we increment the line
count, so move it into the respective functions to make the code at the
calling site a bit clearer.

Signed-off-by: Stefan Beller 
---
 diff.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/diff.c b/diff.c
index aa50b2d..156c2aa 100644
--- a/diff.c
+++ b/diff.c
@@ -536,6 +536,7 @@ static void emit_add_line(const char *reset,
  struct emit_callback *ecbdata,
  const char *line, int len)
 {
+   ecbdata->lno_in_postimage++;
emit_line_checked(reset, ecbdata, line, len,
  DIFF_FILE_NEW, WSEH_NEW, '+');
 }
@@ -544,6 +545,7 @@ static void emit_del_line(const char *reset,
  struct emit_callback *ecbdata,
  const char *line, int len)
 {
+   ecbdata->lno_in_preimage++;
emit_line_checked(reset, ecbdata, line, len,
  DIFF_FILE_OLD, WSEH_OLD, '-');
 }
@@ -552,6 +554,8 @@ static void emit_context_line(const char *reset,
  struct emit_callback *ecbdata,
  const char *line, int len)
 {
+   ecbdata->lno_in_postimage++;
+   ecbdata->lno_in_preimage++;
emit_line_checked(reset, ecbdata, line, len,
  DIFF_CONTEXT, WSEH_CONTEXT, ' ');
 }
@@ -662,13 +666,10 @@ static void emit_rewrite_lines(struct emit_callback *ecb,
 
endp = memchr(data, '\n', size);
len = endp ? (endp - data + 1) : size;
-   if (prefix != '+') {
-   ecb->lno_in_preimage++;
+   if (prefix != '+')
emit_del_line(reset, ecb, data, len);
-   } else {
-   ecb->lno_in_postimage++;
+   else
emit_add_line(reset, ecb, data, len);
-   }
size -= len;
data += len;
}
@@ -1293,16 +1294,12 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
 
switch (line[0]) {
case '+':
-   ecbdata->lno_in_postimage++;
emit_add_line(reset, ecbdata, line + 1, len - 1);
break;
case '-':
-   ecbdata->lno_in_preimage++;
emit_del_line(reset, ecbdata, line + 1, len - 1);
break;
case ' ':
-   ecbdata->lno_in_postimage++;
-   ecbdata->lno_in_preimage++;
emit_context_line(reset, ecbdata, line + 1, len - 1);
break;
default:
-- 
2.7.4



[PATCH 10/10] submodule.c: convert show_submodule_summary to use emit_line_fmt

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

---
 diff.c  |  5 ++---
 diff.h  |  3 +++
 submodule.c | 42 +-
 submodule.h |  3 +--
 4 files changed, 23 insertions(+), 30 deletions(-)

diff --git a/diff.c b/diff.c
index 255fbf3..c00306b 100644
--- a/diff.c
+++ b/diff.c
@@ -493,7 +493,7 @@ static void emit_line(struct diff_options *o, const char 
*set, const char *reset
emit_line_0(o, set, reset, line[0], line+1, len-1);
 }
 
-static void emit_line_fmt(struct diff_options *o,
+void emit_line_fmt(struct diff_options *o,
  const char *set, const char *reset,
  const char *fmt, ...)
 {
@@ -2310,8 +2310,7 @@ static void builtin_diff(const char *name_a,
(!two->mode || S_ISGITLINK(two->mode))) {
const char *del = diff_get_color_opt(o, DIFF_FILE_OLD);
const char *add = diff_get_color_opt(o, DIFF_FILE_NEW);
-   show_submodule_summary(o->file, one->path ? one->path : 
two->path,
-   line_prefix,
+   show_submodule_summary(o, one->path ? one->path : two->path,
one->oid.hash, two->oid.hash,
two->dirty_submodule,
meta, del, add, reset);
diff --git a/diff.h b/diff.h
index 7883729..1699d9c 100644
--- a/diff.h
+++ b/diff.h
@@ -180,6 +180,9 @@ struct diff_options {
int diff_path_counter;
 };
 
+void emit_line_fmt(struct diff_options *o, const char *set, const char *reset,
+  const char *fmt, ...);
+
 enum color_diff {
DIFF_RESET = 0,
DIFF_CONTEXT = 1,
diff --git a/submodule.c b/submodule.c
index 1b5cdfb..c817b46 100644
--- a/submodule.c
+++ b/submodule.c
@@ -304,8 +304,8 @@ static int prepare_submodule_summary(struct rev_info *rev, 
const char *path,
return prepare_revision_walk(rev);
 }
 
-static void print_submodule_summary(struct rev_info *rev, FILE *f,
-   const char *line_prefix,
+static void print_submodule_summary(struct rev_info *rev,
+   struct diff_options *o,
const char *del, const char *add, const char *reset)
 {
static const char format[] = "  %m %s";
@@ -317,24 +317,16 @@ static void print_submodule_summary(struct rev_info *rev, 
FILE *f,
ctx.date_mode = rev->date_mode;
ctx.output_encoding = get_log_output_encoding();
strbuf_setlen(, 0);
-   strbuf_addstr(, line_prefix);
-   if (commit->object.flags & SYMMETRIC_LEFT) {
-   if (del)
-   strbuf_addstr(, del);
-   }
-   else if (add)
-   strbuf_addstr(, add);
format_commit_message(commit, format, , );
-   if (reset)
-   strbuf_addstr(, reset);
-   strbuf_addch(, '\n');
-   fprintf(f, "%s", sb.buf);
+   if (commit->object.flags & SYMMETRIC_LEFT)
+   emit_line_fmt(o, del, reset, "%s\n", sb.buf);
+   else if (add)
+   emit_line_fmt(o, add, reset, "%s\n", sb.buf);
}
strbuf_release();
 }
 
-void show_submodule_summary(FILE *f, const char *path,
-   const char *line_prefix,
+void show_submodule_summary(struct diff_options *o, const char *path,
unsigned char one[20], unsigned char two[20],
unsigned dirty_submodule, const char *meta,
const char *del, const char *add, const char *reset)
@@ -359,30 +351,30 @@ void show_submodule_summary(FILE *f, const char *path,
message = "(revision walker failed)";
 
if (dirty_submodule & DIRTY_SUBMODULE_UNTRACKED)
-   fprintf(f, "%sSubmodule %s contains untracked content\n",
-   line_prefix, path);
+   emit_line_fmt(o, 0, 0,
+ "Submodule %s contains untracked content\n", 
path);
if (dirty_submodule & DIRTY_SUBMODULE_MODIFIED)
-   fprintf(f, "%sSubmodule %s contains modified content\n",
-   line_prefix, path);
+   emit_line_fmt(o, 0, 0,
+ "Submodule %s contains modified content\n", path);
 
if (!hashcmp(one, two)) {
strbuf_release();
return;
}
 
-   strbuf_addf(, "%s%sSubmodule %s %s..", line_prefix, meta, path,
-   find_unique_abbrev(one, DEFAULT_ABBREV));
+   strbuf_addf(, "Submodule %s %s..", path,
+   find_unique_abbrev(one, DEFAULT_ABBREV));
if (!fast_backward && !fast_forward)
strbuf_addch(, '.');
strbuf_addf(, "%s", find_unique_abbrev(two, DEFAULT_ABBREV));
if (message)
-   strbuf_addf(, " %s%s\n", message, reset);
+   strbuf_addf(, " %s\n", 

[PATCH 09/10] diff.c: convert emit_rewrite_lines to use emit_line_0

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

---
 diff.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index 98d6655..255fbf3 100644
--- a/diff.c
+++ b/diff.c
@@ -690,7 +690,7 @@ static void emit_rewrite_lines(struct emit_callback *ecb,
if (!endp) {
const char *context = diff_get_color(ecb->color_diff,
 DIFF_CONTEXT);
-   putc('\n', ecb->opt->file);
+   emit_line_0(ecb->opt, 0, 0, 0, "\n", 1);
emit_line_0(ecb->opt, context, reset, '\\',
nneof, strlen(nneof));
}
-- 
2.7.4



[PATCH 05/10] diff.c: reintroduce diff_flush_patch for all files

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

---
 diff.c | 17 -
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/diff.c b/diff.c
index 85fb887..87b1bb2 100644
--- a/diff.c
+++ b/diff.c
@@ -4608,6 +4608,17 @@ void diff_warn_rename_limit(const char *varname, int 
needed, int degraded_cc)
warning(rename_limit_advice, varname, needed);
 }
 
+static void diff_flush_patch(struct diff_options *o)
+{
+   int i;
+   struct diff_queue_struct *q = _queued_diff;
+   for (i = 0; i < q->nr; i++) {
+   struct diff_filepair *p = q->queue[i];
+   if (check_pair_status(p))
+   diff_flush_patch_filepair(p, o);
+   }
+}
+
 void diff_flush(struct diff_options *options)
 {
struct diff_queue_struct *q = _queued_diff;
@@ -4702,11 +4713,7 @@ void diff_flush(struct diff_options *options)
}
}
 
-   for (i = 0; i < q->nr; i++) {
-   struct diff_filepair *p = q->queue[i];
-   if (check_pair_status(p))
-   diff_flush_patch_filepair(p, options);
-   }
+   diff_flush_patch(options);
}
 
if (output_format & DIFF_FORMAT_CALLBACK)
-- 
2.7.4



[PATCH 04/10] diff.c: rename diff_flush_patch to diff_flush_patch_filepair

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

Signed-off-by: Stefan Beller 
---
 diff.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/diff.c b/diff.c
index 9d2e704..85fb887 100644
--- a/diff.c
+++ b/diff.c
@@ -4176,7 +4176,7 @@ int diff_unmodified_pair(struct diff_filepair *p)
return 0;
 }
 
-static void diff_flush_patch(struct diff_filepair *p, struct diff_options *o)
+static void diff_flush_patch_filepair(struct diff_filepair *p, struct 
diff_options *o)
 {
if (diff_unmodified_pair(p))
return;
@@ -4672,7 +4672,7 @@ void diff_flush(struct diff_options *options)
DIFF_OPT_TST(options, EXIT_WITH_STATUS) &&
DIFF_OPT_TST(options, DIFF_FROM_CONTENTS)) {
/*
-* run diff_flush_patch for the exit status. setting
+* run diff_flush_patch_filepair for the exit status. setting
 * options->file to /dev/null should be safe, because we
 * aren't supposed to produce any output anyway.
 */
@@ -4685,7 +4685,7 @@ void diff_flush(struct diff_options *options)
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
if (check_pair_status(p))
-   diff_flush_patch(p, options);
+   diff_flush_patch_filepair(p, options);
if (options->found_changes)
break;
}
@@ -4705,7 +4705,7 @@ void diff_flush(struct diff_options *options)
for (i = 0; i < q->nr; i++) {
struct diff_filepair *p = q->queue[i];
if (check_pair_status(p))
-   diff_flush_patch(p, options);
+   diff_flush_patch_filepair(p, options);
}
}
 
-- 
2.7.4



[PATCH 06/10] diff.c: emit_line_0 can handle no color

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

---
 diff.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 87b1bb2..2aefd0f 100644
--- a/diff.c
+++ b/diff.c
@@ -473,11 +473,13 @@ static void emit_line_0(struct diff_options *o, const 
char *set, const char *res
}
 
if (len || !nofirst) {
-   fputs(set, file);
+   if (set)
+   fputs(set, file);
if (!nofirst)
fputc(first, file);
fwrite(line, len, 1, file);
-   fputs(reset, file);
+   if (reset)
+   fputs(reset, file);
}
if (has_trailing_carriage_return)
fputc('\r', file);
-- 
2.7.4



[PATCH 08/10] diff.c: convert emit_rewrite_diff to use emit_line_*

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

Signed-off-by: Stefan Beller 
---
 diff.c | 33 ++---
 1 file changed, 18 insertions(+), 15 deletions(-)

diff --git a/diff.c b/diff.c
index 7dcef73..98d6655 100644
--- a/diff.c
+++ b/diff.c
@@ -653,17 +653,17 @@ static void remove_tempfile(void)
}
 }
 
-static void print_line_count(FILE *file, int count)
+static void add_line_count(struct strbuf *out, int count)
 {
switch (count) {
case 0:
-   fprintf(file, "0,0");
+   strbuf_addstr(out, "0,0");
break;
case 1:
-   fprintf(file, "1");
+   strbuf_addstr(out, "1");
break;
default:
-   fprintf(file, "1,%d", count);
+   strbuf_addf(out, "1,%d", count);
break;
}
 }
@@ -714,7 +714,7 @@ static void emit_rewrite_diff(const char *name_a,
char *data_one, *data_two;
size_t size_one, size_two;
struct emit_callback ecbdata;
-   const char *line_prefix = diff_line_prefix(o);
+   struct strbuf out = STRBUF_INIT;
 
if (diff_mnemonic_prefix && DIFF_OPT_TST(o, REVERSE_DIFF)) {
a_prefix = o->b_prefix;
@@ -752,20 +752,23 @@ static void emit_rewrite_diff(const char *name_a,
ecbdata.lno_in_preimage = 1;
ecbdata.lno_in_postimage = 1;
 
+   emit_line_fmt(o, metainfo, reset, "--- %s%s\n", a_name.buf, name_a_tab);
+   emit_line_fmt(o, metainfo, reset, "+++ %s%s\n", b_name.buf, name_b_tab);
+
lc_a = count_lines(data_one, size_one);
lc_b = count_lines(data_two, size_two);
-   fprintf(o->file,
-   "%s%s--- %s%s%s\n%s%s+++ %s%s%s\n%s%s@@ -",
-   line_prefix, metainfo, a_name.buf, name_a_tab, reset,
-   line_prefix, metainfo, b_name.buf, name_b_tab, reset,
-   line_prefix, fraginfo);
+
+   strbuf_addstr(, "@@ -");
if (!o->irreversible_delete)
-   print_line_count(o->file, lc_a);
+   add_line_count(, lc_a);
else
-   fprintf(o->file, "?,?");
-   fprintf(o->file, " +");
-   print_line_count(o->file, lc_b);
-   fprintf(o->file, " @@%s\n", reset);
+   strbuf_addstr(, "?,?");
+   strbuf_addstr(, " +");
+   add_line_count(, lc_b);
+   strbuf_addstr(, " @@\n");
+   emit_line(o, fraginfo, reset, out.buf, out.len);
+   strbuf_release();
+
if (lc_a && !o->irreversible_delete)
emit_rewrite_lines(, '-', data_one, size_one);
if (lc_b)
-- 
2.7.4



[PATCH 01/10] diff: move line ending check into emit_hunk_header

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

Signed-off-by: Stefan Beller 
---
 diff.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index cc8e812..aa50b2d 100644
--- a/diff.c
+++ b/diff.c
@@ -610,6 +610,9 @@ static void emit_hunk_header(struct emit_callback *ecbdata,
}
 
strbuf_add(, line + len, org_len - len);
+   if (line[org_len - 1] != '\n')
+   strbuf_addch(, '\n');
+
emit_line(ecbdata->opt, "", "", msgbuf.buf, msgbuf.len);
strbuf_release();
 }
@@ -1247,8 +1250,6 @@ static void fn_out_consume(void *priv, char *line, 
unsigned long len)
len = sane_truncate_line(ecbdata, line, len);
find_lno(line, ecbdata);
emit_hunk_header(ecbdata, line, len);
-   if (line[len-1] != '\n')
-   putc('\n', o->file);
return;
}
 
-- 
2.7.4



[PATCH 03/10] diff.c: drop tautologous condition in emit_line_0

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

When `first` equals '\r', then it cannot equal '\n', which makes
`!has_trailing_newline` always true if `first` is '\r'.
Drop that check.

Signed-off-by: Stefan Beller 
---
 diff.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/diff.c b/diff.c
index 156c2aa..9d2e704 100644
--- a/diff.c
+++ b/diff.c
@@ -460,8 +460,7 @@ static void emit_line_0(struct diff_options *o, const char 
*set, const char *res
 
if (len == 0) {
has_trailing_newline = (first == '\n');
-   has_trailing_carriage_return = (!has_trailing_newline &&
-   (first == '\r'));
+   has_trailing_carriage_return = (first == '\r');
nofirst = has_trailing_newline || has_trailing_carriage_return;
} else {
has_trailing_newline = (len > 0 && line[len-1] == '\n');
-- 
2.7.4



[PATCH 00/10] Another preparatory series for rename detection

2016-09-10 Thread Stefan Beller
From: Stefan Beller 

This applies on top of sb/diff-cleanup.

The long term strategy is to get the rename detection done, is to run
any output through emit_line as there we can either write it to the
output file or buffer it internally across file pairs and work on the
buffered output.

As I got stuck with the word diff as well as the correct implementation
of builtin_diff, I am sending this out as an intermediate state.
As the diff code is central to Git, I'd rather send these cleanups
early so I do not run into merge conflicts with other people down
the road.

Thanks,
Stefan

Stefan Beller (10):
  diff: move line ending check into emit_hunk_header
  diff: emit_{add, del, context}_line to increase {pre,post}image line
count
  diff.c: drop tautologous condition in emit_line_0
  diff.c: rename diff_flush_patch to diff_flush_patch_filepair
  diff.c: reintroduce diff_flush_patch for all files
  diff.c: emit_line_0 can handle no color
  diff.c: convert fn_out_consume to use emit_line_*
  diff.c: convert emit_rewrite_diff to use emit_line_*
  diff.c: convert emit_rewrite_lines to use emit_line_0
  submodule.c: convert show_submodule_summary to use emit_line_fmt

 diff.c  | 114 
 diff.h  |   3 ++
 submodule.c |  42 +-
 submodule.h |   3 +-
 4 files changed, 89 insertions(+), 73 deletions(-)

-- 
2.7.4



R

2016-09-10 Thread Виктория Кузнецова


Отправлено с iPhone

Re: [PATCH v7 10/10] convert: add filter..process option

2016-09-10 Thread Torsten Bögershausen
[]

One general question up here, more comments inline.
The current order for a clean-filter is like this, I removed the error handling:

int convert_to_git()
{
ret |= apply_filter(path, src, len, -1, dst, filter);
if (ret && dst) {
src = dst->buf;
len = dst->len;
}
ret |= crlf_to_git(path, src, len, dst, ca.crlf_action, checksafe);
return ret | ident_to_git(path, src, len, dst, ca.ident);
}

The first step is the clean filter, the CRLF-LF conversion (if needed),
then ident.
The current implementation streams the whole file content to the filter,
(STDIN of the filter) and reads back STDOUT from the filter into a STRBUF.
This is to use the UNIX-like STDIN--STDOUT method for writing a filter.

However, int would_convert_to_git_filter_fd() and convert_to_git_filter_fd()
offer a sort of short-cut:
The filter reads from the file directly, and the output of the filter is
read into a STRBUF.

It looks as if the multi-filter approach can use this in a similar way:
Give the pathname to the filter, the filter opens the file for reading
and stream the result via the pkt-line protocol into Git.
This needs some more changes, and may be very well go into a separate patch
series. (and should).

What I am asking for:
When a multi-filter is used, the content is handled to the filter via pkt-line,
and the result is given to Git via pkt-line ?
Nothing wrong with it, I just wonder, if it should be mentioned somewhere.

> diff --git a/contrib/long-running-filter/example.pl 
> b/contrib/long-running-filter/example.pl
> new file mode 100755
> index 000..279fbfb
> --- /dev/null
> +++ b/contrib/long-running-filter/example.pl
> @@ -0,0 +1,111 @@
> +#!/usr/bin/perl
> +#
> +# Example implementation for the Git filter protocol version 2
> +# See Documentation/gitattributes.txt, section "Filter Protocol"
> +#
> +
> +use strict;
> +use warnings;
> +
> +my $MAX_PACKET_CONTENT_SIZE = 65516;
> +
> +sub packet_read {
> +my $buffer;
> +my $bytes_read = read STDIN, $buffer, 4;
> +if ( $bytes_read == 0 ) {
> +
> +# EOF - Git stopped talking to us!
> +exit();
> +}
> +elsif ( $bytes_read != 4 ) {
> +die "invalid packet size '$bytes_read' field";
> +}

This is half-kosher, I would say,
(And I really. really would like to see an implementation in C ;-)

A read function may look like this:

   ret = read(0, , 4);
   if (ret < 0) {
 /* Error, nothing we can do */
 exit(1);
   } else if (ret == 0) {
 /* EOF  */
 exit(0);
   } else if (ret < 4) {
 /* 
  * Go and read more, until we have 4 bytes or EOF or Error */
   } else {
 /* Good case, see below */
   }

> +my $pkt_size = hex($buffer);
> +if ( $pkt_size == 0 ) {
> +return ( 1, "" );
> +}
> +elsif ( $pkt_size > 4 ) {
> +my $content_size = $pkt_size - 4;
> +$bytes_read = read STDIN, $buffer, $content_size;
> +if ( $bytes_read != $content_size ) {
> +die "invalid packet ($content_size expected; $bytes_read read)";
> +}
> +return ( 0, $buffer );
> +}
> +else {
> +die "invalid packet size";
> +}
> +}
> +
> +sub packet_write {
> +my ($packet) = @_;
> +print STDOUT sprintf( "%04x", length($packet) + 4 );
> +print STDOUT $packet;
> +STDOUT->flush();
> +}
> +
> +sub packet_flush {
> +print STDOUT sprintf( "%04x", 0 );
> +STDOUT->flush();
> +}
> +
> +( packet_read() eq ( 0, "git-filter-client" ) ) || die "bad initialization";
> +( packet_read() eq ( 0, "version=2" ) ) || die "bad version";
> +( packet_read() eq ( 1, "" ) )  || die "bad version end";
> +
> +packet_write("git-filter-server\n");
> +packet_write("version=2\n");
> +
> +( packet_read() eq ( 0, "clean=true" ) )  || die "bad capability";
> +( packet_read() eq ( 0, "smudge=true" ) ) || die "bad capability";
> +( packet_read() eq ( 1, "" ) )|| die "bad capability end";
> +
> +packet_write( "clean=true\n" );
> +packet_write( "smudge=true\n" );
> +packet_flush();
> +
> +while (1) {
> +my ($command) = packet_read() =~ /^command=([^=]+)\n$/;
> +my ($pathname) = packet_read() =~ /^pathname=([^=]+)\n$/;
> +
> +packet_read();
> +
> +my $input = "";
> +{
> +binmode(STDIN);
> +my $buffer;
> +my $done = 0;
> +while ( !$done ) {
> +( $done, $buffer ) = packet_read();
> +$input .= $buffer;
> +}
> +}
> +
> +my $output;
> +if ( $command eq "clean" ) {
> +### Perform clean here ###
> +$output = $input;
> +}
> +elsif ( $command eq "smudge" ) {
> +### Perform smudge here ###
> +$output = $input;
> +}
> +else {
> +die "bad command '$command'";
> +}
> +
> +packet_write("status=success\n");
> +packet_flush();
> +while ( length($output) > 0 ) {
> +my $packet = substr( $output, 0, 

Re: How to simulate a real checkout to test a new smudge filter?

2016-09-10 Thread john smith
On 9/10/16, Jakub Narębski  wrote:
> You would need post-checkout hook together with clean / smudge filters
> (though you could get by without smudge filter, at least in theory...).
> The `post-checkout` hook could run e.g. "git checkout -- '*.conf'"
> to force use of smudge filter, after checking that it was a change
> of branch ("git checkout ").  See githooks(5) for details
> (last, third parameter passed to hook script is 1, rather than 0).

Neat idea.  I could still have an advantage of filter as git status
wouldn't show that files are changed and there would be no merge
conflicts and additionally filters would run after every checkout even
if files stayed the same.  But are sure that `git checkout -- '
actually forces checkout and runs smudge filter?  It doesn't work for
me neither with 2.9.0 nor 2.10.0.85.gcda1bbd.  For a minimal example,
let's say that I have this in .gitattributes:

a filter=test-filter

And this in .git/config:

smudge = touch /tmp/SMUDGE_RUN && cat

And also have a tracked file named `a' in the repository. Now this
should create /tmp/SMUDGE_RUN:

git checkout  -- a

but it doesn't.

This also doesn't work:

git checkout -f -- a

But this does:

git checkout HEAD^ && git co -

> Unfortunately I don't see a way to query .gitattributes files to
> find out all patterns that match specific attribute; you would need
> to duplicate configuration:
>
>   .gitattributes:
>   *.conf filter=transform
>
>   .git/config
>   [filter "transform"]
>   clean  = replace-with-placeholder %f
>   smudge = expand-with-branchname %f
>
>   .git/hooks/post-checkout
>   #!/bin/sh
>
>   test "$3" -eq "1" && git checkout -- '*.conf'

It's a secondary issue, but - in my repository I only have one .conf
file on every branch with real definitions of machine-specific
variables and a number of various config files for various programs
such as bash/.bashrc or screen/.screenrc so in my case it would be:

  .gitattributes:
  bash/.basrc filter=transform
  git/.gitconfig filter=transform
  (...)

  .git/config
  [filter "transform"]
clean  = replace-with-placeholder %f
smudge = expand-with-branchname %f

  .git/hooks/post-checkout
  #!/bin/sh

  test "$3" -eq "1" && (
   git checkout -- bash/.bashrc &&
   git checkout -- git/.gitconfig
   (...)
   )

Of course a list of files in .git/hooks/post-checkout could be
generated at runtime from first column of .gitattributes.

-- 



[PATCH] t/perf/run: Don't forget to copy config.mak.autogen & friends to build area

2016-09-10 Thread Kirill Smelkov
Otherwise for people who use autotools-based configure in main worktree,
the performance testing results will be inconsistent as work and build
trees could be using e.g. different optimization levels.

See e.g.


http://public-inbox.org/git/20160818175222.bmm3ivjheokf2...@sigill.intra.peff.net/

for example.

NOTE config.status has to be copied because otherwise without it the build
would want to run reconfigure this way loosing just copied config.mak.autogen.

Signed-off-by: Kirill Smelkov 
---
 ( Resending as separate patch-mail, just in case )

 t/perf/run | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/perf/run b/t/perf/run
index cfd7012..aa383c2 100755
--- a/t/perf/run
+++ b/t/perf/run
@@ -30,7 +30,7 @@ unpack_git_rev () {
 }
 build_git_rev () {
rev=$1
-   cp ../../config.mak build/$rev/config.mak
+   cp -t build/$rev ../../{config.mak,config.mak.autogen,config.status}
(cd build/$rev && make $GIT_PERF_MAKE_OPTS) ||
die "failed to build revision '$mydir'"
 }
-- 
2.9.2.701.gf965a18.dirty


[PATCH 2/2 v8] pack-objects: use reachability bitmap index when generating non-stdout pack

2016-09-10 Thread Kirill Smelkov
Starting from 6b8fda2d (pack-objects: use bitmaps when packing objects)
if a repository has bitmap index, pack-objects can nicely speedup
"Counting objects" graph traversal phase. That however was done only for
case when resultant pack is sent to stdout, not written into a file.

The reason here is for on-disk repack by default we want:

- to produce good pack (with bitmap index not-yet-packed objects are
  emitted to pack in suboptimal order).

- to use more robust pack-generation codepath (avoiding possible
  bugs in bitmap code and possible bitmap index corruption).

Jeff King further explains:

The reason for this split is that pack-objects tries to determine how
"careful" it should be based on whether we are packing to disk or to
stdout. Packing to disk implies "git repack", and that we will likely
delete the old packs after finishing. We want to be more careful (so
as not to carry forward a corruption, and to generate a more optimal
pack), and we presumably run less frequently and can afford extra CPU.
Whereas packing to stdout implies serving a remote via "git fetch" or
"git push". This happens more frequently (e.g., a server handling many
fetching clients), and we assume the receiving end takes more
responsibility for verifying the data.

But this isn't always the case. One might want to generate on-disk
packfiles for a specialized object transfer. Just using "--stdout" and
writing to a file is not optimal, as it will not generate the matching
pack index.

So it would be useful to have some way of overriding this heuristic:
to tell pack-objects that even though it should generate on-disk
files, it is still OK to use the reachability bitmaps to do the
traversal.

So we can teach pack-objects to use bitmap index for initial object
counting phase when generating resultant pack file too:

- if we take care to not let it be activated under git-repack:

  See above about repack robustness and not forward-carrying corruption.

- if we know bitmap index generation is not enabled for resultant pack:

  The current code has singleton bitmap_git, so it cannot work
  simultaneously with two bitmap indices.

  We also want to avoid (at least with current implementation)
  generating bitmaps off of bitmaps. The reason here is: when generating
  a pack, not-yet-packed objects will be emitted into pack in
  suboptimal order and added to tail of the bitmap as "extended entries".
  When the resultant pack + some new objects in associated repository
  are in turn used to generate another pack with bitmap, the situation
  repeats: new objects are again not emitted optimally and just added to
  bitmap tail - not in recency order.

  So the pack badness can grow over time when at each step we have
  bitmapped pack + some other objects. That's why we want to avoid
  generating bitmaps off of bitmaps, not to let pack badness grow.

- if we keep pack reuse enabled still only for "send-to-stdout" case:

  Because pack-to-file needs to generate index for destination pack, and
  currently on pack reuse raw entries are directly written out to the
  destination pack by write_reused_pack(), bypassing needed for pack index
  generation bookkeeping done by regular codepath in write_one() and
  friends.

  ( In the future we might teach pack-reuse code about cases when index
also needs to be generated for resultant pack and remove
pack-reuse-only-for-stdout limitation )

This way for pack-objects -> file we get nice speedup:

erp5.git[1] (~230MB) extracted from ~ 5GB lab.nexedi.com backup
repository managed by git-backup[2] via

time echo 0186ac99 | git pack-objects --revs erp5pack

before:  37.2s
after:   26.2s

And for `git repack -adb` packed git.git

time echo 5c589a73 | git pack-objects --revs gitpack

before:   7.1s
after:3.6s

i.e. it can be 30% - 50% speedup for pack extraction.

git-backup extracts many packs on repositories restoration. That was my
initial motivation for the patch.

[1] https://lab.nexedi.com/nexedi/erp5
[2] https://lab.nexedi.com/kirr/git-backup

NOTE

Jeff also suggests that pack.useBitmaps was probably a mistake to
introduce originally. This way we are not adding another config point,
but instead just always default to-file pack-objects not to use bitmap
index: Tools which need to generate on-disk packs with using bitmap, can
pass --use-bitmap-index explicitly. And git-repack does never pass
--use-bitmap-index, so this way we can be sure regular on-disk repacking
remains robust.

NOTE2

`git pack-objects --stdout >file.pack` + `git index-pack file.pack` is much 
slower
than `git pack-objects file.pack`. Extracting erp5.git pack from
lab.nexedi.com backup repository:

$ time echo 0186ac99 | git pack-objects --stdout --revs 
>erp5pack-stdout.pack

real0m22.309s
user0m21.148s
sys 0m0.932s

$ time git index-pack erp5pack-stdout.pack

real0m50.873s   <-- more than 2 

[PATCH 1/2 v8] pack-objects: respect --local/--honor-pack-keep/--incremental when bitmap is in use

2016-09-10 Thread Kirill Smelkov
Since 6b8fda2d (pack-objects: use bitmaps when packing objects) there
are two codepaths in pack-objects: with & without using bitmap
reachability index.

However add_object_entry_from_bitmap(), despite its non-bitmapped
counterpart add_object_entry(), in no way does check for whether --local
or --honor-pack-keep or --incremental should be respected. In
non-bitmapped codepath this is handled in want_object_in_pack(), but
bitmapped codepath has simply no such checking at all.

The bitmapped codepath however was allowing to pass in all those options
and with bitmap indices still being used under such conditions -
potentially giving wrong output (e.g. including objects from non-local or
.keep'ed pack).

We can easily fix this by noting the following: when an object comes to
add_object_entry_from_bitmap() it can come for two reasons:

1. entries coming from main pack covered by bitmap index, and
2. object coming from, possibly alternate, loose or other packs.

"2" can be already handled by want_object_in_pack() and to cover
"1" we can teach want_object_in_pack() to expect that *found_pack can be
non-NULL, meaning calling client already found object's pack entry.

In want_object_in_pack() we care to start the checks from already found
pack, if we have one, this way determining the answer right away
in case neither --local nor --honour-pack-keep are active. In
particular, as p5310-pack-bitmaps.sh shows (3 consecutive runs), we do
not do harm to served-with-bitmap clones performance-wise:

Test  56dfeb62  this tree
-
5310.2: repack to disk9.08(8.20+0.25)   9.09(8.14+0.32) +0.1%
5310.3: simulated clone   1.92(2.12+0.08)   1.93(2.12+0.09) +0.5%
5310.4: simulated fetch   0.82(1.07+0.04)   0.82(1.06+0.04) +0.0%
5310.6: partial bitmap1.96(2.42+0.13)   1.95(2.40+0.15) -0.5%

Test  56dfeb62  this tree
-
5310.2: repack to disk9.11(8.16+0.32)   9.11(8.19+0.28) +0.0%
5310.3: simulated clone   1.93(2.14+0.07)   1.92(2.11+0.10) -0.5%
5310.4: simulated fetch   0.82(1.06+0.04)   0.82(1.04+0.05) +0.0%
5310.6: partial bitmap1.95(2.38+0.16)   1.94(2.39+0.14) -0.5%

Test  56dfeb62  this tree
-
5310.2: repack to disk9.13(8.17+0.31)   9.07(8.13+0.28) -0.7%
5310.3: simulated clone   1.92(2.13+0.07)   1.91(2.12+0.06) -0.5%
5310.4: simulated fetch   0.82(1.08+0.03)   0.82(1.08+0.03) +0.0%
5310.6: partial bitmap1.96(2.43+0.14)   1.96(2.42+0.14) +0.0%

with delta timings showing they are all within noise from run to run.

In the general case we do not want to call find_pack_entry_one() more than
once, because it is expensive. This patch splits the loop in
want_object_in_pack() into two parts: finding the object and seeing if it
impacts our choice to include it in the pack. We may call the inexpensive
want_found_object() twice, but we will never call find_pack_entry_one() if we
do not need to.

I appreciate help and discussing this change with Junio C Hamano and
Jeff King.

Signed-off-by: Kirill Smelkov 
Signed-off-by: Junio C Hamano 
---
 builtin/pack-objects.c  | 97 -
 t/t5310-pack-bitmaps.sh | 92 ++
 2 files changed, 156 insertions(+), 33 deletions(-)

diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
index c4c2a3c..19668d3 100644
--- a/builtin/pack-objects.c
+++ b/builtin/pack-objects.c
@@ -944,13 +944,48 @@ static int have_duplicate_entry(const unsigned char *sha1,
return 1;
 }
 
+static int want_found_object(int exclude, struct packed_git *p)
+{
+   if (exclude)
+   return 1;
+   if (incremental)
+   return 0;
+
+   /*
+* When asked to do --local (do not include an object that appears in a
+* pack we borrow from elsewhere) or --honor-pack-keep (do not include
+* an object that appears in a pack marked with .keep), finding a pack
+* that matches the criteria is sufficient for us to decide to omit it.
+* However, even if this pack does not satisfy the criteria, we need to
+* make sure no copy of this object appears in _any_ pack that makes us
+* to omit the object, so we need to check all the packs.
+*
+* We can however first check whether these options can possible matter;
+* if they do not matter we know we want the object in generated pack.
+* Otherwise, we signal "-1" at the end to tell the caller that we do
+* not know either way, and it needs to check more packs.
+*/
+   if (!ignore_packed_keep &&
+   (!local || !have_non_local_packs))
+   return 1;
+

Re: [PATCH 2/2 v7] pack-objects: use reachability bitmap index when generating non-stdout pack

2016-09-10 Thread Kirill Smelkov
On Thu, Aug 18, 2016 at 02:06:15PM -0400, Jeff King wrote:
> On Tue, Aug 09, 2016 at 10:32:17PM +0300, Kirill Smelkov wrote:
> 
> > Subject: Re: [PATCH 2/2 v7] pack-objects: use reachability bitmap index when
> >generating non-stdout pack
> 
> This is v7, but as I understand your numbering, it goes with v5 of patch
> 1/2 that I just reviewed (usually we just increment the version number
> on the whole series and treat it as a unit, even if some patches didn't
> change from version to version).

The reason those patches are having their own numbers is that they are
orthogonal to each other and can be applied / rejected independently.
Since I though Junio might want to pick them up as separate topics they
were versioned separately.

But ok, since now we have them considered both together, their next
versions posted will be uniform v8.


> > So we can teach pack-objects to use bitmap index for initial object
> > counting phase when generating resultant pack file too:
> > 
> > - if we care it is not activated under git-repack:
> 
> Do you mean "if we take care that it is not..." here?
> 
> (I think you might just be getting tripped up in the English idioms;
> "care" means that we have a preference; "to take care" means that we are
> being careful).

Ok, I've might have been tripped and thanks for the catch up. I've changed to

"if we take care to not let it be activated under git-repack"

> 
> > - if we know bitmap index generation is not enabled for resultant pack:
> > 
> >   Current code has singleton bitmap_git so cannot work simultaneously
> >   with two bitmap indices.
> 
> Minor English fixes:
> 
>   The current code has a singleton bitmap_git, so it cannot work
>   simultaneously with two bitmap indices.

ok.

> > - if we keep pack reuse enabled still only for "send-to-stdout" case:
> > 
> >   Because on pack reuse raw entries are directly written out to destination
> >   pack by write_reused_pack() bypassing needed for pack index generation
> >   bookkeeping done by regular codepath in write_one() and friends.
> 
> Ditto on English:
> 
>   On pack reuse raw entries are directly written out to the destination
>   pack by write_reused_pack(), bypassing the need for pack index
>   generation bookkeeping done by the regular code path in write_one()
>   and friends.
> 
> I think this is missing the implication. Why wouldn't we want to reuse
> in this case? Certainly we don't when doing a "careful" on-disk repack.
> I suspect the answer is that we cannot write a ".idx" off of the result
> of write_reused_pack(), and write-to-disk always includes the .idx.

Yes, mentioning pack-to-file needs to generate .idx makes it more clear
and thanks for pointing this out. I've changed this item to the
following (picking some of your English corrections):

- if we keep pack reuse enabled still only for "send-to-stdout" case:

  Because pack-to-file needs to generate index for destination pack, and
  currently on pack reuse raw entries are directly written out to the
  destination pack by write_reused_pack(), bypassing needed for pack index
  generation bookkeeping done by regular codepath in write_one() and
  friends.

  ( In the future we might teach pack-reuse code about cases when index
also needs to be generated for resultant pack and remove
pack-reuse-only-for-stdout limitation )

Hope it is ok.

> > More context:
> > 
> > http://marc.info/?t=14679210141=1=2
> 
> Can we turn this into a link to public-inbox? We have just been bit by
> all of our old links to gmane dying, and they cannot easily be replaced
> because they use a gmane-specific article number. public-inbox URLs use
> message-ids, which should be usable for other archives if public-inbox
> goes away.

Yes, makes sense to put msgid here. I've added

http://public-inbox.org/git/20160707190917.20011-1-k...@nexedi.com/T/#t


> > diff --git a/builtin/pack-objects.c b/builtin/pack-objects.c
> > index b1007f2..c92d7fc 100644
> > --- a/builtin/pack-objects.c
> > +++ b/builtin/pack-objects.c
> 
> The code here looks fine.

Thanks.

> > diff --git a/t/t5310-pack-bitmaps.sh b/t/t5310-pack-bitmaps.sh
> > index a278d30..9602e9a 100755
> > --- a/t/t5310-pack-bitmaps.sh
> > +++ b/t/t5310-pack-bitmaps.sh
> > @@ -196,6 +196,18 @@ test_expect_success 'pack-objects respects --local 
> > (non-local bitmapped pack)' '
> > ! has_any packbitmap.objects 3b.objects
> >  '
> >  
> > +test_expect_success 'pack-objects to file can use bitmap' '
> > +   # make sure we still have 1 bitmap index from previous tests
> > +   ls .git/objects/pack/ | grep bitmap >output &&
> > +   test_line_count = 1 output &&
> > +   # verify equivalent packs are generated with/without using bitmap index
> > +   packasha1=$(git pack-objects --no-use-bitmap-index --all packa 
> >  > +   packbsha1=$(git pack-objects --use-bitmap-index --all packb  > &&
> > +   list_packed_objects packa.objects &&
> > +   

Re: [PATCH 1/2 v5] pack-objects: respect --local/--honor-pack-keep/--incremental when bitmap is in use

2016-09-10 Thread Kirill Smelkov
On Thu, Aug 18, 2016 at 01:52:22PM -0400, Jeff King wrote:
> On Tue, Aug 09, 2016 at 10:31:43PM +0300, Kirill Smelkov wrote:
> 
> > Since 6b8fda2d (pack-objects: use bitmaps when packing objects) there
> > are two codepaths in pack-objects: with & without using bitmap
> > reachability index.
> 
> Sorry, I got distracted from reviewing these patches. I'll give them a
> detailed look now and hopefully we can finalize the topic.

Jeff, thanks for feedback. On my side I'm sorry for the delay because I
was travelling and only recently got back to work.

> > In want_object_in_pack() we care to start the checks from already found
> > pack, if we have one, this way determining the answer right away
> > in case neither --local nor --honour-pack-keep are active. In
> > particular, as p5310-pack-bitmaps.sh shows, we do not do harm to
> > served-with-bitmap clones performance-wise:
> > 
> > Test  56dfeb62  this tree
> > -
> > 5310.2: repack to disk9.63(8.67+0.33)   9.47(8.55+0.28) -1.7%
> > 5310.3: simulated clone   2.07(2.17+0.12)   2.03(2.14+0.12) -1.9%
> > 5310.4: simulated fetch   0.78(1.03+0.02)   0.76(1.00+0.03) -2.6%
> > 5310.6: partial bitmap1.97(2.43+0.15)   1.92(2.36+0.14) -2.5%
> > 
> > with all differences strangely showing we are a bit faster now, but
> > probably all being within noise.
> 
> Good to know there is no regression. It is curious that there is a
> slight _improvement_ across the board. Do we have an explanation for
> that? It seems odd that noise would be so consistent.

Yes, I too thought it and it turned out to be t/perf/run does not copy
config.mak.autogen & friends to build/ and I'm using autoconf with
CFLAGS="-march=native -O3 ..."

Junio, I could not resist to the following:

 8< 
From: Kirill Smelkov 
Subject: [PATCH] t/perf/run: Don't forget to copy config.mak.autogen & friends
 to build area

Otherwise for people who use autotools-based configure in main worktree,
the performance testing results will be inconsistent as work and build
trees could be using e.g. different optimization levels.

See e.g.


http://public-inbox.org/git/20160818175222.bmm3ivjheokf2...@sigill.intra.peff.net/

for example.

NOTE config.status has to be copied because otherwise without it the build
would want to run reconfigure this way loosing just copied config.mak.autogen.

Signed-off-by: Kirill Smelkov 
---
 t/perf/run | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/perf/run b/t/perf/run
index cfd7012..aa383c2 100755
--- a/t/perf/run
+++ b/t/perf/run
@@ -30,7 +30,7 @@ unpack_git_rev () {
 }
 build_git_rev () {
rev=$1
-   cp ../../config.mak build/$rev/config.mak
+   cp -t build/$rev ../../{config.mak,config.mak.autogen,config.status}
(cd build/$rev && make $GIT_PERF_MAKE_OPTS) ||
die "failed to build revision '$mydir'"
 }
-- 
2.9.2.701.gf965a18.dirty
 8< 

With corrected t/perf/run the timings are more realistic - e.g. 3
consecutive runs of `./run 56dfeb62 . ./p5310-pack-bitmaps.sh`:

Test  56dfeb62  this tree
-
5310.2: repack to disk9.08(8.20+0.25)   9.09(8.14+0.32) +0.1%
5310.3: simulated clone   1.92(2.12+0.08)   1.93(2.12+0.09) +0.5%
5310.4: simulated fetch   0.82(1.07+0.04)   0.82(1.06+0.04) +0.0%
5310.6: partial bitmap1.96(2.42+0.13)   1.95(2.40+0.15) -0.5%

Test  56dfeb62  this tree
-
5310.2: repack to disk9.11(8.16+0.32)   9.11(8.19+0.28) +0.0%
5310.3: simulated clone   1.93(2.14+0.07)   1.92(2.11+0.10) -0.5%
5310.4: simulated fetch   0.82(1.06+0.04)   0.82(1.04+0.05) +0.0%
5310.6: partial bitmap1.95(2.38+0.16)   1.94(2.39+0.14) -0.5%

Test  56dfeb62  this tree
-
5310.2: repack to disk9.13(8.17+0.31)   9.07(8.13+0.28) -0.7%
5310.3: simulated clone   1.92(2.13+0.07)   1.91(2.12+0.06) -0.5%
5310.4: simulated fetch   0.82(1.08+0.03)   0.82(1.08+0.03) +0.0%
5310.6: partial bitmap1.96(2.43+0.14)   1.96(2.42+0.14) +0.0%



> > And in the general case we care not to have duplicate
> > find_pack_entry_one(*found_pack) calls. Worst what can happen is we can
> > call want_found_object(*found_pack) -- newly introduced helper for
> > checking whether we want object -- twice, but since want_found_object()
> > is very lightweight it does not make any difference.
> 
> I had trouble parsing this. I think maybe:
> 
>   In the general case we do not want to call find_pack_entry_one() more
>   than once, because it is expensive. This patch splits the loop in
>   want_object_in_pack() into two parts: finding the object and seeing if
>   it impacts our choice to include it in the pack. We may 

Re: How to simulate a real checkout to test a new smudge filter?

2016-09-10 Thread Jakub Narębski
W dniu 10.09.2016 o 01:07, john smith pisze:
> On 9/10/16, Junio C Hamano  wrote:
>> The clean and smudge operations should look _only_ at the contents
>> they are filtering, and nothing else, and the clean/smudge filtering
>> mechanism is designed to support that use case.  It is not designed
>> to do things like embedding the name of the branch that is being
>> checked out into the result.
> 
> Ok, I think I get it. It was actually my original problem with smudge
> filters because I wanted them to be run after *every* checkout even if
> file contents stayed the same (hence the subject).

And that's not the case, for performance reasons.

> 
> Now Jakub suggested post-checkout hook in conjunction with only clean
> filter for my problem of managing dotfiles but I think I don't fully
> get it.  The problem is that in the scenario presented in my last
> e-mail clean filter is run in the situation which doesn't like a
> checkin to me.  Is `git checkout ' doing a *checkin*" under the
> hood so that the clean filter is called?  What does actually `checkin'
> mean in Git?  I thought that checkin it's the same as committing a
> file into the repository.

I was wrong, I'm sorry. My mistake.

You would need post-checkout hook together with clean / smudge filters
(though you could get by without smudge filter, at least in theory...).
The `post-checkout` hook could run e.g. "git checkout -- '*.conf'"
to force use of smudge filter, after checking that it was a change
of branch ("git checkout ").  See githooks(5) for details
(last, third parameter passed to hook script is 1, rather than 0).

Unfortunately I don't see a way to query .gitattributes files to
find out all patterns that match specific attribute; you would need
to duplicate configuration:

  .gitattributes:
  *.conf filter=transform

  .git/config
  [filter "transform"]
clean  = replace-with-placeholder %f
smudge = expand-with-branchname %f

  .git/hooks/post-checkout
  #!/bin/sh

  test "$3" -eq "1" && git checkout -- '*.conf'

Or something like that.
-- 
Jakub Narębski



Re: git commit -p with file arguments

2016-09-10 Thread Jakub Narębski
W dniu 09.09.2016 o 22:52, Christian Neukirchen napisał:
> Jakub Narębski  writes:
> 
>> Which means that with "git add -p  && git commit ",
>> the "git add -p " would carefully craft the  state
>> in the index... and "git commit " would take worktree version
>> of  for commit, ignoring what was in the index :-(
>>
>> Currently there is no way to create commit out of subset of the index,
>> e.g. with "git commit :0:"
> 
> I played around with creating a new index just for "add -p" and then
> committing that one.  Seems to have worked...
> 
> Perhaps I'll just wrap git-commit myself then.

What I wanted to say is that there is no built-in way to create commit
out of subset of the index.  You can of course do what "git commit "
does, that is use temporary index file (GIT_INDEX_FILE etc.; see
contrib/examples/git-commit.sh).

I wonder, if git-commit is to acquire such feature, what would be the
best interface.  "git commit :0:./"?  "git commit -o -p "
(that is, "git commit --only --patch ")?

-- 
Jakub Narębski



Re: [PATCH 04/13] i18n: blame: mark error messages for translation

2016-09-10 Thread Jean-Noël AVILA
On mercredi 7 septembre 2016 14:49:08 CEST Vasco Almeida wrote:
> Mark error messages for translation passed to die() function.
> Change "Cannot" to lowercase following the usual style.
> 
> Reflect changes to test by using test_i18ngrep.
> 
> Signed-off-by: Vasco Almeida 
> ---
>  builtin/blame.c   | 12 ++--
>  t/t8003-blame-corner-cases.sh |  4 ++--
>  2 files changed, 8 insertions(+), 8 deletions(-)
> 
> diff --git a/builtin/blame.c b/builtin/blame.c
> index a5bbf91..3fee197 100644
> --- a/builtin/blame.c
> +++ b/builtin/blame.c
> @@ -2601,7 +2601,7 @@ int cmd_blame(int argc, const char **argv, const char
> *prefix)
> 
>   if (incremental || (output_option & OUTPUT_PORCELAIN)) {
>   if (show_progress > 0)
> - die("--progress can't be used with --incremental or 
> porcelain 
formats");
> + die(_("--progress can't be used with --incremental or 
> porcelain
> formats")); show_progress = 0;
>   } else if (show_progress < 0)
>   show_progress = isatty(2);
> @@ -2727,7 +2727,7 @@ int cmd_blame(int argc, const char **argv, const char
> *prefix) sb.commits.compare = compare_commits_by_commit_date;
>   }
>   else if (contents_from)
> - die("--contents and --reverse do not blend well.");
> + die(_("--contents and --reverse do not blend well."));
>   else {
>   final_commit_name = prepare_initial();
>   sb.commits.compare = compare_commits_by_reverse_commit_date;
> @@ -2747,12 +2747,12 @@ int cmd_blame(int argc, const char **argv, const
> char *prefix) add_pending_object(, &(sb.final->object), ":");
>   }
>   else if (contents_from)
> - die("Cannot use --contents with final commit object name");
> + die(_("cannot use --contents with final commit object name"));
> 
>   if (reverse && revs.first_parent_only) {
>   final_commit = find_single_final(sb.revs, NULL);
>   if (!final_commit)
> - die("--reverse and --first-parent together require 
> specified latest
> commit"); +   die(_("--reverse and --first-parent together 
> require
> specified latest commit")); }
> 
>   /*
> @@ -2779,7 +2779,7 @@ int cmd_blame(int argc, const char **argv, const char
> *prefix) }
> 
>   if (oidcmp(>object.oid, >object.oid))
> - die("--reverse --first-parent together require range 
> along first-
parent
> chain"); +die(_("--reverse --first-parent together 
> require range along
> first-parent chain")); }
> 
>   if (is_null_oid(>object.oid)) {
> @@ -2820,7 +2820,7 @@ int cmd_blame(int argc, const char **argv, const char
> *prefix) , , sb.path))
>   usage(blame_usage);
>   if (lno < top || ((lno || bottom) && lno < bottom))
> - die("file %s has only %lu lines", path, lno);
> + die(_("file %s has only %lu lines"), path, lno);

Here a plural version is needed.

>   if (bottom < 1)
>   bottom = 1;
>   if (top < 1)
> diff --git a/t/t8003-blame-corner-cases.sh b/t/t8003-blame-corner-cases.sh
> index e48370d..661f9d4 100755
> --- a/t/t8003-blame-corner-cases.sh
> +++ b/t/t8003-blame-corner-cases.sh
> @@ -212,12 +212,12 @@ EOF
> 
>  test_expect_success 'blame -L with invalid start' '
>   test_must_fail git blame -L5 tres 2>errors &&
> - grep "has only 2 lines" errors
> + test_i18ngrep "has only 2 lines" errors
>  '
> 
>  test_expect_success 'blame -L with invalid end' '
>   test_must_fail git blame -L1,5 tres 2>errors &&
> - grep "has only 2 lines" errors
> + test_i18ngrep "has only 2 lines" errors
>  '
> 
>  test_expect_success 'blame parses  part of -L' '




Company Payment Agent Needed

2016-09-10 Thread Shougang Group Ltd



--
Shougang Group
45 Huagong Road Xinji City,
Hebei Province China.
webpage: www.shougang.com.cn


This is an official request for Professional/consultants who will stand as
our regional representative to run logistics on behalf of Shougang Group.
We are only looking for individual or company from USA and Canada. You
will be entitle to ten percent (10%) of every payment you receive from our
customers in your region on behalf of the company.

NOTE: You are not require to travel, all communications with our customers
will be through email or phone. All fees, including taxes will be handled
by us.

You can apply for this position by filling the information below and send
back to us;

Full Name:
Residencial or office address:
Town:
State:
Zipcode:
Country:
Current Job:
If you own a company, please state the company name:
Prefered Email:
Gender:
Age:
Telephone:

If you have a current job, you can still be part of our business, as
services for us will not bother with your hours of work.

Respectfully,
Mr. Zhu Jimin (Chief Executive Officer)
Shougang Group



Re: Missing RPM spec file in tarball

2016-09-10 Thread Johannes Schindelin
Hi Stefan,

On Fri, 9 Sep 2016, Stefan Beller wrote:

> On Fri, Sep 9, 2016 at 9:19 AM, Sergio Martín Turiel
>  wrote:
> 
> > Can you tell me what I'm doing wrong?
> 
> Not crying out loud when that commit was discussed on the
> mailing list. ;)

Umm, I think it would be more: "Not stepping up to maintain the RPM
specs"...

;-)

Ciao,
Dscho

Re: [PATCH v3 3/4] cat-file --textconv/--filters: allow specifying the path separately

2016-09-10 Thread Johannes Schindelin
Hi Junio,

On Fri, 9 Sep 2016, Junio C Hamano wrote:

> Johannes Schindelin  writes:
> 
> > +test_expect_success 'path= complains without 
> > --textconv/--filters' '
> 
> I wonder where this "path" came from; it wasn't in v2 I queued,
> but somehow came back mysteriously.

No idea how it crept back in. Sorry.

> Will locally amend.

Thanks!
Dscho


Re: [PATCH v3 2/4] cat-file: introduce the --filters option

2016-09-10 Thread Johannes Schindelin
Hi Junio,

On Fri, 9 Sep 2016, Junio C Hamano wrote:

> Junio C Hamano  writes:
> 
> > So I would not mind if we define the semantics of "--filters" as
> > such (as long as it is clearly documented, of course).  AFAICS, the
> > batch interface does not call filter_object() for non-blobs, and by
> > returning successfully without doing anything special for a symbolic
> > link from filter_object() automatically gives us the "by default
> > return as-is, but give processed output only for regular file blobs"
> > semantics to the batch mode.
> >
> > But for a non-batch mode, it feels somewhat funny to be giving the
> > as-is output without saying anything to symbolic links; we can argue
> > that it is being consistent with what we do in the batch mode,
> > though.
> 
> In other words, instead of trying to be consistent by erroring out
> in non-regular blob case, I think the attached change on top would
> make more sense, by consistently passing the object contents as-is
> for all "not filtered" cases, whether it is run from the batch mode
> or from the command line.
> 
>  builtin/cat-file.c | 7 +--
>  1 file changed, 1 insertion(+), 6 deletions(-)
> 
> diff --git a/builtin/cat-file.c b/builtin/cat-file.c
> index f8a3a08..99cb525 100644
> --- a/builtin/cat-file.c
> +++ b/builtin/cat-file.c
> @@ -33,12 +33,7 @@ static int filter_object(const char *path, unsigned mode,
>   if (!*buf)
>   return error(_("cannot read object %s '%s'"),
>   sha1_to_hex(sha1), path);
> - if (type != OBJ_BLOB) {
> - free(*buf);
> - return error(_("blob expected for %s '%s'"),
> - sha1_to_hex(sha1), path);
> - }
> - if (S_ISREG(mode)) {
> + if ((type == OBJ_BLOB) && S_ISREG(mode)) {
>   struct strbuf strbuf = STRBUF_INIT;
>   if (convert_to_working_tree(path, *buf, *size, )) {
>   free(*buf);

Yes, that makes most sense to me, too.

Ciao,
Dscho


Re: [PATCH] git-gui: respect commit.gpgsign again

2016-09-10 Thread Johannes Schindelin
Hi Junio,

On Fri, 9 Sep 2016, Junio C Hamano wrote:

> Johannes Schindelin  writes:
> 
> > As of v2.9.0, `git commit-tree` no longer heeds the `commit.gpgsign`
> > config setting. This broke committing in Git GUI.
> 
> Thanks.  Will shift it up to apply to my copy of git-gui project and
> then pull in the result.

Thanks. There are a couple more git-gui patches waiting for quite a long
time. So you prefer them still as patches to git-gui.git?

Also, I just noticed poor wording. Would you mind fixing it up by

s/committing/& with GPG signature/

?

Thanks,
Dscho


Re: [PATCH v7 10/10] convert: add filter..process option

2016-09-10 Thread Torsten Bögershausen
On Thu, Sep 08, 2016 at 08:21:32PM +0200, larsxschnei...@gmail.com wrote:
[]
> +packet:  git> git-filter-client
> +packet:  git> version=2
> +packet:  git> version=42
> +packet:  git> 
> +packet:  git< git-filter-server
> +packet:  git< version=2
> +packet:  git> clean=true
> +packet:  git> smudge=true
> +packet:  git> not-yet-invented=true
> +packet:  git> 
> +packet:  git< clean=true
> +packet:  git< smudge=true
> +packet:  git< 

It's probalby only me who has difficulties to distinguish
'>' from '<'.

packet:  git> git-filter-client
packet:  git> version=2
packet:  git> version=42
packet:  git> 
packet:   filter> git-filter-server
packet:   filter> version=2

(Otherwise the dialoge description is nice)

> +
> +Supported filter capabilities in version 2 are "clean" and
> +"smudge".
> +
> +Afterwards Git sends a list of "key=value" pairs terminated with
> +a flush packet. The list will contain at least the filter command
> +(based on the supported capabilities) and the pathname of the file
> +to filter relative to the repository root. Right after these packets
> +Git sends the content split in zero or more pkt-line packets and a
> +flush packet to terminate content.
> +
> +packet:  git> command=smudge\n
> +packet:  git> pathname=path/testfile.dat\n

How do we send pathnames the have '\n' ?
Not really recommended, but allowed.
And here I am a little bit lost, is each of the lines packed into
a pkt-line ?
command=smudge is packet as pkt-line and pathname= is packed into
another one ? (The we don't need the '\n' at all)
Or go both lines into one pkt-line (thats what I think), then
we don't need the '\n' afther the pathname.
And in this case the pathname is always the last element, and a '\n'
may occur in the pathname, since we know the length of the packet
we know how long the pathname must be.



> +packet:  git> 
> +packet:  git> CONTENT
> +packet:  git> 
> +
> +


> +The filter is expected to respond with a list of "key=value" pairs
> +terminated with a flush packet. If the filter does not experience
> +problems then the list must contain a "success" status. Right after
> +these packets the filter is expected to send the content in zero
> +or more pkt-line packets and a flush packet at the end. Finally, a
> +second list of "key=value" pairs terminated with a flush packet
> +is expected. The filter can change the status in the second list.
> +
> +packet:  git< status=success\n
> +packet:  git< 
> +packet:  git< SMUDGED_CONTENT
> +packet:  git< 
> +packet:  git<   # empty list!
> +
> +
> +If the result content is empty then the filter is expected to respond
> +with a success status and an empty list.
> +
> +packet:  git< status=success\n
> +packet:  git< 
> +packet:  git<   # empty content!
> +packet:  git<   # empty list!
> +
> +
> +In case the filter cannot or does not want to process the content,

Does not want ? 
I can see something like "I read through the file, there is nothing
to do. So Git, I don't send anything back, you know where the file is.

> +it is expected to respond with an "error" status. Depending on the
> +`filter..required` flag Git will interpret that as error
> +but it will not stop or restart the filter process.
> +
> +packet:  git< status=error\n
> +packet:  git< 
> +
> +
> +If the filter experiences an error during processing, then it can
> +send the status "error" after the content was (partially or
> +completely) sent. Depending on the `filter..required` flag
> +Git will interpret that as error but it will not stop or restart the
> +filter process.
> +
> +packet:  git< status=success\n
> +packet:  git< 
> +packet:  git< HALF_WRITTEN_ERRONEOUS_CONTENT
> +packet:  git< 
> +packet:  git< status=error\n
> +packet:  git< 
> +
> +
> +If the filter dies during the communication or does not adhere to
> +the protocol then Git will stop the filter process and restart it

My personal comment:
When a filter is mis-behaving, Git should say so loud and clear, and
die(). 
The filter process can be left running, so that it can be debugged.

Here I stopped the review for a moment, 
I still think that Git shouldn't kill anything, because we loose
the ability to debug these processes.



Re: [PATCH v4 1/3] tests: move test_lazy_prereq JGIT to test-lib.sh

2016-09-10 Thread Jeff King
On Sat, Sep 10, 2016 at 05:51:19AM +, Torsten Bögershausen wrote:

> > +test_lazy_prereq JGIT '
> > +   type jgit
> > +'
> > +
> 
> Minor note: 
> Typically the stdout of `which` is suppressed like this:
> 
> if ! type cvs >/dev/null 2>&1

But we don't want to suppress the output here; the test harness
redirects the lazy_prereq output to /dev/null unless you specify "-v".
And there is no need for "if", because what we care about is the exit
code of the commands inside the lazy_prereq.

-Peff