Re: [PATCH 3/7] fixup! compat/regex: update the gawk regex engine from upstream

2017-05-04 Thread Johannes Sixt

Am 05.05.2017 um 00:00 schrieb Ævar Arnfjörð Bjarmason:

---
 compat/regex/regcomp.c | 356 +
 1 file changed, 209 insertions(+), 147 deletions(-)

diff --git a/compat/regex/regcomp.c b/compat/regex/regcomp.c
index d8bde06f1a..a1fb2e400e 100644
--- a/compat/regex/regcomp.c
+++ b/compat/regex/regcomp.c
@@ -1,5 +1,12 @@
+/*
+ * This is git.git's copy of gawk.git's regex engine. Please see that
+ * project for the latest version & to submit patches to this code,
+ * and git.git's compat/regex/README for information on how git's copy
+ * of this code is maintained.
+ */
+
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc.
+   Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa .



GNU C Library 2016? Is this GPL v3 code? that would be incompatible with 
GPL v2, I think.



@@ -14,9 +21,20 @@
Lesser General Public License for more details.

You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301 USA.  */
+   License along with the GNU C Library; if not, see
+   .  */
+
...


-- Hannes



[RFC PATCH 02/10] Move textconv_object to be with other textconv methods

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 builtin.h  |  2 --
 builtin/blame.c| 28 
 builtin/cat-file.c |  1 +
 diff.c | 23 +++
 diff.h |  7 +++
 5 files changed, 31 insertions(+), 30 deletions(-)

diff --git a/builtin.h b/builtin.h
index 9e4a898..498ac80 100644
--- a/builtin.h
+++ b/builtin.h
@@ -25,8 +25,6 @@ struct fmt_merge_msg_opts {
 extern int fmt_merge_msg(struct strbuf *in, struct strbuf *out,
 struct fmt_merge_msg_opts *);
 
-extern int textconv_object(const char *path, unsigned mode, const struct 
object_id *oid, int oid_valid, char **buf, unsigned long *buf_size);
-
 extern int is_builtin(const char *s);
 
 extern int cmd_add(int argc, const char **argv, const char *prefix);
diff --git a/builtin/blame.c b/builtin/blame.c
index 42c56eb..c419981 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -147,34 +147,6 @@ static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
 }
 
 /*
- * Prepare diff_filespec and convert it using diff textconv API
- * if the textconv driver exists.
- * Return 1 if the conversion succeeds, 0 otherwise.
- */
-int textconv_object(const char *path,
-   unsigned mode,
-   const struct object_id *oid,
-   int oid_valid,
-   char **buf,
-   unsigned long *buf_size)
-{
-   struct diff_filespec *df;
-   struct userdiff_driver *textconv;
-
-   df = alloc_filespec(path);
-   fill_filespec(df, oid->hash, oid_valid, mode);
-   textconv = get_textconv(df);
-   if (!textconv) {
-   free_filespec(df);
-   return 0;
-   }
-
-   *buf_size = fill_textconv(textconv, df, buf);
-   free_filespec(df);
-   return 1;
-}
-
-/*
  * Given an origin, prepare mmfile_t structure to be used by the
  * diff machinery
  */
diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 1890d7a..79a2c82 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -5,6 +5,7 @@
  */
 #include "cache.h"
 #include "builtin.h"
+#include "diff.h"
 #include "parse-options.h"
 #include "userdiff.h"
 #include "streaming.h"
diff --git a/diff.c b/diff.c
index 11eef1c..a62e989 100644
--- a/diff.c
+++ b/diff.c
@@ -5270,6 +5270,29 @@ size_t fill_textconv(struct userdiff_driver *driver,
return size;
 }
 
+int textconv_object(const char *path,
+   unsigned mode,
+   const struct object_id *oid,
+   int oid_valid,
+   char **buf,
+   unsigned long *buf_size)
+{
+   struct diff_filespec *df;
+   struct userdiff_driver *textconv;
+
+   df = alloc_filespec(path);
+   fill_filespec(df, oid->hash, oid_valid, mode);
+   textconv = get_textconv(df);
+   if (!textconv) {
+   free_filespec(df);
+   return 0;
+   }
+
+   *buf_size = fill_textconv(textconv, df, buf);
+   free_filespec(df);
+   return 1;
+}
+
 void setup_diff_pager(struct diff_options *opt)
 {
/*
diff --git a/diff.h b/diff.h
index 5be1ee7..52ebd54 100644
--- a/diff.h
+++ b/diff.h
@@ -385,6 +385,13 @@ extern size_t fill_textconv(struct userdiff_driver *driver,
  */
 extern struct userdiff_driver *get_textconv(struct diff_filespec *one);
 
+/*
+ * Prepare diff_filespec and convert it using diff textconv API
+ * if the textconv driver exists.
+ * Return 1 if the conversion succeeds, 0 otherwise.
+ */
+extern int textconv_object(const char *path, unsigned mode, const struct 
object_id *oid, int oid_valid, char **buf, unsigned long *buf_size);
+
 extern int parse_rename_score(const char **cp_p);
 
 extern long parse_algorithm_value(const char *value);
-- 
2.9.3



[RFC PATCH 06/10] Move fake_working_tree_commit() to lib

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 Makefile|   1 +
 builtin/blame.c | 198 +--
 commit-fake.c   | 200 
 commit-fake.h   |   9 +++
 4 files changed, 211 insertions(+), 197 deletions(-)
 create mode 100644 commit-fake.c
 create mode 100644 commit-fake.h

diff --git a/Makefile b/Makefile
index 8cbb56c..b197a37 100644
--- a/Makefile
+++ b/Makefile
@@ -727,6 +727,7 @@ LIB_OBJS += color.o
 LIB_OBJS += column.o
 LIB_OBJS += combine-diff.o
 LIB_OBJS += commit.o
+LIB_OBJS += commit-fake.o
 LIB_OBJS += compat/obstack.o
 LIB_OBJS += compat/terminal.o
 LIB_OBJS += config.o
diff --git a/builtin/blame.c b/builtin/blame.c
index 7ee84c1..c873cc2 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -16,7 +16,6 @@
 #include "revision.h"
 #include "quote.h"
 #include "xdiff-interface.h"
-#include "cache-tree.h"
 #include "string-list.h"
 #include "mailmap.h"
 #include "mergesort.h"
@@ -29,6 +28,7 @@
 #include "dir.h"
 #include "progress.h"
 #include "origin.h"
+#include "commit-fake.h"
 
 static char blame_usage[] = N_("git blame [] [] [] 
[--] ");
 
@@ -2044,202 +2044,6 @@ static int git_blame_config(const char *var, const char 
*value, void *cb)
return git_default_config(var, value, cb);
 }
 
-static void verify_working_tree_path(struct commit *work_tree, const char 
*path)
-{
-   struct commit_list *parents;
-   int pos;
-
-   for (parents = work_tree->parents; parents; parents = parents->next) {
-   const struct object_id *commit_oid = >item->object.oid;
-   struct object_id blob_oid;
-   unsigned mode;
-
-   if (!get_tree_entry(commit_oid->hash, path, blob_oid.hash, 
) &&
-   sha1_object_info(blob_oid.hash, NULL) == OBJ_BLOB)
-   return;
-   }
-
-   pos = cache_name_pos(path, strlen(path));
-   if (pos >= 0)
-   ; /* path is in the index */
-   else if (-1 - pos < active_nr &&
-!strcmp(active_cache[-1 - pos]->name, path))
-   ; /* path is in the index, unmerged */
-   else
-   die("no such path '%s' in HEAD", path);
-}
-
-static struct commit_list **append_parent(struct commit_list **tail, const 
struct object_id *oid)
-{
-   struct commit *parent;
-
-   parent = lookup_commit_reference(oid->hash);
-   if (!parent)
-   die("no such commit %s", oid_to_hex(oid));
-   return _list_insert(parent, tail)->next;
-}
-
-static void append_merge_parents(struct commit_list **tail)
-{
-   int merge_head;
-   struct strbuf line = STRBUF_INIT;
-
-   merge_head = open(git_path_merge_head(), O_RDONLY);
-   if (merge_head < 0) {
-   if (errno == ENOENT)
-   return;
-   die("cannot open '%s' for reading", git_path_merge_head());
-   }
-
-   while (!strbuf_getwholeline_fd(, merge_head, '\n')) {
-   struct object_id oid;
-   if (line.len < GIT_SHA1_HEXSZ || get_oid_hex(line.buf, ))
-   die("unknown line in '%s': %s", git_path_merge_head(), 
line.buf);
-   tail = append_parent(tail, );
-   }
-   close(merge_head);
-   strbuf_release();
-}
-
-/*
- * This isn't as simple as passing sb->buf and sb->len, because we
- * want to transfer ownership of the buffer to the commit (so we
- * must use detach).
- */
-static void set_commit_buffer_from_strbuf(struct commit *c, struct strbuf *sb)
-{
-   size_t len;
-   void *buf = strbuf_detach(sb, );
-   set_commit_buffer(c, buf, len);
-}
-
-/*
- * Prepare a dummy commit that represents the work tree (or staged) item.
- * Note that annotating work tree item never works in the reverse.
- */
-static struct commit *fake_working_tree_commit(struct diff_options *opt,
-  const char *path,
-  const char *contents_from)
-{
-   struct commit *commit;
-   struct origin *origin;
-   struct commit_list **parent_tail, *parent;
-   struct object_id head_oid;
-   struct strbuf buf = STRBUF_INIT;
-   const char *ident;
-   time_t now;
-   int size, len;
-   struct cache_entry *ce;
-   unsigned mode;
-   struct strbuf msg = STRBUF_INIT;
-
-   read_cache();
-   time();
-   commit = alloc_commit_node();
-   commit->object.parsed = 1;
-   commit->date = now;
-   parent_tail = >parents;
-
-   if (!resolve_ref_unsafe("HEAD", RESOLVE_REF_READING, head_oid.hash, 
NULL))
-   die("no such ref: HEAD");
-
-   parent_tail = append_parent(parent_tail, _oid);
-   append_merge_parents(parent_tail);
-   verify_working_tree_path(commit, path);
-
-   origin = make_origin(commit, path);
-
-   ident = fmt_ident("Not Committed Yet", "not.committed.yet", NULL, 0);
-   

[RFC PATCH 00/10] Add blame to libgit

2017-05-04 Thread Jeff Smith
In adding blame functionality to cgit, I do not wish to duplicate huge
portions of builtin/blame.c there.  I would rather shift its core
functionality into libgit.a -- which cgit already uses -- and in cgit I
can focus on the web presentation aspect.

I was able to split three separate cohesive pieces from blame.c into
libgit.  The functionality left in blame.c mostly relates to terminal
presentation.

Jeff Smith (10):
  Remove unneeded dependency on blob.h from blame
  Move textconv_object to be with other textconv methods
  Add some missing definitions to header files
  Remove unused parameter from get_origin()
  Split blame origin into its own file
  Move fake_working_tree_commit() to lib
  Break out scoreboard a little better
  Split blame scoreboard into its own file
  Break out scoreboard init and setup
  Move scoreboard init and setup to lib

 Makefile   |3 +
 builtin.h  |2 -
 builtin/blame.c| 2045 ++--
 builtin/cat-file.c |1 +
 commit-fake.c  |  200 +
 commit-fake.h  |9 +
 diff.c |   23 +
 diff.h |7 +
 object.h   |2 +
 origin.c   |   62 ++
 origin.h   |  101 +++
 pathspec.h |4 +
 refs.h |3 +
 scoreboard.c   | 1569 
 scoreboard.h   |   77 ++
 tree-walk.h|2 +
 16 files changed, 2108 insertions(+), 2002 deletions(-)
 create mode 100644 commit-fake.c
 create mode 100644 commit-fake.h
 create mode 100644 origin.c
 create mode 100644 origin.h
 create mode 100644 scoreboard.c
 create mode 100644 scoreboard.h

-- 
2.9.3



[RFC PATCH 07/10] Break out scoreboard a little better

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 builtin/blame.c | 149 +++-
 1 file changed, 93 insertions(+), 56 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index c873cc2..dc7600c 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -62,11 +62,6 @@ static struct string_list mailmap = STRING_LIST_INIT_NODUP;
 #define DEBUG 0
 #endif
 
-/* stats */
-static int num_read_blob;
-static int num_get_patch;
-static int num_commits;
-
 #define PICKAXE_BLAME_MOVE 01
 #define PICKAXE_BLAME_COPY 02
 #define PICKAXE_BLAME_COPY_HARDER  04
@@ -91,7 +86,7 @@ struct progress_info {
 };
 
 static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
- xdl_emit_hunk_consume_func_t hunk_func, void *cb_data)
+ xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, 
int xdl_opts)
 {
xpparam_t xpp = {0};
xdemitconf_t xecfg = {0};
@@ -108,13 +103,13 @@ static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
  * diff machinery
  */
 static void fill_origin_blob(struct diff_options *opt,
-struct origin *o, mmfile_t *file)
+struct origin *o, mmfile_t *file, int 
*num_read_blob)
 {
if (!o->file.ptr) {
enum object_type type;
unsigned long file_size;
 
-   num_read_blob++;
+   (*num_read_blob)++;
if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
textconv_object(o->path, o->mode, >blob_oid, 1, 
>ptr, _size))
;
@@ -265,9 +260,36 @@ struct scoreboard {
/* look-up a line in the final buffer */
int num_lines;
int *lineno;
+
+   /*
+* blame for a blame_entry with score lower than these thresholds
+* is not passed to the parent using move/copy logic.
+*/
+   unsigned blame_move_score;
+   unsigned blame_copy_score;
+
+   /* stats */
+   int num_read_blob;
+   int num_get_patch;
+   int num_commits;
+
+   /* options */
+   int show_root;
+   int reverse;
+   int xdl_opts;
+   int no_whole_file_rename;
+
+   /* callbacks */
+   void(*sanity_check)(struct scoreboard *);
+   void(*found_guilty_entry)(struct blame_entry *, void *);
+
+   void *found_guilty_entry_data;
 };
 
-static void sanity_check_refcnt(struct scoreboard *);
+static void blame_sort_final(struct scoreboard *sb)
+{
+   sb->ent = blame_sort(sb->ent, compare_blame_final);
+}
 
 /*
  * If two blame entries that are next to each other came from
@@ -290,8 +312,8 @@ static void coalesce(struct scoreboard *sb)
}
}
 
-   if (DEBUG) /* sanity */
-   sanity_check_refcnt(sb);
+   if (sb->sanity_check) /* sanity */
+   sb->sanity_check(sb);
 }
 
 /*
@@ -787,11 +809,11 @@ static void pass_blame_to_parent(struct scoreboard *sb,
d.offset = 0;
d.dstq =  d.srcq = >suspects;
 
-   fill_origin_blob(>revs->diffopt, parent, _p);
-   fill_origin_blob(>revs->diffopt, target, _o);
-   num_get_patch++;
+   fill_origin_blob(>revs->diffopt, parent, _p, 
>num_read_blob);
+   fill_origin_blob(>revs->diffopt, target, _o, 
>num_read_blob);
+   sb->num_get_patch++;
 
-   if (diff_hunks(_p, _o, blame_chunk_cb, ))
+   if (diff_hunks(_p, _o, blame_chunk_cb, , sb->xdl_opts))
die("unable to generate diff (%s -> %s)",
oid_to_hex(>commit->object.oid),
oid_to_hex(>commit->object.oid));
@@ -940,7 +962,7 @@ static void find_copy_in_blob(struct scoreboard *sb,
 * file_p partially may match that image.
 */
memset(split, 0, sizeof(struct blame_entry [3]));
-   if (diff_hunks(file_p, _o, handle_split_cb, ))
+   if (diff_hunks(file_p, _o, handle_split_cb, , sb->xdl_opts))
die("unable to generate diff (%s)",
oid_to_hex(>commit->object.oid));
/* remainder, if any, all match the preimage */
@@ -993,7 +1015,7 @@ static void find_move_in_parent(struct scoreboard *sb,
if (!unblamed)
return; /* nothing remains for this target */
 
-   fill_origin_blob(>revs->diffopt, parent, _p);
+   fill_origin_blob(>revs->diffopt, parent, _p, 
>num_read_blob);
if (!file_p.ptr)
return;
 
@@ -1009,7 +1031,7 @@ static void find_move_in_parent(struct scoreboard *sb,
next = e->next;
find_copy_in_blob(sb, e, parent, split, _p);
if (split[1].suspect &&
-   blame_move_score < ent_score(sb, [1])) {
+   sb->blame_move_score < ent_score(sb, [1])) {
split_blame(blamed, , split, e);
} else {
e->next = leftover;
@@ -1018,7 +1040,7 @@ 

[RFC PATCH 01/10] Remove unneeded dependency on blob.h from blame

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 builtin/blame.c | 1 -
 1 file changed, 1 deletion(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index 07506a3..42c56eb 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -8,7 +8,6 @@
 #include "cache.h"
 #include "refs.h"
 #include "builtin.h"
-#include "blob.h"
 #include "commit.h"
 #include "tag.h"
 #include "tree-walk.h"
-- 
2.9.3



[RFC PATCH 10/10] Move scoreboard init and setup to lib

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 builtin/blame.c | 274 
 scoreboard.c| 273 +++
 scoreboard.h|   2 +
 3 files changed, 275 insertions(+), 274 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index 3bde5a6..4e3079f 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -6,11 +6,8 @@
  */
 
 #include "cache.h"
-#include "refs.h"
 #include "builtin.h"
 #include "commit.h"
-#include "tag.h"
-#include "tree-walk.h"
 #include "diff.h"
 #include "revision.h"
 #include "quote.h"
@@ -25,7 +22,6 @@
 #include "dir.h"
 #include "progress.h"
 #include "origin.h"
-#include "commit-fake.h"
 #include "scoreboard.h"
 
 static char blame_usage[] = N_("git blame [] [] [] 
[--] ");
@@ -72,39 +68,6 @@ struct progress_info {
int blamed_lines;
 };
 
-static int compare_commits_by_reverse_commit_date(const void *a,
- const void *b,
- void *c)
-{
-   return -compare_commits_by_commit_date(a, b, c);
-}
-
-/*
- * Fill the blob_sha1 field of an origin if it hasn't, so that later
- * call to fill_origin_blob() can use it to locate the data.  blob_sha1
- * for an origin is also used to pass the blame for the entire file to
- * the parent to detect the case where a child's blob is identical to
- * that of its parent's.
- *
- * This also fills origin->mode for corresponding tree path.
- */
-static int fill_blob_sha1_and_mode(struct origin *origin)
-{
-   if (!is_null_oid(>blob_oid))
-   return 0;
-   if (get_tree_entry(origin->commit->object.oid.hash,
-  origin->path,
-  origin->blob_oid.hash, >mode))
-   goto error_out;
-   if (sha1_object_info(origin->blob_oid.hash, NULL) != OBJ_BLOB)
-   goto error_out;
-   return 0;
- error_out:
-   oidclr(>blob_oid);
-   origin->mode = S_IFINVALID;
-   return -1;
-}
-
 static const char *nth_line_cb(void *data, long lno)
 {
return nth_line((struct scoreboard *)data, lno);
@@ -512,40 +475,6 @@ static void output(struct scoreboard *sb, int option)
}
 }
 
-static const char *get_next_line(const char *start, const char *end)
-{
-   const char *nl = memchr(start, '\n', end - start);
-   return nl ? nl + 1 : end;
-}
-
-/*
- * To allow quick access to the contents of nth line in the
- * final image, prepare an index in the scoreboard.
- */
-static int prepare_lines(struct scoreboard *sb)
-{
-   const char *buf = sb->final_buf;
-   unsigned long len = sb->final_buf_size;
-   const char *end = buf + len;
-   const char *p;
-   int *lineno;
-   int num = 0;
-
-   for (p = buf; p < end; p = get_next_line(p, end))
-   num++;
-
-   ALLOC_ARRAY(sb->lineno, num + 1);
-   lineno = sb->lineno;
-
-   for (p = buf; p < end; p = get_next_line(p, end))
-   *lineno++ = p - buf;
-
-   *lineno = len;
-
-   sb->num_lines = num;
-   return sb->num_lines;
-}
-
 /*
  * Add phony grafts for use with -S; this is primarily to
  * support git's cvsserver that wants to give a linear history
@@ -706,209 +635,6 @@ static int git_blame_config(const char *var, const char 
*value, void *cb)
return git_default_config(var, value, cb);
 }
 
-static struct commit *find_single_final(struct rev_info *revs,
-   const char **name_p)
-{
-   int i;
-   struct commit *found = NULL;
-   const char *name = NULL;
-
-   for (i = 0; i < revs->pending.nr; i++) {
-   struct object *obj = revs->pending.objects[i].item;
-   if (obj->flags & UNINTERESTING)
-   continue;
-   obj = deref_tag(obj, NULL, 0);
-   if (obj->type != OBJ_COMMIT)
-   die("Non commit %s?", revs->pending.objects[i].name);
-   if (found)
-   die("More than one commit to dig from %s and %s?",
-   revs->pending.objects[i].name, name);
-   found = (struct commit *)obj;
-   name = revs->pending.objects[i].name;
-   }
-   if (name_p)
-   *name_p = name;
-   return found;
-}
-
-static struct commit *dwim_reverse_initial(struct rev_info *revs,
-  const char **name_p)
-{
-   /*
-* DWIM "git blame --reverse ONE -- PATH" as
-* "git blame --reverse ONE..HEAD -- PATH" but only do so
-* when it makes sense.
-*/
-   struct object *obj;
-   struct commit *head_commit;
-   unsigned char head_sha1[20];
-
-   if (revs->pending.nr != 1)
-   return NULL;
-
-   /* Is that sole rev a committish? */
-   obj = revs->pending.objects[0].item;
-   obj = deref_tag(obj, NULL, 0);
-   if 

[RFC PATCH 08/10] Split blame scoreboard into its own file

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 Makefile|1 +
 builtin/blame.c | 1358 +--
 scoreboard.c| 1296 
 scoreboard.h|   74 +++
 4 files changed, 1372 insertions(+), 1357 deletions(-)
 create mode 100644 scoreboard.c
 create mode 100644 scoreboard.h

diff --git a/Makefile b/Makefile
index b197a37..6e6726a 100644
--- a/Makefile
+++ b/Makefile
@@ -827,6 +827,7 @@ LIB_OBJS += rerere.o
 LIB_OBJS += resolve-undo.o
 LIB_OBJS += revision.o
 LIB_OBJS += run-command.o
+LIB_OBJS += scoreboard.o
 LIB_OBJS += send-pack.o
 LIB_OBJS += sequencer.o
 LIB_OBJS += server-info.o
diff --git a/builtin/blame.c b/builtin/blame.c
index dc7600c..0af99e3 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -12,13 +12,10 @@
 #include "tag.h"
 #include "tree-walk.h"
 #include "diff.h"
-#include "diffcore.h"
 #include "revision.h"
 #include "quote.h"
-#include "xdiff-interface.h"
 #include "string-list.h"
 #include "mailmap.h"
-#include "mergesort.h"
 #include "parse-options.h"
 #include "prio-queue.h"
 #include "utf8.h"
@@ -29,6 +26,7 @@
 #include "progress.h"
 #include "origin.h"
 #include "commit-fake.h"
+#include "scoreboard.h"
 
 static char blame_usage[] = N_("git blame [] [] [] 
[--] ");
 
@@ -62,19 +60,8 @@ static struct string_list mailmap = STRING_LIST_INIT_NODUP;
 #define DEBUG 0
 #endif
 
-#define PICKAXE_BLAME_MOVE 01
-#define PICKAXE_BLAME_COPY 02
-#define PICKAXE_BLAME_COPY_HARDER  04
-#define PICKAXE_BLAME_COPY_HARDEST 010
-
-/*
- * blame for a blame_entry with score lower than these thresholds
- * is not passed to the parent using move/copy logic.
- */
 static unsigned blame_move_score;
 static unsigned blame_copy_score;
-#define BLAME_DEFAULT_MOVE_SCORE   20
-#define BLAME_DEFAULT_COPY_SCORE   40
 
 /* Remember to update object flag allocation in object.h */
 #define METAINFO_SHOWN (1u<<12)
@@ -85,149 +72,6 @@ struct progress_info {
int blamed_lines;
 };
 
-static int diff_hunks(mmfile_t *file_a, mmfile_t *file_b,
- xdl_emit_hunk_consume_func_t hunk_func, void *cb_data, 
int xdl_opts)
-{
-   xpparam_t xpp = {0};
-   xdemitconf_t xecfg = {0};
-   xdemitcb_t ecb = {NULL};
-
-   xpp.flags = xdl_opts;
-   xecfg.hunk_func = hunk_func;
-   ecb.priv = cb_data;
-   return xdi_diff(file_a, file_b, , , );
-}
-
-/*
- * Given an origin, prepare mmfile_t structure to be used by the
- * diff machinery
- */
-static void fill_origin_blob(struct diff_options *opt,
-struct origin *o, mmfile_t *file, int 
*num_read_blob)
-{
-   if (!o->file.ptr) {
-   enum object_type type;
-   unsigned long file_size;
-
-   (*num_read_blob)++;
-   if (DIFF_OPT_TST(opt, ALLOW_TEXTCONV) &&
-   textconv_object(o->path, o->mode, >blob_oid, 1, 
>ptr, _size))
-   ;
-   else
-   file->ptr = read_sha1_file(o->blob_oid.hash, ,
-  _size);
-   file->size = file_size;
-
-   if (!file->ptr)
-   die("Cannot read blob %s for path %s",
-   oid_to_hex(>blob_oid),
-   o->path);
-   o->file = *file;
-   }
-   else
-   *file = o->file;
-}
-
-static void drop_origin_blob(struct origin *o)
-{
-   if (o->file.ptr) {
-   free(o->file.ptr);
-   o->file.ptr = NULL;
-   }
-}
-
-/*
- * Any merge of blames happens on lists of blames that arrived via
- * different parents in a single suspect.  In this case, we want to
- * sort according to the suspect line numbers as opposed to the final
- * image line numbers.  The function body is somewhat longish because
- * it avoids unnecessary writes.
- */
-
-static struct blame_entry *blame_merge(struct blame_entry *list1,
-  struct blame_entry *list2)
-{
-   struct blame_entry *p1 = list1, *p2 = list2,
-   **tail = 
-
-   if (!p1)
-   return p2;
-   if (!p2)
-   return p1;
-
-   if (p1->s_lno <= p2->s_lno) {
-   do {
-   tail = >next;
-   if ((p1 = *tail) == NULL) {
-   *tail = p2;
-   return list1;
-   }
-   } while (p1->s_lno <= p2->s_lno);
-   }
-   for (;;) {
-   *tail = p2;
-   do {
-   tail = >next;
-   if ((p2 = *tail) == NULL)  {
-   *tail = p1;
-   return list1;
-   }
-   } while (p1->s_lno > p2->s_lno);
-   *tail = p1;
-   do {
- 

[RFC PATCH 09/10] Break out scoreboard init and setup

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 builtin/blame.c | 264 +---
 scoreboard.h|   1 +
 2 files changed, 140 insertions(+), 125 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index 0af99e3..3bde5a6 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -731,14 +731,8 @@ static struct commit *find_single_final(struct rev_info 
*revs,
return found;
 }
 
-static char *prepare_final(struct scoreboard *sb)
-{
-   const char *name;
-   sb->final = find_single_final(sb->revs, );
-   return xstrdup_or_null(name);
-}
-
-static const char *dwim_reverse_initial(struct scoreboard *sb)
+static struct commit *dwim_reverse_initial(struct rev_info *revs,
+  const char **name_p)
 {
/*
 * DWIM "git blame --reverse ONE -- PATH" as
@@ -749,11 +743,11 @@ static const char *dwim_reverse_initial(struct scoreboard 
*sb)
struct commit *head_commit;
unsigned char head_sha1[20];
 
-   if (sb->revs->pending.nr != 1)
+   if (revs->pending.nr != 1)
return NULL;
 
/* Is that sole rev a committish? */
-   obj = sb->revs->pending.objects[0].item;
+   obj = revs->pending.objects[0].item;
obj = deref_tag(obj, NULL, 0);
if (obj->type != OBJ_COMMIT)
return NULL;
@@ -767,17 +761,19 @@ static const char *dwim_reverse_initial(struct scoreboard 
*sb)
 
/* Turn "ONE" into "ONE..HEAD" then */
obj->flags |= UNINTERESTING;
-   add_pending_object(sb->revs, _commit->object, "HEAD");
+   add_pending_object(revs, _commit->object, "HEAD");
 
-   sb->final = (struct commit *)obj;
-   return sb->revs->pending.objects[0].name;
+   if (name_p)
+   *name_p = revs->pending.objects[0].name;
+   return (struct commit *)obj;
 }
 
-static char *prepare_initial(struct scoreboard *sb)
+static struct commit *find_single_initial(struct rev_info *revs,
+ const char **name_p)
 {
int i;
-   const char *final_commit_name = NULL;
-   struct rev_info *revs = sb->revs;
+   struct commit *found = NULL;
+   const char *name = NULL;
 
/*
 * There must be one and only one negative commit, and it must be
@@ -790,19 +786,127 @@ static char *prepare_initial(struct scoreboard *sb)
obj = deref_tag(obj, NULL, 0);
if (obj->type != OBJ_COMMIT)
die("Non commit %s?", revs->pending.objects[i].name);
-   if (sb->final)
+   if (found)
die("More than one commit to dig up from, %s and %s?",
-   revs->pending.objects[i].name,
-   final_commit_name);
-   sb->final = (struct commit *) obj;
-   final_commit_name = revs->pending.objects[i].name;
+   revs->pending.objects[i].name, name);
+   found = (struct commit *) obj;
+   name = revs->pending.objects[i].name;
}
 
-   if (!final_commit_name)
-   final_commit_name = dwim_reverse_initial(sb);
-   if (!final_commit_name)
+   if (!name)
+   found = dwim_reverse_initial(revs, );
+   if (!name)
die("No commit to dig up from?");
-   return xstrdup(final_commit_name);
+
+   if (name_p)
+   *name_p = name;
+   return found;
+}
+
+void init_scoreboard(struct scoreboard *sb)
+{
+   memset(sb, 0, sizeof(struct scoreboard));
+   sb->blame_move_score = BLAME_DEFAULT_MOVE_SCORE;
+   sb->blame_copy_score = BLAME_DEFAULT_COPY_SCORE;
+}
+
+void setup_scoreboard(struct scoreboard *sb, const char *path, struct origin 
**orig)
+{
+   const char *final_commit_name = NULL;
+   struct origin *o;
+   struct commit *final_commit = NULL;
+   enum object_type type;
+
+   if (sb->reverse && sb->contents_from)
+   die(_("--contents and --reverse do not blend well."));
+
+   if (!sb->reverse) {
+   sb->final = find_single_final(sb->revs, _commit_name);
+   sb->commits.compare = compare_commits_by_commit_date;
+   } else {
+   sb->final = find_single_initial(sb->revs, _commit_name);
+   sb->commits.compare = compare_commits_by_reverse_commit_date;
+   }
+
+   if (sb->final && sb->contents_from)
+   die(_("cannot use --contents with final commit object name"));
+
+   if (sb->reverse && sb->revs->first_parent_only)
+   sb->revs->children.name = NULL;
+
+   if (!sb->final) {
+   /*
+* "--not A B -- path" without anything positive;
+* do not default to HEAD, but use the working tree
+* or "--contents".
+*/
+   setup_work_tree();
+   sb->final = 

[RFC PATCH 04/10] Remove unused parameter from get_origin()

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 builtin/blame.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index c419981..cc46f56 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -449,9 +449,7 @@ static struct origin *make_origin(struct commit *commit, 
const char *path)
  * Locate an existing origin or create a new one.
  * This moves the origin to front position in the commit util list.
  */
-static struct origin *get_origin(struct scoreboard *sb,
-struct commit *commit,
-const char *path)
+static struct origin *get_origin(struct commit *commit, const char *path)
 {
struct origin *o, *l;
 
@@ -543,7 +541,7 @@ static struct origin *find_origin(struct scoreboard *sb,
 
if (!diff_queued_diff.nr) {
/* The path is the same as parent */
-   porigin = get_origin(sb, parent, origin->path);
+   porigin = get_origin(parent, origin->path);
oidcpy(>blob_oid, >blob_oid);
porigin->mode = origin->mode;
} else {
@@ -569,7 +567,7 @@ static struct origin *find_origin(struct scoreboard *sb,
die("internal error in blame::find_origin (%c)",
p->status);
case 'M':
-   porigin = get_origin(sb, parent, origin->path);
+   porigin = get_origin(parent, origin->path);
oidcpy(>blob_oid, >one->oid);
porigin->mode = p->one->mode;
break;
@@ -615,7 +613,7 @@ static struct origin *find_rename(struct scoreboard *sb,
struct diff_filepair *p = diff_queued_diff.queue[i];
if ((p->status == 'R' || p->status == 'C') &&
!strcmp(p->two->path, origin->path)) {
-   porigin = get_origin(sb, parent, p->one->path);
+   porigin = get_origin(parent, p->one->path);
oidcpy(>blob_oid, >one->oid);
porigin->mode = p->one->mode;
break;
@@ -1270,7 +1268,7 @@ static void find_copy_in_parent(struct scoreboard *sb,
/* find_move already dealt with this path */
continue;
 
-   norigin = get_origin(sb, parent, p->one->path);
+   norigin = get_origin(parent, p->one->path);
oidcpy(>blob_oid, >one->oid);
norigin->mode = p->one->mode;
fill_origin_blob(>revs->diffopt, norigin, _p);
@@ -2806,7 +2804,7 @@ int cmd_blame(int argc, const char **argv, const char 
*prefix)
sb.final_buf_size = o->file.size;
}
else {
-   o = get_origin(, sb.final, path);
+   o = get_origin(sb.final, path);
if (fill_blob_sha1_and_mode(o))
die(_("no such path %s in %s"), path, 
final_commit_name);
 
-- 
2.9.3



[RFC PATCH 03/10] Add some missing definitions to header files

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 object.h| 2 ++
 pathspec.h  | 4 
 refs.h  | 3 +++
 tree-walk.h | 2 ++
 4 files changed, 11 insertions(+)

diff --git a/object.h b/object.h
index f52957d..9737582 100644
--- a/object.h
+++ b/object.h
@@ -1,6 +1,8 @@
 #ifndef OBJECT_H
 #define OBJECT_H
 
+#include "cache.h"
+
 struct object_list {
struct object *item;
struct object_list *next;
diff --git a/pathspec.h b/pathspec.h
index 55e9769..ea18e24 100644
--- a/pathspec.h
+++ b/pathspec.h
@@ -1,6 +1,10 @@
 #ifndef PATHSPEC_H
 #define PATHSPEC_H
 
+#include 
+#include 
+#include 
+
 /* Pathspec magic */
 #define PATHSPEC_FROMTOP   (1<<0)
 #define PATHSPEC_MAXDEPTH  (1<<1)
diff --git a/refs.h b/refs.h
index 07cf4cd..e9d19fd 100644
--- a/refs.h
+++ b/refs.h
@@ -1,6 +1,9 @@
 #ifndef REFS_H
 #define REFS_H
 
+#include 
+#include 
+
 struct object_id;
 struct ref_store;
 struct strbuf;
diff --git a/tree-walk.h b/tree-walk.h
index 68bb78b..8b0fccd 100644
--- a/tree-walk.h
+++ b/tree-walk.h
@@ -1,6 +1,8 @@
 #ifndef TREE_WALK_H
 #define TREE_WALK_H
 
+struct strbuf;
+
 struct name_entry {
const struct object_id *oid;
const char *path;
-- 
2.9.3



[RFC PATCH 05/10] Split blame origin into its own file

2017-05-04 Thread Jeff Smith
Signed-off-by: Jeff Smith 
---
 Makefile|   1 +
 builtin/blame.c | 151 +---
 origin.c|  62 +++
 origin.h| 101 +
 4 files changed, 165 insertions(+), 150 deletions(-)
 create mode 100644 origin.c
 create mode 100644 origin.h

diff --git a/Makefile b/Makefile
index e35542e..8cbb56c 100644
--- a/Makefile
+++ b/Makefile
@@ -791,6 +791,7 @@ LIB_OBJS += notes-merge.o
 LIB_OBJS += notes-utils.o
 LIB_OBJS += object.o
 LIB_OBJS += oidset.o
+LIB_OBJS += origin.o
 LIB_OBJS += pack-bitmap.o
 LIB_OBJS += pack-bitmap-write.o
 LIB_OBJS += pack-check.o
diff --git a/builtin/blame.c b/builtin/blame.c
index cc46f56..7ee84c1 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -28,6 +28,7 @@
 #include "line-log.h"
 #include "dir.h"
 #include "progress.h"
+#include "origin.h"
 
 static char blame_usage[] = N_("git blame [] [] [] 
[--] ");
 
@@ -84,50 +85,6 @@ static unsigned blame_copy_score;
 #define METAINFO_SHOWN (1u<<12)
 #define MORE_THAN_ONE_PATH (1u<<13)
 
-/*
- * One blob in a commit that is being suspected
- */
-struct origin {
-   int refcnt;
-   /* Record preceding blame record for this blob */
-   struct origin *previous;
-   /* origins are put in a list linked via `next' hanging off the
-* corresponding commit's util field in order to make finding
-* them fast.  The presence in this chain does not count
-* towards the origin's reference count.  It is tempting to
-* let it count as long as the commit is pending examination,
-* but even under circumstances where the commit will be
-* present multiple times in the priority queue of unexamined
-* commits, processing the first instance will not leave any
-* work requiring the origin data for the second instance.  An
-* interspersed commit changing that would have to be
-* preexisting with a different ancestry and with the same
-* commit date in order to wedge itself between two instances
-* of the same commit in the priority queue _and_ produce
-* blame entries relevant for it.  While we don't want to let
-* us get tripped up by this case, it certainly does not seem
-* worth optimizing for.
-*/
-   struct origin *next;
-   struct commit *commit;
-   /* `suspects' contains blame entries that may be attributed to
-* this origin's commit or to parent commits.  When a commit
-* is being processed, all suspects will be moved, either by
-* assigning them to an origin in a different commit, or by
-* shipping them to the scoreboard's ent list because they
-* cannot be attributed to a different commit.
-*/
-   struct blame_entry *suspects;
-   mmfile_t file;
-   struct object_id blob_oid;
-   unsigned mode;
-   /* guilty gets set when shipping any suspects to the final
-* blame list instead of other commits
-*/
-   char guilty;
-   char path[FLEX_ARRAY];
-};
-
 struct progress_info {
struct progress *progress;
int blamed_lines;
@@ -176,39 +133,6 @@ static void fill_origin_blob(struct diff_options *opt,
*file = o->file;
 }
 
-/*
- * Origin is refcounted and usually we keep the blob contents to be
- * reused.
- */
-static inline struct origin *origin_incref(struct origin *o)
-{
-   if (o)
-   o->refcnt++;
-   return o;
-}
-
-static void origin_decref(struct origin *o)
-{
-   if (o && --o->refcnt <= 0) {
-   struct origin *p, *l = NULL;
-   if (o->previous)
-   origin_decref(o->previous);
-   free(o->file.ptr);
-   /* Should be present exactly once in commit chain */
-   for (p = o->commit->util; p; l = p, p = p->next) {
-   if (p == o) {
-   if (l)
-   l->next = p->next;
-   else
-   o->commit->util = p->next;
-   free(o);
-   return;
-   }
-   }
-   die("internal error in blame::origin_decref");
-   }
-}
-
 static void drop_origin_blob(struct origin *o)
 {
if (o->file.ptr) {
@@ -218,40 +142,6 @@ static void drop_origin_blob(struct origin *o)
 }
 
 /*
- * Each group of lines is described by a blame_entry; it can be split
- * as we pass blame to the parents.  They are arranged in linked lists
- * kept as `suspects' of some unprocessed origin, or entered (when the
- * blame origin has been finalized) into the scoreboard structure.
- * While the scoreboard structure is only sorted at the end of
- * processing (according to final image line number), the lists
- * attached to an origin are 

git error

2017-05-04 Thread ada
hi,

I used the command below,there's an error,please help me to solve it !
Thank you ~
git push


Best Regards~
ada Wang



[L10N] Kickoff for Git 2.13.0 l10n round 2

2017-05-04 Thread Jiang Xin
Hi,

Git v2.13.0-rc2 introduced 4 new messages, let's start round 2 for Git
2.13.0 l10n.

You can get it from the usual place:

https://github.com/git-l10n/git-po/

As how to update your XX.po and help to translate Git, please see
"Updating a XX.po file" and other sections in “po/README" file.

--
Jiang Xin


hope you remember me???

2017-05-04 Thread Makena Jelani
Hello dear,
I am Miss Makena Jelani. hope you remember me? is been a long time,
where have you been. well, contact me so we can talk
Makena.


Re: [PATCH] archive-tar: fix a sparse 'constant too large' warning

2017-05-04 Thread Ramsay Jones


On 04/05/17 10:26, Johannes Schindelin wrote:
> Hi Ramsay,
> 
> On Thu, 4 May 2017, Ramsay Jones wrote:
> 
>> diff --git a/archive-tar.c b/archive-tar.c
>> index 319a5b1c7..6dddc0cff 100644
>> --- a/archive-tar.c
>> +++ b/archive-tar.c
>> @@ -33,7 +33,7 @@ static int write_tar_filter_archive(const struct archiver 
>> *ar,
>>  #if TIME_MAX == 0x
>>  #define USTAR_MAX_MTIME TIME_MAX
>>  #else
>> -#define USTAR_MAX_MTIME 0777UL
>> +#define USTAR_MAX_MTIME 0777ULL
>>  #endif
>>  
> 
> Funny. This problem was pointed out by Hannes Sixt (IIRC) and I fixed this
> very thing in v6.
> 
> Except I did not. I changed the wrong constant! Instead of
> USTAR_MAX_MTIME, I adjusted USTAR_MAX_SIZE. D'oh.

Ah, I did wonder about that.

> I just saw that my patch series already hit `next`, so I fear you are
> right that we need a follow-up patch. Maybe we want this diff, though?
> 
> -- snipsnap --
> diff --git a/archive-tar.c b/archive-tar.c
> index 319a5b1c7cd..073e60ebd3c 100644
> --- a/archive-tar.c
> +++ b/archive-tar.c
> @@ -28,12 +28,12 @@ static int write_tar_filter_archive(const struct archiver 
> *ar,
>  #if ULONG_MAX == 0x
>  #define USTAR_MAX_SIZE ULONG_MAX
>  #else
> -#define USTAR_MAX_SIZE 0777ULL
> +#define USTAR_MAX_SIZE 0777UL

Sure, I can add this. (Although, I don't think it actually
matters).

Hmm, unless somebody objects in the meantime, I will re-roll
the patch (tomorrow, it's late!).

>  #endif
>  #if TIME_MAX == 0x
>  #define USTAR_MAX_MTIME TIME_MAX
>  #else
> -#define USTAR_MAX_MTIME 0777UL
> +#define USTAR_MAX_MTIME 0777ULL
>  #endif
>  
>  /* writes out the whole block, but only if it is full */
> 

ATB,
Ramsay Jones




Re: What's cooking in git.git (May 2017, #01; Mon, 1)

2017-05-04 Thread Ramsay Jones


On 01/05/17 06:35, Junio C Hamano wrote:
> 
> * nd/fopen-errors (2017-04-23) 17 commits
>  - warn_failure_to_open_read_optional_path(): a new helper
>  - t1308: add a test case on open a config directory
>  - config.c: handle error on failing to fopen()
>  - xdiff-interface.c: report errno on failure to stat() or fopen()
>  - wt-status.c: report error on failure to fopen()
>  - server-info: report error on failure to fopen()
>  - sequencer.c: report error on failure to fopen()
>  - rerere.c: report correct errno
>  - rerere.c: report error on failure to fopen()
>  - remote.c: report error on failure to fopen()
>  - commit.c: report error on failure to fopen() the graft file
>  - log: report errno on failure to fopen() a file
>  - clone: use xfopen() instead of fopen()
>  - blame: report error on open if graft_file is a directory
>  - bisect: report on fopen() error
>  - config.mak.uname: set FREAD_READS_DIRECTORIES for Linux and FreeBSD
>  - git_fopen: fix a sparse 'not declared' warning
> 
>  We often try to open a file for reading whose existence is
>  optional, and silently ignore errors from open/fopen; report such
>  errors if they are not due to missing files.
> 
>  Expecting a reroll that would be much simplified thanks to a higher
>  level helper.

Since this "What's cooking", this has been re-rolled, but the
"git_fopen: fix a sparse 'not declared' warning" patch has been
dropped, so I'm seeing the sparse warning again.

Also, this series removes every external caller of the
'warn_on_inaccessible()' function, so this could be marked
static in wrapper.c. (I would move the definition of that
function above/before warn_on_fopen_errors(), in order not
to require a forward declaration).

ATB,
Ramsay Jones




Re: Delta compression hooks

2017-05-04 Thread Soni L.



On 2017-05-04 07:57 PM, Stefan Beller wrote:

On Thu, May 4, 2017 at 3:48 PM, Soni L.  wrote:

Can we get delta compression hooks? I'm working with voxel data as my source
code (it's a long story...) and git doesn't handle changing lines of voxels
very well.

Example, I have something from position (0,0,0) to position (0,10,0) and I
replace it with something else. Git doesn't handle this difference very well
if the file is encoded in XYZ order (it'd handle it exceptionally well if
the file was in YXZ or YZX order), and Z-order curves aren't much better.
It's even worse if the file is compressed.

Maybe look into smudge filters to store the data in a different format
inside Git
than what is in the working tree.

https://git-scm.com/docs/gitattributes


Those can only do so much. The best it can do is re-encode voxel data 
into Z-order curves, or recompress files into pseudo-compressed format 
(valid gzip file but every block is of type "uncompressed data").


I want something that can say "this file is 3D and we change a line of 
voxels here".


Re: Delta compression hooks

2017-05-04 Thread Stefan Beller
On Thu, May 4, 2017 at 3:48 PM, Soni L.  wrote:
> Can we get delta compression hooks? I'm working with voxel data as my source
> code (it's a long story...) and git doesn't handle changing lines of voxels
> very well.
>
> Example, I have something from position (0,0,0) to position (0,10,0) and I
> replace it with something else. Git doesn't handle this difference very well
> if the file is encoded in XYZ order (it'd handle it exceptionally well if
> the file was in YXZ or YZX order), and Z-order curves aren't much better.
> It's even worse if the file is compressed.

Maybe look into smudge filters to store the data in a different format
inside Git
than what is in the working tree.

https://git-scm.com/docs/gitattributes


Delta compression hooks

2017-05-04 Thread Soni L.
Can we get delta compression hooks? I'm working with voxel data as my 
source code (it's a long story...) and git doesn't handle changing lines 
of voxels very well.


Example, I have something from position (0,0,0) to position (0,10,0) and 
I replace it with something else. Git doesn't handle this difference 
very well if the file is encoded in XYZ order (it'd handle it 
exceptionally well if the file was in YXZ or YZX order), and Z-order 
curves aren't much better. It's even worse if the file is compressed.


The hooks/plugins would be required on the sending end and on the 
receiving end, but bare repos wouldn't need to know about them (github 
could show "unknown diff" or something). This is good because such 
plugins would effectively be machine code, so you don't want bare repos 
running them.


Other source-y things that would benefit from this:

- 2D image source code http://esolangs.org/wiki/Piet
- photoshop files
- database files (well uh, I guess this is more "data" than "source"... 
I'm sure someone out there is storing source code in a database, tho)


[PATCH 6/7] fixup! compat/regex: update the gawk regex engine from upstream

2017-05-04 Thread Ævar Arnfjörð Bjarmason
---
 compat/regex/regex.h   | 120 +---
 compat/regex/regexec.c | 242 +
 2 files changed, 193 insertions(+), 169 deletions(-)

diff --git a/compat/regex/regex.h b/compat/regex/regex.h
index 61c9683872..b602b5567f 100644
--- a/compat/regex/regex.h
+++ b/compat/regex/regex.h
@@ -1,10 +1,13 @@
-#include 
-#include 
+/*
+ * This is git.git's copy of gawk.git's regex engine. Please see that
+ * project for the latest version & to submit patches to this code,
+ * and git.git's compat/regex/README for information on how git's copy
+ * of this code is maintained.
+ */
 
 /* Definitions for data structures and routines for the regular
expression library.
-   Copyright (C) 1985,1989-93,1995-98,2000,2001,2002,2003,2005,2006,2008
-   Free Software Foundation, Inc.
+   Copyright (C) 1985, 1989-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
 
The GNU C Library is free software; you can redistribute it and/or
@@ -18,9 +21,8 @@
Lesser General Public License for more details.
 
You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301 USA.  */
+   License along with the GNU C Library; if not, see
+   .  */
 
 #ifndef _REGEX_H
 #define _REGEX_H 1
@@ -75,10 +77,10 @@ typedef unsigned long int reg_syntax_t;
 /* If this bit is set, then ^ and $ are always anchors (outside bracket
  expressions, of course).
If this bit is not set, then it depends:
-^  is an anchor if it is at the beginning of a regular
-   expression or after an open-group or an alternation operator;
-$  is an anchor if it is at the end of a regular expression, or
-   before a close-group or an alternation operator.
+   ^  is an anchor if it is at the beginning of a regular
+  expression or after an open-group or an alternation operator;
+   $  is an anchor if it is at the end of a regular expression, or
+  before a close-group or an alternation operator.
 
This bit could be (re)combined with RE_CONTEXT_INDEP_OPS, because
POSIX draft 11.2 says that * etc. in leading positions is undefined.
@@ -158,10 +160,18 @@ typedef unsigned long int reg_syntax_t;
If not set, then the GNU regex operators are recognized. */
 # define RE_NO_GNU_OPS (RE_NO_POSIX_BACKTRACKING << 1)
 
+/* If this bit is set, turn on internal regex debugging.
+   If not set, and debugging was on, turn it off.
+   This only works if regex.c is compiled -DDEBUG.
+   We define this bit always, so that all that's needed to turn on
+   debugging is to recompile regex.c; the calling code can always have
+   this bit set, and it won't affect anything in the normal case. */
+# define RE_DEBUG (RE_NO_GNU_OPS << 1)
+
 /* If this bit is set, a syntactically invalid interval is treated as
a string of ordinary characters.  For example, the ERE 'a{1' is
treated as 'a\{1'.  */
-# define RE_INVALID_INTERVAL_ORD (RE_NO_GNU_OPS << 1)
+# define RE_INVALID_INTERVAL_ORD (RE_DEBUG << 1)
 
 /* If this bit is set, then ignore case when matching.
If not set, then case is significant.  */
@@ -178,7 +188,7 @@ typedef unsigned long int reg_syntax_t;
 
 /* If this bit is set, then no_sub will be set to 1 during
re_compile_pattern.  */
-#define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
+# define RE_NO_SUB (RE_CONTEXT_INVALID_DUP << 1)
 #endif
 
 /* This global variable defines the particular regexp syntax to use (for
@@ -199,13 +209,14 @@ extern reg_syntax_t re_syntax_options;
| RE_NO_BK_PARENS  | RE_NO_BK_REFS  \
| RE_NO_BK_VBAR| RE_NO_EMPTY_RANGES \
| RE_DOT_NEWLINE  | RE_CONTEXT_INDEP_ANCHORS\
+   | RE_CHAR_CLASSES   \
| RE_UNMATCHED_RIGHT_PAREN_ORD | RE_NO_GNU_OPS)
 
 #define RE_SYNTAX_GNU_AWK  \
   ((RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS\
-   | RE_INVALID_INTERVAL_ORD)  \
+| RE_INVALID_INTERVAL_ORD) \
& ~(RE_DOT_NOT_NULL | RE_CONTEXT_INDEP_OPS  \
-   | RE_CONTEXT_INVALID_OPS ))
+  | RE_CONTEXT_INVALID_OPS ))
 
 #define RE_SYNTAX_POSIX_AWK\
   (RE_SYNTAX_POSIX_EXTENDED | RE_BACKSLASH_ESCAPE_IN_LISTS \
@@ -323,7 +334,7 @@ typedef enum
   /* POSIX regcomp return error codes.  (In the order listed in the
  standard.)  */
   REG_BADPAT,  /* Invalid pattern.  */
-  REG_ECOLLATE,/* Inalid collating element.  */
+  REG_ECOLLATE,/* Invalid collating element.  */
   REG_ECTYPE,  /* Invalid 

[PATCH 7/7] fixup! compat/regex: update the gawk regex engine from upstream

2017-05-04 Thread Ævar Arnfjörð Bjarmason
---
 compat/regex/intprops.h | 448 
 compat/regex/verify.h   | 286 +++
 2 files changed, 734 insertions(+)
 create mode 100644 compat/regex/intprops.h
 create mode 100644 compat/regex/verify.h

diff --git a/compat/regex/intprops.h b/compat/regex/intprops.h
new file mode 100644
index 00..29f7f40837
--- /dev/null
+++ b/compat/regex/intprops.h
@@ -0,0 +1,448 @@
+/*
+ * This is git.git's copy of gawk.git's regex engine. Please see that
+ * project for the latest version & to submit patches to this code,
+ * and git.git's compat/regex/README for information on how git's copy
+ * of this code is maintained.
+ */
+
+/* intprops.h -- properties of integer types
+
+   Copyright (C) 2001-2016 Free Software Foundation, Inc.
+
+   This program is free software: you can redistribute it and/or modify it
+   under the terms of the GNU Lesser General Public License as published
+   by the Free Software Foundation; either version 2.1 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU Lesser General Public License for more details.
+
+   You should have received a copy of the GNU Lesser General Public License
+   along with this program.  If not, see .  */
+
+/* Written by Paul Eggert.  */
+
+#ifndef _GL_INTPROPS_H
+#define _GL_INTPROPS_H
+
+#include 
+
+#ifndef __has_builtin
+# define __has_builtin(x) 0
+#endif
+
+/* Return a value with the common real type of E and V and the value of V.  */
+#define _GL_INT_CONVERT(e, v) (0 * (e) + (v))
+
+/* Act like _GL_INT_CONVERT (E, -V) but work around a bug in IRIX 6.5 cc; see
+   .  */
+#define _GL_INT_NEGATE_CONVERT(e, v) (0 * (e) - (v))
+
+/* The extra casts in the following macros work around compiler bugs,
+   e.g., in Cray C 5.0.3.0.  */
+
+/* True if the arithmetic type T is an integer type.  bool counts as
+   an integer.  */
+#define TYPE_IS_INTEGER(t) ((t) 1.5 == 1)
+
+/* True if the real type T is signed.  */
+#define TYPE_SIGNED(t) (! ((t) 0 < (t) -1))
+
+/* Return 1 if the real expression E, after promotion, has a
+   signed or floating type.  */
+#define EXPR_SIGNED(e) (_GL_INT_NEGATE_CONVERT (e, 1) < 0)
+
+
+/* Minimum and maximum values for integer types and expressions.  */
+
+/* The width in bits of the integer type or expression T.
+   Padding bits are not supported; this is checked at compile-time below.  */
+#define TYPE_WIDTH(t) (sizeof (t) * CHAR_BIT)
+
+/* The maximum and minimum values for the integer type T.  */
+#define TYPE_MINIMUM(t) ((t) ~ TYPE_MAXIMUM (t))
+#define TYPE_MAXIMUM(t) \
+  ((t) (! TYPE_SIGNED (t)   \
+? (t) -1\
+: t) 1 << (TYPE_WIDTH (t) - 2)) - 1) * 2 + 1)))
+
+/* The maximum and minimum values for the type of the expression E,
+   after integer promotion.  E should not have side effects.  */
+#define _GL_INT_MINIMUM(e)  \
+  (EXPR_SIGNED (e)  \
+   ? ~ _GL_SIGNED_INT_MAXIMUM (e)   \
+   : _GL_INT_CONVERT (e, 0))
+#define _GL_INT_MAXIMUM(e)  \
+  (EXPR_SIGNED (e)  \
+   ? _GL_SIGNED_INT_MAXIMUM (e) \
+   : _GL_INT_NEGATE_CONVERT (e, 1))
+#define _GL_SIGNED_INT_MAXIMUM(e)   \
+  (((_GL_INT_CONVERT (e, 1) << (TYPE_WIDTH ((e) + 0) - 2)) - 1) * 2 + 1)
+
+/* Work around OpenVMS incompatibility with C99.  */
+#if !defined LLONG_MAX && defined __INT64_MAX
+# define LLONG_MAX __INT64_MAX
+# define LLONG_MIN __INT64_MIN
+#endif
+
+/* Does the __typeof__ keyword work?  This could be done by
+   'configure', but for now it's easier to do it by hand.  */
+#if (2 <= __GNUC__ \
+ || (1210 <= __IBMC__ && defined __IBM__TYPEOF__) \
+ || (0x5110 <= __SUNPRO_C && !__STDC__))
+# define _GL_HAVE___TYPEOF__ 1
+#else
+# define _GL_HAVE___TYPEOF__ 0
+#endif
+
+/* Return 1 if the integer type or expression T might be signed.  Return 0
+   if it is definitely unsigned.  This macro does not evaluate its argument,
+   and expands to an integer constant expression.  */
+#if _GL_HAVE___TYPEOF__
+# define _GL_SIGNED_TYPE_OR_EXPR(t) TYPE_SIGNED (__typeof__ (t))
+#else
+# define _GL_SIGNED_TYPE_OR_EXPR(t) 1
+#endif
+
+/* Bound on length of the string representing an unsigned integer
+   value representable in B bits.  log10 (2.0) < 146/485.  The
+   smallest value of B where this bound is not tight is 

[PATCH 3/7] fixup! compat/regex: update the gawk regex engine from upstream

2017-05-04 Thread Ævar Arnfjörð Bjarmason
---
 compat/regex/regcomp.c | 356 +
 1 file changed, 209 insertions(+), 147 deletions(-)

diff --git a/compat/regex/regcomp.c b/compat/regex/regcomp.c
index d8bde06f1a..a1fb2e400e 100644
--- a/compat/regex/regcomp.c
+++ b/compat/regex/regcomp.c
@@ -1,5 +1,12 @@
+/*
+ * This is git.git's copy of gawk.git's regex engine. Please see that
+ * project for the latest version & to submit patches to this code,
+ * and git.git's compat/regex/README for information on how git's copy
+ * of this code is maintained.
+ */
+
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002-2007,2009,2010 Free Software Foundation, Inc.
+   Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa .
 
@@ -14,9 +21,20 @@
Lesser General Public License for more details.
 
You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301 USA.  */
+   License along with the GNU C Library; if not, see
+   .  */
+
+#ifdef HAVE_STDINT_H
+#include 
+#endif
+
+#ifdef HAVE_STRINGS_H
+#include 
+#endif
+
+#ifdef _LIBC
+# include 
+#endif
 
 static reg_errcode_t re_compile_internal (regex_t *preg, const char * pattern,
  size_t length, reg_syntax_t syntax);
@@ -126,7 +144,7 @@ static reg_errcode_t mark_opt_subexp (void *extra, 
bin_tree_t *node);
POSIX doesn't require that we do anything for REG_NOERROR,
but why not be nice?  */
 
-const char __re_error_msgid[] attribute_hidden =
+static const char __re_error_msgid[] =
   {
 #define REG_NOERROR_IDX0
 gettext_noop ("Success")   /* REG_NOERROR */
@@ -150,9 +168,9 @@ const char __re_error_msgid[] attribute_hidden =
 gettext_noop ("Invalid back reference") /* REG_ESUBREG */
 "\0"
 #define REG_EBRACK_IDX (REG_ESUBREG_IDX + sizeof "Invalid back reference")
-gettext_noop ("Unmatched [ or [^") /* REG_EBRACK */
+gettext_noop ("Unmatched [, [^, [:, [., or [=")/* REG_EBRACK */
 "\0"
-#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [ or [^")
+#define REG_EPAREN_IDX (REG_EBRACK_IDX + sizeof "Unmatched [, [^, [:, [., or 
[=")
 gettext_noop ("Unmatched ( or \\(") /* REG_EPAREN */
 "\0"
 #define REG_EBRACE_IDX (REG_EPAREN_IDX + sizeof "Unmatched ( or \\(")
@@ -180,7 +198,7 @@ const char __re_error_msgid[] attribute_hidden =
 gettext_noop ("Unmatched ) or \\)") /* REG_ERPAREN */
   };
 
-const size_t __re_error_msgid_idx[] attribute_hidden =
+static const size_t __re_error_msgid_idx[] =
   {
 REG_NOERROR_IDX,
 REG_NOMATCH_IDX,
@@ -204,20 +222,19 @@ const size_t __re_error_msgid_idx[] attribute_hidden =
 /* Entry points for GNU code.  */
 
 
-#ifdef ZOS_USS
-
-/* For ZOS USS we must define btowc */
-
-wchar_t 
+#ifndef HAVE_BTOWC
+wchar_t
 btowc (int c)
 {
wchar_t wtmp[2];
char tmp[2];
+   mbstate_t mbs;
 
+   memset(& mbs, 0, sizeof(mbs));
tmp[0] = c;
tmp[1] = 0;
 
-   mbtowc (wtmp, tmp, 1);
+   mbrtowc (wtmp, tmp, 1, & mbs);
return wtmp[0];
 }
 #endif
@@ -226,12 +243,11 @@ btowc (int c)
compiles PATTERN (of length LENGTH) and puts the result in BUFP.
Returns 0 if the pattern was valid, otherwise an error string.
 
-   Assumes the `allocated' (and perhaps `buffer') and `translate' fields
+   Assumes the 'allocated' (and perhaps 'buffer') and 'translate' fields
are set in BUFP on entry.  */
 
 const char *
-re_compile_pattern (const char *pattern,
-   size_t length,
+re_compile_pattern (const char *pattern, size_t length,
struct re_pattern_buffer *bufp)
 {
   reg_errcode_t ret;
@@ -254,7 +270,7 @@ re_compile_pattern (const char *pattern,
 weak_alias (__re_compile_pattern, re_compile_pattern)
 #endif
 
-/* Set by `re_set_syntax' to the current regexp syntax to recognize.  Can
+/* Set by 're_set_syntax' to the current regexp syntax to recognize.  Can
also be assigned to arbitrarily: each pattern buffer stores its own
syntax, so it can be changed between regex compilations.  */
 /* This has no initializer because initialized variables in Emacs
@@ -303,8 +319,8 @@ weak_alias (__re_compile_fastmap, re_compile_fastmap)
 #endif
 
 static inline void
-__attribute ((always_inline))
-re_set_fastmap (char *fastmap, int icase, int ch)
+__attribute__ ((always_inline))
+re_set_fastmap (char *fastmap, bool icase, int ch)
 {
   fastmap[ch] = 1;
   if (icase)
@@ -318,7 +334,7 @@ static void
 re_compile_fastmap_iter (regex_t *bufp, const re_dfastate_t *init_state,
 char *fastmap)
 {
-  volatile re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
+  re_dfa_t *dfa = (re_dfa_t *) bufp->buffer;
   int node_cnt;
   int icase = (dfa->mb_cur_max == 1 && (bufp->syntax & 

[PATCH 5/7] fixup! compat/regex: update the gawk regex engine from upstream

2017-05-04 Thread Ævar Arnfjörð Bjarmason
---
 compat/regex/regex_internal.c | 118 +-
 compat/regex/regex_internal.h | 118 ++
 2 files changed, 144 insertions(+), 92 deletions(-)

diff --git a/compat/regex/regex_internal.c b/compat/regex/regex_internal.c
index d4121f2f4f..6d766114a1 100644
--- a/compat/regex/regex_internal.c
+++ b/compat/regex/regex_internal.c
@@ -1,5 +1,12 @@
+/*
+ * This is git.git's copy of gawk.git's regex engine. Please see that
+ * project for the latest version & to submit patches to this code,
+ * and git.git's compat/regex/README for information on how git's copy
+ * of this code is maintained.
+ */
+
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002-2006, 2010 Free Software Foundation, Inc.
+   Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa .
 
@@ -14,9 +21,8 @@
Lesser General Public License for more details.
 
You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301 USA.  */
+   License along with the GNU C Library; if not, see
+   .  */
 
 static void re_string_construct_common (const char *str, int len,
re_string_t *pstr,
@@ -45,7 +51,7 @@ MAX(size_t a, size_t b)
re_string_reconstruct before using the object.  */
 
 static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
 re_string_allocate (re_string_t *pstr, const char *str, int len, int init_len,
RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
 {
@@ -73,7 +79,7 @@ re_string_allocate (re_string_t *pstr, const char *str, int 
len, int init_len,
 /* This function allocate the buffers, and initialize them.  */
 
 static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
 re_string_construct (re_string_t *pstr, const char *str, int len,
 RE_TRANSLATE_TYPE trans, int icase, const re_dfa_t *dfa)
 {
@@ -136,7 +142,7 @@ re_string_construct (re_string_t *pstr, const char *str, 
int len,
 /* Helper functions for re_string_allocate, and re_string_construct.  */
 
 static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
 re_string_realloc_buffers (re_string_t *pstr, int new_buf_len)
 {
 #ifdef RE_ENABLE_I18N
@@ -246,13 +252,8 @@ build_wcs_buffer (re_string_t *pstr)
   else
p = (const char *) pstr->raw_mbs + pstr->raw_mbs_idx + byte_idx;
   mbclen = __mbrtowc (, p, remain_len, >cur_state);
-  if (BE (mbclen == (size_t) -2, 0))
-   {
- /* The buffer doesn't have enough space, finish to build.  */
- pstr->cur_state = prev_st;
- break;
-   }
-  else if (BE (mbclen == (size_t) -1 || mbclen == 0, 0))
+  if (BE (mbclen == (size_t) -1 || mbclen == 0
+ || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len), 0))
{
  /* We treat these cases as a singlebyte character.  */
  mbclen = 1;
@@ -261,6 +262,12 @@ build_wcs_buffer (re_string_t *pstr)
wc = pstr->trans[wc];
  pstr->cur_state = prev_st;
}
+  else if (BE (mbclen == (size_t) -2, 0))
+   {
+ /* The buffer doesn't have enough space, finish to build.  */
+ pstr->cur_state = prev_st;
+ break;
+   }
 
   /* Write wide character and padding.  */
   pstr->wcs[byte_idx++] = wc;
@@ -276,7 +283,7 @@ build_wcs_buffer (re_string_t *pstr)
but for REG_ICASE.  */
 
 static reg_errcode_t
-internal_function
+internal_function __attribute_warn_unused_result__
 build_wcs_upper_buffer (re_string_t *pstr)
 {
   mbstate_t prev_st;
@@ -326,7 +333,7 @@ build_wcs_upper_buffer (re_string_t *pstr)
  size_t mbcdlen;
 
  wcu = towupper (wc);
- mbcdlen = wcrtomb (buf, wcu, _st);
+ mbcdlen = __wcrtomb (buf, wcu, _st);
  if (BE (mbclen == mbcdlen, 1))
memcpy (pstr->mbs + byte_idx, buf, mbclen);
  else
@@ -343,9 +350,11 @@ build_wcs_upper_buffer (re_string_t *pstr)
  for (remain_len = byte_idx + mbclen - 1; byte_idx < remain_len ;)
pstr->wcs[byte_idx++] = WEOF;
}
- else if (mbclen == (size_t) -1 || mbclen == 0)
+ else if (mbclen == (size_t) -1 || mbclen == 0
+  || (mbclen == (size_t) -2 && pstr->bufs_len >= pstr->len))
{
- /* It is an invalid character or '\0'.  Just use the byte.  */
+ /* It is an invalid character, an incomplete character
+at the end of the string, or '\0'.  Just use the byte.  */
  int ch = 

[PATCH 4/7] fixup! compat/regex: update the gawk regex engine from upstream

2017-05-04 Thread Ævar Arnfjörð Bjarmason
---
 compat/regex/regex.c | 32 ++--
 1 file changed, 18 insertions(+), 14 deletions(-)

diff --git a/compat/regex/regex.c b/compat/regex/regex.c
index 5cb23e5d59..d6e525e567 100644
--- a/compat/regex/regex.c
+++ b/compat/regex/regex.c
@@ -1,5 +1,12 @@
+/*
+ * This is git.git's copy of gawk.git's regex engine. Please see that
+ * project for the latest version & to submit patches to this code,
+ * and git.git's compat/regex/README for information on how git's copy
+ * of this code is maintained.
+ */
+
 /* Extended regular expression matching and search library.
-   Copyright (C) 2002, 2003, 2005 Free Software Foundation, Inc.
+   Copyright (C) 2002-2016 Free Software Foundation, Inc.
This file is part of the GNU C Library.
Contributed by Isamu Hasegawa .
 
@@ -14,15 +21,14 @@
Lesser General Public License for more details.
 
You should have received a copy of the GNU Lesser General Public
-   License along with the GNU C Library; if not, write to the Free
-   Software Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA
-   02110-1301 USA.  */
+   License along with the GNU C Library; if not, see
+   .  */
 
 #ifdef HAVE_CONFIG_H
 #include "config.h"
 #endif
 
-/* Make sure no one compiles this code with a C++ compiler.  */
+/* Make sure noone compiles this code with a C++ compiler.  */
 #ifdef __cplusplus
 # error "This is C code, use a C compiler"
 #endif
@@ -52,15 +58,15 @@
 # include "../locale/localeinfo.h"
 #endif
 
-#if defined (_MSC_VER)
-#include  /* for size_t */
-#endif
-
 /* On some systems, limits.h sets RE_DUP_MAX to a lower value than
GNU regex allows.  Include it before , which correctly
#undefs RE_DUP_MAX and sets it to the right value.  */
 #include 
-#include 
+
+/* This header defines the MIN and MAX macros.  */
+#ifdef HAVE_SYS_PARAM_H
+#include 
+#endif /* HAVE_SYS_PARAM_H */
 
 #ifdef GAWK
 #undef alloca
@@ -70,10 +76,8 @@
 #include "regex_internal.h"
 
 #include "regex_internal.c"
-#ifdef GAWK
-#define bool int
-#define true (1)
-#define false (0)
+#ifndef HAVE_STDBOOL_H
+#include "missing_d/gawkbool.h"
 #endif
 #include "regcomp.c"
 #include "regexec.c"
-- 
2.11.0



[PATCH 2/7] compat/regex: update the gawk regex engine from upstream

2017-05-04 Thread Ævar Arnfjörð Bjarmason
Update the gawk regex engine from the upstream gawk.git as detailed in
the README added in a previous change.

This is from gawk.git's gawk-4.1.0-2558-gb2651a80 which is the same
code as in the stable gawk-4.1.4 release, but with one trivial change
on top added in commit 725d2f78 ("Add small regex fix. Add support
directory.", 2016-12-22)[1]

The two patches applied on top of the upstream engine are to,
respectively:

 * Add a notice at the top of each file saying that this copy is
   maintained by the Git project.f

 * Remove the dependency on gawk's verify.h. The library compiles
   as-is when this header file is present, but unfortunately it's
   under GPL v3, unlike the rest of the files which is under LGPL 2.1
   or later.

The changes made in commit a997bf423d ("compat/regex: get the gawk
regex engine to compile within git", 2010-08-17) turned out to be
redundant to achieving the same with defining a few flags to make the
code itself do similar things.

In addition the -DNO_MBSUPPORT flag is not needed, upstream removed
the code that relied on that. It's possible that either -DHAVE_BTOWC
or -D_GNU_SOURCE could cause some problems on non-GNU systems.

The -DHAVE_BTOWC flag indicates that wchar.h has a btowc(3). This
function is defined in POSIX.1-2001 & C99 and later.

The -D_GNU_SOURCE flag is needed because the library itself does:

#ifndef _LIBC
#define __USE_GNU   1
#endif

Which is subsequently picked up by GNU C library headers:

In file included from compat/regex/regex_internal.h:32:0,
 from compat/regex/regex.c:76:
/usr/include/stdio.h:316:6: error: unknown type name 
‘_IO_cookie_io_functions_t’; did you mean ‘__fortify_function’?
  _IO_cookie_io_functions_t __io_funcs) __THROW __wur;
  ^

1. http://git.savannah.gnu.org/cgit/gawk.git/commit/?id=725d2f78

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 Makefile   |   8 +-
 .../0001-Add-notice-at-top-of-copied-files.patch   | 120 +
 .../0002-Remove-verify.h-use-from-intprops.h.patch |  41 +++
 3 files changed, 168 insertions(+), 1 deletion(-)
 create mode 100644 
compat/regex/patches/0001-Add-notice-at-top-of-copied-files.patch
 create mode 100644 
compat/regex/patches/0002-Remove-verify.h-use-from-intprops.h.patch

diff --git a/Makefile b/Makefile
index e35542e631..6235e1b954 100644
--- a/Makefile
+++ b/Makefile
@@ -2060,7 +2060,13 @@ endif
 
 ifdef NO_REGEX
 compat/regex/regex.sp compat/regex/regex.o: EXTRA_CPPFLAGS = \
-   -DGAWK -DNO_MBSUPPORT
+   -DGAWK \
+   -DHAVE_WCHAR_H \
+   -DHAVE_WCTYPE_H \
+   -DHAVE_STDDEF_H \
+   -DHAVE_STDBOOL_H \
+   -DHAVE_BTOWC \
+   -D_GNU_SOURCE
 endif
 
 ifdef USE_NED_ALLOCATOR
diff --git a/compat/regex/patches/0001-Add-notice-at-top-of-copied-files.patch 
b/compat/regex/patches/0001-Add-notice-at-top-of-copied-files.patch
new file mode 100644
index 00..4b4acc45ba
--- /dev/null
+++ b/compat/regex/patches/0001-Add-notice-at-top-of-copied-files.patch
@@ -0,0 +1,120 @@
+diff --git a/compat/regex/intprops.h b/compat/regex/intprops.h
+index 716741adc5..2aef98d290 100644
+--- a/compat/regex/intprops.h
 b/compat/regex/intprops.h
+@@ -1,3 +1,10 @@
++/*
++ * This is git.git's copy of gawk.git's regex engine. Please see that
++ * project for the latest version & to submit patches to this code,
++ * and git.git's compat/regex/README for information on how git's copy
++ * of this code is maintained.
++ */
++
+ /* intprops.h -- properties of integer types
+ 
+Copyright (C) 2001-2016 Free Software Foundation, Inc.
+diff --git a/compat/regex/regcomp.c b/compat/regex/regcomp.c
+index 5ac5370142..a1fb2e400e 100644
+--- a/compat/regex/regcomp.c
 b/compat/regex/regcomp.c
+@@ -1,3 +1,10 @@
++/*
++ * This is git.git's copy of gawk.git's regex engine. Please see that
++ * project for the latest version & to submit patches to this code,
++ * and git.git's compat/regex/README for information on how git's copy
++ * of this code is maintained.
++ */
++
+ /* Extended regular expression matching and search library.
+Copyright (C) 2002-2016 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+diff --git a/compat/regex/regex.c b/compat/regex/regex.c
+index 9f133fab84..d6e525e567 100644
+--- a/compat/regex/regex.c
 b/compat/regex/regex.c
+@@ -1,3 +1,10 @@
++/*
++ * This is git.git's copy of gawk.git's regex engine. Please see that
++ * project for the latest version & to submit patches to this code,
++ * and git.git's compat/regex/README for information on how git's copy
++ * of this code is maintained.
++ */
++
+ /* Extended regular expression matching and search library.
+Copyright (C) 2002-2016 Free Software Foundation, Inc.
+This file is part of the GNU C Library.
+diff --git a/compat/regex/regex.h b/compat/regex/regex.h
+index 143b3afa89..b602b5567f 100644
+--- a/compat/regex/regex.h

[PATCH 1/7] compat/regex: add a README with a maintenance guide

2017-05-04 Thread Ævar Arnfjörð Bjarmason
Add a README file to compat/regex describing how the copy of the gawk
engine should be maintained.

Since gawk's regex engine was originally imported in git.git in commit
d18f76dccf ("compat/regex: use the regex engine from gawk for compat",
2010-08-17) the Git project has forked the upstream code.

Most of the changes that have been made in that time have been made
redundant by similar changes made upstream. Out of all the
modifications made to it since then, which can be found via:

$ git log --oneline d18f76dccf..v2.13.0-rc2 -- compat/regex/

These are the only real code changes that aren't made fully redundant
by upstream patches:

ce518bbd6c ("Fix compat/regex ANSIfication on MinGW", 2010-08-26)
5b62e6374a ("compat/regex/regexec.c: Fix some sparse warnings", 2013-04-27)
d099b7173d ("Fix some sparse warnings", 2013-07-18)

These look to me like they might be a non-issue due to subsequent
changes, or perhaps aren't needed anymore due to compiler updates.

In addition a few style & typo changes have been made in that time:

ce9171cd63 ("compat/regex: fix spelling and grammar in comments", 
2013-04-12)
749f763dbb ("typofix: in-code comments", 2013-07-22)
c01499ef69 ("C: have space around && and || operators", 2013-10-16)

Some of these could still be applied, but I don't see any point in
doing so. These are typo & style nits, if anyone really cares that
much they should send updates to gawk.git instead of making the
re-merging of code into git.git harder over such trivial issues.

Signed-off-by: Ævar Arnfjörð Bjarmason 
---
 compat/regex/README | 21 +
 1 file changed, 21 insertions(+)
 create mode 100644 compat/regex/README

diff --git a/compat/regex/README b/compat/regex/README
new file mode 100644
index 00..345d322d8c
--- /dev/null
+++ b/compat/regex/README
@@ -0,0 +1,21 @@
+This is the Git project's copy of the GNU awk (Gawk) regex
+engine. It's used when Git is build with e.g. NO_REGEX=NeedsStartEnd,
+or when the C library's regular expression functions are otherwise
+deficient.
+
+This is not a fork, but a source code copy. Upstream is the Gawk
+project, and the sources should be periodically updated from their
+copy, which can be done with:
+
+for f in $(find . -name '*.[ch]' -printf "%f\n"); do wget 
http://git.savannah.gnu.org/cgit/gawk.git/plain/support/$f -O $f; done
+
+For ease of maintenance, and to intentionally make it inconvenient to
+diverge from upstream (since it makes it harder to re-merge) any local
+changes should be stored in the patches/ directory, which after doing
+the above can be applied as:
+
+for p in patches/*; do patch -p3 < $p; done
+
+For any changes that aren't specific to the git.git copy please submit
+a patch to the Gawk project and/or to the GNU C library (the Gawk
+regex engine is a periodically & forked copy from glibc.git).
-- 
2.11.0



[PATCH 0/7] Update the compat/regex engine from upstream

2017-05-04 Thread Ævar Arnfjörð Bjarmason
See the first patch for motivation & why.

The only reason this has a cover letter is to explain the !fixup
commits. IIRC the mailing list has a 100K limit, which this series
would violate, so I split up the second commit.

Consider all these !fixup commits to have by Signed-off-by, easier to
say that here than to modify them all.

Ævar Arnfjörð Bjarmason (7):
  compat/regex: add a README with a maintenance guide
  compat/regex: update the gawk regex engine from upstream
  fixup! compat/regex: update the gawk regex engine from upstream
  fixup! compat/regex: update the gawk regex engine from upstream
  fixup! compat/regex: update the gawk regex engine from upstream
  fixup! compat/regex: update the gawk regex engine from upstream
  fixup! compat/regex: update the gawk regex engine from upstream

 Makefile   |   8 +-
 compat/regex/README|  21 +
 compat/regex/intprops.h| 448 +
 .../0001-Add-notice-at-top-of-copied-files.patch   | 120 ++
 .../0002-Remove-verify.h-use-from-intprops.h.patch |  41 ++
 compat/regex/regcomp.c | 356 +---
 compat/regex/regex.c   |  32 +-
 compat/regex/regex.h   | 120 +++---
 compat/regex/regex_internal.c  | 118 +++---
 compat/regex/regex_internal.h  | 118 +++---
 compat/regex/regexec.c | 242 +--
 compat/regex/verify.h  | 286 +
 12 files changed, 1487 insertions(+), 423 deletions(-)
 create mode 100644 compat/regex/README
 create mode 100644 compat/regex/intprops.h
 create mode 100644 
compat/regex/patches/0001-Add-notice-at-top-of-copied-files.patch
 create mode 100644 
compat/regex/patches/0002-Remove-verify.h-use-from-intprops.h.patch
 create mode 100644 compat/regex/verify.h

-- 
2.11.0



Re: [PATCH] fast-export: deletion action first

2017-05-04 Thread miguel torroja
The previous patch reorders the delete operations in fast-export
(preceding any other one), keeps renaming as last operations to
process (as original source code) and for any other operation it keeps
the same order as "diff"

The non deterministic reordering was one of the concerns when I first
sent the patch.
The behavior for the other corner cases pointed out by Jeff
(delete/rename dir/file ) are not tackled in this patch and the final
result is unknown.


On Thu, May 4, 2017 at 9:36 PM, Miguel Torroja  wrote:
>
> The delete operations of the fast-export output should precede any addition
> belonging to the same commit, Addition and deletion with the same name
> entry could happen in case of file to directory and viceversa.
>
> As an equal comparison doesn't have any deterministic final order,
> it's better to keep original diff order input when there is no prefer order
> ( that's done comparing pointers)
>
> The fast-export sorting was added in 060df62 (fast-export: Fix output
> order of D/F changes). That change was made in order to fix the case of
> directory to file in the same commit, but it broke the reverse case
> (File to directory).
>
> The test "file becomes directory" has been added in order to exercise
> the original motivation of the deletion reorder.
>
> Signed-off-by: Miguel Torroja 
> ---
>  builtin/fast-export.c  | 32 +++-
>  t/t9350-fast-export.sh | 25 +
>  2 files changed, 40 insertions(+), 17 deletions(-)
>
> diff --git a/builtin/fast-export.c b/builtin/fast-export.c
> index e022063..e82f654 100644
> --- a/builtin/fast-export.c
> +++ b/builtin/fast-export.c
> @@ -260,26 +260,19 @@ static void export_blob(const struct object_id *oid)
> free(buf);
>  }
>
> -static int depth_first(const void *a_, const void *b_)
> +/*
> + * Compares two diff types to order based on output priorities.
> + */
> +static int diff_type_cmp(const void *a_, const void *b_)
>  {
> const struct diff_filepair *a = *((const struct diff_filepair **)a_);
> const struct diff_filepair *b = *((const struct diff_filepair **)b_);
> -   const char *name_a, *name_b;
> -   int len_a, len_b, len;
> int cmp;
>
> -   name_a = a->one ? a->one->path : a->two->path;
> -   name_b = b->one ? b->one->path : b->two->path;
> -
> -   len_a = strlen(name_a);
> -   len_b = strlen(name_b);
> -   len = (len_a < len_b) ? len_a : len_b;
> -
> -   /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
> -   cmp = memcmp(name_a, name_b, len);
> -   if (cmp)
> -   return cmp;
> -   cmp = len_b - len_a;
> +   /*
> +* Move Delete entries first so that an addition is always reported 
> after
> +*/
> +   cmp = (b->status == DIFF_STATUS_DELETED) - (a->status == 
> DIFF_STATUS_DELETED);
> if (cmp)
> return cmp;
> /*
> @@ -287,7 +280,12 @@ static int depth_first(const void *a_, const void *b_)
>  * appear in the output before it is renamed (e.g., when a file
>  * was copied and renamed in the same commit).
>  */
> -   return (a->status == 'R') - (b->status == 'R');
> +   cmp = (a->status == DIFF_STATUS_RENAMED) - (b->status == 
> DIFF_STATUS_RENAMED);
> +   if (cmp)
> +   return cmp;
> +
> +   /* For the remaining cases we keep the original ordering comparing 
> the pointers */
> +   return (a-b);
>  }
>
>  static void print_path_1(const char *path)
> @@ -347,7 +345,7 @@ static void show_filemodify(struct diff_queue_struct *q,
>  * Handle files below a directory first, in case they are all deleted
>  * and the directory changes to a file or symlink.
>  */
> -   QSORT(q->queue, q->nr, depth_first);
> +   QSORT(q->queue, q->nr, diff_type_cmp);
>
> for (i = 0; i < q->nr; i++) {
> struct diff_filespec *ospec = q->queue[i]->one;
> diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh
> index b5149fd..d4f369a 100755
> --- a/t/t9350-fast-export.sh
> +++ b/t/t9350-fast-export.sh
> @@ -419,6 +419,31 @@ test_expect_success 'directory becomes symlink''
> (cd result && git show master:foo)
>  '
>
> +test_expect_success 'file becomes directory'  '
> +   git init filetodir_orig &&
> +   git init --bare filetodir_replica.git &&
> +   (
> +   cd filetodir_orig &&
> +   echo foo > filethendir &&
> +   git add filethendir &&
> +   test_tick &&
> +   git commit -mfile &&
> +   git rm filethendir &&
> +   mkdir filethendir &&
> +   echo bar > filethendir/a &&
> +   git add filethendir/a &&
> +   test_tick &&
> +   git commit -mdir
> +   ) &&
> +   git --git-dir=filetodir_orig/.git fast-export master  |
> +   git 

[RESEND PATCH] diff: recurse into nested submodules for inline diff

2017-05-04 Thread Stefan Beller
When fd47ae6a5b (diff: teach diff to display submodule difference with an
inline diff, 2016-08-31) was introduced, we did not think of recursing
into nested submodules.

When showing the inline diff for submodules, automatically recurse
into nested submodules as well with inline submodule diffs.

Signed-off-by: Stefan Beller 
---

This is a resend of 
https://public-inbox.org/git/20170331231733.11123-3-sbel...@google.com/

In the cover letter of that patch, I said I would want to redo the tests with
scrubbed output. However given the widespread use of unscrubbed output,
I think this is fine as-is to include.

Thanks,
Stefan

 submodule.c  |  3 +-
 t/t4060-diff-submodule-option-diff-format.sh | 41 
 2 files changed, 43 insertions(+), 1 deletion(-)

diff --git a/submodule.c b/submodule.c
index d3299e29c0..2d56a9562e 100644
--- a/submodule.c
+++ b/submodule.c
@@ -554,7 +554,8 @@ void show_submodule_inline_diff(FILE *f, const char *path,
cp.no_stdin = 1;
 
/* TODO: other options may need to be passed here. */
-   argv_array_push(, "diff");
+   argv_array_pushl(, "diff", "--submodule=diff", NULL);
+
argv_array_pushf(, "--line-prefix=%s", line_prefix);
if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
argv_array_pushf(, "--src-prefix=%s%s/",
diff --git a/t/t4060-diff-submodule-option-diff-format.sh 
b/t/t4060-diff-submodule-option-diff-format.sh
index d4a3ffa69c..33ec26d755 100755
--- a/t/t4060-diff-submodule-option-diff-format.sh
+++ b/t/t4060-diff-submodule-option-diff-format.sh
@@ -775,4 +775,45 @@ test_expect_success 'diff --submodule=diff with moved 
nested submodule HEAD' '
test_cmp expected actual
 '
 
+test_expect_success 'diff --submodule=diff recurses into nested submodules' '
+   cat >expected <<-EOF &&
+   Submodule sm2 contains modified content
+   Submodule sm2 a5a65c9..280969a:
+   diff --git a/sm2/.gitmodules b/sm2/.gitmodules
+   new file mode 100644
+   index 000..3a816b8
+   --- /dev/null
+   +++ b/sm2/.gitmodules
+   @@ -0,0 +1,3 @@
+   +[submodule "nested"]
+   +   path = nested
+   +   url = ../sm2
+   Submodule nested 000...b55928c (new submodule)
+   diff --git a/sm2/nested/file b/sm2/nested/file
+   new file mode 100644
+   index 000..ca281f5
+   --- /dev/null
+   +++ b/sm2/nested/file
+   @@ -0,0 +1 @@
+   +nested content
+   diff --git a/sm2/nested/foo8 b/sm2/nested/foo8
+   new file mode 100644
+   index 000..db9916b
+   --- /dev/null
+   +++ b/sm2/nested/foo8
+   @@ -0,0 +1 @@
+   +foo8
+   diff --git a/sm2/nested/foo9 b/sm2/nested/foo9
+   new file mode 100644
+   index 000..9c3b4f6
+   --- /dev/null
+   +++ b/sm2/nested/foo9
+   @@ -0,0 +1 @@
+   +foo9
+   EOF
+   git diff --submodule=diff >actual 2>err &&
+   test_must_be_empty err &&
+   test_cmp expected actual
+'
+
 test_done
-- 
2.13.0.rc1.18.g9ce9a66034



not uptodate. Cannot merge

2017-05-04 Thread G. Sylvie Davies
Hi,

My little bitbucket "cherry-pick" button is failing on Windows from a
"git reset --hard" blowing up.

My situation:  Git-2.10.2.windows.1 / Bitbucket-4.14.3 / Windows
10-10.0-amd64.   But I suspect even more recent Git will have the same
problem.

Now, I'm pretty far from Kansas here as you'll see from my "git clone"
invocation:

git.exe clone -c core.checkStat=minimal -c core.fileMode=false -c
core.ignoreStat=true -c core.trustctime=false -c
core.logAllRefUpdates=false --shared
C:\Users\gsylvie\dev\bb\target\bitbucket\home\shared\data\repositories\1
C:\Users\gsylvie\dev\bb\target\bitbucket\home\caches\bbClones\1


Right after cloning I create a ".git/info/attributes" file containing
just this one line:

* -text


After the clone, here's the sequence of commands leading up to the bad
"git reset --hard".  These are all fine (well, the "--aborts" whine a
little, but that's expected):

git.exe branch --unset-upstream
git.exe update-index --refresh
git.exe rebase --abort
git.exe cherry-pick --abort


And here's the "git reset --hard" that fails:

git.exe reset --hard --quiet d6edcbf924697ab811a867421dab60d954ccad99 --

---
Exit=128
error: Entry 'basic_branching/file.txt' not uptodate. Cannot merge.
fatal: Could not reset index file to revision
'd6edcbf924697ab811a867421dab60d954ccad99'.
---

For now I've come up with an astonishing workaround:   I just run "git
status" afterwards, and then everything is fine!

I'm going to limp along with this remedy, but I thought I'd share it
here in case anyone has any ideas, or in case it really is a bug.


- Sylvie


p.s.  Thanks for mentioning "git-reverse.sh" in git rev news!


[PATCH] fast-export: deletion action first

2017-05-04 Thread Miguel Torroja
The delete operations of the fast-export output should precede any addition
belonging to the same commit, Addition and deletion with the same name
entry could happen in case of file to directory and viceversa.

As an equal comparison doesn't have any deterministic final order,
it's better to keep original diff order input when there is no prefer order
( that's done comparing pointers)

The fast-export sorting was added in 060df62 (fast-export: Fix output
order of D/F changes). That change was made in order to fix the case of
directory to file in the same commit, but it broke the reverse case
(File to directory).

The test "file becomes directory" has been added in order to exercise
the original motivation of the deletion reorder.

Signed-off-by: Miguel Torroja 
---
 builtin/fast-export.c  | 32 +++-
 t/t9350-fast-export.sh | 25 +
 2 files changed, 40 insertions(+), 17 deletions(-)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index e022063..e82f654 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -260,26 +260,19 @@ static void export_blob(const struct object_id *oid)
free(buf);
 }
 
-static int depth_first(const void *a_, const void *b_)
+/*
+ * Compares two diff types to order based on output priorities.
+ */
+static int diff_type_cmp(const void *a_, const void *b_)
 {
const struct diff_filepair *a = *((const struct diff_filepair **)a_);
const struct diff_filepair *b = *((const struct diff_filepair **)b_);
-   const char *name_a, *name_b;
-   int len_a, len_b, len;
int cmp;
 
-   name_a = a->one ? a->one->path : a->two->path;
-   name_b = b->one ? b->one->path : b->two->path;
-
-   len_a = strlen(name_a);
-   len_b = strlen(name_b);
-   len = (len_a < len_b) ? len_a : len_b;
-
-   /* strcmp will sort 'd' before 'd/e', we want 'd/e' before 'd' */
-   cmp = memcmp(name_a, name_b, len);
-   if (cmp)
-   return cmp;
-   cmp = len_b - len_a;
+   /*
+* Move Delete entries first so that an addition is always reported 
after
+*/
+   cmp = (b->status == DIFF_STATUS_DELETED) - (a->status == 
DIFF_STATUS_DELETED);
if (cmp)
return cmp;
/*
@@ -287,7 +280,12 @@ static int depth_first(const void *a_, const void *b_)
 * appear in the output before it is renamed (e.g., when a file
 * was copied and renamed in the same commit).
 */
-   return (a->status == 'R') - (b->status == 'R');
+   cmp = (a->status == DIFF_STATUS_RENAMED) - (b->status == 
DIFF_STATUS_RENAMED);
+   if (cmp)
+   return cmp;
+
+   /* For the remaining cases we keep the original ordering comparing the 
pointers */
+   return (a-b);
 }
 
 static void print_path_1(const char *path)
@@ -347,7 +345,7 @@ static void show_filemodify(struct diff_queue_struct *q,
 * Handle files below a directory first, in case they are all deleted
 * and the directory changes to a file or symlink.
 */
-   QSORT(q->queue, q->nr, depth_first);
+   QSORT(q->queue, q->nr, diff_type_cmp);
 
for (i = 0; i < q->nr; i++) {
struct diff_filespec *ospec = q->queue[i]->one;
diff --git a/t/t9350-fast-export.sh b/t/t9350-fast-export.sh
index b5149fd..d4f369a 100755
--- a/t/t9350-fast-export.sh
+++ b/t/t9350-fast-export.sh
@@ -419,6 +419,31 @@ test_expect_success 'directory becomes symlink''
(cd result && git show master:foo)
 '
 
+test_expect_success 'file becomes directory'  '
+   git init filetodir_orig &&
+   git init --bare filetodir_replica.git &&
+   (
+   cd filetodir_orig &&
+   echo foo > filethendir &&
+   git add filethendir &&
+   test_tick &&
+   git commit -mfile &&
+   git rm filethendir &&
+   mkdir filethendir &&
+   echo bar > filethendir/a &&
+   git add filethendir/a &&
+   test_tick &&
+   git commit -mdir
+   ) &&
+   git --git-dir=filetodir_orig/.git fast-export master  |
+   git --git-dir=filetodir_replica.git/ fast-import &&
+   (
+   ORIG=$(git --git-dir=filetodir_orig/.git rev-parse --verify 
master) &&
+   REPLICA=$(git --git-dir=filetodir_replica.git rev-parse 
--verify master) &&
+   test $ORIG = $REPLICA
+   )
+'
+
 test_expect_success 'fast-export quotes pathnames' '
git init crazy-paths &&
(cd crazy-paths &&
-- 
2.1.4



Re: [PATCH 0/5] Start of a journey: drop NO_THE_INDEX_COMPATIBILITY_MACROS

2017-05-04 Thread Jonathan Nieder
Hi,

Junio C Hamano wrote:
> Duy Nguyen  writes:

>> I attempted the same thing once or twice in the past, had the same
>> impression and dropped it. But I think it's good to get rid of cache_*
>> macros, at least outside builtin/ directory.
>
> One thing that needs to be kept in mind is that a mechanical
> replacement of active_cache[] to the_index.cache[] in the
> infrastructure code does not get us closer to such an improvement.
> In fact, such a patch only has negative value (keep reading until
> the end of the paragraph that begins with "One thing is certain" to
> understand why).
>
> The entry point to a codechain for an important infrastructure code
> that can currently only work with the_index needs to be identified.
> There may be many.  They need to be made to take an index_state
> instance and to pass it through the callchain.  That kind of change
> would be an improvement, and will get us closer to mark the
> resulting code with NO_THE_INDEX_COMP thing.

Thanks for articulating this.  I agree with this general goal.

[...]
> But one thing is certain. Many users of the API, just like a lot of
> builtin/ code works only with the_index today, will not have to work
> with a non-default repository or a non-default index_state.  Only
> the more advanced and complex "multi-repo" Porcelains would have to
> care.  Keeping active_cache[], active_nr, etc. abstraction would
> isolate the simpler callers from the inevitable code churn we would
> need while getting the "repository" abstraction right.  This is why
> I see that you understood the issues well when you said "builtiln/".

One small change we could make is to reverse the sense of the
NO_THE_INDEX_COMPATIBILITY_MACROS macro.  That way, new library code
authors have to define USE_THE_INDEX_COMPATIBILITY_MACROS at the top
of the file, providing a hint to reviewers that they are using
the_index implicitly instead of relying on explicit repository or
index parameters.

More generally, if we are willing to follow through then I see
Stefan's change as a step in the right direction.  Sure, it replaces
calls like read_cache with read_the_index(_index, ...) which does
not change semantics and may not look like progress.  But:

- uses of 'the_index' are easy to grep for and it is easy to
  understand that they are using global state.  In builtins, this is
  not important (which may be an argument for making builtins get
  USE_THE_INDEX_COMPATIBILITY_MACROS implicitly instead and
  restricting this change to library code), but in libraries it
  communicates what is happening to developers looking at the code.
  It is like a /* NEEDSWORK */ comment, except in code instead of
  comments.

  See Go's context.TODO  for a
  similar example in another set of projects.

- it makes code consistently use the term "index" instead of mixing
  "index" and "cache".  This makes code easier to understand for new
  contributors.

- a minor thing: it means more of git is using functions instead of
  macros.  IDEs, grep habits, etc cope better with functions.  That is
  minor, though: the compatibility macros could easily be replaced
  with compatibility inline functions to achieve the same effect.

[...]
> Also, dropping compatibility macros at the end of the series is
> unfriendly to fellow developers with WIPs that depend on the
> traditional way of doing things.

Agreed with this as well --- for widely used APIs like these, it is
more friendly to decouple adapting callers (in a separate patch) from
the patch that removes the API.

That is, one way to do what this series attempts would be the
following:

 1. rename variables that shadow the_index.

 2. add coccinelle patches (or one coccinelle patch) to
contrib/coccinelle implementing *_cache -> *_index migration.
Is there a way to do this without making it fail "make coccicheck"?

 3. migrate library code (but not builtins) using that semantic patch

 4. make compatibility macros opt-in instead of opt-out
(USE_THE_INDEX_COMPATIBILITY_MACROS). Opt-in all builtins.

 5. optional: change the compatibility macros to compatibility inline
functions, perhaps.
 6. optional: rename *_cache to *_the_index for clarity, perhaps.
 7. optional: migrate builtins, if there is a way to make the patches
for that look sensible.

Thoughts?
Jonathan


Mr Neil Trotter

2017-05-04 Thread Mr Neil Trotter
Eine Spende von 1 Million Britische Pfund zu Ihnen in gutem Glauben


Re: [PATCH 0/5] Abide by our own rules regarding line endings

2017-05-04 Thread Johannes Schindelin
Hi Junio,

On Thu, 4 May 2017, Junio C Hamano wrote:

> Johannes Schindelin  writes:
> 
> > Well, a couple of comments about your comment:
> >
> > - we say "shell scripts", but we're sloppy there: they are "Unix shell
> >   scripts", as they are executed by Unix shells. As such, it is pretty
> >   obvious that they favor Unix line endings, right? And that they do not
> >   really handle anything else well, right?
> 
> Not really.  I would expect that
> 
>   cat A B C
> 
> with CRLF line endings (i.e. "cat A B C") on platforms whose
> eol convention is LF to barf due to the missing file whose name is
> "C", while on platforms whose eol convention is CRLF, I do
> expect the contents of file C comes at the end of the output.

Let me repeat myself: `cat` is a Unix utility. Unix line endings are
LF-only.

So when you say "platform whose eol convention is CRLF", you speak about a
platform that is not Unix.

If you want to run `cat` on Linux, you have to have a Unix-y environment.
Ergo: LF-only line endings.

The real problem arises only because I decided to ship Git for Windows
with a POSIX emulating environment to execute the Unix shell and Perl
scripts.

The real solution would have been to push harder for "builtinification",
but you know yourself how hard of an uphill effort that is, as the idea is
still prevalent that having a large part of Git be implemented as
shell/Perl scripts is not only okay, but even a Good Thing (and
portability is then a matter of making every contributor know what
constructs are portable and to avoid, say, Bashisms or GNU sed's options
that are incompatible with BSD sed's options).

Ciao,
Dscho


Re: [PATCH 00/10] RFC Partial Clone and Fetch

2017-05-04 Thread Jonathan Nieder
Hi again,

Jeff Hostetler wrote:

> In my original RFC there were comments/complaints that with
> missing blobs we lose the ability to detect corruptions.  My
> proposed changes to index-pack and rev-list (and suggestions
> for other commands like fsck) just disabled those errors.
> Personally, I'm OK with that, but I understand that others
> would like to save the ability to distinguish between missing
> and corrupted.

I'm also okay with it.  In a partial clone, in the same way as a
missing ref represents a different valid state and thus passes fsck
regardless of how it happened, a missing blob is a valid state and it
is sensible for it to pass fsck.

A person might object that previously a repository that passed "git
fsck" was a repository where "git fast-export --all" would succeed,
and if I omit a blob that is not present on the remote server then
that invariant is gone.  But that problem exists even if we have a
list of missing blobs.  The server could rewind history and garbage
collect, causing attempts on the client to fetch a previously
advertised missing blob to fail.  Or the server can disappear
completely, or it can lose all its data and have to be restored from
an older backup that is missing newer blobs.

> Right, only the .pack is sent over the wire.  And that's why I
> suggest listing the missing SHAs in it.  I think we need the server
> to send a list of them -- whether in individual per-file type-5
> records as I suggested, or a single record containing a list of
> SHAs+data (which I think I prefer in hindsight).

A list of SHAs+data sounds sensible as a way of conveying additional
per-blob information (such as size).

> WRT being able to discover the missing blobs, is that true in
> all cases?  I don't think it is for thin-packs -- where the server
> only sends stuff you don't (supposedly) already have, right ?

Generate the list of blobs referenced by trees in the pack, when you
are inflating them in git index-pack.  Omit any objects that you
already know about.  The remainder is the set of missing blobs.

One thing this doesn't tell you is if the same missing blob is
available from multiple remotes.  It associates each blob with a
single remote, the first one it was discovered from.

> If instead, we have pack-object indicate that it *would have*
> sent this blob normally, we don't change any of the semantics
> of how pack files are assembled.  This gives the client a
> definitive list of what's missing.

If there is additional information the server wants to convey about
the missing blobs, then this makes sense to me --- it has to send it
somewhere, and a separate list outside the pack seems like a good
place to put that.

When there is no additional information beyond "this is a blob I am
omitting", there is nothing the wire protocol needs to convey.  But
you've convinced me that that's usually moot because the blob size
is valuable information.

[...]
> The more I think about it, I'd like to NOT put the list in the .idx
> file.  If we put it in a separate peer file next to the *.{pack,idx}
> we could decorate it with the name of the remote used in the fetch
> or clone.

I have no strong opinions about this in either direction.

Since it only affects the local repository format and doesn't affect
the protocol, we can experiment without too much fuss. :)

[...]
> I've always wondered if repack, fetch, and friends should build
> a meta-idx that merges all of the current .idx files, but that
> is a different conversation

Yes, we've been thinking about a one-index-for-many-packs facility
to deal with the proliferation of packfiles with only one or a few
large objects without having to waste I/O copying them into a
concatenated pack file.

Another thing we're looking into is incorporating something like
Martin Fick's "git exproll" (
http://public-inbox.org/git/1375756727-1275-1-git-send-email-artag...@gmail.com/)
into "git gc --auto" to more aggressively keep the number of packs
down.

> On 5/3/2017 2:27 PM, Jonathan Nieder wrote:

>> If we were starting over, would this belong in the tree object?
>> (Part of the reason I ask is that we have an opportunity to sort
>> of start over as part of the transition away from SHA-1.)
>
> Yes, putting the size in the tree would be nice.  That does
> add 5-8 bytes to every entry in every tree (on top of the
> larger hash), but it would be useful.
>
> If we're going there, we might just define the new hash
> as the concatenation of the SHA* and the length of the data
> hashed.  So instead of a 32-byte SHA256, we'd have a (32 + 8)
> byte value.  (Or maybe a (32 + 5) if we want to squeeze it.)

Thanks --- that feedback helps.

It doesn't stop us from having to figure something else out in the
short term, of course.

[...]
>> I am worried about the implications of storing this kind of policy
>> information in the pack file.  There may be useful information along
>> these lines for a server to advertise, but I don't think it belongs in
>> the pack 

Re: [PATCH 0/5] Start of a journey: drop NO_THE_INDEX_COMPATIBILITY_MACROS

2017-05-04 Thread Stefan Beller
On Wed, May 3, 2017 at 7:48 PM, Junio C Hamano  wrote:
> Duy Nguyen  writes:
>
 In the long run we may want to drop the macros guarded by
 NO_THE_INDEX_COMPATIBILITY_MACROS. This converts a couple of them.
>>>
>>> Why?
>>
>> Well.. why did you add NO_THE_INDEX_COMP... in the first place? ;-) j/k
>
> That was to mark the code that implements the underlying machinery
> that the code in a particular file has already been vetted and
> converted to be capable of working with an arbitrary index_state
> instance, not just the_index instance.
>
> Your mentioning "at least outside builtin/" shows that you have a
> good understanding of the issue; they are infrastructure code and
> many of them may become more useful if they are enhanced to take an
> index_state instance instead of always working with the_index.  With
> an infrastructure code that insists on working on the_index, an
> application that has something valuable in the_index already needs
> to somehow save it elsewhere before calling the function and use the
> result from the_index before restoring its version.
>
> I think unpack-trees.c used to assume that it is OK to use the_index,
> but after it started taking src/dst_index, it gained NO_THE_INDEX_COMP
> thing.  That's the kind of change in this area we would want to see.
>
>> I attempted the same thing once or twice in the past, had the same
>> impression and dropped it. But I think it's good to get rid of cache_*
>> macros, at least outside builtin/ directory.
>
> One thing that needs to be kept in mind is that a mechanical
> replacement of active_cache[] to the_index.cache[] in the
> infrastructure code does not get us closer to such an improvement.
> In fact, such a patch only has negative value (keep reading until
> the end of the paragraph that begins with "One thing is certain" to
> understand why).
>
> The entry point to a codechain for an important infrastructure code
> that can currently only work with the_index needs to be identified.
> There may be many.  They need to be made to take an index_state
> instance and to pass it through the callchain.  That kind of change
> would be an improvement, and will get us closer to mark the
> resulting code with NO_THE_INDEX_COMP thing.
>
> I think somebody else said this, but I agree that in the longer
> term, we would want to have a "repository" abstraction for in-core
> data structure.
>
> Imagine that a program wants to read into an index_state the tip
> commit of a branch whose name is the same as the current branch in
> one submodule and do something with the objects, while keeping
> the_index for the top-level superproject.  Thanks to Michael's and
> your recent work, we are almost there to have "ref-store" that
> represents the set of refs the submodule in question has that a
> "repository" object that represents the submodule can point at.  We
> can learn the object name at the tip of a specific ref using that
> infrastructure.  Thanks to an ancient change under discussion, we
> can already read the contents of the index file into an instance of
> an index_state that is different from the_index.  But after doing
> these operations, we'd need to actually get to the contents of the
> objects the index_state holds.  How should that work in the
> "repository object represents an in-core repository" world?
>
> What is missing is a way to represent the object store, so that the
> "repository" object can point at an instance of it (which in turn
> may point at a set of its alternates) [*1*].  With that, perhaps the
> most general kind of the infrastructure API that works with an
> index_state instance may additionally take a repository object, or
> perhaps an index_state may have a pointer to the repository where
> the objects listed in it must be taken from (in which case the
> function may take only an index_state).  In the end the enhanced
> function has to allow us to read from the object store of the
> submodule, not from the superproject's.
>
> Depending on how the design of the repository object goes, the
> interface to these functions may have to change.
>
> But one thing is certain. Many users of the API, just like a lot of
> builtin/ code works only with the_index today, will not have to work
> with a non-default repository or a non-default index_state.  Only
> the more advanced and complex "multi-repo" Porcelains would have to
> care.  Keeping active_cache[], active_nr, etc. abstraction would
> isolate the simpler callers from the inevitable code churn we would
> need while getting the "repository" abstraction right.  This is why
> I see that you understood the issues well when you said "builtiln/".
>
> If we want to have the "repository" abstraction, the "object store"
> is the next thing we should be working on, and such a work can take
> inspiration from how the old  active_alloc, active_cache_changed and *active_cache_tree> tuple was
> turned into an "index" 

Re: [PATCH v5] l10n: de.po: update German translation

2017-05-04 Thread Matthias Rüster
Acked-by: Matthias Rüster 


Am 04.05.2017 um 19:08 schrieb Ralf Thielow:
> Translate 96 new messages came from git.pot update in dfc182b (l10n:
> git.pot: v2.13.0 round 1 (96 new, 37 removed)).
> 
> Signed-off-by: Ralf Thielow 
> ---
>  po/de.po | 325 
> ---
>  1 file changed, 143 insertions(+), 182 deletions(-)
> 
> diff --git a/po/de.po b/po/de.po
> index b92c4fe11..cfefa436e 100644
> --- a/po/de.po
> +++ b/po/de.po
> @@ -866,9 +866,9 @@ msgid "Argument not supported for format '%s': -%d"
>  msgstr "Argument für Format '%s' nicht unterstützt: -%d"
>  
>  #: attr.c:212
> -#, fuzzy, c-format
> +#, c-format
>  msgid "%.*s is not a valid attribute name"
> -msgstr "'%s' ist kein gültiger Name für ein Remote-Repository"
> +msgstr "%.*s ist kein gültiger Attributname"
>  
>  #: attr.c:408
>  msgid ""
> @@ -1260,6 +1260,8 @@ msgstr "Speicher verbraucht"
>  #: config.c:191
>  msgid "relative config include conditionals must come from files"
>  msgstr ""
> +"Bedingungen für das Einbinden von Konfigurationen aus relativen Pfaden 
> müssen\n"
> +"aus Dateien kommen."
>  
>  #: config.c:701
>  #, c-format
> @@ -1379,12 +1381,12 @@ msgstr "Ungültiger %s: '%s'"
>  #: config.c:1952
>  #, c-format
>  msgid "unknown core.untrackedCache value '%s'; using 'keep' default value"
> -msgstr ""
> +msgstr "Unbekannter Wert '%s' in core.untrackedCache; benutze Standardwert 
> 'keep'"
>  
>  #: config.c:1978
>  #, c-format
>  msgid "splitIndex.maxPercentChange value '%d' should be between 0 and 100"
> -msgstr ""
> +msgstr "Der Wert '%d' von splitIndex.maxPercentChange sollte zwischen 0 und 
> 100 liegen."
>  
>  #: config.c:1989
>  #, c-format
> @@ -1645,9 +1647,9 @@ msgstr ""
>  "für dieses Verzeichnis deaktiviert."
>  
>  #: dir.c:2776 dir.c:2781
> -#, fuzzy, c-format
> +#, c-format
>  msgid "could not create directories for %s"
> -msgstr "Konnte Verzeichnis '%s' nicht erstellen."
> +msgstr "Konnte Verzeichnisse für '%s' nicht erstellen."
>  
>  #: dir.c:2806
>  #, c-format
> @@ -1655,9 +1657,9 @@ msgid "could not migrate git directory from '%s' to 
> '%s'"
>  msgstr "Konnte Git-Verzeichnis nicht von '%s' nach '%s' migrieren."
>  
>  #: entry.c:280
> -#, fuzzy, c-format
> +#, c-format
>  msgid "could not stat file '%s'"
> -msgstr "konnte Datei '%s' nicht erstellen"
> +msgstr "konnte Datei '%s' nicht lesen"
>  
>  #: fetch-pack.c:249
>  msgid "git fetch-pack: expected shallow list"
> @@ -1827,14 +1829,14 @@ msgid "no matching remote head"
>  msgstr "kein übereinstimmender Remote-Branch"
>  
>  #: fetch-pack.c:1147
> -#, fuzzy, c-format
> +#, c-format
>  msgid "no such remote ref %s"
> -msgstr "Kein solches Remote-Repository: %s"
> +msgstr "keine solche Remote-Referenz %s"
>  
>  #: fetch-pack.c:1150
>  #, c-format
>  msgid "Server does not allow request for unadvertised object %s"
> -msgstr ""
> +msgstr "Der Server lehnt Anfrage nach nicht angebotenem Objekt %s ab."
>  
>  #: gpg-interface.c:185
>  msgid "gpg failed to sign the data"
> @@ -1961,31 +1963,31 @@ msgstr ""
>  
>  #: ident.c:367
>  msgid "no email was given and auto-detection is disabled"
> -msgstr ""
> +msgstr "keine E-Mail angegeben und automatische Erkennung ist deaktiviert"
>  
>  #: ident.c:372
> -#, fuzzy, c-format
> +#, c-format
>  msgid "unable to auto-detect email address (got '%s')"
> -msgstr "Fehler: konnte keine gültige Adresse aus %s extrahieren\n"
> +msgstr "Konnte die E-Mail-Adresse nicht automatisch erkennen ('%s' erhalten)"
>  
>  #: ident.c:382
>  msgid "no name was given and auto-detection is disabled"
> -msgstr ""
> +msgstr "kein Name angegeben und automatische Erkennung ist deaktiviert"
>  
>  #: ident.c:388
> -#, fuzzy, c-format
> +#, c-format
>  msgid "unable to auto-detect name (got '%s')"
> -msgstr "konnte \"Tree\"-Objekt (%s) nicht lesen"
> +msgstr "konnte Namen nicht automatisch erkennen ('%s' erhalten)"
>  
>  #: ident.c:396
>  #, c-format
>  msgid "empty ident name (for <%s>) not allowed"
> -msgstr ""
> +msgstr "Leerer Name in Identifikation (für <%s>) nicht erlaubt."
>  
>  #: ident.c:402
>  #, c-format
>  msgid "name consists only of disallowed characters: %s"
> -msgstr ""
> +msgstr "Name besteht nur aus nicht erlaubten Zeichen: %s"
>  
>  #: ident.c:417 builtin/commit.c:611
>  #, c-format
> @@ -2102,13 +2104,11 @@ msgstr ""
>  "im Arbeitsbereich gelassen."
>  
>  #: merge-recursive.c:1097
> -#, fuzzy, c-format
> +#, c-format
>  msgid ""
>  "CONFLICT (%s/delete): %s deleted in %s and %s to %s in %s. Version %s of %s 
> "
>  "left in tree."
> -msgstr ""
> -"KONFLIKT (%s/löschen): %s gelöscht in %s und %s in %s. Stand %s von %s 
> wurde "
> -"im Arbeitsbereich gelassen."
> +msgstr "KONFLIKT (%s/löschen): %s gelöscht in %s und %s nach %s in %s. Stand 
> %s von %s wurde im Arbeitsbereich gelassen."
>  
>  #: merge-recursive.c:1104
>  #, c-format
> @@ -2120,13 +2120,11 @@ msgstr ""
>  "im Arbeitsbereich bei %s gelassen."
>  
>  #: 

Re: Proposal for missing blob support in Git repos

2017-05-04 Thread Jonathan Tan

On 05/03/2017 09:29 PM, Junio C Hamano wrote:

Jonathan Tan  writes:


I see the semantics as "don't write what you already have", where
"have" means what you have in local storage, but if you extend "have"
to what upstream has, then yes, you're right that this changes
(ignoring shallow clones).

This does remove a resistance that we have against hash collision (in
that normally we would have the correct object for a given hash and
can resist other servers trying to introduce a wrong object, but now
that is no longer the case), but I think it's better than consulting
the hook whenever you want to write anything (which is also a change
in semantics in that you're consulting an external source whenever
you're writing an object, besides the performance implications).


As long as the above pros-and-cons analysis is understood and we are
striking a balance between performance and strictness with such an
understanding of the implications, I am perfectly fine with the
proposal.  That is why my comment has never been "I think that is
wrong" but consistently was "I wonder if that is a good thing."

Thanks.


Noted - if/when I update the patch, I'll include this information in.


[PATCH v5] l10n: de.po: update German translation

2017-05-04 Thread Ralf Thielow
Translate 96 new messages came from git.pot update in dfc182b (l10n:
git.pot: v2.13.0 round 1 (96 new, 37 removed)).

Signed-off-by: Ralf Thielow 
---
 po/de.po | 325 ---
 1 file changed, 143 insertions(+), 182 deletions(-)

diff --git a/po/de.po b/po/de.po
index b92c4fe11..cfefa436e 100644
--- a/po/de.po
+++ b/po/de.po
@@ -866,9 +866,9 @@ msgid "Argument not supported for format '%s': -%d"
 msgstr "Argument für Format '%s' nicht unterstützt: -%d"
 
 #: attr.c:212
-#, fuzzy, c-format
+#, c-format
 msgid "%.*s is not a valid attribute name"
-msgstr "'%s' ist kein gültiger Name für ein Remote-Repository"
+msgstr "%.*s ist kein gültiger Attributname"
 
 #: attr.c:408
 msgid ""
@@ -1260,6 +1260,8 @@ msgstr "Speicher verbraucht"
 #: config.c:191
 msgid "relative config include conditionals must come from files"
 msgstr ""
+"Bedingungen für das Einbinden von Konfigurationen aus relativen Pfaden 
müssen\n"
+"aus Dateien kommen."
 
 #: config.c:701
 #, c-format
@@ -1379,12 +1381,12 @@ msgstr "Ungültiger %s: '%s'"
 #: config.c:1952
 #, c-format
 msgid "unknown core.untrackedCache value '%s'; using 'keep' default value"
-msgstr ""
+msgstr "Unbekannter Wert '%s' in core.untrackedCache; benutze Standardwert 
'keep'"
 
 #: config.c:1978
 #, c-format
 msgid "splitIndex.maxPercentChange value '%d' should be between 0 and 100"
-msgstr ""
+msgstr "Der Wert '%d' von splitIndex.maxPercentChange sollte zwischen 0 und 
100 liegen."
 
 #: config.c:1989
 #, c-format
@@ -1645,9 +1647,9 @@ msgstr ""
 "für dieses Verzeichnis deaktiviert."
 
 #: dir.c:2776 dir.c:2781
-#, fuzzy, c-format
+#, c-format
 msgid "could not create directories for %s"
-msgstr "Konnte Verzeichnis '%s' nicht erstellen."
+msgstr "Konnte Verzeichnisse für '%s' nicht erstellen."
 
 #: dir.c:2806
 #, c-format
@@ -1655,9 +1657,9 @@ msgid "could not migrate git directory from '%s' to '%s'"
 msgstr "Konnte Git-Verzeichnis nicht von '%s' nach '%s' migrieren."
 
 #: entry.c:280
-#, fuzzy, c-format
+#, c-format
 msgid "could not stat file '%s'"
-msgstr "konnte Datei '%s' nicht erstellen"
+msgstr "konnte Datei '%s' nicht lesen"
 
 #: fetch-pack.c:249
 msgid "git fetch-pack: expected shallow list"
@@ -1827,14 +1829,14 @@ msgid "no matching remote head"
 msgstr "kein übereinstimmender Remote-Branch"
 
 #: fetch-pack.c:1147
-#, fuzzy, c-format
+#, c-format
 msgid "no such remote ref %s"
-msgstr "Kein solches Remote-Repository: %s"
+msgstr "keine solche Remote-Referenz %s"
 
 #: fetch-pack.c:1150
 #, c-format
 msgid "Server does not allow request for unadvertised object %s"
-msgstr ""
+msgstr "Der Server lehnt Anfrage nach nicht angebotenem Objekt %s ab."
 
 #: gpg-interface.c:185
 msgid "gpg failed to sign the data"
@@ -1961,31 +1963,31 @@ msgstr ""
 
 #: ident.c:367
 msgid "no email was given and auto-detection is disabled"
-msgstr ""
+msgstr "keine E-Mail angegeben und automatische Erkennung ist deaktiviert"
 
 #: ident.c:372
-#, fuzzy, c-format
+#, c-format
 msgid "unable to auto-detect email address (got '%s')"
-msgstr "Fehler: konnte keine gültige Adresse aus %s extrahieren\n"
+msgstr "Konnte die E-Mail-Adresse nicht automatisch erkennen ('%s' erhalten)"
 
 #: ident.c:382
 msgid "no name was given and auto-detection is disabled"
-msgstr ""
+msgstr "kein Name angegeben und automatische Erkennung ist deaktiviert"
 
 #: ident.c:388
-#, fuzzy, c-format
+#, c-format
 msgid "unable to auto-detect name (got '%s')"
-msgstr "konnte \"Tree\"-Objekt (%s) nicht lesen"
+msgstr "konnte Namen nicht automatisch erkennen ('%s' erhalten)"
 
 #: ident.c:396
 #, c-format
 msgid "empty ident name (for <%s>) not allowed"
-msgstr ""
+msgstr "Leerer Name in Identifikation (für <%s>) nicht erlaubt."
 
 #: ident.c:402
 #, c-format
 msgid "name consists only of disallowed characters: %s"
-msgstr ""
+msgstr "Name besteht nur aus nicht erlaubten Zeichen: %s"
 
 #: ident.c:417 builtin/commit.c:611
 #, c-format
@@ -2102,13 +2104,11 @@ msgstr ""
 "im Arbeitsbereich gelassen."
 
 #: merge-recursive.c:1097
-#, fuzzy, c-format
+#, c-format
 msgid ""
 "CONFLICT (%s/delete): %s deleted in %s and %s to %s in %s. Version %s of %s "
 "left in tree."
-msgstr ""
-"KONFLIKT (%s/löschen): %s gelöscht in %s und %s in %s. Stand %s von %s wurde "
-"im Arbeitsbereich gelassen."
+msgstr "KONFLIKT (%s/löschen): %s gelöscht in %s und %s nach %s in %s. Stand 
%s von %s wurde im Arbeitsbereich gelassen."
 
 #: merge-recursive.c:1104
 #, c-format
@@ -2120,13 +2120,11 @@ msgstr ""
 "im Arbeitsbereich bei %s gelassen."
 
 #: merge-recursive.c:1109
-#, fuzzy, c-format
+#, c-format
 msgid ""
 "CONFLICT (%s/delete): %s deleted in %s and %s to %s in %s. Version %s of %s "
 "left in tree at %s."
-msgstr ""
-"KONFLIKT (%s/löschen): %s gelöscht in %s und %s in %s. Stand %s von %s wurde "
-"im Arbeitsbereich bei %s gelassen."
+msgstr "KONFLIKT (%s/löschen): %s gelöscht in %s und %s nach %s in %s. Stand 
%s von %s wurde im Arbeitsbereich bei %s 

Re: [PATCH 00/10] RFC Partial Clone and Fetch

2017-05-04 Thread Jeff Hostetler



On 5/3/2017 2:27 PM, Jonathan Nieder wrote:

Hi,

Jeff Hostetler wrote:

Missing-Blob Support


Let me offer up an alternative idea for representing
missing blobs.  This is differs from both of our previous
proposals.  (I don't have any code for this new proposal,
I just want to think out loud a bit and see if this is a
direction worth pursuing -- or a complete non-starter.)

Both proposals talk about detecting and adapting to a missing
blob and ways to recover -- when we fail to find a blob.
Comments on the thread asked about:
() being able to detect missing blobs vs corrupt repos
() being unable to detect duplicate blobs
() expense of blob search.

Suppose we store "positive" information about missing blobs?
This would let us know that a blob is intentionally missing
and possibly some meta-data about it.


We've discussed this a little informally but didn't go more into
it, so I'm glad you brought it up.

There are two use cases I care about.  I'll want names to refer to
them later, so describing them now:

 A. A p4 or svn style "monorepo" containing all code within a company.
We want to make git scale well when working with such a
repository.  Disk usage, network usage, index size, and object
lookup time are all issues for such a repository.

A repository can creep up in size so it starts falling into this
category even though git coped well with it before.  Another way
to end up in this category is a conversion from a version control
system like p4 or svn.

 B. A more modestly sized repository with some large blobs in it.  At
clone time we want to omit unneeded large blobs to speed up the
clone, save disk space, and save bandwidth.

For this kind of repository, the idx file already contained all
those blobs and that was not causing problems --- the only problem
was the actual blob size.


Yes, I've been primarily concerned with "case A" repos.
I work with the team converting the Windows source repo
to git.  This was discussed in Brussels as part of the
GVFS presentation.

The Windows tree has 3.5M files in the worktree for a simple checkout
of HEAD.  The index is 450MB.  There are 500K trees/folders in
the commit.  Multiply that by scale factor considering the number
of trunk/release branches, number of developers, typical number of
commits per day per developer, and n years(decades) of history and
we get to a very large number

FWIW, there's also a "case C" which has both, but that
just hurts to think about.




1. Suppose we update the .pack file format slightly.

[...]

2. Make a similar change in the .idx format and git-index-pack
   to include them there.  Then blob lookup operations could
   definitively determine that a blob exists and is just not
   present locally.


Some nits:

- there shouldn't be any need for the blobs to even be mentioned in
  the .pack stored locally.  The .idx file maps from sha1 to offset
  within the packfile --- a special offset could mean "this is a
  missing blob".

- Git protocol works by sending pack files over the wire.  The idx
  file is not transmitted to the client --- the client instead
  reconstructs it from the pack file.  I assume this is why you
  stored the SHA-1 of the object in the pack file, but it could
  equally well be sent through another stream (since this proposal
  involves a change to git protocol anyway).

- However, the list of missing blobs can be inferred from the existing
  pack format, without a change to the pack format used in git
  protocol.  As part of constructing the idx, "git index-pack"
  inflates every object in the pack file sent by the server.  This
  means we know what blobs they reference, so we can easily produce a
  list for the idx file without changing the pack file format.


In my original RFC there were comments/complaints that with
missing blobs we lose the ability to detect corruptions.  My
proposed changes to index-pack and rev-list (and suggestions
for other commands like fsck) just disabled those errors.
Personally, I'm OK with that, but I understand that others
would like to save the ability to distinguish between missing
and corrupted.

Right, only the .pack is sent over the wire.  And that's why I
suggest listing the missing SHAs in it.  I think we need the server
to send a list of them -- whether in individual per-file type-5
records as I suggested, or a single record containing a list of
SHAs+data (which I think I prefer in hindsight).

WRT being able to discover the missing blobs, is that true in
all cases?  I don't think it is for thin-packs -- where the server
only sends stuff you don't (supposedly) already have, right ?

If instead, we have pack-object indicate that it *would have*
sent this blob normally, we don't change any of the semantics
of how pack files are assembled.  This gives the client a
definitive list of what's missing.



If this is done by only changing the idx file format and not the pack
file, then it does not 

Re: Bug? git svn tag Authentication failed

2017-05-04 Thread Eric Wong
Ethan Clevenger  wrote:
> Git version: 2.12.2.windows.2
> 
> `git svn tag 1.0` results in:



Note, "git svn tag" uses the same backend code as "git svn branch",
and there was a recent fix for that in
commit e0688e9b28f2c5ff711460ee8b62077be5df2360
("git svn: fix authentication with 'branch'")

Looks like that change will be in 2.13 (it's in all the
release-canditates), but it should backported to 2.12.x

Junio: I think that commit should go into 'maint', too.  Thanks.

> In my .subversion directory I do have cached credentials under
> auth/svn.simple. A vanilla svn CLI commit also works:
> 
> >  svn ci -m 'do creds work?'
> > Adding trunk\.bar
> > Transmitting file data .done
> > Committing transaction...
> > Committed revision 1651133.
> 
> 
> Unsure if this is actually a bug or my oversight.

Seems like a bug to me.  Can you try one of the 2.13 release
candidates to confirm?  Thanks.


Bug? git svn tag Authentication failed

2017-05-04 Thread Ethan Clevenger
Git version: 2.12.2.windows.2

`git svn tag 1.0` results in:

> Copying https://plugins.svn.wordpress.org/gf-payment-continue/trunk at 
> r1651123 to https://plugins.svn.wordpress.org/gf-payment-continue/tags/1.0...
> Authentication failed: No more credentials or we tried too many times.
> Authentication failed at C:\Program 
> Files\Git\mingw64/libexec/git-core\git-svn line 1200.


In my .subversion directory I do have cached credentials under
auth/svn.simple. A vanilla svn CLI commit also works:

>  svn ci -m 'do creds work?'
> Adding trunk\.bar
> Transmitting file data .done
> Committing transaction...
> Committed revision 1651133.


Unsure if this is actually a bug or my oversight.

Cheers,
Ethan
Sterner Stuff Design
https://sternerstuffdesign.com


[ANNOUNCE] Git v2.13.0-rc2

2017-05-04 Thread Junio C Hamano
A release candidate Git v2.13.0-rc2 is now available for testing
at the usual places.  It is comprised of 699 non-merge commits
since v2.12.0, contributed by 59 people, 15 of which are new faces.

The tarballs are found at:

https://www.kernel.org/pub/software/scm/git/testing/

The following public repositories all have a copy of the
'v2.13.0-rc2' tag and the 'master' branch that the tag points at:

  url = https://kernel.googlesource.com/pub/scm/git/git
  url = git://repo.or.cz/alt-git.git
  url = git://git.sourceforge.jp/gitroot/git-core/git.git
  url = git://git-core.git.sourceforge.net/gitroot/git-core/git-core
  url = https://github.com/gitster/git

New contributors whose contributions weren't in v2.12.0 are as follows.
Welcome to the Git development community!

  Allan Xavier, Andreas Heiduk, Devin J. Pohly, Devin Lehmacher,
  Hiroshi Shirosaki, Johan Hovold, Maxim Moseychuk, Mostyn
  Bramley-Moore, Prathamesh Chavan, Quentin Pradet, René Genz,
  Segev Finer, Sergey Ryazanov, Stephen Hicks, and Valery Tolstov.

Returning contributors who helped this release are as follows.
Thanks for your continued support.

  Ævar Arnfjörð Bjarmason, Alex Henrie, Brandon Williams,
  brian m. carlson, Christian Couder, Cornelius Weig, David
  Aguilar, David Turner, Eric Wong, Giuseppe Bilotta, Jacob Keller,
  Jean-Noel Avila, Jeff Hostetler, Jeff King, Johannes Schindelin,
  Jonathan Nieder, Jonathan Tan, Junio C Hamano, Karthik Nayak,
  Kevin Willford, Kyle Meyer, Lars Schneider, Linus Torvalds,
  Luke Diamand, Matt McCutchen, Michael Haggerty, Michael J Gruber,
  Michael Rappazzo, Mike Hommey, Nguyễn Thái Ngọc Duy, Patrick
  Steinhardt, Peter Krefting, Ralf Thielow, Ramsay Jones, René
  Scharfe, Ross Lagerwall, Santiago Torres, Sebastian Schuberth,
  Simon Ruderich, Stefan Beller, SZEDER Gábor, Thomas Gummerer,
  Torsten Bögershausen, and Vegard Nossum.



Git 2.13 Release Notes (draft)
==

Backward compatibility notes.

 * Use of an empty string as a pathspec element that is used for
   'everything matches' is still warned and Git asks users to use a
   more explicit '.' for that instead.  The hope is that existing
   users will not mind this change, and eventually the warning can be
   turned into a hard error, upgrading the deprecation into removal of
   this (mis)feature.  That is not scheduled to happen in the upcoming
   release (yet).

 * The historical argument order "git merge  HEAD ..."
   has been deprecated for quite some time, and is now removed.

 * The default location "~/.git-credential-cache/socket" for the
   socket used to communicate with the credential-cache daemon has
   been moved to "~/.cache/git/credential/socket".

 * Git now avoids blindly falling back to ".git" when the setup
   sequence said we are _not_ in Git repository.  A corner case that
   happens to work right now may be broken by a call to die("BUG").
   We've tried hard to locate such cases and fixed them, but there
   might still be cases that need to be addressed--bug reports are
   greatly appreciated.


Updates since v2.12
---

UI, Workflows & Features

 * "git describe" and "git name-rev" have been taught to take more
   than one refname patterns to restrict the set of refs to base their
   naming output on, and also learned to take negative patterns to
   name refs not to be used for naming via their "--exclude" option.

 * Deletion of a branch "foo/bar" could remove .git/refs/heads/foo
   once there no longer is any other branch whose name begins with
   "foo/", but we didn't do so so far.  Now we do.

 * When "git merge" detects a path that is renamed in one history
   while the other history deleted (or modified) it, it now reports
   both paths to help the user understand what is going on in the two
   histories being merged.

 * The  part in "http.." configuration variable
   can now be spelled with '*' that serves as wildcard.
   E.g. "http.https://*.example.com.proxy; can be used to specify the
   proxy used for https://a.example.com, https://b.example.com, etc.,
   i.e. any host in the example.com domain.

 * "git tag" did not leave useful message when adding a new entry to
   reflog; this was left unnoticed for a long time because refs/tags/*
   doesn't keep reflog by default.

 * The "negative" pathspec feature was somewhat more cumbersome to use
   than necessary in that its short-hand used "!" which needed to be
   escaped from shells, and it required "exclude from what?" specified.

 * The command line options for ssh invocation needs to be tweaked for
   some implementations of SSH (e.g. PuTTY plink wants "-P "
   while OpenSSH wants "-p " to specify port to connect to), and
   the variant was guessed when GIT_SSH environment variable is used
   to specify it.  The logic to guess now applies to the command
   specified by the newer GIT_SSH_COMMAND and also core.sshcommand
   configuration 

Re: [PATCH 0/5] Abide by our own rules regarding line endings

2017-05-04 Thread Junio C Hamano
Johannes Schindelin  writes:

> Well, a couple of comments about your comment:
>
> - we say "shell scripts", but we're sloppy there: they are "Unix shell
>   scripts", as they are executed by Unix shells. As such, it is pretty
>   obvious that they favor Unix line endings, right? And that they do not
>   really handle anything else well, right?

Not really.  I would expect that

cat A B C

with CRLF line endings (i.e. "cat A B C") on platforms whose
eol convention is LF to barf due to the missing file whose name is
"C", while on platforms whose eol convention is CRLF, I do
expect the contents of file C comes at the end of the output.

> - You try to say that it is not okay for shell scripts to be checked
>   out as LF-only when the platform convention for *text* files is CR/LF,
>   right? 

I didn't try to and I didn't say that, I hope.  I think it is not OK
for shell scripts to be checked out with  when the platform
convention for text is , though.  It may be possible for an
implementation on CRLF platform to tolerate missing  (i.e. a
file in LF convention--instead of treating a lone  as a non-eol
but just a regular "funny" byte, treat it as a normal eol), but that
would be a quality-of-implementation thing.  So when the platform
convention for text files is , I would have thought that it
was more natural to check out shell scripts as such.

However, I did not think things through when I said "I am not sure
if it is OK for shell scripts not to honor the platform convention,
though."  In a sense, the "cat A B C" example does not have to worry
about the platform convention issue very much.  In real scripts, we
have things like a string literal that spans lines (i.e. do we have
a LF, or a CRLF pair, in between these two lines?) and here
documents (ditto).  To handle these "correctly" while treating shell
scripts mere "text" files, a shell implementation on CRLF platform
has to accept CRLF, pretend as if it saw only LF, do all the
processing as if the eol convention were LF, and convert LF in the
output from the script to CRLF.  I think that is _too_ much to ask
for an implementation, and such an attempt would get corner cases
wrong.  IOW, I was not sure if it is OK for scripts not to honor the
platorm convention when I wrote the message you are responding to,
but I think it probably is not just OK but is the simplest/cleanest
for shell implementations not to honor the platform convention and
instead pretend that they always live in LF-only world.

And all of the above ultimately does not matter.  If the shell you
have on Windows cannot take CRLF files, then our shell script must
be checked out as LF files even on Windows.  My "I am not sure" was
mostly from not knowing how ingrained that "cannot take CRLF files"
was (i.e. I didn't know if there may be some knobs similar to
core.crlf we have that you can toggle for your shell to honor the
platform convention).




Re: git v2.13.0-rc0 test failures on cygwin

2017-05-04 Thread Adam Dinwoodie
On Fri, Apr 28, 2017 at 03:20:21PM -0400, Devin Lehmacher wrote:
> > > Test Summary Report
> > > ---
> > > t0301-credential-cache.sh(Wstat: 256 Tests: 29 
> > > Failed: 6)
> > >   Failed tests:  12, 24-28
> > >   Non-zero exit status: 1
> >
> > Confirmed I'm seeing this on v2.13.0-rc1, and this passed in v2.12.2.
> > `git bisect` tells me this failure was introduced by commit
> > v2.12.0-267-g612c49e94, added by Devin Lehmacher (added to the CC
> > list).
> 
> Can someone with cygwin check that `test -S` works on cygwin?

Seems to work for me, using both Bash's built-in `test` and `/bin/test`:

$ socat UNIX-LISTEN:socket-file LISTEN:socket-file &
[1] 2976

$ ls -l socket-file
srw-r--r-- 1 add Domain Users 0 May  4 15:08 socket-file

$ type test
test is a shell builtin

$ test -S socket-file && echo success
success

$ /bin/test -S socket-file && echo success
success

$ touch regular-file

$ test -S regular-file && echo success

$ /bin/test -S regular-file && echo success

$ test -S non-existant-file && echo success

$ /bin/test -S non-existant-file && echo success

> I'll also take a look at verbose test output (maybe with a trace too)
> for t0301 if someone sends me that.

The verbose t0301.12 output is below; the verbose test output from the other
failing tests is more-or-less identical.  I've attached the full trace
output, too: it looks like the fatal error below is on the `git
credential-cache exit` commands.

fatal: read error from cache daemon: Connection reset by peer
not ok 12 - socket defaults to ~/.cache/git/credential/socket
#
#   test_when_finished "
#   git credential-cache exit &&
#   rmdir -p .cache/git/credential/
#   " &&
#   test_path_is_missing "$HOME/.git-credential-cache" &&
#   test -S "$HOME/.cache/git/credential/socket"
#

This specific output came from v2.13.0-rc1 on an up-to-date 64-bit
Cygwin installation.  I'm happy to experiment with other build options /
patches / environments /  if that would be useful.

Adam
Initialized empty Git repository in 
/home/add/vcs/cygwin/git/git-2.13.0-rc1-1.x86_64/build/t/trash 
directory.t0301-credential-cache/.git/
expecting success: 
check fill $HELPER <<-\EOF
protocol=https
host=example.com
--
protocol=https
host=example.com
username=askpass-username
password=askpass-password
--
askpass: Username for 'https://example.com':
askpass: Password for 'https://askpass-usern...@example.com':
EOF

++ check fill cache
++ credential_opts=
++ credential_cmd=fill
++ shift
++ for arg in "$@"
++ credential_opts=' -c credential.helper='\''cache'\'''
++ read_chunk
++ read line
++ case "$line" in
++ echo protocol=https
++ read line
++ case "$line" in
++ echo host=example.com
++ read line
++ case "$line" in
++ break
++ read_chunk
++ read line
++ case "$line" in
++ echo protocol=https
++ read line
++ case "$line" in
++ echo host=example.com
++ read line
++ case "$line" in
++ echo username=askpass-username
++ read line
++ case "$line" in
++ echo password=askpass-password
++ read line
++ case "$line" in
++ break
++ read_chunk
++ read line
++ case "$line" in
++ echo 'askpass: Username for '\''https://example.com'\'':'
++ read line
++ case "$line" in
++ echo 'askpass: Password for '\''https://askpass-usern...@example.com'\'':'
++ read line
++ eval 'git  -c credential.helper='\''cache'\'' credential fill stdout 
2>stderr'
+++ git -c credential.helper=cache credential fill
++ test_cmp expect-stdout stdout
++ diff -u expect-stdout stdout
++ test_cmp expect-stderr stderr
++ diff -u expect-stderr stderr
+ test_eval_ret_=0
+ want_trace
+ test t = t
+ test t = t
+ set +x
ok 1 - helper (cache) has no existing data

expecting success: 
check approve $HELPER <<-\EOF
protocol=https
host=example.com
username=store-user
password=store-pass
EOF

++ check approve cache
++ credential_opts=
++ credential_cmd=approve
++ shift
++ for arg in "$@"
++ credential_opts=' -c credential.helper='\''cache'\'''
++ read_chunk
++ read line
++ case "$line" in
++ echo protocol=https
++ read line
++ case "$line" in
++ echo host=example.com
++ read line
++ case "$line" in
++ echo username=store-user
++ read line
++ case "$line" in
++ echo password=store-pass
++ read line
++ read_chunk
++ read line
++ read_chunk
++ read line
++ eval 'git  -c credential.helper='\''cache'\'' credential approve stdout 2>stderr'
+++ git -c credential.helper=cache credential approve
++ test_cmp expect-stdout stdout
++ diff -u expect-stdout stdout
++ test_cmp expect-stderr stderr
++ diff -u 

[PATCH v4 23/25] name-rev: avoid leaking memory in the `deref` case

2017-05-04 Thread Johannes Schindelin
When the `name_rev()` function is asked to dereference the tip name, it
allocates memory. But when it turns out that another tip already
described the commit better than the current one, we forgot to release
the memory.

Pointed out by Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/name-rev.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/builtin/name-rev.c b/builtin/name-rev.c
index 92a5d8a5d26..e7a3fe7ee70 100644
--- a/builtin/name-rev.c
+++ b/builtin/name-rev.c
@@ -28,6 +28,7 @@ static void name_rev(struct commit *commit,
struct rev_name *name = (struct rev_name *)commit->util;
struct commit_list *parents;
int parent_number = 1;
+   char *to_free = NULL;
 
parse_commit(commit);
 
@@ -35,7 +36,7 @@ static void name_rev(struct commit *commit,
return;
 
if (deref) {
-   tip_name = xstrfmt("%s^0", tip_name);
+   tip_name = to_free = xstrfmt("%s^0", tip_name);
 
if (generation)
die("generation: %d, but deref?", generation);
@@ -53,8 +54,10 @@ static void name_rev(struct commit *commit,
name->taggerdate = taggerdate;
name->generation = generation;
name->distance = distance;
-   } else
+   } else {
+   free(to_free);
return;
+   }
 
for (parents = commit->parents;
parents;
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 25/25] submodule_uses_worktrees(): plug memory leak

2017-05-04 Thread Johannes Schindelin
There is really no reason why we would need to hold onto the allocated
string longer than necessary.

Reported by Coverity.

Signed-off-by: Johannes Schindelin 
---
 worktree.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/worktree.c b/worktree.c
index bae787cf8d7..89a81b13de3 100644
--- a/worktree.c
+++ b/worktree.c
@@ -399,6 +399,7 @@ int submodule_uses_worktrees(const char *path)
 
/* The env would be set for the superproject. */
get_common_dir_noenv(, submodule_gitdir);
+   free(submodule_gitdir);
 
/*
 * The check below is only known to be good for repository format
@@ -418,7 +419,6 @@ int submodule_uses_worktrees(const char *path)
/* See if there is any file inside the worktrees directory. */
dir = opendir(sb.buf);
strbuf_release();
-   free(submodule_gitdir);
 
if (!dir)
return 0;
-- 
2.12.2.windows.2.800.gede8f145e06


[PATCH v4 24/25] show_worktree(): plug memory leak

2017-05-04 Thread Johannes Schindelin
The buffer allocated by shorten_unambiguous_ref() needs to be released.

Discovered by Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/worktree.c | 8 +---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/builtin/worktree.c b/builtin/worktree.c
index 1722a9bdc2a..ff5dfd2b102 100644
--- a/builtin/worktree.c
+++ b/builtin/worktree.c
@@ -414,9 +414,11 @@ static void show_worktree(struct worktree *wt, int 
path_maxlen, int abbrev_len)
find_unique_abbrev(wt->head_sha1, 
DEFAULT_ABBREV));
if (wt->is_detached)
strbuf_addstr(, "(detached HEAD)");
-   else if (wt->head_ref)
-   strbuf_addf(, "[%s]", 
shorten_unambiguous_ref(wt->head_ref, 0));
-   else
+   else if (wt->head_ref) {
+   char *ref = shorten_unambiguous_ref(wt->head_ref, 0);
+   strbuf_addf(, "[%s]", ref);
+   free(ref);
+   } else
strbuf_addstr(, "(error)");
}
printf("%s\n", sb.buf);
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 15/25] pack-redundant: plug memory leak

2017-05-04 Thread Johannes Schindelin
Identified via Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/pack-redundant.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/builtin/pack-redundant.c b/builtin/pack-redundant.c
index 72c815844dd..cb1df1c7614 100644
--- a/builtin/pack-redundant.c
+++ b/builtin/pack-redundant.c
@@ -442,6 +442,7 @@ static void minimize(struct pack_list **min)
/* return if there are no objects missing from the unique set */
if (missing->size == 0) {
*min = unique;
+   free(missing);
return;
}
 
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 22/25] remote: plug memory leak in match_explicit()

2017-05-04 Thread Johannes Schindelin
The `guess_ref()` returns an allocated buffer of which `make_linked_ref()`
does not take custody (`alloc_ref()` makes a copy), therefore we need to
release the buffer afterwards.

Noticed via Coverity.

Signed-off-by: Johannes Schindelin 
---
 remote.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/remote.c b/remote.c
index 801137c72eb..72b4591b983 100644
--- a/remote.c
+++ b/remote.c
@@ -1191,9 +1191,10 @@ static int match_explicit(struct ref *src, struct ref 
*dst,
else if (is_null_oid(_src->new_oid))
error("unable to delete '%s': remote ref does not 
exist",
  dst_value);
-   else if ((dst_guess = guess_ref(dst_value, matched_src)))
+   else if ((dst_guess = guess_ref(dst_value, matched_src))) {
matched_dst = make_linked_ref(dst_guess, dst_tail);
-   else
+   free(dst_guess);
+   } else
error("unable to push to unqualified destination: %s\n"
  "The destination refspec neither matches an "
  "existing ref on the remote nor\n"
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 13/25] setup_bare_git_dir(): help static analysis

2017-05-04 Thread Johannes Schindelin
Coverity reported a memory leak in this function. However, it can only
be called once, as setup_git_directory() changes global state and hence
is not reentrant.

Mark the variable as static to indicate that this is a singleton.

Signed-off-by: Johannes Schindelin 
---
 setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/setup.c b/setup.c
index 0309c278218..0320a9ad14c 100644
--- a/setup.c
+++ b/setup.c
@@ -748,7 +748,7 @@ static const char *setup_bare_git_dir(struct strbuf *cwd, 
int offset,
 
/* --work-tree is set without --git-dir; use discovered one */
if (getenv(GIT_WORK_TREE_ENVIRONMENT) || git_work_tree_cfg) {
-   const char *gitdir;
+   static const char *gitdir;
 
gitdir = offset == cwd->len ? "." : xmemdupz(cwd->buf, offset);
if (chdir(cwd->buf))
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 16/25] mktree: plug memory leaks reported by Coverity

2017-05-04 Thread Johannes Schindelin
Signed-off-by: Johannes Schindelin 
---
 builtin/mktree.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/builtin/mktree.c b/builtin/mktree.c
index de9b40fc63b..da0fd8cd706 100644
--- a/builtin/mktree.c
+++ b/builtin/mktree.c
@@ -72,7 +72,7 @@ static void mktree_line(char *buf, size_t len, int 
nul_term_line, int allow_miss
unsigned mode;
enum object_type mode_type; /* object type derived from mode */
enum object_type obj_type; /* object type derived from sha */
-   char *path;
+   char *path, *to_free = NULL;
unsigned char sha1[20];
 
ptr = buf;
@@ -102,7 +102,7 @@ static void mktree_line(char *buf, size_t len, int 
nul_term_line, int allow_miss
struct strbuf p_uq = STRBUF_INIT;
if (unquote_c_style(_uq, path, NULL))
die("invalid quoting");
-   path = strbuf_detach(_uq, NULL);
+   path = to_free = strbuf_detach(_uq, NULL);
}
 
/*
@@ -136,6 +136,7 @@ static void mktree_line(char *buf, size_t len, int 
nul_term_line, int allow_miss
}
 
append_to_tree(mode, sha1, path);
+   free(to_free);
 }
 
 int cmd_mktree(int ac, const char **av, const char *prefix)
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 21/25] add_reflog_for_walk: avoid memory leak

2017-05-04 Thread Johannes Schindelin
We free()d the `log` buffer when dwim_log() returned 1, but not when it
returned a larger value (which meant that it still allocated the buffer
but we simply ignored it).

While in the vicinity, make sure that the `reflogs` structure as well as
the `branch` variable are released properly, too.

Identified by Coverity.

Signed-off-by: Johannes Schindelin 
---
 reflog-walk.c | 20 +---
 1 file changed, 17 insertions(+), 3 deletions(-)

diff --git a/reflog-walk.c b/reflog-walk.c
index 99679f58255..c63eb1a3fd7 100644
--- a/reflog-walk.c
+++ b/reflog-walk.c
@@ -183,7 +183,11 @@ int add_reflog_for_walk(struct reflog_walk_info *info,
if (!reflogs || reflogs->nr == 0) {
struct object_id oid;
char *b;
-   if (dwim_log(branch, strlen(branch), oid.hash, ) == 
1) {
+   int ret = dwim_log(branch, strlen(branch),
+  oid.hash, );
+   if (ret > 1)
+   free(b);
+   else if (ret == 1) {
if (reflogs) {
free(reflogs->ref);
free(reflogs);
@@ -193,17 +197,27 @@ int add_reflog_for_walk(struct reflog_walk_info *info,
reflogs = read_complete_reflog(branch);
}
}
-   if (!reflogs || reflogs->nr == 0)
+   if (!reflogs || reflogs->nr == 0) {
+   if (reflogs) {
+   free(reflogs->ref);
+   free(reflogs);
+   }
+   free(branch);
return -1;
+   }
string_list_insert(>complete_reflogs, branch)->util
= reflogs;
}
+   free(branch);
 
commit_reflog = xcalloc(1, sizeof(struct commit_reflog));
if (recno < 0) {
commit_reflog->recno = get_reflog_recno_by_time(reflogs, 
timestamp);
if (commit_reflog->recno < 0) {
-   free(branch);
+   if (reflogs) {
+   free(reflogs->ref);
+   free(reflogs);
+   }
free(commit_reflog);
return -1;
}
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 20/25] shallow: avoid memory leak

2017-05-04 Thread Johannes Schindelin
Reported by Coverity.

Signed-off-by: Johannes Schindelin 
---
 shallow.c | 8 ++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/shallow.c b/shallow.c
index 25b6db989bf..f9370961f99 100644
--- a/shallow.c
+++ b/shallow.c
@@ -473,11 +473,15 @@ static void paint_down(struct paint_info *info, const 
unsigned char *sha1,
struct commit_list *head = NULL;
int bitmap_nr = (info->nr_bits + 31) / 32;
size_t bitmap_size = st_mult(sizeof(uint32_t), bitmap_nr);
-   uint32_t *tmp = xmalloc(bitmap_size); /* to be freed before return */
-   uint32_t *bitmap = paint_alloc(info);
struct commit *c = lookup_commit_reference_gently(sha1, 1);
+   uint32_t *tmp; /* to be freed before return */
+   uint32_t *bitmap;
+
if (!c)
return;
+
+   tmp = xmalloc(bitmap_size);
+   bitmap = paint_alloc(info);
memset(bitmap, 0, bitmap_size);
bitmap[id / 32] |= (1U << (id % 32));
commit_list_insert(c, );
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 17/25] fast-export: avoid leaking memory in handle_tag()

2017-05-04 Thread Johannes Schindelin
Reported by, you guessed it, Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/fast-export.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/builtin/fast-export.c b/builtin/fast-export.c
index e0220630d00..64617ad8e36 100644
--- a/builtin/fast-export.c
+++ b/builtin/fast-export.c
@@ -734,6 +734,7 @@ static void handle_tag(const char *name, struct tag *tag)
 oid_to_hex(>object.oid));
case DROP:
/* Ignore this tag altogether */
+   free(buf);
return;
case REWRITE:
if (tagged->type != OBJ_COMMIT) {
@@ -765,6 +766,7 @@ static void handle_tag(const char *name, struct tag *tag)
   (int)(tagger_end - tagger), tagger,
   tagger == tagger_end ? "" : "\n",
   (int)message_size, (int)message_size, message ? message : "");
+   free(buf);
 }
 
 static struct commit *get_commit(struct rev_cmdline_entry *e, char *full_name)
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 12/25] split_commit_in_progress(): simplify & fix memory leak

2017-05-04 Thread Johannes Schindelin
This function did a whole lot of unnecessary work, such as reading in
four files just to figure out that, oh, hey, we do not need to look at
them after all because the HEAD is not detached.

Simplify the entire function to return early when possible, to read in
the files only when necessary, and to release the allocated memory
always (there was a leak, reported via Coverity, where we failed to
release the allocated strings if the HEAD is not detached).

Signed-off-by: Johannes Schindelin 
---
 wt-status.c | 28 ++--
 1 file changed, 14 insertions(+), 14 deletions(-)

diff --git a/wt-status.c b/wt-status.c
index 0a6e16dbe0f..117ac8cfb01 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -1082,29 +1082,29 @@ static char *read_line_from_git_path(const char 
*filename)
 static int split_commit_in_progress(struct wt_status *s)
 {
int split_in_progress = 0;
-   char *head = read_line_from_git_path("HEAD");
-   char *orig_head = read_line_from_git_path("ORIG_HEAD");
-   char *rebase_amend = read_line_from_git_path("rebase-merge/amend");
-   char *rebase_orig_head = 
read_line_from_git_path("rebase-merge/orig-head");
+   char *head, *orig_head, *rebase_amend, *rebase_orig_head;
 
-   if (!head || !orig_head || !rebase_amend || !rebase_orig_head ||
+   if ((!s->amend && !s->nowarn && !s->workdir_dirty) ||
!s->branch || strcmp(s->branch, "HEAD"))
-   return split_in_progress;
+   return 0;
 
-   if (!strcmp(rebase_amend, rebase_orig_head)) {
-   if (strcmp(head, rebase_amend))
-   split_in_progress = 1;
-   } else if (strcmp(orig_head, rebase_orig_head)) {
-   split_in_progress = 1;
-   }
+   head = read_line_from_git_path("HEAD");
+   orig_head = read_line_from_git_path("ORIG_HEAD");
+   rebase_amend = read_line_from_git_path("rebase-merge/amend");
+   rebase_orig_head = read_line_from_git_path("rebase-merge/orig-head");
 
-   if (!s->amend && !s->nowarn && !s->workdir_dirty)
-   split_in_progress = 0;
+   if (!head || !orig_head || !rebase_amend || !rebase_orig_head)
+   ; /* fall through, no split in progress */
+   else if (!strcmp(rebase_amend, rebase_orig_head))
+   split_in_progress = !!strcmp(head, rebase_amend);
+   else if (strcmp(orig_head, rebase_orig_head))
+   split_in_progress = 1;
 
free(head);
free(orig_head);
free(rebase_amend);
free(rebase_orig_head);
+
return split_in_progress;
 }
 
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 14/25] setup_discovered_git_dir(): plug memory leak

2017-05-04 Thread Johannes Schindelin
The setup_explicit_git_dir() function does not take custody of the string
passed as first parameter; we have to release it if we turned the value of
git_dir into an absolute path.

Signed-off-by: Johannes Schindelin 
---
 setup.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/setup.c b/setup.c
index 0320a9ad14c..e3f7699a902 100644
--- a/setup.c
+++ b/setup.c
@@ -703,11 +703,16 @@ static const char *setup_discovered_git_dir(const char 
*gitdir,
 
/* --work-tree is set without --git-dir; use discovered one */
if (getenv(GIT_WORK_TREE_ENVIRONMENT) || git_work_tree_cfg) {
+   char *to_free = NULL;
+   const char *ret;
+
if (offset != cwd->len && !is_absolute_path(gitdir))
-   gitdir = real_pathdup(gitdir, 1);
+   gitdir = to_free = real_pathdup(gitdir, 1);
if (chdir(cwd->buf))
die_errno("Could not come back to cwd");
-   return setup_explicit_git_dir(gitdir, cwd, nongit_ok);
+   ret = setup_explicit_git_dir(gitdir, cwd, nongit_ok);
+   free(to_free);
+   return ret;
}
 
/* #16.2, #17.2, #20.2, #21.2, #24, #25, #28, #29 (see t1510) */
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 19/25] line-log: avoid memory leak

2017-05-04 Thread Johannes Schindelin
Discovered by Coverity.

Signed-off-by: Johannes Schindelin 
---
 line-log.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/line-log.c b/line-log.c
index a23b910471b..b9087814b8c 100644
--- a/line-log.c
+++ b/line-log.c
@@ -1125,6 +1125,7 @@ static int process_ranges_ordinary_commit(struct rev_info 
*rev, struct commit *c
changed = process_all_files(_range, rev, , range);
if (parent)
add_line_range(rev, parent, parent_range);
+   free_line_log_data(parent_range);
return changed;
 }
 
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 18/25] receive-pack: plug memory leak in update()

2017-05-04 Thread Johannes Schindelin
Reported via Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/receive-pack.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index f96834f42c9..48c07ddb659 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -986,7 +986,8 @@ static const char *update(struct command *cmd, struct 
shallow_info *si)
 {
const char *name = cmd->ref_name;
struct strbuf namespaced_name_buf = STRBUF_INIT;
-   const char *namespaced_name, *ret;
+   static char *namespaced_name;
+   const char *ret;
struct object_id *old_oid = >old_oid;
struct object_id *new_oid = >new_oid;
 
@@ -997,6 +998,7 @@ static const char *update(struct command *cmd, struct 
shallow_info *si)
}
 
strbuf_addf(_name_buf, "%s%s", get_git_namespace(), name);
+   free(namespaced_name);
namespaced_name = strbuf_detach(_name_buf, NULL);
 
if (is_ref_checked_out(namespaced_name)) {
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 11/25] checkout: fix memory leak

2017-05-04 Thread Johannes Schindelin
This change addresses part of the NEEDSWORK comment above the code,
therefore the comment needs to be adjusted, too.

Discovered via Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/checkout.c | 17 +
 1 file changed, 9 insertions(+), 8 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index bfa5419f335..5faea3a05fa 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -235,14 +235,14 @@ static int checkout_merged(int pos, const struct checkout 
*state)
/*
 * NEEDSWORK:
 * There is absolutely no reason to write this as a blob object
-* and create a phony cache entry just to leak.  This hack is
-* primarily to get to the write_entry() machinery that massages
-* the contents to work-tree format and writes out which only
-* allows it for a cache entry.  The code in write_entry() needs
-* to be refactored to allow us to feed a 
-* instead of a cache entry.  Such a refactoring would help
-* merge_recursive as well (it also writes the merge result to the
-* object database even when it may contain conflicts).
+* and create a phony cache entry.  This hack is primarily to get
+* to the write_entry() machinery that massages the contents to
+* work-tree format and writes out which only allows it for a
+* cache entry.  The code in write_entry() needs to be refactored
+* to allow us to feed a  instead of a cache
+* entry.  Such a refactoring would help merge_recursive as well
+* (it also writes the merge result to the object database even
+* when it may contain conflicts).
 */
if (write_sha1_file(result_buf.ptr, result_buf.size,
blob_type, oid.hash))
@@ -251,6 +251,7 @@ static int checkout_merged(int pos, const struct checkout 
*state)
if (!ce)
die(_("make_cache_entry failed for path '%s'"), path);
status = checkout_entry(ce, state, NULL);
+   free(ce);
return status;
 }
 
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 10/25] cat-file: fix memory leak

2017-05-04 Thread Johannes Schindelin
Discovered by Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/cat-file.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/builtin/cat-file.c b/builtin/cat-file.c
index 1890d7a6390..9af863e7915 100644
--- a/builtin/cat-file.c
+++ b/builtin/cat-file.c
@@ -165,6 +165,7 @@ static int cat_one_file(int opt, const char *exp_type, 
const char *obj_name,
die("git cat-file %s: bad file", obj_name);
 
write_or_die(1, buf, size);
+   free(buf);
return 0;
 }
 
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 01/25] mingw: avoid memory leak when splitting PATH

2017-05-04 Thread Johannes Schindelin
In the (admittedly, concocted) case that PATH consists only of path
delimiters, we would leak the duplicated string.

Reported by Coverity.

Signed-off-by: Johannes Schindelin 
---
 compat/mingw.c | 4 +++-
 1 file changed, 3 insertions(+), 1 deletion(-)

diff --git a/compat/mingw.c b/compat/mingw.c
index 3fbfda5978b..fe0e3ccd248 100644
--- a/compat/mingw.c
+++ b/compat/mingw.c
@@ -961,8 +961,10 @@ static char **get_path_split(void)
++n;
}
}
-   if (!n)
+   if (!n) {
+   free(envpath);
return NULL;
+   }
 
ALLOC_ARRAY(path, n + 1);
p = envpath;
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 09/25] mailinfo & mailsplit: check for EOF while parsing

2017-05-04 Thread Johannes Schindelin
While POSIX states that it is okay to pass EOF to isspace() (and it seems
to be implied that EOF should *not* be treated as whitespace), and also to
pass EOF to ungetc() (which seems to be intended to fail without buffering
the character), it is much better to handle these cases explicitly. Not
only does it reduce head-scratching (and helps static analysis avoid
reporting false positives), it also lets us handle files containing
nothing but whitespace by erroring out.

Reported via Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/mailsplit.c | 10 ++
 mailinfo.c  |  9 -
 2 files changed, 18 insertions(+), 1 deletion(-)

diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c
index 30681681c13..664400b8169 100644
--- a/builtin/mailsplit.c
+++ b/builtin/mailsplit.c
@@ -232,6 +232,16 @@ static int split_mbox(const char *file, const char *dir, 
int allow_bare,
 
do {
peek = fgetc(f);
+   if (peek == EOF) {
+   if (f == stdin)
+   /* empty stdin is OK */
+   ret = skip;
+   else {
+   fclose(f);
+   error(_("empty mbox: '%s'"), file);
+   }
+   goto out;
+   }
} while (isspace(peek));
ungetc(peek, f);
 
diff --git a/mailinfo.c b/mailinfo.c
index 68037758f2f..f92cb9f729c 100644
--- a/mailinfo.c
+++ b/mailinfo.c
@@ -882,7 +882,10 @@ static int read_one_header_line(struct strbuf *line, FILE 
*in)
for (;;) {
int peek;
 
-   peek = fgetc(in); ungetc(peek, in);
+   peek = fgetc(in);
+   if (peek == EOF)
+   break;
+   ungetc(peek, in);
if (peek != ' ' && peek != '\t')
break;
if (strbuf_getline_lf(, in))
@@ -1099,6 +1102,10 @@ int mailinfo(struct mailinfo *mi, const char *msg, const 
char *patch)
 
do {
peek = fgetc(mi->input);
+   if (peek == EOF) {
+   fclose(cmitmsg);
+   return error("empty patch: '%s'", patch);
+   }
} while (isspace(peek));
ungetc(peek, mi->input);
 
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 02/25] winansi: avoid use of uninitialized value

2017-05-04 Thread Johannes Schindelin
To initialize the foreground color attributes of "plain text", our ANSI
emulation tries to infer them from the currently attached console while
running the is_console() function. This function first tries to detect any
console attached to stdout, then it is called with stderr.

If neither stdout nor stderr has any console attached, it does not
actually matter what we use for "plain text" attributes, as we never need
to output any text to any console in that case.

However, after working on stdout and stderr, is_console() is called with
stdin, and it still tries to initialize the "plain text" attributes if
they had not been initialized earlier. In this case, we cannot detect any
attributes, and we used an uninitialized value for them.

Naturally, Coverity complained about this use case because it could not
reason about the code deeply enough to figure out that we do not even use
those attributes in that case.

Let's just initialize the value to 0 in that case, both to avoid future
Coverity reports, and to help catch future regressions in case anybody
changes the order of the is_console() calls (which would make the text
black on black).

Signed-off-by: Johannes Schindelin 
---
 compat/winansi.c | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/compat/winansi.c b/compat/winansi.c
index 793420f9d0d..a551de90eb0 100644
--- a/compat/winansi.c
+++ b/compat/winansi.c
@@ -105,6 +105,13 @@ static int is_console(int fd)
if (!fd) {
if (!GetConsoleMode(hcon, ))
return 0;
+   /*
+* This code path is only reached if there is no console
+* attached to stdout/stderr, i.e. we will not need to output
+* any text to any console, therefore we might just as well
+* use black as foreground color.
+*/
+   sbi.wAttributes = 0;
} else if (!GetConsoleScreenBufferInfo(hcon, ))
return 0;
 
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 00/25] Address a couple of issues identified by Coverity

2017-05-04 Thread Johannes Schindelin
I recently registered the git-for-windows fork with Coverity to ensure
that even the Windows-specific patches get some static analysis love.

While at it, I squashed a couple of obvious issues in the part that is
not Windows-specific.

Note: while this patch series squashes some of those issues, the
remaining issues are not necessarily all false positives (e.g. Coverity
getting fooled by our FLEX_ARRAY trick into believing that the last
member of the struct is indeed a 0 or 1 size array) or intentional (e.g.
builtins not releasing memory because exit() is called right after
returning from the function that leaks memory).

Notable examples of the remaining issues are:

- a couple of callers of shorten_unambiguous_ref() assume that they do
  not have to release the memory returned from that function, often
  assigning the pointer to a `const` variable that typically does not
  hold a pointer that needs to be free()d. My hunch is that we will want
  to convert that function to have a fixed number of static buffers and
  use those in a round robin fashion à la sha1_to_hex().

- pack-redundant.c seems to have hard-to-reason-about code paths that
  may eventually leak memory. Essentially, the custody of the allocated
  memory is not clear at all.

- fast-import.c calls strbuf_detach() on the command_buf without any
  obvious rationale. Turns out that *some* code paths assign
  command_buf.buf to a `recent_command` which releases the buffer later.
  However, from a cursory look it appears to me as if there are some
  other code paths that skip that assignment, effectively causing a memory
  leak once strbuf_detach() is called.

Sadly, I lack the time to pursue those remaining issues further.

Changes since v3:

- used 0 (black) for the foreground color attributes in winansi when we
  have no console to print to, anyway.

- clarified in the commit message when we hit the path, and why, where
  we set the foreground color attributes to "all black".

- reworded the commit message talking about splitting the PATH (on
  Windows, it is delimited by semicolons, not colons, but it is even
  better to just talk about the path delimiters because it does not
  really happen which character is used, but it is important which role
  it plays).

- rewrote the split_commit_in_progress() function to have a more natural
  flow while still fixing the memory leak.


Johannes Schindelin (25):
  mingw: avoid memory leak when splitting PATH
  winansi: avoid use of uninitialized value
  winansi: avoid buffer overrun
  add_commit_patch_id(): avoid allocating memory unnecessarily
  git_config_rename_section_in_file(): avoid resource leak
  get_mail_commit_oid(): avoid resource leak
  difftool: address a couple of resource/memory leaks
  status: close file descriptor after reading git-rebase-todo
  mailinfo & mailsplit: check for EOF while parsing
  cat-file: fix memory leak
  checkout: fix memory leak
  split_commit_in_progress(): simplify & fix memory leak
  setup_bare_git_dir(): help static analysis
  setup_discovered_git_dir(): plug memory leak
  pack-redundant: plug memory leak
  mktree: plug memory leaks reported by Coverity
  fast-export: avoid leaking memory in handle_tag()
  receive-pack: plug memory leak in update()
  line-log: avoid memory leak
  shallow: avoid memory leak
  add_reflog_for_walk: avoid memory leak
  remote: plug memory leak in match_explicit()
  name-rev: avoid leaking memory in the `deref` case
  show_worktree(): plug memory leak
  submodule_uses_worktrees(): plug memory leak

 builtin/am.c | 15 ++-
 builtin/cat-file.c   |  1 +
 builtin/checkout.c   | 17 +
 builtin/difftool.c   | 33 +++--
 builtin/fast-export.c|  2 ++
 builtin/mailsplit.c  | 10 ++
 builtin/mktree.c |  5 +++--
 builtin/name-rev.c   |  7 +--
 builtin/pack-redundant.c |  1 +
 builtin/receive-pack.c   |  4 +++-
 builtin/worktree.c   |  8 +---
 compat/mingw.c   |  4 +++-
 compat/winansi.c | 12 
 config.c |  5 -
 line-log.c   |  1 +
 mailinfo.c   |  9 -
 patch-ids.c  |  3 ++-
 reflog-walk.c| 20 +---
 remote.c |  5 +++--
 setup.c  | 11 ---
 shallow.c|  8 ++--
 worktree.c   |  2 +-
 wt-status.c  | 29 +++--
 23 files changed, 148 insertions(+), 64 deletions(-)


base-commit: d2bbb7c2bcf6e77ebfcabf4e12110fe6d5c91de6
Published-As: https://github.com/dscho/git/releases/tag/coverity-v4
Fetch-It-Via: git fetch https://github.com/dscho/git coverity-v4

Interdiff vs v3:

 diff --git a/compat/winansi.c b/compat/winansi.c
 index 861b79d8c31..a11a0f16d27 100644
 --- a/compat/winansi.c
 +++ b/compat/winansi.c
 @@ -105,8 +105,13 @@ static int is_console(int fd)
if (!fd) {
if (!GetConsoleMode(hcon, 

[PATCH v4 03/25] winansi: avoid buffer overrun

2017-05-04 Thread Johannes Schindelin
When we could not convert the UTF-8 sequence into Unicode for writing to
the Console, we should not try to write an insanely-long sequence of
invalid wide characters (mistaking the negative return value for an
unsigned length).

Reported by Coverity.

Signed-off-by: Johannes Schindelin 
---
 compat/winansi.c | 5 +
 1 file changed, 5 insertions(+)

diff --git a/compat/winansi.c b/compat/winansi.c
index a551de90eb0..a11a0f16d27 100644
--- a/compat/winansi.c
+++ b/compat/winansi.c
@@ -140,6 +140,11 @@ static void write_console(unsigned char *str, size_t len)
 
/* convert utf-8 to utf-16 */
int wlen = xutftowcsn(wbuf, (char*) str, ARRAY_SIZE(wbuf), len);
+   if (wlen < 0) {
+   wchar_t *err = L"[invalid]";
+   WriteConsoleW(console, err, wcslen(err), , NULL);
+   return;
+   }
 
/* write directly to console */
WriteConsoleW(console, wbuf, wlen, , NULL);
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 08/25] status: close file descriptor after reading git-rebase-todo

2017-05-04 Thread Johannes Schindelin
Reported via Coverity.

Signed-off-by: Johannes Schindelin 
---
 wt-status.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/wt-status.c b/wt-status.c
index 03754849626..0a6e16dbe0f 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -1168,6 +1168,7 @@ static int read_rebase_todolist(const char *fname, struct 
string_list *lines)
abbrev_sha1_in_line();
string_list_append(lines, line.buf);
}
+   fclose(f);
return 0;
 }
 
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 06/25] get_mail_commit_oid(): avoid resource leak

2017-05-04 Thread Johannes Schindelin
When we fail to read, or parse, the file, we still want to close the file
descriptor and release the strbuf.

Reported via Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/am.c | 15 ++-
 1 file changed, 6 insertions(+), 9 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index a95dd8b4e6c..9c5c2c778e2 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -1351,19 +1351,16 @@ static int get_mail_commit_oid(struct object_id 
*commit_id, const char *mail)
struct strbuf sb = STRBUF_INIT;
FILE *fp = xfopen(mail, "r");
const char *x;
+   int ret = 0;
 
-   if (strbuf_getline_lf(, fp))
-   return -1;
-
-   if (!skip_prefix(sb.buf, "From ", ))
-   return -1;
-
-   if (get_oid_hex(x, commit_id) < 0)
-   return -1;
+   if (strbuf_getline_lf(, fp) ||
+   !skip_prefix(sb.buf, "From ", ) ||
+   get_oid_hex(x, commit_id) < 0)
+   ret = -1;
 
strbuf_release();
fclose(fp);
-   return 0;
+   return ret;
 }
 
 /**
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 05/25] git_config_rename_section_in_file(): avoid resource leak

2017-05-04 Thread Johannes Schindelin
In case of errors, we really want the file descriptor to be closed.

Discovered by a Coverity scan.

Signed-off-by: Johannes Schindelin 
---
 config.c | 5 -
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/config.c b/config.c
index b4a3205da32..a30056ec7e9 100644
--- a/config.c
+++ b/config.c
@@ -2621,7 +2621,7 @@ int git_config_rename_section_in_file(const char 
*config_filename,
struct lock_file *lock;
int out_fd;
char buf[1024];
-   FILE *config_file;
+   FILE *config_file = NULL;
struct stat st;
 
if (new_name && !section_name_is_ok(new_name)) {
@@ -2703,11 +2703,14 @@ int git_config_rename_section_in_file(const char 
*config_filename,
}
}
fclose(config_file);
+   config_file = NULL;
 commit_and_out:
if (commit_lock_file(lock) < 0)
ret = error_errno("could not write config file %s",
  config_filename);
 out:
+   if (config_file)
+   fclose(config_file);
rollback_lock_file(lock);
 out_no_rollback:
free(filename_buf);
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 04/25] add_commit_patch_id(): avoid allocating memory unnecessarily

2017-05-04 Thread Johannes Schindelin
It would appear that we allocate (and forget to release) memory if the
patch ID is not even defined.

Reported by the Coverity tool.

Signed-off-by: Johannes Schindelin 
---
 patch-ids.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/patch-ids.c b/patch-ids.c
index fa8f11de826..92eba7a059e 100644
--- a/patch-ids.c
+++ b/patch-ids.c
@@ -99,11 +99,12 @@ struct patch_id *has_commit_patch_id(struct commit *commit,
 struct patch_id *add_commit_patch_id(struct commit *commit,
 struct patch_ids *ids)
 {
-   struct patch_id *key = xcalloc(1, sizeof(*key));
+   struct patch_id *key;
 
if (!patch_id_defined(commit))
return NULL;
 
+   key = xcalloc(1, sizeof(*key));
if (init_patch_id_entry(key, commit, ids)) {
free(key);
return NULL;
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v4 07/25] difftool: address a couple of resource/memory leaks

2017-05-04 Thread Johannes Schindelin
This change plugs a couple of memory leaks and makes sure that the file
descriptor is closed in run_dir_diff().

Spotted by Coverity.

Signed-off-by: Johannes Schindelin 
---
 builtin/difftool.c | 33 +++--
 1 file changed, 23 insertions(+), 10 deletions(-)

diff --git a/builtin/difftool.c b/builtin/difftool.c
index 1354d0e4625..b9a892f2693 100644
--- a/builtin/difftool.c
+++ b/builtin/difftool.c
@@ -226,6 +226,7 @@ static void changed_files(struct hashmap *result, const 
char *index_path,
hashmap_entry_init(entry, strhash(buf.buf));
hashmap_add(result, entry);
}
+   fclose(fp);
if (finish_command(_files))
die("diff-files did not exit properly");
strbuf_release(_env);
@@ -439,8 +440,10 @@ static int run_dir_diff(const char *extcmd, int symlinks, 
const char *prefix,
}
 
if (lmode && status != 'C') {
-   if (checkout_path(lmode, , src_path, ))
-   return error("could not write '%s'", src_path);
+   if (checkout_path(lmode, , src_path, )) {
+   ret = error("could not write '%s'", src_path);
+   goto finish;
+   }
}
 
if (rmode && !S_ISLNK(rmode)) {
@@ -456,9 +459,12 @@ static int run_dir_diff(const char *extcmd, int symlinks, 
const char *prefix,
hashmap_add(_tree_dups, entry);
 
if (!use_wt_file(workdir, dst_path, )) {
-   if (checkout_path(rmode, , dst_path, 
))
-   return error("could not write '%s'",
-dst_path);
+   if (checkout_path(rmode, , dst_path,
+ )) {
+   ret = error("could not write '%s'",
+   dst_path);
+   goto finish;
+   }
} else if (!is_null_oid()) {
/*
 * Changes in the working tree need special
@@ -473,10 +479,12 @@ static int run_dir_diff(const char *extcmd, int symlinks, 
const char *prefix,
ADD_CACHE_JUST_APPEND);
 
add_path(, rdir_len, dst_path);
-   if (ensure_leading_directories(rdir.buf))
-   return error("could not create "
-"directory for '%s'",
-dst_path);
+   if (ensure_leading_directories(rdir.buf)) {
+   ret = error("could not create "
+   "directory for '%s'",
+   dst_path);
+   goto finish;
+   }
add_path(, wtdir_len, dst_path);
if (symlinks) {
if (symlink(wtdir.buf, rdir.buf)) {
@@ -497,13 +505,15 @@ static int run_dir_diff(const char *extcmd, int symlinks, 
const char *prefix,
}
}
 
+   fclose(fp);
+   fp = NULL;
if (finish_command()) {
ret = error("error occurred running diff --raw");
goto finish;
}
 
if (!i)
-   return 0;
+   goto finish;
 
/*
 * Changes to submodules require special treatment.This loop writes a
@@ -626,6 +636,9 @@ static int run_dir_diff(const char *extcmd, int symlinks, 
const char *prefix,
exit_cleanup(tmpdir, rc);
 
 finish:
+   if (fp)
+   fclose(fp);
+
free(lbase_dir);
free(rbase_dir);
strbuf_release();
-- 
2.12.2.windows.2.800.gede8f145e06




Re: [PATCH v3] l10n: de.po: update German translation

2017-05-04 Thread Ralf Thielow
2017-05-03 21:27 GMT+02:00 Christian Brabandt :
> On Mi, 03 Mai 2017, Ralf Thielow wrote:
>
>> Ref or Reference is translated as "Referenz" while
>> Rev or Revision is translated as "Commit" so I think
>> the translation is correct.
>
> Oh right. I also noticed that sometimes complete sentences were used and
> sometimes not. It would be nice to have a consistent style there too.
>

Sure.  Using only one style for those messages is a point on my TODO list.

> Thanks for updating.
>
> Best,
> Christian
> --
> Hallo Briefmarkensammler!


Re: [PATCH v4] l10n: de.po: update German translation

2017-05-04 Thread Ralf Thielow
2017-05-03 19:29 GMT+02:00 Matthias Rüster :
> Hi Ralf,
>
> thanks again for your work!
>
>
>>
>>  #: ref-filter.c:565
>> -#, fuzzy, c-format
>> +#, c-format
>>  msgid "format: %%(then) atom used after %%(else)"
>> -msgstr "Format: %%(end) Atom fehlt"
>> +msgstr "format: %%(then) nach %%(else) verwendet"
>>
>
> Is there a "Atom" missing?
> "format: %%(then) Atom nach %%(else) verwendet"
>
>
>>
>>  #: submodule.c:1332
>> -#, fuzzy, c-format
>> +#, c-format
>>  msgid "could not start 'git status' in submodule '%s'"
>>  msgstr "Konnte 'git status' in Submodul '%s' nicht starten."
>>
>>  #: submodule.c:1345
>> -#, fuzzy, c-format
>> +#, c-format
>>  msgid "could not run 'git status' in submodule '%s'"
>>  msgstr "konnte 'git status' in Submodul '%s' nicht ausführen"
>>
>
> Maybe "Konnte 'git status' nicht in Submodul starten/ausführen" would
> sound better?
>
> In general: sometimes there is a sentence (starts with a uppercase
> letter and ends with a dot) and sometimes not.
>
> So for example also here:
>
>  #: fetch-pack.c:1150
>  #, c-format
>  msgid "Server does not allow request for unadvertised object %s"
> -msgstr ""
> +msgstr "Der Server lehnt Anfrage nach nicht angebotenem Objekt %s ab."
>
> I have not looked up the exact circumstances when this would show up,
> though.
>
>
>>  #: submodule.c:1421
>> -#, fuzzy, c-format
>> +#, c-format
>>  msgid "submodule '%s' has dirty index"
>> -msgstr "Submodul '%s' kann Alternative nicht hinzufügen: %s"
>> +msgstr "Submodul '%s' hat geänderten Index"
>>
>
> I would suggest:
> "Submodul '%s' hat einen geänderten Index"
>
>
>>
>>  #: builtin/tag.c:493
>> -#, fuzzy
>>  msgid "--contains option is only allowed in list mode"
>> -msgstr "--contains Option ist nur erlaubt mit -l."
>> +msgstr "--contains Option ist nur im Listenmodus erlaubt"
>>
>>  #: builtin/tag.c:495
>> -#, fuzzy
>>  msgid "--no-contains option is only allowed in list mode"
>> -msgstr "--contains Option ist nur erlaubt mit -l."
>> +msgstr "Option --no-contains ist nur im Listenmodus erlaubt"
>>
>
> Is "Option --contains ist nur im Listenmodus erlaubt" possible there? So
> it would be like the second one...
>
>

Thanks!

>
> Kind regards,
> Matthias


Re: PCRE v2 compile error, was Re: What's cooking in git.git (May 2017, #01; Mon, 1)

2017-05-04 Thread Ævar Arnfjörð Bjarmason
On Thu, May 4, 2017 at 1:32 PM, Johannes Schindelin
 wrote:
> Hi Ævar,
>
> On Thu, 4 May 2017, Ævar Arnfjörð Bjarmason wrote:
>
>> So I think if your criteria for working on integrating v2 is users
>> noticing it elsewhere and asking you for it you'll likely never switch
>> to it.
>
> Speaking for myself, my biggest problem with your patch series is to find
> the time to work on packaging PCRE v2 as an MSYS2 package.
>
> If you force me to make that time *now*, by forcing the switch to v2 in
> `pu`, you will disimprove my mood. That's all.
>
> If, however, you are gently with people like me, offering this as an
> opt-in, so that I can play with it when I find some time to build PCRE v2
> in MSYS2's context, I will be pretty happy, and of course will ship Git
> for Windows with the faster PCRE support as soon as I am satisfied that it
> works correctly.
>
> That should make you happy, too, as you will get quite a bit of testers
> that way. Gently.

Sure, all makes sense. As noted I'll keep it from being the default for now.

The only reason I even replied & kept this thread going now was
because I noticed "make" not enabling it, REG_STARTEND making \0
searches work in practice (even if not specified by the NetBSD
extension) etc.

All stuff I would surely forget by the time we have the discussion
again down the road, so I wanted to get that E-Mail out now for future
reference.


Re: PCRE v2 compile error, was Re: What's cooking in git.git (May 2017, #01; Mon, 1)

2017-05-04 Thread Johannes Schindelin
Hi Ævar,

On Thu, 4 May 2017, Ævar Arnfjörð Bjarmason wrote:

> So I think if your criteria for working on integrating v2 is users
> noticing it elsewhere and asking you for it you'll likely never switch
> to it.

Speaking for myself, my biggest problem with your patch series is to find
the time to work on packaging PCRE v2 as an MSYS2 package.

If you force me to make that time *now*, by forcing the switch to v2 in
`pu`, you will disimprove my mood. That's all.

If, however, you are gently with people like me, offering this as an
opt-in, so that I can play with it when I find some time to build PCRE v2
in MSYS2's context, I will be pretty happy, and of course will ship Git
for Windows with the faster PCRE support as soon as I am satisfied that it
works correctly.

That should make you happy, too, as you will get quite a bit of testers
that way. Gently.

Ciao,
Dscho

Re: CYGWIN git cannot install python packages with pip

2017-05-04 Thread Johannes Schindelin
Hi,

On Thu, 4 May 2017, ankostis wrote:

> On 4 May 2017 at 11:47, Adam Dinwoodie  wrote:
> > Confirmed: the Cygwin project as a general rule doesn't support this
> > sort of mixing of Windows and Cygwin tools.  Either use Python and Git
> > packages both provided by Cygwin, or both provided by Windows.
> >
> > Mixing and matching does work sometimes -- as was apparently the case
> > with Cygwin Git v2.8.3-1 -- but it requires care and you're generally on
> > your own with it.
> 
> MSYS2's git-2.12.1 also works fine, so there must be something
> different in this build of git that breaks this long-standing capability.

MSYS2's runtime has auto-conversion functionality, trying to guess what
arguments passed to a non-MSYS2 program may be Unix paths, converting them
to Windows ones.

So what you may see here has nothing to do with MSYS2's Git *not* having
the change that breaks Cygwin Git for you, but MSYS2's runtime saving you
from trouble.

> Judging from the error-message, it somehow concatenates input & output
> paths.
> Isn't this something to research about?

It is something to research about. But by you, not by me. I need to focus
on bigger-impact issues, because I do not scale, and Git for Windows'
users generally do not experience the problem you described.

Ciao,
Johannes


Re: [PATCH v3 12/25] split_commit_in_progress(): fix memory leak

2017-05-04 Thread Johannes Schindelin
Hi René,

On Wed, 3 May 2017, René Scharfe wrote:

> Am 02.05.2017 um 18:02 schrieb Johannes Schindelin:
> > Reported via Coverity.
> > 
> > Signed-off-by: Johannes Schindelin 
> > ---
> >   wt-status.c | 7 ++-
> >   1 file changed, 6 insertions(+), 1 deletion(-)
> > 
> > diff --git a/wt-status.c b/wt-status.c
> > index 0a6e16dbe0f..1f3f6bcb980 100644
> > --- a/wt-status.c
> > +++ b/wt-status.c
> > @@ -1088,8 +1088,13 @@ static int split_commit_in_progress(struct wt_status
> > *s)
> >char *rebase_orig_head =
> >read_line_from_git_path("rebase-merge/orig-head");
> >   
> > if (!head || !orig_head || !rebase_amend || !rebase_orig_head ||
> > -   !s->branch || strcmp(s->branch, "HEAD"))
> > +   !s->branch || strcmp(s->branch, "HEAD")) {
> > +   free(head);
> > +   free(orig_head);
> > +   free(rebase_amend);
> > +   free(rebase_orig_head);
> > return split_in_progress;
> > +   }
> >   
> >if (!strcmp(rebase_amend, rebase_orig_head)) {
> > if (strcmp(head, rebase_amend))
> > 
> 
> The return line could be replaced by "; else" to achieve the same
> result, without duplicating the free calls.

True. It is much harder to explain it that way, though: the context looks
like this:

static int split_commit_in_progress(struct wt_status *s)
 {
int split_in_progress = 0;
char *head = read_line_from_git_path("HEAD");
char *orig_head = read_line_from_git_path("ORIG_HEAD");
char *rebase_amend = read_line_from_git_path("rebase-merge/amend");
char *rebase_orig_head = 
read_line_from_git_path("rebase-merge/orig-head");

if (!head || !orig_head || !rebase_amend || !rebase_orig_head ||
-   !s->branch || strcmp(s->branch, "HEAD"))
+   !s->branch || strcmp(s->branch, "HEAD")) {
+   free(head);
+   free(orig_head);
+   free(rebase_amend);
+   free(rebase_orig_head);
return split_in_progress;
+   }

if (!strcmp(rebase_amend, rebase_orig_head)) {
if (strcmp(head, rebase_amend))
split_in_progress = 1;
} else if (strcmp(orig_head, rebase_orig_head)) {
split_in_progress = 1;
}

if (!s->amend && !s->nowarn && !s->workdir_dirty)
split_in_progress = 0;

free(head);
free(orig_head);
free(rebase_amend);
free(rebase_orig_head);
return split_in_progress;
 }

So as you see, if we make the change you suggested, the next if() is hit
which possibly sets split_in_progress = 0.

The thing is: this variable has been initialized to 0 in the beginning! So
this conditional assignment would be a noop, unless any of the code paths
before this conditional modified split_in_progress.

After you fully wrapped your head around this code, I am sure you agree
that the code is unnecessarily confusing to begin with: why do we go out
of our way to allocate and read all those strings, compare them to figure
out whether there is a split in progress, just to look at bits in the
wt_status struct (that we had available from the beginning) to potentially
undo all of that.

What's worse, I cannot even find a logical explanation why the code is so
confusing, as it has been added as it is today in commit 2d1ccebae41
(status: better advices when splitting a commit (during rebase -i),
2012-06-10).

So I think this function really wants to be fixed more fully (although I
feel bad for inserting such a non-trivial fix into a patch series
otherwise populated by trivial memory/resource leak plugs):

-- snipsnap --
Subject: [PATCH] squash! split_commit_in_progress(): fix memory leak

split_commit_in_progress(): simplify & fix memory leak

This function did a whole lot of unnecessary work, such as reading in
four files just to figure out that, oh, hey, we do not need to look at
them after all because the HEAD is not detached.

Simplify the entire function to return early when possible, to read in
the files only when necessary, and to release the allocated memory
always (there was a leak, reported via Coverity, where we failed to
release the allocated strings if the HEAD is not detached).

Signed-off-by: Johannes Schindelin 
---
 wt-status.c | 39 +--
 1 file changed, 17 insertions(+), 22 deletions(-)

diff --git a/wt-status.c b/wt-status.c
index 1f3f6bcb980..117ac8cfb01 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -1082,34 +1082,29 @@ static char *read_line_from_git_path(const char 
*filename)
 static int split_commit_in_progress(struct wt_status *s)
 {
int split_in_progress = 0;
-   char *head = read_line_from_git_path("HEAD");
-   char *orig_head = read_line_from_git_path("ORIG_HEAD");
-   char *rebase_amend = read_line_from_git_path("rebase-merge/amend");
-   char *rebase_orig_head = 

RE: Donation Award

2017-05-04 Thread Mayrhofer Family
Good Day,

My wife and I have awarded you with a donation of $ 1,000,000.00 Dollars from 
part of our Jackpot Lottery of 50 Million Dollars, respond with your details 
for claims.

We await your earliest response and God Bless you.

Friedrich And Annand Mayrhofer.

---
This email has been checked for viruses by Avast antivirus software.
https://www.avast.com/antivirus



Re: [PATCH v3 01/25] mingw: avoid memory leak when splitting PATH

2017-05-04 Thread Johannes Schindelin
Hi René,

On Wed, 3 May 2017, René Scharfe wrote:

> Am 02.05.2017 um 18:00 schrieb Johannes Schindelin:
> > In the (admittedly, concocted) case that PATH consists only of colons, we
> > would leak the duplicated string.
> 
> Nit: It's about semicolons, right?  At least that's what get_path_split
> is searching for.  Or is there some kind of translation going on?

True, it is semicolons.

I'll reword the commit message to actually use `path delimiters`, to make
it more understandable to the Unix folk (who would most likely be puzzled
that PATH is separated by semicolons).

Thanks,
Dscho

Re: PCRE v2 compile error, was Re: What's cooking in git.git (May 2017, #01; Mon, 1)

2017-05-04 Thread Ævar Arnfjörð Bjarmason
On Thu, May 4, 2017 at 11:11 AM, Johannes Schindelin
 wrote:
> Hi Ævar,
>
> On Wed, 3 May 2017, Ævar Arnfjörð Bjarmason wrote:
>
>> [Just replying to you & Duy in the same mail, easier]
>
> It makes it harder on everybody else, though, as two slightly different
> discussion points are conflated now. Also, no single online mail archive
> will be able to render the thread correctly (assuming that you edited in
> the In-Reply-To header to loop back to Duy's mail).
>
>> On Wed, May 3, 2017 at 11:45 AM, Johannes Schindelin
>>  wrote:
>> >
>> > At this point, I feel that someone should recall into our collective
>> > memory what happened when we made a change similar in nature that
>> > broke existing build setups: by requiring REG_STARTEND all of a sudden
>> > ("you can easily flip the NO_REGEX switch", as if everybody should
>> > know about those Makefile flags we have).
>>
>> And as a result grep/log -G got faster by default,
>
> Sure. For those developers where the build was not broken.
>
> Software maintenance is always a trade-off, and with software as popular
> as Git, maintainers bear a special responsibility to *not* break builds
> easily, as it is more likely than not that anybody who wants to build Git
> is *unfamiliar* with the specifics.
>
> That is the main reason why we have a configure, even if we try hard to
> make things work with a straight `make`: people who are happily oblivious
> of our discussions on this here high-volume mailing list will be able to
> build Git without even consulting the documentation, just by doing what
> they would do with any Unix-based software: ./configure && make

And they still will. That sort of invocation will not build with
PCRE[1]. This change to v2 by default only impacts builders who are
already going out of their way to customize their build with
USE_LIBPCRE=YesPlease.

I don't think your comments make sense in that context. We're not even
talking about the advanced user/newbie developer who wants to build
git for the first time, we're talking about the user who's already
involved enough to start scouring the Makefile & tweaking the various
flags there.

Changing the current advice from "if you want PCRE enable this" to "if
you want PCRE enable this, we use v2 then, turn this other thing on if
you want v1" is a *very* small imposition on people already deeply
customizing their build to the extent of turning on the non-default
PCRE in the first place.

Yes that's a trade-off, and I'm not denying that some advanced users
will notice / be annoyed by that change. I just think the end-user
benefit of bringing v2 to the attention of distributors who are doing
such customized builds outweighs that.

>> and more importantly since v2.10.1 which includes your 2f8952250a and
>> made a REG_STARTEND engine a hard requirement nobody using git is
>> mysteriously going to miss grep results because of some stray \0 in the
>> string being matched.
>
> That is a misinterpretation of what the REG_STARTEND flag is supposed to
> do. In *some* implementations, REG_STARTEND allows NULs in the haystack.
> Some other implementations do not allow that. It is ill-defined.

We have a test in t7008-grep-binary.sh that'll fail if REG_STARTEND
doesn't behave as I described. Actually I think I got the history a
bit wrong here, I marked that test as succeeding in commit 7e36de5859
("t/t7008-grep-binary.sh: un-TODO a test that needs REG_STARTEND",
2010-08-17).

So I think since 2010 either distributors without REG_STARTEND have
been enabling NO_REGEX=YesPlease giving users the right behavior with
a \0 in their haystack, or their tests have been consistently failing
for 7 years, hopefully the former is the case.

>> I agree that I should drop the patch to make v2 the default from this
>> series for now. Clearly it's controversial, and can be considered on
>> its own merits once the supporting code is in. I'll do that in the
>> next submission, which I'm planning to send after v2.13.0 comes out.
>
> Good. I am really glad that we agree that the move to v2 should be a
> two-step process, with the uncontroversial "optionally use PCRE v2 for
> speed and robustness" first.
>
> Once enough users like it (and speaking for myself, once I heard from
> enough users how good it is so that I can justify to set aside enough time
> to support PCRE v2 in MSYS2), you will find it much smoother sailing to go
> to phase 2.

The v2 API is not a user-visible change. It'll help performance, and
over the long term subject users to fewer internal PCRE bugs in v1
that'll never get fixed.

That's pretty much a textbook example of the sort of thing users will
never even notice, but which is a good thing to do anyway, users don't
notice most of the incremental performance improvements made to git,
but they matter to them in the aggregate.

So I think if your criteria for working on integrating v2 is users
noticing it elsewhere and asking you for it you'll likely 

Re: [PATCH v3 02/25] winansi: avoid use of uninitialized value

2017-05-04 Thread Johannes Schindelin
Hi René,

On Wed, 3 May 2017, René Scharfe wrote:

> Am 02.05.2017 um 18:01 schrieb Johannes Schindelin:
> > 
> > diff --git a/compat/winansi.c b/compat/winansi.c
> > index 793420f9d0d..fd6910746c8 100644
> > --- a/compat/winansi.c
> > +++ b/compat/winansi.c
> > @@ -105,6 +105,8 @@ static int is_console(int fd)
> >if (!fd) {
> > if (!GetConsoleMode(hcon, ))
> > return 0;
> > +   sbi.wAttributes = FOREGROUND_BLUE | FOREGROUND_GREEN |
> > +   FOREGROUND_RED;
> >} else if (!GetConsoleScreenBufferInfo(hcon, ))
> > return 0;
> >   
> 
> So is_console is called with fd being 1 (stdout), 2 (stderr) and 0
> (stdin), in that order.

Correct. I guess this is the important part missing from the commit
message.

Oh, I also saw that I talked about stdout in the commit message, but !fd
tests for stdin!

> If the first two calls abort early for some reason we may end up here.

Yep. "some reason" being: there is no console attached to stdout nor
stderr.

> The added code is for white text on black background.  An alias for that
> combination, FOREGROUND_ALL, is defined a few lines down; for a minimal
> fix it's not worth moving it up.  attr and plain_attr are both
> initialized to sbi.wAttributes.

Exactly.

> That as a bit more complicated than it looked initially.  The order of
> calls is important, "stdout" in the commit message includes stderr as
> well and it doesn't just affect plain_attr.

Right, it also affects the "current" attributes.

> Would a value of 0 (black text on black background) suffice if only
> stdin is connected to a console?  Colors don't matter if there is
> nothing to see, right?

I think that would make it both easier to understand the patch and to
catch regressions in case anybody feels the order of the is_console()
calls should be changed...

This is my current squash! commit (the original commit message will be
replaced by the commit message body of this commit):

-- snipsnap --
Subject: [PATCH] squash! winansi: avoid use of uninitialized value

winansi: avoid use of uninitialized value

To initialize the foreground color attributes of "plain text", our ANSI
emulation tries to infer them from the currently attached console while
running the is_console() function. This function first tries to detect any
console attached to stdout, then it is called with stderr.

If neither stdout nor stderr has any console attached, it does not
actually matter what we use for "plain text" attributes, as we never need
to output any text to any console in that case.

However, after working on stdout and stderr, is_console() is called with
stdin, and it still tries to initialize the "plain text" attributes if
they had not been initialized earlier. In this case, we cannot detect any
attributes, and we used an uninitialized value for them.

Naturally, Coverity complained about this use case because it could not
reason about the code deeply enough to figure out that we do not even use
those attributes in that case.

Let's just initialize the value to 0 in that case, both to avoid future
Coverity reports, and to help catch future regressions in case anybody
changes the order of the is_console() calls (which would make the text
black on black).

Signed-off-by: Johannes Schindelin 
---
 compat/winansi.c | 9 +++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/compat/winansi.c b/compat/winansi.c
index 861b79d8c31..a11a0f16d27 100644
--- a/compat/winansi.c
+++ b/compat/winansi.c
@@ -105,8 +105,13 @@ static int is_console(int fd)
if (!fd) {
if (!GetConsoleMode(hcon, ))
return 0;
-   sbi.wAttributes = FOREGROUND_BLUE | FOREGROUND_GREEN |
-   FOREGROUND_RED;
+   /*
+* This code path is only reached if there is no console
+* attached to stdout/stderr, i.e. we will not need to output
+* any text to any console, therefore we might just as well
+* use black as foreground color.
+*/
+   sbi.wAttributes = 0;
} else if (!GetConsoleScreenBufferInfo(hcon, ))
return 0;
 
-- 
2.12.2.windows.2.800.gede8f145e06


Re: CYGWIN git cannot install python packages with pip

2017-05-04 Thread ankostis
On 4 May 2017 at 11:47, Adam Dinwoodie  wrote:
> Confirmed: the Cygwin project as a general rule doesn't support this
> sort of mixing of Windows and Cygwin tools.  Either use Python and Git
> packages both provided by Cygwin, or both provided by Windows.
>
> Mixing and matching does work sometimes -- as was apparently the case
> with Cygwin Git v2.8.3-1 -- but it requires care and you're generally on
> your own with it.

MSYS2's git-2.12.1 also works fine, so there must be something
different in this build of git that breaks this long-standing capability.

Judging from the error-message, it somehow concatenates input & output
paths.
Isn't this something to research about?

Regards, for your continuous effort,
  Kostis


RE: [PATCH v2 1/3] usability: don't ask questions if no reply is required

2017-05-04 Thread Kerry, Richard

My point was to ensure that where English is used on-screen it should make 
sense, which in this particular case it didn't (a French idiom which, on using 
an automatic translator, didn't make sense in English).  The same of course 
applies to other languages used on-screen.

I agree about ensuring that the application doesn't elicit a response that it 
won't, or can't, actually handle.  A rhetorical question is fine, so long as it 
is clear that the program won't accept any further input.

Though I don't agree about the issue of the length of words, as presented to a 
non-native speaker.  Sometimes a longer word can be very specific in its 
meaning, and can be looked up in a dictionary if the reader is not familiar 
with it.  Sometimes using shorter words can result in a less clear meaning, or 
perhaps be an idiomatic usage, which might be missed by a non-native speaker.

Regards,
Richard.




Richard Kerry
BNCS Engineer, SI SOL Telco & Media Vertical Practice
T: +44 (0)20 3618 2669
M: +44 (0)7812 325518
4 Triton Square, Regent’s Place, London NW1 3HG
richard.ke...@atos.net


This e-mail and the documents attached are confidential and intended solely for 
the addressee; it may also be privileged. If you receive this e-mail in error, 
please notify the sender immediately and destroy it. As its integrity cannot be 
secured on the Internet, the Atos group liability cannot be triggered for the 
message content. Although the sender endeavours to maintain a computer 
virus-free network, the sender does not warrant that this transmission is 
virus-free and will not be liable for any damages resulting from any virus 
transmitted.


From: Ævar Arnfjörð Bjarmason [ava...@gmail.com]
Sent: 04 May 2017 10:09
To: Kerry, Richard
Cc: git@vger.kernel.org
Subject: Re: [PATCH v2 1/3] usability: don't ask questions if no reply is 
required

On Thu, May 4, 2017 at 10:52 AM, Kerry, Richard  wrote:
>
> May I suggest that " The most approaching commands" doesn't make much sense 
> as English (I don't think a command can "approach").
> Perhaps it should be " The most appropriate commands".

I had the same concern, saying "appropriate" is IMO also confusing.
The point of this UI is not to point out what you should be running,
which "appropriate" implies, but just "we couldn't find what you
meant, did you mean one of these?".

I think nothing needs to change here. The whole premise here is that a
program should never ask a question when you can't give an answer, I
think that's nonsense. There's such a thing as a rhetorical question,
and sometimes using that form is the most obvious & succinct way to
put things.

Which is not to say that phrasing these things as a non-question can't
be better, but the suggestions so far just seem more complex.

Also keep in mind that a huge part of the user base for git using the
English UI consists of non-native speakers, and when in doubt we
should definitely be picking simpler English like "did you mean?" v.s.
alternatives with >10 character more obscure words.

> Richard Kerry
> BNCS Engineer, SI SOL Telco & Media Vertical Practice
>
> T: +44 (0)20 3618 2669
> M: +44 (0)7812 325518
> Lync: +44 (0) 20 3618 0778
> Room G300, Stadium House, Wood Lane, London, W12 7TA
> richard.ke...@atos.net
>
>
>
>
> -Original Message-
> From: git-ow...@vger.kernel.org [mailto:git-ow...@vger.kernel.org] On Behalf 
> Of Jean-Noel Avila
> Sent: Wednesday, May 03, 2017 10:07 PM
> To: git@vger.kernel.org
> Cc: rashmipa...@gmail.com; Jean-Noel Avila 
> Subject: [PATCH v2 1/3] usability: don't ask questions if no reply is required
>
> There has been a bug report by a corporate user that stated that "spelling 
> mistake of stash followed by a yes prints character 'y'
> infinite times."
>
> This analysis was false. When the spelling of a command contains errors, the 
> git program tries to help the user by providing candidates which are close to 
> the unexisting command. E.g Git prints the
> following:
>
> git: 'stahs' is not a git command. See 'git --help'.
> Did you mean this?
>
> stash
>
> and then exits.
>
> The problem with this hint is that it is not formally indicated as an hint 
> and the user is in fact encouraged to reply to the question, whereas the Git 
> command is already finished.
>
> The user was unlucky enough that it was the command he was looking for, and 
> replied "yes" on the command line, effectively launching the `yes` program.
>
> The initial error is that the Git programs, when launched in command-line 
> mode (without interaction) must not ask questions, because these questions 
> would normally require a user input as a reply while they won't handle 
> indeed. That's a source of confusion on UX level.
>
> To improve the general usability of the Git suite, the following rule was 
> applied:
>
> if the sentence
>  * appears in a non-interactive session
>  * is printed last before 

[PATCH v2 7/7] t4051: mark supporting files as requiring LF-only line endings

2017-05-04 Thread Johannes Schindelin
The test t4051-diff-function-context.sh passes on Linux when
core.autocrlf=true even without marking its support files as LF-only,
but they fail when core.autocrlf=true in Git for Windows' SDK.

The reason is that `grep ... >file.c.new` will keep CR/LF line endings
on Linux (obviously treating CRs as if they were regular characters),
but will be converted to LF-only line endings with MSYS2's grep that is
used in Git for Windows.

As we do not want to validate the way the available `grep` works, let's
just mark the input as LF-only and move on.

Reviewed-by: Jonathan Nieder 
Signed-off-by: Johannes Schindelin 
---
 t/.gitattributes | 1 +
 1 file changed, 1 insertion(+)

diff --git a/t/.gitattributes b/t/.gitattributes
index 3525ca43f30..bdd82cf31f7 100644
--- a/t/.gitattributes
+++ b/t/.gitattributes
@@ -5,6 +5,7 @@ t[0-9][0-9][0-9][0-9]/* -whitespace
 /t4034/*/* eol=lf
 /t4013/* eol=lf
 /t4018/* eol=lf
+/t4051/* eol=lf
 /t4100/* eol=lf
 /t4101/* eol=lf
 /t4109/* eol=lf
-- 
2.12.2.windows.2.800.gede8f145e06


[PATCH v2 6/7] Fix the remaining tests that failed with core.autocrlf=true

2017-05-04 Thread Johannes Schindelin
The test suite is mainly developed on Linux and MacOSX, which is the
reason that nobody thought to mark files as LF-only as needed.

The symptom is a test suite that fails left and right when being checked
out using Git for Windows (which defaults to core.autocrlf=true).

Mostly, the problems stem from Git's (LF-only) output being compared to
hard-coded files that are checked out with line endings according to
core.autocrlf (which is of course incorrect).

Note: the test suite also uses the t/README file as well as the COPYING
file in t/diff-lib/, expecting LF-only line endings explicitly and
failing if that assumption does not hold true. That is why we mark them
as LF-only in the .gitattributes, too.

This patch can be validated even on Linux by using this cadence:

git config core.autocrlf true
rm .git/index && git stash
make -j15 DEVELOPER=1 test

Reviewed-by: Jonathan Nieder 
Signed-off-by: Johannes Schindelin 
---
 t/.gitattributes | 20 +++-
 1 file changed, 19 insertions(+), 1 deletion(-)

diff --git a/t/.gitattributes b/t/.gitattributes
index 2d44088f56e..3525ca43f30 100644
--- a/t/.gitattributes
+++ b/t/.gitattributes
@@ -1,2 +1,20 @@
 t[0-9][0-9][0-9][0-9]/* -whitespace
-t0110/url-* binary
+/t0110/url-* binary
+/t3900/*.txt eol=lf
+/t3901/*.txt eol=lf
+/t4034/*/* eol=lf
+/t4013/* eol=lf
+/t4018/* eol=lf
+/t4100/* eol=lf
+/t4101/* eol=lf
+/t4109/* eol=lf
+/t4110/* eol=lf
+/t4135/* eol=lf
+/t4211/* eol=lf
+/t4252/* eol=lf
+/t5100/* eol=lf
+/t5515/* eol=lf
+/t556x_common eol=lf
+/t7500/* eol=lf
+/t8005/*.txt eol=lf
+/t9*/*.dump eol=lf
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v2 3/7] completion: mark bash script as LF-only

2017-05-04 Thread Johannes Schindelin
Without this change, the completion script does not work, as Bash expects
its scripts to have line feeds as end-of-line markers (this is
particularly prominent in quoted multi-line strings, where carriage
returns would slip into the strings as verbatim characters otherwise).

This change is required to let t9902-completion pass when Git's source
code is checked out with `core.autocrlf = true`.

Reviewed-by: Jonathan Nieder 
Signed-off-by: Johannes Schindelin 
---
 contrib/completion/.gitattributes | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 contrib/completion/.gitattributes

diff --git a/contrib/completion/.gitattributes 
b/contrib/completion/.gitattributes
new file mode 100644
index 000..19116944c15
--- /dev/null
+++ b/contrib/completion/.gitattributes
@@ -0,0 +1 @@
+*.bash eol=lf
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v2 5/7] t4003, t4005, t4007 & t4008: handle CR/LF in t/README & t/diff-lib/README

2017-05-04 Thread Johannes Schindelin
Seeing as Git originates from the Linux ecosystem, it is understandable
that the assumption of Unix line endings is deeply ingrained in Git's
source code as well as its test suite.

However, we must not force files that are otherwise unrelated to tests
to have Unix line endings just to appease test scripts that may use
them. Instead, the test scripts should be indifferent what line endings
files outside their corresponding t/ directories have.

As t4003-diff-rename-1.sh, t4005-diff-rename-2.sh, t4007-rename-3.sh &
t4008-diff-break-rewrite.sh make hard-coded assumptions about the SHA-1
of the tested files, and as those files' contents originate from outside
this script's sphere of authority, it must handle CR/LF line endings in
those files gracefully. We do that by simply stripping out CR bytes.

Signed-off-by: Johannes Schindelin 
---
 t/t4003-diff-rename-1.sh  | 4 ++--
 t/t4005-diff-rename-2.sh  | 4 ++--
 t/t4007-rename-3.sh   | 2 +-
 t/t4008-diff-break-rewrite.sh | 4 ++--
 4 files changed, 7 insertions(+), 7 deletions(-)

diff --git a/t/t4003-diff-rename-1.sh b/t/t4003-diff-rename-1.sh
index df2accb6555..c3e0a3c3fc9 100755
--- a/t/t4003-diff-rename-1.sh
+++ b/t/t4003-diff-rename-1.sh
@@ -11,7 +11,7 @@ test_description='More rename detection
 
 test_expect_success \
 'prepare reference tree' \
-'cat "$TEST_DIRECTORY"/diff-lib/COPYING >COPYING &&
+'tr -d "\015" <"$TEST_DIRECTORY"/diff-lib/COPYING >COPYING &&
  echo frotz >rezrov &&
 git update-index --add COPYING rezrov &&
 tree=$(git write-tree) &&
@@ -99,7 +99,7 @@ test_expect_success \
 
 test_expect_success \
 'prepare work tree once again' \
-'cat "$TEST_DIRECTORY"/diff-lib/COPYING >COPYING &&
+'tr -d "\015" <"$TEST_DIRECTORY"/diff-lib/COPYING >COPYING &&
  git update-index --add --remove COPYING COPYING.1'
 
 # tree has COPYING and rezrov.  work tree has COPYING and COPYING.1,
diff --git a/t/t4005-diff-rename-2.sh b/t/t4005-diff-rename-2.sh
index 135addbfbda..f1641c35ee2 100755
--- a/t/t4005-diff-rename-2.sh
+++ b/t/t4005-diff-rename-2.sh
@@ -11,7 +11,7 @@ test_description='Same rename detection as t4003 but testing 
diff-raw.
 
 test_expect_success \
 'prepare reference tree' \
-'cat "$TEST_DIRECTORY"/diff-lib/COPYING >COPYING &&
+'tr -d "\015" <"$TEST_DIRECTORY"/diff-lib/COPYING >COPYING &&
  echo frotz >rezrov &&
 git update-index --add COPYING rezrov &&
 tree=$(git write-tree) &&
@@ -71,7 +71,7 @@ test_expect_success \
 
 test_expect_success \
 'prepare work tree once again' \
-'cat "$TEST_DIRECTORY"/diff-lib/COPYING >COPYING &&
+'tr -d "\015" <"$TEST_DIRECTORY"/diff-lib/COPYING >COPYING &&
  git update-index --add --remove COPYING COPYING.1'
 
 git diff-index -C --find-copies-harder $tree >current
diff --git a/t/t4007-rename-3.sh b/t/t4007-rename-3.sh
index dae327fabbf..0157fde5503 100755
--- a/t/t4007-rename-3.sh
+++ b/t/t4007-rename-3.sh
@@ -11,7 +11,7 @@ test_description='Rename interaction with pathspec.
 
 test_expect_success 'prepare reference tree' '
mkdir path0 path1 &&
-   cp "$TEST_DIRECTORY"/diff-lib/COPYING path0/COPYING &&
+   tr -d "\015" <"$TEST_DIRECTORY"/diff-lib/COPYING >path0/COPYING &&
git update-index --add path0/COPYING &&
tree=$(git write-tree) &&
echo $tree
diff --git a/t/t4008-diff-break-rewrite.sh b/t/t4008-diff-break-rewrite.sh
index 9dd1bc5e162..5af4fa6aadb 100755
--- a/t/t4008-diff-break-rewrite.sh
+++ b/t/t4008-diff-break-rewrite.sh
@@ -25,8 +25,8 @@ Further, with -B and -M together, these should turn into two 
renames.
 . "$TEST_DIRECTORY"/diff-lib.sh ;# test-lib chdir's into trash
 
 test_expect_success setup '
-   cat "$TEST_DIRECTORY"/diff-lib/README >file0 &&
-   cat "$TEST_DIRECTORY"/diff-lib/COPYING >file1 &&
+   tr -d "\015" <"$TEST_DIRECTORY"/diff-lib/README >file0 &&
+   tr -d "\015" <"$TEST_DIRECTORY"/diff-lib/COPYING >file1 &&
git update-index --add file0 file1 &&
git tag reference $(git write-tree)
 '
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v2 4/7] t3901: move supporting files into t/t3901/

2017-05-04 Thread Johannes Schindelin
The current convention is to either generate files on the fly in tests,
or to use supporting files taken from a t/t/ directory (where 
matches the test's number, or the number of the test from which we
borrow supporting files).

The test t3901-i18n-patch.sh was obviously introduced before that
convention was in full swing, hence its supporting files still lived in
t/t3901-8859-1.txt and t/t3901-utf8.txt, respectively.

Let's adjust to the current convention.

Signed-off-by: Johannes Schindelin 
---
 t/t0203-gettext-setlocale-sanity.sh  |  4 ++--
 t/t3901-i18n-patch.sh| 38 
 t/{t3901-8859-1.txt => t3901/8859-1.txt} |  0
 t/{t3901-utf8.txt => t3901/utf8.txt} |  0
 t/t9350-fast-export.sh   |  2 +-
 t/t9500-gitweb-standalone-no-errors.sh   |  4 ++--
 6 files changed, 24 insertions(+), 24 deletions(-)
 rename t/{t3901-8859-1.txt => t3901/8859-1.txt} (100%)
 rename t/{t3901-utf8.txt => t3901/utf8.txt} (100%)

diff --git a/t/t0203-gettext-setlocale-sanity.sh 
b/t/t0203-gettext-setlocale-sanity.sh
index a2124600811..71b0d74b4dd 100755
--- a/t/t0203-gettext-setlocale-sanity.sh
+++ b/t/t0203-gettext-setlocale-sanity.sh
@@ -8,7 +8,7 @@ test_description="The Git C functions aren't broken by 
setlocale(3)"
 . ./lib-gettext.sh
 
 test_expect_success 'git show a ISO-8859-1 commit under C locale' '
-   . "$TEST_DIRECTORY"/t3901-8859-1.txt &&
+   . "$TEST_DIRECTORY"/t3901/8859-1.txt &&
test_commit "iso-c-commit" iso-under-c &&
git show >out 2>err &&
! test -s err &&
@@ -16,7 +16,7 @@ test_expect_success 'git show a ISO-8859-1 commit under C 
locale' '
 '
 
 test_expect_success GETTEXT_LOCALE 'git show a ISO-8859-1 commit under a UTF-8 
locale' '
-   . "$TEST_DIRECTORY"/t3901-8859-1.txt &&
+   . "$TEST_DIRECTORY"/t3901/8859-1.txt &&
test_commit "iso-utf8-commit" iso-under-utf8 &&
LANGUAGE=is LC_ALL="$is_IS_locale" git show >out 2>err &&
! test -s err &&
diff --git a/t/t3901-i18n-patch.sh b/t/t3901-i18n-patch.sh
index f663d567c8a..923eb01f0ea 100755
--- a/t/t3901-i18n-patch.sh
+++ b/t/t3901-i18n-patch.sh
@@ -31,7 +31,7 @@ test_expect_success setup '
 
# use UTF-8 in author and committer name to match the
# i18n.commitencoding settings
-   . "$TEST_DIRECTORY"/t3901-utf8.txt &&
+   . "$TEST_DIRECTORY"/t3901/utf8.txt &&
 
test_tick &&
echo "$GIT_AUTHOR_NAME" >mine &&
@@ -55,7 +55,7 @@ test_expect_success setup '
# the second one on the side branch is ISO-8859-1
git config i18n.commitencoding ISO8859-1 &&
# use author and committer name in ISO-8859-1 to match it.
-   . "$TEST_DIRECTORY"/t3901-8859-1.txt
+   . "$TEST_DIRECTORY"/t3901/8859-1.txt
fi &&
test_tick &&
echo Yet another >theirs &&
@@ -100,7 +100,7 @@ test_expect_success 'rebase (U/U)' '
 
# The result will be committed by GIT_COMMITTER_NAME --
# we want UTF-8 encoded name.
-   . "$TEST_DIRECTORY"/t3901-utf8.txt &&
+   . "$TEST_DIRECTORY"/t3901/utf8.txt &&
git checkout -b test &&
git rebase master &&
 
@@ -110,7 +110,7 @@ test_expect_success 'rebase (U/U)' '
 test_expect_success 'rebase (U/L)' '
git config i18n.commitencoding UTF-8 &&
git config i18n.logoutputencoding ISO8859-1 &&
-   . "$TEST_DIRECTORY"/t3901-utf8.txt &&
+   . "$TEST_DIRECTORY"/t3901/utf8.txt &&
 
git reset --hard side &&
git rebase master &&
@@ -122,7 +122,7 @@ test_expect_success !MINGW 'rebase (L/L)' '
# In this test we want ISO-8859-1 encoded commits as the result
git config i18n.commitencoding ISO8859-1 &&
git config i18n.logoutputencoding ISO8859-1 &&
-   . "$TEST_DIRECTORY"/t3901-8859-1.txt &&
+   . "$TEST_DIRECTORY"/t3901/8859-1.txt &&
 
git reset --hard side &&
git rebase master &&
@@ -135,7 +135,7 @@ test_expect_success !MINGW 'rebase (L/U)' '
# to get ISO-8859-1 results.
git config i18n.commitencoding ISO8859-1 &&
git config i18n.logoutputencoding UTF-8 &&
-   . "$TEST_DIRECTORY"/t3901-8859-1.txt &&
+   . "$TEST_DIRECTORY"/t3901/8859-1.txt &&
 
git reset --hard side &&
git rebase master &&
@@ -148,7 +148,7 @@ test_expect_success 'cherry-pick(U/U)' '
 
git config i18n.commitencoding UTF-8 &&
git config i18n.logoutputencoding UTF-8 &&
-   . "$TEST_DIRECTORY"/t3901-utf8.txt &&
+   . "$TEST_DIRECTORY"/t3901/utf8.txt &&
 
git reset --hard master &&
git cherry-pick side^ &&
@@ -163,7 +163,7 @@ test_expect_success !MINGW 'cherry-pick(L/L)' '
 
git config i18n.commitencoding ISO8859-1 &&
git config i18n.logoutputencoding ISO8859-1 &&
-   . "$TEST_DIRECTORY"/t3901-8859-1.txt &&
+   . "$TEST_DIRECTORY"/t3901/8859-1.txt &&
 
git reset --hard master &&
  

[PATCH v2 2/7] git-new-workdir: mark script as LF-only

2017-05-04 Thread Johannes Schindelin
Bash does not handle scripts with CR/LF line endings correctly, therefore
they *have* to be forced to LF-only line endings.

Funnily enough, this fixes t3000-ls-files-others and
t1021-rerere-in-workdir when git.git was checked out with
core.autocrlf=true, as these test still use git-new-workdir (once `git
worktree` is no longer marked as experimental, both scripts probably
want to be ported to using that command instead).

Reviewed-by: Jonathan Nieder 
Signed-off-by: Johannes Schindelin 
---
 contrib/workdir/.gitattributes | 1 +
 1 file changed, 1 insertion(+)
 create mode 100644 contrib/workdir/.gitattributes

diff --git a/contrib/workdir/.gitattributes b/contrib/workdir/.gitattributes
new file mode 100644
index 000..1f78c5d1bd3
--- /dev/null
+++ b/contrib/workdir/.gitattributes
@@ -0,0 +1 @@
+/git-new-workdir eol=lf
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v2 1/7] Fix build with core.autocrlf=true

2017-05-04 Thread Johannes Schindelin
On Windows, the default line endings are denoted by a Carriage Return
byte followed by a Line Feed byte, while Linux and MacOSX use a single
Line Feed byte to denote a line ending.

To help with this situation, Git introduced several mechanisms over the
last decade, most prominently the `core.autocrlf` setting.

Sometimes, however, a single setting is incorrect, e.g. when certain
files in the source code are to be consumed by software that can handle
only LF line endings, while other files can use whatever is appropriate
for the current platform.

To allow for that, Git added the `eol` option to its .gitattributes
handling, expecting every user of Git to mark their source code
appropriately.

Bash assumes that line-endings of scripts are denoted by a single Line
Feed byte. Therefore, shell scripts in Git's source code are one example
where that `eol=lf` option is *required*.

When generating common-cmds.h, the Unix tools we use generally operate on
the assumption that input and output deliminate their lines using LF-only
line endings. Consequently, they would happily copy the CR byte verbatim
into the strings in common-cmds.h, which in turn makes the C preprocessor
barf (that interprets them as MacOS-style line endings). Therefore, we
have to mark the input files as LF-only: command-list.txt and
Documentation/git-*.txt.

Quite a bit belatedly, this patch brings Git's own source code in line
with those expectations by setting those attributes to allow for a
correct build even when core.autocrlf=true.

This patch can be validated even on Linux, by using this cadence:

git config core.autocrlf true
rm .git/index && git stash
make -j15 DEVELOPER=1

Reviewed-by: Jonathan Nieder 
Signed-off-by: Johannes Schindelin 
---
 .gitattributes | 8 +++-
 git-gui/.gitattributes | 1 +
 2 files changed, 8 insertions(+), 1 deletion(-)

diff --git a/.gitattributes b/.gitattributes
index 320e33c327c..8ce9c6b 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,9 @@
 * whitespace=!indent,trail,space
 *.[ch] whitespace=indent,trail,space diff=cpp
-*.sh whitespace=indent,trail,space
+*.sh whitespace=indent,trail,space eol=lf
+*.perl eol=lf
+*.pm eol=lf
+/Documentation/git-*.txt eol=lf
+/command-list.txt eol=lf
+/GIT-VERSION-GEN eol=lf
+/mergetools/* eol=lf
diff --git a/git-gui/.gitattributes b/git-gui/.gitattributes
index 33d07c06bd9..59cd41dbff7 100644
--- a/git-gui/.gitattributes
+++ b/git-gui/.gitattributes
@@ -2,3 +2,4 @@
 *   encoding=US-ASCII
 git-gui.sh  encoding=UTF-8
 /po/*.poencoding=UTF-8
+/GIT-VERSION-GEN eol=lf
-- 
2.12.2.windows.2.800.gede8f145e06




[PATCH v2 0/7] Abide by our own rules regarding line endings

2017-05-04 Thread Johannes Schindelin
[Cc:ing Jonathan Nieder, who reviewed the first iteration of this patch
series, and Jeff Hostetler who is guilty of prodding me into finishing
this patch series in the first place.]

Over the past decade, there have been a couple of attempts to remedy the
situation regarding line endings (Windows/DOS line endings are
traditionally CR/LF even if many current applications can handle LF,
too, Linux/MacOSX/*BSD/Unix uses LF-only line handlings, and old MacOS
software used to use CR-only line endings).

The current idea seems to be that the default line endings differ
depending on the platform, and for files that should be exempt from that
default, we strongly recommend using .gitattributes to define what the
source code requires.

It is time to heed our own recommendation and mark the files that
require LF-only line endings in our very own .gitattributes.

For starters, those files include shell scripts: the most prevalent
shell interpreter in use (and certainly used in Git for Windows) is
Bash, and Bash does not handle CR/LF line endings gracefully.

There are even two shell scripts that are used in the test suite even if
they are not technically supposed to be part of core Git, as indicated
by their habitat inside contrib/: git-new-workdir and
git-completion.bash.

Related to shell scripts: when generating common-cmds.h, we use tools
that generally operate on the assumption that input and output
deliminate their lines using LF-only line endings. Consequently, they
would happily copy the CR byte verbatim into the strings in
common-cmds.h, which in turn makes the C preprocessor barf (that
interprets them as MacOS-style line endings).

Further, the most obvious required fixes concern tests' support files
committed verbatim, to be compared to Git's output, which is always
LF-only.

There are a few SVN dump files, too, supporting the Subversion-related
tests, requiring LF-only line endings.

Finally, the test suite makes use of text files that are not obviously
supporting tests, such as t/README, comparing them to LF-only versions
(and consequently failing if the files have been checked out with CR/LF
line endings). Therefore we convert those to LF-only versions when
the test hard-codes that expectation.

Without these fixes, Git will fail to build and pass the test suite, as
can be verified even on Linux using this cadence:

git config core.autocrlf true
rm .git/index && git stash
make DEVELOPER=1 -j15 test

Note: I separated out the change marking t/t4051/* as LF-only into an
individual commit for one reason: it would appear that the test passes
if checked out using core.autocrlf=true on Linux, but on Windows the
test fails. In that respect, this test is special, as the other changes
can be easily validated even without using Windows.

Changes since v1:

- the t/t3901-*.txt files have been moved into t/t3901/, in line with
  the de facto convention all other tests follow.

- instead of marking files outside of t/t[0-9]* as LF-only, the
  following tests are now adjusted *not* to expect LF-only line endings
  in files they do not control: t4003, t4005, t4007 & t4008.

- as a consequence, files used in the tests but living outside
  t/t[0-9]*, such as COPYING, etc, are no longer marked as LF-only.

- clarified in the commit message of the last patch why t4051 passes on
  Linux with core.autocrlf=true but not on Windows.


Johannes Schindelin (7):
  Fix build with core.autocrlf=true
  git-new-workdir: mark script as LF-only
  completion: mark bash script as LF-only
  t3901: move supporting files into t/t3901/
  t4003, t4005, t4007 & t4008: handle CR/LF in t/README &
t/diff-lib/README
  Fix the remaining tests that failed with core.autocrlf=true
  t4051: mark supporting files as requiring LF-only line endings

 .gitattributes   |  8 ++-
 contrib/completion/.gitattributes|  1 +
 contrib/workdir/.gitattributes   |  1 +
 git-gui/.gitattributes   |  1 +
 t/.gitattributes | 21 +-
 t/t0203-gettext-setlocale-sanity.sh  |  4 ++--
 t/t3901-i18n-patch.sh| 38 
 t/{t3901-8859-1.txt => t3901/8859-1.txt} |  0
 t/{t3901-utf8.txt => t3901/utf8.txt} |  0
 t/t4003-diff-rename-1.sh |  4 ++--
 t/t4005-diff-rename-2.sh |  4 ++--
 t/t4007-rename-3.sh  |  2 +-
 t/t4008-diff-break-rewrite.sh|  4 ++--
 t/t9350-fast-export.sh   |  2 +-
 t/t9500-gitweb-standalone-no-errors.sh   |  4 ++--
 15 files changed, 61 insertions(+), 33 deletions(-)
 create mode 100644 contrib/completion/.gitattributes
 create mode 100644 contrib/workdir/.gitattributes
 rename t/{t3901-8859-1.txt => t3901/8859-1.txt} (100%)
 rename t/{t3901-utf8.txt => t3901/utf8.txt} (100%)


base-commit: d2bbb7c2bcf6e77ebfcabf4e12110fe6d5c91de6
Published-As: https://github.com/dscho/git/releases/tag/lf-attrs-v2

Re: CYGWIN git cannot install python packages with pip

2017-05-04 Thread Adam Dinwoodie
On Wed, May 03, 2017 at 04:32:15PM +0200, Johannes Schindelin wrote:
> On Wed, 3 May 2017, ankostis wrote:
> 
> > On 3 May 2017 11:47, Johannes Schindelin wrote:
> > > On Tue, 2 May 2017, ankostis wrote:
> > >
> > >> On Windows, with Cygwin-git 02.12.2-1 the python command:
> > >> [...]
> > >
> > > You forgot to mention what python/pip you use (is it the Cygwin one,
> > > or a standalone one?). Intentional?
> > 
> > I have tested it on using WinPython[1] 3.6.1 & 3.5.3.
> > WinPython is built out of the "standard" python on Windows.
> 
> I am afraid that you run into path conversion problems here. /cygdrive/d/
> is something Cygwin understands, but not WinPython nor Git for Windows.
> 
> It appears to be difficult to impossible to mix Cygwin with non-Cygwin
> executables...

Confirmed: the Cygwin project as a general rule doesn't support this
sort of mixing of Windows and Cygwin tools.  Either use Python and Git
packages both provided by Cygwin, or both provided by Windows.

Mixing and matching does work sometimes -- as was apparently the case
with Cygwin Git v2.8.3-1 -- but it requires care and you're generally on
your own with it.

Adam


Re: [PATCH v2 1/3] usability: don't ask questions if no reply is required

2017-05-04 Thread Jean-Noël AVILA
Le jeudi 4 mai 2017, 08:52:43 CEST Kerry, Richard a écrit :
> 
> May I suggest that " The most approaching commands" doesn't make much sense 
> as English (I don't think a command can "approach").
> Perhaps it should be " The most appropriate commands".
> 
> 
> Regards,
> Richard.
> 



Thank you for your proposition. "approaching" is a frenchism  doubled with 
google translate (sorry!). Maybe "similar" would also work.
  


Re: [PATCH] __git_ps1: Don't kill shell if user types `set -e`

2017-05-04 Thread Tom Hale
On 04/05/17 16:16, Ævar Arnfjörð Bjarmason wrote:

> What's the real use-case here? If you do "set -e" in your shell you
> also get e.g.:
> 
> $ ls -l blah
> ls: cannot access blah: No such file or directory
> === Command terminated with exit status 2 (Thu May  4 11:16:03 2017) ===
> 
> I.e. any little failure will terminate your shell, are you actually
> running a shell like this? For what purpose?

If I want to copy and paste a list of commands and have the execution
stop at a failure, I type "bash" then "set -e", then paste the commands.

It saves me creating a script file and then removing it later.

-- 
Tom Hale


Re: [PATCH] archive-tar: fix a sparse 'constant too large' warning

2017-05-04 Thread Johannes Schindelin
Hi Ramsay,

On Thu, 4 May 2017, Ramsay Jones wrote:

> diff --git a/archive-tar.c b/archive-tar.c
> index 319a5b1c7..6dddc0cff 100644
> --- a/archive-tar.c
> +++ b/archive-tar.c
> @@ -33,7 +33,7 @@ static int write_tar_filter_archive(const struct archiver 
> *ar,
>  #if TIME_MAX == 0x
>  #define USTAR_MAX_MTIME TIME_MAX
>  #else
> -#define USTAR_MAX_MTIME 0777UL
> +#define USTAR_MAX_MTIME 0777ULL
>  #endif
>  

Funny. This problem was pointed out by Hannes Sixt (IIRC) and I fixed this
very thing in v6.

Except I did not. I changed the wrong constant! Instead of
USTAR_MAX_MTIME, I adjusted USTAR_MAX_SIZE. D'oh.

I just saw that my patch series already hit `next`, so I fear you are
right that we need a follow-up patch. Maybe we want this diff, though?

-- snipsnap --
diff --git a/archive-tar.c b/archive-tar.c
index 319a5b1c7cd..073e60ebd3c 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -28,12 +28,12 @@ static int write_tar_filter_archive(const struct archiver 
*ar,
 #if ULONG_MAX == 0x
 #define USTAR_MAX_SIZE ULONG_MAX
 #else
-#define USTAR_MAX_SIZE 0777ULL
+#define USTAR_MAX_SIZE 0777UL
 #endif
 #if TIME_MAX == 0x
 #define USTAR_MAX_MTIME TIME_MAX
 #else
-#define USTAR_MAX_MTIME 0777UL
+#define USTAR_MAX_MTIME 0777ULL
 #endif
 
 /* writes out the whole block, but only if it is full */


Re: [PATCH v2] travis-ci: retry if Git for Windows CI returns HTTP error 502 or 503

2017-05-04 Thread Johannes Schindelin
Hi Lars,


On Wed, 3 May 2017, Lars Schneider wrote:

> The Git for Windows CI web app sometimes returns HTTP errors of
> "502 bad gateway" or "503 service unavailable" [1]. We also need to
> check the HTTP content because the GfW web app seems to pass through
> (error) results from other Azure calls with HTTP code 200.
> Wait a little and retry the request if this happens.

Thanks. In theory, it would be better to fix the web app to pass through
also the 502 error code, in practice I have a hard time finding the time
to make it so ;-)

Therefore, I would be very much in favor of the current version of the
patch.

Ciao,
Dscho


Re: [PATCH] __git_ps1: Don't kill shell if user types `set -e`

2017-05-04 Thread Ævar Arnfjörð Bjarmason
On Thu, May 4, 2017 at 10:26 AM, Tom Hale  wrote:
>
> On 17/04/17 11:24, Junio C Hamano wrote:
>> "Tom \"Ravi\" Hale"  writes:
>>
>> > If a user types `set -e` in an interactive shell, and is using __git_ps1
>> > to set
>> > their prompt, the shell will die if the current directory isn't inside a 
>> > git
>> > repository.
>> >
>> Hmph.  So the fix would be something like this?
>>
>>  repo_info="$(git rev-parse --git-dir --is-inside-git-dir \
>>  --is-bare-repository --is-inside-work-tree \
>> - --short HEAD 2>/dev/null)"
>> + --short HEAD 2>/dev/null || :)"
>
> Nope, that would cause the next line
> rev_parse_exit_code="$?"
> to always be assigned 0.
>
> I believe the patch we're after is attached.

I see how this would fix things, but this just seems like a rather
crazy thing for us to support, we can take this patch, but it's going
to take quite some maintenance burden to ensure that this code is set
-e clean going forward, and I don't think we should take a patch like
this without some general support in the regression tests to ensure
that the completion code is set -e clean, otherwise this is going to
very easily regress the next time someone not aware of this patches
it.

What's the real use-case here? If you do "set -e" in your shell you
also get e.g.:

$ ls -l blah
ls: cannot access blah: No such file or directory
=== Command terminated with exit status 2 (Thu May  4 11:16:03 2017) ===

I.e. any little failure will terminate your shell, are you actually
running a shell like this? For what purpose?


Re: PCRE v2 compile error, was Re: What's cooking in git.git (May 2017, #01; Mon, 1)

2017-05-04 Thread Johannes Schindelin
Hi Ævar,

On Wed, 3 May 2017, Ævar Arnfjörð Bjarmason wrote:

> [Just replying to you & Duy in the same mail, easier]

It makes it harder on everybody else, though, as two slightly different
discussion points are conflated now. Also, no single online mail archive
will be able to render the thread correctly (assuming that you edited in
the In-Reply-To header to loop back to Duy's mail).

> On Wed, May 3, 2017 at 11:45 AM, Johannes Schindelin
>  wrote:
> >
> > At this point, I feel that someone should recall into our collective
> > memory what happened when we made a change similar in nature that
> > broke existing build setups: by requiring REG_STARTEND all of a sudden
> > ("you can easily flip the NO_REGEX switch", as if everybody should
> > know about those Makefile flags we have).
> 
> And as a result grep/log -G got faster by default,

Sure. For those developers where the build was not broken.

Software maintenance is always a trade-off, and with software as popular
as Git, maintainers bear a special responsibility to *not* break builds
easily, as it is more likely than not that anybody who wants to build Git
is *unfamiliar* with the specifics.

That is the main reason why we have a configure, even if we try hard to
make things work with a straight `make`: people who are happily oblivious
of our discussions on this here high-volume mailing list will be able to
build Git without even consulting the documentation, just by doing what
they would do with any Unix-based software: ./configure && make

> and more importantly since v2.10.1 which includes your 2f8952250a and
> made a REG_STARTEND engine a hard requirement nobody using git is
> mysteriously going to miss grep results because of some stray \0 in the
> string being matched.

That is a misinterpretation of what the REG_STARTEND flag is supposed to
do. In *some* implementations, REG_STARTEND allows NULs in the haystack.
Some other implementations do not allow that. It is ill-defined.

> I agree that I should drop the patch to make v2 the default from this
> series for now. Clearly it's controversial, and can be considered on
> its own merits once the supporting code is in. I'll do that in the
> next submission, which I'm planning to send after v2.13.0 comes out.

Good. I am really glad that we agree that the move to v2 should be a
two-step process, with the uncontroversial "optionally use PCRE v2 for
speed and robustness" first.

Once enough users like it (and speaking for myself, once I heard from
enough users how good it is so that I can justify to set aside enough time
to support PCRE v2 in MSYS2), you will find it much smoother sailing to go
to phase 2.

Ciao,
Dscho

Re: [PATCH v2 1/3] usability: don't ask questions if no reply is required

2017-05-04 Thread Ævar Arnfjörð Bjarmason
On Thu, May 4, 2017 at 10:52 AM, Kerry, Richard  wrote:
>
> May I suggest that " The most approaching commands" doesn't make much sense 
> as English (I don't think a command can "approach").
> Perhaps it should be " The most appropriate commands".

I had the same concern, saying "appropriate" is IMO also confusing.
The point of this UI is not to point out what you should be running,
which "appropriate" implies, but just "we couldn't find what you
meant, did you mean one of these?".

I think nothing needs to change here. The whole premise here is that a
program should never ask a question when you can't give an answer, I
think that's nonsense. There's such a thing as a rhetorical question,
and sometimes using that form is the most obvious & succinct way to
put things.

Which is not to say that phrasing these things as a non-question can't
be better, but the suggestions so far just seem more complex.

Also keep in mind that a huge part of the user base for git using the
English UI consists of non-native speakers, and when in doubt we
should definitely be picking simpler English like "did you mean?" v.s.
alternatives with >10 character more obscure words.

> Richard Kerry
> BNCS Engineer, SI SOL Telco & Media Vertical Practice
>
> T: +44 (0)20 3618 2669
> M: +44 (0)7812 325518
> Lync: +44 (0) 20 3618 0778
> Room G300, Stadium House, Wood Lane, London, W12 7TA
> richard.ke...@atos.net
>
>
>
>
> -Original Message-
> From: git-ow...@vger.kernel.org [mailto:git-ow...@vger.kernel.org] On Behalf 
> Of Jean-Noel Avila
> Sent: Wednesday, May 03, 2017 10:07 PM
> To: git@vger.kernel.org
> Cc: rashmipa...@gmail.com; Jean-Noel Avila 
> Subject: [PATCH v2 1/3] usability: don't ask questions if no reply is required
>
> There has been a bug report by a corporate user that stated that "spelling 
> mistake of stash followed by a yes prints character 'y'
> infinite times."
>
> This analysis was false. When the spelling of a command contains errors, the 
> git program tries to help the user by providing candidates which are close to 
> the unexisting command. E.g Git prints the
> following:
>
> git: 'stahs' is not a git command. See 'git --help'.
> Did you mean this?
>
> stash
>
> and then exits.
>
> The problem with this hint is that it is not formally indicated as an hint 
> and the user is in fact encouraged to reply to the question, whereas the Git 
> command is already finished.
>
> The user was unlucky enough that it was the command he was looking for, and 
> replied "yes" on the command line, effectively launching the `yes` program.
>
> The initial error is that the Git programs, when launched in command-line 
> mode (without interaction) must not ask questions, because these questions 
> would normally require a user input as a reply while they won't handle 
> indeed. That's a source of confusion on UX level.
>
> To improve the general usability of the Git suite, the following rule was 
> applied:
>
> if the sentence
>  * appears in a non-interactive session
>  * is printed last before exit
>  * is a question addressing the user ("you")
>
> the sentence is turned into affirmative and proposes the option.
>
> The basic rewording of the question sentences has been extended to other 
> spots found in the source.
>
> Requested at https://github.com/git/git-scm.com/issues/999 by rpai1
>
> Signed-off-by: Jean-Noel Avila 
> ---
>  builtin/am.c   | 4 ++--
>  builtin/checkout.c | 2 +-
>  help.c | 4 ++--
>  3 files changed, 5 insertions(+), 5 deletions(-)
>
> diff --git a/builtin/am.c b/builtin/am.c index a95dd8b4e..f5afa438d 100644
> --- a/builtin/am.c
> +++ b/builtin/am.c
> @@ -1312,7 +1312,7 @@ static int parse_mail(struct am_state *state, const 
> char *mail)
> }
>
> if (is_empty_file(am_path(state, "patch"))) {
> -   printf_ln(_("Patch is empty. Was it split wrong?"));
> +   printf_ln(_("Patch is empty. It may have been split wrong."));
> die_user_resolve(state);
> }
>
> @@ -1940,7 +1940,7 @@ static void am_resolve(struct am_state *state)
>
> if (unmerged_cache()) {
> printf_ln(_("You still have unmerged paths in your index.\n"
> -   "Did you forget to use 'git add'?"));
> +   "You might want to use 'git add' on them."));
> die_user_resolve(state);
> }
>
> diff --git a/builtin/checkout.c b/builtin/checkout.c index 
> bfa5419f3..05037b9b6 100644
> --- a/builtin/checkout.c
> +++ b/builtin/checkout.c
> @@ -1287,7 +1287,7 @@ int cmd_checkout(int argc, const char **argv, const 
> char *prefix)
>  */
> if (opts.new_branch && argc == 1)
> die(_("Cannot update paths and switch to branch '%s' 
> at the same time.\n"
> - "Did you intend to checkout '%s' which can not 
> be resolved as commit?"),

  1   2   >