from:"René Scharfe"

Re: bug: git-archive does not use the zip64 extension for archives with more than 16k entries

2015-08-12 Thread René Scharfe


Am 11.08.2015 um 12:40 schrieb Johannes Schauer:

Hi,

for repositories with more than 16k files and folders, git-archive will create
zip files which store the wrong number of entries. That is, it stores the
number of entries modulo 16k. This will break unpackers that do not include
code to support this brokenness.


The limit is rather 65535 entries, isn't it?  The entries field has two 
bytes, and they are used fully.


Which programs are affected? InfoZIP Zip 3.0 and 7-Zip 9.20 seem to 
handle an archive with more entries just fine.  The built-in 
functionality of Windows 10 doesn't.


Besides, 64K entries should be enough for anybody. ;-)

Seriously, though: What kind of repository has that many files and uses 
the ZIP format to distribute snapshots?  Just curious.



Instead, git-archive should use the zip64 extension to handle more than 16k
files and folders correctly.


That seems to be what InfoZIP does and Windows 10 handles it just fine. 
 If lower Windows versions and other popular extractors can unzip such 
archives as well then this might indeed be the way to go.


Thanks,
René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 3/3] archive-zip: support more than 65535 entries

2015-08-22 Thread René Scharfe

Support more than 65535 entries cleanly by writing a "zip64 end of
central directory record" (with a 64-bit field for the number of
entries) before the usual "end of central directory record" (which
contains only a 16-bit field).  InfoZIP's zip does the same.
Archives with 65535 or less entries are not affected.

Programs that extract all files like InfoZIP's zip and 7-Zip
ignored the field and could extract all files already.  Software
that relies on the ZIP file directory to show a list of contained
files quickly to simulate to normal directory like Windows'
built-in ZIP functionality only saw a subset of the included files.

Windows supports ZIP64 since Vista according to
https://en.wikipedia.org/wiki/Zip_%28file_format%29#ZIP64.

Suggested-by: Johannes Schauer 
Signed-off-by: Rene Scharfe 
---
 archive-zip.c   | 93 +++--
 t/t5004-archive-corner-cases.sh |  2 +-
 2 files changed, 91 insertions(+), 4 deletions(-)

diff --git a/archive-zip.c b/archive-zip.c
index 2a76156..9db4735 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -16,7 +16,9 @@ static unsigned int zip_dir_size;
 
 static unsigned int zip_offset;
 static unsigned int zip_dir_offset;
-static unsigned int zip_dir_entries;
+static uint64_t zip_dir_entries;
+
+static unsigned int max_creator_version;
 
 #define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024)
 #define ZIP_STREAM (1 <<  3)
@@ -86,6 +88,28 @@ struct zip_extra_mtime {
unsigned char _end[1];
 };
 
+struct zip64_dir_trailer {
+   unsigned char magic[4];
+   unsigned char record_size[8];
+   unsigned char creator_version[2];
+   unsigned char version[2];
+   unsigned char disk[4];
+   unsigned char directory_start_disk[4];
+   unsigned char entries_on_this_disk[8];
+   unsigned char entries[8];
+   unsigned char size[8];
+   unsigned char offset[8];
+   unsigned char _end[1];
+};
+
+struct zip64_dir_trailer_locator {
+   unsigned char magic[4];
+   unsigned char disk[4];
+   unsigned char offset[8];
+   unsigned char number_of_disks[4];
+   unsigned char _end[1];
+};
+
 /*
  * On ARM, padding is added at the end of the struct, so a simple
  * sizeof(struct ...) reports two bytes more than the payload size
@@ -98,6 +122,12 @@ struct zip_extra_mtime {
 #define ZIP_EXTRA_MTIME_SIZE   offsetof(struct zip_extra_mtime, _end)
 #define ZIP_EXTRA_MTIME_PAYLOAD_SIZE \
(ZIP_EXTRA_MTIME_SIZE - offsetof(struct zip_extra_mtime, flags))
+#define ZIP64_DIR_TRAILER_SIZE offsetof(struct zip64_dir_trailer, _end)
+#define ZIP64_DIR_TRAILER_RECORD_SIZE \
+   (ZIP64_DIR_TRAILER_SIZE - \
+offsetof(struct zip64_dir_trailer, creator_version))
+#define ZIP64_DIR_TRAILER_LOCATOR_SIZE \
+   offsetof(struct zip64_dir_trailer_locator, _end)
 
 static void copy_le16(unsigned char *dest, unsigned int n)
 {
@@ -113,6 +143,31 @@ static void copy_le32(unsigned char *dest, unsigned int n)
dest[3] = 0xff & (n >> 030);
 }
 
+static void copy_le64(unsigned char *dest, uint64_t n)
+{
+   dest[0] = 0xff & n;
+   dest[1] = 0xff & (n >> 010);
+   dest[2] = 0xff & (n >> 020);
+   dest[3] = 0xff & (n >> 030);
+   dest[4] = 0xff & (n >> 040);
+   dest[5] = 0xff & (n >> 050);
+   dest[6] = 0xff & (n >> 060);
+   dest[7] = 0xff & (n >> 070);
+}
+
+static uint64_t clamp_max(uint64_t n, uint64_t max, int *clamped)
+{
+   if (n <= max)
+   return n;
+   *clamped = 1;
+   return max;
+}
+
+static void copy_le16_clamp(unsigned char *dest, uint64_t n, int *clamped)
+{
+   copy_le16(dest, clamp_max(n, 0x, clamped));
+}
+
 static void *zlib_deflate_raw(void *data, unsigned long size,
  int compression_level,
  unsigned long *compressed_size)
@@ -282,6 +337,9 @@ static int write_zip_entry(struct archiver_args *args,
sha1_to_hex(sha1));
}
 
+   if (creator_version > max_creator_version)
+   max_creator_version = creator_version;
+
if (buffer && method == 8) {
out = deflated = zlib_deflate_raw(buffer, size,
  args->compression_level,
@@ -439,20 +497,49 @@ static int write_zip_entry(struct archiver_args *args,
return 0;
 }
 
+static void write_zip64_trailer(void)
+{
+   struct zip64_dir_trailer trailer64;
+   struct zip64_dir_trailer_locator locator64;
+
+   copy_le32(trailer64.magic, 0x06064b50);
+   copy_le64(trailer64.record_size, ZIP64_DIR_TRAILER_RECORD_SIZE);
+   copy_le16(trailer64.creator_version, max_creator_version);
+   copy_le16(trailer64.version, 45);
+   copy_le32(trailer64.disk, 0);
+   copy_le32(trailer64.directory_start_disk, 0);
+   copy_le64(trailer64.entries_on_this_disk, zip_dir_entries);
+   copy_le64(trailer64.entries, zip_dir_entries);
+   copy_le64(trailer64.size, zip_dir_offset

[PATCH 2/3] archive-zip: use a local variable to store the creator version

2015-08-22 Thread René Scharfe

Use a simpler conditional right next to the code which makes a higher
creator version necessary -- namely symlink handling and support for
executable files -- instead of a long line with a ternary operator.
The resulting code has more lines but is simpler and allows reuse of
the value easily.

Signed-off-by: Rene Scharfe 
---
 archive-zip.c | 6 --
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/archive-zip.c b/archive-zip.c
index ae3d67f..2a76156 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -223,6 +223,7 @@ static int write_zip_entry(struct archiver_args *args,
unsigned long size;
int is_binary = -1;
const char *path_without_prefix = path + args->baselen;
+   unsigned int creator_version = 0;
 
crc = crc32(0, NULL, 0);
 
@@ -251,6 +252,8 @@ static int write_zip_entry(struct archiver_args *args,
method = 0;
attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) :
(mode & 0111) ? ((mode) << 16) : 0;
+   if (S_ISLNK(mode) || (mode & 0111))
+   creator_version = 0x0317;
if (S_ISREG(mode) && args->compression_level != 0 && size > 0)
method = 8;
 
@@ -303,8 +306,7 @@ static int write_zip_entry(struct archiver_args *args,
}
 
copy_le32(dirent.magic, 0x02014b50);
-   copy_le16(dirent.creator_version,
-   S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0);
+   copy_le16(dirent.creator_version, creator_version);
copy_le16(dirent.version, 10);
copy_le16(dirent.flags, flags);
copy_le16(dirent.compression_method, method);
-- 
2.5.0


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH 1/3] t5004: test ZIP archives with many entries

2015-08-22 Thread René Scharfe

A ZIP file directory has a 16-bit field for the number of entries it
contains.  There are 64-bit extensions to deal with that.  Demonstrate
that git archive --format=zip currently doesn't use them and instead
overflows the field.

InfoZIP's unzip doesn't care about this field and extracts all files
anyway.  Software that uses the directory for presenting a filesystem
like view quickly -- notably Windows -- depends on it, but doesn't
lend itself to an automatic test case easily.  Use InfoZIP's zipinfo,
which probably isn't available everywhere but at least can provides
*some* way to check this field.

To speed things up a bit create and commit only a subset of the files
and build a fake tree out of duplicates and pass that to git archive.

Signed-off-by: Rene Scharfe 
---
 t/t5004-archive-corner-cases.sh | 40 
 1 file changed, 40 insertions(+)

diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
index 654adda..c6bd729 100755
--- a/t/t5004-archive-corner-cases.sh
+++ b/t/t5004-archive-corner-cases.sh
@@ -115,4 +115,44 @@ test_expect_success 'archive empty subtree by direct 
pathspec' '
check_dir extract sub
 '
 
+ZIPINFO=zipinfo
+
+test_lazy_prereq ZIPINFO '
+   n=$("$ZIPINFO" "$TEST_DIRECTORY"/t5004/empty.zip | sed -n "2s/.* //p")
+   test "x$n" = "x0"
+'
+
+test_expect_failure ZIPINFO 'zip archive with many entries' '
+   # add a directory with 256 files
+   mkdir 00 &&
+   for a in 0 1 2 3 4 5 6 7 8 9 a b c d e f
+   do
+   for b in 0 1 2 3 4 5 6 7 8 9 a b c d e f
+   do
+   : >00/$a$b
+   done
+   done &&
+   git add 00 &&
+   git commit -m "256 files in 1 directory" &&
+
+   # duplicate it to get 65536 files in 256 directories
+   subtree=$(git write-tree --prefix=00/) &&
+   for c in 0 1 2 3 4 5 6 7 8 9 a b c d e f
+   do
+   for d in 0 1 2 3 4 5 6 7 8 9 a b c d e f
+   do
+   echo "04 tree $subtree  $c$d"
+   done
+   done >tree &&
+   tree=$(git mktree expect &&
+   "$ZIPINFO" many.zip | head -2 | sed -n "2s/.* //p" >actual &&
+   test_cmp expect actual
+'
+
 test_done
-- 
2.5.0


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/3] t5004: test ZIP archives with many entries

2015-08-23 Thread René Scharfe

Am 23.08.2015 um 07:54 schrieb Eric Sunshine:
> On Sat, Aug 22, 2015 at 3:06 PM, René Scharfe  wrote:
>> diff --git a/t/t5004-archive-corner-cases.sh 
>> b/t/t5004-archive-corner-cases.sh
>> index 654adda..c6bd729 100755
>> --- a/t/t5004-archive-corner-cases.sh
>> +++ b/t/t5004-archive-corner-cases.sh
>> @@ -115,4 +115,44 @@ test_expect_success 'archive empty subtree by direct 
>> pathspec' '
>>  check_dir extract sub
>>   '
>>
>> +ZIPINFO=zipinfo
>> +
>> +test_lazy_prereq ZIPINFO '
>> +   n=$("$ZIPINFO" "$TEST_DIRECTORY"/t5004/empty.zip | sed -n "2s/.* 
>> //p")
>> +   test "x$n" = "x0"
>> +'
> 
> Unfortunately, this sed expression isn't portable due to dissimilar
> output of various zipinfo implementations. On Linux, the output of
> zipinfo is:
> 
>  $ zipinfo t/t5004/empty.zip
>  Archive:  t/t5004/empty.zip
>  Zip file size: 62 bytes, number of entries: 0
>  Empty zipfile.
>  $
> 
> however, on Mac OS X:
> 
>  $ zipinfo t/t5004/empty.zip
>  Archive:  t/t5004/empty.zip   62 bytes   0 files
>  Empty zipfile.
>  $
> 
> and on FreeBSD, the zipinfo command seems to have been removed
> altogether in favor of "unzip -Z" (emulate zipinfo).

Thanks for your thorough checks!

I suspected that zipinfo's output might be formatted differently on
different platforms and tried to guard against it by checking for the
number zero there. Git's ZIP file creation is platform independent
(modulo bugs), so having a test run at least somewhere should
suffice. In theory.

We could add support for the one-line-summary variant on OS X easily,
though.

> One might hope that "unzip -Z" would be a reasonable replacement for
> zipinfo, however, it is apparently only partially implemented on
> FreeBSD, and requires that -1 be passed, as well. Even with "unzip -Z
> -1", there are issues. The output on Linux and Mac OS X is:
> 
>  $ unzip -Z -1 t/t5004/empty.zip
>  Empty zipfile.
>  $
> 
> but FreeBSD differs:
> 
>  $ unzip -Z -1 t/t5004/empty.zip
>  $
> 
> With a non-empty zip file, the output is identical on all platforms:
> 
>  $ unzip -Z -1 twofiles.zip
>  file1
>  file2
>  $
> 
> So, if you combine that with "wc -l" or test_line_count, you may have
> a portable and reliable entry counter.

Counting all entries is slow, and more importantly it's not what we
want. In this test we need the number of entries recorded in the ZIP
directory, not the actual number of entries found by scanning the
archive, or the directory.

On Linux "unzip -Z -1 many.zip | wc -l" reports 65792 even before
adding ZIP64 support; only without -1 we get the interesting numbers
(specifically with "unzip -Z many.zip | sed -n '2p;$p'"):

Zip file size: 6841366 bytes, number of entries: 256
65792 files, 0 bytes uncompressed, 0 bytes compressed: 0.0%

> With these three patches applied, Mac OS X has trouble with 'many.zip':
> 
>  $ unzip -Z -1 many.zip
>  warning [many.zip]:  76 extra bytes at beginning or within zipfile
>(attempting to process anyway)
>  error [many.zip]:  reported length of central directory is
>-76 bytes too long (Atari STZip zipfile?  J.H.Holm ZIPSPLIT 1.1
>zipfile?).  Compensating...
>  00/
>  00/00
>  ...
>  ff/ff
>  error: expected central file header signature not found (file
>#65793). (please check that you have transferred or created the
>zipfile in the appropriate BINARY mode and that you have compiled
>UnZip properly)
> 
> And FreeBSD doesn't like it either:
> 
>  $ unzip -Z -1 many.zip
>  unzip: Invalid central directory signature
>  $
> 

Looks like they don't support ZIP64. Or I got some of the fields wrong
after all.

https://en.wikipedia.org/wiki/Zip_%28file_format%29#ZIP64 says: "OS X
Yosemite does support the creation of ZIP64 archives, but does not
support unzipping these archives using the shipped unzip command-line
utility or graphical Archive Utility.[citation needed]".

How does unzip react to a ZIP file with more than 65535 entries that
was created natively on these platforms? And what does zipinfo (a real
one, without -1) report at the top for such files?

Thanks,
René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Eric Sunshine mail delivery failure

2015-08-23 Thread René Scharfe

Eric, hope you see this reply on the list. Direct replies to 
sunsh...@sunshineco.com are rejected by my mail provider on submit in 
Thunderbird with the following message:


Requested action not taken: mailbox unavailable
invalid DNS MX or A/ resource record.

And with this one when using their web interface:

A message that you sent could not be delivered to one or more of
its recipients. This is a permanent error. The following address
failed:

"sunsh...@sunshineco.com":
no valid MX hosts found

It seems web.de wants you to get an  record before I'm allowed to 
send mails to you.  Sounds crazy.  Sorry about that.  Time to find a

better provider, I guess. :-(

René
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 60/68] prefer memcpy to strcpy

2015-09-27 Thread René Scharfe


Am 24.09.2015 um 23:08 schrieb Jeff King:

When we already know the length of a string (e.g., because
we just malloc'd to fit it), it's nicer to use memcpy than
strcpy, as it makes it more obvious that we are not going to
overflow the buffer (because the size we pass matches the
size in the allocation).

This also eliminates calls to strcpy, which make auditing
the code base harder.

Signed-off-by: Jeff King 
---
  compat/nedmalloc/nedmalloc.c | 5 +++--
  fast-import.c| 5 +++--
  revision.c   | 2 +-
  3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c
index 609ebba..a0a16eb 100644
--- a/compat/nedmalloc/nedmalloc.c
+++ b/compat/nedmalloc/nedmalloc.c
@@ -957,8 +957,9 @@ char *strdup(const char *s1)
  {
char *s2 = 0;
if (s1) {
-   s2 = malloc(strlen(s1) + 1);
-   strcpy(s2, s1);
+   size_t len = strlen(s1) + 1;
+   s2 = malloc(len);
+   memcpy(s2, s1, len);


This leaves the last byte uninitialized; it was set to NUL by strcpy() 
before.



}
return s2;
  }
diff --git a/fast-import.c b/fast-import.c
index 895c6b4..cf6d8bc 100644
--- a/fast-import.c
+++ b/fast-import.c
@@ -644,8 +644,9 @@ static void *pool_calloc(size_t count, size_t size)

  static char *pool_strdup(const char *s)
  {
-   char *r = pool_alloc(strlen(s) + 1);
-   strcpy(r, s);
+   size_t len = strlen(s) + 1;
+   char *r = pool_alloc(len);
+   memcpy(r, s, len);


Same here.


return r;
  }

diff --git a/revision.c b/revision.c
index af2a18e..2236463 100644
--- a/revision.c
+++ b/revision.c
@@ -38,7 +38,7 @@ char *path_name(const struct name_path *path, const char 
*name)
}
n = xmalloc(len);
m = n + len - (nlen + 1);
-   strcpy(m, name);
+   memcpy(m, name, nlen + 1);


This copies the NUL byte terminating the string, so it's OK.  However, I 
wonder if using a strbuf for building the path in one go instead would 
simplify this function without too much of a performance impact.



for (p = path; p; p = p->up) {
if (p->elem_len) {
m -= p->elem_len + 1;



--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 60/68] prefer memcpy to strcpy

2015-09-27 Thread René Scharfe


Am 27.09.2015 um 15:06 schrieb Torsten Bögershausen:

On 2015-09-27 13.19, René Scharfe wrote:

Am 24.09.2015 um 23:08 schrieb Jeff King:

When we already know the length of a string (e.g., because
we just malloc'd to fit it), it's nicer to use memcpy than
strcpy, as it makes it more obvious that we are not going to
overflow the buffer (because the size we pass matches the
size in the allocation).

This also eliminates calls to strcpy, which make auditing
the code base harder.

Signed-off-by: Jeff King 
---
   compat/nedmalloc/nedmalloc.c | 5 +++--
   fast-import.c| 5 +++--
   revision.c   | 2 +-
   3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c
index 609ebba..a0a16eb 100644
--- a/compat/nedmalloc/nedmalloc.c
+++ b/compat/nedmalloc/nedmalloc.c
@@ -957,8 +957,9 @@ char *strdup(const char *s1)
   {
   char *s2 = 0;
   if (s1) {
-s2 = malloc(strlen(s1) + 1);
-strcpy(s2, s1);
+size_t len = strlen(s1) + 1;
+s2 = malloc(len);
+memcpy(s2, s1, len);


This leaves the last byte uninitialized; it was set to NUL by strcpy() before.


len is == strlen() +1, which should cover the NUL:

1 byte extra for NUL is allocated,
and memcpy will copy NUL from source.
(Or do I miss somethong ?)


No, you're right.  Sorry for the noise.

I fully blame this on lack of coffeine because my electric kettle just 
broke. O_o


René
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 60/68] prefer memcpy to strcpy

2015-09-27 Thread René Scharfe


Am 27.09.2015 um 15:13 schrieb René Scharfe:

Am 27.09.2015 um 15:06 schrieb Torsten Bögershausen:

On 2015-09-27 13.19, René Scharfe wrote:

Am 24.09.2015 um 23:08 schrieb Jeff King:

When we already know the length of a string (e.g., because
we just malloc'd to fit it), it's nicer to use memcpy than
strcpy, as it makes it more obvious that we are not going to
overflow the buffer (because the size we pass matches the
size in the allocation).

This also eliminates calls to strcpy, which make auditing
the code base harder.

Signed-off-by: Jeff King 
---
   compat/nedmalloc/nedmalloc.c | 5 +++--
   fast-import.c| 5 +++--
   revision.c   | 2 +-
   3 files changed, 7 insertions(+), 5 deletions(-)

diff --git a/compat/nedmalloc/nedmalloc.c
b/compat/nedmalloc/nedmalloc.c
index 609ebba..a0a16eb 100644
--- a/compat/nedmalloc/nedmalloc.c
+++ b/compat/nedmalloc/nedmalloc.c
@@ -957,8 +957,9 @@ char *strdup(const char *s1)
   {
   char *s2 = 0;
   if (s1) {
-s2 = malloc(strlen(s1) + 1);
-strcpy(s2, s1);
+size_t len = strlen(s1) + 1;
+s2 = malloc(len);
+memcpy(s2, s1, len);


This leaves the last byte uninitialized; it was set to NUL by
strcpy() before.


len is == strlen() +1, which should cover the NUL:

1 byte extra for NUL is allocated,
and memcpy will copy NUL from source.
(Or do I miss somethong ?)


No, you're right.  Sorry for the noise.

I fully blame this on lack of coffeine because my electric kettle just
broke. O_o


Thinking a bit more about it (slowly): The choice of the variable name 
might have been a factor as well.  When I see "len" for a string then I 
don't expect it to include the trailing NUL.  "size" would be better 
because I expect it to contain the number of bytes needed to store an 
object.


René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] grep: use regcomp() for icase search with non-ascii patterns

2015-07-06 Thread René Scharfe


Am 06.07.2015 um 14:42 schrieb Nguyễn Thái Ngọc Duy:

Noticed-by: Plamen Totev 
Signed-off-by: Nguyễn Thái Ngọc Duy 
---
  grep.c | 14 +++---
  1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/grep.c b/grep.c
index b58c7c6..48db15a 100644
--- a/grep.c
+++ b/grep.c
@@ -378,7 +378,7 @@ static void free_pcre_regexp(struct grep_pat *p)
  }
  #endif /* !USE_LIBPCRE */

-static int is_fixed(const char *s, size_t len)
+static int is_fixed(const char *s, size_t len, int ignore_icase)
  {
size_t i;

@@ -391,6 +391,13 @@ static int is_fixed(const char *s, size_t len)
for (i = 0; i < len; i++) {
if (is_regex_special(s[i]))
return 0;
+   /*
+* The builtin substring search can only deal with case
+* insensitivity in ascii range. If there is something outside
+* of that range, fall back to regcomp.
+*/
+   if (ignore_icase && (unsigned char)s[i] >= 128)
+   return 0;


How about "isascii(s[i])"?


}

return 1;
@@ -398,18 +405,19 @@ static int is_fixed(const char *s, size_t len)

  static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
  {
+   int ignore_icase = opt->regflags & REG_ICASE || p->ignore_case;
int err;

p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;


Using p->ignore_case before this line, as in initialization of the new 
variable ignore_icase above, changes the meaning.




-   if (opt->fixed || is_fixed(p->pattern, p->patternlen))
+   if (opt->fixed || is_fixed(p->pattern, p->patternlen, ignore_icase))
p->fixed = 1;
else
p->fixed = 0;

if (p->fixed) {
-   if (opt->regflags & REG_ICASE || p->ignore_case)
+   if (ignore_case)


ignore_icase instead?  ignore_case is for the config variable 
core.ignorecase.  Tricky.



p->kws = kwsalloc(tolower_trans_tbl);
else
p->kws = kwsalloc(NULL);



So the optimization before this patch was that if a string was searched 
for without -F then it would be treated as a fixed string anyway unless 
it contained regex special characters.  Searching for fixed strings 
using the kwset functions is faster than using regcomp and regexec, 
which makes the exercise worthwhile.


Your patch disables the optimization if non-ASCII characters are 
searched for because kwset handles case transformations only for ASCII 
chars.


Another consequence of this limitation is that -Fi (explicit 
case-insensitive fixed-string search) doesn't work properly with 
non-ASCII chars neither.  How can we handle this one?  Fall back to 
regcomp by escaping all special characters?  Or at least warn?


Tests would be nice. :)

René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

git@vger.kernel.org

2015-07-10 Thread René Scharfe

Signed-off-by: Rene Scharfe 
---
GIT_TEST_CHAIN_LINT complains about the missing &&s and is enabled
by default now.

 t/perf/p5310-pack-bitmaps.sh | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh
index f8ed857..de2a224 100755
--- a/t/perf/p5310-pack-bitmaps.sh
+++ b/t/perf/p5310-pack-bitmaps.sh
@@ -39,14 +39,14 @@ test_expect_success 'create partial bitmap state' '
 
# now kill off all of the refs and pretend we had
# just the one tip
-   rm -rf .git/logs .git/refs/* .git/packed-refs
-   git update-ref HEAD $cutoff
+   rm -rf .git/logs .git/refs/* .git/packed-refs &&
+   git update-ref HEAD $cutoff &&
 
# and then repack, which will leave us with a nice
# big bitmap pack of the "old" history, and all of
# the new history will be loose, as if it had been pushed
# up incrementally and exploded via unpack-objects
-   git repack -Ad
+   git repack -Ad &&
 
# and now restore our original tip, as if the pushes
# had happened
-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

git@vger.kernel.org

2015-07-11 Thread René Scharfe


Am 10.07.2015 um 22:50 schrieb Jeff King:

Thanks, this definitely is a problem, but we already have a fix in the
sb/p5310-and-chain topic. I thought that had been merged-up, but it
looks like it is only in "next" right now.


All the better.  And I see it's in master now.

René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] diff: parse ws-error-highlight option more strictly

2015-07-11 Thread René Scharfe

Check if a matched token is followed by a delimiter before advancing the
pointer arg.  This avoids accepting composite words like "allnew" or
"defaultcontext".

Signed-off-by: Rene Scharfe 
---
 diff.c | 7 ++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/diff.c b/diff.c
index 87b16d5..0f17ec5 100644
--- a/diff.c
+++ b/diff.c
@@ -3653,7 +3653,12 @@ static void enable_patch_output(int *fmt) {
 
 static int parse_one_token(const char **arg, const char *token)
 {
-   return skip_prefix(*arg, token, arg) && (!**arg || **arg == ',');
+   const char *rest;
+   if (skip_prefix(*arg, token, &rest) && (!*rest || *rest == ',')) {
+   *arg = rest;
+   return 1;
+   }
+   return 0;
 }
 
 static int parse_ws_error_highlight(struct diff_options *opt, const char *arg)
-- 
2.4.4

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] archive-tar: write extended headers for far-future mtime

2016-06-20 Thread René Scharfe


Am 16.06.2016 um 06:37 schrieb Jeff King:

The ustar format represents timestamps as seconds since the
epoch, but only has room to store 11 octal digits.  To
express anything larger, we need to use an extended header.
This is exactly the same case we fixed for the size field in
the previous commit, and the solution here follows the same
pattern.

This is even mentioned as an issue in f2f0267 (archive-tar:
use xsnprintf for trivial formatting, 2015-09-24), but since
it only affected things far in the future, it wasn't worth
dealing with. But note that my calculations claiming
thousands of years were off there; because our xsnprintf
produces a NUL byte, we only have until the year 2242 to
fix this.

Given that this is just around the corner (geologically
speaking), and because the fix for "size" makes doing this
on top trivial, let's just make it work.

Note that we don't (and never did) handle negative
timestamps (i.e., before 1970). This would probably not be
too hard to support in the same way, but since git does not
support negative timestamps at all, I didn't bother here.

Unlike the last patch for "size", this one is easy to test
efficiently (the magic date is octal 010001):

   $ echo content >file
   $ git add file
   $ GIT_COMMITTER_DATE='@68719476737 +' \
 git commit -q -m 'tempori parendum'
   $ git archive HEAD | tar tvf -
   -rw-rw-r-- root/root 8 4147-08-20 03:32 file

With the current tip of master, this dies in xsnprintf (and
prior to f2f0267, it overflowed into the checksum field, and
the resulting tarfile claimed to be from the year 2242).

However, I decided not to include this in the test suite,
because I suspect it will create portability headaches,
including:

   1. The exact format of the system tar's "t" output.


Probably not worth it.


   2. System tars that cannot handle pax headers.


In t5000 there is a simple interpreter for path headers for systems 
whose tar doesn't handle them.  Adding one for mtime headers may be 
feasible.


It's just a bit complicated to link a pax header file to the file it 
applies to when it doesn't also contain a path header.  But we know that 
the mtime of all entries in tar files created by git archive are is the 
same, so we could simply read one header and then adjust the mtime of 
all files accordingly.



   3. System tars tha cannot handle dates that far in the
  future.

   4. If we replace "tar t" with "tar x", then filesystems
  that cannot handle dates that far in the future.


We can test that by supplying a test archive and set a prerequisite if 
tar (possibly with our header interpreter) and the filesystem can cope 
with that.



In other words, we do not really have a reliable tar reader
for these corner cases, so the best we could do is a
byte-for-byte comparison of the output.

Signed-off-by: Jeff King 
---
  archive-tar.c | 13 -
  1 file changed, 12 insertions(+), 1 deletion(-)

diff --git a/archive-tar.c b/archive-tar.c
index 7340b64..749722f 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -185,6 +185,14 @@ static inline unsigned long ustar_size(uintmax_t size)
return 0;
  }

+static inline unsigned long ustar_mtime(time_t mtime)
+{
+   if (mtime < 0777)


That should be less-or-equal, right?


+   return mtime;
+   else
+   return 0;
+}
+
  static void prepare_header(struct archiver_args *args,
   struct ustar_header *header,
   unsigned int mode, unsigned long size)
@@ -192,7 +200,8 @@ static void prepare_header(struct archiver_args *args,
xsnprintf(header->mode, sizeof(header->mode), "%07o", mode & 0);
xsnprintf(header->size, sizeof(header->size), "%011lo",
  S_ISREG(mode) ? ustar_size(size) : 0);
-   xsnprintf(header->mtime, sizeof(header->mtime), "%011lo", (unsigned long) 
args->time);
+   xsnprintf(header->mtime, sizeof(header->mtime), "%011lo",
+ ustar_mtime(args->time));

xsnprintf(header->uid, sizeof(header->uid), "%07o", 0);
xsnprintf(header->gid, sizeof(header->gid), "%07o", 0);
@@ -292,6 +301,8 @@ static int write_tar_entry(struct archiver_args *args,

if (ustar_size(size) != size)
strbuf_append_ext_header_uint(&ext_header, "size", size);
+   if (ustar_mtime(args->time) != args->time)
+   strbuf_append_ext_header_uint(&ext_header, "mtime", args->time);

prepare_header(args, &header, mode, size);



--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB

2016-06-20 Thread René Scharfe


Am 16.06.2016 um 06:37 schrieb Jeff King:

The ustar format has a fixed-length field for the size of
each file entry which is supposed to contain up to 11 bytes
of octal-formatted data plus a NUL or space terminator.

These means that the largest size we can represent is
0777, or 1 byte short of 8GB. The correct solution
for a larger file, according to POSIX.1-2001, is to add an
extended pax header, similar to how we handle long
filenames. This patch does that, and writes zero for the
size field in the ustar header (the last bit is not
mentioned by POSIX, but it matches how GNU tar behaves with
--format=pax).

This should be a strict improvement over the current
behavior, which is to die in xsnprintf with a "BUG".
However, there's some interesting history here.

Prior to f2f0267 (archive-tar: use xsnprintf for trivial
formatting, 2015-09-24), we silently overflowed the "size"
field. The extra bytes ended up in the "mtime" field of the
header, which was then immediately written itself,
overwriting our extra bytes. What that means depends on how
many bytes we wrote.

If the size was 64GB or greater, then we actually overflowed
digits into the mtime field, meaning our value was was
effectively right-shifted by those lost octal digits. And
this patch is again a strict improvement over that.

But if the size was between 8GB and 64GB, then our 12-byte
field held all of the actual digits, and only our NUL
terminator overflowed. According to POSIX, there should be a
NUL or space at the end of the field. However, GNU tar seems
to be lenient here, and will correctly parse a size up 64GB
(minus one) from the field. So sizes in this range might
have just worked, depending on the implementation reading
the tarfile.

This patch is mostly still an improvement there, as the 8GB
limit is specifically mentioned in POSIX as the correct
limit. But it's possible that it could be a regression
(versus the pre-f2f0267 state) if all of the following are
true:

   1. You have a file between 8GB and 64GB.

   2. Your tar implementation _doesn't_ know about pax
  extended headers.

   3. Your tar implementation _does_ parse 12-byte sizes from
  the ustar header without a delimiter.

It's probably not worth worrying about such an obscure set
of conditions, but I'm documenting it here just in case.

There's no test included here. I did confirm that this works
(using GNU tar) with:

   $ dd if=/dev/zero seek=64G bs=1 count=1 of=huge
   $ git add huge
   $ git commit -q -m foo
   $ git archive HEAD | head -c 1 | tar tvf - 2>/dev/null
   -rw-rw-r-- root/root 68719476737 2016-06-15 21:07 huge

Pre-f2f0267, this would yield a bogus size of 8GB, and
post-f2f0267, git-archive simply dies.

Unfortunately, it's quite an expensive test to run. For one
thing, unless your filesystem supports files with holes, it
takes 64GB of disk space (you might think piping straight to
`hash-object --stdin` would be better, but it's not; that
tries to buffer all 64GB in RAM!). Furthermore, hashing and
compressing the object takes several minutes of CPU time.

We could ship just the resulting compressed object data as a
loose object, but even that takes 64MB. So sadly, this code
path remains untested in the test suite.


If we could set the limit to a lower value than 8GB for testing then we 
could at least check if the extended header is written, e.g. if 
ustar_size() could be convinced to return 0 every time using a hidden 
command line parameter or an environment variable or something better.




Signed-off-by: Jeff King 
---
  archive-tar.c | 28 +++-
  1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/archive-tar.c b/archive-tar.c
index cb99df2..7340b64 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -137,6 +137,20 @@ static void strbuf_append_ext_header(struct strbuf *sb, 
const char *keyword,
strbuf_addch(sb, '\n');
  }

+/*
+ * Like strbuf_append_ext_header, but for numeric values.
+ */
+static void strbuf_append_ext_header_uint(struct strbuf *sb,
+ const char *keyword,
+ uintmax_t value)
+{
+   char buf[40]; /* big enough for 2^128 in decimal, plus NUL */
+   int len;
+
+   len = xsnprintf(buf, sizeof(buf), "%"PRIuMAX, value);
+   strbuf_append_ext_header(sb, keyword, buf, len);
+}
+
  static unsigned int ustar_header_chksum(const struct ustar_header *header)
  {
const unsigned char *p = (const unsigned char *)header;
@@ -163,12 +177,21 @@ static size_t get_path_prefix(const char *path, size_t 
pathlen, size_t maxlen)
return i;
  }

+static inline unsigned long ustar_size(uintmax_t size)
+{
+   if (size < 0777UL)


Shouldn't that be less-or-equal?


+   return size;
+   else
+   return 0;
+}
+
  static void prepare_header(struct archiver_args *args,
   struct ustar_header *header,
   unsigned

Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB

2016-06-21 Thread René Scharfe

Am 21.06.2016 um 17:59 schrieb Jeff King:
> On Tue, Jun 21, 2016 at 12:54:11AM +0200, René Scharfe wrote:
> 
>>> Unfortunately, it's quite an expensive test to run. For one
>>> thing, unless your filesystem supports files with holes, it
>>> takes 64GB of disk space (you might think piping straight to
>>> `hash-object --stdin` would be better, but it's not; that
>>> tries to buffer all 64GB in RAM!). Furthermore, hashing and
>>> compressing the object takes several minutes of CPU time.
>>>
>>> We could ship just the resulting compressed object data as a
>>> loose object, but even that takes 64MB. So sadly, this code
>>> path remains untested in the test suite.
>>
>> If we could set the limit to a lower value than 8GB for testing then we
>> could at least check if the extended header is written, e.g. if ustar_size()
>> could be convinced to return 0 every time using a hidden command line
>> parameter or an environment variable or something better.
> 
> Yes, we could do that, though I think it loses most of the value of the
> test. We can check that if we hit an arbitrary value we generate the pax
> header, but I think what we _really_ care about is: did we generate an
> output that somebody else's tar implementation can handle.

I agree with the last point, but don't see how that diminishes the
value of such a test.  If we provide file sizes only through extended
headers (the normal header field being set to 0) and we can extract
files with correct sizes then tar must have interpreted those header
as intended, right?

(Or it just guessed the sizes by searching for the next header magic,
but such a fallback won't be accurate for files ending with NUL
characters due to NUL-padding, so we just have to add such a file.)

René


-- >8 --
Subject: archive-tar: test creation of pax extended size headers

---
The value 120 is magic; we need it to pass the tests.  That's
because prepare_header() is used for building extended header
records as well and we don't create extended headers for extended
headers (not sure if that would work anyway), so they simply
vanish when they're over the limit as their size field is set to
zero.

 archive-tar.c   | 7 ++-
 t/t5000-tar-tree.sh | 7 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/archive-tar.c b/archive-tar.c
index f53e61c..fbbc4cc 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -14,6 +14,7 @@ static char block[BLOCKSIZE];
 static unsigned long offset;
 
 static int tar_umask = 002;
+static unsigned long ustar_size_max = 0777UL;
 
 static int write_tar_filter_archive(const struct archiver *ar,
struct archiver_args *args);
@@ -179,7 +180,7 @@ static size_t get_path_prefix(const char *path, size_t 
pathlen, size_t maxlen)
 
 static inline unsigned long ustar_size(uintmax_t size)
 {
-   if (size <= 0777UL)
+   if (size <= ustar_size_max)
return size;
else
return 0;
@@ -412,6 +413,10 @@ static int git_tar_config(const char *var, const char 
*value, void *cb)
}
return 0;
}
+   if (!strcmp(var, "tar.ustarsizemax")) {
+   ustar_size_max = git_config_ulong(var, value);
+   return 0;
+   }
 
return tar_filter_config(var, value, cb);
 }
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 4b68bba..03bb4c7 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -102,6 +102,7 @@ test_expect_success \
  echo long filename >a/four$hundred &&
  mkdir a/bin &&
  test-genrandom "frotz" 50 >a/bin/sh &&
+ printf "\0\0\0" >>a/bin/sh &&
  printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 &&
  printf "A not substituted O" >a/substfile2 &&
  if test_have_prereq SYMLINKS; then
@@ -157,6 +158,12 @@ test_expect_success 'git-archive --prefix=olde-' '
 
 check_tar with_olde-prefix olde-
 
+test_expect_success !TAR_NEEDS_PAX_FALLBACK 'pax extended size headers' '
+   git -c tar.ustarsizemax=120 archive HEAD >extended_size_header.tar
+'
+
+check_tar extended_size_header
+
 test_expect_success 'git archive on large files' '
 test_config core.bigfilethreshold 1 &&
 git archive HEAD >b3.tar &&
-- 
2.9.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB

2016-06-21 Thread René Scharfe

Am 21.06.2016 um 22:42 schrieb René Scharfe:
> The value 120 is magic; we need it to pass the tests.  That's
> because prepare_header() is used for building extended header
> records as well and we don't create extended headers for extended
> headers (not sure if that would work anyway), so they simply
> vanish when they're over the limit as their size field is set to
> zero.

So how about something like this to make sure extended headers are
only written for regular files and not for other extended headers?
---
 archive-tar.c | 10 ++
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index ed562d4..f53e61c 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -199,7 +199,7 @@ static void prepare_header(struct archiver_args *args,
 {
xsnprintf(header->mode, sizeof(header->mode), "%07o", mode & 0);
xsnprintf(header->size, sizeof(header->size), "%011lo",
- S_ISREG(mode) ? ustar_size(size) : 0);
+ S_ISREG(mode) ? size : 0);
xsnprintf(header->mtime, sizeof(header->mtime), "%011lo",
  ustar_mtime(args->time));
 
@@ -240,7 +240,7 @@ static int write_tar_entry(struct archiver_args *args,
struct ustar_header header;
struct strbuf ext_header = STRBUF_INIT;
unsigned int old_mode = mode;
-   unsigned long size;
+   unsigned long size, size_in_header;
void *buffer;
int err = 0;
 
@@ -299,12 +299,14 @@ static int write_tar_entry(struct archiver_args *args,
memcpy(header.linkname, buffer, size);
}
 
-   if (S_ISREG(mode) && ustar_size(size) != size)
+   size_in_header = S_ISREG(mode) ? ustar_size(size) : size;
+   if (size_in_header != size)
strbuf_append_ext_header_uint(&ext_header, "size", size);
+
if (ustar_mtime(args->time) != args->time)
strbuf_append_ext_header_uint(&ext_header, "mtime", args->time);
 
-   prepare_header(args, &header, mode, size);
+   prepare_header(args, &header, mode, size_in_header);
 
if (ext_header.len > 0) {
err = write_extended_header(args, sha1, ext_header.buf,
-- 
2.9.0


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB

2016-06-21 Thread René Scharfe

Am 21.06.2016 um 17:59 schrieb Jeff King:
> On Tue, Jun 21, 2016 at 12:54:11AM +0200, René Scharfe wrote:
>
>>> Unfortunately, it's quite an expensive test to run. For one
>>> thing, unless your filesystem supports files with holes, it
>>> takes 64GB of disk space (you might think piping straight to
>>> `hash-object --stdin` would be better, but it's not; that
>>> tries to buffer all 64GB in RAM!). Furthermore, hashing and
>>> compressing the object takes several minutes of CPU time.
>>>
>>> We could ship just the resulting compressed object data as a
>>> loose object, but even that takes 64MB. So sadly, this code
>>> path remains untested in the test suite.
>>
>> If we could set the limit to a lower value than 8GB for testing then we
>> could at least check if the extended header is written, e.g. if ustar_size()
>> could be convinced to return 0 every time using a hidden command line
>> parameter or an environment variable or something better.
>
> Yes, we could do that, though I think it loses most of the value of the
> test. We can check that if we hit an arbitrary value we generate the pax
> header, but I think what we _really_ care about is: did we generate an
> output that somebody else's tar implementation can handle.

I agree with the last point, but don't see how that diminishes the
value of such a test.  If we provide file sizes only through extended
headers (the normal header field being set to 0) and we can extract
files with correct sizes then tar must have interpreted those header
as intended, right?

(Or it just guessed the sizes by searching for the next header magic,
but such a fallback won't be accurate for files ending with NUL
characters due to NUL-padding, so we just have to add such a file.)

René


-- >8 --
Subject: archive-tar: test creation of pax extended size headers

---
The value 120 is magic; we need it to pass the tests.  That's
because prepare_header() is used for building extended header
records as well and we don't create extended headers for extended
headers (not sure if that would work anyway), so they simply
vanish when they're over the limit as their size field is set to
zero.

 archive-tar.c   | 7 ++-
 t/t5000-tar-tree.sh | 7 +++
 2 files changed, 13 insertions(+), 1 deletion(-)

diff --git a/archive-tar.c b/archive-tar.c
index f53e61c..fbbc4cc 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -14,6 +14,7 @@ static char block[BLOCKSIZE];
 static unsigned long offset;
 
 static int tar_umask = 002;
+static unsigned long ustar_size_max = 0777UL;
 
 static int write_tar_filter_archive(const struct archiver *ar,
struct archiver_args *args);
@@ -179,7 +180,7 @@ static size_t get_path_prefix(const char *path, size_t 
pathlen, size_t maxlen)
 
 static inline unsigned long ustar_size(uintmax_t size)
 {
-   if (size <= 0777UL)
+   if (size <= ustar_size_max)
return size;
else
return 0;
@@ -412,6 +413,10 @@ static int git_tar_config(const char *var, const char 
*value, void *cb)
}
return 0;
}
+   if (!strcmp(var, "tar.ustarsizemax")) {
+   ustar_size_max = git_config_ulong(var, value);
+   return 0;
+   }
 
return tar_filter_config(var, value, cb);
 }
diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh
index 4b68bba..03bb4c7 100755
--- a/t/t5000-tar-tree.sh
+++ b/t/t5000-tar-tree.sh
@@ -102,6 +102,7 @@ test_expect_success \
  echo long filename >a/four$hundred &&
  mkdir a/bin &&
  test-genrandom "frotz" 50 >a/bin/sh &&
+ printf "\0\0\0" >>a/bin/sh &&
  printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 &&
  printf "A not substituted O" >a/substfile2 &&
  if test_have_prereq SYMLINKS; then
@@ -157,6 +158,12 @@ test_expect_success 'git-archive --prefix=olde-' '
 
 check_tar with_olde-prefix olde-
 
+test_expect_success !TAR_NEEDS_PAX_FALLBACK 'pax extended size headers' '
+   git -c tar.ustarsizemax=120 archive HEAD >extended_size_header.tar
+'
+
+check_tar extended_size_header
+
 test_expect_success 'git archive on large files' '
 test_config core.bigfilethreshold 1 &&
 git archive HEAD >b3.tar &&
-- 
2.9.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB

2016-06-21 Thread René Scharfe


Am 21.06.2016 um 23:02 schrieb Jeff King:

On Tue, Jun 21, 2016 at 10:42:52PM +0200, René Scharfe wrote:


If we could set the limit to a lower value than 8GB for testing then we
could at least check if the extended header is written, e.g. if ustar_size()
could be convinced to return 0 every time using a hidden command line
parameter or an environment variable or something better.


Yes, we could do that, though I think it loses most of the value of the
test. We can check that if we hit an arbitrary value we generate the pax
header, but I think what we _really_ care about is: did we generate an
output that somebody else's tar implementation can handle.


I agree with the last point, but don't see how that diminishes the
value of such a test.  If we provide file sizes only through extended
headers (the normal header field being set to 0) and we can extract
files with correct sizes then tar must have interpreted those header
as intended, right?


The diminished value is:

   1. This is a situation that doesn't actually happen in real life.

   2. Now we're carrying extra code inside git only for the sake of
  testing (which can have its own bugs, etc).

Still, it may be better than nothing.


-- >8 --
Subject: archive-tar: test creation of pax extended size headers

---
The value 120 is magic; we need it to pass the tests.  That's
because prepare_header() is used for building extended header
records as well and we don't create extended headers for extended
headers (not sure if that would work anyway), so they simply
vanish when they're over the limit as their size field is set to
zero.


Right, so this is sort of what I meant in (2). Now we have a
tar.ustarsizemax setting shipped in git that is totally broken if you
set it to "1".

I can live with it as a tradeoff, but it is definitely a negative IMHO.


Yes, it's only useful as a debug flag, but the fact that it breaks 
highlights a (admittedly mostly theoretical) shortcoming: The code 
doesn't handle extended headers that are over the size limit as nicely 
as it could.  So the test was already worth it, even if won't land in 
master. :)


René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB

2016-06-21 Thread René Scharfe

Am 21.06.2016 um 23:04 schrieb Jeff King:
> On Tue, Jun 21, 2016 at 10:57:43PM +0200, René Scharfe wrote:
> 
>> Am 21.06.2016 um 22:42 schrieb René Scharfe:
>>> The value 120 is magic; we need it to pass the tests.  That's
>>> because prepare_header() is used for building extended header
>>> records as well and we don't create extended headers for extended
>>> headers (not sure if that would work anyway), so they simply
>>> vanish when they're over the limit as their size field is set to
>>> zero.
>>
>> So how about something like this to make sure extended headers are
>> only written for regular files and not for other extended headers?
> 
> This is quite similar to what I wrote originally, but I moved to the
> ustar_size() format to better match the mtime code (which needs
> something like that, because we pass around args->time).
> 
> I think you could drop ustar_size() completely here and just put the
> "if" into write_tar_entry().

Which would look like this:

---
 archive-tar.c | 24 ++--
 1 file changed, 22 insertions(+), 2 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index cb99df2..274bdfa 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -137,6 +137,20 @@ static void strbuf_append_ext_header(struct strbuf *sb, 
const char *keyword,
strbuf_addch(sb, '\n');
 }
 
+/*
+ * Like strbuf_append_ext_header, but for numeric values.
+ */
+static void strbuf_append_ext_header_uint(struct strbuf *sb,
+ const char *keyword,
+ uintmax_t value)
+{
+   char buf[40]; /* big enough for 2^128 in decimal, plus NUL */
+   int len;
+
+   len = xsnprintf(buf, sizeof(buf), "%"PRIuMAX, value);
+   strbuf_append_ext_header(sb, keyword, buf, len);
+}
+
 static unsigned int ustar_header_chksum(const struct ustar_header *header)
 {
const unsigned char *p = (const unsigned char *)header;
@@ -208,7 +222,7 @@ static int write_tar_entry(struct archiver_args *args,
struct ustar_header header;
struct strbuf ext_header = STRBUF_INIT;
unsigned int old_mode = mode;
-   unsigned long size;
+   unsigned long size, size_in_header;
void *buffer;
int err = 0;
 
@@ -267,7 +281,13 @@ static int write_tar_entry(struct archiver_args *args,
memcpy(header.linkname, buffer, size);
}
 
-   prepare_header(args, &header, mode, size);
+   size_in_header = size;
+   if (S_ISREG(mode) && size > 0777UL) {
+   size_in_header = 0;
+   strbuf_append_ext_header_uint(&ext_header, "size", size);
+   }
+
+   prepare_header(args, &header, mode, size_in_header);
 
if (ext_header.len > 0) {
err = write_extended_header(args, sha1, ext_header.buf,
-- 
2.9.0
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] archive-tar: write extended headers for far-future mtime

2016-06-21 Thread René Scharfe


Am 21.06.2016 um 00:54 schrieb René Scharfe:

Am 16.06.2016 um 06:37 schrieb Jeff King:

   2. System tars that cannot handle pax headers.


In t5000 there is a simple interpreter for path headers for systems
whose tar doesn't handle them.  Adding one for mtime headers may be
feasible.

It's just a bit complicated to link a pax header file to the file it
applies to when it doesn't also contain a path header.  But we know that
the mtime of all entries in tar files created by git archive are is the
same, so we could simply read one header and then adjust the mtime of
all files accordingly.


This brings me to the idea of using a single global pax header for mtime 
instead of one per entry.  It reduces the size of the archive and allows 
for slightly easier testing -- it just fits better since we know that 
all our mtimes are the same.


René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] archive-tar: write extended headers for far-future mtime

2016-06-23 Thread René Scharfe


Am 23.06.2016 um 21:22 schrieb Jeff King:

On Wed, Jun 22, 2016 at 07:46:25AM +0200, René Scharfe wrote:


Am 21.06.2016 um 00:54 schrieb René Scharfe:

Am 16.06.2016 um 06:37 schrieb Jeff King:

2. System tars that cannot handle pax headers.


In t5000 there is a simple interpreter for path headers for systems
whose tar doesn't handle them.  Adding one for mtime headers may be
feasible.

It's just a bit complicated to link a pax header file to the file it
applies to when it doesn't also contain a path header.  But we know that
the mtime of all entries in tar files created by git archive are is the
same, so we could simply read one header and then adjust the mtime of
all files accordingly.


This brings me to the idea of using a single global pax header for mtime
instead of one per entry.  It reduces the size of the archive and allows for
slightly easier testing -- it just fits better since we know that all our
mtimes are the same.


Yeah, I had a similar thought while writing it, but wasn't quite sure
how that was supposed to be formatted. I modeled my output after what
GNU tar writes, but of course they are expecting a different mtime for
each file.

I'll look into how global pax headers should look.


Moving the strbuf_append_ext_header() call into 
write_global_extended_header() should be enough.  The changes to the 
test script are a bit more interesting, though.  Perhaps I can come up 
with something over the weekend.


René
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] am: ignore return value of write_file()

2016-07-07 Thread René Scharfe

write_file() either returns 0 or dies, so there is no point in checking
its return value.  The callers of the wrappers write_state_text(),
write_state_count() and write_state_bool() consequently already ignore
their return values.  Stop pretenting we care and make them void.

Signed-off-by: Rene Scharfe 
---
 builtin/am.c | 18 +-
 1 file changed, 9 insertions(+), 9 deletions(-)

diff --git a/builtin/am.c b/builtin/am.c
index d5da5fe..339e360 100644
--- a/builtin/am.c
+++ b/builtin/am.c
@@ -184,22 +184,22 @@ static inline const char *am_path(const struct am_state 
*state, const char *path
 /**
  * For convenience to call write_file()
  */
-static int write_state_text(const struct am_state *state,
-   const char *name, const char *string)
+static void write_state_text(const struct am_state *state,
+const char *name, const char *string)
 {
-   return write_file(am_path(state, name), "%s", string);
+   write_file(am_path(state, name), "%s", string);
 }
 
-static int write_state_count(const struct am_state *state,
-const char *name, int value)
+static void write_state_count(const struct am_state *state,
+ const char *name, int value)
 {
-   return write_file(am_path(state, name), "%d", value);
+   write_file(am_path(state, name), "%d", value);
 }
 
-static int write_state_bool(const struct am_state *state,
-   const char *name, int value)
+static void write_state_bool(const struct am_state *state,
+const char *name, int value)
 {
-   return write_state_text(state, name, value ? "t" : "f");
+   write_state_text(state, name, value ? "t" : "f");
 }
 
 /**
-- 
2.9.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] notes-merge: use O_EXCL to avoid overwriting existing files

2016-07-07 Thread René Scharfe

Use the open(2) flag O_EXCL to ensure the file doesn't already exist
instead of (racily) calling stat(2) through file_exists().  While at it
switch to xopen() to reduce code duplication and get more consistent
error messages.

Signed-off-by: Rene Scharfe 
---
 notes-merge.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/notes-merge.c b/notes-merge.c
index b7814c9..2b29fc4 100644
--- a/notes-merge.c
+++ b/notes-merge.c
@@ -298,12 +298,8 @@ static void write_buf_to_worktree(const unsigned char *obj,
char *path = git_pathdup(NOTES_MERGE_WORKTREE "/%s", sha1_to_hex(obj));
if (safe_create_leading_directories_const(path))
die_errno("unable to create directory for '%s'", path);
-   if (file_exists(path))
-   die("found existing file at '%s'", path);
 
-   fd = open(path, O_WRONLY | O_TRUNC | O_CREAT, 0666);
-   if (fd < 0)
-   die_errno("failed to open '%s'", path);
+   fd = xopen(path, O_WRONLY | O_EXCL | O_CREAT, 0666);
 
while (size > 0) {
long ret = write_in_full(fd, buf, size);
-- 
2.9.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] .gitattributes: set file type for C files

2016-07-07 Thread René Scharfe

Set the diff attribute for C source file to "cpp" in order to improve
git's ability to determine hunk headers.  In particular it helps avoid
showing unindented labels in hunk headers.  That in turn is useful for
git diff -W and git grep -W, which show whole functions now instead of
stopping at a label.

Signed-off-by: Rene Scharfe 
---
 .gitattributes | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.gitattributes b/.gitattributes
index 5e98806..320e33c 100644
--- a/.gitattributes
+++ b/.gitattributes
@@ -1,3 +1,3 @@
 * whitespace=!indent,trail,space
-*.[ch] whitespace=indent,trail,space
+*.[ch] whitespace=indent,trail,space diff=cpp
 *.sh whitespace=indent,trail,space
-- 
2.9.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] am: ignore return value of write_file()

2016-07-08 Thread René Scharfe


Hi Dscho,

Am 08.07.2016 um 08:33 schrieb Johannes Schindelin:

On Thu, 7 Jul 2016, René Scharfe wrote:

write_file() either returns 0 or dies, so there is no point in checking
its return value.


The question is whether it makes sense for write_file() to die(). It is a
library function and not every caller can be happy with that function to
exit the program when some file could not be written, without a chance to
tell the user what to do about the situation.

If write_file() was defined in builtin/am.c, as a static function, I would
grudgingly acquiesce, but it is not.

IMO it would be better to fix write_file() to *not* die() but return
error() instead.


there is write_file_gently() for that purpose, but it's used only by a 
single caller that exits on failure after all, and in fact Peff's series 
drops it.


So I think write_file() is fine, and it's rather a question of whether 
am should use write_file_gently() instead.  I don't see why, but perhaps 
that's because it's Friday..


René
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 0/8] write_file cleanups

2016-07-08 Thread René Scharfe


Am 08.07.2016 um 11:04 schrieb Jeff King:

Here it is. There actually weren't that many spots to clean up, as quite
a few of them have a "twist" where they want to do something clever,
like open the file and feed the descriptor to a sub-function, or open
with funny things like O_EXCL.

But still, the diffstat is pleasing:

  builtin/am.c | 25 +++--
  builtin/branch.c |  5 +---
  builtin/config.c |  2 +-
  builtin/merge.c  | 45 --
  cache.h  | 17 ++--
  wrapper.c| 52 ---
  6 files changed, 44 insertions(+), 102 deletions(-)

and that even includes adding some function documentation.


Haha, feels like Candy Crush. :)

(Sent a single small patch, lots of places get patched as if by magic.)

Thanks,
René
--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] rm: reuse strbuf for all remove_dir_recursively() calls

2016-07-09 Thread René Scharfe

Don't throw the memory allocated for remove_dir_recursively() away after
a single call, use it for the other entries as well instead.

Signed-off-by: Rene Scharfe 
---
 builtin/rm.c | 5 +++--
 1 file changed, 3 insertions(+), 2 deletions(-)

diff --git a/builtin/rm.c b/builtin/rm.c
index 8abb020..b2fee3e 100644
--- a/builtin/rm.c
+++ b/builtin/rm.c
@@ -387,6 +387,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
 */
if (!index_only) {
int removed = 0, gitmodules_modified = 0;
+   struct strbuf buf = STRBUF_INIT;
for (i = 0; i < list.nr; i++) {
const char *path = list.entry[i].name;
if (list.entry[i].is_submodule) {
@@ -398,7 +399,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
continue;
}
} else {
-   struct strbuf buf = STRBUF_INIT;
+   strbuf_reset(&buf);
strbuf_addstr(&buf, path);
if (!remove_dir_recursively(&buf, 0)) {
removed = 1;
@@ -410,7 +411,6 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
/* Submodule was removed by 
user */
if 
(!remove_path_from_gitmodules(path))
gitmodules_modified = 1;
-   strbuf_release(&buf);
/* Fallthrough and let remove_path() 
fail. */
}
}
@@ -421,6 +421,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix)
if (!removed)
die_errno("git rm: '%s'", path);
}
+   strbuf_release(&buf);
if (gitmodules_modified)
stage_updated_gitmodules();
}
-- 
2.9.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] worktree: use strbuf_add_absolute_path() directly

2016-07-09 Thread René Scharfe

absolute_path() is a wrapper for strbuf_add_absolute_path().  Call the
latter directly for adding absolute paths to a strbuf.  That's shorter
and avoids an extra string copy.

Signed-off-by: Rene Scharfe 
---
 worktree.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/worktree.c b/worktree.c
index e2a94e0..b819baf 100644
--- a/worktree.c
+++ b/worktree.c
@@ -80,7 +80,7 @@ static struct worktree *get_main_worktree(void)
int is_bare = 0;
int is_detached = 0;
 
-   strbuf_addstr(&worktree_path, absolute_path(get_git_common_dir()));
+   strbuf_add_absolute_path(&worktree_path, get_git_common_dir());
is_bare = !strbuf_strip_suffix(&worktree_path, "/.git");
if (is_bare)
strbuf_strip_suffix(&worktree_path, "/.");
@@ -125,7 +125,7 @@ static struct worktree *get_linked_worktree(const char *id)
strbuf_rtrim(&worktree_path);
if (!strbuf_strip_suffix(&worktree_path, "/.git")) {
strbuf_reset(&worktree_path);
-   strbuf_addstr(&worktree_path, absolute_path("."));
+   strbuf_add_absolute_path(&worktree_path, ".");
strbuf_strip_suffix(&worktree_path, "/.");
}
 
-- 
2.9.0

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Question: Getting 'git diff' to generate /usr/bin/diff output

2016-07-17 Thread René Scharfe


Am 16.07.2016 um 21:12 schrieb n...@dad.org:

I am trying to learn how to use git, and am having difficulty using 'git diff'.

I can't deal with its output very well.


The other replies covered how to use the system's own diff instead. 
Just curious: What makes using git diff difficult and its output hard to 
deal with for you?


Thanks,
René

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] use strbuf_addbuf() for appending a strbuf to another

2016-07-19 Thread René Scharfe

Use strbuf_addbuf() where possible; it's shorter and more efficient.

Signed-off-by: Rene Scharfe 
---
 dir.c   | 2 +-
 path.c  | 2 +-
 wt-status.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/dir.c b/dir.c
index 6172b34..0ea235f 100644
--- a/dir.c
+++ b/dir.c
@@ -2364,7 +2364,7 @@ void write_untracked_extension(struct strbuf *out, struct 
untracked_cache *untra
 
varint_len = encode_varint(untracked->ident.len, varbuf);
strbuf_add(out, varbuf, varint_len);
-   strbuf_add(out, untracked->ident.buf, untracked->ident.len);
+   strbuf_addbuf(out, &untracked->ident);
 
strbuf_add(out, ouc, ouc_size(len));
free(ouc);
diff --git a/path.c b/path.c
index 259aeed..17551c4 100644
--- a/path.c
+++ b/path.c
@@ -483,7 +483,7 @@ static void do_submodule_path(struct strbuf *buf, const 
char *path,
strbuf_addstr(buf, git_dir);
}
strbuf_addch(buf, '/');
-   strbuf_addstr(&git_submodule_dir, buf->buf);
+   strbuf_addbuf(&git_submodule_dir, buf);
 
strbuf_vaddf(buf, fmt, args);
 
diff --git a/wt-status.c b/wt-status.c
index c19b52c..dbdb543 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -1062,7 +1062,7 @@ static void abbrev_sha1_in_line(struct strbuf *line)
strbuf_addf(split[1], "%s ", abbrev);
strbuf_reset(line);
for (i = 0; split[i]; i++)
-   strbuf_addf(line, "%s", split[i]->buf);
+   strbuf_addbuf(line, split[i]);
}
}
strbuf_list_free(split);
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] submodule-config: use explicit empty string instead of strbuf in config_from()

2016-07-19 Thread René Scharfe

Use a string constant instead of an empty strbuf to shorten the code
and make it easier to read.

Signed-off-by: Rene Scharfe 
---
... unless someone can come up with a suitable non-empty string to feed
to git_config_from_mem() as its name parameter.

 submodule-config.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/submodule-config.c b/submodule-config.c
index db1847f..44eb162 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -397,7 +397,6 @@ static const struct submodule *config_from(struct 
submodule_cache *cache,
const unsigned char *commit_sha1, const char *key,
enum lookup_type lookup_type)
 {
-   struct strbuf rev = STRBUF_INIT;
unsigned long config_size;
char *config;
unsigned char sha1[20];
@@ -448,7 +447,7 @@ static const struct submodule *config_from(struct 
submodule_cache *cache,
parameter.commit_sha1 = commit_sha1;
parameter.gitmodules_sha1 = sha1;
parameter.overwrite = 0;
-   git_config_from_mem(parse_config, "submodule-blob", rev.buf,
+   git_config_from_mem(parse_config, "submodule-blob", "",
config, config_size, ¶meter);
free(config);
 
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] use strbuf_addbuf() for appending a strbuf to another

2016-07-21 Thread René Scharfe

Am 20.07.2016 um 15:20 schrieb Jeff King:
> On Tue, Jul 19, 2016 at 08:36:29PM +0200, René Scharfe wrote:
> 
>> Use strbuf_addbuf() where possible; it's shorter and more efficient.
> 
> After seeing "efficient", I was momentarily surprised by the first hunk:
> 
>> diff --git a/dir.c b/dir.c
>> index 6172b34..0ea235f 100644
>> --- a/dir.c
>> +++ b/dir.c
>> @@ -2364,7 +2364,7 @@ void write_untracked_extension(struct strbuf *out, 
>> struct untracked_cache *untra
>>   
>>  varint_len = encode_varint(untracked->ident.len, varbuf);
>>  strbuf_add(out, varbuf, varint_len);
>> -strbuf_add(out, untracked->ident.buf, untracked->ident.len);
>> +strbuf_addbuf(out, &untracked->ident);
> 
> This is actually slightly _less_ efficient, because we already are using
> the precomputed len, and the new code will call an extra strbuf_grow()
> to cover the case where the two arguments are the same.  See 81d2cae
> (strbuf_addbuf(): allow passing the same buf to dst and src,
> 2010-01-12).

Ah, I wasn't aware of that.  Calling strbuf_grow() twice shouldn't be
thaaat bad.  However, I wonder where we duplicate strbufs, or why we
would ever want to do so.  Anyway, here's a patch for that:

-- >8 --
Subject: strbuf: avoid calling strbuf_grow() twice in strbuf_addbuf()

Implement strbuf_addbuf() as a normal function in order to avoid calling
strbuf_grow() twice, with the second callinside strbud_add() being a
no-op.  This is slightly faster and also reduces the text size a bit.

Signed-off-by: Rene Scharfe 
---
 strbuf.c | 7 +++
 strbuf.h | 6 +-
 2 files changed, 8 insertions(+), 5 deletions(-)

diff --git a/strbuf.c b/strbuf.c
index 1ba600b..f3bd571 100644
--- a/strbuf.c
+++ b/strbuf.c
@@ -197,6 +197,13 @@ void strbuf_add(struct strbuf *sb, const void *data, 
size_t len)
strbuf_setlen(sb, sb->len + len);
 }
 
+void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2)
+{
+   strbuf_grow(sb, sb2->len);
+   memcpy(sb->buf + sb->len, sb2->buf, sb2->len);
+   strbuf_setlen(sb, sb->len + sb2->len);
+}
+
 void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len)
 {
strbuf_grow(sb, len);
diff --git a/strbuf.h b/strbuf.h
index 83c5c98..ba8d5f1 100644
--- a/strbuf.h
+++ b/strbuf.h
@@ -263,11 +263,7 @@ static inline void strbuf_addstr(struct strbuf *sb, const 
char *s)
 /**
  * Copy the contents of another buffer at the end of the current one.
  */
-static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2)
-{
-   strbuf_grow(sb, sb2->len);
-   strbuf_add(sb, sb2->buf, sb2->len);
-}
+extern void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2);
 
 /**
  * Copy part of the buffer from a given position till a given length to the
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH] submodule-config: use explicit empty string instead of strbuf in config_from()

2016-07-21 Thread René Scharfe


Am 20.07.2016 um 10:25 schrieb Heiko Voigt:

Hi,

On Tue, Jul 19, 2016 at 09:05:43PM +0200, René Scharfe wrote:

Use a string constant instead of an empty strbuf to shorten the code
and make it easier to read.


This must have been some oversight from my original code. I also can not
see any purpose.


Signed-off-by: Rene Scharfe 
---
... unless someone can come up with a suitable non-empty string to feed
to git_config_from_mem() as its name parameter.


If we would want to be absolutely correct we could use something like
"SHA1:.gitmodules". E.g. like we use to lookup the blob in
gitmodule_sha1_from_commit():

strbuf_addf(&rev, "%s:.gitmodules", sha1_to_hex(commit_sha1));

And now I see where this was leftover from... before extracting this
function this code was filling the strbuf.

How about this instead?


I like it.


---8<--
Subject: [PATCH] fix passing a name for config from submodules

In commit 959b5455 we implemented the initial version of the submodule
config cache. During development of that initial version we extracted
the function gitmodule_sha1_from_commit(). During that process we missed
that the strbuf rev was still used in config_from() and now is left
empty. Lets fix this by also returning this string.

Signed-off-by: Heiko Voigt 
---

Its not exactly pretty with all the releases before the returns but
this is what I could quickly come up with...


Indeed.


  submodule-config.c | 23 +++
  1 file changed, 15 insertions(+), 8 deletions(-)

diff --git a/submodule-config.c b/submodule-config.c
index 077db40..dccea59 100644
--- a/submodule-config.c
+++ b/submodule-config.c
@@ -371,9 +371,9 @@ static int parse_config(const char *var, const char *value, 
void *data)
  }

  static int gitmodule_sha1_from_commit(const unsigned char *commit_sha1,
- unsigned char *gitmodules_sha1)
+ unsigned char *gitmodules_sha1,
+ struct strbuf *rev)
  {
-   struct strbuf rev = STRBUF_INIT;
int ret = 0;

if (is_null_sha1(commit_sha1)) {
@@ -381,11 +381,10 @@ static int gitmodule_sha1_from_commit(const unsigned char 
*commit_sha1,
return 1;
}

-   strbuf_addf(&rev, "%s:.gitmodules", sha1_to_hex(commit_sha1));
-   if (get_sha1(rev.buf, gitmodules_sha1) >= 0)
+   strbuf_addf(rev, "%s:.gitmodules", sha1_to_hex(commit_sha1));
+   if (get_sha1(rev->buf, gitmodules_sha1) >= 0)
ret = 1;

-   strbuf_release(&rev);
return ret;
  }

@@ -420,8 +419,10 @@ static const struct submodule *config_from(struct 
submodule_cache *cache,
return entry->config;
}

-   if (!gitmodule_sha1_from_commit(commit_sha1, sha1))
+   if (!gitmodule_sha1_from_commit(commit_sha1, sha1, &rev)) {
+   strbuf_release(&rev);
return NULL;
+   }

switch (lookup_type) {
case lookup_name:
@@ -431,14 +432,19 @@ static const struct submodule *config_from(struct 
submodule_cache *cache,
submodule = cache_lookup_path(cache, sha1, key);
break;
}
-   if (submodule)
+   if (submodule) {
+   strbuf_release(&rev);
return submodule;
+   }

config = read_sha1_file(sha1, &type, &config_size);
-   if (!config)
+   if (!config) {
+   strbuf_release(&rev);
return NULL;
+   }

if (type != OBJ_BLOB) {
+   strbuf_release(&rev);
free(config);
return NULL;
}


A separate patch could combine the previous two conditionals; free(NULL) 
is allowed.



@@ -450,6 +456,7 @@ static const struct submodule *config_from(struct 
submodule_cache *cache,
parameter.overwrite = 0;
git_config_from_mem(parse_config, "submodule-blob", rev.buf,
config, config_size, ¶meter);
+   strbuf_release(&rev);
free(config);

switch (lookup_type) {



--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] use strbuf_addstr() for adding constant strings to a strbuf

2016-07-30 Thread René Scharfe

Replace uses of strbuf_addf() for adding strings with more lightweight
strbuf_addstr() calls.

In http-push.c it becomes easier to see what's going on without having
to verfiy that the definition of PROPFIND_ALL_REQUEST doesn't contain
any format specifiers.

Signed-off-by: Rene Scharfe 
---
 builtin/rev-parse.c | 2 +-
 http-push.c | 2 +-
 send-pack.c | 2 +-
 t/helper/test-run-command.c | 6 +++---
 4 files changed, 6 insertions(+), 6 deletions(-)

diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c
index c961b74..76cf05e 100644
--- a/builtin/rev-parse.c
+++ b/builtin/rev-parse.c
@@ -469,7 +469,7 @@ static int cmd_parseopt(int argc, const char **argv, const 
char *prefix)
(stop_at_non_option ? PARSE_OPT_STOP_AT_NON_OPTION : 0) 
|
PARSE_OPT_SHELL_EVAL);
 
-   strbuf_addf(&parsed, " --");
+   strbuf_addstr(&parsed, " --");
sq_quote_argv(&parsed, argv, 0);
puts(parsed.buf);
return 0;
diff --git a/http-push.c b/http-push.c
index a092f02..d0b29ac 100644
--- a/http-push.c
+++ b/http-push.c
@@ -1137,7 +1137,7 @@ static void remote_ls(const char *path, int flags,
ls.userData = userData;
ls.userFunc = userFunc;
 
-   strbuf_addf(&out_buffer.buf, PROPFIND_ALL_REQUEST);
+   strbuf_addstr(&out_buffer.buf, PROPFIND_ALL_REQUEST);
 
dav_headers = curl_slist_append(dav_headers, "Depth: 1");
dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml");
diff --git a/send-pack.c b/send-pack.c
index 299d303..1f85c56 100644
--- a/send-pack.c
+++ b/send-pack.c
@@ -265,7 +265,7 @@ static int generate_push_cert(struct strbuf *req_buf,
struct strbuf cert = STRBUF_INIT;
int update_seen = 0;
 
-   strbuf_addf(&cert, "certificate version 0.1\n");
+   strbuf_addstr(&cert, "certificate version 0.1\n");
strbuf_addf(&cert, "pusher %s ", signing_key);
datestamp(&cert);
strbuf_addch(&cert, '\n');
diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c
index 30a64a9..6bb53da 100644
--- a/t/helper/test-run-command.c
+++ b/t/helper/test-run-command.c
@@ -26,7 +26,7 @@ static int parallel_next(struct child_process *cp,
return 0;
 
argv_array_pushv(&cp->args, d->argv);
-   strbuf_addf(err, "preloaded output of a child\n");
+   strbuf_addstr(err, "preloaded output of a child\n");
number_callbacks++;
return 1;
 }
@@ -36,7 +36,7 @@ static int no_job(struct child_process *cp,
  void *cb,
  void **task_cb)
 {
-   strbuf_addf(err, "no further jobs available\n");
+   strbuf_addstr(err, "no further jobs available\n");
return 0;
 }
 
@@ -45,7 +45,7 @@ static int task_finished(int result,
 void *pp_cb,
 void *pp_task_cb)
 {
-   strbuf_addf(err, "asking for a quick stop\n");
+   strbuf_addstr(err, "asking for a quick stop\n");
return 1;
 }
 
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] pass constants as first argument to st_mult()

2016-07-30 Thread René Scharfe

The result of st_mult() is the same no matter the order of its
arguments.  It invokes the macro unsigned_mult_overflows(), which
divides the second parameter by the first one.  Pass constants
first to allow that division to be done already at compile time.

Signed-off-by: Rene Scharfe 
---
 diffcore-rename.c | 2 +-
 refs.c| 2 +-
 shallow.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/diffcore-rename.c b/diffcore-rename.c
index 58ac0a5..73d003a 100644
--- a/diffcore-rename.c
+++ b/diffcore-rename.c
@@ -541,7 +541,7 @@ void diffcore_rename(struct diff_options *options)
rename_dst_nr * rename_src_nr, 50, 1);
}
 
-   mx = xcalloc(st_mult(num_create, NUM_CANDIDATE_PER_DST), sizeof(*mx));
+   mx = xcalloc(st_mult(NUM_CANDIDATE_PER_DST, num_create), sizeof(*mx));
for (dst_cnt = i = 0; i < rename_dst_nr; i++) {
struct diff_filespec *two = rename_dst[i].two;
struct diff_score *m;
diff --git a/refs.c b/refs.c
index 814cad3..b4e7cac 100644
--- a/refs.c
+++ b/refs.c
@@ -922,7 +922,7 @@ char *shorten_unambiguous_ref(const char *refname, int 
strict)
/* -2 for strlen("%.*s") - strlen("%s"); +1 for NUL */
total_len += strlen(ref_rev_parse_rules[nr_rules]) - 2 
+ 1;
 
-   scanf_fmts = xmalloc(st_add(st_mult(nr_rules, sizeof(char *)), 
total_len));
+   scanf_fmts = xmalloc(st_add(st_mult(sizeof(char *), nr_rules), 
total_len));
 
offset = 0;
for (i = 0; i < nr_rules; i++) {
diff --git a/shallow.c b/shallow.c
index 4d554ca..54e2db7 100644
--- a/shallow.c
+++ b/shallow.c
@@ -389,7 +389,7 @@ static void paint_down(struct paint_info *info, const 
unsigned char *sha1,
unsigned int i, nr;
struct commit_list *head = NULL;
int bitmap_nr = (info->nr_bits + 31) / 32;
-   size_t bitmap_size = st_mult(bitmap_nr, sizeof(uint32_t));
+   size_t bitmap_size = st_mult(sizeof(uint32_t), bitmap_nr);
uint32_t *tmp = xmalloc(bitmap_size); /* to be freed before return */
uint32_t *bitmap = paint_alloc(info);
struct commit *c = lookup_commit_reference_gently(sha1, 1);
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] nedmalloc: work around overzealous GCC 6 warning

2016-08-04 Thread René Scharfe


Am 04.08.2016 um 18:07 schrieb Johannes Schindelin:

With GCC 6, the strdup() function is declared with the "nonnull"
attribute, stating that it is not allowed to pass a NULL value as
parameter.

In nedmalloc()'s reimplementation of strdup(), Postel's Law is heeded
and NULL parameters are handled gracefully. GCC 6 complains about that
now because it thinks that NULL cannot be passed to strdup() anyway.

Let's just shut up GCC >= 6 in that case and go on with our lives.


This version of strdup() is only compiled if nedmalloc is used instead
of the system allocator.  That means we can't rely on strdup() being
able to take NULL -- some (most?) platforms won't like it.  Removing
the NULL check would be a more general and overall easier way out, no?

But it should check the result of malloc() before copying.
---
 compat/nedmalloc/nedmalloc.c | 8 +++-
 1 file changed, 3 insertions(+), 5 deletions(-)

diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c
index a0a16eb..cc18f0c 100644
--- a/compat/nedmalloc/nedmalloc.c
+++ b/compat/nedmalloc/nedmalloc.c
@@ -955,12 +955,10 @@ void **nedpindependent_comalloc(nedpool *p, size_t 
elems, size_t *sizes, void **

  */
 char *strdup(const char *s1)
 {
-   char *s2 = 0;
-   if (s1) {
-   size_t len = strlen(s1) + 1;
-   s2 = malloc(len);
+   size_t len = strlen(s1) + 1;
+   char *s2 = malloc(len);
+   if (s2)
memcpy(s2, s1, len);
-   }
return s2;
 }
 #endif
--
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] use strbuf_addstr() instead of strbuf_addf() with "%s"

2016-08-05 Thread René Scharfe

Call strbuf_addstr() for adding a simple string to a strbuf instead of
using the heavier strbuf_addf().  This is shorter and documents the
intent more clearly.

Signed-off-by: Rene Scharfe 
---
 builtin/fmt-merge-msg.c | 2 +-
 http.c  | 2 +-
 sequencer.c | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c
index e5658c3..ac84e99 100644
--- a/builtin/fmt-merge-msg.c
+++ b/builtin/fmt-merge-msg.c
@@ -272,7 +272,7 @@ static int cmp_string_list_util_as_integral(const void *a_, 
const void *b_)
 static void add_people_count(struct strbuf *out, struct string_list *people)
 {
if (people->nr == 1)
-   strbuf_addf(out, "%s", people->items[0].string);
+   strbuf_addstr(out, people->items[0].string);
else if (people->nr == 2)
strbuf_addf(out, "%s (%d) and %s (%d)",
people->items[0].string,
diff --git a/http.c b/http.c
index e81dd13..cd40b01 100644
--- a/http.c
+++ b/http.c
@@ -1225,7 +1225,7 @@ void append_remote_object_url(struct strbuf *buf, const 
char *url,
 
strbuf_addf(buf, "objects/%.*s/", 2, hex);
if (!only_two_digit_prefix)
-   strbuf_addf(buf, "%s", hex+2);
+   strbuf_addstr(buf, hex + 2);
 }
 
 char *get_remote_object_url(const char *url, const char *hex,
diff --git a/sequencer.c b/sequencer.c
index cdfac82..7b1eb14 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -112,7 +112,7 @@ static void remove_sequencer_state(void)
 {
struct strbuf seq_dir = STRBUF_INIT;
 
-   strbuf_addf(&seq_dir, "%s", git_path(SEQ_DIR));
+   strbuf_addstr(&seq_dir, git_path(SEQ_DIR));
remove_dir_recursively(&seq_dir, 0);
strbuf_release(&seq_dir);
 }
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] use CHILD_PROCESS_INIT to initialize automatic variables

2016-08-05 Thread René Scharfe

Initialize struct child_process variables already when they're defined.
That's shorter and saves a function call.

Signed-off-by: Rene Scharfe 
---
 builtin/submodule--helper.c | 3 +--
 builtin/worktree.c  | 6 ++
 2 files changed, 3 insertions(+), 6 deletions(-)

diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index b22352b..957f42a 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -444,8 +444,7 @@ static int module_name(int argc, const char **argv, const 
char *prefix)
 static int clone_submodule(const char *path, const char *gitdir, const char 
*url,
   const char *depth, const char *reference, int quiet)
 {
-   struct child_process cp;
-   child_process_init(&cp);
+   struct child_process cp = CHILD_PROCESS_INIT;
 
argv_array_push(&cp.args, "clone");
argv_array_push(&cp.args, "--no-checkout");
diff --git a/builtin/worktree.c b/builtin/worktree.c
index 5a41788..6dcf7bd 100644
--- a/builtin/worktree.c
+++ b/builtin/worktree.c
@@ -194,7 +194,7 @@ static int add_worktree(const char *path, const char 
*refname,
struct strbuf sb = STRBUF_INIT;
const char *name;
struct stat st;
-   struct child_process cp;
+   struct child_process cp = CHILD_PROCESS_INIT;
struct argv_array child_env = ARGV_ARRAY_INIT;
int counter = 0, len, ret;
struct strbuf symref = STRBUF_INIT;
@@ -273,7 +273,6 @@ static int add_worktree(const char *path, const char 
*refname,
 
argv_array_pushf(&child_env, "%s=%s", GIT_DIR_ENVIRONMENT, sb_git.buf);
argv_array_pushf(&child_env, "%s=%s", GIT_WORK_TREE_ENVIRONMENT, path);
-   memset(&cp, 0, sizeof(cp));
cp.git_cmd = 1;
 
if (commit)
@@ -365,8 +364,7 @@ static int add(int ac, const char **av, const char *prefix)
}
 
if (opts.new_branch) {
-   struct child_process cp;
-   memset(&cp, 0, sizeof(cp));
+   struct child_process cp = CHILD_PROCESS_INIT;
cp.git_cmd = 1;
argv_array_push(&cp.args, "branch");
if (opts.force_new_branch)
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] merge-recursive: use STRING_LIST_INIT_NODUP

2016-08-05 Thread René Scharfe

Initialize a string_list right when it's defined.  That's shorter, saves
a function call and makes it more obvious that we're using the NODUP
variant here.

Signed-off-by: Rene Scharfe 
---
 merge-recursive.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/merge-recursive.c b/merge-recursive.c
index a4a1195..97e6737 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -409,7 +409,7 @@ static void record_df_conflict_files(struct merge_options 
*o,
 * and the file need to be present, then the D/F file will be
 * reinstated with a new unique name at the time it is processed.
 */
-   struct string_list df_sorted_entries;
+   struct string_list df_sorted_entries = STRING_LIST_INIT_NODUP;
const char *last_file = NULL;
int last_len = 0;
int i;
@@ -422,7 +422,6 @@ static void record_df_conflict_files(struct merge_options 
*o,
return;
 
/* Ensure D/F conflicts are adjacent in the entries list. */
-   memset(&df_sorted_entries, 0, sizeof(struct string_list));
for (i = 0; i < entries->nr; i++) {
struct string_list_item *next = &entries->items[i];
string_list_append(&df_sorted_entries, next->string)->util =
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] merge: use string_list_split() in add_strategies()

2016-08-05 Thread René Scharfe

Call string_list_split() for cutting a space separated list into pieces
instead of reimplementing it based on struct strategy.  The attr member
of struct strategy was not used split_merge_strategies(); it was a pure
string operation.  Also be nice and clean up once we're done splitting;
the old code didn't bother freeing any of the allocated memory.

Signed-off-by: Rene Scharfe 
---
 builtin/merge.c | 44 ++--
 1 file changed, 10 insertions(+), 34 deletions(-)

diff --git a/builtin/merge.c b/builtin/merge.c
index 19b3bc2..a95a801 100644
--- a/builtin/merge.c
+++ b/builtin/merge.c
@@ -30,6 +30,7 @@
 #include "fmt-merge-msg.h"
 #include "gpg-interface.h"
 #include "sequencer.h"
+#include "string-list.h"
 
 #define DEFAULT_TWOHEAD (1<<0)
 #define DEFAULT_OCTOPUS (1<<1)
@@ -703,42 +704,17 @@ static int count_unmerged_entries(void)
return ret;
 }
 
-static void split_merge_strategies(const char *string, struct strategy **list,
-  int *nr, int *alloc)
-{
-   char *p, *q, *buf;
-
-   if (!string)
-   return;
-
-   buf = xstrdup(string);
-   q = buf;
-   for (;;) {
-   p = strchr(q, ' ');
-   if (!p) {
-   ALLOC_GROW(*list, *nr + 1, *alloc);
-   (*list)[(*nr)++].name = xstrdup(q);
-   free(buf);
-   return;
-   } else {
-   *p = '\0';
-   ALLOC_GROW(*list, *nr + 1, *alloc);
-   (*list)[(*nr)++].name = xstrdup(q);
-   q = ++p;
-   }
-   }
-}
-
 static void add_strategies(const char *string, unsigned attr)
 {
-   struct strategy *list = NULL;
-   int list_alloc = 0, list_nr = 0, i;
-
-   memset(&list, 0, sizeof(list));
-   split_merge_strategies(string, &list, &list_nr, &list_alloc);
-   if (list) {
-   for (i = 0; i < list_nr; i++)
-   append_strategy(get_strategy(list[i].name));
+   int i;
+
+   if (string) {
+   struct string_list list = STRING_LIST_INIT_DUP;
+   struct string_list_item *item;
+   string_list_split(&list, string, ' ', -1);
+   for_each_string_list_item(item, &list)
+   append_strategy(get_strategy(item->string));
+   string_list_clear(&list, 0);
return;
}
for (i = 0; i < ARRAY_SIZE(all_strategy); i++)
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: [PATCH 2/2] nedmalloc: work around overzealous GCC 6 warning

2016-08-06 Thread René Scharfe


Am 05.08.2016 um 23:57 schrieb Junio C Hamano:

Johannes Schindelin  writes:


Hi Junio & René,

On Thu, 4 Aug 2016, Junio C Hamano wrote:


Let's try it this way.  How about this as a replacement?


I like it (with the if (s2) test intead of if (s1), of course). But please
record René as author, maybe mentioning myself with a "Diagnosed-by:"
line.


Hmph.  I cannot do that unilaterally without waiting for René to
respond, though.  In any case, with only header and footer changes,
here is what will appear in 'pu'.


It's fine with me, thanks. :)  Minor comments below.


Thanks.

-- >8 --
From: René Scharfe 
Date: Thu, 4 Aug 2016 23:56:54 +0200
Subject: [PATCH] nedmalloc: work around overzealous GCC 6 warning

With GCC 6, the strdup() function is declared with the "nonnull"
attribute, stating that it is not allowed to pass a NULL value as
parameter.

In nedmalloc()'s reimplementation of strdup(), Postel's Law is heeded
and NULL parameters are handled gracefully. GCC 6 complains about that
now because it thinks that NULL cannot be passed to strdup() anyway.

Because the callers in this project of strdup() must be prepared to
call any implementation of strdup() supplied by the platform, so it
is pointless to pretend that it is OK to call it with NULL.

Remove the conditional based on NULL-ness of the input; this
squelches the warning.


My commit message would have been much shorter, probably too short.  But 
perhaps add here: "Check the return value of malloc() instead to make 
sure we actually got memory to write to."



See https://gcc.gnu.org/gcc-6/porting_to.html for details.

Diagnosed-by: Johannes Schindelin 
Signed-off-by: René Scharfe 
Signed-off-by: Junio C Hamano 
---
  compat/nedmalloc/nedmalloc.c | 9 -
  1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c
index 677d1b2..2d4ef59 100644
--- a/compat/nedmalloc/nedmalloc.c
+++ b/compat/nedmalloc/nedmalloc.c
@@ -955,12 +955,11 @@ void **nedpindependent_comalloc(nedpool *p, size_t elems, 
size_t *sizes, void **
   */
  char *strdup(const char *s1)
  {
-   char *s2 = 0;
-   if (s1) {
-   size_t len = strlen(s1) + 1;
-   s2 = malloc(len);
+   size_t len = strlen(s1) + 1;
+   char *s2 = malloc(len);
+


You added this blank line.  OK.


+   if (s2)
memcpy(s2, s1, len);
-   }
return s2;
  }
  #endif


--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] archive-tar: make write_extended_header() void

2016-08-06 Thread René Scharfe

The function write_extended_header() only ever returns 0.  Simplify
it and its caller by dropping its return value, like we did with
write_global_extended_header() earlier.

Signed-off-by: Rene Scharfe 
---
 archive-tar.c | 15 +--
 1 file changed, 5 insertions(+), 10 deletions(-)

diff --git a/archive-tar.c b/archive-tar.c
index 5568240..380e3ae 100644
--- a/archive-tar.c
+++ b/archive-tar.c
@@ -213,9 +213,9 @@ static void prepare_header(struct archiver_args *args,
xsnprintf(header->chksum, sizeof(header->chksum), "%07o", 
ustar_header_chksum(header));
 }
 
-static int write_extended_header(struct archiver_args *args,
-const unsigned char *sha1,
-const void *buffer, unsigned long size)
+static void write_extended_header(struct archiver_args *args,
+ const unsigned char *sha1,
+ const void *buffer, unsigned long size)
 {
struct ustar_header header;
unsigned int mode;
@@ -226,7 +226,6 @@ static int write_extended_header(struct archiver_args *args,
prepare_header(args, &header, mode, size);
write_blocked(&header, sizeof(header));
write_blocked(buffer, size);
-   return 0;
 }
 
 static int write_tar_entry(struct archiver_args *args,
@@ -305,12 +304,8 @@ static int write_tar_entry(struct archiver_args *args,
prepare_header(args, &header, mode, size_in_header);
 
if (ext_header.len > 0) {
-   err = write_extended_header(args, sha1, ext_header.buf,
-   ext_header.len);
-   if (err) {
-   free(buffer);
-   return err;
-   }
+   write_extended_header(args, sha1, ext_header.buf,
+ ext_header.len);
}
strbuf_release(&ext_header);
write_blocked(&header, sizeof(header));
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

[PATCH] use strbuf_add_unique_abbrev() for adding short hashes

2016-08-06 Thread René Scharfe

Call strbuf_add_unique_abbrev() to add abbreviated hashes to strbufs
instead of taking detours through find_unique_abbrev() and its static
buffer.  This is shorter and a bit more efficient.

Signed-off-by: Rene Scharfe 
---
 builtin/checkout.c |  3 +--
 pretty.c   | 13 ++---
 transport.c| 11 ---
 3 files changed, 11 insertions(+), 16 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index 27c1a05..0a7d24c 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -703,8 +703,7 @@ static int add_pending_uninteresting_ref(const char 
*refname,
 static void describe_one_orphan(struct strbuf *sb, struct commit *commit)
 {
strbuf_addstr(sb, "  ");
-   strbuf_addstr(sb,
-   find_unique_abbrev(commit->object.oid.hash, DEFAULT_ABBREV));
+   strbuf_add_unique_abbrev(sb, commit->object.oid.hash, DEFAULT_ABBREV);
strbuf_addch(sb, ' ');
if (!parse_commit(commit))
pp_commit_easy(CMIT_FMT_ONELINE, commit, sb);
diff --git a/pretty.c b/pretty.c
index 9fa42c2..9609afb 100644
--- a/pretty.c
+++ b/pretty.c
@@ -1143,8 +1143,8 @@ static size_t format_commit_one(struct strbuf *sb, /* in 
UTF-8 */
strbuf_addstr(sb, diff_get_color(c->auto_color, 
DIFF_RESET));
return 1;
}
-   strbuf_addstr(sb, find_unique_abbrev(commit->object.oid.hash,
-c->pretty_ctx->abbrev));
+   strbuf_add_unique_abbrev(sb, commit->object.oid.hash,
+c->pretty_ctx->abbrev);
strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET));
c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off;
return 1;
@@ -1154,8 +1154,8 @@ static size_t format_commit_one(struct strbuf *sb, /* in 
UTF-8 */
case 't':   /* abbreviated tree hash */
if (add_again(sb, &c->abbrev_tree_hash))
return 1;
-   strbuf_addstr(sb, 
find_unique_abbrev(commit->tree->object.oid.hash,
-c->pretty_ctx->abbrev));
+   strbuf_add_unique_abbrev(sb, commit->tree->object.oid.hash,
+c->pretty_ctx->abbrev);
c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off;
return 1;
case 'P':   /* parent hashes */
@@ -1171,9 +1171,8 @@ static size_t format_commit_one(struct strbuf *sb, /* in 
UTF-8 */
for (p = commit->parents; p; p = p->next) {
if (p != commit->parents)
strbuf_addch(sb, ' ');
-   strbuf_addstr(sb, find_unique_abbrev(
-   p->item->object.oid.hash,
-   c->pretty_ctx->abbrev));
+   strbuf_add_unique_abbrev(sb, p->item->object.oid.hash,
+c->pretty_ctx->abbrev);
}
c->abbrev_parent_hashes.len = sb->len -
  c->abbrev_parent_hashes.off;
diff --git a/transport.c b/transport.c
index 4ba48b0..557f399 100644
--- a/transport.c
+++ b/transport.c
@@ -321,11 +321,6 @@ static void print_ref_status(char flag, const char 
*summary, struct ref *to, str
}
 }
 
-static const char *status_abbrev(unsigned char sha1[20])
-{
-   return find_unique_abbrev(sha1, DEFAULT_ABBREV);
-}
-
 static void print_ok_ref_status(struct ref *ref, int porcelain)
 {
if (ref->deletion)
@@ -340,7 +335,8 @@ static void print_ok_ref_status(struct ref *ref, int 
porcelain)
char type;
const char *msg;
 
-   strbuf_addstr(&quickref, status_abbrev(ref->old_oid.hash));
+   strbuf_add_unique_abbrev(&quickref, ref->old_oid.hash,
+DEFAULT_ABBREV);
if (ref->forced_update) {
strbuf_addstr(&quickref, "...");
type = '+';
@@ -350,7 +346,8 @@ static void print_ok_ref_status(struct ref *ref, int 
porcelain)
type = ' ';
msg = NULL;
}
-   strbuf_addstr(&quickref, status_abbrev(ref->new_oid.hash));
+   strbuf_add_unique_abbrev(&quickref, ref->new_oid.hash,
+DEFAULT_ABBREV);
 
print_ref_status(type, quickref.buf, ref, ref->peer_ref, msg, 
porcelain);
strbuf_release(&quickref);
-- 
2.9.2

--
To unsubscribe from this list: send the line "unsubscribe git" in
the body of a message to majord...@vger.kernel.org
More majordomo info at  http://vger.kernel.org/majordomo-info.html

Re: Rename detection within in files WAS: [PATCH 2/6] t7408: merge short tests, factor out testing method

2016-08-07 Thread René Scharfe


Am 06.08.2016 um 01:26 schrieb Stefan Beller:

When moving code around, we usually get large chunks of text. If the contributor
is not 100% trustworthy, we need to review all the code without much intelectual
joy. Essentially the reviewer is just making sure the parts of the text are the
same.

I'd like to propose a new addition to the diff format that makes this use case
easier. The idea is to mark up lines that were just moved around in the file
instead of adding and removing them.

Currently we have 3 characters that
are allowed to start a line within a hunk:
' ' to indicate context
'+' to add a line
'-' to remove a line

I'd propose to add the following characters:
'*' which is the same as '+', but it indicates that the line was moved
 from somewhere else without change.
'X' The same as '-', with the addition that this line was moved to a different
 place without change.

The patch below uses these new '*' and 'X'. Each hunk that makes use of these
additions, is followed other sections, [moved-from, moved-to] that indicate
where the corresponding line is.


Interesting idea. It should be easy to convert the result into a regular 
unified diff for consumption with patch(1) or git am/apply by replacing 
the new flags with + and - and removing the moved-* hunks.


Your example ignores whitespace changes at the start of the line and 
within it, the added "-C $working_dir", s/expected/expect/; is this all 
intended?  Only a single blank line was moved verbatim.


The moved-from and moved-to hunks make this diff quite verbose.

If multiple lines from different sources are moved to the same hunk then 
you'd get multiple moved-from hunks following that single destination, 
right?  (Same with lines moved from a single hunk to multiple 
destinations and moved-to.)


But does it even warrent a new format? It's a display problem; the 
necessary information is already in the diffs we have today.  A 
graphical diff viewer could connect moved blocks with lines, like 
http://www.araxis.com/merge/ does in its side-by-side view.  A 
Thunderbird extension (or a bookmarklet or browser extendiion for 
webmail users) could do that for an email-based workflow.


Still, what about adding information about moved lines as an extended 
header (like that index line)?  Line numbers are included in hunk 
headers and can serve as orientation.  A reader would have to do some 
mental arithmetic (ugh), but incompatible format changes would be 
avoided.  For your example it should look something like this:


move from t/t7408-submodule-reference.sh:52,1
move to t/t7408-submodule-reference.sh:22,1



There are multiple things to tackle when going for such an addition:
* How to present this to the user (it's covered in this email)
* how to find the renamed lines algorithmically.
   (there are already approaches to that, e.g. 
https://github.com/stefanbeller/duplo
   which is http://duplo.sourceforge.net/ with no substantial additions)

Any comments welcome,

Thanks,
Stefan

---
  t/t7408-submodule-reference.sh | 50 +++---
  1 file changed, 15 insertions(+), 29 deletions(-), 6 moved lines

diff --git a/t/t7408-submodule-reference.sh b/t/t7408-submodule-reference.sh
index afcc629..1416cbd 100755
--- a/t/t7408-submodule-reference.sh
+++ b/t/t7408-submodule-reference.sh
@@ -10,6 +10,16 @@ base_dir=$(pwd)

  U=$base_dir/UPLOAD_LOG

+test_alternate_usage()
+{
+   alternates_file=$1
+   working_dir=$2
+   test_line_count = 1 $alternates_file &&
*   echo "0 objects, 0 kilobytes" >expect &&
*   git -C $working_dir count-objects >current &&
*   diff expect current
+}
+


Post-image line 22.


  test_expect_success 'preparing first repository' '
test_create_repo A &&
(
@@ move-source 42,6 @@ test_expect_success 'that reference gets used with add' '
  test_expect_success 'that reference gets used with add' '
(
cd super/sub &&
X   echo "0 objects, 0 kilobytes" > expected &&
X   git count-objects > current &&
X   diff expected current
)
  '
@@ -42,44 +52,20 @@ test_expect_success 'preparing superproject' '
)
  '

-test_expect_success 'submodule add --reference' '
+test_expect_success 'submodule add --reference uses alternates' '
(
cd super &&
git submodule add --reference ../B "file://$base_dir/A" sub &&
git commit -m B-super-added
-   )
-'
-


Pre-image line 52.


-test_expect_success 'after add: existence of info/alternates' '
-   test_line_count = 1 super/.git/modules/sub/objects/info/alternates
-'
-
-test_expect_success 'that reference gets used with add' '
-   (
-   cd super/sub &&
X   echo "0 objects, 0 kilobytes" > expected &&
X   git count-objects > current &&
X   diff expected current
-   )
-'
-
-test_expect_success 'cloning superproject' '
-   git

Re: [PATCH 1/3] compat: add qsort_s()

2016-12-21 Thread René Scharfe


Am 12.12.2016 um 20:57 schrieb Jeff King:

On Mon, Dec 12, 2016 at 08:51:14PM +0100, René Scharfe wrote:


It's kinda cool to have a bespoke compatibility layer for major platforms,
but the more I think about it the less I can answer why we would want that.
Safety, reliability and performance can't be good reasons -- if our fallback
function lacks in these regards then we have to improve it in any case.


There may be cases that we don't want to support because of portability
issues. E.g., if your libc has an assembly-optimized qsort() we wouldn't
want to replicate that.


Offloading to GPUs might be a better example; I don't know of a libc 
that does any of that, though (yet).



I dunno. I am not that opposed to just saying "forget libc qsort, we
always use our internal one which is consistent, performant, and safe".
But when I suggested something similar for our regex library, I seem to
recall there were complaints.


Well, I'm not sure how comparable they are, but perhaps we can avoid 
compat code altogether in this case.  Patch coming in a new thread.


René

[PATCH] string-list: make compare function compatible with qsort(3)

2016-12-21 Thread René Scharfe

The cmp member of struct string_list points to a comparison function
that is used for sorting and searching of list items.  It takes two
string pointers -- like strcmp(3), which is in fact the default;
cmp_items() provides a qsort(3) compatible interface by passing the
string members of two struct string_list_item pointers to cmp.

One shortcoming is that the comparison function is restricted to working
with the string members of items; util is inaccessible to it.  Another
one is that the value of cmp is passed in a global variable to
cmp_items(), making string_list_sort() non-reentrant.

Remove the intermediate layer, i.e. cmp_items(), make the comparison
functions compatible with qsort(3) and pass them pointers to full items.
This allows comparisons to also take the util member into account, and
avoids the need to pass the real comparison function to an intermediate
function, removing the need for a global function.

A downside is that comparison functions take void pointers now and each
of them needs to cast its arguments to struct string_list_item pointers,
weakening type safety and adding some repetitive code.  Programmers are
used to that, however, as that's par for the course with qsort(3).

Also two unsightly casts are added that remove the const qualifiers of
strings while building the structs to pass to the comparison function as
search keys.  It should be safe, though, as we only ever use them for
reading.

Signed-off-by: Rene Scharfe 
---
Alternative approach to the qsort_s()-based series "string-list: make
string_list_sort() reentrant".

 Documentation/technical/api-string-list.txt |  6 +++--
 builtin/mailsplit.c |  5 +++-
 mailmap.c   |  5 ++--
 merge-recursive.c   |  4 ++-
 string-list.c   | 39 +++--
 string-list.h   |  4 +--
 tmp-objdir.c|  4 ++-
 7 files changed, 39 insertions(+), 28 deletions(-)

diff --git a/Documentation/technical/api-string-list.txt 
b/Documentation/technical/api-string-list.txt
index c08402b12..39eac59c7 100644
--- a/Documentation/technical/api-string-list.txt
+++ b/Documentation/technical/api-string-list.txt
@@ -205,5 +205,7 @@ Represents the list itself.
   You should not tamper with it.
 . Setting the `strdup_strings` member to 1 will strdup() the strings
   before adding them, see above.
-. The `compare_strings_fn` member is used to specify a custom compare
-  function, otherwise `strcmp()` is used as the default function.
+. The `cmp` member is used to specify a custom compare function. It has
+  the same signature as the one for qsort(1) and is passed two pointers
+  to `struct string_list_item`. If it's NULL then the `string` members
+  are compared with `strcmp(1)`; this is the default.
diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c
index 30681681c..4e72e3128 100644
--- a/builtin/mailsplit.c
+++ b/builtin/mailsplit.c
@@ -147,8 +147,11 @@ static int populate_maildir_list(struct string_list *list, 
const char *path)
return ret;
 }
 
-static int maildir_filename_cmp(const char *a, const char *b)
+static int maildir_filename_cmp(const void *one, const void *two)
 {
+   const struct string_list_item *item_one = one, *item_two = two;
+   const char *a = item_one->string, *b = item_two->string;
+
while (*a && *b) {
if (isdigit(*a) && isdigit(*b)) {
long int na, nb;
diff --git a/mailmap.c b/mailmap.c
index c1a79c100..5290b5153 100644
--- a/mailmap.c
+++ b/mailmap.c
@@ -61,9 +61,10 @@ static void free_mailmap_entry(void *p, const char *s)
  * namemap.cmp until we know no systems that matter have such an
  * "unusual" string.h.
  */
-static int namemap_cmp(const char *a, const char *b)
+static int namemap_cmp(const void *one, const void *two)
 {
-   return strcasecmp(a, b);
+   const struct string_list_item *item_one = one, *item_two = two;
+   return strcasecmp(item_one->string, item_two->string);
 }
 
 static void add_mapping(struct string_list *map,
diff --git a/merge-recursive.c b/merge-recursive.c
index d32720944..4683ba43f 100644
--- a/merge-recursive.c
+++ b/merge-recursive.c
@@ -390,8 +390,10 @@ static struct string_list *get_unmerged(void)
return unmerged;
 }
 
-static int string_list_df_name_compare(const char *one, const char *two)
+static int string_list_df_name_compare(const void *a, const void *b)
 {
+   const struct string_list_item *item_a = a, *item_b = b;
+   const char *one = item_a->string, *two = item_b->string;
int onelen = strlen(one);
int twolen = strlen(two);
/*
diff --git a/string-list.c b/string-list.c
index 8c83cac18..c583a04ee 100644
--- a/string-list.c
+++ b/string-list.c
@@ -7,17 +7,26 @@ void string_list_init(struct string_list *list, int 
strdup_strings)
list->strdup_strings = strdup_strings;
 }
 
+static int string_lis

Re: [PATCH] unpack-trees: move checkout state into check_updates

2016-12-29 Thread René Scharfe


Am 29.12.2016 um 00:26 schrieb Stefan Beller:

The checkout state was introduced via 16da134b1f9
(read-trees: refactor the unpack_trees() part, 2006-07-30). An attempt to
refactor the checkout state was done in b56aa5b268e (unpack-trees: pass
checkout state explicitly to check_updates(), 2016-09-13), but we can
go even further.

The `struct checkout state` is not used in unpack_trees apart from
initializing it, so move it into the function that makes use of it,
which is `check_updates`.


Thanks for catching this, the result looks nicer.


Signed-off-by: Stefan Beller 
---
 unpack-trees.c | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index ea799d37c5..78703af135 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -224,14 +224,18 @@ static void unlink_entry(const struct cache_entry *ce)
schedule_dir_for_removal(ce->name, ce_namelen(ce));
 }

-static int check_updates(struct unpack_trees_options *o,
-const struct checkout *state)
+static int check_updates(struct unpack_trees_options *o)
 {
unsigned cnt = 0, total = 0;
struct progress *progress = NULL;
struct index_state *index = &o->result;
int i;
int errs = 0;
+   struct checkout state = CHECKOUT_INIT;
+   state.force = 1;
+   state.quiet = 1;
+   state.refresh_cache = 1;
+   state.istate = &o->result;


And here (as well as in two more places in this function) we could use 
"index" instead of "&o->result".  Not sure if it's worth a patch, though.




if (o->update && o->verbose_update) {
for (total = cnt = 0; cnt < index->cache_nr; cnt++) {
@@ -270,7 +274,7 @@ static int check_updates(struct unpack_trees_options *o,
display_progress(progress, ++cnt);
ce->ce_flags &= ~CE_UPDATE;
if (o->update && !o->dry_run) {
-   errs |= checkout_entry(ce, state, NULL);
+   errs |= checkout_entry(ce, &state, NULL);
}
}
}
@@ -1100,14 +1104,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, 
struct unpack_trees_options
int i, ret;
static struct cache_entry *dfc;
struct exclude_list el;
-   struct checkout state = CHECKOUT_INIT;

if (len > MAX_UNPACK_TREES)
die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES);
-   state.force = 1;
-   state.quiet = 1;
-   state.refresh_cache = 1;
-   state.istate = &o->result;

memset(&el, 0, sizeof(el));
if (!core_apply_sparse_checkout || !o->update)
@@ -1244,7 +1243,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, 
struct unpack_trees_options
}

o->src_index = NULL;
-   ret = check_updates(o, &state) ? (-2) : 0;
+   ret = check_updates(o) ? (-2) : 0;
if (o->dst_index) {
if (!ret) {
if (!o->result.cache_tree)

Re: [PATCH] string-list: make compare function compatible with qsort(3)

2016-12-29 Thread René Scharfe


Am 21.12.2016 um 17:12 schrieb Jeff King:

On Wed, Dec 21, 2016 at 10:36:41AM +0100, René Scharfe wrote:


One shortcoming is that the comparison function is restricted to working
with the string members of items; util is inaccessible to it.  Another
one is that the value of cmp is passed in a global variable to
cmp_items(), making string_list_sort() non-reentrant.


I think this approach is OK for string_list, but it doesn't help the
general case that wants qsort_s() to actually access global data. I
don't know how common that is in our codebase, though.

So I'm fine with it, but I think we might eventually need to revisit the
qsort_s() thing anyway.


I have to admit I didn't even consider the possibility that the pattern 
of accessing global variables in qsort(1) compare functions could have 
spread.


And indeed, at least ref-filter.c::compare_refs() and 
worktree.c::compare_worktree() so that as well.  The latter uses 
fspathcmp(), which is OK as ignore_case is only set once when reading 
the config, but the first one looks, well, interesting.  Perhaps a 
single ref filter per program is enough?


Anyway, that's a good enough argument for me for adding that newfangled 
C11 function after all..


Btw.: Found with

  git grep 'QSORT.*;$' |
  sed 's/.* /int /; s/);//' |
  sort -u |
  git grep -Ww -f-

and

  git grep -A1 'QSORT.*,$'


Remove the intermediate layer, i.e. cmp_items(), make the comparison
functions compatible with qsort(3) and pass them pointers to full items.
This allows comparisons to also take the util member into account, and
avoids the need to pass the real comparison function to an intermediate
function, removing the need for a global function.


I'm not sure if access to the util field is really of any value, after
looking at it in:

  
http://public-inbox.org/git/20161125171546.fa3zpapbjngjc...@sigill.intra.peff.net/

Though note that if we do take this patch, there are probably one or two
spots that could switch from QSORT() to string_list_sort().


Yes, but as you noted in that thread there is not much point in doing 
that; the only net win is that we can pass a list as a single pointer 
instead of as base pointer and element count -- the special compare 
function needs to be specified anyway (once), somehow.


René

Re: [PATCH] submodule.c: use GIT_DIR_ENVIRONMENT consistently

2016-12-29 Thread René Scharfe


Am 30.12.2016 um 01:47 schrieb Stefan Beller:

diff --git a/submodule.c b/submodule.c
index ece17315d6..973b9f3f96 100644
--- a/submodule.c
+++ b/submodule.c
@@ -1333,5 +1333,6 @@ void prepare_submodule_repo_env(struct argv_array *out)
if (strcmp(*var, CONFIG_DATA_ENVIRONMENT))
argv_array_push(out, *var);
}
-   argv_array_push(out, "GIT_DIR=.git");
+   argv_array_push(out, "%s=%s", GIT_DIR_ENVIRONMENT,
+   DEFAULT_GIT_DIR_ENVIRONMENT);


argv_array_pushf (with added "f") instead?

And indent continued lines to align them with the left parenthesis, like 
this:


fn(arg1,
   arg2);


 }

Re: [PATCH] archive-zip: load userdiff config

2017-01-03 Thread René Scharfe


Am 02.01.2017 um 23:25 schrieb Jeff King:

Since 4aff646d17 (archive-zip: mark text files in archives,
2015-03-05), the zip archiver will look at the userdiff
driver to decide whether a file is text or binary. This
usually doesn't need to look any further than the attributes
themselves (e.g., "-diff", etc). But if the user defines a
custom driver like "diff=foo", we need to look at
"diff.foo.binary" in the config. Prior to this patch, we
didn't actually load it.


Ah, didn't think of that, obviously.

Would it make sense for userdiff_find_by_path() to die if 
userdiff_config() hasn't been called, yet?



I also happened to notice that zipfiles are created using the local
timezone (because they have no notion of the timezone, so we have to
pick _something_). That's probably the least-terrible option, but it was
certainly surprising to me when I tried to bit-for-bit reproduce a
zipfile from GitHub on my local machine.


That reminds me of an old request to allow users better control over the 
meta-data written into archives.  Being able to specify a time zone 
offset could be a start.



 archive-zip.c  |  7 +++
 t/t5003-archive-zip.sh | 22 ++
 2 files changed, 25 insertions(+), 4 deletions(-)

diff --git a/archive-zip.c b/archive-zip.c
index 9db47357b0..b429a8d974 100644
--- a/archive-zip.c
+++ b/archive-zip.c
@@ -554,11 +554,18 @@ static void dos_time(time_t *time, int *dos_date, int 
*dos_time)
*dos_time = t->tm_sec / 2 + t->tm_min * 32 + t->tm_hour * 2048;
 }

+static int archive_zip_config(const char *var, const char *value, void *data)
+{
+   return userdiff_config(var, value);
+}
+
 static int write_zip_archive(const struct archiver *ar,
 struct archiver_args *args)
 {
int err;

+   git_config(archive_zip_config, NULL);
+


I briefly thought about moving this call to archive.c with the rest of 
the config-related stuff, but I agree it's better kept here.



dos_time(&args->time, &zip_date, &zip_time);

zip_dir = xmalloc(ZIP_DIRECTORY_MIN_SIZE);
diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh
index 14744b2a4b..55c7870997 100755
--- a/t/t5003-archive-zip.sh
+++ b/t/t5003-archive-zip.sh
@@ -64,6 +64,12 @@ check_zip() {
test_cmp_bin $original/nodiff.crlf $extracted/nodiff.crlf &&
test_cmp_bin $original/nodiff.lf   $extracted/nodiff.lf
"
+
+   test_expect_success UNZIP " validate that custom diff is unchanged " "
+   test_cmp_bin $original/custom.cr   $extracted/custom.cr &&
+   test_cmp_bin $original/custom.crlf $extracted/custom.crlf &&
+   test_cmp_bin $original/custom.lf   $extracted/custom.lf
+   "
 }

 test_expect_success \
@@ -78,6 +84,9 @@ test_expect_success \
  printf "text\r" >a/nodiff.cr &&
  printf "text\r\n"   >a/nodiff.crlf &&
  printf "text\n" >a/nodiff.lf &&
+ printf "text\r" >a/custom.cr &&
+ printf "text\r\n"   >a/custom.crlf &&
+ printf "text\n" >a/custom.lf &&
  printf "\0\r"   >a/binary.cr &&
  printf "\0\r\n" >a/binary.crlf &&
  printf "\0\n"   >a/binary.lf &&
@@ -112,15 +121,20 @@ test_expect_success 'add files to repository' '
 test_expect_success 'setup export-subst and diff attributes' '
echo "a/nodiff.* -diff" >>.git/info/attributes &&
echo "a/diff.* diff" >>.git/info/attributes &&
+   echo "a/custom.* diff=custom" >>.git/info/attributes &&
+   git config diff.custom.binary true &&
echo "substfile?" export-subst >>.git/info/attributes &&
git log --max-count=1 "--pretty=format:A${SUBSTFORMAT}O" HEAD \
>a/substfile1
 '

-test_expect_success \
-'create bare clone' \
-'git clone --bare . bare.git &&
- cp .git/info/attributes bare.git/info/attributes'
+test_expect_success 'create bare clone' '
+   git clone --bare . bare.git &&
+   cp .git/info/attributes bare.git/info/attributes &&
+   # Recreate our changes to .git/config rather than just copying it, as
+   # we do not want to clobber core.bare or other settings.
+   git -C bare.git config diff.custom.binary true
+'

 test_expect_success \
 'remove ignored file' \



Looks good, thanks!

René

Re: [PATCH 1/3] xdiff: -W: relax end-of-file function detection

2017-01-13 Thread René Scharfe


Am 13.01.2017 um 17:15 schrieb Vegard Nossum:

When adding a new function to the end of a file, it's enough to know
that 1) the addition is at the end of the file; and 2) there is a
function _somewhere_ in there.

If we had simply been changing the end of an existing function, then we
would also be deleting something from the old version.


That makes sense, thanks.


This fixes the case where we add e.g.

// Begin of dummy
static int dummy(void)
{
}

to the end of the file.


Without this patch the unchanged function before the added lines is 
shown in its entirety as (uncalled for) context.




Cc: René Scharfe 
Signed-off-by: Vegard Nossum 
---
 xdiff/xemit.c | 14 ++
 1 file changed, 6 insertions(+), 8 deletions(-)

diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 7389ce4..8c88dbd 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -183,16 +183,14 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb,

/*
 * We don't need additional context if
-* a whole function was added, possibly
-* starting with empty lines.
+* a whole function was added.
 */
-   while (i2 < xe->xdf2.nrec &&
-  is_empty_rec(&xe->xdf2, i2))
+   while (i2 < xe->xdf2.nrec) {
+   if (match_func_rec(&xe->xdf2, xecfg, i2,
+   dummy, sizeof(dummy)) >= 0)


Nit: I don't like the indentation here.  Giving "dummy" its own line is 
also not exactly pretty, but at least would allow the parameters to be 
aligned on the opening parenthesis.



+   goto post_context_calculation;
i2++;
-   if (i2 < xe->xdf2.nrec &&
-   match_func_rec(&xe->xdf2, xecfg, i2,
-  dummy, sizeof(dummy)) >= 0)
-   goto post_context_calculation;
+   }

/*
 * Otherwise get more context from the

Re: [PATCH 2/3] xdiff: -W: include immediately preceding non-empty lines in context

2017-01-13 Thread René Scharfe


Am 13.01.2017 um 17:15 schrieb Vegard Nossum:

When using -W to include the whole function in the diff context, you
are typically doing this to be able to review the change in its entirety
within the context of the function. It is therefore almost always
desirable to include any comments that immediately precede the function.

This also the fixes the case for C where the declaration is split across
multiple lines (where the first line of the declaration would not be
included in the output), e.g.:

void
dummy(void)
{
...
}



That's true, but I'm not sure "non-empty line before function line" is 
good enough a definition for desirable lines.  It wouldn't work for 
people who don't believe in empty lines.  Or for those that put a blank 
line between comment and function.  (I have an opinion on such habits, 
but git diff should probably stay neutral.)  And that's just for C code; 
I have no idea how this heuristic would hold up for other file types 
like HTML.


We can identify function lines with arbitrary precision (with a 
xfuncname regex, if needed), but there is no accurate way to classify 
lines as comments, or as the end of functions.  Adding optional regexes 
for single- and multi-line comments would help, at least for C.


René

Re: [PATCH 2/3] xdiff: -W: include immediately preceding non-empty lines in context

2017-01-14 Thread René Scharfe


Am 14.01.2017 um 00:56 schrieb Junio C Hamano:

Vegard Nossum  writes:


The patch will work as intended and as expected for 95% of the users out
there (javadoc, Doxygen, kerneldoc, etc. all have the comment
immediately preceding the function) and fixes a very real problem for me
(and I expect many others) _today_; for the remaining 5% (who put a
blank line between their comment and the start of the function) it will
revert back to the current behaviour, so there should be no regression
for them.


I notice your 95% are all programming languages, but I am more
worried about the contents written in non programming languages
(René gave HTML an an example--there may be other types of contents
that we programmer types do not deal with every day, but Git users
depend on).

I am also more focused on keeping the codebase maintainable in good
health by making sure that we made an effort to find a solution that
is general-enough before solving a single specific problem you have
today.  We may end up deciding that a blank-line heuristics gives us
good enough tradeoff, but I do not want us to make a decision before
thinking.


How about extending the context upward only up to and excluding a line 
that is either empty *or* a function line?  That would limit the extra 
context to a single function in the worst case.


Reducing context at the bottom with the aim to remove comments for the 
next section is more tricky as it could remove part of the function that 
we'd like to show if we get the boundary wrong.  How bad would it be to 
keep the southern border unchanged?


René

Re: [PATCHv3 2/2] builtin/commit.c: switch to xstrfmt(), instead of snprintf,

2017-01-14 Thread René Scharfe

Am 13.01.2017 um 18:58 schrieb Elia Pinto:
> In this patch, instead of using xnprintf instead of snprintf, which asserts
> that we don't truncate, we are switching to dynamic allocation with  
> xstrfmt(),
> , so we can avoid dealing with magic numbers in the code and reduce the 
> cognitive burden from
> the programmers, because they no longer have to count bytes needed for static 
> allocation.
> As a side effect of this patch we have also reduced the snprintf() calls, 
> that may silently truncate 
> results if the programmer is not careful.
> 
> Helped-by: Junio C Hamano 
> Helped-by: Jeff King  
> Signed-off-by: Elia Pinto 
> ---
> This is the third  version of the patch.
> 
> Changes from the first version: I have split the original commit in two, as 
> discussed here
> http://public-inbox.org/git/20161213132717.42965-1-gitter.spi...@gmail.com/.
> 
> Changes from the second version:
> - Changed the commit message to clarify the purpose of the patch (
> as suggested by Junio)
> https://public-inbox.org/git/xmqqtw95mfo3@gitster.mtv.corp.google.com/T/#m2e6405a8a78a8ca1ed770614c91398290574c4a1
> 
> 
> 
>  builtin/commit.c | 10 --
>  1 file changed, 4 insertions(+), 6 deletions(-)
> 
> diff --git a/builtin/commit.c b/builtin/commit.c
> index 09bcc0f13..37228330c 100644
> --- a/builtin/commit.c
> +++ b/builtin/commit.c
> @@ -1526,12 +1526,10 @@ static int git_commit_config(const char *k, const 
> char *v, void *cb)
>  static int run_rewrite_hook(const unsigned char *oldsha1,
>   const unsigned char *newsha1)
>  {
> - /* oldsha1 SP newsha1 LF NUL */
> - static char buf[2*40 + 3];
> + char *buf;
>   struct child_process proc = CHILD_PROCESS_INIT;
>   const char *argv[3];
>   int code;
> - size_t n;
>  
>   argv[0] = find_hook("post-rewrite");
>   if (!argv[0])
> @@ -1547,11 +1545,11 @@ static int run_rewrite_hook(const unsigned char 
> *oldsha1,
>   code = start_command(&proc);
>   if (code)
>   return code;
> - n = snprintf(buf, sizeof(buf), "%s %s\n",
> -  sha1_to_hex(oldsha1), sha1_to_hex(newsha1));
> + buf = xstrfmt("%s %s\n", sha1_to_hex(oldsha1), sha1_to_hex(newsha1));
>   sigchain_push(SIGPIPE, SIG_IGN);
> - write_in_full(proc.in, buf, n);
> + write_in_full(proc.in, buf, strlen(buf));
>   close(proc.in);
> + free(buf);
>   sigchain_pop(SIGPIPE);
>   return finish_command(&proc);
>  }
> 

Perhaps I missed it from the discussion, but why not use strbuf?  It
would avoid counting the generated string's length.  That's probably
not going to make a measurable difference performance-wise, but it's
easy to avoid and doesn't even take up more lines:
---
 builtin/commit.c | 10 --
 1 file changed, 4 insertions(+), 6 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 711f96cc43..73bb72016f 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -1525,12 +1525,10 @@ static int git_commit_config(const char *k, const char 
*v, void *cb)
 static int run_rewrite_hook(const unsigned char *oldsha1,
const unsigned char *newsha1)
 {
-   /* oldsha1 SP newsha1 LF NUL */
-   static char buf[2*40 + 3];
+   struct strbuf sb = STRBUF_INIT;
struct child_process proc = CHILD_PROCESS_INIT;
const char *argv[3];
int code;
-   size_t n;
 
argv[0] = find_hook("post-rewrite");
if (!argv[0])
@@ -1546,11 +1544,11 @@ static int run_rewrite_hook(const unsigned char 
*oldsha1,
code = start_command(&proc);
if (code)
return code;
-   n = snprintf(buf, sizeof(buf), "%s %s\n",
-sha1_to_hex(oldsha1), sha1_to_hex(newsha1));
+   strbuf_addf(&sb, "%s %s\n", sha1_to_hex(oldsha1), sha1_to_hex(newsha1));
sigchain_push(SIGPIPE, SIG_IGN);
-   write_in_full(proc.in, buf, n);
+   write_in_full(proc.in, sb.buf, sb.len);
close(proc.in);
+   strbuf_release(&sb);
sigchain_pop(SIGPIPE);
return finish_command(&proc);
 }
-- 
2.11.0

Re: [PATCH 2/3] xdiff: -W: include immediately preceding non-empty lines in context

2017-01-15 Thread René Scharfe

Am 15.01.2017 um 03:39 schrieb Junio C Hamano:
> René Scharfe  writes:
> 
>>> I am also more focused on keeping the codebase maintainable in good
>>> health by making sure that we made an effort to find a solution that
>>> is general-enough before solving a single specific problem you have
>>> today.  We may end up deciding that a blank-line heuristics gives us
>>> good enough tradeoff, but I do not want us to make a decision before
>>> thinking.
>>
>> How about extending the context upward only up to and excluding a line
>> that is either empty *or* a function line?  That would limit the extra
>> context to a single function in the worst case.
>>
>> Reducing context at the bottom with the aim to remove comments for the
>> next section is more tricky as it could remove part of the function
>> that we'd like to show if we get the boundary wrong.  How bad would it
>> be to keep the southern border unchanged?
> 
> I personally do not think there is any robust heuristic other than
> Vegard's "a blank line may be a signal enough that lines before that
> are not part of the beginning of the function", and I think your
> "hence we look for a blank line but if there is a line that matches
> the function header, stop there as we know we came too far back"
> will be a good-enough safety measure.
> 
> I also agree with you that we probably do not want to futz with the
> southern border.

A replacement patch for 2/3 with these changes would look like this:

diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 8c88dbde38..9ed54cd318 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -174,11 +174,11 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb,
s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0);
 
if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) {
+   char dummy[1];
long fs1, i1 = xch->i1;
 
/* Appended chunk? */
if (i1 >= xe->xdf1.nrec) {
-   char dummy[1];
long i2 = xch->i2;
 
/*
@@ -200,6 +200,10 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb,
}
 
fs1 = get_func_line(xe, xecfg, NULL, i1, -1);
+   while (fs1 > 0 && !is_empty_rec(&xe->xdf1, fs1 - 1) &&
+  match_func_rec(&xe->xdf1, xecfg, fs1 - 1,
+ dummy, sizeof(dummy)) < 0)
+   fs1--;
if (fs1 < 0)
fs1 = 0;
if (fs1 < s1) {

Re: [PATCH 2/3] xdiff: -W: include immediately preceding non-empty lines in context

2017-01-15 Thread René Scharfe


Am 15.01.2017 um 11:06 schrieb Vegard Nossum:

On 15/01/2017 03:39, Junio C Hamano wrote:

René Scharfe  writes:

How about extending the context upward only up to and excluding a line
that is either empty *or* a function line?  That would limit the extra
context to a single function in the worst case.

Reducing context at the bottom with the aim to remove comments for the
next section is more tricky as it could remove part of the function
that we'd like to show if we get the boundary wrong.  How bad would it
be to keep the southern border unchanged?


I personally do not think there is any robust heuristic other than
Vegard's "a blank line may be a signal enough that lines before that
are not part of the beginning of the function", and I think your
"hence we look for a blank line but if there is a line that matches
the function header, stop there as we know we came too far back"
will be a good-enough safety measure.

I also agree with you that we probably do not want to futz with the
southern border.


You are right, trying to change the southern border in this way is not
quite reliable if there are no empty lines whatsoever and can
erroneously cause the function context to not include the bottom of the
function being changed.

I'm splitting the function boundary detection logic into separate
functions and trying to solve the above case without breaking the tests
(and adding a new test for the above case too).

I'll see if I can additionally provide some toggles (flags or config
variables) to control the new behaviour, what I had in mind was:

  -W[=preamble,=no-preamble]
  --function-context[=preamble,=no-preamble]
  diff.functionContextPreamble = 

(where the new logic is controlled by the new config variable and
overridden by the presence of =preamble or =no-preamble).


Adding comments before a function is useful, removing comments after a 
function sounds to me as only nice to have (under the assumption that 
they belong to the next function[*]).  How bad would it be to only 
implement the first part (as in the patch I just sent) without adding 
new config settings or parameters?


Thanks,
René


[*] Silly counter-example (the #endif line):
#ifdef SOMETHING
int f(...) {
// implementation for SOMETHING
}
#else
inf f(...) {
// implementation without SOMETHING
}
#endif /* SOMETHING */

[PATCH v2 0/5] string-list: make string_list_sort() reentrant

2017-01-22 Thread René Scharfe

Use qsort_s() from C11 Annex K to make string_list_sort() safer, in
particular when called from parallel threads.

Changes from v1:
* Renamed HAVE_QSORT_S to HAVE_ISO_QSORT_S in Makefile to disambiguate.
* Added basic perf test (patch 3).
* Converted a second caller to QSORT_S, in ref-filter.c (patch 5).

  compat: add qsort_s()
  add QSORT_S
  perf: add basic sort performance test
  string-list: use QSORT_S in string_list_sort()
  ref-filter: use QSORT_S in ref_array_sort()

 Makefile|  8 ++
 compat/qsort_s.c| 69 +
 git-compat-util.h   | 11 
 ref-filter.c|  6 ++--
 string-list.c   | 13 -
 t/helper/test-string-list.c | 25 
 t/perf/p0071-sort.sh| 26 +
 7 files changed, 146 insertions(+), 12 deletions(-)
 create mode 100644 compat/qsort_s.c
 create mode 100755 t/perf/p0071-sort.sh

-- 
2.11.0

[PATCH v2 1/5] compat: add qsort_s()

2017-01-22 Thread René Scharfe

The function qsort_s() was introduced with C11 Annex K; it provides the
ability to pass a context pointer to the comparison function, supports
the convention of using a NULL pointer for an empty array and performs a
few safety checks.

Add an implementation based on compat/qsort.c for platforms that lack a
native standards-compliant qsort_s() (i.e. basically everyone).  It
doesn't perform the full range of possible checks: It uses size_t
instead of rsize_t and doesn't check nmemb and size against RSIZE_MAX
because we probably don't have the restricted size type defined.  For
the same reason it returns int instead of errno_t.

Signed-off-by: Rene Scharfe 
---
 Makefile  |  8 +++
 compat/qsort_s.c  | 69 +++
 git-compat-util.h |  6 +
 3 files changed, 83 insertions(+)
 create mode 100644 compat/qsort_s.c

diff --git a/Makefile b/Makefile
index 27afd0f378..53ecc84e28 100644
--- a/Makefile
+++ b/Makefile
@@ -279,6 +279,9 @@ all::
 # is a simplified version of the merge sort used in glibc. This is
 # recommended if Git triggers O(n^2) behavior in your platform's qsort().
 #
+# Define HAVE_ISO_QSORT_S if your platform provides a qsort_s() that's
+# compatible with the one described in C11 Annex K.
+#
 # Define UNRELIABLE_FSTAT if your system's fstat does not return the same
 # information on a not yet closed file that lstat would return for the same
 # file after it was closed.
@@ -1418,6 +1421,11 @@ ifdef INTERNAL_QSORT
COMPAT_CFLAGS += -DINTERNAL_QSORT
COMPAT_OBJS += compat/qsort.o
 endif
+ifdef HAVE_ISO_QSORT_S
+   COMPAT_CFLAGS += -DHAVE_ISO_QSORT_S
+else
+   COMPAT_OBJS += compat/qsort_s.o
+endif
 ifdef RUNTIME_PREFIX
COMPAT_CFLAGS += -DRUNTIME_PREFIX
 endif
diff --git a/compat/qsort_s.c b/compat/qsort_s.c
new file mode 100644
index 00..52d1f0a73d
--- /dev/null
+++ b/compat/qsort_s.c
@@ -0,0 +1,69 @@
+#include "../git-compat-util.h"
+
+/*
+ * A merge sort implementation, simplified from the qsort implementation
+ * by Mike Haertel, which is a part of the GNU C Library.
+ * Added context pointer, safety checks and return value.
+ */
+
+static void msort_with_tmp(void *b, size_t n, size_t s,
+  int (*cmp)(const void *, const void *, void *),
+  char *t, void *ctx)
+{
+   char *tmp;
+   char *b1, *b2;
+   size_t n1, n2;
+
+   if (n <= 1)
+   return;
+
+   n1 = n / 2;
+   n2 = n - n1;
+   b1 = b;
+   b2 = (char *)b + (n1 * s);
+
+   msort_with_tmp(b1, n1, s, cmp, t, ctx);
+   msort_with_tmp(b2, n2, s, cmp, t, ctx);
+
+   tmp = t;
+
+   while (n1 > 0 && n2 > 0) {
+   if (cmp(b1, b2, ctx) <= 0) {
+   memcpy(tmp, b1, s);
+   tmp += s;
+   b1 += s;
+   --n1;
+   } else {
+   memcpy(tmp, b2, s);
+   tmp += s;
+   b2 += s;
+   --n2;
+   }
+   }
+   if (n1 > 0)
+   memcpy(tmp, b1, n1 * s);
+   memcpy(b, t, (n - n2) * s);
+}
+
+int git_qsort_s(void *b, size_t n, size_t s,
+   int (*cmp)(const void *, const void *, void *), void *ctx)
+{
+   const size_t size = st_mult(n, s);
+   char buf[1024];
+
+   if (!n)
+   return 0;
+   if (!b || !cmp)
+   return -1;
+
+   if (size < sizeof(buf)) {
+   /* The temporary array fits on the small on-stack buffer. */
+   msort_with_tmp(b, n, s, cmp, buf, ctx);
+   } else {
+   /* It's somewhat large, so malloc it.  */
+   char *tmp = xmalloc(size);
+   msort_with_tmp(b, n, s, cmp, tmp, ctx);
+   free(tmp);
+   }
+   return 0;
+}
diff --git a/git-compat-util.h b/git-compat-util.h
index 87237b092b..f706744e6a 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -988,6 +988,12 @@ static inline void sane_qsort(void *base, size_t nmemb, 
size_t size,
qsort(base, nmemb, size, compar);
 }
 
+#ifndef HAVE_ISO_QSORT_S
+int git_qsort_s(void *base, size_t nmemb, size_t size,
+   int (*compar)(const void *, const void *, void *), void *ctx);
+#define qsort_s git_qsort_s
+#endif
+
 #ifndef REG_STARTEND
 #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd"
 #endif
-- 
2.11.0

[PATCH v2 3/5] perf: add basic sort performance test

2017-01-22 Thread René Scharfe

Add a sort command to test-string-list that reads lines from stdin,
stores them in a string_list and then sorts it.  Use it in a simple
perf test script to measure the performance of string_list_sort().

Signed-off-by: Rene Scharfe 
---
 t/helper/test-string-list.c | 25 +
 t/perf/p0071-sort.sh| 26 ++
 2 files changed, 51 insertions(+)
 create mode 100755 t/perf/p0071-sort.sh

diff --git a/t/helper/test-string-list.c b/t/helper/test-string-list.c
index 4a68967bd1..c502fa16d3 100644
--- a/t/helper/test-string-list.c
+++ b/t/helper/test-string-list.c
@@ -97,6 +97,31 @@ int cmd_main(int argc, const char **argv)
return 0;
}
 
+   if (argc == 2 && !strcmp(argv[1], "sort")) {
+   struct string_list list = STRING_LIST_INIT_NODUP;
+   struct strbuf sb = STRBUF_INIT;
+   struct string_list_item *item;
+
+   strbuf_read(&sb, 0, 0);
+
+   /*
+* Split by newline, but don't create a string_list item
+* for the empty string after the last separator.
+*/
+   if (sb.buf[sb.len - 1] == '\n')
+   strbuf_setlen(&sb, sb.len - 1);
+   string_list_split_in_place(&list, sb.buf, '\n', -1);
+
+   string_list_sort(&list);
+
+   for_each_string_list_item(item, &list)
+   puts(item->string);
+
+   string_list_clear(&list, 0);
+   strbuf_release(&sb);
+   return 0;
+   }
+
fprintf(stderr, "%s: unknown function name: %s\n", argv[0],
argv[1] ? argv[1] : "(there was none)");
return 1;
diff --git a/t/perf/p0071-sort.sh b/t/perf/p0071-sort.sh
new file mode 100755
index 00..7c9a35a646
--- /dev/null
+++ b/t/perf/p0071-sort.sh
@@ -0,0 +1,26 @@
+#!/bin/sh
+
+test_description='Basic sort performance tests'
+. ./perf-lib.sh
+
+test_perf_default_repo
+
+test_expect_success 'setup' '
+   git ls-files --stage "*.[ch]" "*.sh" |
+   cut -f2 -d" " |
+   git cat-file --batch >unsorted
+'
+
+test_perf 'sort(1)' '
+   sort expect
+'
+
+test_perf 'string_list_sort()' '
+   test-string-list sort actual
+'
+
+test_expect_success 'string_list_sort() sorts like sort(1)' '
+   test_cmp_bin expect actual
+'
+
+test_done
-- 
2.11.0

[PATCH v2 4/5] string-list: use QSORT_S in string_list_sort()

2017-01-22 Thread René Scharfe

Pass the comparison function to cmp_items() via the context parameter of
qsort_s() instead of using a global variable.  That allows calling
string_list_sort() from multiple parallel threads.

Our qsort_s() in compat/ is slightly slower than qsort(1) from glibc
2.24 for sorting lots of lines:

Test HEAD^ HEAD
-
0071.2: sort(1)  0.10(0.22+0.01)   0.09(0.21+0.00) -10.0%
0071.3: string_list_sort()   0.16(0.15+0.01)   0.17(0.15+0.00) +6.3%

GNU sort(1) version 8.26 is significantly faster because it uses
multiple parallel threads; with the unportable option --parallel=1 it
becomes slower:

Test HEAD^ HEAD

0071.2: sort(1)  0.21(0.18+0.01)   0.20(0.18+0.01) -4.8%
0071.3: string_list_sort()   0.16(0.13+0.02)   0.17(0.15+0.01) +6.3%

There is some instability -- the numbers for the sort(1) check shouldn't
be affected by this patch.  Anyway, the performance of our qsort_s()
implementation is apparently good enough, at least for this test.

Signed-off-by: Rene Scharfe 
---
 string-list.c | 13 +
 1 file changed, 5 insertions(+), 8 deletions(-)

diff --git a/string-list.c b/string-list.c
index 8c83cac189..45016ad86d 100644
--- a/string-list.c
+++ b/string-list.c
@@ -211,21 +211,18 @@ struct string_list_item *string_list_append(struct 
string_list *list,
list->strdup_strings ? xstrdup(string) : (char 
*)string);
 }
 
-/* Yuck */
-static compare_strings_fn compare_for_qsort;
-
-/* Only call this from inside string_list_sort! */
-static int cmp_items(const void *a, const void *b)
+static int cmp_items(const void *a, const void *b, void *ctx)
 {
+   compare_strings_fn cmp = ctx;
const struct string_list_item *one = a;
const struct string_list_item *two = b;
-   return compare_for_qsort(one->string, two->string);
+   return cmp(one->string, two->string);
 }
 
 void string_list_sort(struct string_list *list)
 {
-   compare_for_qsort = list->cmp ? list->cmp : strcmp;
-   QSORT(list->items, list->nr, cmp_items);
+   QSORT_S(list->items, list->nr, cmp_items,
+   list->cmp ? list->cmp : strcmp);
 }
 
 struct string_list_item *unsorted_string_list_lookup(struct string_list *list,
-- 
2.11.0

[PATCH v2 5/5] ref-filter: use QSORT_S in ref_array_sort()

2017-01-22 Thread René Scharfe

Pass the array of sort keys to compare_refs() via the context parameter
of qsort_s() instead of using a global variable; that's cleaner and
simpler.  If ref_array_sort() is to be called from multiple parallel
threads then care still needs to be taken that the global variable
used_atom is not modified concurrently.

Signed-off-by: Rene Scharfe 
---
 ref-filter.c | 6 ++
 1 file changed, 2 insertions(+), 4 deletions(-)

diff --git a/ref-filter.c b/ref-filter.c
index 1a978405e6..3975022c88 100644
--- a/ref-filter.c
+++ b/ref-filter.c
@@ -1589,8 +1589,7 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct 
ref_array_item *a, stru
return (s->reverse) ? -cmp : cmp;
 }
 
-static struct ref_sorting *ref_sorting;
-static int compare_refs(const void *a_, const void *b_)
+static int compare_refs(const void *a_, const void *b_, void *ref_sorting)
 {
struct ref_array_item *a = *((struct ref_array_item **)a_);
struct ref_array_item *b = *((struct ref_array_item **)b_);
@@ -1606,8 +1605,7 @@ static int compare_refs(const void *a_, const void *b_)
 
 void ref_array_sort(struct ref_sorting *sorting, struct ref_array *array)
 {
-   ref_sorting = sorting;
-   QSORT(array->items, array->nr, compare_refs);
+   QSORT_S(array->items, array->nr, compare_refs, sorting);
 }
 
 static void append_literal(const char *cp, const char *ep, struct 
ref_formatting_state *state)
-- 
2.11.0

[PATCH v2 2/5] add QSORT_S

2017-01-22 Thread René Scharfe

Add the macro QSORT_S, a convenient wrapper for qsort_s() that infers
the size of the array elements and dies on error.

Basically all possible errors are programming mistakes (passing NULL as
base of a non-empty array, passing NULL as comparison function,
out-of-bounds accesses), so terminating the program should be acceptable
for most callers.

Signed-off-by: Rene Scharfe 
---
 git-compat-util.h | 5 +
 1 file changed, 5 insertions(+)

diff --git a/git-compat-util.h b/git-compat-util.h
index f706744e6a..f46d40e4a4 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -994,6 +994,11 @@ int git_qsort_s(void *base, size_t nmemb, size_t size,
 #define qsort_s git_qsort_s
 #endif
 
+#define QSORT_S(base, n, compar, ctx) do { \
+   if (qsort_s((base), (n), sizeof(*(base)), compar, ctx)) \
+   die("BUG: qsort_s() failed");   \
+} while (0)
+
 #ifndef REG_STARTEND
 #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd"
 #endif
-- 
2.11.0

[DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)

2017-01-22 Thread René Scharfe

Implement qsort_s() as a wrapper to the GNU version of qsort_r(1) and
use it on Linux.  Performance increases slightly:

Test HEAD^ HEAD

0071.2: sort(1)  0.10(0.20+0.02)   0.10(0.21+0.01) +0.0%
0071.3: string_list_sort()   0.17(0.15+0.01)   0.16(0.15+0.01) -5.9%

Additionally the unstripped size of compat/qsort_s.o falls from 24576
to 16544 bytes in my build.

IMHO these savings aren't worth the increased complexity of having to
support two implementations.
---
 Makefile |  6 ++
 compat/qsort_s.c | 18 ++
 config.mak.uname |  1 +
 3 files changed, 25 insertions(+)

diff --git a/Makefile b/Makefile
index 53ecc84e28..46db1c773f 100644
--- a/Makefile
+++ b/Makefile
@@ -282,6 +282,9 @@ all::
 # Define HAVE_ISO_QSORT_S if your platform provides a qsort_s() that's
 # compatible with the one described in C11 Annex K.
 #
+# Define HAVE_GNU_QSORT_R if your platform provides a qsort_r() that's
+# compatible with the one introduced with glibc 2.8.
+#
 # Define UNRELIABLE_FSTAT if your system's fstat does not return the same
 # information on a not yet closed file that lstat would return for the same
 # file after it was closed.
@@ -1426,6 +1429,9 @@ ifdef HAVE_ISO_QSORT_S
 else
COMPAT_OBJS += compat/qsort_s.o
 endif
+ifdef HAVE_GNU_QSORT_R
+   COMPAT_CFLAGS += -DHAVE_GNU_QSORT_R
+endif
 ifdef RUNTIME_PREFIX
COMPAT_CFLAGS += -DRUNTIME_PREFIX
 endif
diff --git a/compat/qsort_s.c b/compat/qsort_s.c
index 52d1f0a73d..763ee1faae 100644
--- a/compat/qsort_s.c
+++ b/compat/qsort_s.c
@@ -1,5 +1,21 @@
 #include "../git-compat-util.h"
 
+#if defined HAVE_GNU_QSORT_R
+
+int git_qsort_s(void *b, size_t n, size_t s,
+   int (*cmp)(const void *, const void *, void *), void *ctx)
+{
+   if (!n)
+   return 0;
+   if (!b || !cmp)
+   return -1;
+
+   qsort_r(b, n, s, cmp, ctx);
+   return 0;
+}
+
+#else
+
 /*
  * A merge sort implementation, simplified from the qsort implementation
  * by Mike Haertel, which is a part of the GNU C Library.
@@ -67,3 +83,5 @@ int git_qsort_s(void *b, size_t n, size_t s,
}
return 0;
 }
+
+#endif
diff --git a/config.mak.uname b/config.mak.uname
index 447f36ac2e..a1858f54ff 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -37,6 +37,7 @@ ifeq ($(uname_S),Linux)
NEEDS_LIBRT = YesPlease
HAVE_GETDELIM = YesPlease
SANE_TEXT_GREP=-a
+   HAVE_GNU_QSORT_R = YesPlease
 endif
 ifeq ($(uname_S),GNU/kFreeBSD)
HAVE_ALLOCA_H = YesPlease
-- 
2.11.0

Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)

2017-01-24 Thread René Scharfe

Am 23.01.2017 um 20:07 schrieb Junio C Hamano:
> René Scharfe  writes:
> 
>> Implement qsort_s() as a wrapper to the GNU version of qsort_r(1) and
>> use it on Linux.  Performance increases slightly:
>>
>> Test HEAD^ HEAD
>> 
>> 0071.2: sort(1)  0.10(0.20+0.02)   0.10(0.21+0.01) +0.0%
>> 0071.3: string_list_sort()   0.17(0.15+0.01)   0.16(0.15+0.01) -5.9%
>>
>> Additionally the unstripped size of compat/qsort_s.o falls from 24576
>> to 16544 bytes in my build.
>>
>> IMHO these savings aren't worth the increased complexity of having to
>> support two implementations.
> 
> I do worry about having to support more implementations in the
> future that have different function signature for the comparison
> callbacks, which will make things ugly, but this addition alone
> doesn't look too bad to me.

It is unfair of me to show a 5% speedup and then recommend to not
include it. ;-)  That difference won't be measurable in real use cases
and the patch is not necessary.  This patch is simple, but the overall
complexity (incl. #ifdefs etc.) will be lower without it.

But here's another one, with even higher performance and with an even
bigger recommendation to not include it! :)  It veers off into another
direction: Parallel execution.  It requires thread-safe comparison
functions, which might surprise callers.  The value 1000 for the minimum
number of items before threading kicks in is just a guess, not based on
measurements.  So it's quite raw -- and I'm not sure why it's still a
bit slower than sort(1).

Test HEAD^ HEAD
-
0071.2: sort(1)  0.10(0.18+0.03)   0.10(0.20+0.02) +0.0%
0071.3: string_list_sort()   0.17(0.14+0.02)   0.11(0.18+0.02) -35.3%

---
 compat/qsort_s.c | 76 ++--
 1 file changed, 58 insertions(+), 18 deletions(-)

diff --git a/compat/qsort_s.c b/compat/qsort_s.c
index 52d1f0a73d..1304a089af 100644
--- a/compat/qsort_s.c
+++ b/compat/qsort_s.c
@@ -1,4 +1,5 @@
 #include "../git-compat-util.h"
+#include "../thread-utils.h"
 
 /*
  * A merge sort implementation, simplified from the qsort implementation
@@ -6,29 +7,58 @@
  * Added context pointer, safety checks and return value.
  */
 
-static void msort_with_tmp(void *b, size_t n, size_t s,
-  int (*cmp)(const void *, const void *, void *),
-  char *t, void *ctx)
+#define MIN_ITEMS_FOR_THREAD 1000
+
+struct work {
+   int nr_threads;
+   void *base;
+   size_t nmemb;
+   size_t size;
+   char *tmp;
+   int (*cmp)(const void *, const void *, void *);
+   void *ctx;
+};
+
+static void *msort_with_tmp(void *work)
 {
+   struct work one, two, *w = work;
char *tmp;
char *b1, *b2;
size_t n1, n2;
+   size_t s, n;
 
-   if (n <= 1)
-   return;
+   if (w->nmemb <= 1)
+   return NULL;
 
-   n1 = n / 2;
-   n2 = n - n1;
-   b1 = b;
-   b2 = (char *)b + (n1 * s);
+   one = two = *w;
+   one.nr_threads /= 2;
+   two.nr_threads -= one.nr_threads;
+   n = one.nmemb;
+   s = one.size;
+   n1 = one.nmemb = n / 2;
+   n2 = two.nmemb = n - n1;
+   b1 = one.base;
+   b2 = two.base = b1 + n1 * s;
+   two.tmp += n1 * s;
 
-   msort_with_tmp(b1, n1, s, cmp, t, ctx);
-   msort_with_tmp(b2, n2, s, cmp, t, ctx);
+#ifndef NO_PTHREADS
+   if (one.nr_threads && n > MIN_ITEMS_FOR_THREAD) {
+   pthread_t thread;
+   int err = pthread_create(&thread, NULL, msort_with_tmp, &one);
+   msort_with_tmp(&two);
+   if (err || pthread_join(thread, NULL))
+   msort_with_tmp(&one);
+   } else
+#endif
+   {
+   msort_with_tmp(&one);
+   msort_with_tmp(&two);
+   }
 
-   tmp = t;
+   tmp = one.tmp;
 
while (n1 > 0 && n2 > 0) {
-   if (cmp(b1, b2, ctx) <= 0) {
+   if (one.cmp(b1, b2, one.ctx) <= 0) {
memcpy(tmp, b1, s);
tmp += s;
b1 += s;
@@ -42,7 +72,8 @@ static void msort_with_tmp(void *b, size_t n, size_t s,
}
if (n1 > 0)
memcpy(tmp, b1, n1 * s);
-   memcpy(b, t, (n - n2) * s);
+   memcpy(one.base, one.tmp, (n - n2) * s);
+   return NULL;
 }
 
 int git_qsort_s(void *b, size_t n, size_t s,
@@ -50,20 +81,29 @@ int git_qsort_s(void *b, size_t n, size_t s,
 {
const size_t size = st_mult(n, s);
char buf[1024];
+   struct wo

Re: [PATCH v2 0/5] string-list: make string_list_sort() reentrant

2017-01-24 Thread René Scharfe


Am 24.01.2017 um 00:54 schrieb Jeff King:

The speed looks like a reasonable outcome. I'm torn on the qsort_r()
demo patch. I don't think it looks too bad. OTOH, I don't think I would
want to deal with the opposite-argument-order versions.


The code itself may look OK, but it's not really necessary and the 
special implementation for Linux makes increases maintenance costs.  Can 
we save it for later and first give the common implemention a chance to 
prove itself?



Is there any interest in people adding the ISO qsort_s() to their libc
implementations? It seems like it's been a fair number of years by now.


https://sourceware.org/ml/libc-alpha/2014-12/msg00513.html is the last 
post mentioning qsort_s on the glibc mailing list, but it didn't even 
make it into https://sourceware.org/glibc/wiki/Development_Todo/Master.
Not sure what's planned in BSD land, didn't find anything (but didn't 
look too hard).


René

Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)

2017-01-25 Thread René Scharfe


Am 24.01.2017 um 21:39 schrieb Jeff King:

On Tue, Jan 24, 2017 at 07:00:03PM +0100, René Scharfe wrote:


I do worry about having to support more implementations in the
future that have different function signature for the comparison
callbacks, which will make things ugly, but this addition alone
doesn't look too bad to me.


It is unfair of me to show a 5% speedup and then recommend to not
include it. ;-)  That difference won't be measurable in real use cases
and the patch is not necessary.  This patch is simple, but the overall
complexity (incl. #ifdefs etc.) will be lower without it.


I care less about squeezing out the last few percent performance and
more that somebody libc qsort_r() might offer some other improvement.
For instance, it could sort in-place to lower memory use for some cases,
or do some clever thing that gives more than a few percent in the real
world (something like TimSort).

I don't know to what degree we should care about that.


That's a good question.  Which workloads spend a significant amount of 
time in qsort/qsort_s?


We could track processor time spent and memory allocated in QSORT_S and 
the whole program and show a warning at the end if one of the two 
exceeded, say, 5% of the total, asking nicely to send it to our mailing 
list.  Would something like this be useful for other functions or 
metrics as well?  Would it be too impolite to use users as telemetry 
transports?


If we find such cases then we'd better fix them for all platforms, e.g. 
by importing timsort, no?


René

PATCH 1/2] abspath: add absolute_pathdup()

2017-01-26 Thread René Scharfe

Add a function that returns a buffer containing the absolute path of its
argument and a semantic patch for its intended use.  It avoids an extra
string copy to a static buffer.

Signed-off-by: Rene Scharfe 
---
 abspath.c| 7 +++
 cache.h  | 1 +
 contrib/coccinelle/xstrdup_or_null.cocci | 6 ++
 3 files changed, 14 insertions(+)

diff --git a/abspath.c b/abspath.c
index fce40fddcc..2f0c26e0e2 100644
--- a/abspath.c
+++ b/abspath.c
@@ -239,6 +239,13 @@ const char *absolute_path(const char *path)
return sb.buf;
 }
 
+char *absolute_pathdup(const char *path)
+{
+   struct strbuf sb = STRBUF_INIT;
+   strbuf_add_absolute_path(&sb, path);
+   return strbuf_detach(&sb, NULL);
+}
+
 /*
  * Unlike prefix_path, this should be used if the named file does
  * not have to interact with index entry; i.e. name of a random file
diff --git a/cache.h b/cache.h
index 00a029af36..d7b7a8cd7a 100644
--- a/cache.h
+++ b/cache.h
@@ -1069,6 +1069,7 @@ const char *real_path(const char *path);
 const char *real_path_if_valid(const char *path);
 char *real_pathdup(const char *path);
 const char *absolute_path(const char *path);
+char *absolute_pathdup(const char *path);
 const char *remove_leading_path(const char *in, const char *prefix);
 const char *relative_path(const char *in, const char *prefix, struct strbuf 
*sb);
 int normalize_path_copy_len(char *dst, const char *src, int *prefix_len);
diff --git a/contrib/coccinelle/xstrdup_or_null.cocci 
b/contrib/coccinelle/xstrdup_or_null.cocci
index 3fceef132b..8e05d1ca4b 100644
--- a/contrib/coccinelle/xstrdup_or_null.cocci
+++ b/contrib/coccinelle/xstrdup_or_null.cocci
@@ -5,3 +5,9 @@ expression V;
 - if (E)
 -V = xstrdup(E);
 + V = xstrdup_or_null(E);
+
+@@
+expression E;
+@@
+- xstrdup(absolute_path(E))
++ absolute_pathdup(E)
-- 
2.11.0

[PATCH 2/2] use absolute_pathdup()

2017-01-26 Thread René Scharfe

Apply the symantic patch for converting callers that duplicate the
result of absolute_path() to call absolute_pathdup() instead, which
avoids an extra string copy to a static buffer.

Signed-off-by: Rene Scharfe 
---
 builtin/clone.c | 4 ++--
 builtin/submodule--helper.c | 2 +-
 worktree.c  | 2 +-
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/builtin/clone.c b/builtin/clone.c
index 5ef81927a6..3f63edbbf9 100644
--- a/builtin/clone.c
+++ b/builtin/clone.c
@@ -170,7 +170,7 @@ static char *get_repo_path(const char *repo, int *is_bundle)
 
strbuf_addstr(&path, repo);
raw = get_repo_path_1(&path, is_bundle);
-   canon = raw ? xstrdup(absolute_path(raw)) : NULL;
+   canon = raw ? absolute_pathdup(raw) : NULL;
strbuf_release(&path);
return canon;
 }
@@ -894,7 +894,7 @@ int cmd_clone(int argc, const char **argv, const char 
*prefix)
 
path = get_repo_path(repo_name, &is_bundle);
if (path)
-   repo = xstrdup(absolute_path(repo_name));
+   repo = absolute_pathdup(repo_name);
else if (!strchr(repo_name, ':'))
die(_("repository '%s' does not exist"), repo_name);
else
diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c
index 74614a951e..899dc334e3 100644
--- a/builtin/submodule--helper.c
+++ b/builtin/submodule--helper.c
@@ -626,7 +626,7 @@ static int module_clone(int argc, const char **argv, const 
char *prefix)
   module_clone_options);
 
strbuf_addf(&sb, "%s/modules/%s", get_git_dir(), name);
-   sm_gitdir = xstrdup(absolute_path(sb.buf));
+   sm_gitdir = absolute_pathdup(sb.buf);
strbuf_reset(&sb);
 
if (!is_absolute_path(path)) {
diff --git a/worktree.c b/worktree.c
index 53b4771c04..d633761575 100644
--- a/worktree.c
+++ b/worktree.c
@@ -145,7 +145,7 @@ static struct worktree *get_linked_worktree(const char *id)
 
 static void mark_current_worktree(struct worktree **worktrees)
 {
-   char *git_dir = xstrdup(absolute_path(get_git_dir()));
+   char *git_dir = absolute_pathdup(get_git_dir());
int i;
 
for (i = 0; worktrees[i]; i++) {
-- 
2.11.0

Re: [PATCH 2/2] use absolute_pathdup()

2017-01-27 Thread René Scharfe


Hi Dscho,

Am 27.01.2017 um 11:21 schrieb Johannes Schindelin:

On Thu, 26 Jan 2017, René Scharfe wrote:

Apply the symantic patch for converting callers that duplicate the


s/symantic/semantic/


thank you!  I wonder where this came from.  And where my spellchecker 
went without as much as a farewell.  Reinstalled it now..


René

[PATCH 0/5] introduce SWAP macro

2017-01-28 Thread René Scharfe

Exchanging the value of two variables requires declaring a temporary
variable and repeating their names.  The swap macro in apply.c
simplifies this for its callers without changing the compiled binary.
Polish this macro and export it, then use it throughout the code to
reduce repetition and hide the declaration of temporary variables.

  add SWAP macro
  apply: use SWAP macro
  use SWAP macro
  diff: use SWAP macro
  graph: use SWAP macro

 apply.c   | 23 +++
 builtin/diff-tree.c   |  4 +---
 builtin/diff.c|  9 +++--
 contrib/coccinelle/swap.cocci | 28 
 diff-no-index.c   |  6 ++
 diff.c| 12 
 git-compat-util.h | 10 ++
 graph.c   | 11 ++-
 line-range.c  |  3 +--
 merge-recursive.c |  5 +
 object.c  |  4 +---
 pack-revindex.c   |  5 +
 prio-queue.c  |  4 +---
 strbuf.h  |  4 +---
 14 files changed, 63 insertions(+), 65 deletions(-)
 create mode 100644 contrib/coccinelle/swap.cocci

-- 
2.11.0

[PATCH 1/5] add SWAP macro

2017-01-28 Thread René Scharfe

Add a macro for exchanging the values of variables.  It allows users
to avoid repetition and takes care of the temporary variable for them.
It also makes sure that the storage sizes of its two parameters are the
same.  Its memcpy(1) calls are optimized away by current compilers.

Also add a conservative semantic patch for transforming only swaps of
variables of the same type.

Signed-off-by: Rene Scharfe 
---
 contrib/coccinelle/swap.cocci | 28 
 git-compat-util.h | 10 ++
 2 files changed, 38 insertions(+)
 create mode 100644 contrib/coccinelle/swap.cocci

diff --git a/contrib/coccinelle/swap.cocci b/contrib/coccinelle/swap.cocci
new file mode 100644
index 00..a0934d1fda
--- /dev/null
+++ b/contrib/coccinelle/swap.cocci
@@ -0,0 +1,28 @@
+@ swap_with_declaration @
+type T;
+identifier tmp;
+T a, b;
+@@
+- T tmp = a;
++ T tmp;
++ tmp = a;
+  a = b;
+  b = tmp;
+
+@ swap @
+type T;
+T tmp, a, b;
+@@
+- tmp = a;
+- a = b;
+- b = tmp;
++ SWAP(a, b);
+
+@ extends swap @
+identifier unused;
+@@
+  {
+  ...
+- T unused;
+  ... when != unused
+  }
diff --git a/git-compat-util.h b/git-compat-util.h
index 87237b092b..66cd466eea 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char 
*suffix)
return strip_suffix(str, suffix, &len);
 }
 
+#define SWAP(a, b) do {\
+   void *_swap_a_ptr = &(a);   \
+   void *_swap_b_ptr = &(b);   \
+   unsigned char _swap_buffer[sizeof(a)];  \
+   memcpy(_swap_buffer, _swap_a_ptr, sizeof(a));   \
+   memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +\
+  BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));   \
+   memcpy(_swap_b_ptr, _swap_buffer, sizeof(a));   \
+} while (0)
+
 #if defined(NO_MMAP) || defined(USE_WIN32_MMAP)
 
 #ifndef PROT_READ
-- 
2.11.0

[PATCH 2/5] apply: use SWAP macro

2017-01-28 Thread René Scharfe

Use the exported macro SWAP instead of the file-scoped macro swap and
remove the latter's definition.

Signed-off-by: Rene Scharfe 
---
 apply.c | 23 +++
 1 file changed, 7 insertions(+), 16 deletions(-)

diff --git a/apply.c b/apply.c
index 2ed808d429..0e2caeab9c 100644
--- a/apply.c
+++ b/apply.c
@@ -2187,29 +2187,20 @@ static int parse_chunk(struct apply_state *state, char 
*buffer, unsigned long si
return offset + hdrsize + patchsize;
 }
 
-#define swap(a,b) myswap((a),(b),sizeof(a))
-
-#define myswap(a, b, size) do {\
-   unsigned char mytmp[size];  \
-   memcpy(mytmp, &a, size);\
-   memcpy(&a, &b, size);   \
-   memcpy(&b, mytmp, size);\
-} while (0)
-
 static void reverse_patches(struct patch *p)
 {
for (; p; p = p->next) {
struct fragment *frag = p->fragments;
 
-   swap(p->new_name, p->old_name);
-   swap(p->new_mode, p->old_mode);
-   swap(p->is_new, p->is_delete);
-   swap(p->lines_added, p->lines_deleted);
-   swap(p->old_sha1_prefix, p->new_sha1_prefix);
+   SWAP(p->new_name, p->old_name);
+   SWAP(p->new_mode, p->old_mode);
+   SWAP(p->is_new, p->is_delete);
+   SWAP(p->lines_added, p->lines_deleted);
+   SWAP(p->old_sha1_prefix, p->new_sha1_prefix);
 
for (; frag; frag = frag->next) {
-   swap(frag->newpos, frag->oldpos);
-   swap(frag->newlines, frag->oldlines);
+   SWAP(frag->newpos, frag->oldpos);
+   SWAP(frag->newlines, frag->oldlines);
}
}
 }
-- 
2.11.0

[PATCH 4/5] diff: use SWAP macro

2017-01-28 Thread René Scharfe

Use the macro SWAP to exchange the value of pairs of variables instead
of swapping them manually with the help of a temporary variable.  The
resulting code is shorter and easier to read.

The two cases were not transformed by the semantic patch swap.cocci
because it's extra careful and handles only cases where the types of all
variables are the same -- and here we swap two ints and use an unsigned
temporary variable for that.  Nevertheless the conversion is safe, as
the value range is preserved with and without the patch.

Signed-off-by: Rene Scharfe 
---
 diff-no-index.c | 3 +--
 diff.c  | 4 +---
 2 files changed, 2 insertions(+), 5 deletions(-)

diff --git a/diff-no-index.c b/diff-no-index.c
index 1ae09894d7..df762fd0f7 100644
--- a/diff-no-index.c
+++ b/diff-no-index.c
@@ -185,8 +185,7 @@ static int queue_diff(struct diff_options *o,
struct diff_filespec *d1, *d2;
 
if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
-   unsigned tmp;
-   tmp = mode1; mode1 = mode2; mode2 = tmp;
+   SWAP(mode1, mode2);
SWAP(name1, name2);
}
 
diff --git a/diff.c b/diff.c
index 9de1ba264f..6c4f3f6b72 100644
--- a/diff.c
+++ b/diff.c
@@ -5117,11 +5117,9 @@ void diff_change(struct diff_options *options,
return;
 
if (DIFF_OPT_TST(options, REVERSE_DIFF)) {
-   unsigned tmp;
SWAP(old_mode, new_mode);
SWAP(old_sha1, new_sha1);
-   tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid;
-   new_sha1_valid = tmp;
+   SWAP(old_sha1_valid, new_sha1_valid);
SWAP(old_dirty_submodule, new_dirty_submodule);
}
 
-- 
2.11.0

[PATCH 3/5] use SWAP macro

2017-01-28 Thread René Scharfe

Apply the semantic patch swap.cocci to convert hand-rolled swaps to use
the macro SWAP.  The resulting code is shorter and easier to read, the
object code is effectively unchanged.

The patch for object.c had to be hand-edited in order to preserve the
comment before the change; Coccinelle tried to eat it for some reason.

Signed-off-by: Rene Scharfe 
---
 builtin/diff-tree.c | 4 +---
 builtin/diff.c  | 9 +++--
 diff-no-index.c | 3 +--
 diff.c  | 8 +++-
 graph.c | 5 +
 line-range.c| 3 +--
 merge-recursive.c   | 5 +
 object.c| 4 +---
 pack-revindex.c | 5 +
 prio-queue.c| 4 +---
 strbuf.h| 4 +---
 11 files changed, 15 insertions(+), 39 deletions(-)

diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c
index 806dd7a885..8ce00480cd 100644
--- a/builtin/diff-tree.c
+++ b/builtin/diff-tree.c
@@ -147,9 +147,7 @@ int cmd_diff_tree(int argc, const char **argv, const char 
*prefix)
tree1 = opt->pending.objects[0].item;
tree2 = opt->pending.objects[1].item;
if (tree2->flags & UNINTERESTING) {
-   struct object *tmp = tree2;
-   tree2 = tree1;
-   tree1 = tmp;
+   SWAP(tree2, tree1);
}
diff_tree_sha1(tree1->oid.hash,
   tree2->oid.hash,
diff --git a/builtin/diff.c b/builtin/diff.c
index 7f91f6d226..3d64b85337 100644
--- a/builtin/diff.c
+++ b/builtin/diff.c
@@ -45,12 +45,9 @@ static void stuff_change(struct diff_options *opt,
return;
 
if (DIFF_OPT_TST(opt, REVERSE_DIFF)) {
-   unsigned tmp;
-   const unsigned char *tmp_u;
-   const char *tmp_c;
-   tmp = old_mode; old_mode = new_mode; new_mode = tmp;
-   tmp_u = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_u;
-   tmp_c = old_name; old_name = new_name; new_name = tmp_c;
+   SWAP(old_mode, new_mode);
+   SWAP(old_sha1, new_sha1);
+   SWAP(old_name, new_name);
}
 
if (opt->prefix &&
diff --git a/diff-no-index.c b/diff-no-index.c
index f420786039..1ae09894d7 100644
--- a/diff-no-index.c
+++ b/diff-no-index.c
@@ -186,9 +186,8 @@ static int queue_diff(struct diff_options *o,
 
if (DIFF_OPT_TST(o, REVERSE_DIFF)) {
unsigned tmp;
-   const char *tmp_c;
tmp = mode1; mode1 = mode2; mode2 = tmp;
-   tmp_c = name1; name1 = name2; name2 = tmp_c;
+   SWAP(name1, name2);
}
 
d1 = noindex_filespec(name1, mode1);
diff --git a/diff.c b/diff.c
index f08cd8e033..9de1ba264f 100644
--- a/diff.c
+++ b/diff.c
@@ -5118,13 +5118,11 @@ void diff_change(struct diff_options *options,
 
if (DIFF_OPT_TST(options, REVERSE_DIFF)) {
unsigned tmp;
-   const unsigned char *tmp_c;
-   tmp = old_mode; old_mode = new_mode; new_mode = tmp;
-   tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c;
+   SWAP(old_mode, new_mode);
+   SWAP(old_sha1, new_sha1);
tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid;
new_sha1_valid = tmp;
-   tmp = old_dirty_submodule; old_dirty_submodule = 
new_dirty_submodule;
-   new_dirty_submodule = tmp;
+   SWAP(old_dirty_submodule, new_dirty_submodule);
}
 
if (options->prefix &&
diff --git a/graph.c b/graph.c
index d4e8519c90..4c722303d2 100644
--- a/graph.c
+++ b/graph.c
@@ -997,7 +997,6 @@ static void graph_output_post_merge_line(struct git_graph 
*graph, struct strbuf
 static void graph_output_collapsing_line(struct git_graph *graph, struct 
strbuf *sb)
 {
int i;
-   int *tmp_mapping;
short used_horizontal = 0;
int horizontal_edge = -1;
int horizontal_edge_target = -1;
@@ -1132,9 +1131,7 @@ static void graph_output_collapsing_line(struct git_graph 
*graph, struct strbuf
/*
 * Swap mapping and new_mapping
 */
-   tmp_mapping = graph->mapping;
-   graph->mapping = graph->new_mapping;
-   graph->new_mapping = tmp_mapping;
+   SWAP(graph->mapping, graph->new_mapping);
 
/*
 * If graph->mapping indicates that all of the branch lines
diff --git a/line-range.c b/line-range.c
index de4e32f942..323399d16c 100644
--- a/line-range.c
+++ b/line-range.c
@@ -269,8 +269,7 @@ int parse_range_arg(const char *arg, nth_line_fn_t 
nth_line_cb,
return -1;
 
if (*begin && *end && *end < *begin) {
-   long tmp;
-   tmp = *end; *end = *begin; *begin = tmp;
+   SWAP(*end, *begin);
}
 
return 0;
diff --git a/merge-recursive.c b/merge-recursive.c
index d327209443..b7ff1a

[PATCH 5/5] graph: use SWAP macro

2017-01-28 Thread René Scharfe

Exchange the values of graph->columns and graph->new_columns using the
macro SWAP instead of hand-rolled code.  The result is shorter and
easier to read.

This transformation was not done by the semantic patch swap.cocci
because there's an unrelated statement between the second and the last
step of the exchange, so it didn't match the expected pattern.

Signed-off-by: Rene Scharfe 
---
 graph.c | 6 +-
 1 file changed, 1 insertion(+), 5 deletions(-)

diff --git a/graph.c b/graph.c
index 4c722303d2..29b0f51dc5 100644
--- a/graph.c
+++ b/graph.c
@@ -463,7 +463,6 @@ static void graph_update_width(struct git_graph *graph,
 static void graph_update_columns(struct git_graph *graph)
 {
struct commit_list *parent;
-   struct column *tmp_columns;
int max_new_columns;
int mapping_idx;
int i, seen_this, is_commit_in_columns;
@@ -476,11 +475,8 @@ static void graph_update_columns(struct git_graph *graph)
 * We'll re-use the old columns array as storage to compute the new
 * columns list for the commit after this one.
 */
-   tmp_columns = graph->columns;
-   graph->columns = graph->new_columns;
+   SWAP(graph->columns, graph->new_columns);
graph->num_columns = graph->num_new_columns;
-
-   graph->new_columns = tmp_columns;
graph->num_new_columns = 0;
 
/*
-- 
2.11.0

[PATCH] use oidcpy() for copying hashes between instances of struct object_id

2017-01-28 Thread René Scharfe

Patch generated by Coccinelle and contrib/coccinelle/object_id.cocci.

Signed-off-by: Rene Scharfe 
---
 refs/files-backend.c | 2 +-
 wt-status.c  | 4 ++--
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/refs/files-backend.c b/refs/files-backend.c
index f9023939d5..8ee2aba39f 100644
--- a/refs/files-backend.c
+++ b/refs/files-backend.c
@@ -697,7 +697,7 @@ static int cache_ref_iterator_peel(struct ref_iterator 
*ref_iterator,
 
if (peel_entry(entry, 0))
return -1;
-   hashcpy(peeled->hash, entry->u.value.peeled.hash);
+   oidcpy(peeled, &entry->u.value.peeled);
return 0;
 }
 
diff --git a/wt-status.c b/wt-status.c
index a715e71906..a05328dc48 100644
--- a/wt-status.c
+++ b/wt-status.c
@@ -628,7 +628,7 @@ static void wt_status_collect_changes_initial(struct 
wt_status *s)
d->index_status = DIFF_STATUS_ADDED;
/* Leave {mode,oid}_head zero for adds. */
d->mode_index = ce->ce_mode;
-   hashcpy(d->oid_index.hash, ce->oid.hash);
+   oidcpy(&d->oid_index, &ce->oid);
}
}
 }
@@ -2096,7 +2096,7 @@ static void wt_porcelain_v2_print_unmerged_entry(
if (strcmp(ce->name, it->string) || !stage)
break;
stages[stage - 1].mode = ce->ce_mode;
-   hashcpy(stages[stage - 1].oid.hash, ce->oid.hash);
+   oidcpy(&stages[stage - 1].oid, &ce->oid);
sum |= (1 << (stage - 1));
}
if (sum != d->stagemask)
-- 
2.11.0

[PATCH] checkout: convert post_checkout_hook() to struct object_id

2017-01-28 Thread René Scharfe

Signed-off-by: Rene Scharfe 
---
 builtin/checkout.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/checkout.c b/builtin/checkout.c
index bfe685c198..80d5e38981 100644
--- a/builtin/checkout.c
+++ b/builtin/checkout.c
@@ -56,8 +56,8 @@ static int post_checkout_hook(struct commit *old, struct 
commit *new,
  int changed)
 {
return run_hook_le(NULL, "post-checkout",
-  sha1_to_hex(old ? old->object.oid.hash : null_sha1),
-  sha1_to_hex(new ? new->object.oid.hash : null_sha1),
+  oid_to_hex(old ? &old->object.oid : &null_oid),
+  oid_to_hex(new ? &new->object.oid : &null_oid),
   changed ? "1" : "0", NULL);
/* "new" can be NULL when checking out from the index before
   a commit exists. */
-- 
2.11.0

[PATCH] use oid_to_hex_r() for converting struct object_id hashes to hex strings

2017-01-28 Thread René Scharfe

Patch generated by Coccinelle and contrib/coccinelle/object_id.cocci.

Signed-off-by: Rene Scharfe 
---
 builtin/blame.c   | 4 ++--
 builtin/merge-index.c | 2 +-
 builtin/rev-list.c| 2 +-
 diff.c| 2 +-
 4 files changed, 5 insertions(+), 5 deletions(-)

diff --git a/builtin/blame.c b/builtin/blame.c
index 126b8c9e5b..cffc626540 100644
--- a/builtin/blame.c
+++ b/builtin/blame.c
@@ -1901,7 +1901,7 @@ static void emit_porcelain(struct scoreboard *sb, struct 
blame_entry *ent,
struct origin *suspect = ent->suspect;
char hex[GIT_SHA1_HEXSZ + 1];
 
-   sha1_to_hex_r(hex, suspect->commit->object.oid.hash);
+   oid_to_hex_r(hex, &suspect->commit->object.oid);
printf("%s %d %d %d\n",
   hex,
   ent->s_lno + 1,
@@ -1941,7 +1941,7 @@ static void emit_other(struct scoreboard *sb, struct 
blame_entry *ent, int opt)
int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP);
 
get_commit_info(suspect->commit, &ci, 1);
-   sha1_to_hex_r(hex, suspect->commit->object.oid.hash);
+   oid_to_hex_r(hex, &suspect->commit->object.oid);
 
cp = nth_line(sb, ent->lno);
for (cnt = 0; cnt < ent->num_lines; cnt++) {
diff --git a/builtin/merge-index.c b/builtin/merge-index.c
index ce356b1da1..2d1b6db6bd 100644
--- a/builtin/merge-index.c
+++ b/builtin/merge-index.c
@@ -22,7 +22,7 @@ static int merge_entry(int pos, const char *path)
if (strcmp(ce->name, path))
break;
found++;
-   sha1_to_hex_r(hexbuf[stage], ce->oid.hash);
+   oid_to_hex_r(hexbuf[stage], &ce->oid);
xsnprintf(ownbuf[stage], sizeof(ownbuf[stage]), "%o", 
ce->ce_mode);
arguments[stage] = hexbuf[stage];
arguments[stage + 4] = ownbuf[stage];
diff --git a/builtin/rev-list.c b/builtin/rev-list.c
index c43decda70..0aa93d5891 100644
--- a/builtin/rev-list.c
+++ b/builtin/rev-list.c
@@ -237,7 +237,7 @@ static int show_bisect_vars(struct rev_list_info *info, int 
reaches, int all)
cnt = reaches;
 
if (revs->commits)
-   sha1_to_hex_r(hex, revs->commits->item->object.oid.hash);
+   oid_to_hex_r(hex, &revs->commits->item->object.oid);
 
if (flags & BISECT_SHOW_ALL) {
traverse_commit_list(revs, show_commit, show_object, info);
diff --git a/diff.c b/diff.c
index f08cd8e033..d91a344908 100644
--- a/diff.c
+++ b/diff.c
@@ -3015,7 +3015,7 @@ static struct diff_tempfile *prepare_temp_file(const char 
*name,
if (!one->oid_valid)
sha1_to_hex_r(temp->hex, null_sha1);
else
-   sha1_to_hex_r(temp->hex, one->oid.hash);
+   oid_to_hex_r(temp->hex, &one->oid);
/* Even though we may sometimes borrow the
 * contents from the work tree, we always want
 * one->mode.  mode is trustworthy even when
-- 
2.11.0

[PATCH] receive-pack: call string_list_clear() unconditionally

2017-01-29 Thread René Scharfe

string_list_clear() handles empty lists just fine, so remove the
redundant check.

Signed-off-by: Rene Scharfe 
---
 builtin/receive-pack.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 6b97cbdbe..1dbb8a069 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -1942,8 +1942,7 @@ int cmd_receive_pack(int argc, const char **argv, const 
char *prefix)
run_receive_hook(commands, "post-receive", 1,
 &push_options);
run_update_post_hook(commands);
-   if (push_options.nr)
-   string_list_clear(&push_options, 0);
+   string_list_clear(&push_options, 0);
if (auto_gc) {
const char *argv_gc_auto[] = {
"gc", "--auto", "--quiet", NULL,
-- 
2.11.0

Re: [PATCH 1/5] add SWAP macro

2017-01-30 Thread René Scharfe


Am 30.01.2017 um 16:39 schrieb Johannes Schindelin:

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:


Add a macro for exchanging the values of variables.  It allows users to
avoid repetition and takes care of the temporary variable for them.  It
also makes sure that the storage sizes of its two parameters are the
same.  Its memcpy(1) calls are optimized away by current compilers.


How certain are we that "current compilers" optimize that away? And about
which "current compilers" are we talking? GCC? GCC 6? Clang? Clang 3?
Clang 3.8.*? Visual C 2005+?


GCC 4.4.7 and clang 3.2 on x86-64 compile the macro to the same object 
code as a manual swap ; see https://godbolt.org/g/F4b9M9.  Both were 
released 2012.  That website doesn't offer Visual C(++), but since you 
added the original macro in e5a94313c0 ("Teach git-apply about '-R'", 
2006) I guess we have that part covered. ;)


René

Re: [PATCH 1/5] add SWAP macro

2017-01-30 Thread René Scharfe


Am 30.01.2017 um 17:01 schrieb Johannes Schindelin:

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:


diff --git a/git-compat-util.h b/git-compat-util.h
index 87237b092b..66cd466eea 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char 
*suffix)
return strip_suffix(str, suffix, &len);
 }

+#define SWAP(a, b) do {\
+   void *_swap_a_ptr = &(a);   \
+   void *_swap_b_ptr = &(b);   \
+   unsigned char _swap_buffer[sizeof(a)];  \
+   memcpy(_swap_buffer, _swap_a_ptr, sizeof(a));   \
+   memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +\
+  BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));   \
+   memcpy(_swap_b_ptr, _swap_buffer, sizeof(a));   \
+} while (0)
+
 #if defined(NO_MMAP) || defined(USE_WIN32_MMAP)


It may seem as a matter of taste, or maybe not: I prefer this without the
_swap_a_ptr (and I would also prefer not to use identifiers starting with
an underscore, as section 7.1.3 Reserved Identifiers of the C99 standard
says they are reserved):

+#define SWAP(a, b) do {\
+   unsigned char swap_buffer_[sizeof(a)];  \
+   memcpy(swap_buffer_, &(a), sizeof(a));  \
+   memcpy(&(a), &(b), sizeof(a) +  \
+  BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)));   \
+   memcpy(&(b), swap_buffer_, sizeof(a));  \
+} while (0)


We can move the underscore to the end, but using a and b directly will 
give surprising results if the parameters have side effects.  E.g. if 
you want to swap the first two elements of two arrays you might want to 
do this:


SWAP(*x++, *y++);
SWAP(*x++, *y++);

And that would increment twice as much as one would guess and access 
unexpected elements.



One idea to address the concern that not all C compilers people use to
build Git may optimize away those memcpy()s: we could also introduce a
SWAP_PRIMITIVE_TYPE (or SWAP2 or SIMPLE_SWAP or whatever) that accepts
only primitive types. But since __typeof__() is not portable...


I wouldn't worry too much about such a solution before seeing that SWAP 
(even with memcpy(3) -- this function is probably optimized quite 
heavily on most platforms) causes an actual performance problem.


René

Re: [PATCH 4/5] diff: use SWAP macro

2017-01-30 Thread René Scharfe


Am 30.01.2017 um 17:04 schrieb Johannes Schindelin:

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:


Use the macro SWAP to exchange the value of pairs of variables instead
of swapping them manually with the help of a temporary variable.  The
resulting code is shorter and easier to read.

The two cases were not transformed by the semantic patch swap.cocci
because it's extra careful and handles only cases where the types of all
variables are the same -- and here we swap two ints and use an unsigned
temporary variable for that.  Nevertheless the conversion is safe, as
the value range is preserved with and without the patch.


One way to make this more obvious would be to change the type to signed
first, and then transform (which then would catch these cases too,
right?).


I'm not sure it would be more obvious, but it would certainly make the 
type change more explicit.  In diff-index.c we might even want to change 
the type of the swapped values from int to unsigned, which is more 
fitting for file modes.  In diff.c we'd need to add a separate variable, 
as tmp is shared with other (unsigned) swaps.


René

Re: [PATCH 3/5] use SWAP macro

2017-01-30 Thread René Scharfe


Am 30.01.2017 um 17:03 schrieb Johannes Schindelin:

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:


diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c
index 806dd7a885..8ce00480cd 100644
--- a/builtin/diff-tree.c
+++ b/builtin/diff-tree.c
@@ -147,9 +147,7 @@ int cmd_diff_tree(int argc, const char **argv, const char 
*prefix)
tree1 = opt->pending.objects[0].item;
tree2 = opt->pending.objects[1].item;
if (tree2->flags & UNINTERESTING) {
-   struct object *tmp = tree2;
-   tree2 = tree1;
-   tree1 = tmp;
+   SWAP(tree2, tree1);
}


Is there a way to transform away the curly braces for blocks that become
single-line blocks, too?


Interesting question.  I guess this can be done by using a Python script 
(see contrib/coccinelle/strbuf.cocci for an example).  I'll leave this 
as homework for readers interested in Coccinelle, at least for a while. :)


René

Re: [PATCH 5/5] graph: use SWAP macro

2017-01-30 Thread René Scharfe


Am 30.01.2017 um 17:16 schrieb Johannes Schindelin:

Hi René,

On Sat, 28 Jan 2017, René Scharfe wrote:


Exchange the values of graph->columns and graph->new_columns using the
macro SWAP instead of hand-rolled code.  The result is shorter and
easier to read.

This transformation was not done by the semantic patch swap.cocci
because there's an unrelated statement between the second and the last
step of the exchange, so it didn't match the expected pattern.


Is it really true that Coccinelle cannot be told to look for a code block
that declares a variable that is then used *only* in the lines we want to
match and replace?


Hope I parsed your question correctly; my answer would be that it can't 
be true because that's basically what the proposed semantic patch does:


@ swap @
type T;
T tmp, a, b;
@@
- tmp = a;
- a = b;
- b = tmp;
+ SWAP(a, b);

@ extends swap @
identifier unused;
@@
  {
  ...
- T unused;
  ... when != unused
  }

The first part (up to the "+") looks for a opportunities to use SWAP, 
and the second part looks for blocks where that transformation was done 
and we declare identifiers that are/became unused.


It did not match the code in graph.c because the pattern was basically:

tmp = a;
a = b;
something = totally_different;
b = tmp;

Coccinelle can be told to ignore such unrelated code by adding "... when 
!= tmp" etc. (which matches context lines that don't reference tmp), but 
that's slooow.  (Perhaps I just did it wrong, though.)


René

Re: [PATCH 1/5] add SWAP macro

2017-01-30 Thread René Scharfe


Am 30.01.2017 um 21:48 schrieb Johannes Schindelin:

So I tried to verify that Visual C optimizes this well, and oh my deity,
this was not easy. In Debug mode, it does not optimize, i.e. the memcpy()
will be called, even for simple 32-bit integers. In Release mode, Visual
Studio's defaults turn on "whole-program optimization" which means that
the only swapping that is going on in the end is that the meaning of two
registers is swapped, i.e. the SWAP() macro is expanded to... no assembler
code at all.

After trying this and that and something else, I finally ended up with the
file-scope optimized SWAP() resulting in this assembly code:

7FF791311000  mov eax,dword ptr [rcx]
7FF791311002  mov r8d,dword ptr [rdx]
7FF791311005  mov dword ptr [rcx],r8d
7FF791311008  mov dword ptr [rdx],eax


This looks good.


However, recent events (including some discussions on this mailing list
which had unfortunate consequences) made me trust my instincts more. And
my instincts tell me that it would be unwise to replace code that swaps
primitive types in the straight-forward way by code that relies on
advanced compiler optimization to generate efficient code, otherwise
producing very suboptimal code.


I don't know how difficult it was to arrive at the result above, but I 
wouldn't call inlining memcpy(3) an advanced optimization (anymore?), 
since the major compilers seem to be doing that.


The SWAP in prio-queue.c seems to be the one with the biggest 
performance impact.  Or perhaps it's the one in lookup_object()?  The 
former is easier to measure, though.


Here's what I get with CFLAGS="-builtin -O2" (best of five):

$ time ./t/helper/test-prio-queue $(seq 10 -1 1) dump >/dev/null

real0m0.142s
user0m0.120s
sys 0m0.020s

And this is with CFLAGS="-no-builtin -O2":

$ time ./t/helper/test-prio-queue $(seq 10 -1 1) dump >/dev/null

real0m0.170s
user0m0.156s
sys 0m0.012s

Hmm.  Not nice, but also not prohibitively slow.


The commit you quoted embarrasses me, and I have no excuse for it. I would
love to see that myswap() ugliness fixed by replacing it with a construct
that is simpler, and generates good code even without any smart compiler.


I don't see a way to do that without adding a type parameter.

René

Re: [PATCH 1/5] add SWAP macro

2017-01-30 Thread René Scharfe


Am 30.01.2017 um 22:03 schrieb Johannes Schindelin:

It is curious, though, that an
expression like "sizeof(a++)" would not be rejected.


Clang normally warns about something like this ("warning: expression 
with side effects has no effect in an unevaluated context 
[-Wunevaluated-expression]"), but not if the code is part of a macro.  I 
don't know if that's intended, but it sure is helpful in the case of SWAP.



Further, what would SWAP(a++, b) do? Swap a and b, and *then* increment a?


That might be a valid expectation, but GCC says "error: lvalue required 
as unary '&' operand" and clang puts it "error: cannot take the address 
of an rvalue of type".


René

Re: [PATCH 1/5] add SWAP macro

2017-01-31 Thread René Scharfe


Am 31.01.2017 um 13:13 schrieb Johannes Schindelin:

Hi René,

On Mon, 30 Jan 2017, René Scharfe wrote:


Am 30.01.2017 um 21:48 schrieb Johannes Schindelin:


The commit you quoted embarrasses me, and I have no excuse for it. I
would love to see that myswap() ugliness fixed by replacing it with a
construct that is simpler, and generates good code even without any
smart compiler.


I don't see a way to do that without adding a type parameter.


Exactly. And you know what? I would be very okay with that type parameter.

Coccinelle [*1*] should be able to cope with that, too, mehtinks.


Yes, a semantic patch can turn the type of the temporary variable into a 
macro parameter.  Programmers would have to type the type, though, 
making the macro only half as good.



It would be trivially "optimized" out of the box, even when compiling with
Tiny C or in debug mode.


Such a compiler is already slowed down by memset(3) calls for 
initializing objects and lack of other optimizations.  I doubt a few 
more memcpy(3) calls would make that much of a difference.


NB: git as compiled with TCC fails several tests, alas.  Builds wickedly 
fast, though.



And it would even allow things like this:

#define SIMPLE_SWAP(T, a, b) do { T tmp_ = a; a = b; b = tmp_; } while (0)
...
uint32_t large;
char nybble;

...

if (!(large & ~0xf)) {
SIMPLE_SWAP(char, nybble, large);
...
}

i.e. mixing types, when possible.

And while I do not necessarily expect that we need anything like this
anytime soon, merely the fact that it allows for this flexibility, while
being very readable at the same time, would make it a pretty good design
in my book.


Such a skinny macro which only hides repetition is kind of attractive 
due to its simplicity; I can't say the same about the mixed type example 
above, though.


The fat version isn't that bad either even without inlining, includes a 
few safety checks and doesn't require us to tell the compiler something 
it already knows very well.  I'd rather let the machine do the work.


René

Re: [PATCH 3/5] use SWAP macro

2017-01-31 Thread René Scharfe


Am 30.01.2017 um 23:22 schrieb Junio C Hamano:

René Scharfe  writes:


if (tree2->flags & UNINTERESTING) {
-   struct object *tmp = tree2;
-   tree2 = tree1;
-   tree1 = tmp;
+   SWAP(tree2, tree1);


A human would have written this SWAP(tree1, tree2).

Not that I think such a manual fix-up should be made in _this_
patch, which may end up mixing mechanical conversion (which we may
want to keep reproducible) and hand tweaks.  But this swapped swap
reads somewhat silly.


Well, a human wrote "tmp = tree2" -- sometimes one likes to count down 
instead of up, I guess.


We can make Coccinelle order the parameters alphabetically, but I don't 
know how to do so without duplicating most of the semantic patch.  And 
thats would result in e.g. SWAP(new_tree, old_tree), which might be odd 
as well.


I'd just leave them in whatever order they were, but that's probably 
because I'm lazy.


René

Re: [PATCH 1/5] add SWAP macro

2017-01-31 Thread René Scharfe


Am 30.01.2017 um 23:21 schrieb Brandon Williams:

On 01/30, René Scharfe wrote:

Am 30.01.2017 um 22:03 schrieb Johannes Schindelin:

It is curious, though, that an
expression like "sizeof(a++)" would not be rejected.


Clang normally warns about something like this ("warning: expression
with side effects has no effect in an unevaluated context
[-Wunevaluated-expression]"), but not if the code is part of a
macro.  I don't know if that's intended, but it sure is helpful in
the case of SWAP.


Further, what would SWAP(a++, b) do? Swap a and b, and *then* increment a?


That might be a valid expectation, but GCC says "error: lvalue
required as unary '&' operand" and clang puts it "error: cannot take
the address of an rvalue of type".

René


Perhaps we could disallow a side-effect operator in the macro.  By
disallow I mean place a comment at the definition to the macro and
hopefully catch something like that in code-review.  We have the same
issue with the `ALLOC_GROW()` macro.


SWAP(a++, ...) is caught by the compiler, SWAP(*a++, ...) works fine. 
Technically that should be enough. :)  A comment wouldn't hurt, of course.


René

Re: [PATCH 1/5] add SWAP macro

2017-02-01 Thread René Scharfe


Am 01.02.2017 um 12:47 schrieb Jeff King:

I'm not altogether convinced that SWAP() is an improvement in
readability. I really like that it's shorter than the code it replaces,
but there is a danger with introducing opaque constructs. It's one more
thing for somebody familiar with C but new to the project to learn or
get wrong.


I'm biased, of course, but SIMPLE_SWAP and SWAP exchange the values of 
two variables -- how could someone get that wrong?  Would it help to 
name the macro SWAP_VALUES?  Or XCHG? ;)



IMHO the main maintenance gain from René's patch is that you don't have
to specify the type, which means you can never have a memory-overflow
bug due to incorrect types. If we lose that, then I don't see all that
much value in the whole thing.


Size checks could be added to SIMPLE_SWAP as well.

The main benefit of a swap macro is reduced repetition IMHO: Users 
specify the variables to swap once instead of twice in a special order, 
and with SWAP they don't need to declare their type again.  Squeezing 
out redundancy makes the code easier to read and modify.


René

[PATCH] p5302: create repositories for index-pack results explicitly

2017-02-05 Thread René Scharfe

Before 7176a314 (index-pack: complain when --stdin is used outside of a
repo) index-pack silently created a non-existing target directory; now
the command refuses to work unless it's used against a valid repository.
That causes p5302 to fail, which relies on the former behavior.  Fix it
by setting up the destinations for its performance tests using git init.

Signed-off-by: Rene Scharfe 
---
 t/perf/p5302-pack-index.sh | 7 +++
 1 file changed, 7 insertions(+)

diff --git a/t/perf/p5302-pack-index.sh b/t/perf/p5302-pack-index.sh
index 5ee9211f98..99bdb16c85 100755
--- a/t/perf/p5302-pack-index.sh
+++ b/t/perf/p5302-pack-index.sh
@@ -13,6 +13,13 @@ test_expect_success 'repack' '
export PACK
 '
 
+test_expect_success 'create target repositories' '
+   for repo in t1 t2 t3 t4 t5 t6
+   do
+   git init --bare $repo
+   done
+'
+
 test_perf 'index-pack 0 threads' '
GIT_DIR=t1 git index-pack --threads=1 --stdin < $PACK
 '
-- 
2.11.1

[PATCH] dir: avoid allocation in fill_directory()

2017-02-07 Thread René Scharfe

Pass the match member of the first pathspec item directly to
read_directory() instead of using common_prefix() to duplicate it first,
thus avoiding memory duplication, strlen(3) and free(3).

Signed-off-by: Rene Scharfe 
---
This updates 966de3028 (dir: convert fill_directory to use the pathspec
struct interface).  The result is closer to the original, yet still
using the modern interface.

This patch eerily resembles 2b189435 (dir: simplify fill_directory()).

 dir.c | 7 +++
 1 file changed, 3 insertions(+), 4 deletions(-)

diff --git a/dir.c b/dir.c
index 65c3e681b8..4541f9e146 100644
--- a/dir.c
+++ b/dir.c
@@ -174,20 +174,19 @@ char *common_prefix(const struct pathspec *pathspec)
 
 int fill_directory(struct dir_struct *dir, const struct pathspec *pathspec)
 {
-   char *prefix;
+   const char *prefix;
size_t prefix_len;
 
/*
 * Calculate common prefix for the pathspec, and
 * use that to optimize the directory walk
 */
-   prefix = common_prefix(pathspec);
-   prefix_len = prefix ? strlen(prefix) : 0;
+   prefix_len = common_prefix_len(pathspec);
+   prefix = prefix_len ? pathspec->items[0].match : "";
 
/* Read the directory and prune it */
read_directory(dir, prefix, prefix_len, pathspec);
 
-   free(prefix);
return prefix_len;
 }
 
-- 
2.11.1

Re: [PATCH 1/5] add SWAP macro

2017-02-07 Thread René Scharfe

Am 01.02.2017 um 19:33 schrieb Junio C Hamano:
> René Scharfe  writes:
> 
>> Size checks could be added to SIMPLE_SWAP as well.
> 
> Between SWAP() and SIMPLE_SWAP() I am undecided.
> 
> If the compiler can infer the type and the size of the two
> "locations" given to the macro, there is no technical reason to
> require the caller to specify the type as an extra argument, so
> SIMPLE_SWAP() may not necessarily an improvement over SWAP() from
> that point of view.  If the redundancy is used as a sanity check,
> I'd be in favor of SIMPLE_SWAP(), though.
> 
> If the goal of SIMPLE_SWAP(), on the other hand, were to support the
> "only swap char with int for small value" example earlier in the
> thread, it means you cannot sanity check the type of things being
> swapped in the macro, and the readers of the code need to know about
> the details of what variables are being swapped.  It looks to me
> that it defeats the main benefit of using a macro.

Full type inference could be done with C11's _Generic for basic types,
while typeof would be needed for complex ones, I guess.  Checking that
sizes match is better than nothing and portable to ancient platforms,
though.  Having an explicit type given is portable and easy to use for
checks, of course, e.g. like this:

#define SIMPLE_SWAP(T, a, b) do { \
T swap_tmp_ = a + BUILD_ASSERT_OR_ZERO(sizeof(T) == sizeof(a)); \
a = b + BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)); \
b = swap_tmp_; \
} while(0)

It doesn't support expressions with side effects, but that's probably
not much of a concern.

Swapping between different types would then still have to be done
manually, but I wonder how common that is -- couldn't find such a case
in our tree.

René

Re: Trying to use xfuncname without success.

2017-02-07 Thread René Scharfe


Am 07.02.2017 um 20:21 schrieb Jack Adrian Zappa:

I'm trying to setup a hunk header for .natvis files. For some reason,
it doesn't seem to be working. I'm following their instructions from
here, which doesn't say much in terms of restrictions of the regex,
such as, is the matched item considered the hunk header or do I need a
group? I have tried both with no success. This is what I have:

[diff "natvis"]
xfuncname = "^[\\\t ]*

The extra "\\" allow backslashes to be used for indentation as well as 
between Type and Name, which is probably not what you want.  And your 
expression only matches single-char Name attributes.  Try:


xfuncname = "^[\t ]*

Re: Trying to use xfuncname without success.

2017-02-08 Thread René Scharfe

Am 08.02.2017 um 18:11 schrieb Jack Adrian Zappa:
> Thanks Rene, but you seem to have missed the point.  NOTHING is
> working.  No matter what I put there, it doesn't seem to get matched.

I'm not so sure about that.  With your example I get this diff without
setting diff.natvis.xfuncname:

diff --git a/a.natvis b/a.natvis
index 7f9bdf5..bc3c090 100644
--- a/a.natvis
+++ b/a.natvis
@@ -19,7 +19,7 @@ 
xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010";>
 
 
   
-  added_var
+  added_vars
 
 
   var2

Note the XML namespace in the hunk header.  It's put there by the
default rule because "xmlns" starts at the beginning of the line.  Your
diff has nothing there, which means the default rule is not used, i.e.
your user-defined rule is in effect.

Come to think of it, this line break in the middle of the AutoVisualizer
tab might have been added by your email client unintentionally, so that
we use different test files, which then of course results in different
diffs.  Is that the case?

Anyway, if I run the following two commands:

$ git config diff.natvis.xfuncname "^[\t ]*.gitattributes

... then I get this, both on Linux (git version 2.11.1) and on Windows
(git version 2.11.1.windows.1):

diff --git a/a.natvis b/a.natvis
index 7f9bdf5..bc3c090 100644
--- a/a.natvis
+++ b/a.natvis
@@ -19,7 +19,7 @@ test
 
 
   
-  added_var
+  added_vars
 
 
   var2

> Just to be sure, I tested your regex and again it didn't work.

At this point I'm out of ideas, sorry. :(  The only way I was able to
break it was due to mistyping the extension as "netvis" several times
for some reason.

René

Re: Trying to use xfuncname without success.

2017-02-10 Thread René Scharfe


Am 09.02.2017 um 01:10 schrieb Jack Adrian Zappa:

 where it has grabbed a line at 126 and is using that for the hunk header.


When I say that, I mean that it is using that line for *every* hunk
header, for every change, regardless if it has passed a hunk head that
it should have matched.


Strange.  That should only happen if no other function lines are 
recognized before the changes.  You can check if that's the case using 
git grep and your xfuncline regex, e.g. like this:



  $ git grep -En "^[\t ]*And you can check *that* result with regular grep -E or egrep if it 
looks funny.


René

Re: [PATCH] dir: avoid allocation in fill_directory()

2017-02-10 Thread René Scharfe


Am 08.02.2017 um 07:22 schrieb Duy Nguyen:

On Wed, Feb 8, 2017 at 5:04 AM, René Scharfe  wrote:

Pass the match member of the first pathspec item directly to
read_directory() instead of using common_prefix() to duplicate it first,
thus avoiding memory duplication, strlen(3) and free(3).


How about killing common_prefix()? There are two other callers in
ls-files.c and commit.c and it looks safe to do (but I didn't look
very hard).


I would like that, but it doesn't look like it's worth it.  I have a 
patch series for making overlay_tree_on_cache() take pointer+length, but 
it's surprisingly long and bloats the code.  Duplicating a small piece 
of memory once per command doesn't look so bad in comparison.


(The payoff for avoiding an allocation is higher for library functions 
like fill_directory().)


But while working on that I found two opportunities for improvement in 
prune_cache().  I'll send patches shortly.



There's a subtle difference. Before the patch, prefix[prefix_len] is
NUL. After the patch, it's not always true. If some code (incorrectly)
depends on that, this patch exposes it. I had a look inside
read_directory() though and it looks like no such code exists. So, all
good.


Thanks for checking.

NB: The code before 966de302 (dir: convert fill_directory to use the 
pathspec struct interface, committed 2017-01-04) made the same 
assumption, i.e. that NUL is not needed.


René

[PATCH 1/2] ls-files: pass prefix length explicitly to prune_cache()

2017-02-10 Thread René Scharfe

The function prune_cache() relies on the fact that it is only called on
max_prefix and sneakily uses the matching global variable max_prefix_len
directly.  Tighten its interface by passing both the string and its
length as parameters.  While at it move the NULL check into the function
to collect all cache-pruning related logic in one place.

Signed-off-by: Rene Scharfe 
---
Not urgent, of course.

 builtin/ls-files.c | 12 +++-
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 1592290815..18105ec7ea 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -369,11 +369,14 @@ static void show_files(struct dir_struct *dir)
 /*
  * Prune the index to only contain stuff starting with "prefix"
  */
-static void prune_cache(const char *prefix)
+static void prune_cache(const char *prefix, size_t prefixlen)
 {
-   int pos = cache_name_pos(prefix, max_prefix_len);
+   int pos;
unsigned int first, last;
 
+   if (!prefix)
+   return;
+   pos = cache_name_pos(prefix, prefixlen);
if (pos < 0)
pos = -pos-1;
memmove(active_cache, active_cache + pos,
@@ -384,7 +387,7 @@ static void prune_cache(const char *prefix)
while (last > first) {
int next = (last + first) >> 1;
const struct cache_entry *ce = active_cache[next];
-   if (!strncmp(ce->name, prefix, max_prefix_len)) {
+   if (!strncmp(ce->name, prefix, prefixlen)) {
first = next+1;
continue;
}
@@ -641,8 +644,7 @@ int cmd_ls_files(int argc, const char **argv, const char 
*cmd_prefix)
  show_killed || show_modified || show_resolve_undo))
show_cached = 1;
 
-   if (max_prefix)
-   prune_cache(max_prefix);
+   prune_cache(max_prefix, max_prefix_len);
if (with_tree) {
/*
 * Basic sanity check; show-stages and show-unmerged
-- 
2.11.1

[PATCH 2/2] ls-files: move only kept cache entries in prune_cache()

2017-02-10 Thread René Scharfe

prune_cache() first identifies those entries at the start of the sorted
array that can be discarded.  Then it moves the rest of the entries up.
Last it identifies the unwanted trailing entries among the moved ones
and cuts them off.

Change the order: Identify both start *and* end of the range to keep
first and then move only those entries to the top.  The resulting code
is slightly shorter and a bit more efficient.

Signed-off-by: Rene Scharfe 
---
The performance impact is probably only measurable with a *really* big
index.

 builtin/ls-files.c | 9 -
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/builtin/ls-files.c b/builtin/ls-files.c
index 18105ec7ea..1c0f057d02 100644
--- a/builtin/ls-files.c
+++ b/builtin/ls-files.c
@@ -379,10 +379,7 @@ static void prune_cache(const char *prefix, size_t 
prefixlen)
pos = cache_name_pos(prefix, prefixlen);
if (pos < 0)
pos = -pos-1;
-   memmove(active_cache, active_cache + pos,
-   (active_nr - pos) * sizeof(struct cache_entry *));
-   active_nr -= pos;
-   first = 0;
+   first = pos;
last = active_nr;
while (last > first) {
int next = (last + first) >> 1;
@@ -393,7 +390,9 @@ static void prune_cache(const char *prefix, size_t 
prefixlen)
}
last = next;
}
-   active_nr = last;
+   memmove(active_cache, active_cache + pos,
+   (last - pos) * sizeof(struct cache_entry *));
+   active_nr = last - pos;
 }
 
 /*
-- 
2.11.1

1 2 3 4 5 6 7 8 9 10 >

1 - 100 of 1338 matches

Mail list logo