Re: bug: git-archive does not use the zip64 extension for archives with more than 16k entries
Am 11.08.2015 um 12:40 schrieb Johannes Schauer: Hi, for repositories with more than 16k files and folders, git-archive will create zip files which store the wrong number of entries. That is, it stores the number of entries modulo 16k. This will break unpackers that do not include code to support this brokenness. The limit is rather 65535 entries, isn't it? The entries field has two bytes, and they are used fully. Which programs are affected? InfoZIP Zip 3.0 and 7-Zip 9.20 seem to handle an archive with more entries just fine. The built-in functionality of Windows 10 doesn't. Besides, 64K entries should be enough for anybody. ;-) Seriously, though: What kind of repository has that many files and uses the ZIP format to distribute snapshots? Just curious. Instead, git-archive should use the zip64 extension to handle more than 16k files and folders correctly. That seems to be what InfoZIP does and Windows 10 handles it just fine. If lower Windows versions and other popular extractors can unzip such archives as well then this might indeed be the way to go. Thanks, René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 3/3] archive-zip: support more than 65535 entries
Support more than 65535 entries cleanly by writing a "zip64 end of central directory record" (with a 64-bit field for the number of entries) before the usual "end of central directory record" (which contains only a 16-bit field). InfoZIP's zip does the same. Archives with 65535 or less entries are not affected. Programs that extract all files like InfoZIP's zip and 7-Zip ignored the field and could extract all files already. Software that relies on the ZIP file directory to show a list of contained files quickly to simulate to normal directory like Windows' built-in ZIP functionality only saw a subset of the included files. Windows supports ZIP64 since Vista according to https://en.wikipedia.org/wiki/Zip_%28file_format%29#ZIP64. Suggested-by: Johannes Schauer Signed-off-by: Rene Scharfe --- archive-zip.c | 93 +++-- t/t5004-archive-corner-cases.sh | 2 +- 2 files changed, 91 insertions(+), 4 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index 2a76156..9db4735 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -16,7 +16,9 @@ static unsigned int zip_dir_size; static unsigned int zip_offset; static unsigned int zip_dir_offset; -static unsigned int zip_dir_entries; +static uint64_t zip_dir_entries; + +static unsigned int max_creator_version; #define ZIP_DIRECTORY_MIN_SIZE (1024 * 1024) #define ZIP_STREAM (1 << 3) @@ -86,6 +88,28 @@ struct zip_extra_mtime { unsigned char _end[1]; }; +struct zip64_dir_trailer { + unsigned char magic[4]; + unsigned char record_size[8]; + unsigned char creator_version[2]; + unsigned char version[2]; + unsigned char disk[4]; + unsigned char directory_start_disk[4]; + unsigned char entries_on_this_disk[8]; + unsigned char entries[8]; + unsigned char size[8]; + unsigned char offset[8]; + unsigned char _end[1]; +}; + +struct zip64_dir_trailer_locator { + unsigned char magic[4]; + unsigned char disk[4]; + unsigned char offset[8]; + unsigned char number_of_disks[4]; + unsigned char _end[1]; +}; + /* * On ARM, padding is added at the end of the struct, so a simple * sizeof(struct ...) reports two bytes more than the payload size @@ -98,6 +122,12 @@ struct zip_extra_mtime { #define ZIP_EXTRA_MTIME_SIZE offsetof(struct zip_extra_mtime, _end) #define ZIP_EXTRA_MTIME_PAYLOAD_SIZE \ (ZIP_EXTRA_MTIME_SIZE - offsetof(struct zip_extra_mtime, flags)) +#define ZIP64_DIR_TRAILER_SIZE offsetof(struct zip64_dir_trailer, _end) +#define ZIP64_DIR_TRAILER_RECORD_SIZE \ + (ZIP64_DIR_TRAILER_SIZE - \ +offsetof(struct zip64_dir_trailer, creator_version)) +#define ZIP64_DIR_TRAILER_LOCATOR_SIZE \ + offsetof(struct zip64_dir_trailer_locator, _end) static void copy_le16(unsigned char *dest, unsigned int n) { @@ -113,6 +143,31 @@ static void copy_le32(unsigned char *dest, unsigned int n) dest[3] = 0xff & (n >> 030); } +static void copy_le64(unsigned char *dest, uint64_t n) +{ + dest[0] = 0xff & n; + dest[1] = 0xff & (n >> 010); + dest[2] = 0xff & (n >> 020); + dest[3] = 0xff & (n >> 030); + dest[4] = 0xff & (n >> 040); + dest[5] = 0xff & (n >> 050); + dest[6] = 0xff & (n >> 060); + dest[7] = 0xff & (n >> 070); +} + +static uint64_t clamp_max(uint64_t n, uint64_t max, int *clamped) +{ + if (n <= max) + return n; + *clamped = 1; + return max; +} + +static void copy_le16_clamp(unsigned char *dest, uint64_t n, int *clamped) +{ + copy_le16(dest, clamp_max(n, 0x, clamped)); +} + static void *zlib_deflate_raw(void *data, unsigned long size, int compression_level, unsigned long *compressed_size) @@ -282,6 +337,9 @@ static int write_zip_entry(struct archiver_args *args, sha1_to_hex(sha1)); } + if (creator_version > max_creator_version) + max_creator_version = creator_version; + if (buffer && method == 8) { out = deflated = zlib_deflate_raw(buffer, size, args->compression_level, @@ -439,20 +497,49 @@ static int write_zip_entry(struct archiver_args *args, return 0; } +static void write_zip64_trailer(void) +{ + struct zip64_dir_trailer trailer64; + struct zip64_dir_trailer_locator locator64; + + copy_le32(trailer64.magic, 0x06064b50); + copy_le64(trailer64.record_size, ZIP64_DIR_TRAILER_RECORD_SIZE); + copy_le16(trailer64.creator_version, max_creator_version); + copy_le16(trailer64.version, 45); + copy_le32(trailer64.disk, 0); + copy_le32(trailer64.directory_start_disk, 0); + copy_le64(trailer64.entries_on_this_disk, zip_dir_entries); + copy_le64(trailer64.entries, zip_dir_entries); + copy_le64(trailer64.size, zip_dir_offset
[PATCH 2/3] archive-zip: use a local variable to store the creator version
Use a simpler conditional right next to the code which makes a higher creator version necessary -- namely symlink handling and support for executable files -- instead of a long line with a ternary operator. The resulting code has more lines but is simpler and allows reuse of the value easily. Signed-off-by: Rene Scharfe --- archive-zip.c | 6 -- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index ae3d67f..2a76156 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -223,6 +223,7 @@ static int write_zip_entry(struct archiver_args *args, unsigned long size; int is_binary = -1; const char *path_without_prefix = path + args->baselen; + unsigned int creator_version = 0; crc = crc32(0, NULL, 0); @@ -251,6 +252,8 @@ static int write_zip_entry(struct archiver_args *args, method = 0; attr2 = S_ISLNK(mode) ? ((mode | 0777) << 16) : (mode & 0111) ? ((mode) << 16) : 0; + if (S_ISLNK(mode) || (mode & 0111)) + creator_version = 0x0317; if (S_ISREG(mode) && args->compression_level != 0 && size > 0) method = 8; @@ -303,8 +306,7 @@ static int write_zip_entry(struct archiver_args *args, } copy_le32(dirent.magic, 0x02014b50); - copy_le16(dirent.creator_version, - S_ISLNK(mode) || (S_ISREG(mode) && (mode & 0111)) ? 0x0317 : 0); + copy_le16(dirent.creator_version, creator_version); copy_le16(dirent.version, 10); copy_le16(dirent.flags, flags); copy_le16(dirent.compression_method, method); -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH 1/3] t5004: test ZIP archives with many entries
A ZIP file directory has a 16-bit field for the number of entries it contains. There are 64-bit extensions to deal with that. Demonstrate that git archive --format=zip currently doesn't use them and instead overflows the field. InfoZIP's unzip doesn't care about this field and extracts all files anyway. Software that uses the directory for presenting a filesystem like view quickly -- notably Windows -- depends on it, but doesn't lend itself to an automatic test case easily. Use InfoZIP's zipinfo, which probably isn't available everywhere but at least can provides *some* way to check this field. To speed things up a bit create and commit only a subset of the files and build a fake tree out of duplicates and pass that to git archive. Signed-off-by: Rene Scharfe --- t/t5004-archive-corner-cases.sh | 40 1 file changed, 40 insertions(+) diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh index 654adda..c6bd729 100755 --- a/t/t5004-archive-corner-cases.sh +++ b/t/t5004-archive-corner-cases.sh @@ -115,4 +115,44 @@ test_expect_success 'archive empty subtree by direct pathspec' ' check_dir extract sub ' +ZIPINFO=zipinfo + +test_lazy_prereq ZIPINFO ' + n=$("$ZIPINFO" "$TEST_DIRECTORY"/t5004/empty.zip | sed -n "2s/.* //p") + test "x$n" = "x0" +' + +test_expect_failure ZIPINFO 'zip archive with many entries' ' + # add a directory with 256 files + mkdir 00 && + for a in 0 1 2 3 4 5 6 7 8 9 a b c d e f + do + for b in 0 1 2 3 4 5 6 7 8 9 a b c d e f + do + : >00/$a$b + done + done && + git add 00 && + git commit -m "256 files in 1 directory" && + + # duplicate it to get 65536 files in 256 directories + subtree=$(git write-tree --prefix=00/) && + for c in 0 1 2 3 4 5 6 7 8 9 a b c d e f + do + for d in 0 1 2 3 4 5 6 7 8 9 a b c d e f + do + echo "04 tree $subtree $c$d" + done + done >tree && + tree=$(git mktree expect && + "$ZIPINFO" many.zip | head -2 | sed -n "2s/.* //p" >actual && + test_cmp expect actual +' + test_done -- 2.5.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/3] t5004: test ZIP archives with many entries
Am 23.08.2015 um 07:54 schrieb Eric Sunshine: > On Sat, Aug 22, 2015 at 3:06 PM, René Scharfe wrote: >> diff --git a/t/t5004-archive-corner-cases.sh >> b/t/t5004-archive-corner-cases.sh >> index 654adda..c6bd729 100755 >> --- a/t/t5004-archive-corner-cases.sh >> +++ b/t/t5004-archive-corner-cases.sh >> @@ -115,4 +115,44 @@ test_expect_success 'archive empty subtree by direct >> pathspec' ' >> check_dir extract sub >> ' >> >> +ZIPINFO=zipinfo >> + >> +test_lazy_prereq ZIPINFO ' >> + n=$("$ZIPINFO" "$TEST_DIRECTORY"/t5004/empty.zip | sed -n "2s/.* >> //p") >> + test "x$n" = "x0" >> +' > > Unfortunately, this sed expression isn't portable due to dissimilar > output of various zipinfo implementations. On Linux, the output of > zipinfo is: > > $ zipinfo t/t5004/empty.zip > Archive: t/t5004/empty.zip > Zip file size: 62 bytes, number of entries: 0 > Empty zipfile. > $ > > however, on Mac OS X: > > $ zipinfo t/t5004/empty.zip > Archive: t/t5004/empty.zip 62 bytes 0 files > Empty zipfile. > $ > > and on FreeBSD, the zipinfo command seems to have been removed > altogether in favor of "unzip -Z" (emulate zipinfo). Thanks for your thorough checks! I suspected that zipinfo's output might be formatted differently on different platforms and tried to guard against it by checking for the number zero there. Git's ZIP file creation is platform independent (modulo bugs), so having a test run at least somewhere should suffice. In theory. We could add support for the one-line-summary variant on OS X easily, though. > One might hope that "unzip -Z" would be a reasonable replacement for > zipinfo, however, it is apparently only partially implemented on > FreeBSD, and requires that -1 be passed, as well. Even with "unzip -Z > -1", there are issues. The output on Linux and Mac OS X is: > > $ unzip -Z -1 t/t5004/empty.zip > Empty zipfile. > $ > > but FreeBSD differs: > > $ unzip -Z -1 t/t5004/empty.zip > $ > > With a non-empty zip file, the output is identical on all platforms: > > $ unzip -Z -1 twofiles.zip > file1 > file2 > $ > > So, if you combine that with "wc -l" or test_line_count, you may have > a portable and reliable entry counter. Counting all entries is slow, and more importantly it's not what we want. In this test we need the number of entries recorded in the ZIP directory, not the actual number of entries found by scanning the archive, or the directory. On Linux "unzip -Z -1 many.zip | wc -l" reports 65792 even before adding ZIP64 support; only without -1 we get the interesting numbers (specifically with "unzip -Z many.zip | sed -n '2p;$p'"): Zip file size: 6841366 bytes, number of entries: 256 65792 files, 0 bytes uncompressed, 0 bytes compressed: 0.0% > With these three patches applied, Mac OS X has trouble with 'many.zip': > > $ unzip -Z -1 many.zip > warning [many.zip]: 76 extra bytes at beginning or within zipfile >(attempting to process anyway) > error [many.zip]: reported length of central directory is >-76 bytes too long (Atari STZip zipfile? J.H.Holm ZIPSPLIT 1.1 >zipfile?). Compensating... > 00/ > 00/00 > ... > ff/ff > error: expected central file header signature not found (file >#65793). (please check that you have transferred or created the >zipfile in the appropriate BINARY mode and that you have compiled >UnZip properly) > > And FreeBSD doesn't like it either: > > $ unzip -Z -1 many.zip > unzip: Invalid central directory signature > $ > Looks like they don't support ZIP64. Or I got some of the fields wrong after all. https://en.wikipedia.org/wiki/Zip_%28file_format%29#ZIP64 says: "OS X Yosemite does support the creation of ZIP64 archives, but does not support unzipping these archives using the shipped unzip command-line utility or graphical Archive Utility.[citation needed]". How does unzip react to a ZIP file with more than 65535 entries that was created natively on these platforms? And what does zipinfo (a real one, without -1) report at the top for such files? Thanks, René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Eric Sunshine mail delivery failure
Eric, hope you see this reply on the list. Direct replies to sunsh...@sunshineco.com are rejected by my mail provider on submit in Thunderbird with the following message: Requested action not taken: mailbox unavailable invalid DNS MX or A/ resource record. And with this one when using their web interface: A message that you sent could not be delivered to one or more of its recipients. This is a permanent error. The following address failed: "sunsh...@sunshineco.com": no valid MX hosts found It seems web.de wants you to get an record before I'm allowed to send mails to you. Sounds crazy. Sorry about that. Time to find a better provider, I guess. :-( René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 60/68] prefer memcpy to strcpy
Am 24.09.2015 um 23:08 schrieb Jeff King: When we already know the length of a string (e.g., because we just malloc'd to fit it), it's nicer to use memcpy than strcpy, as it makes it more obvious that we are not going to overflow the buffer (because the size we pass matches the size in the allocation). This also eliminates calls to strcpy, which make auditing the code base harder. Signed-off-by: Jeff King --- compat/nedmalloc/nedmalloc.c | 5 +++-- fast-import.c| 5 +++-- revision.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c index 609ebba..a0a16eb 100644 --- a/compat/nedmalloc/nedmalloc.c +++ b/compat/nedmalloc/nedmalloc.c @@ -957,8 +957,9 @@ char *strdup(const char *s1) { char *s2 = 0; if (s1) { - s2 = malloc(strlen(s1) + 1); - strcpy(s2, s1); + size_t len = strlen(s1) + 1; + s2 = malloc(len); + memcpy(s2, s1, len); This leaves the last byte uninitialized; it was set to NUL by strcpy() before. } return s2; } diff --git a/fast-import.c b/fast-import.c index 895c6b4..cf6d8bc 100644 --- a/fast-import.c +++ b/fast-import.c @@ -644,8 +644,9 @@ static void *pool_calloc(size_t count, size_t size) static char *pool_strdup(const char *s) { - char *r = pool_alloc(strlen(s) + 1); - strcpy(r, s); + size_t len = strlen(s) + 1; + char *r = pool_alloc(len); + memcpy(r, s, len); Same here. return r; } diff --git a/revision.c b/revision.c index af2a18e..2236463 100644 --- a/revision.c +++ b/revision.c @@ -38,7 +38,7 @@ char *path_name(const struct name_path *path, const char *name) } n = xmalloc(len); m = n + len - (nlen + 1); - strcpy(m, name); + memcpy(m, name, nlen + 1); This copies the NUL byte terminating the string, so it's OK. However, I wonder if using a strbuf for building the path in one go instead would simplify this function without too much of a performance impact. for (p = path; p; p = p->up) { if (p->elem_len) { m -= p->elem_len + 1; -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 60/68] prefer memcpy to strcpy
Am 27.09.2015 um 15:06 schrieb Torsten Bögershausen: On 2015-09-27 13.19, René Scharfe wrote: Am 24.09.2015 um 23:08 schrieb Jeff King: When we already know the length of a string (e.g., because we just malloc'd to fit it), it's nicer to use memcpy than strcpy, as it makes it more obvious that we are not going to overflow the buffer (because the size we pass matches the size in the allocation). This also eliminates calls to strcpy, which make auditing the code base harder. Signed-off-by: Jeff King --- compat/nedmalloc/nedmalloc.c | 5 +++-- fast-import.c| 5 +++-- revision.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c index 609ebba..a0a16eb 100644 --- a/compat/nedmalloc/nedmalloc.c +++ b/compat/nedmalloc/nedmalloc.c @@ -957,8 +957,9 @@ char *strdup(const char *s1) { char *s2 = 0; if (s1) { -s2 = malloc(strlen(s1) + 1); -strcpy(s2, s1); +size_t len = strlen(s1) + 1; +s2 = malloc(len); +memcpy(s2, s1, len); This leaves the last byte uninitialized; it was set to NUL by strcpy() before. len is == strlen() +1, which should cover the NUL: 1 byte extra for NUL is allocated, and memcpy will copy NUL from source. (Or do I miss somethong ?) No, you're right. Sorry for the noise. I fully blame this on lack of coffeine because my electric kettle just broke. O_o René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 60/68] prefer memcpy to strcpy
Am 27.09.2015 um 15:13 schrieb René Scharfe: Am 27.09.2015 um 15:06 schrieb Torsten Bögershausen: On 2015-09-27 13.19, René Scharfe wrote: Am 24.09.2015 um 23:08 schrieb Jeff King: When we already know the length of a string (e.g., because we just malloc'd to fit it), it's nicer to use memcpy than strcpy, as it makes it more obvious that we are not going to overflow the buffer (because the size we pass matches the size in the allocation). This also eliminates calls to strcpy, which make auditing the code base harder. Signed-off-by: Jeff King --- compat/nedmalloc/nedmalloc.c | 5 +++-- fast-import.c| 5 +++-- revision.c | 2 +- 3 files changed, 7 insertions(+), 5 deletions(-) diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c index 609ebba..a0a16eb 100644 --- a/compat/nedmalloc/nedmalloc.c +++ b/compat/nedmalloc/nedmalloc.c @@ -957,8 +957,9 @@ char *strdup(const char *s1) { char *s2 = 0; if (s1) { -s2 = malloc(strlen(s1) + 1); -strcpy(s2, s1); +size_t len = strlen(s1) + 1; +s2 = malloc(len); +memcpy(s2, s1, len); This leaves the last byte uninitialized; it was set to NUL by strcpy() before. len is == strlen() +1, which should cover the NUL: 1 byte extra for NUL is allocated, and memcpy will copy NUL from source. (Or do I miss somethong ?) No, you're right. Sorry for the noise. I fully blame this on lack of coffeine because my electric kettle just broke. O_o Thinking a bit more about it (slowly): The choice of the variable name might have been a factor as well. When I see "len" for a string then I don't expect it to include the trailing NUL. "size" would be better because I expect it to contain the number of bytes needed to store an object. René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] grep: use regcomp() for icase search with non-ascii patterns
Am 06.07.2015 um 14:42 schrieb Nguyễn Thái Ngọc Duy: Noticed-by: Plamen Totev Signed-off-by: Nguyễn Thái Ngọc Duy --- grep.c | 14 +++--- 1 file changed, 11 insertions(+), 3 deletions(-) diff --git a/grep.c b/grep.c index b58c7c6..48db15a 100644 --- a/grep.c +++ b/grep.c @@ -378,7 +378,7 @@ static void free_pcre_regexp(struct grep_pat *p) } #endif /* !USE_LIBPCRE */ -static int is_fixed(const char *s, size_t len) +static int is_fixed(const char *s, size_t len, int ignore_icase) { size_t i; @@ -391,6 +391,13 @@ static int is_fixed(const char *s, size_t len) for (i = 0; i < len; i++) { if (is_regex_special(s[i])) return 0; + /* +* The builtin substring search can only deal with case +* insensitivity in ascii range. If there is something outside +* of that range, fall back to regcomp. +*/ + if (ignore_icase && (unsigned char)s[i] >= 128) + return 0; How about "isascii(s[i])"? } return 1; @@ -398,18 +405,19 @@ static int is_fixed(const char *s, size_t len) static void compile_regexp(struct grep_pat *p, struct grep_opt *opt) { + int ignore_icase = opt->regflags & REG_ICASE || p->ignore_case; int err; p->word_regexp = opt->word_regexp; p->ignore_case = opt->ignore_case; Using p->ignore_case before this line, as in initialization of the new variable ignore_icase above, changes the meaning. - if (opt->fixed || is_fixed(p->pattern, p->patternlen)) + if (opt->fixed || is_fixed(p->pattern, p->patternlen, ignore_icase)) p->fixed = 1; else p->fixed = 0; if (p->fixed) { - if (opt->regflags & REG_ICASE || p->ignore_case) + if (ignore_case) ignore_icase instead? ignore_case is for the config variable core.ignorecase. Tricky. p->kws = kwsalloc(tolower_trans_tbl); else p->kws = kwsalloc(NULL); So the optimization before this patch was that if a string was searched for without -F then it would be treated as a fixed string anyway unless it contained regex special characters. Searching for fixed strings using the kwset functions is faster than using regcomp and regexec, which makes the exercise worthwhile. Your patch disables the optimization if non-ASCII characters are searched for because kwset handles case transformations only for ASCII chars. Another consequence of this limitation is that -Fi (explicit case-insensitive fixed-string search) doesn't work properly with non-ASCII chars neither. How can we handle this one? Fall back to regcomp by escaping all special characters? Or at least warn? Tests would be nice. :) René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
git@vger.kernel.org
Signed-off-by: Rene Scharfe --- GIT_TEST_CHAIN_LINT complains about the missing &&s and is enabled by default now. t/perf/p5310-pack-bitmaps.sh | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/t/perf/p5310-pack-bitmaps.sh b/t/perf/p5310-pack-bitmaps.sh index f8ed857..de2a224 100755 --- a/t/perf/p5310-pack-bitmaps.sh +++ b/t/perf/p5310-pack-bitmaps.sh @@ -39,14 +39,14 @@ test_expect_success 'create partial bitmap state' ' # now kill off all of the refs and pretend we had # just the one tip - rm -rf .git/logs .git/refs/* .git/packed-refs - git update-ref HEAD $cutoff + rm -rf .git/logs .git/refs/* .git/packed-refs && + git update-ref HEAD $cutoff && # and then repack, which will leave us with a nice # big bitmap pack of the "old" history, and all of # the new history will be loose, as if it had been pushed # up incrementally and exploded via unpack-objects - git repack -Ad + git repack -Ad && # and now restore our original tip, as if the pushes # had happened -- 2.4.4 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
git@vger.kernel.org
Am 10.07.2015 um 22:50 schrieb Jeff King: Thanks, this definitely is a problem, but we already have a fix in the sb/p5310-and-chain topic. I thought that had been merged-up, but it looks like it is only in "next" right now. All the better. And I see it's in master now. René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] diff: parse ws-error-highlight option more strictly
Check if a matched token is followed by a delimiter before advancing the pointer arg. This avoids accepting composite words like "allnew" or "defaultcontext". Signed-off-by: Rene Scharfe --- diff.c | 7 ++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/diff.c b/diff.c index 87b16d5..0f17ec5 100644 --- a/diff.c +++ b/diff.c @@ -3653,7 +3653,12 @@ static void enable_patch_output(int *fmt) { static int parse_one_token(const char **arg, const char *token) { - return skip_prefix(*arg, token, arg) && (!**arg || **arg == ','); + const char *rest; + if (skip_prefix(*arg, token, &rest) && (!*rest || *rest == ',')) { + *arg = rest; + return 1; + } + return 0; } static int parse_ws_error_highlight(struct diff_options *opt, const char *arg) -- 2.4.4 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] archive-tar: write extended headers for far-future mtime
Am 16.06.2016 um 06:37 schrieb Jeff King: The ustar format represents timestamps as seconds since the epoch, but only has room to store 11 octal digits. To express anything larger, we need to use an extended header. This is exactly the same case we fixed for the size field in the previous commit, and the solution here follows the same pattern. This is even mentioned as an issue in f2f0267 (archive-tar: use xsnprintf for trivial formatting, 2015-09-24), but since it only affected things far in the future, it wasn't worth dealing with. But note that my calculations claiming thousands of years were off there; because our xsnprintf produces a NUL byte, we only have until the year 2242 to fix this. Given that this is just around the corner (geologically speaking), and because the fix for "size" makes doing this on top trivial, let's just make it work. Note that we don't (and never did) handle negative timestamps (i.e., before 1970). This would probably not be too hard to support in the same way, but since git does not support negative timestamps at all, I didn't bother here. Unlike the last patch for "size", this one is easy to test efficiently (the magic date is octal 010001): $ echo content >file $ git add file $ GIT_COMMITTER_DATE='@68719476737 +' \ git commit -q -m 'tempori parendum' $ git archive HEAD | tar tvf - -rw-rw-r-- root/root 8 4147-08-20 03:32 file With the current tip of master, this dies in xsnprintf (and prior to f2f0267, it overflowed into the checksum field, and the resulting tarfile claimed to be from the year 2242). However, I decided not to include this in the test suite, because I suspect it will create portability headaches, including: 1. The exact format of the system tar's "t" output. Probably not worth it. 2. System tars that cannot handle pax headers. In t5000 there is a simple interpreter for path headers for systems whose tar doesn't handle them. Adding one for mtime headers may be feasible. It's just a bit complicated to link a pax header file to the file it applies to when it doesn't also contain a path header. But we know that the mtime of all entries in tar files created by git archive are is the same, so we could simply read one header and then adjust the mtime of all files accordingly. 3. System tars tha cannot handle dates that far in the future. 4. If we replace "tar t" with "tar x", then filesystems that cannot handle dates that far in the future. We can test that by supplying a test archive and set a prerequisite if tar (possibly with our header interpreter) and the filesystem can cope with that. In other words, we do not really have a reliable tar reader for these corner cases, so the best we could do is a byte-for-byte comparison of the output. Signed-off-by: Jeff King --- archive-tar.c | 13 - 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/archive-tar.c b/archive-tar.c index 7340b64..749722f 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -185,6 +185,14 @@ static inline unsigned long ustar_size(uintmax_t size) return 0; } +static inline unsigned long ustar_mtime(time_t mtime) +{ + if (mtime < 0777) That should be less-or-equal, right? + return mtime; + else + return 0; +} + static void prepare_header(struct archiver_args *args, struct ustar_header *header, unsigned int mode, unsigned long size) @@ -192,7 +200,8 @@ static void prepare_header(struct archiver_args *args, xsnprintf(header->mode, sizeof(header->mode), "%07o", mode & 0); xsnprintf(header->size, sizeof(header->size), "%011lo", S_ISREG(mode) ? ustar_size(size) : 0); - xsnprintf(header->mtime, sizeof(header->mtime), "%011lo", (unsigned long) args->time); + xsnprintf(header->mtime, sizeof(header->mtime), "%011lo", + ustar_mtime(args->time)); xsnprintf(header->uid, sizeof(header->uid), "%07o", 0); xsnprintf(header->gid, sizeof(header->gid), "%07o", 0); @@ -292,6 +301,8 @@ static int write_tar_entry(struct archiver_args *args, if (ustar_size(size) != size) strbuf_append_ext_header_uint(&ext_header, "size", size); + if (ustar_mtime(args->time) != args->time) + strbuf_append_ext_header_uint(&ext_header, "mtime", args->time); prepare_header(args, &header, mode, size); -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB
Am 16.06.2016 um 06:37 schrieb Jeff King: The ustar format has a fixed-length field for the size of each file entry which is supposed to contain up to 11 bytes of octal-formatted data plus a NUL or space terminator. These means that the largest size we can represent is 0777, or 1 byte short of 8GB. The correct solution for a larger file, according to POSIX.1-2001, is to add an extended pax header, similar to how we handle long filenames. This patch does that, and writes zero for the size field in the ustar header (the last bit is not mentioned by POSIX, but it matches how GNU tar behaves with --format=pax). This should be a strict improvement over the current behavior, which is to die in xsnprintf with a "BUG". However, there's some interesting history here. Prior to f2f0267 (archive-tar: use xsnprintf for trivial formatting, 2015-09-24), we silently overflowed the "size" field. The extra bytes ended up in the "mtime" field of the header, which was then immediately written itself, overwriting our extra bytes. What that means depends on how many bytes we wrote. If the size was 64GB or greater, then we actually overflowed digits into the mtime field, meaning our value was was effectively right-shifted by those lost octal digits. And this patch is again a strict improvement over that. But if the size was between 8GB and 64GB, then our 12-byte field held all of the actual digits, and only our NUL terminator overflowed. According to POSIX, there should be a NUL or space at the end of the field. However, GNU tar seems to be lenient here, and will correctly parse a size up 64GB (minus one) from the field. So sizes in this range might have just worked, depending on the implementation reading the tarfile. This patch is mostly still an improvement there, as the 8GB limit is specifically mentioned in POSIX as the correct limit. But it's possible that it could be a regression (versus the pre-f2f0267 state) if all of the following are true: 1. You have a file between 8GB and 64GB. 2. Your tar implementation _doesn't_ know about pax extended headers. 3. Your tar implementation _does_ parse 12-byte sizes from the ustar header without a delimiter. It's probably not worth worrying about such an obscure set of conditions, but I'm documenting it here just in case. There's no test included here. I did confirm that this works (using GNU tar) with: $ dd if=/dev/zero seek=64G bs=1 count=1 of=huge $ git add huge $ git commit -q -m foo $ git archive HEAD | head -c 1 | tar tvf - 2>/dev/null -rw-rw-r-- root/root 68719476737 2016-06-15 21:07 huge Pre-f2f0267, this would yield a bogus size of 8GB, and post-f2f0267, git-archive simply dies. Unfortunately, it's quite an expensive test to run. For one thing, unless your filesystem supports files with holes, it takes 64GB of disk space (you might think piping straight to `hash-object --stdin` would be better, but it's not; that tries to buffer all 64GB in RAM!). Furthermore, hashing and compressing the object takes several minutes of CPU time. We could ship just the resulting compressed object data as a loose object, but even that takes 64MB. So sadly, this code path remains untested in the test suite. If we could set the limit to a lower value than 8GB for testing then we could at least check if the extended header is written, e.g. if ustar_size() could be convinced to return 0 every time using a hidden command line parameter or an environment variable or something better. Signed-off-by: Jeff King --- archive-tar.c | 28 +++- 1 file changed, 27 insertions(+), 1 deletion(-) diff --git a/archive-tar.c b/archive-tar.c index cb99df2..7340b64 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -137,6 +137,20 @@ static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword, strbuf_addch(sb, '\n'); } +/* + * Like strbuf_append_ext_header, but for numeric values. + */ +static void strbuf_append_ext_header_uint(struct strbuf *sb, + const char *keyword, + uintmax_t value) +{ + char buf[40]; /* big enough for 2^128 in decimal, plus NUL */ + int len; + + len = xsnprintf(buf, sizeof(buf), "%"PRIuMAX, value); + strbuf_append_ext_header(sb, keyword, buf, len); +} + static unsigned int ustar_header_chksum(const struct ustar_header *header) { const unsigned char *p = (const unsigned char *)header; @@ -163,12 +177,21 @@ static size_t get_path_prefix(const char *path, size_t pathlen, size_t maxlen) return i; } +static inline unsigned long ustar_size(uintmax_t size) +{ + if (size < 0777UL) Shouldn't that be less-or-equal? + return size; + else + return 0; +} + static void prepare_header(struct archiver_args *args, struct ustar_header *header, unsigned
Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB
Am 21.06.2016 um 17:59 schrieb Jeff King: > On Tue, Jun 21, 2016 at 12:54:11AM +0200, René Scharfe wrote: > >>> Unfortunately, it's quite an expensive test to run. For one >>> thing, unless your filesystem supports files with holes, it >>> takes 64GB of disk space (you might think piping straight to >>> `hash-object --stdin` would be better, but it's not; that >>> tries to buffer all 64GB in RAM!). Furthermore, hashing and >>> compressing the object takes several minutes of CPU time. >>> >>> We could ship just the resulting compressed object data as a >>> loose object, but even that takes 64MB. So sadly, this code >>> path remains untested in the test suite. >> >> If we could set the limit to a lower value than 8GB for testing then we >> could at least check if the extended header is written, e.g. if ustar_size() >> could be convinced to return 0 every time using a hidden command line >> parameter or an environment variable or something better. > > Yes, we could do that, though I think it loses most of the value of the > test. We can check that if we hit an arbitrary value we generate the pax > header, but I think what we _really_ care about is: did we generate an > output that somebody else's tar implementation can handle. I agree with the last point, but don't see how that diminishes the value of such a test. If we provide file sizes only through extended headers (the normal header field being set to 0) and we can extract files with correct sizes then tar must have interpreted those header as intended, right? (Or it just guessed the sizes by searching for the next header magic, but such a fallback won't be accurate for files ending with NUL characters due to NUL-padding, so we just have to add such a file.) René -- >8 -- Subject: archive-tar: test creation of pax extended size headers --- The value 120 is magic; we need it to pass the tests. That's because prepare_header() is used for building extended header records as well and we don't create extended headers for extended headers (not sure if that would work anyway), so they simply vanish when they're over the limit as their size field is set to zero. archive-tar.c | 7 ++- t/t5000-tar-tree.sh | 7 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/archive-tar.c b/archive-tar.c index f53e61c..fbbc4cc 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -14,6 +14,7 @@ static char block[BLOCKSIZE]; static unsigned long offset; static int tar_umask = 002; +static unsigned long ustar_size_max = 0777UL; static int write_tar_filter_archive(const struct archiver *ar, struct archiver_args *args); @@ -179,7 +180,7 @@ static size_t get_path_prefix(const char *path, size_t pathlen, size_t maxlen) static inline unsigned long ustar_size(uintmax_t size) { - if (size <= 0777UL) + if (size <= ustar_size_max) return size; else return 0; @@ -412,6 +413,10 @@ static int git_tar_config(const char *var, const char *value, void *cb) } return 0; } + if (!strcmp(var, "tar.ustarsizemax")) { + ustar_size_max = git_config_ulong(var, value); + return 0; + } return tar_filter_config(var, value, cb); } diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 4b68bba..03bb4c7 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -102,6 +102,7 @@ test_expect_success \ echo long filename >a/four$hundred && mkdir a/bin && test-genrandom "frotz" 50 >a/bin/sh && + printf "\0\0\0" >>a/bin/sh && printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 && printf "A not substituted O" >a/substfile2 && if test_have_prereq SYMLINKS; then @@ -157,6 +158,12 @@ test_expect_success 'git-archive --prefix=olde-' ' check_tar with_olde-prefix olde- +test_expect_success !TAR_NEEDS_PAX_FALLBACK 'pax extended size headers' ' + git -c tar.ustarsizemax=120 archive HEAD >extended_size_header.tar +' + +check_tar extended_size_header + test_expect_success 'git archive on large files' ' test_config core.bigfilethreshold 1 && git archive HEAD >b3.tar && -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB
Am 21.06.2016 um 22:42 schrieb René Scharfe: > The value 120 is magic; we need it to pass the tests. That's > because prepare_header() is used for building extended header > records as well and we don't create extended headers for extended > headers (not sure if that would work anyway), so they simply > vanish when they're over the limit as their size field is set to > zero. So how about something like this to make sure extended headers are only written for regular files and not for other extended headers? --- archive-tar.c | 10 ++ 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index ed562d4..f53e61c 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -199,7 +199,7 @@ static void prepare_header(struct archiver_args *args, { xsnprintf(header->mode, sizeof(header->mode), "%07o", mode & 0); xsnprintf(header->size, sizeof(header->size), "%011lo", - S_ISREG(mode) ? ustar_size(size) : 0); + S_ISREG(mode) ? size : 0); xsnprintf(header->mtime, sizeof(header->mtime), "%011lo", ustar_mtime(args->time)); @@ -240,7 +240,7 @@ static int write_tar_entry(struct archiver_args *args, struct ustar_header header; struct strbuf ext_header = STRBUF_INIT; unsigned int old_mode = mode; - unsigned long size; + unsigned long size, size_in_header; void *buffer; int err = 0; @@ -299,12 +299,14 @@ static int write_tar_entry(struct archiver_args *args, memcpy(header.linkname, buffer, size); } - if (S_ISREG(mode) && ustar_size(size) != size) + size_in_header = S_ISREG(mode) ? ustar_size(size) : size; + if (size_in_header != size) strbuf_append_ext_header_uint(&ext_header, "size", size); + if (ustar_mtime(args->time) != args->time) strbuf_append_ext_header_uint(&ext_header, "mtime", args->time); - prepare_header(args, &header, mode, size); + prepare_header(args, &header, mode, size_in_header); if (ext_header.len > 0) { err = write_extended_header(args, sha1, ext_header.buf, -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB
Am 21.06.2016 um 17:59 schrieb Jeff King: > On Tue, Jun 21, 2016 at 12:54:11AM +0200, René Scharfe wrote: > >>> Unfortunately, it's quite an expensive test to run. For one >>> thing, unless your filesystem supports files with holes, it >>> takes 64GB of disk space (you might think piping straight to >>> `hash-object --stdin` would be better, but it's not; that >>> tries to buffer all 64GB in RAM!). Furthermore, hashing and >>> compressing the object takes several minutes of CPU time. >>> >>> We could ship just the resulting compressed object data as a >>> loose object, but even that takes 64MB. So sadly, this code >>> path remains untested in the test suite. >> >> If we could set the limit to a lower value than 8GB for testing then we >> could at least check if the extended header is written, e.g. if ustar_size() >> could be convinced to return 0 every time using a hidden command line >> parameter or an environment variable or something better. > > Yes, we could do that, though I think it loses most of the value of the > test. We can check that if we hit an arbitrary value we generate the pax > header, but I think what we _really_ care about is: did we generate an > output that somebody else's tar implementation can handle. I agree with the last point, but don't see how that diminishes the value of such a test. If we provide file sizes only through extended headers (the normal header field being set to 0) and we can extract files with correct sizes then tar must have interpreted those header as intended, right? (Or it just guessed the sizes by searching for the next header magic, but such a fallback won't be accurate for files ending with NUL characters due to NUL-padding, so we just have to add such a file.) René -- >8 -- Subject: archive-tar: test creation of pax extended size headers --- The value 120 is magic; we need it to pass the tests. That's because prepare_header() is used for building extended header records as well and we don't create extended headers for extended headers (not sure if that would work anyway), so they simply vanish when they're over the limit as their size field is set to zero. archive-tar.c | 7 ++- t/t5000-tar-tree.sh | 7 +++ 2 files changed, 13 insertions(+), 1 deletion(-) diff --git a/archive-tar.c b/archive-tar.c index f53e61c..fbbc4cc 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -14,6 +14,7 @@ static char block[BLOCKSIZE]; static unsigned long offset; static int tar_umask = 002; +static unsigned long ustar_size_max = 0777UL; static int write_tar_filter_archive(const struct archiver *ar, struct archiver_args *args); @@ -179,7 +180,7 @@ static size_t get_path_prefix(const char *path, size_t pathlen, size_t maxlen) static inline unsigned long ustar_size(uintmax_t size) { - if (size <= 0777UL) + if (size <= ustar_size_max) return size; else return 0; @@ -412,6 +413,10 @@ static int git_tar_config(const char *var, const char *value, void *cb) } return 0; } + if (!strcmp(var, "tar.ustarsizemax")) { + ustar_size_max = git_config_ulong(var, value); + return 0; + } return tar_filter_config(var, value, cb); } diff --git a/t/t5000-tar-tree.sh b/t/t5000-tar-tree.sh index 4b68bba..03bb4c7 100755 --- a/t/t5000-tar-tree.sh +++ b/t/t5000-tar-tree.sh @@ -102,6 +102,7 @@ test_expect_success \ echo long filename >a/four$hundred && mkdir a/bin && test-genrandom "frotz" 50 >a/bin/sh && + printf "\0\0\0" >>a/bin/sh && printf "A\$Format:%s\$O" "$SUBSTFORMAT" >a/substfile1 && printf "A not substituted O" >a/substfile2 && if test_have_prereq SYMLINKS; then @@ -157,6 +158,12 @@ test_expect_success 'git-archive --prefix=olde-' ' check_tar with_olde-prefix olde- +test_expect_success !TAR_NEEDS_PAX_FALLBACK 'pax extended size headers' ' + git -c tar.ustarsizemax=120 archive HEAD >extended_size_header.tar +' + +check_tar extended_size_header + test_expect_success 'git archive on large files' ' test_config core.bigfilethreshold 1 && git archive HEAD >b3.tar && -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB
Am 21.06.2016 um 23:02 schrieb Jeff King: On Tue, Jun 21, 2016 at 10:42:52PM +0200, René Scharfe wrote: If we could set the limit to a lower value than 8GB for testing then we could at least check if the extended header is written, e.g. if ustar_size() could be convinced to return 0 every time using a hidden command line parameter or an environment variable or something better. Yes, we could do that, though I think it loses most of the value of the test. We can check that if we hit an arbitrary value we generate the pax header, but I think what we _really_ care about is: did we generate an output that somebody else's tar implementation can handle. I agree with the last point, but don't see how that diminishes the value of such a test. If we provide file sizes only through extended headers (the normal header field being set to 0) and we can extract files with correct sizes then tar must have interpreted those header as intended, right? The diminished value is: 1. This is a situation that doesn't actually happen in real life. 2. Now we're carrying extra code inside git only for the sake of testing (which can have its own bugs, etc). Still, it may be better than nothing. -- >8 -- Subject: archive-tar: test creation of pax extended size headers --- The value 120 is magic; we need it to pass the tests. That's because prepare_header() is used for building extended header records as well and we don't create extended headers for extended headers (not sure if that would work anyway), so they simply vanish when they're over the limit as their size field is set to zero. Right, so this is sort of what I meant in (2). Now we have a tar.ustarsizemax setting shipped in git that is totally broken if you set it to "1". I can live with it as a tradeoff, but it is definitely a negative IMHO. Yes, it's only useful as a debug flag, but the fact that it breaks highlights a (admittedly mostly theoretical) shortcoming: The code doesn't handle extended headers that are over the size limit as nicely as it could. So the test was already worth it, even if won't land in master. :) René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 1/2] archive-tar: write extended headers for file sizes >= 8GB
Am 21.06.2016 um 23:04 schrieb Jeff King: > On Tue, Jun 21, 2016 at 10:57:43PM +0200, René Scharfe wrote: > >> Am 21.06.2016 um 22:42 schrieb René Scharfe: >>> The value 120 is magic; we need it to pass the tests. That's >>> because prepare_header() is used for building extended header >>> records as well and we don't create extended headers for extended >>> headers (not sure if that would work anyway), so they simply >>> vanish when they're over the limit as their size field is set to >>> zero. >> >> So how about something like this to make sure extended headers are >> only written for regular files and not for other extended headers? > > This is quite similar to what I wrote originally, but I moved to the > ustar_size() format to better match the mtime code (which needs > something like that, because we pass around args->time). > > I think you could drop ustar_size() completely here and just put the > "if" into write_tar_entry(). Which would look like this: --- archive-tar.c | 24 ++-- 1 file changed, 22 insertions(+), 2 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index cb99df2..274bdfa 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -137,6 +137,20 @@ static void strbuf_append_ext_header(struct strbuf *sb, const char *keyword, strbuf_addch(sb, '\n'); } +/* + * Like strbuf_append_ext_header, but for numeric values. + */ +static void strbuf_append_ext_header_uint(struct strbuf *sb, + const char *keyword, + uintmax_t value) +{ + char buf[40]; /* big enough for 2^128 in decimal, plus NUL */ + int len; + + len = xsnprintf(buf, sizeof(buf), "%"PRIuMAX, value); + strbuf_append_ext_header(sb, keyword, buf, len); +} + static unsigned int ustar_header_chksum(const struct ustar_header *header) { const unsigned char *p = (const unsigned char *)header; @@ -208,7 +222,7 @@ static int write_tar_entry(struct archiver_args *args, struct ustar_header header; struct strbuf ext_header = STRBUF_INIT; unsigned int old_mode = mode; - unsigned long size; + unsigned long size, size_in_header; void *buffer; int err = 0; @@ -267,7 +281,13 @@ static int write_tar_entry(struct archiver_args *args, memcpy(header.linkname, buffer, size); } - prepare_header(args, &header, mode, size); + size_in_header = size; + if (S_ISREG(mode) && size > 0777UL) { + size_in_header = 0; + strbuf_append_ext_header_uint(&ext_header, "size", size); + } + + prepare_header(args, &header, mode, size_in_header); if (ext_header.len > 0) { err = write_extended_header(args, sha1, ext_header.buf, -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] archive-tar: write extended headers for far-future mtime
Am 21.06.2016 um 00:54 schrieb René Scharfe: Am 16.06.2016 um 06:37 schrieb Jeff King: 2. System tars that cannot handle pax headers. In t5000 there is a simple interpreter for path headers for systems whose tar doesn't handle them. Adding one for mtime headers may be feasible. It's just a bit complicated to link a pax header file to the file it applies to when it doesn't also contain a path header. But we know that the mtime of all entries in tar files created by git archive are is the same, so we could simply read one header and then adjust the mtime of all files accordingly. This brings me to the idea of using a single global pax header for mtime instead of one per entry. It reduces the size of the archive and allows for slightly easier testing -- it just fits better since we know that all our mtimes are the same. René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] archive-tar: write extended headers for far-future mtime
Am 23.06.2016 um 21:22 schrieb Jeff King: On Wed, Jun 22, 2016 at 07:46:25AM +0200, René Scharfe wrote: Am 21.06.2016 um 00:54 schrieb René Scharfe: Am 16.06.2016 um 06:37 schrieb Jeff King: 2. System tars that cannot handle pax headers. In t5000 there is a simple interpreter for path headers for systems whose tar doesn't handle them. Adding one for mtime headers may be feasible. It's just a bit complicated to link a pax header file to the file it applies to when it doesn't also contain a path header. But we know that the mtime of all entries in tar files created by git archive are is the same, so we could simply read one header and then adjust the mtime of all files accordingly. This brings me to the idea of using a single global pax header for mtime instead of one per entry. It reduces the size of the archive and allows for slightly easier testing -- it just fits better since we know that all our mtimes are the same. Yeah, I had a similar thought while writing it, but wasn't quite sure how that was supposed to be formatted. I modeled my output after what GNU tar writes, but of course they are expecting a different mtime for each file. I'll look into how global pax headers should look. Moving the strbuf_append_ext_header() call into write_global_extended_header() should be enough. The changes to the test script are a bit more interesting, though. Perhaps I can come up with something over the weekend. René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] am: ignore return value of write_file()
write_file() either returns 0 or dies, so there is no point in checking its return value. The callers of the wrappers write_state_text(), write_state_count() and write_state_bool() consequently already ignore their return values. Stop pretenting we care and make them void. Signed-off-by: Rene Scharfe --- builtin/am.c | 18 +- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/builtin/am.c b/builtin/am.c index d5da5fe..339e360 100644 --- a/builtin/am.c +++ b/builtin/am.c @@ -184,22 +184,22 @@ static inline const char *am_path(const struct am_state *state, const char *path /** * For convenience to call write_file() */ -static int write_state_text(const struct am_state *state, - const char *name, const char *string) +static void write_state_text(const struct am_state *state, +const char *name, const char *string) { - return write_file(am_path(state, name), "%s", string); + write_file(am_path(state, name), "%s", string); } -static int write_state_count(const struct am_state *state, -const char *name, int value) +static void write_state_count(const struct am_state *state, + const char *name, int value) { - return write_file(am_path(state, name), "%d", value); + write_file(am_path(state, name), "%d", value); } -static int write_state_bool(const struct am_state *state, - const char *name, int value) +static void write_state_bool(const struct am_state *state, +const char *name, int value) { - return write_state_text(state, name, value ? "t" : "f"); + write_state_text(state, name, value ? "t" : "f"); } /** -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] notes-merge: use O_EXCL to avoid overwriting existing files
Use the open(2) flag O_EXCL to ensure the file doesn't already exist instead of (racily) calling stat(2) through file_exists(). While at it switch to xopen() to reduce code duplication and get more consistent error messages. Signed-off-by: Rene Scharfe --- notes-merge.c | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/notes-merge.c b/notes-merge.c index b7814c9..2b29fc4 100644 --- a/notes-merge.c +++ b/notes-merge.c @@ -298,12 +298,8 @@ static void write_buf_to_worktree(const unsigned char *obj, char *path = git_pathdup(NOTES_MERGE_WORKTREE "/%s", sha1_to_hex(obj)); if (safe_create_leading_directories_const(path)) die_errno("unable to create directory for '%s'", path); - if (file_exists(path)) - die("found existing file at '%s'", path); - fd = open(path, O_WRONLY | O_TRUNC | O_CREAT, 0666); - if (fd < 0) - die_errno("failed to open '%s'", path); + fd = xopen(path, O_WRONLY | O_EXCL | O_CREAT, 0666); while (size > 0) { long ret = write_in_full(fd, buf, size); -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] .gitattributes: set file type for C files
Set the diff attribute for C source file to "cpp" in order to improve git's ability to determine hunk headers. In particular it helps avoid showing unindented labels in hunk headers. That in turn is useful for git diff -W and git grep -W, which show whole functions now instead of stopping at a label. Signed-off-by: Rene Scharfe --- .gitattributes | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/.gitattributes b/.gitattributes index 5e98806..320e33c 100644 --- a/.gitattributes +++ b/.gitattributes @@ -1,3 +1,3 @@ * whitespace=!indent,trail,space -*.[ch] whitespace=indent,trail,space +*.[ch] whitespace=indent,trail,space diff=cpp *.sh whitespace=indent,trail,space -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] am: ignore return value of write_file()
Hi Dscho, Am 08.07.2016 um 08:33 schrieb Johannes Schindelin: On Thu, 7 Jul 2016, René Scharfe wrote: write_file() either returns 0 or dies, so there is no point in checking its return value. The question is whether it makes sense for write_file() to die(). It is a library function and not every caller can be happy with that function to exit the program when some file could not be written, without a chance to tell the user what to do about the situation. If write_file() was defined in builtin/am.c, as a static function, I would grudgingly acquiesce, but it is not. IMO it would be better to fix write_file() to *not* die() but return error() instead. there is write_file_gently() for that purpose, but it's used only by a single caller that exits on failure after all, and in fact Peff's series drops it. So I think write_file() is fine, and it's rather a question of whether am should use write_file_gently() instead. I don't see why, but perhaps that's because it's Friday.. René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 0/8] write_file cleanups
Am 08.07.2016 um 11:04 schrieb Jeff King: Here it is. There actually weren't that many spots to clean up, as quite a few of them have a "twist" where they want to do something clever, like open the file and feed the descriptor to a sub-function, or open with funny things like O_EXCL. But still, the diffstat is pleasing: builtin/am.c | 25 +++-- builtin/branch.c | 5 +--- builtin/config.c | 2 +- builtin/merge.c | 45 -- cache.h | 17 ++-- wrapper.c| 52 --- 6 files changed, 44 insertions(+), 102 deletions(-) and that even includes adding some function documentation. Haha, feels like Candy Crush. :) (Sent a single small patch, lots of places get patched as if by magic.) Thanks, René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] rm: reuse strbuf for all remove_dir_recursively() calls
Don't throw the memory allocated for remove_dir_recursively() away after a single call, use it for the other entries as well instead. Signed-off-by: Rene Scharfe --- builtin/rm.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) diff --git a/builtin/rm.c b/builtin/rm.c index 8abb020..b2fee3e 100644 --- a/builtin/rm.c +++ b/builtin/rm.c @@ -387,6 +387,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix) */ if (!index_only) { int removed = 0, gitmodules_modified = 0; + struct strbuf buf = STRBUF_INIT; for (i = 0; i < list.nr; i++) { const char *path = list.entry[i].name; if (list.entry[i].is_submodule) { @@ -398,7 +399,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix) continue; } } else { - struct strbuf buf = STRBUF_INIT; + strbuf_reset(&buf); strbuf_addstr(&buf, path); if (!remove_dir_recursively(&buf, 0)) { removed = 1; @@ -410,7 +411,6 @@ int cmd_rm(int argc, const char **argv, const char *prefix) /* Submodule was removed by user */ if (!remove_path_from_gitmodules(path)) gitmodules_modified = 1; - strbuf_release(&buf); /* Fallthrough and let remove_path() fail. */ } } @@ -421,6 +421,7 @@ int cmd_rm(int argc, const char **argv, const char *prefix) if (!removed) die_errno("git rm: '%s'", path); } + strbuf_release(&buf); if (gitmodules_modified) stage_updated_gitmodules(); } -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] worktree: use strbuf_add_absolute_path() directly
absolute_path() is a wrapper for strbuf_add_absolute_path(). Call the latter directly for adding absolute paths to a strbuf. That's shorter and avoids an extra string copy. Signed-off-by: Rene Scharfe --- worktree.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/worktree.c b/worktree.c index e2a94e0..b819baf 100644 --- a/worktree.c +++ b/worktree.c @@ -80,7 +80,7 @@ static struct worktree *get_main_worktree(void) int is_bare = 0; int is_detached = 0; - strbuf_addstr(&worktree_path, absolute_path(get_git_common_dir())); + strbuf_add_absolute_path(&worktree_path, get_git_common_dir()); is_bare = !strbuf_strip_suffix(&worktree_path, "/.git"); if (is_bare) strbuf_strip_suffix(&worktree_path, "/."); @@ -125,7 +125,7 @@ static struct worktree *get_linked_worktree(const char *id) strbuf_rtrim(&worktree_path); if (!strbuf_strip_suffix(&worktree_path, "/.git")) { strbuf_reset(&worktree_path); - strbuf_addstr(&worktree_path, absolute_path(".")); + strbuf_add_absolute_path(&worktree_path, "."); strbuf_strip_suffix(&worktree_path, "/."); } -- 2.9.0 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Question: Getting 'git diff' to generate /usr/bin/diff output
Am 16.07.2016 um 21:12 schrieb n...@dad.org: I am trying to learn how to use git, and am having difficulty using 'git diff'. I can't deal with its output very well. The other replies covered how to use the system's own diff instead. Just curious: What makes using git diff difficult and its output hard to deal with for you? Thanks, René -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] use strbuf_addbuf() for appending a strbuf to another
Use strbuf_addbuf() where possible; it's shorter and more efficient. Signed-off-by: Rene Scharfe --- dir.c | 2 +- path.c | 2 +- wt-status.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/dir.c b/dir.c index 6172b34..0ea235f 100644 --- a/dir.c +++ b/dir.c @@ -2364,7 +2364,7 @@ void write_untracked_extension(struct strbuf *out, struct untracked_cache *untra varint_len = encode_varint(untracked->ident.len, varbuf); strbuf_add(out, varbuf, varint_len); - strbuf_add(out, untracked->ident.buf, untracked->ident.len); + strbuf_addbuf(out, &untracked->ident); strbuf_add(out, ouc, ouc_size(len)); free(ouc); diff --git a/path.c b/path.c index 259aeed..17551c4 100644 --- a/path.c +++ b/path.c @@ -483,7 +483,7 @@ static void do_submodule_path(struct strbuf *buf, const char *path, strbuf_addstr(buf, git_dir); } strbuf_addch(buf, '/'); - strbuf_addstr(&git_submodule_dir, buf->buf); + strbuf_addbuf(&git_submodule_dir, buf); strbuf_vaddf(buf, fmt, args); diff --git a/wt-status.c b/wt-status.c index c19b52c..dbdb543 100644 --- a/wt-status.c +++ b/wt-status.c @@ -1062,7 +1062,7 @@ static void abbrev_sha1_in_line(struct strbuf *line) strbuf_addf(split[1], "%s ", abbrev); strbuf_reset(line); for (i = 0; split[i]; i++) - strbuf_addf(line, "%s", split[i]->buf); + strbuf_addbuf(line, split[i]); } } strbuf_list_free(split); -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] submodule-config: use explicit empty string instead of strbuf in config_from()
Use a string constant instead of an empty strbuf to shorten the code and make it easier to read. Signed-off-by: Rene Scharfe --- ... unless someone can come up with a suitable non-empty string to feed to git_config_from_mem() as its name parameter. submodule-config.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/submodule-config.c b/submodule-config.c index db1847f..44eb162 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -397,7 +397,6 @@ static const struct submodule *config_from(struct submodule_cache *cache, const unsigned char *commit_sha1, const char *key, enum lookup_type lookup_type) { - struct strbuf rev = STRBUF_INIT; unsigned long config_size; char *config; unsigned char sha1[20]; @@ -448,7 +447,7 @@ static const struct submodule *config_from(struct submodule_cache *cache, parameter.commit_sha1 = commit_sha1; parameter.gitmodules_sha1 = sha1; parameter.overwrite = 0; - git_config_from_mem(parse_config, "submodule-blob", rev.buf, + git_config_from_mem(parse_config, "submodule-blob", "", config, config_size, ¶meter); free(config); -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] use strbuf_addbuf() for appending a strbuf to another
Am 20.07.2016 um 15:20 schrieb Jeff King: > On Tue, Jul 19, 2016 at 08:36:29PM +0200, René Scharfe wrote: > >> Use strbuf_addbuf() where possible; it's shorter and more efficient. > > After seeing "efficient", I was momentarily surprised by the first hunk: > >> diff --git a/dir.c b/dir.c >> index 6172b34..0ea235f 100644 >> --- a/dir.c >> +++ b/dir.c >> @@ -2364,7 +2364,7 @@ void write_untracked_extension(struct strbuf *out, >> struct untracked_cache *untra >> >> varint_len = encode_varint(untracked->ident.len, varbuf); >> strbuf_add(out, varbuf, varint_len); >> -strbuf_add(out, untracked->ident.buf, untracked->ident.len); >> +strbuf_addbuf(out, &untracked->ident); > > This is actually slightly _less_ efficient, because we already are using > the precomputed len, and the new code will call an extra strbuf_grow() > to cover the case where the two arguments are the same. See 81d2cae > (strbuf_addbuf(): allow passing the same buf to dst and src, > 2010-01-12). Ah, I wasn't aware of that. Calling strbuf_grow() twice shouldn't be thaaat bad. However, I wonder where we duplicate strbufs, or why we would ever want to do so. Anyway, here's a patch for that: -- >8 -- Subject: strbuf: avoid calling strbuf_grow() twice in strbuf_addbuf() Implement strbuf_addbuf() as a normal function in order to avoid calling strbuf_grow() twice, with the second callinside strbud_add() being a no-op. This is slightly faster and also reduces the text size a bit. Signed-off-by: Rene Scharfe --- strbuf.c | 7 +++ strbuf.h | 6 +- 2 files changed, 8 insertions(+), 5 deletions(-) diff --git a/strbuf.c b/strbuf.c index 1ba600b..f3bd571 100644 --- a/strbuf.c +++ b/strbuf.c @@ -197,6 +197,13 @@ void strbuf_add(struct strbuf *sb, const void *data, size_t len) strbuf_setlen(sb, sb->len + len); } +void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) +{ + strbuf_grow(sb, sb2->len); + memcpy(sb->buf + sb->len, sb2->buf, sb2->len); + strbuf_setlen(sb, sb->len + sb2->len); +} + void strbuf_adddup(struct strbuf *sb, size_t pos, size_t len) { strbuf_grow(sb, len); diff --git a/strbuf.h b/strbuf.h index 83c5c98..ba8d5f1 100644 --- a/strbuf.h +++ b/strbuf.h @@ -263,11 +263,7 @@ static inline void strbuf_addstr(struct strbuf *sb, const char *s) /** * Copy the contents of another buffer at the end of the current one. */ -static inline void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2) -{ - strbuf_grow(sb, sb2->len); - strbuf_add(sb, sb2->buf, sb2->len); -} +extern void strbuf_addbuf(struct strbuf *sb, const struct strbuf *sb2); /** * Copy part of the buffer from a given position till a given length to the -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH] submodule-config: use explicit empty string instead of strbuf in config_from()
Am 20.07.2016 um 10:25 schrieb Heiko Voigt: Hi, On Tue, Jul 19, 2016 at 09:05:43PM +0200, René Scharfe wrote: Use a string constant instead of an empty strbuf to shorten the code and make it easier to read. This must have been some oversight from my original code. I also can not see any purpose. Signed-off-by: Rene Scharfe --- ... unless someone can come up with a suitable non-empty string to feed to git_config_from_mem() as its name parameter. If we would want to be absolutely correct we could use something like "SHA1:.gitmodules". E.g. like we use to lookup the blob in gitmodule_sha1_from_commit(): strbuf_addf(&rev, "%s:.gitmodules", sha1_to_hex(commit_sha1)); And now I see where this was leftover from... before extracting this function this code was filling the strbuf. How about this instead? I like it. ---8<-- Subject: [PATCH] fix passing a name for config from submodules In commit 959b5455 we implemented the initial version of the submodule config cache. During development of that initial version we extracted the function gitmodule_sha1_from_commit(). During that process we missed that the strbuf rev was still used in config_from() and now is left empty. Lets fix this by also returning this string. Signed-off-by: Heiko Voigt --- Its not exactly pretty with all the releases before the returns but this is what I could quickly come up with... Indeed. submodule-config.c | 23 +++ 1 file changed, 15 insertions(+), 8 deletions(-) diff --git a/submodule-config.c b/submodule-config.c index 077db40..dccea59 100644 --- a/submodule-config.c +++ b/submodule-config.c @@ -371,9 +371,9 @@ static int parse_config(const char *var, const char *value, void *data) } static int gitmodule_sha1_from_commit(const unsigned char *commit_sha1, - unsigned char *gitmodules_sha1) + unsigned char *gitmodules_sha1, + struct strbuf *rev) { - struct strbuf rev = STRBUF_INIT; int ret = 0; if (is_null_sha1(commit_sha1)) { @@ -381,11 +381,10 @@ static int gitmodule_sha1_from_commit(const unsigned char *commit_sha1, return 1; } - strbuf_addf(&rev, "%s:.gitmodules", sha1_to_hex(commit_sha1)); - if (get_sha1(rev.buf, gitmodules_sha1) >= 0) + strbuf_addf(rev, "%s:.gitmodules", sha1_to_hex(commit_sha1)); + if (get_sha1(rev->buf, gitmodules_sha1) >= 0) ret = 1; - strbuf_release(&rev); return ret; } @@ -420,8 +419,10 @@ static const struct submodule *config_from(struct submodule_cache *cache, return entry->config; } - if (!gitmodule_sha1_from_commit(commit_sha1, sha1)) + if (!gitmodule_sha1_from_commit(commit_sha1, sha1, &rev)) { + strbuf_release(&rev); return NULL; + } switch (lookup_type) { case lookup_name: @@ -431,14 +432,19 @@ static const struct submodule *config_from(struct submodule_cache *cache, submodule = cache_lookup_path(cache, sha1, key); break; } - if (submodule) + if (submodule) { + strbuf_release(&rev); return submodule; + } config = read_sha1_file(sha1, &type, &config_size); - if (!config) + if (!config) { + strbuf_release(&rev); return NULL; + } if (type != OBJ_BLOB) { + strbuf_release(&rev); free(config); return NULL; } A separate patch could combine the previous two conditionals; free(NULL) is allowed. @@ -450,6 +456,7 @@ static const struct submodule *config_from(struct submodule_cache *cache, parameter.overwrite = 0; git_config_from_mem(parse_config, "submodule-blob", rev.buf, config, config_size, ¶meter); + strbuf_release(&rev); free(config); switch (lookup_type) { -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] use strbuf_addstr() for adding constant strings to a strbuf
Replace uses of strbuf_addf() for adding strings with more lightweight strbuf_addstr() calls. In http-push.c it becomes easier to see what's going on without having to verfiy that the definition of PROPFIND_ALL_REQUEST doesn't contain any format specifiers. Signed-off-by: Rene Scharfe --- builtin/rev-parse.c | 2 +- http-push.c | 2 +- send-pack.c | 2 +- t/helper/test-run-command.c | 6 +++--- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/builtin/rev-parse.c b/builtin/rev-parse.c index c961b74..76cf05e 100644 --- a/builtin/rev-parse.c +++ b/builtin/rev-parse.c @@ -469,7 +469,7 @@ static int cmd_parseopt(int argc, const char **argv, const char *prefix) (stop_at_non_option ? PARSE_OPT_STOP_AT_NON_OPTION : 0) | PARSE_OPT_SHELL_EVAL); - strbuf_addf(&parsed, " --"); + strbuf_addstr(&parsed, " --"); sq_quote_argv(&parsed, argv, 0); puts(parsed.buf); return 0; diff --git a/http-push.c b/http-push.c index a092f02..d0b29ac 100644 --- a/http-push.c +++ b/http-push.c @@ -1137,7 +1137,7 @@ static void remote_ls(const char *path, int flags, ls.userData = userData; ls.userFunc = userFunc; - strbuf_addf(&out_buffer.buf, PROPFIND_ALL_REQUEST); + strbuf_addstr(&out_buffer.buf, PROPFIND_ALL_REQUEST); dav_headers = curl_slist_append(dav_headers, "Depth: 1"); dav_headers = curl_slist_append(dav_headers, "Content-Type: text/xml"); diff --git a/send-pack.c b/send-pack.c index 299d303..1f85c56 100644 --- a/send-pack.c +++ b/send-pack.c @@ -265,7 +265,7 @@ static int generate_push_cert(struct strbuf *req_buf, struct strbuf cert = STRBUF_INIT; int update_seen = 0; - strbuf_addf(&cert, "certificate version 0.1\n"); + strbuf_addstr(&cert, "certificate version 0.1\n"); strbuf_addf(&cert, "pusher %s ", signing_key); datestamp(&cert); strbuf_addch(&cert, '\n'); diff --git a/t/helper/test-run-command.c b/t/helper/test-run-command.c index 30a64a9..6bb53da 100644 --- a/t/helper/test-run-command.c +++ b/t/helper/test-run-command.c @@ -26,7 +26,7 @@ static int parallel_next(struct child_process *cp, return 0; argv_array_pushv(&cp->args, d->argv); - strbuf_addf(err, "preloaded output of a child\n"); + strbuf_addstr(err, "preloaded output of a child\n"); number_callbacks++; return 1; } @@ -36,7 +36,7 @@ static int no_job(struct child_process *cp, void *cb, void **task_cb) { - strbuf_addf(err, "no further jobs available\n"); + strbuf_addstr(err, "no further jobs available\n"); return 0; } @@ -45,7 +45,7 @@ static int task_finished(int result, void *pp_cb, void *pp_task_cb) { - strbuf_addf(err, "asking for a quick stop\n"); + strbuf_addstr(err, "asking for a quick stop\n"); return 1; } -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] pass constants as first argument to st_mult()
The result of st_mult() is the same no matter the order of its arguments. It invokes the macro unsigned_mult_overflows(), which divides the second parameter by the first one. Pass constants first to allow that division to be done already at compile time. Signed-off-by: Rene Scharfe --- diffcore-rename.c | 2 +- refs.c| 2 +- shallow.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/diffcore-rename.c b/diffcore-rename.c index 58ac0a5..73d003a 100644 --- a/diffcore-rename.c +++ b/diffcore-rename.c @@ -541,7 +541,7 @@ void diffcore_rename(struct diff_options *options) rename_dst_nr * rename_src_nr, 50, 1); } - mx = xcalloc(st_mult(num_create, NUM_CANDIDATE_PER_DST), sizeof(*mx)); + mx = xcalloc(st_mult(NUM_CANDIDATE_PER_DST, num_create), sizeof(*mx)); for (dst_cnt = i = 0; i < rename_dst_nr; i++) { struct diff_filespec *two = rename_dst[i].two; struct diff_score *m; diff --git a/refs.c b/refs.c index 814cad3..b4e7cac 100644 --- a/refs.c +++ b/refs.c @@ -922,7 +922,7 @@ char *shorten_unambiguous_ref(const char *refname, int strict) /* -2 for strlen("%.*s") - strlen("%s"); +1 for NUL */ total_len += strlen(ref_rev_parse_rules[nr_rules]) - 2 + 1; - scanf_fmts = xmalloc(st_add(st_mult(nr_rules, sizeof(char *)), total_len)); + scanf_fmts = xmalloc(st_add(st_mult(sizeof(char *), nr_rules), total_len)); offset = 0; for (i = 0; i < nr_rules; i++) { diff --git a/shallow.c b/shallow.c index 4d554ca..54e2db7 100644 --- a/shallow.c +++ b/shallow.c @@ -389,7 +389,7 @@ static void paint_down(struct paint_info *info, const unsigned char *sha1, unsigned int i, nr; struct commit_list *head = NULL; int bitmap_nr = (info->nr_bits + 31) / 32; - size_t bitmap_size = st_mult(bitmap_nr, sizeof(uint32_t)); + size_t bitmap_size = st_mult(sizeof(uint32_t), bitmap_nr); uint32_t *tmp = xmalloc(bitmap_size); /* to be freed before return */ uint32_t *bitmap = paint_alloc(info); struct commit *c = lookup_commit_reference_gently(sha1, 1); -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] nedmalloc: work around overzealous GCC 6 warning
Am 04.08.2016 um 18:07 schrieb Johannes Schindelin: With GCC 6, the strdup() function is declared with the "nonnull" attribute, stating that it is not allowed to pass a NULL value as parameter. In nedmalloc()'s reimplementation of strdup(), Postel's Law is heeded and NULL parameters are handled gracefully. GCC 6 complains about that now because it thinks that NULL cannot be passed to strdup() anyway. Let's just shut up GCC >= 6 in that case and go on with our lives. This version of strdup() is only compiled if nedmalloc is used instead of the system allocator. That means we can't rely on strdup() being able to take NULL -- some (most?) platforms won't like it. Removing the NULL check would be a more general and overall easier way out, no? But it should check the result of malloc() before copying. --- compat/nedmalloc/nedmalloc.c | 8 +++- 1 file changed, 3 insertions(+), 5 deletions(-) diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c index a0a16eb..cc18f0c 100644 --- a/compat/nedmalloc/nedmalloc.c +++ b/compat/nedmalloc/nedmalloc.c @@ -955,12 +955,10 @@ void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void ** */ char *strdup(const char *s1) { - char *s2 = 0; - if (s1) { - size_t len = strlen(s1) + 1; - s2 = malloc(len); + size_t len = strlen(s1) + 1; + char *s2 = malloc(len); + if (s2) memcpy(s2, s1, len); - } return s2; } #endif -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] use strbuf_addstr() instead of strbuf_addf() with "%s"
Call strbuf_addstr() for adding a simple string to a strbuf instead of using the heavier strbuf_addf(). This is shorter and documents the intent more clearly. Signed-off-by: Rene Scharfe --- builtin/fmt-merge-msg.c | 2 +- http.c | 2 +- sequencer.c | 2 +- 3 files changed, 3 insertions(+), 3 deletions(-) diff --git a/builtin/fmt-merge-msg.c b/builtin/fmt-merge-msg.c index e5658c3..ac84e99 100644 --- a/builtin/fmt-merge-msg.c +++ b/builtin/fmt-merge-msg.c @@ -272,7 +272,7 @@ static int cmp_string_list_util_as_integral(const void *a_, const void *b_) static void add_people_count(struct strbuf *out, struct string_list *people) { if (people->nr == 1) - strbuf_addf(out, "%s", people->items[0].string); + strbuf_addstr(out, people->items[0].string); else if (people->nr == 2) strbuf_addf(out, "%s (%d) and %s (%d)", people->items[0].string, diff --git a/http.c b/http.c index e81dd13..cd40b01 100644 --- a/http.c +++ b/http.c @@ -1225,7 +1225,7 @@ void append_remote_object_url(struct strbuf *buf, const char *url, strbuf_addf(buf, "objects/%.*s/", 2, hex); if (!only_two_digit_prefix) - strbuf_addf(buf, "%s", hex+2); + strbuf_addstr(buf, hex + 2); } char *get_remote_object_url(const char *url, const char *hex, diff --git a/sequencer.c b/sequencer.c index cdfac82..7b1eb14 100644 --- a/sequencer.c +++ b/sequencer.c @@ -112,7 +112,7 @@ static void remove_sequencer_state(void) { struct strbuf seq_dir = STRBUF_INIT; - strbuf_addf(&seq_dir, "%s", git_path(SEQ_DIR)); + strbuf_addstr(&seq_dir, git_path(SEQ_DIR)); remove_dir_recursively(&seq_dir, 0); strbuf_release(&seq_dir); } -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] use CHILD_PROCESS_INIT to initialize automatic variables
Initialize struct child_process variables already when they're defined. That's shorter and saves a function call. Signed-off-by: Rene Scharfe --- builtin/submodule--helper.c | 3 +-- builtin/worktree.c | 6 ++ 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index b22352b..957f42a 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -444,8 +444,7 @@ static int module_name(int argc, const char **argv, const char *prefix) static int clone_submodule(const char *path, const char *gitdir, const char *url, const char *depth, const char *reference, int quiet) { - struct child_process cp; - child_process_init(&cp); + struct child_process cp = CHILD_PROCESS_INIT; argv_array_push(&cp.args, "clone"); argv_array_push(&cp.args, "--no-checkout"); diff --git a/builtin/worktree.c b/builtin/worktree.c index 5a41788..6dcf7bd 100644 --- a/builtin/worktree.c +++ b/builtin/worktree.c @@ -194,7 +194,7 @@ static int add_worktree(const char *path, const char *refname, struct strbuf sb = STRBUF_INIT; const char *name; struct stat st; - struct child_process cp; + struct child_process cp = CHILD_PROCESS_INIT; struct argv_array child_env = ARGV_ARRAY_INIT; int counter = 0, len, ret; struct strbuf symref = STRBUF_INIT; @@ -273,7 +273,6 @@ static int add_worktree(const char *path, const char *refname, argv_array_pushf(&child_env, "%s=%s", GIT_DIR_ENVIRONMENT, sb_git.buf); argv_array_pushf(&child_env, "%s=%s", GIT_WORK_TREE_ENVIRONMENT, path); - memset(&cp, 0, sizeof(cp)); cp.git_cmd = 1; if (commit) @@ -365,8 +364,7 @@ static int add(int ac, const char **av, const char *prefix) } if (opts.new_branch) { - struct child_process cp; - memset(&cp, 0, sizeof(cp)); + struct child_process cp = CHILD_PROCESS_INIT; cp.git_cmd = 1; argv_array_push(&cp.args, "branch"); if (opts.force_new_branch) -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] merge-recursive: use STRING_LIST_INIT_NODUP
Initialize a string_list right when it's defined. That's shorter, saves a function call and makes it more obvious that we're using the NODUP variant here. Signed-off-by: Rene Scharfe --- merge-recursive.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/merge-recursive.c b/merge-recursive.c index a4a1195..97e6737 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -409,7 +409,7 @@ static void record_df_conflict_files(struct merge_options *o, * and the file need to be present, then the D/F file will be * reinstated with a new unique name at the time it is processed. */ - struct string_list df_sorted_entries; + struct string_list df_sorted_entries = STRING_LIST_INIT_NODUP; const char *last_file = NULL; int last_len = 0; int i; @@ -422,7 +422,6 @@ static void record_df_conflict_files(struct merge_options *o, return; /* Ensure D/F conflicts are adjacent in the entries list. */ - memset(&df_sorted_entries, 0, sizeof(struct string_list)); for (i = 0; i < entries->nr; i++) { struct string_list_item *next = &entries->items[i]; string_list_append(&df_sorted_entries, next->string)->util = -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] merge: use string_list_split() in add_strategies()
Call string_list_split() for cutting a space separated list into pieces instead of reimplementing it based on struct strategy. The attr member of struct strategy was not used split_merge_strategies(); it was a pure string operation. Also be nice and clean up once we're done splitting; the old code didn't bother freeing any of the allocated memory. Signed-off-by: Rene Scharfe --- builtin/merge.c | 44 ++-- 1 file changed, 10 insertions(+), 34 deletions(-) diff --git a/builtin/merge.c b/builtin/merge.c index 19b3bc2..a95a801 100644 --- a/builtin/merge.c +++ b/builtin/merge.c @@ -30,6 +30,7 @@ #include "fmt-merge-msg.h" #include "gpg-interface.h" #include "sequencer.h" +#include "string-list.h" #define DEFAULT_TWOHEAD (1<<0) #define DEFAULT_OCTOPUS (1<<1) @@ -703,42 +704,17 @@ static int count_unmerged_entries(void) return ret; } -static void split_merge_strategies(const char *string, struct strategy **list, - int *nr, int *alloc) -{ - char *p, *q, *buf; - - if (!string) - return; - - buf = xstrdup(string); - q = buf; - for (;;) { - p = strchr(q, ' '); - if (!p) { - ALLOC_GROW(*list, *nr + 1, *alloc); - (*list)[(*nr)++].name = xstrdup(q); - free(buf); - return; - } else { - *p = '\0'; - ALLOC_GROW(*list, *nr + 1, *alloc); - (*list)[(*nr)++].name = xstrdup(q); - q = ++p; - } - } -} - static void add_strategies(const char *string, unsigned attr) { - struct strategy *list = NULL; - int list_alloc = 0, list_nr = 0, i; - - memset(&list, 0, sizeof(list)); - split_merge_strategies(string, &list, &list_nr, &list_alloc); - if (list) { - for (i = 0; i < list_nr; i++) - append_strategy(get_strategy(list[i].name)); + int i; + + if (string) { + struct string_list list = STRING_LIST_INIT_DUP; + struct string_list_item *item; + string_list_split(&list, string, ' ', -1); + for_each_string_list_item(item, &list) + append_strategy(get_strategy(item->string)); + string_list_clear(&list, 0); return; } for (i = 0; i < ARRAY_SIZE(all_strategy); i++) -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: [PATCH 2/2] nedmalloc: work around overzealous GCC 6 warning
Am 05.08.2016 um 23:57 schrieb Junio C Hamano: Johannes Schindelin writes: Hi Junio & René, On Thu, 4 Aug 2016, Junio C Hamano wrote: Let's try it this way. How about this as a replacement? I like it (with the if (s2) test intead of if (s1), of course). But please record René as author, maybe mentioning myself with a "Diagnosed-by:" line. Hmph. I cannot do that unilaterally without waiting for René to respond, though. In any case, with only header and footer changes, here is what will appear in 'pu'. It's fine with me, thanks. :) Minor comments below. Thanks. -- >8 -- From: René Scharfe Date: Thu, 4 Aug 2016 23:56:54 +0200 Subject: [PATCH] nedmalloc: work around overzealous GCC 6 warning With GCC 6, the strdup() function is declared with the "nonnull" attribute, stating that it is not allowed to pass a NULL value as parameter. In nedmalloc()'s reimplementation of strdup(), Postel's Law is heeded and NULL parameters are handled gracefully. GCC 6 complains about that now because it thinks that NULL cannot be passed to strdup() anyway. Because the callers in this project of strdup() must be prepared to call any implementation of strdup() supplied by the platform, so it is pointless to pretend that it is OK to call it with NULL. Remove the conditional based on NULL-ness of the input; this squelches the warning. My commit message would have been much shorter, probably too short. But perhaps add here: "Check the return value of malloc() instead to make sure we actually got memory to write to." See https://gcc.gnu.org/gcc-6/porting_to.html for details. Diagnosed-by: Johannes Schindelin Signed-off-by: René Scharfe Signed-off-by: Junio C Hamano --- compat/nedmalloc/nedmalloc.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/compat/nedmalloc/nedmalloc.c b/compat/nedmalloc/nedmalloc.c index 677d1b2..2d4ef59 100644 --- a/compat/nedmalloc/nedmalloc.c +++ b/compat/nedmalloc/nedmalloc.c @@ -955,12 +955,11 @@ void **nedpindependent_comalloc(nedpool *p, size_t elems, size_t *sizes, void ** */ char *strdup(const char *s1) { - char *s2 = 0; - if (s1) { - size_t len = strlen(s1) + 1; - s2 = malloc(len); + size_t len = strlen(s1) + 1; + char *s2 = malloc(len); + You added this blank line. OK. + if (s2) memcpy(s2, s1, len); - } return s2; } #endif -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] archive-tar: make write_extended_header() void
The function write_extended_header() only ever returns 0. Simplify it and its caller by dropping its return value, like we did with write_global_extended_header() earlier. Signed-off-by: Rene Scharfe --- archive-tar.c | 15 +-- 1 file changed, 5 insertions(+), 10 deletions(-) diff --git a/archive-tar.c b/archive-tar.c index 5568240..380e3ae 100644 --- a/archive-tar.c +++ b/archive-tar.c @@ -213,9 +213,9 @@ static void prepare_header(struct archiver_args *args, xsnprintf(header->chksum, sizeof(header->chksum), "%07o", ustar_header_chksum(header)); } -static int write_extended_header(struct archiver_args *args, -const unsigned char *sha1, -const void *buffer, unsigned long size) +static void write_extended_header(struct archiver_args *args, + const unsigned char *sha1, + const void *buffer, unsigned long size) { struct ustar_header header; unsigned int mode; @@ -226,7 +226,6 @@ static int write_extended_header(struct archiver_args *args, prepare_header(args, &header, mode, size); write_blocked(&header, sizeof(header)); write_blocked(buffer, size); - return 0; } static int write_tar_entry(struct archiver_args *args, @@ -305,12 +304,8 @@ static int write_tar_entry(struct archiver_args *args, prepare_header(args, &header, mode, size_in_header); if (ext_header.len > 0) { - err = write_extended_header(args, sha1, ext_header.buf, - ext_header.len); - if (err) { - free(buffer); - return err; - } + write_extended_header(args, sha1, ext_header.buf, + ext_header.len); } strbuf_release(&ext_header); write_blocked(&header, sizeof(header)); -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
[PATCH] use strbuf_add_unique_abbrev() for adding short hashes
Call strbuf_add_unique_abbrev() to add abbreviated hashes to strbufs instead of taking detours through find_unique_abbrev() and its static buffer. This is shorter and a bit more efficient. Signed-off-by: Rene Scharfe --- builtin/checkout.c | 3 +-- pretty.c | 13 ++--- transport.c| 11 --- 3 files changed, 11 insertions(+), 16 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index 27c1a05..0a7d24c 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -703,8 +703,7 @@ static int add_pending_uninteresting_ref(const char *refname, static void describe_one_orphan(struct strbuf *sb, struct commit *commit) { strbuf_addstr(sb, " "); - strbuf_addstr(sb, - find_unique_abbrev(commit->object.oid.hash, DEFAULT_ABBREV)); + strbuf_add_unique_abbrev(sb, commit->object.oid.hash, DEFAULT_ABBREV); strbuf_addch(sb, ' '); if (!parse_commit(commit)) pp_commit_easy(CMIT_FMT_ONELINE, commit, sb); diff --git a/pretty.c b/pretty.c index 9fa42c2..9609afb 100644 --- a/pretty.c +++ b/pretty.c @@ -1143,8 +1143,8 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */ strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET)); return 1; } - strbuf_addstr(sb, find_unique_abbrev(commit->object.oid.hash, -c->pretty_ctx->abbrev)); + strbuf_add_unique_abbrev(sb, commit->object.oid.hash, +c->pretty_ctx->abbrev); strbuf_addstr(sb, diff_get_color(c->auto_color, DIFF_RESET)); c->abbrev_commit_hash.len = sb->len - c->abbrev_commit_hash.off; return 1; @@ -1154,8 +1154,8 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */ case 't': /* abbreviated tree hash */ if (add_again(sb, &c->abbrev_tree_hash)) return 1; - strbuf_addstr(sb, find_unique_abbrev(commit->tree->object.oid.hash, -c->pretty_ctx->abbrev)); + strbuf_add_unique_abbrev(sb, commit->tree->object.oid.hash, +c->pretty_ctx->abbrev); c->abbrev_tree_hash.len = sb->len - c->abbrev_tree_hash.off; return 1; case 'P': /* parent hashes */ @@ -1171,9 +1171,8 @@ static size_t format_commit_one(struct strbuf *sb, /* in UTF-8 */ for (p = commit->parents; p; p = p->next) { if (p != commit->parents) strbuf_addch(sb, ' '); - strbuf_addstr(sb, find_unique_abbrev( - p->item->object.oid.hash, - c->pretty_ctx->abbrev)); + strbuf_add_unique_abbrev(sb, p->item->object.oid.hash, +c->pretty_ctx->abbrev); } c->abbrev_parent_hashes.len = sb->len - c->abbrev_parent_hashes.off; diff --git a/transport.c b/transport.c index 4ba48b0..557f399 100644 --- a/transport.c +++ b/transport.c @@ -321,11 +321,6 @@ static void print_ref_status(char flag, const char *summary, struct ref *to, str } } -static const char *status_abbrev(unsigned char sha1[20]) -{ - return find_unique_abbrev(sha1, DEFAULT_ABBREV); -} - static void print_ok_ref_status(struct ref *ref, int porcelain) { if (ref->deletion) @@ -340,7 +335,8 @@ static void print_ok_ref_status(struct ref *ref, int porcelain) char type; const char *msg; - strbuf_addstr(&quickref, status_abbrev(ref->old_oid.hash)); + strbuf_add_unique_abbrev(&quickref, ref->old_oid.hash, +DEFAULT_ABBREV); if (ref->forced_update) { strbuf_addstr(&quickref, "..."); type = '+'; @@ -350,7 +346,8 @@ static void print_ok_ref_status(struct ref *ref, int porcelain) type = ' '; msg = NULL; } - strbuf_addstr(&quickref, status_abbrev(ref->new_oid.hash)); + strbuf_add_unique_abbrev(&quickref, ref->new_oid.hash, +DEFAULT_ABBREV); print_ref_status(type, quickref.buf, ref, ref->peer_ref, msg, porcelain); strbuf_release(&quickref); -- 2.9.2 -- To unsubscribe from this list: send the line "unsubscribe git" in the body of a message to majord...@vger.kernel.org More majordomo info at http://vger.kernel.org/majordomo-info.html
Re: Rename detection within in files WAS: [PATCH 2/6] t7408: merge short tests, factor out testing method
Am 06.08.2016 um 01:26 schrieb Stefan Beller: When moving code around, we usually get large chunks of text. If the contributor is not 100% trustworthy, we need to review all the code without much intelectual joy. Essentially the reviewer is just making sure the parts of the text are the same. I'd like to propose a new addition to the diff format that makes this use case easier. The idea is to mark up lines that were just moved around in the file instead of adding and removing them. Currently we have 3 characters that are allowed to start a line within a hunk: ' ' to indicate context '+' to add a line '-' to remove a line I'd propose to add the following characters: '*' which is the same as '+', but it indicates that the line was moved from somewhere else without change. 'X' The same as '-', with the addition that this line was moved to a different place without change. The patch below uses these new '*' and 'X'. Each hunk that makes use of these additions, is followed other sections, [moved-from, moved-to] that indicate where the corresponding line is. Interesting idea. It should be easy to convert the result into a regular unified diff for consumption with patch(1) or git am/apply by replacing the new flags with + and - and removing the moved-* hunks. Your example ignores whitespace changes at the start of the line and within it, the added "-C $working_dir", s/expected/expect/; is this all intended? Only a single blank line was moved verbatim. The moved-from and moved-to hunks make this diff quite verbose. If multiple lines from different sources are moved to the same hunk then you'd get multiple moved-from hunks following that single destination, right? (Same with lines moved from a single hunk to multiple destinations and moved-to.) But does it even warrent a new format? It's a display problem; the necessary information is already in the diffs we have today. A graphical diff viewer could connect moved blocks with lines, like http://www.araxis.com/merge/ does in its side-by-side view. A Thunderbird extension (or a bookmarklet or browser extendiion for webmail users) could do that for an email-based workflow. Still, what about adding information about moved lines as an extended header (like that index line)? Line numbers are included in hunk headers and can serve as orientation. A reader would have to do some mental arithmetic (ugh), but incompatible format changes would be avoided. For your example it should look something like this: move from t/t7408-submodule-reference.sh:52,1 move to t/t7408-submodule-reference.sh:22,1 There are multiple things to tackle when going for such an addition: * How to present this to the user (it's covered in this email) * how to find the renamed lines algorithmically. (there are already approaches to that, e.g. https://github.com/stefanbeller/duplo which is http://duplo.sourceforge.net/ with no substantial additions) Any comments welcome, Thanks, Stefan --- t/t7408-submodule-reference.sh | 50 +++--- 1 file changed, 15 insertions(+), 29 deletions(-), 6 moved lines diff --git a/t/t7408-submodule-reference.sh b/t/t7408-submodule-reference.sh index afcc629..1416cbd 100755 --- a/t/t7408-submodule-reference.sh +++ b/t/t7408-submodule-reference.sh @@ -10,6 +10,16 @@ base_dir=$(pwd) U=$base_dir/UPLOAD_LOG +test_alternate_usage() +{ + alternates_file=$1 + working_dir=$2 + test_line_count = 1 $alternates_file && * echo "0 objects, 0 kilobytes" >expect && * git -C $working_dir count-objects >current && * diff expect current +} + Post-image line 22. test_expect_success 'preparing first repository' ' test_create_repo A && ( @@ move-source 42,6 @@ test_expect_success 'that reference gets used with add' ' test_expect_success 'that reference gets used with add' ' ( cd super/sub && X echo "0 objects, 0 kilobytes" > expected && X git count-objects > current && X diff expected current ) ' @@ -42,44 +52,20 @@ test_expect_success 'preparing superproject' ' ) ' -test_expect_success 'submodule add --reference' ' +test_expect_success 'submodule add --reference uses alternates' ' ( cd super && git submodule add --reference ../B "file://$base_dir/A" sub && git commit -m B-super-added - ) -' - Pre-image line 52. -test_expect_success 'after add: existence of info/alternates' ' - test_line_count = 1 super/.git/modules/sub/objects/info/alternates -' - -test_expect_success 'that reference gets used with add' ' - ( - cd super/sub && X echo "0 objects, 0 kilobytes" > expected && X git count-objects > current && X diff expected current - ) -' - -test_expect_success 'cloning superproject' ' - git
Re: [PATCH 1/3] compat: add qsort_s()
Am 12.12.2016 um 20:57 schrieb Jeff King: On Mon, Dec 12, 2016 at 08:51:14PM +0100, René Scharfe wrote: It's kinda cool to have a bespoke compatibility layer for major platforms, but the more I think about it the less I can answer why we would want that. Safety, reliability and performance can't be good reasons -- if our fallback function lacks in these regards then we have to improve it in any case. There may be cases that we don't want to support because of portability issues. E.g., if your libc has an assembly-optimized qsort() we wouldn't want to replicate that. Offloading to GPUs might be a better example; I don't know of a libc that does any of that, though (yet). I dunno. I am not that opposed to just saying "forget libc qsort, we always use our internal one which is consistent, performant, and safe". But when I suggested something similar for our regex library, I seem to recall there were complaints. Well, I'm not sure how comparable they are, but perhaps we can avoid compat code altogether in this case. Patch coming in a new thread. René
[PATCH] string-list: make compare function compatible with qsort(3)
The cmp member of struct string_list points to a comparison function that is used for sorting and searching of list items. It takes two string pointers -- like strcmp(3), which is in fact the default; cmp_items() provides a qsort(3) compatible interface by passing the string members of two struct string_list_item pointers to cmp. One shortcoming is that the comparison function is restricted to working with the string members of items; util is inaccessible to it. Another one is that the value of cmp is passed in a global variable to cmp_items(), making string_list_sort() non-reentrant. Remove the intermediate layer, i.e. cmp_items(), make the comparison functions compatible with qsort(3) and pass them pointers to full items. This allows comparisons to also take the util member into account, and avoids the need to pass the real comparison function to an intermediate function, removing the need for a global function. A downside is that comparison functions take void pointers now and each of them needs to cast its arguments to struct string_list_item pointers, weakening type safety and adding some repetitive code. Programmers are used to that, however, as that's par for the course with qsort(3). Also two unsightly casts are added that remove the const qualifiers of strings while building the structs to pass to the comparison function as search keys. It should be safe, though, as we only ever use them for reading. Signed-off-by: Rene Scharfe --- Alternative approach to the qsort_s()-based series "string-list: make string_list_sort() reentrant". Documentation/technical/api-string-list.txt | 6 +++-- builtin/mailsplit.c | 5 +++- mailmap.c | 5 ++-- merge-recursive.c | 4 ++- string-list.c | 39 +++-- string-list.h | 4 +-- tmp-objdir.c| 4 ++- 7 files changed, 39 insertions(+), 28 deletions(-) diff --git a/Documentation/technical/api-string-list.txt b/Documentation/technical/api-string-list.txt index c08402b12..39eac59c7 100644 --- a/Documentation/technical/api-string-list.txt +++ b/Documentation/technical/api-string-list.txt @@ -205,5 +205,7 @@ Represents the list itself. You should not tamper with it. . Setting the `strdup_strings` member to 1 will strdup() the strings before adding them, see above. -. The `compare_strings_fn` member is used to specify a custom compare - function, otherwise `strcmp()` is used as the default function. +. The `cmp` member is used to specify a custom compare function. It has + the same signature as the one for qsort(1) and is passed two pointers + to `struct string_list_item`. If it's NULL then the `string` members + are compared with `strcmp(1)`; this is the default. diff --git a/builtin/mailsplit.c b/builtin/mailsplit.c index 30681681c..4e72e3128 100644 --- a/builtin/mailsplit.c +++ b/builtin/mailsplit.c @@ -147,8 +147,11 @@ static int populate_maildir_list(struct string_list *list, const char *path) return ret; } -static int maildir_filename_cmp(const char *a, const char *b) +static int maildir_filename_cmp(const void *one, const void *two) { + const struct string_list_item *item_one = one, *item_two = two; + const char *a = item_one->string, *b = item_two->string; + while (*a && *b) { if (isdigit(*a) && isdigit(*b)) { long int na, nb; diff --git a/mailmap.c b/mailmap.c index c1a79c100..5290b5153 100644 --- a/mailmap.c +++ b/mailmap.c @@ -61,9 +61,10 @@ static void free_mailmap_entry(void *p, const char *s) * namemap.cmp until we know no systems that matter have such an * "unusual" string.h. */ -static int namemap_cmp(const char *a, const char *b) +static int namemap_cmp(const void *one, const void *two) { - return strcasecmp(a, b); + const struct string_list_item *item_one = one, *item_two = two; + return strcasecmp(item_one->string, item_two->string); } static void add_mapping(struct string_list *map, diff --git a/merge-recursive.c b/merge-recursive.c index d32720944..4683ba43f 100644 --- a/merge-recursive.c +++ b/merge-recursive.c @@ -390,8 +390,10 @@ static struct string_list *get_unmerged(void) return unmerged; } -static int string_list_df_name_compare(const char *one, const char *two) +static int string_list_df_name_compare(const void *a, const void *b) { + const struct string_list_item *item_a = a, *item_b = b; + const char *one = item_a->string, *two = item_b->string; int onelen = strlen(one); int twolen = strlen(two); /* diff --git a/string-list.c b/string-list.c index 8c83cac18..c583a04ee 100644 --- a/string-list.c +++ b/string-list.c @@ -7,17 +7,26 @@ void string_list_init(struct string_list *list, int strdup_strings) list->strdup_strings = strdup_strings; } +static int string_lis
Re: [PATCH] unpack-trees: move checkout state into check_updates
Am 29.12.2016 um 00:26 schrieb Stefan Beller: The checkout state was introduced via 16da134b1f9 (read-trees: refactor the unpack_trees() part, 2006-07-30). An attempt to refactor the checkout state was done in b56aa5b268e (unpack-trees: pass checkout state explicitly to check_updates(), 2016-09-13), but we can go even further. The `struct checkout state` is not used in unpack_trees apart from initializing it, so move it into the function that makes use of it, which is `check_updates`. Thanks for catching this, the result looks nicer. Signed-off-by: Stefan Beller --- unpack-trees.c | 17 - 1 file changed, 8 insertions(+), 9 deletions(-) diff --git a/unpack-trees.c b/unpack-trees.c index ea799d37c5..78703af135 100644 --- a/unpack-trees.c +++ b/unpack-trees.c @@ -224,14 +224,18 @@ static void unlink_entry(const struct cache_entry *ce) schedule_dir_for_removal(ce->name, ce_namelen(ce)); } -static int check_updates(struct unpack_trees_options *o, -const struct checkout *state) +static int check_updates(struct unpack_trees_options *o) { unsigned cnt = 0, total = 0; struct progress *progress = NULL; struct index_state *index = &o->result; int i; int errs = 0; + struct checkout state = CHECKOUT_INIT; + state.force = 1; + state.quiet = 1; + state.refresh_cache = 1; + state.istate = &o->result; And here (as well as in two more places in this function) we could use "index" instead of "&o->result". Not sure if it's worth a patch, though. if (o->update && o->verbose_update) { for (total = cnt = 0; cnt < index->cache_nr; cnt++) { @@ -270,7 +274,7 @@ static int check_updates(struct unpack_trees_options *o, display_progress(progress, ++cnt); ce->ce_flags &= ~CE_UPDATE; if (o->update && !o->dry_run) { - errs |= checkout_entry(ce, state, NULL); + errs |= checkout_entry(ce, &state, NULL); } } } @@ -1100,14 +1104,9 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options int i, ret; static struct cache_entry *dfc; struct exclude_list el; - struct checkout state = CHECKOUT_INIT; if (len > MAX_UNPACK_TREES) die("unpack_trees takes at most %d trees", MAX_UNPACK_TREES); - state.force = 1; - state.quiet = 1; - state.refresh_cache = 1; - state.istate = &o->result; memset(&el, 0, sizeof(el)); if (!core_apply_sparse_checkout || !o->update) @@ -1244,7 +1243,7 @@ int unpack_trees(unsigned len, struct tree_desc *t, struct unpack_trees_options } o->src_index = NULL; - ret = check_updates(o, &state) ? (-2) : 0; + ret = check_updates(o) ? (-2) : 0; if (o->dst_index) { if (!ret) { if (!o->result.cache_tree)
Re: [PATCH] string-list: make compare function compatible with qsort(3)
Am 21.12.2016 um 17:12 schrieb Jeff King: On Wed, Dec 21, 2016 at 10:36:41AM +0100, René Scharfe wrote: One shortcoming is that the comparison function is restricted to working with the string members of items; util is inaccessible to it. Another one is that the value of cmp is passed in a global variable to cmp_items(), making string_list_sort() non-reentrant. I think this approach is OK for string_list, but it doesn't help the general case that wants qsort_s() to actually access global data. I don't know how common that is in our codebase, though. So I'm fine with it, but I think we might eventually need to revisit the qsort_s() thing anyway. I have to admit I didn't even consider the possibility that the pattern of accessing global variables in qsort(1) compare functions could have spread. And indeed, at least ref-filter.c::compare_refs() and worktree.c::compare_worktree() so that as well. The latter uses fspathcmp(), which is OK as ignore_case is only set once when reading the config, but the first one looks, well, interesting. Perhaps a single ref filter per program is enough? Anyway, that's a good enough argument for me for adding that newfangled C11 function after all.. Btw.: Found with git grep 'QSORT.*;$' | sed 's/.* /int /; s/);//' | sort -u | git grep -Ww -f- and git grep -A1 'QSORT.*,$' Remove the intermediate layer, i.e. cmp_items(), make the comparison functions compatible with qsort(3) and pass them pointers to full items. This allows comparisons to also take the util member into account, and avoids the need to pass the real comparison function to an intermediate function, removing the need for a global function. I'm not sure if access to the util field is really of any value, after looking at it in: http://public-inbox.org/git/20161125171546.fa3zpapbjngjc...@sigill.intra.peff.net/ Though note that if we do take this patch, there are probably one or two spots that could switch from QSORT() to string_list_sort(). Yes, but as you noted in that thread there is not much point in doing that; the only net win is that we can pass a list as a single pointer instead of as base pointer and element count -- the special compare function needs to be specified anyway (once), somehow. René
Re: [PATCH] submodule.c: use GIT_DIR_ENVIRONMENT consistently
Am 30.12.2016 um 01:47 schrieb Stefan Beller: diff --git a/submodule.c b/submodule.c index ece17315d6..973b9f3f96 100644 --- a/submodule.c +++ b/submodule.c @@ -1333,5 +1333,6 @@ void prepare_submodule_repo_env(struct argv_array *out) if (strcmp(*var, CONFIG_DATA_ENVIRONMENT)) argv_array_push(out, *var); } - argv_array_push(out, "GIT_DIR=.git"); + argv_array_push(out, "%s=%s", GIT_DIR_ENVIRONMENT, + DEFAULT_GIT_DIR_ENVIRONMENT); argv_array_pushf (with added "f") instead? And indent continued lines to align them with the left parenthesis, like this: fn(arg1, arg2); }
Re: [PATCH] archive-zip: load userdiff config
Am 02.01.2017 um 23:25 schrieb Jeff King: Since 4aff646d17 (archive-zip: mark text files in archives, 2015-03-05), the zip archiver will look at the userdiff driver to decide whether a file is text or binary. This usually doesn't need to look any further than the attributes themselves (e.g., "-diff", etc). But if the user defines a custom driver like "diff=foo", we need to look at "diff.foo.binary" in the config. Prior to this patch, we didn't actually load it. Ah, didn't think of that, obviously. Would it make sense for userdiff_find_by_path() to die if userdiff_config() hasn't been called, yet? I also happened to notice that zipfiles are created using the local timezone (because they have no notion of the timezone, so we have to pick _something_). That's probably the least-terrible option, but it was certainly surprising to me when I tried to bit-for-bit reproduce a zipfile from GitHub on my local machine. That reminds me of an old request to allow users better control over the meta-data written into archives. Being able to specify a time zone offset could be a start. archive-zip.c | 7 +++ t/t5003-archive-zip.sh | 22 ++ 2 files changed, 25 insertions(+), 4 deletions(-) diff --git a/archive-zip.c b/archive-zip.c index 9db47357b0..b429a8d974 100644 --- a/archive-zip.c +++ b/archive-zip.c @@ -554,11 +554,18 @@ static void dos_time(time_t *time, int *dos_date, int *dos_time) *dos_time = t->tm_sec / 2 + t->tm_min * 32 + t->tm_hour * 2048; } +static int archive_zip_config(const char *var, const char *value, void *data) +{ + return userdiff_config(var, value); +} + static int write_zip_archive(const struct archiver *ar, struct archiver_args *args) { int err; + git_config(archive_zip_config, NULL); + I briefly thought about moving this call to archive.c with the rest of the config-related stuff, but I agree it's better kept here. dos_time(&args->time, &zip_date, &zip_time); zip_dir = xmalloc(ZIP_DIRECTORY_MIN_SIZE); diff --git a/t/t5003-archive-zip.sh b/t/t5003-archive-zip.sh index 14744b2a4b..55c7870997 100755 --- a/t/t5003-archive-zip.sh +++ b/t/t5003-archive-zip.sh @@ -64,6 +64,12 @@ check_zip() { test_cmp_bin $original/nodiff.crlf $extracted/nodiff.crlf && test_cmp_bin $original/nodiff.lf $extracted/nodiff.lf " + + test_expect_success UNZIP " validate that custom diff is unchanged " " + test_cmp_bin $original/custom.cr $extracted/custom.cr && + test_cmp_bin $original/custom.crlf $extracted/custom.crlf && + test_cmp_bin $original/custom.lf $extracted/custom.lf + " } test_expect_success \ @@ -78,6 +84,9 @@ test_expect_success \ printf "text\r" >a/nodiff.cr && printf "text\r\n" >a/nodiff.crlf && printf "text\n" >a/nodiff.lf && + printf "text\r" >a/custom.cr && + printf "text\r\n" >a/custom.crlf && + printf "text\n" >a/custom.lf && printf "\0\r" >a/binary.cr && printf "\0\r\n" >a/binary.crlf && printf "\0\n" >a/binary.lf && @@ -112,15 +121,20 @@ test_expect_success 'add files to repository' ' test_expect_success 'setup export-subst and diff attributes' ' echo "a/nodiff.* -diff" >>.git/info/attributes && echo "a/diff.* diff" >>.git/info/attributes && + echo "a/custom.* diff=custom" >>.git/info/attributes && + git config diff.custom.binary true && echo "substfile?" export-subst >>.git/info/attributes && git log --max-count=1 "--pretty=format:A${SUBSTFORMAT}O" HEAD \ >a/substfile1 ' -test_expect_success \ -'create bare clone' \ -'git clone --bare . bare.git && - cp .git/info/attributes bare.git/info/attributes' +test_expect_success 'create bare clone' ' + git clone --bare . bare.git && + cp .git/info/attributes bare.git/info/attributes && + # Recreate our changes to .git/config rather than just copying it, as + # we do not want to clobber core.bare or other settings. + git -C bare.git config diff.custom.binary true +' test_expect_success \ 'remove ignored file' \ Looks good, thanks! René
Re: [PATCH 1/3] xdiff: -W: relax end-of-file function detection
Am 13.01.2017 um 17:15 schrieb Vegard Nossum: When adding a new function to the end of a file, it's enough to know that 1) the addition is at the end of the file; and 2) there is a function _somewhere_ in there. If we had simply been changing the end of an existing function, then we would also be deleting something from the old version. That makes sense, thanks. This fixes the case where we add e.g. // Begin of dummy static int dummy(void) { } to the end of the file. Without this patch the unchanged function before the added lines is shown in its entirety as (uncalled for) context. Cc: René Scharfe Signed-off-by: Vegard Nossum --- xdiff/xemit.c | 14 ++ 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/xdiff/xemit.c b/xdiff/xemit.c index 7389ce4..8c88dbd 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -183,16 +183,14 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, /* * We don't need additional context if -* a whole function was added, possibly -* starting with empty lines. +* a whole function was added. */ - while (i2 < xe->xdf2.nrec && - is_empty_rec(&xe->xdf2, i2)) + while (i2 < xe->xdf2.nrec) { + if (match_func_rec(&xe->xdf2, xecfg, i2, + dummy, sizeof(dummy)) >= 0) Nit: I don't like the indentation here. Giving "dummy" its own line is also not exactly pretty, but at least would allow the parameters to be aligned on the opening parenthesis. + goto post_context_calculation; i2++; - if (i2 < xe->xdf2.nrec && - match_func_rec(&xe->xdf2, xecfg, i2, - dummy, sizeof(dummy)) >= 0) - goto post_context_calculation; + } /* * Otherwise get more context from the
Re: [PATCH 2/3] xdiff: -W: include immediately preceding non-empty lines in context
Am 13.01.2017 um 17:15 schrieb Vegard Nossum: When using -W to include the whole function in the diff context, you are typically doing this to be able to review the change in its entirety within the context of the function. It is therefore almost always desirable to include any comments that immediately precede the function. This also the fixes the case for C where the declaration is split across multiple lines (where the first line of the declaration would not be included in the output), e.g.: void dummy(void) { ... } That's true, but I'm not sure "non-empty line before function line" is good enough a definition for desirable lines. It wouldn't work for people who don't believe in empty lines. Or for those that put a blank line between comment and function. (I have an opinion on such habits, but git diff should probably stay neutral.) And that's just for C code; I have no idea how this heuristic would hold up for other file types like HTML. We can identify function lines with arbitrary precision (with a xfuncname regex, if needed), but there is no accurate way to classify lines as comments, or as the end of functions. Adding optional regexes for single- and multi-line comments would help, at least for C. René
Re: [PATCH 2/3] xdiff: -W: include immediately preceding non-empty lines in context
Am 14.01.2017 um 00:56 schrieb Junio C Hamano: Vegard Nossum writes: The patch will work as intended and as expected for 95% of the users out there (javadoc, Doxygen, kerneldoc, etc. all have the comment immediately preceding the function) and fixes a very real problem for me (and I expect many others) _today_; for the remaining 5% (who put a blank line between their comment and the start of the function) it will revert back to the current behaviour, so there should be no regression for them. I notice your 95% are all programming languages, but I am more worried about the contents written in non programming languages (René gave HTML an an example--there may be other types of contents that we programmer types do not deal with every day, but Git users depend on). I am also more focused on keeping the codebase maintainable in good health by making sure that we made an effort to find a solution that is general-enough before solving a single specific problem you have today. We may end up deciding that a blank-line heuristics gives us good enough tradeoff, but I do not want us to make a decision before thinking. How about extending the context upward only up to and excluding a line that is either empty *or* a function line? That would limit the extra context to a single function in the worst case. Reducing context at the bottom with the aim to remove comments for the next section is more tricky as it could remove part of the function that we'd like to show if we get the boundary wrong. How bad would it be to keep the southern border unchanged? René
Re: [PATCHv3 2/2] builtin/commit.c: switch to xstrfmt(), instead of snprintf,
Am 13.01.2017 um 18:58 schrieb Elia Pinto: > In this patch, instead of using xnprintf instead of snprintf, which asserts > that we don't truncate, we are switching to dynamic allocation with > xstrfmt(), > , so we can avoid dealing with magic numbers in the code and reduce the > cognitive burden from > the programmers, because they no longer have to count bytes needed for static > allocation. > As a side effect of this patch we have also reduced the snprintf() calls, > that may silently truncate > results if the programmer is not careful. > > Helped-by: Junio C Hamano > Helped-by: Jeff King > Signed-off-by: Elia Pinto > --- > This is the third version of the patch. > > Changes from the first version: I have split the original commit in two, as > discussed here > http://public-inbox.org/git/20161213132717.42965-1-gitter.spi...@gmail.com/. > > Changes from the second version: > - Changed the commit message to clarify the purpose of the patch ( > as suggested by Junio) > https://public-inbox.org/git/xmqqtw95mfo3@gitster.mtv.corp.google.com/T/#m2e6405a8a78a8ca1ed770614c91398290574c4a1 > > > > builtin/commit.c | 10 -- > 1 file changed, 4 insertions(+), 6 deletions(-) > > diff --git a/builtin/commit.c b/builtin/commit.c > index 09bcc0f13..37228330c 100644 > --- a/builtin/commit.c > +++ b/builtin/commit.c > @@ -1526,12 +1526,10 @@ static int git_commit_config(const char *k, const > char *v, void *cb) > static int run_rewrite_hook(const unsigned char *oldsha1, > const unsigned char *newsha1) > { > - /* oldsha1 SP newsha1 LF NUL */ > - static char buf[2*40 + 3]; > + char *buf; > struct child_process proc = CHILD_PROCESS_INIT; > const char *argv[3]; > int code; > - size_t n; > > argv[0] = find_hook("post-rewrite"); > if (!argv[0]) > @@ -1547,11 +1545,11 @@ static int run_rewrite_hook(const unsigned char > *oldsha1, > code = start_command(&proc); > if (code) > return code; > - n = snprintf(buf, sizeof(buf), "%s %s\n", > - sha1_to_hex(oldsha1), sha1_to_hex(newsha1)); > + buf = xstrfmt("%s %s\n", sha1_to_hex(oldsha1), sha1_to_hex(newsha1)); > sigchain_push(SIGPIPE, SIG_IGN); > - write_in_full(proc.in, buf, n); > + write_in_full(proc.in, buf, strlen(buf)); > close(proc.in); > + free(buf); > sigchain_pop(SIGPIPE); > return finish_command(&proc); > } > Perhaps I missed it from the discussion, but why not use strbuf? It would avoid counting the generated string's length. That's probably not going to make a measurable difference performance-wise, but it's easy to avoid and doesn't even take up more lines: --- builtin/commit.c | 10 -- 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/builtin/commit.c b/builtin/commit.c index 711f96cc43..73bb72016f 100644 --- a/builtin/commit.c +++ b/builtin/commit.c @@ -1525,12 +1525,10 @@ static int git_commit_config(const char *k, const char *v, void *cb) static int run_rewrite_hook(const unsigned char *oldsha1, const unsigned char *newsha1) { - /* oldsha1 SP newsha1 LF NUL */ - static char buf[2*40 + 3]; + struct strbuf sb = STRBUF_INIT; struct child_process proc = CHILD_PROCESS_INIT; const char *argv[3]; int code; - size_t n; argv[0] = find_hook("post-rewrite"); if (!argv[0]) @@ -1546,11 +1544,11 @@ static int run_rewrite_hook(const unsigned char *oldsha1, code = start_command(&proc); if (code) return code; - n = snprintf(buf, sizeof(buf), "%s %s\n", -sha1_to_hex(oldsha1), sha1_to_hex(newsha1)); + strbuf_addf(&sb, "%s %s\n", sha1_to_hex(oldsha1), sha1_to_hex(newsha1)); sigchain_push(SIGPIPE, SIG_IGN); - write_in_full(proc.in, buf, n); + write_in_full(proc.in, sb.buf, sb.len); close(proc.in); + strbuf_release(&sb); sigchain_pop(SIGPIPE); return finish_command(&proc); } -- 2.11.0
Re: [PATCH 2/3] xdiff: -W: include immediately preceding non-empty lines in context
Am 15.01.2017 um 03:39 schrieb Junio C Hamano: > René Scharfe writes: > >>> I am also more focused on keeping the codebase maintainable in good >>> health by making sure that we made an effort to find a solution that >>> is general-enough before solving a single specific problem you have >>> today. We may end up deciding that a blank-line heuristics gives us >>> good enough tradeoff, but I do not want us to make a decision before >>> thinking. >> >> How about extending the context upward only up to and excluding a line >> that is either empty *or* a function line? That would limit the extra >> context to a single function in the worst case. >> >> Reducing context at the bottom with the aim to remove comments for the >> next section is more tricky as it could remove part of the function >> that we'd like to show if we get the boundary wrong. How bad would it >> be to keep the southern border unchanged? > > I personally do not think there is any robust heuristic other than > Vegard's "a blank line may be a signal enough that lines before that > are not part of the beginning of the function", and I think your > "hence we look for a blank line but if there is a line that matches > the function header, stop there as we know we came too far back" > will be a good-enough safety measure. > > I also agree with you that we probably do not want to futz with the > southern border. A replacement patch for 2/3 with these changes would look like this: diff --git a/xdiff/xemit.c b/xdiff/xemit.c index 8c88dbde38..9ed54cd318 100644 --- a/xdiff/xemit.c +++ b/xdiff/xemit.c @@ -174,11 +174,11 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, s2 = XDL_MAX(xch->i2 - xecfg->ctxlen, 0); if (xecfg->flags & XDL_EMIT_FUNCCONTEXT) { + char dummy[1]; long fs1, i1 = xch->i1; /* Appended chunk? */ if (i1 >= xe->xdf1.nrec) { - char dummy[1]; long i2 = xch->i2; /* @@ -200,6 +200,10 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, xdemitcb_t *ecb, } fs1 = get_func_line(xe, xecfg, NULL, i1, -1); + while (fs1 > 0 && !is_empty_rec(&xe->xdf1, fs1 - 1) && + match_func_rec(&xe->xdf1, xecfg, fs1 - 1, + dummy, sizeof(dummy)) < 0) + fs1--; if (fs1 < 0) fs1 = 0; if (fs1 < s1) {
Re: [PATCH 2/3] xdiff: -W: include immediately preceding non-empty lines in context
Am 15.01.2017 um 11:06 schrieb Vegard Nossum: On 15/01/2017 03:39, Junio C Hamano wrote: René Scharfe writes: How about extending the context upward only up to and excluding a line that is either empty *or* a function line? That would limit the extra context to a single function in the worst case. Reducing context at the bottom with the aim to remove comments for the next section is more tricky as it could remove part of the function that we'd like to show if we get the boundary wrong. How bad would it be to keep the southern border unchanged? I personally do not think there is any robust heuristic other than Vegard's "a blank line may be a signal enough that lines before that are not part of the beginning of the function", and I think your "hence we look for a blank line but if there is a line that matches the function header, stop there as we know we came too far back" will be a good-enough safety measure. I also agree with you that we probably do not want to futz with the southern border. You are right, trying to change the southern border in this way is not quite reliable if there are no empty lines whatsoever and can erroneously cause the function context to not include the bottom of the function being changed. I'm splitting the function boundary detection logic into separate functions and trying to solve the above case without breaking the tests (and adding a new test for the above case too). I'll see if I can additionally provide some toggles (flags or config variables) to control the new behaviour, what I had in mind was: -W[=preamble,=no-preamble] --function-context[=preamble,=no-preamble] diff.functionContextPreamble = (where the new logic is controlled by the new config variable and overridden by the presence of =preamble or =no-preamble). Adding comments before a function is useful, removing comments after a function sounds to me as only nice to have (under the assumption that they belong to the next function[*]). How bad would it be to only implement the first part (as in the patch I just sent) without adding new config settings or parameters? Thanks, René [*] Silly counter-example (the #endif line): #ifdef SOMETHING int f(...) { // implementation for SOMETHING } #else inf f(...) { // implementation without SOMETHING } #endif /* SOMETHING */
[PATCH v2 0/5] string-list: make string_list_sort() reentrant
Use qsort_s() from C11 Annex K to make string_list_sort() safer, in particular when called from parallel threads. Changes from v1: * Renamed HAVE_QSORT_S to HAVE_ISO_QSORT_S in Makefile to disambiguate. * Added basic perf test (patch 3). * Converted a second caller to QSORT_S, in ref-filter.c (patch 5). compat: add qsort_s() add QSORT_S perf: add basic sort performance test string-list: use QSORT_S in string_list_sort() ref-filter: use QSORT_S in ref_array_sort() Makefile| 8 ++ compat/qsort_s.c| 69 + git-compat-util.h | 11 ref-filter.c| 6 ++-- string-list.c | 13 - t/helper/test-string-list.c | 25 t/perf/p0071-sort.sh| 26 + 7 files changed, 146 insertions(+), 12 deletions(-) create mode 100644 compat/qsort_s.c create mode 100755 t/perf/p0071-sort.sh -- 2.11.0
[PATCH v2 1/5] compat: add qsort_s()
The function qsort_s() was introduced with C11 Annex K; it provides the ability to pass a context pointer to the comparison function, supports the convention of using a NULL pointer for an empty array and performs a few safety checks. Add an implementation based on compat/qsort.c for platforms that lack a native standards-compliant qsort_s() (i.e. basically everyone). It doesn't perform the full range of possible checks: It uses size_t instead of rsize_t and doesn't check nmemb and size against RSIZE_MAX because we probably don't have the restricted size type defined. For the same reason it returns int instead of errno_t. Signed-off-by: Rene Scharfe --- Makefile | 8 +++ compat/qsort_s.c | 69 +++ git-compat-util.h | 6 + 3 files changed, 83 insertions(+) create mode 100644 compat/qsort_s.c diff --git a/Makefile b/Makefile index 27afd0f378..53ecc84e28 100644 --- a/Makefile +++ b/Makefile @@ -279,6 +279,9 @@ all:: # is a simplified version of the merge sort used in glibc. This is # recommended if Git triggers O(n^2) behavior in your platform's qsort(). # +# Define HAVE_ISO_QSORT_S if your platform provides a qsort_s() that's +# compatible with the one described in C11 Annex K. +# # Define UNRELIABLE_FSTAT if your system's fstat does not return the same # information on a not yet closed file that lstat would return for the same # file after it was closed. @@ -1418,6 +1421,11 @@ ifdef INTERNAL_QSORT COMPAT_CFLAGS += -DINTERNAL_QSORT COMPAT_OBJS += compat/qsort.o endif +ifdef HAVE_ISO_QSORT_S + COMPAT_CFLAGS += -DHAVE_ISO_QSORT_S +else + COMPAT_OBJS += compat/qsort_s.o +endif ifdef RUNTIME_PREFIX COMPAT_CFLAGS += -DRUNTIME_PREFIX endif diff --git a/compat/qsort_s.c b/compat/qsort_s.c new file mode 100644 index 00..52d1f0a73d --- /dev/null +++ b/compat/qsort_s.c @@ -0,0 +1,69 @@ +#include "../git-compat-util.h" + +/* + * A merge sort implementation, simplified from the qsort implementation + * by Mike Haertel, which is a part of the GNU C Library. + * Added context pointer, safety checks and return value. + */ + +static void msort_with_tmp(void *b, size_t n, size_t s, + int (*cmp)(const void *, const void *, void *), + char *t, void *ctx) +{ + char *tmp; + char *b1, *b2; + size_t n1, n2; + + if (n <= 1) + return; + + n1 = n / 2; + n2 = n - n1; + b1 = b; + b2 = (char *)b + (n1 * s); + + msort_with_tmp(b1, n1, s, cmp, t, ctx); + msort_with_tmp(b2, n2, s, cmp, t, ctx); + + tmp = t; + + while (n1 > 0 && n2 > 0) { + if (cmp(b1, b2, ctx) <= 0) { + memcpy(tmp, b1, s); + tmp += s; + b1 += s; + --n1; + } else { + memcpy(tmp, b2, s); + tmp += s; + b2 += s; + --n2; + } + } + if (n1 > 0) + memcpy(tmp, b1, n1 * s); + memcpy(b, t, (n - n2) * s); +} + +int git_qsort_s(void *b, size_t n, size_t s, + int (*cmp)(const void *, const void *, void *), void *ctx) +{ + const size_t size = st_mult(n, s); + char buf[1024]; + + if (!n) + return 0; + if (!b || !cmp) + return -1; + + if (size < sizeof(buf)) { + /* The temporary array fits on the small on-stack buffer. */ + msort_with_tmp(b, n, s, cmp, buf, ctx); + } else { + /* It's somewhat large, so malloc it. */ + char *tmp = xmalloc(size); + msort_with_tmp(b, n, s, cmp, tmp, ctx); + free(tmp); + } + return 0; +} diff --git a/git-compat-util.h b/git-compat-util.h index 87237b092b..f706744e6a 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -988,6 +988,12 @@ static inline void sane_qsort(void *base, size_t nmemb, size_t size, qsort(base, nmemb, size, compar); } +#ifndef HAVE_ISO_QSORT_S +int git_qsort_s(void *base, size_t nmemb, size_t size, + int (*compar)(const void *, const void *, void *), void *ctx); +#define qsort_s git_qsort_s +#endif + #ifndef REG_STARTEND #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd" #endif -- 2.11.0
[PATCH v2 3/5] perf: add basic sort performance test
Add a sort command to test-string-list that reads lines from stdin, stores them in a string_list and then sorts it. Use it in a simple perf test script to measure the performance of string_list_sort(). Signed-off-by: Rene Scharfe --- t/helper/test-string-list.c | 25 + t/perf/p0071-sort.sh| 26 ++ 2 files changed, 51 insertions(+) create mode 100755 t/perf/p0071-sort.sh diff --git a/t/helper/test-string-list.c b/t/helper/test-string-list.c index 4a68967bd1..c502fa16d3 100644 --- a/t/helper/test-string-list.c +++ b/t/helper/test-string-list.c @@ -97,6 +97,31 @@ int cmd_main(int argc, const char **argv) return 0; } + if (argc == 2 && !strcmp(argv[1], "sort")) { + struct string_list list = STRING_LIST_INIT_NODUP; + struct strbuf sb = STRBUF_INIT; + struct string_list_item *item; + + strbuf_read(&sb, 0, 0); + + /* +* Split by newline, but don't create a string_list item +* for the empty string after the last separator. +*/ + if (sb.buf[sb.len - 1] == '\n') + strbuf_setlen(&sb, sb.len - 1); + string_list_split_in_place(&list, sb.buf, '\n', -1); + + string_list_sort(&list); + + for_each_string_list_item(item, &list) + puts(item->string); + + string_list_clear(&list, 0); + strbuf_release(&sb); + return 0; + } + fprintf(stderr, "%s: unknown function name: %s\n", argv[0], argv[1] ? argv[1] : "(there was none)"); return 1; diff --git a/t/perf/p0071-sort.sh b/t/perf/p0071-sort.sh new file mode 100755 index 00..7c9a35a646 --- /dev/null +++ b/t/perf/p0071-sort.sh @@ -0,0 +1,26 @@ +#!/bin/sh + +test_description='Basic sort performance tests' +. ./perf-lib.sh + +test_perf_default_repo + +test_expect_success 'setup' ' + git ls-files --stage "*.[ch]" "*.sh" | + cut -f2 -d" " | + git cat-file --batch >unsorted +' + +test_perf 'sort(1)' ' + sort expect +' + +test_perf 'string_list_sort()' ' + test-string-list sort actual +' + +test_expect_success 'string_list_sort() sorts like sort(1)' ' + test_cmp_bin expect actual +' + +test_done -- 2.11.0
[PATCH v2 4/5] string-list: use QSORT_S in string_list_sort()
Pass the comparison function to cmp_items() via the context parameter of qsort_s() instead of using a global variable. That allows calling string_list_sort() from multiple parallel threads. Our qsort_s() in compat/ is slightly slower than qsort(1) from glibc 2.24 for sorting lots of lines: Test HEAD^ HEAD - 0071.2: sort(1) 0.10(0.22+0.01) 0.09(0.21+0.00) -10.0% 0071.3: string_list_sort() 0.16(0.15+0.01) 0.17(0.15+0.00) +6.3% GNU sort(1) version 8.26 is significantly faster because it uses multiple parallel threads; with the unportable option --parallel=1 it becomes slower: Test HEAD^ HEAD 0071.2: sort(1) 0.21(0.18+0.01) 0.20(0.18+0.01) -4.8% 0071.3: string_list_sort() 0.16(0.13+0.02) 0.17(0.15+0.01) +6.3% There is some instability -- the numbers for the sort(1) check shouldn't be affected by this patch. Anyway, the performance of our qsort_s() implementation is apparently good enough, at least for this test. Signed-off-by: Rene Scharfe --- string-list.c | 13 + 1 file changed, 5 insertions(+), 8 deletions(-) diff --git a/string-list.c b/string-list.c index 8c83cac189..45016ad86d 100644 --- a/string-list.c +++ b/string-list.c @@ -211,21 +211,18 @@ struct string_list_item *string_list_append(struct string_list *list, list->strdup_strings ? xstrdup(string) : (char *)string); } -/* Yuck */ -static compare_strings_fn compare_for_qsort; - -/* Only call this from inside string_list_sort! */ -static int cmp_items(const void *a, const void *b) +static int cmp_items(const void *a, const void *b, void *ctx) { + compare_strings_fn cmp = ctx; const struct string_list_item *one = a; const struct string_list_item *two = b; - return compare_for_qsort(one->string, two->string); + return cmp(one->string, two->string); } void string_list_sort(struct string_list *list) { - compare_for_qsort = list->cmp ? list->cmp : strcmp; - QSORT(list->items, list->nr, cmp_items); + QSORT_S(list->items, list->nr, cmp_items, + list->cmp ? list->cmp : strcmp); } struct string_list_item *unsorted_string_list_lookup(struct string_list *list, -- 2.11.0
[PATCH v2 5/5] ref-filter: use QSORT_S in ref_array_sort()
Pass the array of sort keys to compare_refs() via the context parameter of qsort_s() instead of using a global variable; that's cleaner and simpler. If ref_array_sort() is to be called from multiple parallel threads then care still needs to be taken that the global variable used_atom is not modified concurrently. Signed-off-by: Rene Scharfe --- ref-filter.c | 6 ++ 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/ref-filter.c b/ref-filter.c index 1a978405e6..3975022c88 100644 --- a/ref-filter.c +++ b/ref-filter.c @@ -1589,8 +1589,7 @@ static int cmp_ref_sorting(struct ref_sorting *s, struct ref_array_item *a, stru return (s->reverse) ? -cmp : cmp; } -static struct ref_sorting *ref_sorting; -static int compare_refs(const void *a_, const void *b_) +static int compare_refs(const void *a_, const void *b_, void *ref_sorting) { struct ref_array_item *a = *((struct ref_array_item **)a_); struct ref_array_item *b = *((struct ref_array_item **)b_); @@ -1606,8 +1605,7 @@ static int compare_refs(const void *a_, const void *b_) void ref_array_sort(struct ref_sorting *sorting, struct ref_array *array) { - ref_sorting = sorting; - QSORT(array->items, array->nr, compare_refs); + QSORT_S(array->items, array->nr, compare_refs, sorting); } static void append_literal(const char *cp, const char *ep, struct ref_formatting_state *state) -- 2.11.0
[PATCH v2 2/5] add QSORT_S
Add the macro QSORT_S, a convenient wrapper for qsort_s() that infers the size of the array elements and dies on error. Basically all possible errors are programming mistakes (passing NULL as base of a non-empty array, passing NULL as comparison function, out-of-bounds accesses), so terminating the program should be acceptable for most callers. Signed-off-by: Rene Scharfe --- git-compat-util.h | 5 + 1 file changed, 5 insertions(+) diff --git a/git-compat-util.h b/git-compat-util.h index f706744e6a..f46d40e4a4 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -994,6 +994,11 @@ int git_qsort_s(void *base, size_t nmemb, size_t size, #define qsort_s git_qsort_s #endif +#define QSORT_S(base, n, compar, ctx) do { \ + if (qsort_s((base), (n), sizeof(*(base)), compar, ctx)) \ + die("BUG: qsort_s() failed"); \ +} while (0) + #ifndef REG_STARTEND #error "Git requires REG_STARTEND support. Compile with NO_REGEX=NeedsStartEnd" #endif -- 2.11.0
[DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
Implement qsort_s() as a wrapper to the GNU version of qsort_r(1) and use it on Linux. Performance increases slightly: Test HEAD^ HEAD 0071.2: sort(1) 0.10(0.20+0.02) 0.10(0.21+0.01) +0.0% 0071.3: string_list_sort() 0.17(0.15+0.01) 0.16(0.15+0.01) -5.9% Additionally the unstripped size of compat/qsort_s.o falls from 24576 to 16544 bytes in my build. IMHO these savings aren't worth the increased complexity of having to support two implementations. --- Makefile | 6 ++ compat/qsort_s.c | 18 ++ config.mak.uname | 1 + 3 files changed, 25 insertions(+) diff --git a/Makefile b/Makefile index 53ecc84e28..46db1c773f 100644 --- a/Makefile +++ b/Makefile @@ -282,6 +282,9 @@ all:: # Define HAVE_ISO_QSORT_S if your platform provides a qsort_s() that's # compatible with the one described in C11 Annex K. # +# Define HAVE_GNU_QSORT_R if your platform provides a qsort_r() that's +# compatible with the one introduced with glibc 2.8. +# # Define UNRELIABLE_FSTAT if your system's fstat does not return the same # information on a not yet closed file that lstat would return for the same # file after it was closed. @@ -1426,6 +1429,9 @@ ifdef HAVE_ISO_QSORT_S else COMPAT_OBJS += compat/qsort_s.o endif +ifdef HAVE_GNU_QSORT_R + COMPAT_CFLAGS += -DHAVE_GNU_QSORT_R +endif ifdef RUNTIME_PREFIX COMPAT_CFLAGS += -DRUNTIME_PREFIX endif diff --git a/compat/qsort_s.c b/compat/qsort_s.c index 52d1f0a73d..763ee1faae 100644 --- a/compat/qsort_s.c +++ b/compat/qsort_s.c @@ -1,5 +1,21 @@ #include "../git-compat-util.h" +#if defined HAVE_GNU_QSORT_R + +int git_qsort_s(void *b, size_t n, size_t s, + int (*cmp)(const void *, const void *, void *), void *ctx) +{ + if (!n) + return 0; + if (!b || !cmp) + return -1; + + qsort_r(b, n, s, cmp, ctx); + return 0; +} + +#else + /* * A merge sort implementation, simplified from the qsort implementation * by Mike Haertel, which is a part of the GNU C Library. @@ -67,3 +83,5 @@ int git_qsort_s(void *b, size_t n, size_t s, } return 0; } + +#endif diff --git a/config.mak.uname b/config.mak.uname index 447f36ac2e..a1858f54ff 100644 --- a/config.mak.uname +++ b/config.mak.uname @@ -37,6 +37,7 @@ ifeq ($(uname_S),Linux) NEEDS_LIBRT = YesPlease HAVE_GETDELIM = YesPlease SANE_TEXT_GREP=-a + HAVE_GNU_QSORT_R = YesPlease endif ifeq ($(uname_S),GNU/kFreeBSD) HAVE_ALLOCA_H = YesPlease -- 2.11.0
Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
Am 23.01.2017 um 20:07 schrieb Junio C Hamano: > René Scharfe writes: > >> Implement qsort_s() as a wrapper to the GNU version of qsort_r(1) and >> use it on Linux. Performance increases slightly: >> >> Test HEAD^ HEAD >> >> 0071.2: sort(1) 0.10(0.20+0.02) 0.10(0.21+0.01) +0.0% >> 0071.3: string_list_sort() 0.17(0.15+0.01) 0.16(0.15+0.01) -5.9% >> >> Additionally the unstripped size of compat/qsort_s.o falls from 24576 >> to 16544 bytes in my build. >> >> IMHO these savings aren't worth the increased complexity of having to >> support two implementations. > > I do worry about having to support more implementations in the > future that have different function signature for the comparison > callbacks, which will make things ugly, but this addition alone > doesn't look too bad to me. It is unfair of me to show a 5% speedup and then recommend to not include it. ;-) That difference won't be measurable in real use cases and the patch is not necessary. This patch is simple, but the overall complexity (incl. #ifdefs etc.) will be lower without it. But here's another one, with even higher performance and with an even bigger recommendation to not include it! :) It veers off into another direction: Parallel execution. It requires thread-safe comparison functions, which might surprise callers. The value 1000 for the minimum number of items before threading kicks in is just a guess, not based on measurements. So it's quite raw -- and I'm not sure why it's still a bit slower than sort(1). Test HEAD^ HEAD - 0071.2: sort(1) 0.10(0.18+0.03) 0.10(0.20+0.02) +0.0% 0071.3: string_list_sort() 0.17(0.14+0.02) 0.11(0.18+0.02) -35.3% --- compat/qsort_s.c | 76 ++-- 1 file changed, 58 insertions(+), 18 deletions(-) diff --git a/compat/qsort_s.c b/compat/qsort_s.c index 52d1f0a73d..1304a089af 100644 --- a/compat/qsort_s.c +++ b/compat/qsort_s.c @@ -1,4 +1,5 @@ #include "../git-compat-util.h" +#include "../thread-utils.h" /* * A merge sort implementation, simplified from the qsort implementation @@ -6,29 +7,58 @@ * Added context pointer, safety checks and return value. */ -static void msort_with_tmp(void *b, size_t n, size_t s, - int (*cmp)(const void *, const void *, void *), - char *t, void *ctx) +#define MIN_ITEMS_FOR_THREAD 1000 + +struct work { + int nr_threads; + void *base; + size_t nmemb; + size_t size; + char *tmp; + int (*cmp)(const void *, const void *, void *); + void *ctx; +}; + +static void *msort_with_tmp(void *work) { + struct work one, two, *w = work; char *tmp; char *b1, *b2; size_t n1, n2; + size_t s, n; - if (n <= 1) - return; + if (w->nmemb <= 1) + return NULL; - n1 = n / 2; - n2 = n - n1; - b1 = b; - b2 = (char *)b + (n1 * s); + one = two = *w; + one.nr_threads /= 2; + two.nr_threads -= one.nr_threads; + n = one.nmemb; + s = one.size; + n1 = one.nmemb = n / 2; + n2 = two.nmemb = n - n1; + b1 = one.base; + b2 = two.base = b1 + n1 * s; + two.tmp += n1 * s; - msort_with_tmp(b1, n1, s, cmp, t, ctx); - msort_with_tmp(b2, n2, s, cmp, t, ctx); +#ifndef NO_PTHREADS + if (one.nr_threads && n > MIN_ITEMS_FOR_THREAD) { + pthread_t thread; + int err = pthread_create(&thread, NULL, msort_with_tmp, &one); + msort_with_tmp(&two); + if (err || pthread_join(thread, NULL)) + msort_with_tmp(&one); + } else +#endif + { + msort_with_tmp(&one); + msort_with_tmp(&two); + } - tmp = t; + tmp = one.tmp; while (n1 > 0 && n2 > 0) { - if (cmp(b1, b2, ctx) <= 0) { + if (one.cmp(b1, b2, one.ctx) <= 0) { memcpy(tmp, b1, s); tmp += s; b1 += s; @@ -42,7 +72,8 @@ static void msort_with_tmp(void *b, size_t n, size_t s, } if (n1 > 0) memcpy(tmp, b1, n1 * s); - memcpy(b, t, (n - n2) * s); + memcpy(one.base, one.tmp, (n - n2) * s); + return NULL; } int git_qsort_s(void *b, size_t n, size_t s, @@ -50,20 +81,29 @@ int git_qsort_s(void *b, size_t n, size_t s, { const size_t size = st_mult(n, s); char buf[1024]; + struct wo
Re: [PATCH v2 0/5] string-list: make string_list_sort() reentrant
Am 24.01.2017 um 00:54 schrieb Jeff King: The speed looks like a reasonable outcome. I'm torn on the qsort_r() demo patch. I don't think it looks too bad. OTOH, I don't think I would want to deal with the opposite-argument-order versions. The code itself may look OK, but it's not really necessary and the special implementation for Linux makes increases maintenance costs. Can we save it for later and first give the common implemention a chance to prove itself? Is there any interest in people adding the ISO qsort_s() to their libc implementations? It seems like it's been a fair number of years by now. https://sourceware.org/ml/libc-alpha/2014-12/msg00513.html is the last post mentioning qsort_s on the glibc mailing list, but it didn't even make it into https://sourceware.org/glibc/wiki/Development_Todo/Master. Not sure what's planned in BSD land, didn't find anything (but didn't look too hard). René
Re: [DEMO][PATCH v2 6/5] compat: add a qsort_s() implementation based on GNU's qsort_r(1)
Am 24.01.2017 um 21:39 schrieb Jeff King: On Tue, Jan 24, 2017 at 07:00:03PM +0100, René Scharfe wrote: I do worry about having to support more implementations in the future that have different function signature for the comparison callbacks, which will make things ugly, but this addition alone doesn't look too bad to me. It is unfair of me to show a 5% speedup and then recommend to not include it. ;-) That difference won't be measurable in real use cases and the patch is not necessary. This patch is simple, but the overall complexity (incl. #ifdefs etc.) will be lower without it. I care less about squeezing out the last few percent performance and more that somebody libc qsort_r() might offer some other improvement. For instance, it could sort in-place to lower memory use for some cases, or do some clever thing that gives more than a few percent in the real world (something like TimSort). I don't know to what degree we should care about that. That's a good question. Which workloads spend a significant amount of time in qsort/qsort_s? We could track processor time spent and memory allocated in QSORT_S and the whole program and show a warning at the end if one of the two exceeded, say, 5% of the total, asking nicely to send it to our mailing list. Would something like this be useful for other functions or metrics as well? Would it be too impolite to use users as telemetry transports? If we find such cases then we'd better fix them for all platforms, e.g. by importing timsort, no? René
PATCH 1/2] abspath: add absolute_pathdup()
Add a function that returns a buffer containing the absolute path of its argument and a semantic patch for its intended use. It avoids an extra string copy to a static buffer. Signed-off-by: Rene Scharfe --- abspath.c| 7 +++ cache.h | 1 + contrib/coccinelle/xstrdup_or_null.cocci | 6 ++ 3 files changed, 14 insertions(+) diff --git a/abspath.c b/abspath.c index fce40fddcc..2f0c26e0e2 100644 --- a/abspath.c +++ b/abspath.c @@ -239,6 +239,13 @@ const char *absolute_path(const char *path) return sb.buf; } +char *absolute_pathdup(const char *path) +{ + struct strbuf sb = STRBUF_INIT; + strbuf_add_absolute_path(&sb, path); + return strbuf_detach(&sb, NULL); +} + /* * Unlike prefix_path, this should be used if the named file does * not have to interact with index entry; i.e. name of a random file diff --git a/cache.h b/cache.h index 00a029af36..d7b7a8cd7a 100644 --- a/cache.h +++ b/cache.h @@ -1069,6 +1069,7 @@ const char *real_path(const char *path); const char *real_path_if_valid(const char *path); char *real_pathdup(const char *path); const char *absolute_path(const char *path); +char *absolute_pathdup(const char *path); const char *remove_leading_path(const char *in, const char *prefix); const char *relative_path(const char *in, const char *prefix, struct strbuf *sb); int normalize_path_copy_len(char *dst, const char *src, int *prefix_len); diff --git a/contrib/coccinelle/xstrdup_or_null.cocci b/contrib/coccinelle/xstrdup_or_null.cocci index 3fceef132b..8e05d1ca4b 100644 --- a/contrib/coccinelle/xstrdup_or_null.cocci +++ b/contrib/coccinelle/xstrdup_or_null.cocci @@ -5,3 +5,9 @@ expression V; - if (E) -V = xstrdup(E); + V = xstrdup_or_null(E); + +@@ +expression E; +@@ +- xstrdup(absolute_path(E)) ++ absolute_pathdup(E) -- 2.11.0
[PATCH 2/2] use absolute_pathdup()
Apply the symantic patch for converting callers that duplicate the result of absolute_path() to call absolute_pathdup() instead, which avoids an extra string copy to a static buffer. Signed-off-by: Rene Scharfe --- builtin/clone.c | 4 ++-- builtin/submodule--helper.c | 2 +- worktree.c | 2 +- 3 files changed, 4 insertions(+), 4 deletions(-) diff --git a/builtin/clone.c b/builtin/clone.c index 5ef81927a6..3f63edbbf9 100644 --- a/builtin/clone.c +++ b/builtin/clone.c @@ -170,7 +170,7 @@ static char *get_repo_path(const char *repo, int *is_bundle) strbuf_addstr(&path, repo); raw = get_repo_path_1(&path, is_bundle); - canon = raw ? xstrdup(absolute_path(raw)) : NULL; + canon = raw ? absolute_pathdup(raw) : NULL; strbuf_release(&path); return canon; } @@ -894,7 +894,7 @@ int cmd_clone(int argc, const char **argv, const char *prefix) path = get_repo_path(repo_name, &is_bundle); if (path) - repo = xstrdup(absolute_path(repo_name)); + repo = absolute_pathdup(repo_name); else if (!strchr(repo_name, ':')) die(_("repository '%s' does not exist"), repo_name); else diff --git a/builtin/submodule--helper.c b/builtin/submodule--helper.c index 74614a951e..899dc334e3 100644 --- a/builtin/submodule--helper.c +++ b/builtin/submodule--helper.c @@ -626,7 +626,7 @@ static int module_clone(int argc, const char **argv, const char *prefix) module_clone_options); strbuf_addf(&sb, "%s/modules/%s", get_git_dir(), name); - sm_gitdir = xstrdup(absolute_path(sb.buf)); + sm_gitdir = absolute_pathdup(sb.buf); strbuf_reset(&sb); if (!is_absolute_path(path)) { diff --git a/worktree.c b/worktree.c index 53b4771c04..d633761575 100644 --- a/worktree.c +++ b/worktree.c @@ -145,7 +145,7 @@ static struct worktree *get_linked_worktree(const char *id) static void mark_current_worktree(struct worktree **worktrees) { - char *git_dir = xstrdup(absolute_path(get_git_dir())); + char *git_dir = absolute_pathdup(get_git_dir()); int i; for (i = 0; worktrees[i]; i++) { -- 2.11.0
Re: [PATCH 2/2] use absolute_pathdup()
Hi Dscho, Am 27.01.2017 um 11:21 schrieb Johannes Schindelin: On Thu, 26 Jan 2017, René Scharfe wrote: Apply the symantic patch for converting callers that duplicate the s/symantic/semantic/ thank you! I wonder where this came from. And where my spellchecker went without as much as a farewell. Reinstalled it now.. René
[PATCH 0/5] introduce SWAP macro
Exchanging the value of two variables requires declaring a temporary variable and repeating their names. The swap macro in apply.c simplifies this for its callers without changing the compiled binary. Polish this macro and export it, then use it throughout the code to reduce repetition and hide the declaration of temporary variables. add SWAP macro apply: use SWAP macro use SWAP macro diff: use SWAP macro graph: use SWAP macro apply.c | 23 +++ builtin/diff-tree.c | 4 +--- builtin/diff.c| 9 +++-- contrib/coccinelle/swap.cocci | 28 diff-no-index.c | 6 ++ diff.c| 12 git-compat-util.h | 10 ++ graph.c | 11 ++- line-range.c | 3 +-- merge-recursive.c | 5 + object.c | 4 +--- pack-revindex.c | 5 + prio-queue.c | 4 +--- strbuf.h | 4 +--- 14 files changed, 63 insertions(+), 65 deletions(-) create mode 100644 contrib/coccinelle/swap.cocci -- 2.11.0
[PATCH 1/5] add SWAP macro
Add a macro for exchanging the values of variables. It allows users to avoid repetition and takes care of the temporary variable for them. It also makes sure that the storage sizes of its two parameters are the same. Its memcpy(1) calls are optimized away by current compilers. Also add a conservative semantic patch for transforming only swaps of variables of the same type. Signed-off-by: Rene Scharfe --- contrib/coccinelle/swap.cocci | 28 git-compat-util.h | 10 ++ 2 files changed, 38 insertions(+) create mode 100644 contrib/coccinelle/swap.cocci diff --git a/contrib/coccinelle/swap.cocci b/contrib/coccinelle/swap.cocci new file mode 100644 index 00..a0934d1fda --- /dev/null +++ b/contrib/coccinelle/swap.cocci @@ -0,0 +1,28 @@ +@ swap_with_declaration @ +type T; +identifier tmp; +T a, b; +@@ +- T tmp = a; ++ T tmp; ++ tmp = a; + a = b; + b = tmp; + +@ swap @ +type T; +T tmp, a, b; +@@ +- tmp = a; +- a = b; +- b = tmp; ++ SWAP(a, b); + +@ extends swap @ +identifier unused; +@@ + { + ... +- T unused; + ... when != unused + } diff --git a/git-compat-util.h b/git-compat-util.h index 87237b092b..66cd466eea 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char *suffix) return strip_suffix(str, suffix, &len); } +#define SWAP(a, b) do {\ + void *_swap_a_ptr = &(a); \ + void *_swap_b_ptr = &(b); \ + unsigned char _swap_buffer[sizeof(a)]; \ + memcpy(_swap_buffer, _swap_a_ptr, sizeof(a)); \ + memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +\ + BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b))); \ + memcpy(_swap_b_ptr, _swap_buffer, sizeof(a)); \ +} while (0) + #if defined(NO_MMAP) || defined(USE_WIN32_MMAP) #ifndef PROT_READ -- 2.11.0
[PATCH 2/5] apply: use SWAP macro
Use the exported macro SWAP instead of the file-scoped macro swap and remove the latter's definition. Signed-off-by: Rene Scharfe --- apply.c | 23 +++ 1 file changed, 7 insertions(+), 16 deletions(-) diff --git a/apply.c b/apply.c index 2ed808d429..0e2caeab9c 100644 --- a/apply.c +++ b/apply.c @@ -2187,29 +2187,20 @@ static int parse_chunk(struct apply_state *state, char *buffer, unsigned long si return offset + hdrsize + patchsize; } -#define swap(a,b) myswap((a),(b),sizeof(a)) - -#define myswap(a, b, size) do {\ - unsigned char mytmp[size]; \ - memcpy(mytmp, &a, size);\ - memcpy(&a, &b, size); \ - memcpy(&b, mytmp, size);\ -} while (0) - static void reverse_patches(struct patch *p) { for (; p; p = p->next) { struct fragment *frag = p->fragments; - swap(p->new_name, p->old_name); - swap(p->new_mode, p->old_mode); - swap(p->is_new, p->is_delete); - swap(p->lines_added, p->lines_deleted); - swap(p->old_sha1_prefix, p->new_sha1_prefix); + SWAP(p->new_name, p->old_name); + SWAP(p->new_mode, p->old_mode); + SWAP(p->is_new, p->is_delete); + SWAP(p->lines_added, p->lines_deleted); + SWAP(p->old_sha1_prefix, p->new_sha1_prefix); for (; frag; frag = frag->next) { - swap(frag->newpos, frag->oldpos); - swap(frag->newlines, frag->oldlines); + SWAP(frag->newpos, frag->oldpos); + SWAP(frag->newlines, frag->oldlines); } } } -- 2.11.0
[PATCH 4/5] diff: use SWAP macro
Use the macro SWAP to exchange the value of pairs of variables instead of swapping them manually with the help of a temporary variable. The resulting code is shorter and easier to read. The two cases were not transformed by the semantic patch swap.cocci because it's extra careful and handles only cases where the types of all variables are the same -- and here we swap two ints and use an unsigned temporary variable for that. Nevertheless the conversion is safe, as the value range is preserved with and without the patch. Signed-off-by: Rene Scharfe --- diff-no-index.c | 3 +-- diff.c | 4 +--- 2 files changed, 2 insertions(+), 5 deletions(-) diff --git a/diff-no-index.c b/diff-no-index.c index 1ae09894d7..df762fd0f7 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -185,8 +185,7 @@ static int queue_diff(struct diff_options *o, struct diff_filespec *d1, *d2; if (DIFF_OPT_TST(o, REVERSE_DIFF)) { - unsigned tmp; - tmp = mode1; mode1 = mode2; mode2 = tmp; + SWAP(mode1, mode2); SWAP(name1, name2); } diff --git a/diff.c b/diff.c index 9de1ba264f..6c4f3f6b72 100644 --- a/diff.c +++ b/diff.c @@ -5117,11 +5117,9 @@ void diff_change(struct diff_options *options, return; if (DIFF_OPT_TST(options, REVERSE_DIFF)) { - unsigned tmp; SWAP(old_mode, new_mode); SWAP(old_sha1, new_sha1); - tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid; - new_sha1_valid = tmp; + SWAP(old_sha1_valid, new_sha1_valid); SWAP(old_dirty_submodule, new_dirty_submodule); } -- 2.11.0
[PATCH 3/5] use SWAP macro
Apply the semantic patch swap.cocci to convert hand-rolled swaps to use the macro SWAP. The resulting code is shorter and easier to read, the object code is effectively unchanged. The patch for object.c had to be hand-edited in order to preserve the comment before the change; Coccinelle tried to eat it for some reason. Signed-off-by: Rene Scharfe --- builtin/diff-tree.c | 4 +--- builtin/diff.c | 9 +++-- diff-no-index.c | 3 +-- diff.c | 8 +++- graph.c | 5 + line-range.c| 3 +-- merge-recursive.c | 5 + object.c| 4 +--- pack-revindex.c | 5 + prio-queue.c| 4 +--- strbuf.h| 4 +--- 11 files changed, 15 insertions(+), 39 deletions(-) diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c index 806dd7a885..8ce00480cd 100644 --- a/builtin/diff-tree.c +++ b/builtin/diff-tree.c @@ -147,9 +147,7 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix) tree1 = opt->pending.objects[0].item; tree2 = opt->pending.objects[1].item; if (tree2->flags & UNINTERESTING) { - struct object *tmp = tree2; - tree2 = tree1; - tree1 = tmp; + SWAP(tree2, tree1); } diff_tree_sha1(tree1->oid.hash, tree2->oid.hash, diff --git a/builtin/diff.c b/builtin/diff.c index 7f91f6d226..3d64b85337 100644 --- a/builtin/diff.c +++ b/builtin/diff.c @@ -45,12 +45,9 @@ static void stuff_change(struct diff_options *opt, return; if (DIFF_OPT_TST(opt, REVERSE_DIFF)) { - unsigned tmp; - const unsigned char *tmp_u; - const char *tmp_c; - tmp = old_mode; old_mode = new_mode; new_mode = tmp; - tmp_u = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_u; - tmp_c = old_name; old_name = new_name; new_name = tmp_c; + SWAP(old_mode, new_mode); + SWAP(old_sha1, new_sha1); + SWAP(old_name, new_name); } if (opt->prefix && diff --git a/diff-no-index.c b/diff-no-index.c index f420786039..1ae09894d7 100644 --- a/diff-no-index.c +++ b/diff-no-index.c @@ -186,9 +186,8 @@ static int queue_diff(struct diff_options *o, if (DIFF_OPT_TST(o, REVERSE_DIFF)) { unsigned tmp; - const char *tmp_c; tmp = mode1; mode1 = mode2; mode2 = tmp; - tmp_c = name1; name1 = name2; name2 = tmp_c; + SWAP(name1, name2); } d1 = noindex_filespec(name1, mode1); diff --git a/diff.c b/diff.c index f08cd8e033..9de1ba264f 100644 --- a/diff.c +++ b/diff.c @@ -5118,13 +5118,11 @@ void diff_change(struct diff_options *options, if (DIFF_OPT_TST(options, REVERSE_DIFF)) { unsigned tmp; - const unsigned char *tmp_c; - tmp = old_mode; old_mode = new_mode; new_mode = tmp; - tmp_c = old_sha1; old_sha1 = new_sha1; new_sha1 = tmp_c; + SWAP(old_mode, new_mode); + SWAP(old_sha1, new_sha1); tmp = old_sha1_valid; old_sha1_valid = new_sha1_valid; new_sha1_valid = tmp; - tmp = old_dirty_submodule; old_dirty_submodule = new_dirty_submodule; - new_dirty_submodule = tmp; + SWAP(old_dirty_submodule, new_dirty_submodule); } if (options->prefix && diff --git a/graph.c b/graph.c index d4e8519c90..4c722303d2 100644 --- a/graph.c +++ b/graph.c @@ -997,7 +997,6 @@ static void graph_output_post_merge_line(struct git_graph *graph, struct strbuf static void graph_output_collapsing_line(struct git_graph *graph, struct strbuf *sb) { int i; - int *tmp_mapping; short used_horizontal = 0; int horizontal_edge = -1; int horizontal_edge_target = -1; @@ -1132,9 +1131,7 @@ static void graph_output_collapsing_line(struct git_graph *graph, struct strbuf /* * Swap mapping and new_mapping */ - tmp_mapping = graph->mapping; - graph->mapping = graph->new_mapping; - graph->new_mapping = tmp_mapping; + SWAP(graph->mapping, graph->new_mapping); /* * If graph->mapping indicates that all of the branch lines diff --git a/line-range.c b/line-range.c index de4e32f942..323399d16c 100644 --- a/line-range.c +++ b/line-range.c @@ -269,8 +269,7 @@ int parse_range_arg(const char *arg, nth_line_fn_t nth_line_cb, return -1; if (*begin && *end && *end < *begin) { - long tmp; - tmp = *end; *end = *begin; *begin = tmp; + SWAP(*end, *begin); } return 0; diff --git a/merge-recursive.c b/merge-recursive.c index d327209443..b7ff1a
[PATCH 5/5] graph: use SWAP macro
Exchange the values of graph->columns and graph->new_columns using the macro SWAP instead of hand-rolled code. The result is shorter and easier to read. This transformation was not done by the semantic patch swap.cocci because there's an unrelated statement between the second and the last step of the exchange, so it didn't match the expected pattern. Signed-off-by: Rene Scharfe --- graph.c | 6 +- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/graph.c b/graph.c index 4c722303d2..29b0f51dc5 100644 --- a/graph.c +++ b/graph.c @@ -463,7 +463,6 @@ static void graph_update_width(struct git_graph *graph, static void graph_update_columns(struct git_graph *graph) { struct commit_list *parent; - struct column *tmp_columns; int max_new_columns; int mapping_idx; int i, seen_this, is_commit_in_columns; @@ -476,11 +475,8 @@ static void graph_update_columns(struct git_graph *graph) * We'll re-use the old columns array as storage to compute the new * columns list for the commit after this one. */ - tmp_columns = graph->columns; - graph->columns = graph->new_columns; + SWAP(graph->columns, graph->new_columns); graph->num_columns = graph->num_new_columns; - - graph->new_columns = tmp_columns; graph->num_new_columns = 0; /* -- 2.11.0
[PATCH] use oidcpy() for copying hashes between instances of struct object_id
Patch generated by Coccinelle and contrib/coccinelle/object_id.cocci. Signed-off-by: Rene Scharfe --- refs/files-backend.c | 2 +- wt-status.c | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/refs/files-backend.c b/refs/files-backend.c index f9023939d5..8ee2aba39f 100644 --- a/refs/files-backend.c +++ b/refs/files-backend.c @@ -697,7 +697,7 @@ static int cache_ref_iterator_peel(struct ref_iterator *ref_iterator, if (peel_entry(entry, 0)) return -1; - hashcpy(peeled->hash, entry->u.value.peeled.hash); + oidcpy(peeled, &entry->u.value.peeled); return 0; } diff --git a/wt-status.c b/wt-status.c index a715e71906..a05328dc48 100644 --- a/wt-status.c +++ b/wt-status.c @@ -628,7 +628,7 @@ static void wt_status_collect_changes_initial(struct wt_status *s) d->index_status = DIFF_STATUS_ADDED; /* Leave {mode,oid}_head zero for adds. */ d->mode_index = ce->ce_mode; - hashcpy(d->oid_index.hash, ce->oid.hash); + oidcpy(&d->oid_index, &ce->oid); } } } @@ -2096,7 +2096,7 @@ static void wt_porcelain_v2_print_unmerged_entry( if (strcmp(ce->name, it->string) || !stage) break; stages[stage - 1].mode = ce->ce_mode; - hashcpy(stages[stage - 1].oid.hash, ce->oid.hash); + oidcpy(&stages[stage - 1].oid, &ce->oid); sum |= (1 << (stage - 1)); } if (sum != d->stagemask) -- 2.11.0
[PATCH] checkout: convert post_checkout_hook() to struct object_id
Signed-off-by: Rene Scharfe --- builtin/checkout.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/builtin/checkout.c b/builtin/checkout.c index bfe685c198..80d5e38981 100644 --- a/builtin/checkout.c +++ b/builtin/checkout.c @@ -56,8 +56,8 @@ static int post_checkout_hook(struct commit *old, struct commit *new, int changed) { return run_hook_le(NULL, "post-checkout", - sha1_to_hex(old ? old->object.oid.hash : null_sha1), - sha1_to_hex(new ? new->object.oid.hash : null_sha1), + oid_to_hex(old ? &old->object.oid : &null_oid), + oid_to_hex(new ? &new->object.oid : &null_oid), changed ? "1" : "0", NULL); /* "new" can be NULL when checking out from the index before a commit exists. */ -- 2.11.0
[PATCH] use oid_to_hex_r() for converting struct object_id hashes to hex strings
Patch generated by Coccinelle and contrib/coccinelle/object_id.cocci. Signed-off-by: Rene Scharfe --- builtin/blame.c | 4 ++-- builtin/merge-index.c | 2 +- builtin/rev-list.c| 2 +- diff.c| 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/builtin/blame.c b/builtin/blame.c index 126b8c9e5b..cffc626540 100644 --- a/builtin/blame.c +++ b/builtin/blame.c @@ -1901,7 +1901,7 @@ static void emit_porcelain(struct scoreboard *sb, struct blame_entry *ent, struct origin *suspect = ent->suspect; char hex[GIT_SHA1_HEXSZ + 1]; - sha1_to_hex_r(hex, suspect->commit->object.oid.hash); + oid_to_hex_r(hex, &suspect->commit->object.oid); printf("%s %d %d %d\n", hex, ent->s_lno + 1, @@ -1941,7 +1941,7 @@ static void emit_other(struct scoreboard *sb, struct blame_entry *ent, int opt) int show_raw_time = !!(opt & OUTPUT_RAW_TIMESTAMP); get_commit_info(suspect->commit, &ci, 1); - sha1_to_hex_r(hex, suspect->commit->object.oid.hash); + oid_to_hex_r(hex, &suspect->commit->object.oid); cp = nth_line(sb, ent->lno); for (cnt = 0; cnt < ent->num_lines; cnt++) { diff --git a/builtin/merge-index.c b/builtin/merge-index.c index ce356b1da1..2d1b6db6bd 100644 --- a/builtin/merge-index.c +++ b/builtin/merge-index.c @@ -22,7 +22,7 @@ static int merge_entry(int pos, const char *path) if (strcmp(ce->name, path)) break; found++; - sha1_to_hex_r(hexbuf[stage], ce->oid.hash); + oid_to_hex_r(hexbuf[stage], &ce->oid); xsnprintf(ownbuf[stage], sizeof(ownbuf[stage]), "%o", ce->ce_mode); arguments[stage] = hexbuf[stage]; arguments[stage + 4] = ownbuf[stage]; diff --git a/builtin/rev-list.c b/builtin/rev-list.c index c43decda70..0aa93d5891 100644 --- a/builtin/rev-list.c +++ b/builtin/rev-list.c @@ -237,7 +237,7 @@ static int show_bisect_vars(struct rev_list_info *info, int reaches, int all) cnt = reaches; if (revs->commits) - sha1_to_hex_r(hex, revs->commits->item->object.oid.hash); + oid_to_hex_r(hex, &revs->commits->item->object.oid); if (flags & BISECT_SHOW_ALL) { traverse_commit_list(revs, show_commit, show_object, info); diff --git a/diff.c b/diff.c index f08cd8e033..d91a344908 100644 --- a/diff.c +++ b/diff.c @@ -3015,7 +3015,7 @@ static struct diff_tempfile *prepare_temp_file(const char *name, if (!one->oid_valid) sha1_to_hex_r(temp->hex, null_sha1); else - sha1_to_hex_r(temp->hex, one->oid.hash); + oid_to_hex_r(temp->hex, &one->oid); /* Even though we may sometimes borrow the * contents from the work tree, we always want * one->mode. mode is trustworthy even when -- 2.11.0
[PATCH] receive-pack: call string_list_clear() unconditionally
string_list_clear() handles empty lists just fine, so remove the redundant check. Signed-off-by: Rene Scharfe --- builtin/receive-pack.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c index 6b97cbdbe..1dbb8a069 100644 --- a/builtin/receive-pack.c +++ b/builtin/receive-pack.c @@ -1942,8 +1942,7 @@ int cmd_receive_pack(int argc, const char **argv, const char *prefix) run_receive_hook(commands, "post-receive", 1, &push_options); run_update_post_hook(commands); - if (push_options.nr) - string_list_clear(&push_options, 0); + string_list_clear(&push_options, 0); if (auto_gc) { const char *argv_gc_auto[] = { "gc", "--auto", "--quiet", NULL, -- 2.11.0
Re: [PATCH 1/5] add SWAP macro
Am 30.01.2017 um 16:39 schrieb Johannes Schindelin: Hi René, On Sat, 28 Jan 2017, René Scharfe wrote: Add a macro for exchanging the values of variables. It allows users to avoid repetition and takes care of the temporary variable for them. It also makes sure that the storage sizes of its two parameters are the same. Its memcpy(1) calls are optimized away by current compilers. How certain are we that "current compilers" optimize that away? And about which "current compilers" are we talking? GCC? GCC 6? Clang? Clang 3? Clang 3.8.*? Visual C 2005+? GCC 4.4.7 and clang 3.2 on x86-64 compile the macro to the same object code as a manual swap ; see https://godbolt.org/g/F4b9M9. Both were released 2012. That website doesn't offer Visual C(++), but since you added the original macro in e5a94313c0 ("Teach git-apply about '-R'", 2006) I guess we have that part covered. ;) René
Re: [PATCH 1/5] add SWAP macro
Am 30.01.2017 um 17:01 schrieb Johannes Schindelin: Hi René, On Sat, 28 Jan 2017, René Scharfe wrote: diff --git a/git-compat-util.h b/git-compat-util.h index 87237b092b..66cd466eea 100644 --- a/git-compat-util.h +++ b/git-compat-util.h @@ -527,6 +527,16 @@ static inline int ends_with(const char *str, const char *suffix) return strip_suffix(str, suffix, &len); } +#define SWAP(a, b) do {\ + void *_swap_a_ptr = &(a); \ + void *_swap_b_ptr = &(b); \ + unsigned char _swap_buffer[sizeof(a)]; \ + memcpy(_swap_buffer, _swap_a_ptr, sizeof(a)); \ + memcpy(_swap_a_ptr, _swap_b_ptr, sizeof(a) +\ + BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b))); \ + memcpy(_swap_b_ptr, _swap_buffer, sizeof(a)); \ +} while (0) + #if defined(NO_MMAP) || defined(USE_WIN32_MMAP) It may seem as a matter of taste, or maybe not: I prefer this without the _swap_a_ptr (and I would also prefer not to use identifiers starting with an underscore, as section 7.1.3 Reserved Identifiers of the C99 standard says they are reserved): +#define SWAP(a, b) do {\ + unsigned char swap_buffer_[sizeof(a)]; \ + memcpy(swap_buffer_, &(a), sizeof(a)); \ + memcpy(&(a), &(b), sizeof(a) + \ + BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b))); \ + memcpy(&(b), swap_buffer_, sizeof(a)); \ +} while (0) We can move the underscore to the end, but using a and b directly will give surprising results if the parameters have side effects. E.g. if you want to swap the first two elements of two arrays you might want to do this: SWAP(*x++, *y++); SWAP(*x++, *y++); And that would increment twice as much as one would guess and access unexpected elements. One idea to address the concern that not all C compilers people use to build Git may optimize away those memcpy()s: we could also introduce a SWAP_PRIMITIVE_TYPE (or SWAP2 or SIMPLE_SWAP or whatever) that accepts only primitive types. But since __typeof__() is not portable... I wouldn't worry too much about such a solution before seeing that SWAP (even with memcpy(3) -- this function is probably optimized quite heavily on most platforms) causes an actual performance problem. René
Re: [PATCH 4/5] diff: use SWAP macro
Am 30.01.2017 um 17:04 schrieb Johannes Schindelin: Hi René, On Sat, 28 Jan 2017, René Scharfe wrote: Use the macro SWAP to exchange the value of pairs of variables instead of swapping them manually with the help of a temporary variable. The resulting code is shorter and easier to read. The two cases were not transformed by the semantic patch swap.cocci because it's extra careful and handles only cases where the types of all variables are the same -- and here we swap two ints and use an unsigned temporary variable for that. Nevertheless the conversion is safe, as the value range is preserved with and without the patch. One way to make this more obvious would be to change the type to signed first, and then transform (which then would catch these cases too, right?). I'm not sure it would be more obvious, but it would certainly make the type change more explicit. In diff-index.c we might even want to change the type of the swapped values from int to unsigned, which is more fitting for file modes. In diff.c we'd need to add a separate variable, as tmp is shared with other (unsigned) swaps. René
Re: [PATCH 3/5] use SWAP macro
Am 30.01.2017 um 17:03 schrieb Johannes Schindelin: Hi René, On Sat, 28 Jan 2017, René Scharfe wrote: diff --git a/builtin/diff-tree.c b/builtin/diff-tree.c index 806dd7a885..8ce00480cd 100644 --- a/builtin/diff-tree.c +++ b/builtin/diff-tree.c @@ -147,9 +147,7 @@ int cmd_diff_tree(int argc, const char **argv, const char *prefix) tree1 = opt->pending.objects[0].item; tree2 = opt->pending.objects[1].item; if (tree2->flags & UNINTERESTING) { - struct object *tmp = tree2; - tree2 = tree1; - tree1 = tmp; + SWAP(tree2, tree1); } Is there a way to transform away the curly braces for blocks that become single-line blocks, too? Interesting question. I guess this can be done by using a Python script (see contrib/coccinelle/strbuf.cocci for an example). I'll leave this as homework for readers interested in Coccinelle, at least for a while. :) René
Re: [PATCH 5/5] graph: use SWAP macro
Am 30.01.2017 um 17:16 schrieb Johannes Schindelin: Hi René, On Sat, 28 Jan 2017, René Scharfe wrote: Exchange the values of graph->columns and graph->new_columns using the macro SWAP instead of hand-rolled code. The result is shorter and easier to read. This transformation was not done by the semantic patch swap.cocci because there's an unrelated statement between the second and the last step of the exchange, so it didn't match the expected pattern. Is it really true that Coccinelle cannot be told to look for a code block that declares a variable that is then used *only* in the lines we want to match and replace? Hope I parsed your question correctly; my answer would be that it can't be true because that's basically what the proposed semantic patch does: @ swap @ type T; T tmp, a, b; @@ - tmp = a; - a = b; - b = tmp; + SWAP(a, b); @ extends swap @ identifier unused; @@ { ... - T unused; ... when != unused } The first part (up to the "+") looks for a opportunities to use SWAP, and the second part looks for blocks where that transformation was done and we declare identifiers that are/became unused. It did not match the code in graph.c because the pattern was basically: tmp = a; a = b; something = totally_different; b = tmp; Coccinelle can be told to ignore such unrelated code by adding "... when != tmp" etc. (which matches context lines that don't reference tmp), but that's slooow. (Perhaps I just did it wrong, though.) René
Re: [PATCH 1/5] add SWAP macro
Am 30.01.2017 um 21:48 schrieb Johannes Schindelin: So I tried to verify that Visual C optimizes this well, and oh my deity, this was not easy. In Debug mode, it does not optimize, i.e. the memcpy() will be called, even for simple 32-bit integers. In Release mode, Visual Studio's defaults turn on "whole-program optimization" which means that the only swapping that is going on in the end is that the meaning of two registers is swapped, i.e. the SWAP() macro is expanded to... no assembler code at all. After trying this and that and something else, I finally ended up with the file-scope optimized SWAP() resulting in this assembly code: 7FF791311000 mov eax,dword ptr [rcx] 7FF791311002 mov r8d,dword ptr [rdx] 7FF791311005 mov dword ptr [rcx],r8d 7FF791311008 mov dword ptr [rdx],eax This looks good. However, recent events (including some discussions on this mailing list which had unfortunate consequences) made me trust my instincts more. And my instincts tell me that it would be unwise to replace code that swaps primitive types in the straight-forward way by code that relies on advanced compiler optimization to generate efficient code, otherwise producing very suboptimal code. I don't know how difficult it was to arrive at the result above, but I wouldn't call inlining memcpy(3) an advanced optimization (anymore?), since the major compilers seem to be doing that. The SWAP in prio-queue.c seems to be the one with the biggest performance impact. Or perhaps it's the one in lookup_object()? The former is easier to measure, though. Here's what I get with CFLAGS="-builtin -O2" (best of five): $ time ./t/helper/test-prio-queue $(seq 10 -1 1) dump >/dev/null real0m0.142s user0m0.120s sys 0m0.020s And this is with CFLAGS="-no-builtin -O2": $ time ./t/helper/test-prio-queue $(seq 10 -1 1) dump >/dev/null real0m0.170s user0m0.156s sys 0m0.012s Hmm. Not nice, but also not prohibitively slow. The commit you quoted embarrasses me, and I have no excuse for it. I would love to see that myswap() ugliness fixed by replacing it with a construct that is simpler, and generates good code even without any smart compiler. I don't see a way to do that without adding a type parameter. René
Re: [PATCH 1/5] add SWAP macro
Am 30.01.2017 um 22:03 schrieb Johannes Schindelin: It is curious, though, that an expression like "sizeof(a++)" would not be rejected. Clang normally warns about something like this ("warning: expression with side effects has no effect in an unevaluated context [-Wunevaluated-expression]"), but not if the code is part of a macro. I don't know if that's intended, but it sure is helpful in the case of SWAP. Further, what would SWAP(a++, b) do? Swap a and b, and *then* increment a? That might be a valid expectation, but GCC says "error: lvalue required as unary '&' operand" and clang puts it "error: cannot take the address of an rvalue of type". René
Re: [PATCH 1/5] add SWAP macro
Am 31.01.2017 um 13:13 schrieb Johannes Schindelin: Hi René, On Mon, 30 Jan 2017, René Scharfe wrote: Am 30.01.2017 um 21:48 schrieb Johannes Schindelin: The commit you quoted embarrasses me, and I have no excuse for it. I would love to see that myswap() ugliness fixed by replacing it with a construct that is simpler, and generates good code even without any smart compiler. I don't see a way to do that without adding a type parameter. Exactly. And you know what? I would be very okay with that type parameter. Coccinelle [*1*] should be able to cope with that, too, mehtinks. Yes, a semantic patch can turn the type of the temporary variable into a macro parameter. Programmers would have to type the type, though, making the macro only half as good. It would be trivially "optimized" out of the box, even when compiling with Tiny C or in debug mode. Such a compiler is already slowed down by memset(3) calls for initializing objects and lack of other optimizations. I doubt a few more memcpy(3) calls would make that much of a difference. NB: git as compiled with TCC fails several tests, alas. Builds wickedly fast, though. And it would even allow things like this: #define SIMPLE_SWAP(T, a, b) do { T tmp_ = a; a = b; b = tmp_; } while (0) ... uint32_t large; char nybble; ... if (!(large & ~0xf)) { SIMPLE_SWAP(char, nybble, large); ... } i.e. mixing types, when possible. And while I do not necessarily expect that we need anything like this anytime soon, merely the fact that it allows for this flexibility, while being very readable at the same time, would make it a pretty good design in my book. Such a skinny macro which only hides repetition is kind of attractive due to its simplicity; I can't say the same about the mixed type example above, though. The fat version isn't that bad either even without inlining, includes a few safety checks and doesn't require us to tell the compiler something it already knows very well. I'd rather let the machine do the work. René
Re: [PATCH 3/5] use SWAP macro
Am 30.01.2017 um 23:22 schrieb Junio C Hamano: René Scharfe writes: if (tree2->flags & UNINTERESTING) { - struct object *tmp = tree2; - tree2 = tree1; - tree1 = tmp; + SWAP(tree2, tree1); A human would have written this SWAP(tree1, tree2). Not that I think such a manual fix-up should be made in _this_ patch, which may end up mixing mechanical conversion (which we may want to keep reproducible) and hand tweaks. But this swapped swap reads somewhat silly. Well, a human wrote "tmp = tree2" -- sometimes one likes to count down instead of up, I guess. We can make Coccinelle order the parameters alphabetically, but I don't know how to do so without duplicating most of the semantic patch. And thats would result in e.g. SWAP(new_tree, old_tree), which might be odd as well. I'd just leave them in whatever order they were, but that's probably because I'm lazy. René
Re: [PATCH 1/5] add SWAP macro
Am 30.01.2017 um 23:21 schrieb Brandon Williams: On 01/30, René Scharfe wrote: Am 30.01.2017 um 22:03 schrieb Johannes Schindelin: It is curious, though, that an expression like "sizeof(a++)" would not be rejected. Clang normally warns about something like this ("warning: expression with side effects has no effect in an unevaluated context [-Wunevaluated-expression]"), but not if the code is part of a macro. I don't know if that's intended, but it sure is helpful in the case of SWAP. Further, what would SWAP(a++, b) do? Swap a and b, and *then* increment a? That might be a valid expectation, but GCC says "error: lvalue required as unary '&' operand" and clang puts it "error: cannot take the address of an rvalue of type". René Perhaps we could disallow a side-effect operator in the macro. By disallow I mean place a comment at the definition to the macro and hopefully catch something like that in code-review. We have the same issue with the `ALLOC_GROW()` macro. SWAP(a++, ...) is caught by the compiler, SWAP(*a++, ...) works fine. Technically that should be enough. :) A comment wouldn't hurt, of course. René
Re: [PATCH 1/5] add SWAP macro
Am 01.02.2017 um 12:47 schrieb Jeff King: I'm not altogether convinced that SWAP() is an improvement in readability. I really like that it's shorter than the code it replaces, but there is a danger with introducing opaque constructs. It's one more thing for somebody familiar with C but new to the project to learn or get wrong. I'm biased, of course, but SIMPLE_SWAP and SWAP exchange the values of two variables -- how could someone get that wrong? Would it help to name the macro SWAP_VALUES? Or XCHG? ;) IMHO the main maintenance gain from René's patch is that you don't have to specify the type, which means you can never have a memory-overflow bug due to incorrect types. If we lose that, then I don't see all that much value in the whole thing. Size checks could be added to SIMPLE_SWAP as well. The main benefit of a swap macro is reduced repetition IMHO: Users specify the variables to swap once instead of twice in a special order, and with SWAP they don't need to declare their type again. Squeezing out redundancy makes the code easier to read and modify. René
[PATCH] p5302: create repositories for index-pack results explicitly
Before 7176a314 (index-pack: complain when --stdin is used outside of a repo) index-pack silently created a non-existing target directory; now the command refuses to work unless it's used against a valid repository. That causes p5302 to fail, which relies on the former behavior. Fix it by setting up the destinations for its performance tests using git init. Signed-off-by: Rene Scharfe --- t/perf/p5302-pack-index.sh | 7 +++ 1 file changed, 7 insertions(+) diff --git a/t/perf/p5302-pack-index.sh b/t/perf/p5302-pack-index.sh index 5ee9211f98..99bdb16c85 100755 --- a/t/perf/p5302-pack-index.sh +++ b/t/perf/p5302-pack-index.sh @@ -13,6 +13,13 @@ test_expect_success 'repack' ' export PACK ' +test_expect_success 'create target repositories' ' + for repo in t1 t2 t3 t4 t5 t6 + do + git init --bare $repo + done +' + test_perf 'index-pack 0 threads' ' GIT_DIR=t1 git index-pack --threads=1 --stdin < $PACK ' -- 2.11.1
[PATCH] dir: avoid allocation in fill_directory()
Pass the match member of the first pathspec item directly to read_directory() instead of using common_prefix() to duplicate it first, thus avoiding memory duplication, strlen(3) and free(3). Signed-off-by: Rene Scharfe --- This updates 966de3028 (dir: convert fill_directory to use the pathspec struct interface). The result is closer to the original, yet still using the modern interface. This patch eerily resembles 2b189435 (dir: simplify fill_directory()). dir.c | 7 +++ 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/dir.c b/dir.c index 65c3e681b8..4541f9e146 100644 --- a/dir.c +++ b/dir.c @@ -174,20 +174,19 @@ char *common_prefix(const struct pathspec *pathspec) int fill_directory(struct dir_struct *dir, const struct pathspec *pathspec) { - char *prefix; + const char *prefix; size_t prefix_len; /* * Calculate common prefix for the pathspec, and * use that to optimize the directory walk */ - prefix = common_prefix(pathspec); - prefix_len = prefix ? strlen(prefix) : 0; + prefix_len = common_prefix_len(pathspec); + prefix = prefix_len ? pathspec->items[0].match : ""; /* Read the directory and prune it */ read_directory(dir, prefix, prefix_len, pathspec); - free(prefix); return prefix_len; } -- 2.11.1
Re: [PATCH 1/5] add SWAP macro
Am 01.02.2017 um 19:33 schrieb Junio C Hamano: > René Scharfe writes: > >> Size checks could be added to SIMPLE_SWAP as well. > > Between SWAP() and SIMPLE_SWAP() I am undecided. > > If the compiler can infer the type and the size of the two > "locations" given to the macro, there is no technical reason to > require the caller to specify the type as an extra argument, so > SIMPLE_SWAP() may not necessarily an improvement over SWAP() from > that point of view. If the redundancy is used as a sanity check, > I'd be in favor of SIMPLE_SWAP(), though. > > If the goal of SIMPLE_SWAP(), on the other hand, were to support the > "only swap char with int for small value" example earlier in the > thread, it means you cannot sanity check the type of things being > swapped in the macro, and the readers of the code need to know about > the details of what variables are being swapped. It looks to me > that it defeats the main benefit of using a macro. Full type inference could be done with C11's _Generic for basic types, while typeof would be needed for complex ones, I guess. Checking that sizes match is better than nothing and portable to ancient platforms, though. Having an explicit type given is portable and easy to use for checks, of course, e.g. like this: #define SIMPLE_SWAP(T, a, b) do { \ T swap_tmp_ = a + BUILD_ASSERT_OR_ZERO(sizeof(T) == sizeof(a)); \ a = b + BUILD_ASSERT_OR_ZERO(sizeof(a) == sizeof(b)); \ b = swap_tmp_; \ } while(0) It doesn't support expressions with side effects, but that's probably not much of a concern. Swapping between different types would then still have to be done manually, but I wonder how common that is -- couldn't find such a case in our tree. René
Re: Trying to use xfuncname without success.
Am 07.02.2017 um 20:21 schrieb Jack Adrian Zappa: I'm trying to setup a hunk header for .natvis files. For some reason, it doesn't seem to be working. I'm following their instructions from here, which doesn't say much in terms of restrictions of the regex, such as, is the matched item considered the hunk header or do I need a group? I have tried both with no success. This is what I have: [diff "natvis"] xfuncname = "^[\\\t ]* The extra "\\" allow backslashes to be used for indentation as well as between Type and Name, which is probably not what you want. And your expression only matches single-char Name attributes. Try: xfuncname = "^[\t ]*
Re: Trying to use xfuncname without success.
Am 08.02.2017 um 18:11 schrieb Jack Adrian Zappa: > Thanks Rene, but you seem to have missed the point. NOTHING is > working. No matter what I put there, it doesn't seem to get matched. I'm not so sure about that. With your example I get this diff without setting diff.natvis.xfuncname: diff --git a/a.natvis b/a.natvis index 7f9bdf5..bc3c090 100644 --- a/a.natvis +++ b/a.natvis @@ -19,7 +19,7 @@ xmlns="http://schemas.microsoft.com/vstudio/debugger/natvis/2010";> - added_var + added_vars var2 Note the XML namespace in the hunk header. It's put there by the default rule because "xmlns" starts at the beginning of the line. Your diff has nothing there, which means the default rule is not used, i.e. your user-defined rule is in effect. Come to think of it, this line break in the middle of the AutoVisualizer tab might have been added by your email client unintentionally, so that we use different test files, which then of course results in different diffs. Is that the case? Anyway, if I run the following two commands: $ git config diff.natvis.xfuncname "^[\t ]*.gitattributes ... then I get this, both on Linux (git version 2.11.1) and on Windows (git version 2.11.1.windows.1): diff --git a/a.natvis b/a.natvis index 7f9bdf5..bc3c090 100644 --- a/a.natvis +++ b/a.natvis @@ -19,7 +19,7 @@ test - added_var + added_vars var2 > Just to be sure, I tested your regex and again it didn't work. At this point I'm out of ideas, sorry. :( The only way I was able to break it was due to mistyping the extension as "netvis" several times for some reason. René
Re: Trying to use xfuncname without success.
Am 09.02.2017 um 01:10 schrieb Jack Adrian Zappa: where it has grabbed a line at 126 and is using that for the hunk header. When I say that, I mean that it is using that line for *every* hunk header, for every change, regardless if it has passed a hunk head that it should have matched. Strange. That should only happen if no other function lines are recognized before the changes. You can check if that's the case using git grep and your xfuncline regex, e.g. like this: $ git grep -En "^[\t ]*And you can check *that* result with regular grep -E or egrep if it looks funny. René
Re: [PATCH] dir: avoid allocation in fill_directory()
Am 08.02.2017 um 07:22 schrieb Duy Nguyen: On Wed, Feb 8, 2017 at 5:04 AM, René Scharfe wrote: Pass the match member of the first pathspec item directly to read_directory() instead of using common_prefix() to duplicate it first, thus avoiding memory duplication, strlen(3) and free(3). How about killing common_prefix()? There are two other callers in ls-files.c and commit.c and it looks safe to do (but I didn't look very hard). I would like that, but it doesn't look like it's worth it. I have a patch series for making overlay_tree_on_cache() take pointer+length, but it's surprisingly long and bloats the code. Duplicating a small piece of memory once per command doesn't look so bad in comparison. (The payoff for avoiding an allocation is higher for library functions like fill_directory().) But while working on that I found two opportunities for improvement in prune_cache(). I'll send patches shortly. There's a subtle difference. Before the patch, prefix[prefix_len] is NUL. After the patch, it's not always true. If some code (incorrectly) depends on that, this patch exposes it. I had a look inside read_directory() though and it looks like no such code exists. So, all good. Thanks for checking. NB: The code before 966de302 (dir: convert fill_directory to use the pathspec struct interface, committed 2017-01-04) made the same assumption, i.e. that NUL is not needed. René
[PATCH 1/2] ls-files: pass prefix length explicitly to prune_cache()
The function prune_cache() relies on the fact that it is only called on max_prefix and sneakily uses the matching global variable max_prefix_len directly. Tighten its interface by passing both the string and its length as parameters. While at it move the NULL check into the function to collect all cache-pruning related logic in one place. Signed-off-by: Rene Scharfe --- Not urgent, of course. builtin/ls-files.c | 12 +++- 1 file changed, 7 insertions(+), 5 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 1592290815..18105ec7ea 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -369,11 +369,14 @@ static void show_files(struct dir_struct *dir) /* * Prune the index to only contain stuff starting with "prefix" */ -static void prune_cache(const char *prefix) +static void prune_cache(const char *prefix, size_t prefixlen) { - int pos = cache_name_pos(prefix, max_prefix_len); + int pos; unsigned int first, last; + if (!prefix) + return; + pos = cache_name_pos(prefix, prefixlen); if (pos < 0) pos = -pos-1; memmove(active_cache, active_cache + pos, @@ -384,7 +387,7 @@ static void prune_cache(const char *prefix) while (last > first) { int next = (last + first) >> 1; const struct cache_entry *ce = active_cache[next]; - if (!strncmp(ce->name, prefix, max_prefix_len)) { + if (!strncmp(ce->name, prefix, prefixlen)) { first = next+1; continue; } @@ -641,8 +644,7 @@ int cmd_ls_files(int argc, const char **argv, const char *cmd_prefix) show_killed || show_modified || show_resolve_undo)) show_cached = 1; - if (max_prefix) - prune_cache(max_prefix); + prune_cache(max_prefix, max_prefix_len); if (with_tree) { /* * Basic sanity check; show-stages and show-unmerged -- 2.11.1
[PATCH 2/2] ls-files: move only kept cache entries in prune_cache()
prune_cache() first identifies those entries at the start of the sorted array that can be discarded. Then it moves the rest of the entries up. Last it identifies the unwanted trailing entries among the moved ones and cuts them off. Change the order: Identify both start *and* end of the range to keep first and then move only those entries to the top. The resulting code is slightly shorter and a bit more efficient. Signed-off-by: Rene Scharfe --- The performance impact is probably only measurable with a *really* big index. builtin/ls-files.c | 9 - 1 file changed, 4 insertions(+), 5 deletions(-) diff --git a/builtin/ls-files.c b/builtin/ls-files.c index 18105ec7ea..1c0f057d02 100644 --- a/builtin/ls-files.c +++ b/builtin/ls-files.c @@ -379,10 +379,7 @@ static void prune_cache(const char *prefix, size_t prefixlen) pos = cache_name_pos(prefix, prefixlen); if (pos < 0) pos = -pos-1; - memmove(active_cache, active_cache + pos, - (active_nr - pos) * sizeof(struct cache_entry *)); - active_nr -= pos; - first = 0; + first = pos; last = active_nr; while (last > first) { int next = (last + first) >> 1; @@ -393,7 +390,9 @@ static void prune_cache(const char *prefix, size_t prefixlen) } last = next; } - active_nr = last; + memmove(active_cache, active_cache + pos, + (last - pos) * sizeof(struct cache_entry *)); + active_nr = last - pos; } /* -- 2.11.1