[RFC PATCH 0/2] fallback to interpreter if JIT fails with pcre

2018-12-09 Thread Carlo Marcelo Arenas Belón
while testing in NetBSD 8, was surprised to find that most test cases
using PCRE2 where failing with some cryptic error from git :

  fatal: Couldn't JIT the PCRE2 pattern '$PATTERN', got '-48'

interestingly enough, using a JIT enabled PCRE1 library (not the default)
will show a similar error but a different error code.

the underlying problem is the same though; NetBSD includes PAX support
which restricts the use of memory that is both writeable and executable
and that prevents the JIT to create a compiled expression to jump into,
and while the "fix" for NetBSD is simple it would seem the user experience
could be improved if instead of aborting, git will instead return the matches
using the slower interpreter (which seem to be also the recomendation from
the library developers)

it is important to note that the problem is not unique to NetBSD and had
reproduced it in OpenBSD where working around the issue is more complicated
as WˆX exceptions require a filesystem mount option, and I can see it being
problematic with linux (as shown by the open bug[1] and the development of
an alternative allocator to workaround the issue with seLinux) and with
macOS (specially versions older than 10.14) where there are restrictions to
the number of maps allowed with those flags.

I am also curious if expanding NO_LIBPCRE1_JIT as an option to disable JIT
with PCRE2 (with a different name) might be worth pursuing? as well as some
ways to narrow the failures that will trigger the fallback, but the later
is likely to need library changes which might not be possible with the old
version anyway.

[1] https://bugs.exim.org/show_bug.cgi?id=1749

Carlo Marcelo Arenas Belón (2):
  grep: fallback to interpreter if JIT fails with pcre1
  grep: fallback to interpreter if JIT fails with pcre2

 Makefile | 12 ++--
 grep.c   | 13 +++--
 2 files changed, 17 insertions(+), 8 deletions(-)

-- 
2.20.0



[RFC PATCH 2/2] grep: fallback to interpreter if JIT fails with pcre2

2018-12-09 Thread Carlo Marcelo Arenas Belón
starting with 10.23, and as a side effect of the work for bug1749[1] (grep
-P crash with seLinux), pcre2grep was modified to ignore any errors from
pcre2_jit_compile so the interpreter could be used as a fallback

[1] https://bugs.exim.org/show_bug.cgi?id=1749

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 7 +--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/grep.c b/grep.c
index 5ccc0421a1..c751c8cc74 100644
--- a/grep.c
+++ b/grep.c
@@ -530,8 +530,11 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
if (p->pcre2_jit_on == 1) {
jitret = pcre2_jit_compile(p->pcre2_pattern, 
PCRE2_JIT_COMPLETE);
-   if (jitret)
-   die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", 
p->pattern, jitret);
+   if (jitret) {
+   /* JIT failed so fallback to the interpreter */
+   p->pcre2_jit_on = 0;
+   return;
+   }
 
/*
 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
-- 
2.20.0



[RFC PATCH 1/2] grep: fallback to interpreter if JIT fails with pcre1

2018-12-09 Thread Carlo Marcelo Arenas Belón
JIT support was added to 8.20 but the interface we rely on is only
enabled after 8.32 so try to make the message clearer.

in systems where there are restrictions against creating executable
pages programatically (like OpenBSD, NetBSD, macOS or seLinux) JIT
will fail, resulting in a error message to the user.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 Makefile | 12 ++--
 grep.c   |  6 ++
 2 files changed, 12 insertions(+), 6 deletions(-)

diff --git a/Makefile b/Makefile
index 1a44c811aa..62b0cb6ee6 100644
--- a/Makefile
+++ b/Makefile
@@ -32,14 +32,14 @@ all::
 # USE_LIBPCRE is a synonym for USE_LIBPCRE2, define USE_LIBPCRE1
 # instead if you'd like to use the legacy version 1 of the PCRE
 # library. Support for version 1 will likely be removed in some future
-# release of Git, as upstream has all but abandoned it.
+# release of Git, as upstream is focusing all development for new
+# features in the newer version instead.
 #
 # When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if the PCRE v1
-# library is compiled without --enable-jit. We will auto-detect
-# whether the version of the PCRE v1 library in use has JIT support at
-# all, but we unfortunately can't auto-detect whether JIT support
-# hasn't been compiled in in an otherwise JIT-supporting version. If
-# you have link-time errors about a missing `pcre_jit_exec` define
+# library is newer than 8.32 but compiled without --enable-jit or
+# you want to disable JIT
+#
+# If you have link-time errors about a missing `pcre_jit_exec` define
 # this, or recompile PCRE v1 with --enable-jit.
 #
 # Define LIBPCREDIR=/foo/bar if your PCRE header and library files are
diff --git a/grep.c b/grep.c
index 4db1510d16..5ccc0421a1 100644
--- a/grep.c
+++ b/grep.c
@@ -405,6 +405,12 @@ static void compile_pcre1_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
die("%s", error);
 
 #ifdef GIT_PCRE1_USE_JIT
+   if (p->pcre1_extra_info &&
+   !(p->pcre1_extra_info->flags & PCRE_EXTRA_EXECUTABLE_JIT)) {
+   /* JIT failed so fallback to the interpreter */
+   p->pcre1_jit_on = 0;
+   return;
+   }
pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
if (p->pcre1_jit_on == 1) {
p->pcre1_jit_stack = pcre_jit_stack_alloc(1, 1024 * 1024);
-- 
2.20.0



[PATCH] http-push: workaround for format overflow warning in gcc >= 9

2019-05-09 Thread Carlo Marcelo Arenas Belón
In function 'finish_request',
inlined from 'process_response' at http-push.c:248:2:
http-push.c:587:4: warning: '%s' directive argument is null [-Wformat-overflow=]
  587 |fprintf(stderr, "Unable to get pack file %s\n%s",
  |^
  588 | request->url, curl_errorstr);
  | 
---
 http-push.c | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/http-push.c b/http-push.c
index f675a96316..2db553ef38 100644
--- a/http-push.c
+++ b/http-push.c
@@ -585,7 +585,8 @@ static void finish_request(struct transfer_request *request)
int fail = 1;
if (request->curl_result != CURLE_OK) {
fprintf(stderr, "Unable to get pack file %s\n%s",
-   request->url, curl_errorstr);
+   request->url ? request->url : "",
+   curl_errorstr);
} else {
preq = (struct http_pack_request *)request->userData;
 
-- 
2.21.0



[PATCH v2] http-push: prevent format overflow warning with gcc >= 9

2019-05-14 Thread Carlo Marcelo Arenas Belón
In function 'finish_request',
inlined from 'process_response' at http-push.c:248:2:
http-push.c:587:4: warning: '%s' directive argument is null [-Wformat-overflow=]
  587 |fprintf(stderr, "Unable to get pack file %s\n%s",
  |^
  588 | request->url, curl_errorstr);
  | 

request->url is needed for the error message if there was a failure
during fetch but was being cleared unnecessarily earlier.

note that the leak is prevented by calling release_request unconditionally
at the end.

Signed-off-by: Carlo Marcelo Arenas Belón 
Suggested-by: Eric Sunshine 
---
 http-push.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/http-push.c b/http-push.c
index f675a96316..e36561a6db 100644
--- a/http-push.c
+++ b/http-push.c
@@ -526,8 +526,8 @@ static void finish_request(struct transfer_request *request)
if (request->headers != NULL)
curl_slist_free_all(request->headers);
 
-   /* URL is reused for MOVE after PUT */
-   if (request->state != RUN_PUT) {
+   /* URL is reused for MOVE after PUT and used during FETCH */
+   if (request->state != RUN_PUT && request->state != RUN_FETCH_PACKED) {
FREE_AND_NULL(request->url);
}
 
-- 
2.21.0



[PATCH] fsmonitor: avoid signed integer overflow / infinite loop

2019-06-15 Thread Carlo Marcelo Arenas Belón
883e248b8a ("fsmonitor: teach git to optionally utilize a file system
monitor to speed up detecting new or changed files.", 2017-09-22) uses
an int in a loop that would wrap if index_state->cache_nr (unsigned)
is bigger than INT_MAX

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 fsmonitor.c | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/fsmonitor.c b/fsmonitor.c
index 1dee0aded1..231e83a94d 100644
--- a/fsmonitor.c
+++ b/fsmonitor.c
@@ -56,7 +56,7 @@ int read_fsmonitor_extension(struct index_state *istate, 
const void *data,
 
 void fill_fsmonitor_bitmap(struct index_state *istate)
 {
-   int i;
+   unsigned int i;
istate->fsmonitor_dirty = ewah_new();
for (i = 0; i < istate->cache_nr; i++)
if (!(istate->cache[i]->ce_flags & CE_FSMONITOR_VALID))
@@ -134,7 +134,7 @@ void refresh_fsmonitor(struct index_state *istate)
size_t bol; /* beginning of line */
uint64_t last_update;
char *buf;
-   int i;
+   unsigned int i;
 
if (!core_fsmonitor || istate->fsmonitor_has_run_once)
return;
@@ -192,7 +192,7 @@ void refresh_fsmonitor(struct index_state *istate)
 
 void add_fsmonitor(struct index_state *istate)
 {
-   int i;
+   unsigned int i;
 
if (!istate->fsmonitor_last_update) {
trace_printf_key(&trace_fsmonitor, "add fsmonitor");
@@ -225,7 +225,7 @@ void remove_fsmonitor(struct index_state *istate)
 
 void tweak_fsmonitor(struct index_state *istate)
 {
-   int i;
+   unsigned int i;
int fsmonitor_enabled = git_config_get_fsmonitor();
 
if (istate->fsmonitor_dirty) {
-- 
2.22.0



[PATCH] wrapper: avoid UB in macOS

2019-06-16 Thread Carlo Marcelo Arenas Belón
0620b39b3b ("compat: add a mkstemps() compatibility function", 2009-05-31)
included a function based on code from libiberty which would result in
undefined behaviour in platforms where timeval's tv_usec is a 32-bit signed
type as shown by:

wrapper.c:505:31: runtime error: left shift of 594546 by 16 places cannot be 
represented in type '__darwin_suseconds_t' (aka 'int')

interestingly the version of this code from gcc never had this bug and the
code had a cast that would had prevented the issue (at least in 64-bit
platforms) but was misapplied.

change the cast to uint64_t so it also works in 32-bit platforms.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 wrapper.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/wrapper.c b/wrapper.c
index ea3cf64d4c..1e45ab7b92 100644
--- a/wrapper.c
+++ b/wrapper.c
@@ -502,7 +502,7 @@ int git_mkstemps_mode(char *pattern, int suffix_len, int 
mode)
 * Try TMP_MAX different filenames.
 */
gettimeofday(&tv, NULL);
-   value = ((size_t)(tv.tv_usec << 16)) ^ tv.tv_sec ^ getpid();
+   value = ((uint64_t)tv.tv_usec << 16) ^ tv.tv_sec ^ getpid();
filename_template = &pattern[len - 6 - suffix_len];
for (count = 0; count < TMP_MAX; ++count) {
uint64_t v = value;
-- 
2.22.0



[PATCH] xdiff: avoid accidental redefinition of LFS feature in OpenIndiana

2019-06-17 Thread Carlo Marcelo Arenas Belón
after b46054b374 ("xdiff: use git-compat-util", 2019-04-11), two system
headers added in 2012 to xutils where no longer needed and could conflict
as shown below:

In file included from xdiff/xinclude.h:26:0,
 from xdiff/xutils.c:25:
./git-compat-util.h:4:0: warning: "_FILE_OFFSET_BITS" redefined
 #define _FILE_OFFSET_BITS 64

In file included from /usr/include/limits.h:37:0,
 from xdiff/xutils.c:23:
/usr/include/sys/feature_tests.h:231:0: note: this is the location of the 
previous definition
 #define _FILE_OFFSET_BITS 32

make sure git-compat-util.h is the first header (through xinclude.h)

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 xdiff/xutils.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 963e1c58b9..cfa6e2220f 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -20,13 +20,9 @@
  *
  */
 
-#include 
-#include 
 #include "xinclude.h"
 
 
-
-
 long xdl_bogosqrt(long n) {
long i;
 
-- 
2.22.0



[PATCH] trace2: correct typo in technical documentation

2019-06-26 Thread Carlo Marcelo Arenas Belón
an apparent typo for the environment variable was included with 81567caf87
("trace2: update docs to describe system/global config settings",
2019-04-15), and was missed when renamed variables by e4b75d6a1d
("trace2: rename environment variables to GIT_TRACE2*", 2019-05-19)

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 Documentation/technical/api-trace2.txt | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/Documentation/technical/api-trace2.txt 
b/Documentation/technical/api-trace2.txt
index 23c3cc7a37..f7ffe7d599 100644
--- a/Documentation/technical/api-trace2.txt
+++ b/Documentation/technical/api-trace2.txt
@@ -35,7 +35,7 @@ Format details are given in a later section.
 === The Normal Format Target
 
 The normal format target is a tradition printf format and similar
-to GIT_TRACE format.  This format is enabled with the `GIT_TR`
+to GIT_TRACE format.  This format is enabled with the `GIT_TRACE2`
 environment variable or the `trace2.normalTarget` system or global
 config setting.
 
-- 
2.22.0



[PATCH] grep: skip UTF8 checks explicitally

2019-07-21 Thread Carlo Marcelo Arenas Belón
Usually PCRE is compiled with JIT support, and therefore the code
path used includes calling pcre2_jit_match (for PCRE2), that ignores
invalid UTF-8 in the corpus.

Make that option explicit so it can be also used when JIT is not
enabled and pcre2_match is called instead, preventing `git grep`
to abort when hitting the first binary blob in a fixed match
after ed0479ce3d ("Merge branch 'ab/no-kwset' into next", 2019-07-15)

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/grep.c b/grep.c
index fc0ed73ef3..146093f590 100644
--- a/grep.c
+++ b/grep.c
@@ -409,7 +409,7 @@ static void compile_pcre1_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
 static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
 {
-   int ovector[30], ret, flags = 0;
+   int ovector[30], ret, flags = PCRE_NO_UTF8_CHECK;
 
if (eflags & REG_NOTBOL)
flags |= PCRE_NOTBOL;
@@ -554,7 +554,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
 static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
 {
-   int ret, flags = 0;
+   int ret, flags = PCRE2_NO_UTF_CHECK;
PCRE2_SIZE *ovector;
PCRE2_UCHAR errbuf[256];
 
-- 
2.22.0



[PATCH] grep: use custom JIT stack with pcre2

2019-07-21 Thread Carlo Marcelo Arenas Belón
94da9193a6 ("grep: add support for PCRE v2", 2017-06-01) allocate
a stack and assign it to a match context, but never pass it to
pcre2_jit_match, using instead the default.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
This might have positive performance consequences (per the comments)
but haven't tested them; if there is no difference might be better
instead to remove the stack and match_context and save the related
memory, since it seems the default was working fine anyway.

 grep.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/grep.c b/grep.c
index 146093f590..ff76907ceb 100644
--- a/grep.c
+++ b/grep.c
@@ -564,7 +564,7 @@ static int pcre2match(struct grep_pat *p, const char *line, 
const char *eol,
if (p->pcre2_jit_on)
ret = pcre2_jit_match(p->pcre2_pattern, (unsigned char *)line,
  eol - line, 0, flags, p->pcre2_match_data,
- NULL);
+ p->pcre2_match_context);
else
ret = pcre2_match(p->pcre2_pattern, (unsigned char *)line,
  eol - line, 0, flags, p->pcre2_match_data,
-- 
2.22.0



[PATCH] grep: skip UTF8 checks explicitly

2019-07-22 Thread Carlo Marcelo Arenas Belón
Usually PCRE is compiled with JIT support, and therefore the code
path used includes calling pcre2_jit_match (for PCRE2), that ignores
invalid UTF-8 in the corpus.

Make that option explicit so it can be also used when JIT is not
enabled and pcre2_match is called instead, preventing `git grep`
to abort when hitting the first binary blob in a fixed match
after ed0479ce3d ("Merge branch 'ab/no-kwset' into next", 2019-07-15)

Reviewed-by: Johannes Schindelin 
Signed-off-by: Carlo Marcelo Arenas Belón 
---
V2: spelling fixes from Eric Sunshine

 grep.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/grep.c b/grep.c
index fc0ed73ef3..146093f590 100644
--- a/grep.c
+++ b/grep.c
@@ -409,7 +409,7 @@ static void compile_pcre1_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
 static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
 {
-   int ovector[30], ret, flags = 0;
+   int ovector[30], ret, flags = PCRE_NO_UTF8_CHECK;
 
if (eflags & REG_NOTBOL)
flags |= PCRE_NOTBOL;
@@ -554,7 +554,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
 static int pcre2match(struct grep_pat *p, const char *line, const char *eol,
regmatch_t *match, int eflags)
 {
-   int ret, flags = 0;
+   int ret, flags = PCRE2_NO_UTF_CHECK;
PCRE2_SIZE *ovector;
PCRE2_UCHAR errbuf[256];
 
-- 
2.22.0



[RFC PATCH 0/2] PCRE1 cleanup

2019-07-26 Thread Carlo Marcelo Arenas Belón
Sent as an RFC since it was meant to be applied against ab/pcre-jit-fixes
but that is likely to change with the reroll of that branch.

 [PATCH 1/2] grep: make sure NO_LIBPCRE1_JIT disable JIT in PCRE1
 [PATCH 2/2] grep: refactor and simplify PCRE1 support

The end result could be squashed together before merging but sent as
independentt changes to make review easier, with the first change doing
the minimum required to make PCRE1 great again.

 Makefile |  9 ++---
 grep.c   | 15 +--
 grep.h   | 11 ---
 3 files changed, 11 insertions(+), 24 deletions(-)

base-commit: 0f8c4ddfdddb72dc62d76864f5d3d31f136c7129
-- 
2.22.0


[RFC PATCH 2/2] grep: refactor and simplify PCRE1 support

2019-07-26 Thread Carlo Marcelo Arenas Belón
The code used both a macro and a variable to keep track if JIT
support was desired and relied on the fact that a non JIT
enabled library will ignore a request for JIT compilation
(as defined by the second parameter of the call to pcre_study)

Cleanup the multiple levels of macros used and call pcre_study
with the right parameter after JIT support has been confirmed
and unless it was requested to be disabled with NO_LIBPCRE1_JIT

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 15 +--
 grep.h |  9 -
 2 files changed, 9 insertions(+), 15 deletions(-)

diff --git a/grep.c b/grep.c
index 6b52fed53a..599765c5c1 100644
--- a/grep.c
+++ b/grep.c
@@ -386,6 +386,7 @@ static void compile_pcre1_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
const char *error;
int erroffset;
int options = PCRE_MULTILINE;
+   int study_options = 0;
 
if (opt->ignore_case) {
if (has_non_ascii(p->pattern))
@@ -400,13 +401,15 @@ static void compile_pcre1_regexp(struct grep_pat *p, 
const struct grep_opt *opt)
if (!p->pcre1_regexp)
compile_regexp_failed(p, error);
 
-   p->pcre1_extra_info = pcre_study(p->pcre1_regexp, 
GIT_PCRE_STUDY_JIT_COMPILE, &error);
-   if (!p->pcre1_extra_info && error)
-   die("%s", error);
-
-#ifdef GIT_PCRE1_USE_JIT
+#if defined(PCRE_CONFIG_JIT) && !defined(NO_LIBPCRE1_JIT)
pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
+   if (p->pcre1_jit_on)
+   study_options = PCRE_STUDY_JIT_COMPILE;
 #endif
+
+   p->pcre1_extra_info = pcre_study(p->pcre1_regexp, study_options, 
&error);
+   if (!p->pcre1_extra_info && error)
+   die("%s", error);
 }
 
 static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
@@ -435,7 +438,7 @@ static int pcre1match(struct grep_pat *p, const char *line, 
const char *eol,
 static void free_pcre1_regexp(struct grep_pat *p)
 {
pcre_free(p->pcre1_regexp);
-#ifdef GIT_PCRE1_USE_JIT
+#ifdef PCRE_CONFIG_JIT
if (p->pcre1_jit_on)
pcre_free_study(p->pcre1_extra_info);
else
diff --git a/grep.h b/grep.h
index 2a74e28d94..30f2503121 100644
--- a/grep.h
+++ b/grep.h
@@ -3,15 +3,6 @@
 #include "color.h"
 #ifdef USE_LIBPCRE1
 #include 
-#ifndef NO_LIBPCRE1_JIT
-#ifdef PCRE_CONFIG_JIT
-#define GIT_PCRE1_USE_JIT
-#define GIT_PCRE_STUDY_JIT_COMPILE PCRE_STUDY_JIT_COMPILE
-#endif
-#endif
-#ifndef GIT_PCRE_STUDY_JIT_COMPILE
-#define GIT_PCRE_STUDY_JIT_COMPILE 0
-#endif
 #else
 typedef int pcre;
 typedef int pcre_extra;
-- 
2.22.0


[RFC PATCH 1/2] grep: make sure NO_LIBPCRE1_JIT disable JIT in PCRE1

2019-07-26 Thread Carlo Marcelo Arenas Belón
e87de7cab4 ("grep: un-break building with PCRE < 8.32", 2017-05-25)
added a restriction for JIT support that is no longer needed after
pcre_jit_exec() calls were removed.

Reorganize the definitions in grep.h so that JIT support could be
detected early and NO_LIBPCRE1_JIT could be used reliably to enforce
JIT doesn't get used.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 Makefile | 9 ++---
 grep.h   | 4 +---
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index 11ccea4071..7e0e6cc129 100644
--- a/Makefile
+++ b/Makefile
@@ -34,13 +34,8 @@ all::
 # library. Support for version 1 will likely be removed in some future
 # release of Git, as upstream has all but abandoned it.
 #
-# When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if the PCRE v1
-# library is compiled without --enable-jit. We will auto-detect
-# whether the version of the PCRE v1 library in use has JIT support at
-# all, but we unfortunately can't auto-detect whether JIT support
-# hasn't been compiled in in an otherwise JIT-supporting version. If
-# you have link-time errors about a missing `pcre_jit_exec` define
-# this, or recompile PCRE v1 with --enable-jit.
+# When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if you want to
+# disable JIT even if supported by your library.
 #
 # Define LIBPCREDIR=/foo/bar if your PCRE header and library files are
 # in /foo/bar/include and /foo/bar/lib directories. Which version of
diff --git a/grep.h b/grep.h
index a405fc870c..2a74e28d94 100644
--- a/grep.h
+++ b/grep.h
@@ -3,14 +3,12 @@
 #include "color.h"
 #ifdef USE_LIBPCRE1
 #include 
-#ifdef PCRE_CONFIG_JIT
-#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32
 #ifndef NO_LIBPCRE1_JIT
+#ifdef PCRE_CONFIG_JIT
 #define GIT_PCRE1_USE_JIT
 #define GIT_PCRE_STUDY_JIT_COMPILE PCRE_STUDY_JIT_COMPILE
 #endif
 #endif
-#endif
 #ifndef GIT_PCRE_STUDY_JIT_COMPILE
 #define GIT_PCRE_STUDY_JIT_COMPILE 0
 #endif
-- 
2.22.0


[PATCH 1/3] grep: make pcre1_tables version agnostic

2019-07-27 Thread Carlo Marcelo Arenas Belón
6d4b5747f0 ("grep: change internal *pcre* variable & function names
to be *pcre1*", 2017-05-25), renamed most variables to be PCRE1
specific to give space to similarly named variables for PCRE2, but
in this case the change wasn't needed as the types were compatible
enough (const unsigned char* vs const uint8_t*) to be shared.

Revert that change, as 94da9193a6 ("grep: add support for PCRE v2",
2017-06-01) failed to create an equivalent PCRE2 version.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 6 +++---
 grep.h | 2 +-
 2 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/grep.c b/grep.c
index f7c3a5803e..cc65f7a987 100644
--- a/grep.c
+++ b/grep.c
@@ -389,14 +389,14 @@ static void compile_pcre1_regexp(struct grep_pat *p, 
const struct grep_opt *opt)
 
if (opt->ignore_case) {
if (has_non_ascii(p->pattern))
-   p->pcre1_tables = pcre_maketables();
+   p->pcre_tables = pcre_maketables();
options |= PCRE_CASELESS;
}
if (is_utf8_locale() && has_non_ascii(p->pattern))
options |= PCRE_UTF8;
 
p->pcre1_regexp = pcre_compile(p->pattern, options, &error, &erroffset,
- p->pcre1_tables);
+ p->pcre_tables);
if (!p->pcre1_regexp)
compile_regexp_failed(p, error);
 
@@ -462,7 +462,7 @@ static void free_pcre1_regexp(struct grep_pat *p)
{
pcre_free(p->pcre1_extra_info);
}
-   pcre_free((void *)p->pcre1_tables);
+   pcre_free((void *)p->pcre_tables);
 }
 #else /* !USE_LIBPCRE1 */
 static void compile_pcre1_regexp(struct grep_pat *p, const struct grep_opt 
*opt)
diff --git a/grep.h b/grep.h
index 1875880f37..d34f66b384 100644
--- a/grep.h
+++ b/grep.h
@@ -89,7 +89,7 @@ struct grep_pat {
pcre *pcre1_regexp;
pcre_extra *pcre1_extra_info;
pcre_jit_stack *pcre1_jit_stack;
-   const unsigned char *pcre1_tables;
+   const unsigned char *pcre_tables;
int pcre1_jit_on;
pcre2_code *pcre2_pattern;
pcre2_match_data *pcre2_match_data;
-- 
2.22.0


[PATCH 2/3] grep: use pcre_tables also for PCRE2

2019-07-27 Thread Carlo Marcelo Arenas Belón
94da9193a6 ("grep: add support for PCRE v2", 2017-06-01) added a
local variable to keep track of the chartables (used when locale
is not UTF-8 but non-ASCII characters are needed)

Remove that local variable in favor of the shared one within the
grep structure and that can be shared by all functions.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 5 ++---
 1 file changed, 2 insertions(+), 3 deletions(-)

diff --git a/grep.c b/grep.c
index cc65f7a987..d04635fad4 100644
--- a/grep.c
+++ b/grep.c
@@ -488,7 +488,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
PCRE2_UCHAR errbuf[256];
PCRE2_SIZE erroffset;
int options = PCRE2_MULTILINE;
-   const uint8_t *character_tables = NULL;
int jitret;
int patinforet;
size_t jitsizearg;
@@ -499,9 +498,9 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
 
if (opt->ignore_case) {
if (has_non_ascii(p->pattern)) {
-   character_tables = pcre2_maketables(NULL);
+   p->pcre_tables = pcre2_maketables(NULL);
p->pcre2_compile_context = 
pcre2_compile_context_create(NULL);
-   pcre2_set_character_tables(p->pcre2_compile_context, 
character_tables);
+   pcre2_set_character_tables(p->pcre2_compile_context, 
p->pcre_tables);
}
options |= PCRE2_CASELESS;
}
-- 
2.22.0


[PATCH 3/3] grep: plug leak of pcre chartables in PCRE2

2019-07-27 Thread Carlo Marcelo Arenas Belón
Just as it is done with PCRE1, make sure that the allocated chartables
get free at cleanup time.

This assumes no global context is used (NULL passed when created the
tables), but will likely be updated in tandem if that ever changes.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/grep.c b/grep.c
index d04635fad4..d9768c5f05 100644
--- a/grep.c
+++ b/grep.c
@@ -604,6 +604,7 @@ static void free_pcre2_pattern(struct grep_pat *p)
pcre2_match_data_free(p->pcre2_match_data);
pcre2_jit_stack_free(p->pcre2_jit_stack);
pcre2_match_context_free(p->pcre2_match_context);
+   free((void *)p->pcre_tables);
 }
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt 
*opt)
-- 
2.22.0


[PATCH 0/3] grep: memory leak in PCRE2

2019-07-27 Thread Carlo Marcelo Arenas Belón
This series fixes a small memory leak that was introduced when PCRE2
support was added, and that should be visible with t7813 and valgrind

Applies cleanly to maint and master but will conflict slightly with
ab/no-kwset (currently in next) and most likely other changes introduced
about this same codebase (ex: ab/pcre-jit-fixes) but hopefully the
spreading on short simple commits helps with reviewing.

Carlo Marcelo Arenas Belón (3):
  grep: make pcre1_tables version agnostic
  grep: use pcre_tables also for PCRE2
  grep: plug leak of pcre chartables in PCRE2

 grep.c | 12 ++--
 grep.h |  2 +-
 2 files changed, 7 insertions(+), 7 deletions(-)

-- 
2.22.0


[PATCH v2 2/5] wt-status.h: drop stdio.h include

2019-07-28 Thread Carlo Marcelo Arenas Belón
From: Jeff King 

We started including stdio.h to pick up the declaration of "FILE" in
f26a001226 (Enable wt-status output to a given FILE pointer.,
2007-09-17). But there's no need, since headers can assume that
git-compat-util.h has been included, which covers stdio.

This should just be redundant, and not hurting anything (like pulling in
includes out of order) because C files are supposed to always include
git-compat-util.h first. But it's worth cleaning up to model good
behavior.

Signed-off-by: Jeff King 
Signed-off-by: Junio C Hamano 
---
 wt-status.h | 1 -
 1 file changed, 1 deletion(-)

diff --git a/wt-status.h b/wt-status.h
index 64f1ddc9fd..8849768e92 100644
--- a/wt-status.h
+++ b/wt-status.h
@@ -1,7 +1,6 @@
 #ifndef STATUS_H
 #define STATUS_H
 
-#include 
 #include "string-list.h"
 #include "color.h"
 #include "pathspec.h"
-- 
2.22.0



[PATCH v2 1/5] verify-tag: drop signal.h include

2019-07-28 Thread Carlo Marcelo Arenas Belón
From: Jeff King 

There's no reason verify-tag.c needs to include signal.h. It's already
in git-compat-util.h, which we properly include as the first header.
And there doesn't seem to be a particular reason for this include; it's
just an artifact from the file creation in 2ae68fcb78 (Make verify-tag a
builtin., 2007-07-27).

Likewise verify-commit.c has the same issue, probably because it was
created using verify-tag as a template in d07b00b7f3 (verify-commit:
scriptable commit signature verification, 2014-06-23).

These includes are probably just redundant, and not hurting anything by
circumventing the order that git-compat-util.h tries to impose, since
we'll always have loaded git-compat-util by the time we get to these. So
this is just a cleanup, and shouldn't fix or break any platforms.

Signed-off-by: Jeff King 
Signed-off-by: Junio C Hamano 
---
 builtin/verify-commit.c | 1 -
 builtin/verify-tag.c| 1 -
 2 files changed, 2 deletions(-)

diff --git a/builtin/verify-commit.c b/builtin/verify-commit.c
index 7772c07ed7..4e93914e59 100644
--- a/builtin/verify-commit.c
+++ b/builtin/verify-commit.c
@@ -12,7 +12,6 @@
 #include "repository.h"
 #include "commit.h"
 #include "run-command.h"
-#include 
 #include "parse-options.h"
 #include "gpg-interface.h"
 
diff --git a/builtin/verify-tag.c b/builtin/verify-tag.c
index 6fa04b751a..f45136a06b 100644
--- a/builtin/verify-tag.c
+++ b/builtin/verify-tag.c
@@ -10,7 +10,6 @@
 #include "builtin.h"
 #include "tag.h"
 #include "run-command.h"
-#include 
 #include "parse-options.h"
 #include "gpg-interface.h"
 #include "ref-filter.h"
-- 
2.22.0



[PATCH v2 4/5] xdiff: remove duplicate headers from xhistogram.c

2019-07-28 Thread Carlo Marcelo Arenas Belón
8c912eea94 ("teach --histogram to diff", 2011-07-12) included them, but
were already part of xinclude.h

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 xdiff/xhistogram.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index ec85f5992b..c7b35a9667 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -42,8 +42,6 @@
  */
 
 #include "xinclude.h"
-#include "xtypes.h"
-#include "xdiff.h"
 
 #define MAX_PTRUINT_MAX
 #define MAX_CNTUINT_MAX
-- 
2.22.0



[PATCH v2 5/5] xdiff: remove duplicate headers from xpatience.c

2019-07-28 Thread Carlo Marcelo Arenas Belón
92b7de93fb (Implement the patience diff algorithm, 2009-01-07) added them
but were already part of xinclude.h

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 xdiff/xpatience.c | 2 --
 1 file changed, 2 deletions(-)

diff --git a/xdiff/xpatience.c b/xdiff/xpatience.c
index f3573d9f00..3c5601b602 100644
--- a/xdiff/xpatience.c
+++ b/xdiff/xpatience.c
@@ -20,8 +20,6 @@
  *
  */
 #include "xinclude.h"
-#include "xtypes.h"
-#include "xdiff.h"
 
 /*
  * The basic idea of patience diff is to find lines that are unique in
-- 
2.22.0



[PATCH v2 0/5] system header cleanup

2019-07-28 Thread Carlo Marcelo Arenas Belón
This series is a reroll of jk/no-system-includes-in-dot-c with
cb/xdiff-no-system-includes-in-dot-c applied on top with minor
fixes for the commit message based on feedback from Johannes
and the example put forward by Peff with his own patches.

The changes proposed shouldn't affect any systems (except for
the 3rd one) and that has since shown to also be needed
in Alpine Linux (because of _XOPEN_SOURCE redefinition).

The last 2 patches are new to the series and just cleanup
the dependency list in xdiff.

Carlo Marcelo Arenas Belón (3):
  xdiff: drop system includes in xutils.c
  xdiff: remove duplicate headers from xhistogram.c
  xdiff: remove duplicate headers from xpatience.c

Jeff King (2):
  verify-tag: drop signal.h include
  wt-status.h: drop stdio.h include

 builtin/verify-commit.c | 1 -
 builtin/verify-tag.c| 1 -
 wt-status.h | 1 -
 xdiff/xhistogram.c  | 2 --
 xdiff/xpatience.c   | 2 --
 xdiff/xutils.c  | 4 
 6 files changed, 11 deletions(-)

-- 
2.22.0


[PATCH v2 3/5] xdiff: drop system includes in xutils.c

2019-07-28 Thread Carlo Marcelo Arenas Belón
After b46054b374 ("xdiff: use git-compat-util", 2019-04-11), two system
headers added with 6942efcfa9 ("xdiff: load full words in the inner loop
of xdl_hash_record", 2012-04-06) to xutils.c are no longer needed and
could conflict as shown below from an OpenIndiana build:

In file included from xdiff/xinclude.h:26:0,
 from xdiff/xutils.c:25:
./git-compat-util.h:4:0: warning: "_FILE_OFFSET_BITS" redefined
 #define _FILE_OFFSET_BITS 64

In file included from /usr/include/limits.h:37:0,
 from xdiff/xutils.c:23:
/usr/include/sys/feature_tests.h:231:0: note: this is the location of the 
previous definition
 #define _FILE_OFFSET_BITS 32

Make sure git-compat-util.h is the first header (through xinclude.h)

Signed-off-by: Carlo Marcelo Arenas Belón 
Signed-off-by: Junio C Hamano 
---
V2: reword commit with feedback from Johannes

 xdiff/xutils.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/xdiff/xutils.c b/xdiff/xutils.c
index 963e1c58b9..cfa6e2220f 100644
--- a/xdiff/xutils.c
+++ b/xdiff/xutils.c
@@ -20,13 +20,9 @@
  *
  */
 
-#include 
-#include 
 #include "xinclude.h"
 
 
-
-
 long xdl_bogosqrt(long n) {
long i;
 
-- 
2.22.0



[RFC PATCH] grep: allow for run time disabling of JIT in PCRE

2019-07-28 Thread Carlo Marcelo Arenas Belón
PCRE1 allowed for a compile time flag to disable JIT, but PCRE2 never
had one, forcing the use of JIT if -P was requested.

After ed0479ce3d (Merge branch 'ab/no-kwset' into next, 2019-07-15)
the PCRE2 engine will be used more broadly and therefore adding this
knob will give users a fallback for situations like the one observed
in OpenBSD with a JIT enabled PCRE2, because of W^X restrictions:

  $ git grep 'foo bar'
  fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
  $ git grep -G 'foo bar'
  fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
  $ git grep -E 'foo bar'
  fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'
  $ git grep -F 'foo bar'
  fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 Documentation/git-grep.txt |  4 
 grep.c | 15 +--
 grep.h |  1 +
 3 files changed, 18 insertions(+), 2 deletions(-)

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index c89fb569e3..ff544bdeec 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -69,6 +69,10 @@ grep.fallbackToNoIndex::
If set to true, fall back to git grep --no-index if git grep
is executed outside of a git repository.  Defaults to false.
 
+pcre.jit::
+   If set to false, disable JIT when using PCRE.  Defaults to
+   true.
+
 
 OPTIONS
 ---
diff --git a/grep.c b/grep.c
index c7c06ae08d..3524d353dd 100644
--- a/grep.c
+++ b/grep.c
@@ -56,6 +56,7 @@ void init_grep_defaults(struct repository *repo)
opt->repo = repo;
opt->relative = 1;
opt->pathname = 1;
+   opt->pcre_jit = 1;
opt->max_depth = -1;
opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
color_set(opt->colors[GREP_COLOR_CONTEXT], "");
@@ -125,6 +126,12 @@ int grep_config(const char *var, const char *value, void 
*cb)
return 0;
}
 
+   if (!strcmp(var, "pcre.jit")) {
+   int is_bool;
+   opt->pcre_jit = git_config_bool_or_int(var, value, &is_bool);
+   return 0;
+   }
+
if (!strcmp(var, "color.grep"))
opt->color = git_config_colorbool(var, value);
if (!strcmp(var, "color.grep.match")) {
@@ -163,6 +170,7 @@ void grep_init(struct grep_opt *opt, struct repository 
*repo, const char *prefix
opt->pattern_tail = &opt->pattern_list;
opt->header_tail = &opt->header_list;
 
+   opt->pcre_jit = def->pcre_jit;
opt->only_matching = def->only_matching;
opt->color = def->color;
opt->extended_regexp_option = def->extended_regexp_option;
@@ -393,7 +401,8 @@ static void compile_pcre1_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
die("%s", error);
 
 #ifdef GIT_PCRE1_USE_JIT
-   pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
+   if (opt->pcre_jit)
+   pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
 #endif
 }
 
@@ -489,7 +498,9 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
compile_regexp_failed(p, (const char *)&errbuf);
}
 
-   pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
+   if (opt->pcre_jit)
+   pcre2_config(PCRE2_CONFIG_JIT, &p->pcre2_jit_on);
+
if (p->pcre2_jit_on) {
jitret = pcre2_jit_compile(p->pcre2_pattern, 
PCRE2_JIT_COMPLETE);
if (jitret)
diff --git a/grep.h b/grep.h
index c0c71eb4a9..fff152e606 100644
--- a/grep.h
+++ b/grep.h
@@ -151,6 +151,7 @@ struct grep_opt {
int allow_textconv;
int extended;
int use_reflog_filter;
+   int pcre_jit;
int pcre1;
int pcre2;
int relative;
-- 
2.22.0



[RFC PATCH v2] grep: allow for run time disabling of JIT in PCRE

2019-07-29 Thread Carlo Marcelo Arenas Belón
PCRE1 allowed for a compile time flag to disable JIT, but PCRE2 never
had one, forcing the use of JIT if -P was requested.

After ed0479ce3d (Merge branch 'ab/no-kwset' into next, 2019-07-15)
the PCRE2 engine will be used more broadly and therefore adding this
knob will allow users a escape from situations where JIT might be
problematic.

JIT will be used by default but it can be disabled with the --no-pcre-jit
option in `git grep` or by setting 0/false into the pcre.jit config.

If a value of -1 is used instead then the following error is prevented by
using the interpreter when a JIT failure consistent with known security
restrictions is found at regex compilation time.

  $ git grep 'foo bar'
  fatal: Couldn't JIT the PCRE2 pattern 'foo bar', got '-48'

Signed-off-by: Carlo Marcelo Arenas Belón 
---
V2: add command line to grep as suggested by Junio

 Documentation/git-grep.txt | 11 +++
 builtin/grep.c |  4 
 grep.c | 30 ++
 grep.h |  1 +
 4 files changed, 42 insertions(+), 4 deletions(-)

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index c89fb569e3..895c6b34ec 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -13,6 +13,7 @@ SYNOPSIS
   [-v | --invert-match] [-h|-H] [--full-name]
   [-E | --extended-regexp] [-G | --basic-regexp]
   [-P | --perl-regexp]
+  [-j | --[no]-pcre-jit]
   [-F | --fixed-strings] [-n | --line-number] [--column]
   [-l | --files-with-matches] [-L | --files-without-match]
   [(-O | --open-files-in-pager) []]
@@ -69,6 +70,12 @@ grep.fallbackToNoIndex::
If set to true, fall back to git grep --no-index if git grep
is executed outside of a git repository.  Defaults to false.
 
+pcre.jit::
+   If set to false, disable JIT when using PCRE.  Defaults to
+   true.
+   if set to -1 will try first to use JIT and fallback to the
+   interpreter instead of returning an error.
+
 
 OPTIONS
 ---
@@ -175,6 +182,10 @@ providing this option will cause it to die.
Use fixed strings for patterns (don't interpret pattern
as a regex).
 
+-j::
+--[no-]pcre-jit::
+   Diable JIT in PCRE with --no-pcre-jit.
+
 -n::
 --line-number::
Prefix the line number to matching lines.
diff --git a/builtin/grep.c b/builtin/grep.c
index 580fd38f41..b0e94875b2 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -923,6 +923,10 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
OPT_BOOL_F(0, "ext-grep", &external_grep_allowed__ignored,
   N_("allow calling of grep(1) (ignored by this 
build)"),
   PARSE_OPT_NOCOMPLETE),
+   OPT_GROUP("PCRE"),
+   OPT_SET_INT('j', "pcre-jit", &opt.pcre_jit,
+   N_("when to use JIT with PCRE"),
+   1),
OPT_END()
};
 
diff --git a/grep.c b/grep.c
index c7c06ae08d..d58cad0257 100644
--- a/grep.c
+++ b/grep.c
@@ -56,6 +56,7 @@ void init_grep_defaults(struct repository *repo)
opt->repo = repo;
opt->relative = 1;
opt->pathname = 1;
+   opt->pcre_jit = 1;
opt->max_depth = -1;
opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
color_set(opt->colors[GREP_COLOR_CONTEXT], "");
@@ -125,6 +126,12 @@ int grep_config(const char *var, const char *value, void 
*cb)
return 0;
}
 
+   if (!strcmp(var, "pcre.jit")) {
+   int is_bool;
+   opt->pcre_jit = git_config_bool_or_int(var, value, &is_bool);
+   return 0;
+   }
+
if (!strcmp(var, "color.grep"))
opt->color = git_config_colorbool(var, value);
if (!strcmp(var, "color.grep.match")) {
@@ -163,6 +170,7 @@ void grep_init(struct grep_opt *opt, struct repository 
*repo, const char *prefix
opt->pattern_tail = &opt->pattern_list;
opt->header_tail = &opt->header_list;
 
+   opt->pcre_jit = def->pcre_jit;
opt->only_matching = def->only_matching;
opt->color = def->color;
opt->extended_regexp_option = def->extended_regexp_option;
@@ -393,7 +401,8 @@ static void compile_pcre1_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
die("%s", error);
 
 #ifdef GIT_PCRE1_USE_JIT
-   pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
+   if (opt->pcre_jit)
+   pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
 #endif
 }
 
@@ -489,11 +498,24 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
compile_regexp_failed(p, (c

[PATCH v2] grep: avoid leak of chartables in PCRE2

2019-08-01 Thread Carlo Marcelo Arenas Belón
94da9193a6 ("grep: add support for PCRE v2", 2017-06-01) introduced
a small memory leak visible with valgrind in t7813.

Complete the creation of a PCRE2 specific variable that was missing from
the original change and free the generated table just like it is done
for PCRE1.

The table cleanup use free as there is no global context defined when
it was created (pcre2_maketables is passed a NULL pointer) but if that
gets ever changed will need to be updated in tandem.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
V2:
* better document why free is used as suggested by René
* avoid reusing PCRE1 variable for easy of maintenance (per Ævar)

 grep.c | 7 ---
 grep.h | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/grep.c b/grep.c
index f7c3a5803e..fbd3f3757c 100644
--- a/grep.c
+++ b/grep.c
@@ -488,7 +488,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
PCRE2_UCHAR errbuf[256];
PCRE2_SIZE erroffset;
int options = PCRE2_MULTILINE;
-   const uint8_t *character_tables = NULL;
int jitret;
int patinforet;
size_t jitsizearg;
@@ -499,9 +498,10 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
 
if (opt->ignore_case) {
if (has_non_ascii(p->pattern)) {
-   character_tables = pcre2_maketables(NULL);
+   p->pcre2_tables = pcre2_maketables(NULL);
p->pcre2_compile_context = 
pcre2_compile_context_create(NULL);
-   pcre2_set_character_tables(p->pcre2_compile_context, 
character_tables);
+   pcre2_set_character_tables(p->pcre2_compile_context,
+   p->pcre2_tables);
}
options |= PCRE2_CASELESS;
}
@@ -605,6 +605,7 @@ static void free_pcre2_pattern(struct grep_pat *p)
pcre2_match_data_free(p->pcre2_match_data);
pcre2_jit_stack_free(p->pcre2_jit_stack);
pcre2_match_context_free(p->pcre2_match_context);
+   free((void *)p->pcre2_tables);
 }
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt 
*opt)
diff --git a/grep.h b/grep.h
index 1875880f37..26d21a3433 100644
--- a/grep.h
+++ b/grep.h
@@ -96,6 +96,7 @@ struct grep_pat {
pcre2_compile_context *pcre2_compile_context;
pcre2_match_context *pcre2_match_context;
pcre2_jit_stack *pcre2_jit_stack;
+   const uint8_t *pcre2_tables;
uint32_t pcre2_jit_on;
kwset_t kws;
unsigned fixed:1;
-- 
2.22.0



[RFC PATCH v3] grep: treat PCRE2 jit compilation memory error as non fatal

2019-08-03 Thread Carlo Marcelo Arenas Belón
94da9193a6 (grep: add support for PCRE v2, 2017-06-01) uses the
JIT fast path unless JIT support has not been compiled in the
linked library.

Starting from 10.23 of PCRE2, pcre2grep ignores any errors from
pcre2_jit_cpmpile as a workaround for their bug1749[1] and we
should do too, so that the interpreter could be used as a fallback
in cases where JIT was not available because of a security policy.

To be conservative, we are restricting initially the error to the
known error that would be returned in that case (and to be documented
as such in a future release of PCRE) and printing a warning so that
corrective action could be taken.

[1] https://bugs.exim.org/show_bug.cgi?id=1749

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 8 +++-
 1 file changed, 7 insertions(+), 1 deletion(-)

diff --git a/grep.c b/grep.c
index f7c3a5803e..593a1cb7a0 100644
--- a/grep.c
+++ b/grep.c
@@ -525,7 +525,13 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
if (p->pcre2_jit_on == 1) {
jitret = pcre2_jit_compile(p->pcre2_pattern, 
PCRE2_JIT_COMPLETE);
if (jitret)
-   die("Couldn't JIT the PCRE2 pattern '%s', got '%d'\n", 
p->pattern, jitret);
+   if (jitret == PCRE2_ERROR_NOMEMORY) {
+   warning("JIT couldn't be used in PCRE2");
+   p->pcre2_jit_on = 0;
+   return;
+   }
+   else
+   die("Couldn't JIT the PCRE2 pattern '%s', got 
'%d'\n", p->pattern, jitret);
 
/*
 * The pcre2_config(PCRE2_CONFIG_JIT, ...) call just
-- 
2.23.0.rc1



[RFC PATCH 2/2] grep: make default number of threads reflect runtime

2019-08-04 Thread Carlo Marcelo Arenas Belón
5b594f457a (Threaded grep, 2010-01-25) added a hardcoded number of
threads(8) to use in grep and 89f09dd34e (grep: add --threads=
option and grep.threads configuration, 2015-12-15) made it configurable
through a knob as a workaround for systems where that default was not
effective.

Use instead the industry standard of 2x number of CPUs (to allow for
IO wait) for the default.

Using Debian 10 amd64 in a 2 CPU VirtualBox running in macOS 10.14.6
and that might had been representative of the original author environment
shows an overall performance improvement by avoiding thread trashing:

Testorigin/maintHEAD
---
7810.1: grep worktree, fixed regex (no match)   0.52(0.43+0.49) 
0.50(0.44+0.46) -3.8%
7810.2: grep worktree, fixed regex (common) 0.94(1.20+0.50) 
0.91(1.24+0.44) -3.2%
7810.3: grep -I, fixed non binary regex (common)0.98(1.24+0.51) 
0.94(1.30+0.44) -4.1%
7810.4: grep -i, fixed caseless regex (common)  0.97(1.31+0.45) 
0.93(1.18+0.56) -4.1%
7810.5: grep --no-index, fixed regex (common)   1.02(1.28+0.50) 
0.97(1.14+0.59) -4.9%
7810.6: grep worktree, simple regex (common)0.77(0.96+0.45) 
0.73(0.88+0.48) -5.2%
7810.7: grep -I, simple non binary regex (common)   0.78(0.96+0.48) 
0.73(0.94+0.43) -6.4%
7810.8: grep -i, simple caseless regex (common) 0.87(1.11+0.48) 
0.82(1.16+0.38) -5.7%
7810.9: grep worktree, expensive regex  10.37(19.67+0.76)   
10.20(19.46+0.76) -1.6%
7810.10: grep --cached, fixed regex 4.48(4.37+0.10) 
4.63(4.54+0.09) +3.3%
7810.11: grep --cached, expensive regex 23.74(23.61+0.11)   
23.39(23.28+0.09) -1.5%

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 Documentation/git-grep.txt | 2 +-
 builtin/grep.c | 3 +--
 2 files changed, 2 insertions(+), 3 deletions(-)

diff --git a/Documentation/git-grep.txt b/Documentation/git-grep.txt
index 2d27969057..5d72e03b2e 100644
--- a/Documentation/git-grep.txt
+++ b/Documentation/git-grep.txt
@@ -60,7 +60,7 @@ grep.extendedRegexp::
 
 grep.threads::
Number of grep worker threads to use.  If unset (or set to 0),
-   8 threads are used by default (for now).
+   2 threads per core are used by default.
 
 grep.fullName::
If set to true, enable `--full-name` option by default.
diff --git a/builtin/grep.c b/builtin/grep.c
index 580fd38f41..0ed8da30f8 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -32,7 +32,6 @@ static char const * const grep_usage[] = {
 
 static int recurse_submodules;
 
-#define GREP_NUM_THREADS_DEFAULT 8
 static int num_threads;
 
 static pthread_t *threads;
@@ -1068,7 +1067,7 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
} else if (num_threads < 0)
die(_("invalid number of threads specified (%d)"), num_threads);
else if (num_threads == 0)
-   num_threads = HAVE_THREADS ? GREP_NUM_THREADS_DEFAULT : 1;
+   num_threads = HAVE_THREADS ? online_cpus() * 2 : 1;
 
if (num_threads > 1) {
if (!HAVE_THREADS)
-- 
2.23.0.rc1



[RFC PATCH 1/2] p7810: add more grep performance relevant cases

2019-08-04 Thread Carlo Marcelo Arenas Belón
Add a baseline for a matching regex and make clear the distinction
between fixed (now using kwset) and a real simple expression.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 t/perf/p7810-grep.sh | 25 +++--
 1 file changed, 23 insertions(+), 2 deletions(-)

diff --git a/t/perf/p7810-grep.sh b/t/perf/p7810-grep.sh
index 9f4ade639f..9a4e335659 100755
--- a/t/perf/p7810-grep.sh
+++ b/t/perf/p7810-grep.sh
@@ -7,13 +7,34 @@ test_description="git-grep performance in various modes"
 test_perf_large_repo
 test_checkout_worktree
 
-test_perf 'grep worktree, cheap regex' '
+test_perf 'grep worktree, fixed regex (no match)' '
git grep some_nonexistent_string || :
 '
+test_perf 'grep worktree, fixed regex (common)' '
+   git grep void || :
+'
+test_perf 'grep -I, fixed non binary regex (common)' '
+   git grep -i void || :
+'
+test_perf 'grep -i, fixed caseless regex (common)' '
+   git grep -i void || :
+'
+test_perf 'grep --no-index, fixed regex (common)' '
+   git grep --no-index void || :
+'
+test_perf 'grep worktree, simple regex (common)' '
+   git grep "void|int" || :
+'
+test_perf 'grep -I, simple non binary regex (common)' '
+   git grep -I "void|int" || :
+'
+test_perf 'grep -i, simple caseless regex (common)' '
+   git grep -i "void|int" || :
+'
 test_perf 'grep worktree, expensive regex' '
git grep "^.* *some_nonexistent_string$" || :
 '
-test_perf 'grep --cached, cheap regex' '
+test_perf 'grep --cached, fixed regex' '
git grep --cached some_nonexistent_string || :
 '
 test_perf 'grep --cached, expensive regex' '
-- 
2.23.0.rc1



[RFC PATCH 0/2] grep: make threading smarter

2019-08-04 Thread Carlo Marcelo Arenas Belón
833e3df171 (pack-objects: Add runtime detection of online CPU's, 2008-02-22)
added the capability to check the number of online CPUs at runtime to do
better threading, so use that as well with grep.

Testing with a large (more than 4) number of cores and no grep.threads
configuration in real hardware encouraged, to confirm that no other
bottleneck is preventing the additional threads to improve performance.

If platform specific testing shows degradation (specially with HP-UX),
make sure that the right number of CPUs is reported by :

  $ ./t/helper/test-tool online-cpus
  4

There are additional cleanups possible in the grep code but had left it
out of this RFC to avoid confusion and make the change in patch 2 as
straight forward as possible.

There is also a chance that the online_cpus() function will be updated
as it predates POSIX and might be associated with one known performance
issue in HP-UX[1]

Lastly the performance numbers point to deficiencies in kwset and the
compat/regex code that will need to be addressed independently.

Carlo Marcelo Arenas Belón (2):
  p7810: add more grep performance relevant cases
  grep: make default number of threads reflect runtime

 Documentation/git-grep.txt |  2 +-
 builtin/grep.c |  3 +--
 t/perf/p7810-grep.sh   | 25 +++--
 3 files changed, 25 insertions(+), 5 deletions(-)

[1] 
https://public-inbox.org/git/tu4pr8401mb121664a8a588d799803f1e84e1...@tu4pr8401mb1216.namprd84.prod.outlook.com/
-- 
2.23.0.rc1


[PATCH 1/3] grep: make PCRE1 aware of custom allocator

2019-08-06 Thread Carlo Marcelo Arenas Belón
63e7e9d8b6 ("git-grep: Learn PCRE", 2011-05-09) didn't include a way
to override the system alocator, and so it is incompatible with
USE_NED_ALLOCATOR as reported by Dscho[1] (in similar code from PCRE2)

Make the minimum change possible to ensure this combination is supported

[1] https://public-inbox.org/git/pull.306.git.gitgitgad...@gmail.com

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 Makefile |  2 +-
 grep.c   | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index bd246f2989..4b384f3759 100644
--- a/Makefile
+++ b/Makefile
@@ -1764,7 +1764,7 @@ ifdef NATIVE_CRLF
 endif
 
 ifdef USE_NED_ALLOCATOR
-   COMPAT_CFLAGS += -Icompat/nedmalloc
+   COMPAT_CFLAGS += -DUSE_NED_ALLOCATOR -Icompat/nedmalloc
COMPAT_OBJS += compat/nedmalloc/nedmalloc.o
OVERRIDE_STRDUP = YesPlease
 endif
diff --git a/grep.c b/grep.c
index cd952ef5d3..0154998695 100644
--- a/grep.c
+++ b/grep.c
@@ -150,12 +150,22 @@ int grep_config(const char *var, const char *value, void 
*cb)
  * Initialize one instance of grep_opt and copy the
  * default values from the template we read the configuration
  * information in an earlier call to git_config(grep_config).
+ *
+ * If using PCRE make sure that the library is configured
+ * to use the right allocator (ex: NED)
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char 
*prefix)
 {
struct grep_opt *def = &grep_defaults;
int i;
 
+#ifdef USE_NED_ALLOCATOR
+#ifdef USE_LIBPCRE1
+   pcre_malloc = malloc;
+   pcre_free = free;
+#endif
+#endif
+
memset(opt, 0, sizeof(*opt));
opt->repo = repo;
opt->prefix = prefix;
-- 
2.23.0.rc1



[PATCH 0/3] grep: no leaks (WIP)

2019-08-06 Thread Carlo Marcelo Arenas Belón
This is an incomplete reroll for cb/pcre2-chartables-leakfix that attempts
to address the root cause of the problem reported[1] by Dscho with PCRE2
and that is that until now PCRE and NED never worked together.

The first patch is likely correct but since I'd been unable to replicate
the issue I can't be completely sure, if a kind soul with access to a
windows developer environment could give it a try we will know for sure
but FWIW it runs fine and doesn't introduce any failures in our tests

The second patch is obviously incomplete but should address the problem
reported; there are still more things to consider as explained there

The third patch is the original leak patch rebased on top.

Carlo Marcelo Arenas Belón (3):
  grep: make PCRE1 aware of custom allocator
  grep: make PCRE2 aware of custom allocator
  grep: avoid leak of chartables in PCRE2

 Makefile   |  2 +-
 builtin/grep.c |  1 +
 grep.c | 46 ++
 grep.h |  2 ++
 4 files changed, 46 insertions(+), 5 deletions(-)

[1] https://public-inbox.org/git/pull.306.git.gitgitgad...@gmail.com/
-- 
2.23.0.rc1


[PATCH 2/3] grep: make PCRE2 aware of custom allocator

2019-08-06 Thread Carlo Marcelo Arenas Belón
Most of the code stolen from[1] to easy on comparison and including
the deficiency of setting the global context even for patterns that
won't need it.

Ideally, the call from grep_init could be moved to a place where it
could be set without needing a lock and at least with this approach
we have a place to clear it (which is obviously missing more callers,
but at least shows how it will look for the grep subcommand)

I had also dropped most other users of the global context in a failed
attempt to make the change smaller, but also to keep the current
behaviour so that we could see the effect of enabling NED for PCRE2
more clearly.

Sadly, that will likely require a Windows box, as NED (at least our
version) is horribly broken in macOS (maybe it wasn't 64 bit clean)
and in Linux builds, but I can't reproduce your crasher and it is
most likely slower than the system malloc.

[1] https://public-inbox.org/git/pull.306.git.gitgitgad...@gmail.com/

Suggested-by: Johannes Schindelin 
---
 builtin/grep.c |  1 +
 grep.c | 31 +--
 grep.h |  1 +
 3 files changed, 31 insertions(+), 2 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 560051784e..e49c20df60 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1145,5 +1145,6 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
run_pager(&opt, prefix);
clear_pathspec(&pathspec);
free_grep_patterns(&opt);
+   grep_destroy();
return !hit;
 }
diff --git a/grep.c b/grep.c
index 0154998695..e748a6d68c 100644
--- a/grep.c
+++ b/grep.c
@@ -16,6 +16,20 @@ static int grep_source_is_binary(struct grep_source *gs,
 
 static struct grep_opt grep_defaults;
 
+#ifdef USE_LIBPCRE2
+static pcre2_general_context *pcre2_global_context;
+
+static void *pcre2_malloc(PCRE2_SIZE size, void *memory_data)
+{
+   return malloc(size);
+}
+
+static void pcre2_free(void *pointer, void *memory_data)
+{
+   return free(pointer);
+}
+#endif
+
 static const char *color_grep_slots[] = {
[GREP_COLOR_CONTEXT]= "context",
[GREP_COLOR_FILENAME]   = "filename",
@@ -153,6 +167,7 @@ int grep_config(const char *var, const char *value, void 
*cb)
  *
  * If using PCRE make sure that the library is configured
  * to use the right allocator (ex: NED)
+ * if any object is created it should be cleaned up in grep_destroy()
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char 
*prefix)
 {
@@ -164,6 +179,10 @@ void grep_init(struct grep_opt *opt, struct repository 
*repo, const char *prefix
pcre_malloc = malloc;
pcre_free = free;
 #endif
+#ifdef USE_LIBPCRE2
+   pcre2_global_context = pcre2_general_context_create(pcre2_malloc,
+   pcre2_free, NULL);
+#endif
 #endif
 
memset(opt, 0, sizeof(*opt));
@@ -188,6 +207,13 @@ void grep_init(struct grep_opt *opt, struct repository 
*repo, const char *prefix
color_set(opt->colors[i], def->colors[i]);
 }
 
+void grep_destroy(void)
+{
+#ifdef USE_LIBPCRE2
+   pcre2_general_context_free(pcre2_global_context);
+#endif
+}
+
 static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, 
struct grep_opt *opt)
 {
/*
@@ -509,7 +535,7 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
 
if (opt->ignore_case) {
if (has_non_ascii(p->pattern)) {
-   character_tables = pcre2_maketables(NULL);
+   character_tables = 
pcre2_maketables(pcre2_global_context);
p->pcre2_compile_context = 
pcre2_compile_context_create(NULL);
pcre2_set_character_tables(p->pcre2_compile_context, 
character_tables);
}
@@ -560,7 +586,8 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
return;
}
 
-   p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024, 
NULL);
+   p->pcre2_jit_stack = pcre2_jit_stack_create(1, 1024 * 1024,
+   pcre2_global_context);
if (!p->pcre2_jit_stack)
die("Couldn't allocate PCRE2 JIT stack");
p->pcre2_match_context = pcre2_match_context_create(NULL);
diff --git a/grep.h b/grep.h
index 1875880f37..526c2db9ef 100644
--- a/grep.h
+++ b/grep.h
@@ -189,6 +189,7 @@ struct grep_opt {
 void init_grep_defaults(struct repository *);
 int grep_config(const char *var, const char *value, void *);
 void grep_init(struct grep_opt *, struct repository *repo, const char *prefix);
+void grep_destroy(void);
 void grep_commit_pattern_type(enum grep_pattern_type, struct grep_opt *opt);
 
 void append_grep_pat(struct grep_opt *opt, const char *pat, size_t patlen, 
const char *origin, int no, enum grep_pat_token t);
-- 
2.23.0.rc1



[PATCH 3/3] grep: avoid leak of chartables in PCRE2

2019-08-06 Thread Carlo Marcelo Arenas Belón
94da9193a6 ("grep: add support for PCRE v2", 2017-06-01) introduced
a small memory leak visible with valgrind in t7813.

Complete the creation of a PCRE2 specific variable that was missing from
the original change and free the generated table just like it is done
for PCRE1.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 7 ---
 grep.h | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/grep.c b/grep.c
index e748a6d68c..3e14ec91a6 100644
--- a/grep.c
+++ b/grep.c
@@ -524,7 +524,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
PCRE2_UCHAR errbuf[256];
PCRE2_SIZE erroffset;
int options = PCRE2_MULTILINE;
-   const uint8_t *character_tables = NULL;
int jitret;
int patinforet;
size_t jitsizearg;
@@ -535,9 +534,10 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
 
if (opt->ignore_case) {
if (has_non_ascii(p->pattern)) {
-   character_tables = 
pcre2_maketables(pcre2_global_context);
+   p->pcre2_tables = 
pcre2_maketables(pcre2_global_context);
p->pcre2_compile_context = 
pcre2_compile_context_create(NULL);
-   pcre2_set_character_tables(p->pcre2_compile_context, 
character_tables);
+   pcre2_set_character_tables(p->pcre2_compile_context,
+   p->pcre2_tables);
}
options |= PCRE2_CASELESS;
}
@@ -642,6 +642,7 @@ static void free_pcre2_pattern(struct grep_pat *p)
pcre2_match_data_free(p->pcre2_match_data);
pcre2_jit_stack_free(p->pcre2_jit_stack);
pcre2_match_context_free(p->pcre2_match_context);
+   free((void *)p->pcre2_tables);
 }
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt 
*opt)
diff --git a/grep.h b/grep.h
index 526c2db9ef..533165c21a 100644
--- a/grep.h
+++ b/grep.h
@@ -96,6 +96,7 @@ struct grep_pat {
pcre2_compile_context *pcre2_compile_context;
pcre2_match_context *pcre2_match_context;
pcre2_jit_stack *pcre2_jit_stack;
+   const uint8_t *pcre2_tables;
uint32_t pcre2_jit_on;
kwset_t kws;
unsigned fixed:1;
-- 
2.23.0.rc1



[RFC PATCH v3 0/3] grep: no leaks or crashes (windows testing needed)

2019-08-06 Thread Carlo Marcelo Arenas Belón
This series is a candidate reroll for cb/pcre2-chartables-leakfix, that
hopefully addresses the root cause of the problem reported by Dscho in
Windows, where the PCRE2 library wasn't aware of the custom allocator and
was returning a pointer created with the system malloc but passing it to
NED's free, resulting in a segfault.

The reason why it was triggered by the original leak fix is the layering
violation reported by René and that is exclusive to PCRE2 (hence why it
hasn't been reported with PCRE1).  The first patch could be droped and
then used in a different series that will fully integrate the custom
allocator with the PCRE library and that is currently missing with PCRE2.

Eitherway, since I am unable to replicate the original bug or take
performance numbers in a representative environment without Windows
this is only published as an RFC, eventhough it has been tested and
considered mostly complete.

Junio, could you comment in my assumption that the use of grep in
revision.c doesn't require initializing a PCRE2 global context and
therefore not doing the cleanup?

Carlo Marcelo Arenas Belón (3):
  grep: make PCRE1 aware of custom allocator
  grep: make PCRE2 aware of custom allocator
  grep: avoid leak of chartables in PCRE2

 Makefile   |  2 +-
 builtin/grep.c |  1 +
 grep.c | 51 +++---
 grep.h |  2 ++
 4 files changed, 52 insertions(+), 4 deletions(-)

-- 
2.23.0.rc1


[RFC PATCH v3 1/3] grep: make PCRE1 aware of custom allocator

2019-08-06 Thread Carlo Marcelo Arenas Belón
63e7e9d8b6 ("git-grep: Learn PCRE", 2011-05-09) didn't include a way
to override the system alocator, and so it is incompatible with
USE_NED_ALLOCATOR as reported by Dscho[1] (in similar code from PCRE2)

Make the minimum change possible to ensure this combination is supported
by extending grep_init to set the PCRE1 specific functions to the NED
versions (using the aliased names though) and therefore making sure all
alocations are done inside PCRE1 with the same allocator than the rest
of git.

This change might have performance impact (hopefully for the best) and
so will be good idea to test it in a platform where NED might have a
positive impact (ex: Windows 7)

[1] https://public-inbox.org/git/pull.306.git.gitgitgad...@gmail.com

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 Makefile |  2 +-
 grep.c   | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index bd246f2989..4b384f3759 100644
--- a/Makefile
+++ b/Makefile
@@ -1764,7 +1764,7 @@ ifdef NATIVE_CRLF
 endif
 
 ifdef USE_NED_ALLOCATOR
-   COMPAT_CFLAGS += -Icompat/nedmalloc
+   COMPAT_CFLAGS += -DUSE_NED_ALLOCATOR -Icompat/nedmalloc
COMPAT_OBJS += compat/nedmalloc/nedmalloc.o
OVERRIDE_STRDUP = YesPlease
 endif
diff --git a/grep.c b/grep.c
index cd952ef5d3..0154998695 100644
--- a/grep.c
+++ b/grep.c
@@ -150,12 +150,22 @@ int grep_config(const char *var, const char *value, void 
*cb)
  * Initialize one instance of grep_opt and copy the
  * default values from the template we read the configuration
  * information in an earlier call to git_config(grep_config).
+ *
+ * If using PCRE make sure that the library is configured
+ * to use the right allocator (ex: NED)
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char 
*prefix)
 {
struct grep_opt *def = &grep_defaults;
int i;
 
+#ifdef USE_NED_ALLOCATOR
+#ifdef USE_LIBPCRE1
+   pcre_malloc = malloc;
+   pcre_free = free;
+#endif
+#endif
+
memset(opt, 0, sizeof(*opt));
opt->repo = repo;
opt->prefix = prefix;
-- 
2.23.0.rc1



[RFC PATCH v3 2/3] grep: make PCRE2 aware of custom allocator

2019-08-06 Thread Carlo Marcelo Arenas Belón
94da9193a6 (grep: add support for PCRE v2, 2017-06-01) didn't include
a way to override the system allocator, and so it is incompatible with
USE_NED_ALLOCATOR.  The problem was made visible when an attempt to
avoid a leak in a data structure that is created by the library was
passed to NED's free for disposal triggering a segfault in Windows.

PCRE2 requires the use of a general context to override the allocator
and therefore, there is a lot more code needed than in PCRE1, including
a couple of wrapper functions.

Extend the grep API with a "destructor" that could be called to cleanup
any objects that were created and used globally.

Update builtin/grep to use that new API, but any other future
users should make sure to have matching grep_init/grep_destroy calls
if they are using the pattern matching functionality (currently only
relevant when using both NED and PCRE2)

Move some of the logic that was before done per thread (in the workers)
into an earlier phase to avoid degrading performance, but as the use
of PCRE2 with NED is better understood it is expected more of its
functions will be instructed to use the custom allocator as well as
was done in the original code[1] this work was based on.

[1] 
https://public-inbox.org/git/3397e6797f872aedd18c6d795f4976e1c579514b.1565005867.git.gitgitgad...@gmail.com/

Reported-by: Johannes Schindelin 
Signed-off-by: Carlo Marcelo Arenas Belón 
---
 builtin/grep.c |  1 +
 grep.c | 36 +++-
 grep.h |  1 +
 3 files changed, 37 insertions(+), 1 deletion(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 560051784e..e49c20df60 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1145,5 +1145,6 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
run_pager(&opt, prefix);
clear_pathspec(&pathspec);
free_grep_patterns(&opt);
+   grep_destroy();
return !hit;
 }
diff --git a/grep.c b/grep.c
index 0154998695..3e5bdf94a6 100644
--- a/grep.c
+++ b/grep.c
@@ -16,6 +16,22 @@ static int grep_source_is_binary(struct grep_source *gs,
 
 static struct grep_opt grep_defaults;
 
+#ifdef USE_LIBPCRE2
+static pcre2_general_context *pcre2_global_context;
+
+#ifdef USE_NED_ALLOCATOR
+static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
+{
+   return malloc(size);
+}
+
+static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
+{
+   return free(pointer);
+}
+#endif
+#endif
+
 static const char *color_grep_slots[] = {
[GREP_COLOR_CONTEXT]= "context",
[GREP_COLOR_FILENAME]   = "filename",
@@ -153,6 +169,7 @@ int grep_config(const char *var, const char *value, void 
*cb)
  *
  * If using PCRE make sure that the library is configured
  * to use the right allocator (ex: NED)
+ * if any object is created it should be cleaned up in grep_destroy()
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char 
*prefix)
 {
@@ -188,6 +205,13 @@ void grep_init(struct grep_opt *opt, struct repository 
*repo, const char *prefix
color_set(opt->colors[i], def->colors[i]);
 }
 
+void grep_destroy(void)
+{
+#ifdef USE_LIBPCRE2
+   pcre2_general_context_free(pcre2_global_context);
+#endif
+}
+
 static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, 
struct grep_opt *opt)
 {
/*
@@ -319,6 +343,11 @@ void append_header_grep_pattern(struct grep_opt *opt,
 void append_grep_pattern(struct grep_opt *opt, const char *pat,
 const char *origin, int no, enum grep_pat_token t)
 {
+#if defined(USE_LIBPCRE2) && defined(USE_NED_ALLOCATOR)
+   if (!pcre2_global_context && opt->ignore_case && has_non_ascii(pat))
+   pcre2_global_context = pcre2_general_context_create(
+   pcre2_malloc, pcre2_free, NULL);
+#endif
append_grep_pat(opt, pat, strlen(pat), origin, no, t);
 }
 
@@ -507,9 +536,14 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
 
p->pcre2_compile_context = NULL;
 
+   /* pcre2_global_context is initialized in append_grep_pattern */
if (opt->ignore_case) {
if (has_non_ascii(p->pattern)) {
-   character_tables = pcre2_maketables(NULL);
+#ifdef USE_NED_ALLOCATOR
+   if (!pcre2_global_context)
+   BUG("pcre2_global_context uninitialized");
+#endif
+   character_tables = 
pcre2_maketables(pcre2_global_context);
p->pcre2_compile_context = 
pcre2_compile_context_create(NULL);
pcre2_set_character_tables(p->pcre2_compile_context, 
character_tables);
}
diff --git a/grep.h b/grep.h
index 1875880f37..526c2db9ef 100644
--- a/grep.h
+++ b/grep.h
@@ -189,6 +189,7 @@ struct grep_

[RFC PATCH v3 3/3] grep: avoid leak of chartables in PCRE2

2019-08-06 Thread Carlo Marcelo Arenas Belón
94da9193a6 ("grep: add support for PCRE v2", 2017-06-01) introduced
a small memory leak visible with valgrind in t7813.

Complete the creation of a PCRE2 specific variable that was missing from
the original change and free the generated table just like it is done
for PCRE1.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 7 ---
 grep.h | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/grep.c b/grep.c
index 3e5bdf94a6..3d3ea0414e 100644
--- a/grep.c
+++ b/grep.c
@@ -527,7 +527,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
PCRE2_UCHAR errbuf[256];
PCRE2_SIZE erroffset;
int options = PCRE2_MULTILINE;
-   const uint8_t *character_tables = NULL;
int jitret;
int patinforet;
size_t jitsizearg;
@@ -543,9 +542,10 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
if (!pcre2_global_context)
BUG("pcre2_global_context uninitialized");
 #endif
-   character_tables = 
pcre2_maketables(pcre2_global_context);
+   p->pcre2_tables = 
pcre2_maketables(pcre2_global_context);
p->pcre2_compile_context = 
pcre2_compile_context_create(NULL);
-   pcre2_set_character_tables(p->pcre2_compile_context, 
character_tables);
+   pcre2_set_character_tables(p->pcre2_compile_context,
+   p->pcre2_tables);
}
options |= PCRE2_CASELESS;
}
@@ -649,6 +649,7 @@ static void free_pcre2_pattern(struct grep_pat *p)
pcre2_match_data_free(p->pcre2_match_data);
pcre2_jit_stack_free(p->pcre2_jit_stack);
pcre2_match_context_free(p->pcre2_match_context);
+   free((void *)p->pcre2_tables);
 }
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt 
*opt)
diff --git a/grep.h b/grep.h
index 526c2db9ef..533165c21a 100644
--- a/grep.h
+++ b/grep.h
@@ -96,6 +96,7 @@ struct grep_pat {
pcre2_compile_context *pcre2_compile_context;
pcre2_match_context *pcre2_match_context;
pcre2_jit_stack *pcre2_jit_stack;
+   const uint8_t *pcre2_tables;
uint32_t pcre2_jit_on;
kwset_t kws;
unsigned fixed:1;
-- 
2.23.0.rc1



[RFC/PATCH 0/3] refactor MIN macro

2018-10-21 Thread Carlo Marcelo Arenas Belón
otherwise will warn on platforms where it is already defined (macOS)

convert the internal version from xdiff to a common one from
the compatibilty header additionally




[PATCH 2/3] git-compat-util: define MIN through compat

2018-10-21 Thread Carlo Marcelo Arenas Belón
this macro is commonly defined in system headers (usually )
but if it is not define it here so it can be used elsewhere

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 git-compat-util.h | 5 +
 sha256/block/sha256.c | 3 ---
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/git-compat-util.h b/git-compat-util.h
index 5f2e90932f..7a0b48452b 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -218,6 +218,11 @@
 #else
 #include 
 #endif
+
+#ifndef MIN
+#define MIN(x, y) ((x) < (y) ? (x) : (y))
+#endif
+
 #ifdef NO_INTPTR_T
 /*
  * On I16LP32, ILP32 and LP64 "long" is the safe bet, however
diff --git a/sha256/block/sha256.c b/sha256/block/sha256.c
index 0d4939cc2c..5fba943c79 100644
--- a/sha256/block/sha256.c
+++ b/sha256/block/sha256.c
@@ -130,9 +130,6 @@ static void blk_SHA256_Transform(blk_SHA256_CTX *ctx, const 
unsigned char *buf)
}
 }
 
-#ifndef MIN
-#define MIN(x, y) ((x) < (y) ? (x) : (y))
-#endif
 void blk_SHA256_Update(blk_SHA256_CTX *ctx, const void *data, size_t len)
 {
const unsigned char *in = data;
-- 
2.19.1



[PATCH 1/3] sha256: avoid redefinition for MIN

2018-10-21 Thread Carlo Marcelo Arenas Belón
it is already defined whenever "sys/param.h" is available

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 sha256/block/sha256.c | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/sha256/block/sha256.c b/sha256/block/sha256.c
index 18350c161a..0d4939cc2c 100644
--- a/sha256/block/sha256.c
+++ b/sha256/block/sha256.c
@@ -130,7 +130,9 @@ static void blk_SHA256_Transform(blk_SHA256_CTX *ctx, const 
unsigned char *buf)
}
 }
 
+#ifndef MIN
 #define MIN(x, y) ((x) < (y) ? (x) : (y))
+#endif
 void blk_SHA256_Update(blk_SHA256_CTX *ctx, const void *data, size_t len)
 {
const unsigned char *in = data;
-- 
2.19.1



[PATCH 3/3] xdiff: use compat's MIN instead

2018-10-21 Thread Carlo Marcelo Arenas Belón
Signed-off-by: Carlo Marcelo Arenas Belón 
---
 xdiff/xdiffi.c | 2 +-
 xdiff/xemit.c  | 6 +++---
 xdiff/xhistogram.c | 6 +++---
 xdiff/xmacros.h| 4 +---
 xdiff/xprepare.c   | 2 +-
 5 files changed, 9 insertions(+), 11 deletions(-)

diff --git a/xdiff/xdiffi.c b/xdiff/xdiffi.c
index 1f1f4a3c78..0754dc17ed 100644
--- a/xdiff/xdiffi.c
+++ b/xdiff/xdiffi.c
@@ -204,7 +204,7 @@ static long xdl_split(unsigned long const *ha1, long off1, 
long lim1,
 
fbest = fbest1 = -1;
for (d = fmax; d >= fmin; d -= 2) {
-   i1 = XDL_MIN(kvdf[d], lim1);
+   i1 = MIN(kvdf[d], lim1);
i2 = i1 - d;
if (lim2 < i2)
i1 = lim2 + d, i2 = lim2;
diff --git a/xdiff/xemit.c b/xdiff/xemit.c
index 7778dc2b19..43d455b404 100644
--- a/xdiff/xemit.c
+++ b/xdiff/xemit.c
@@ -217,8 +217,8 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb,
 
  post_context_calculation:
lctx = xecfg->ctxlen;
-   lctx = XDL_MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1));
-   lctx = XDL_MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2));
+   lctx = MIN(lctx, xe->xdf1.nrec - (xche->i1 + xche->chg1));
+   lctx = MIN(lctx, xe->xdf2.nrec - (xche->i2 + xche->chg2));
 
e1 = xche->i1 + xche->chg1 + lctx;
e2 = xche->i2 + xche->chg2 + lctx;
@@ -242,7 +242,7 @@ int xdl_emit_diff(xdfenv_t *xe, xdchange_t *xscr, 
xdemitcb_t *ecb,
 * its new end.
 */
if (xche->next) {
-   long l = XDL_MIN(xche->next->i1,
+   long l = MIN(xche->next->i1,
 xe->xdf1.nrec - 1);
if (l - xecfg->ctxlen <= e1 ||
get_func_line(xe, xecfg, NULL, l, e1) < 0) {
diff --git a/xdiff/xhistogram.c b/xdiff/xhistogram.c
index ec85f5992b..562cab6d14 100644
--- a/xdiff/xhistogram.c
+++ b/xdiff/xhistogram.c
@@ -129,7 +129,7 @@ static int scanA(struct histindex *index, int line1, int 
count1)
NEXT_PTR(index, ptr) = rec->ptr;
rec->ptr = ptr;
/* cap rec->cnt at MAX_CNT */
-   rec->cnt = XDL_MIN(MAX_CNT, rec->cnt + 1);
+   rec->cnt = MIN(MAX_CNT, rec->cnt + 1);
LINE_MAP(index, ptr) = rec;
goto continue_scan;
}
@@ -193,14 +193,14 @@ static int try_lcs(struct histindex *index, struct region 
*lcs, int b_ptr,
as--;
bs--;
if (1 < rc)
-   rc = XDL_MIN(rc, CNT(index, as));
+   rc = MIN(rc, CNT(index, as));
}
while (ae < LINE_END(1) && be < LINE_END(2)
&& CMP(index, 1, ae + 1, 2, be + 1)) {
ae++;
be++;
if (1 < rc)
-   rc = XDL_MIN(rc, CNT(index, ae));
+   rc = MIN(rc, CNT(index, ae));
}
 
if (b_next <= be)
diff --git a/xdiff/xmacros.h b/xdiff/xmacros.h
index 2809a28ca9..495b9dfac4 100644
--- a/xdiff/xmacros.h
+++ b/xdiff/xmacros.h
@@ -23,10 +23,8 @@
 #if !defined(XMACROS_H)
 #define XMACROS_H
 
+#include "../git-compat-util.h"
 
-
-
-#define XDL_MIN(a, b) ((a) < (b) ? (a): (b))
 #define XDL_MAX(a, b) ((a) > (b) ? (a): (b))
 #define XDL_ABS(v) ((v) >= 0 ? (v): -(v))
 #define XDL_ISDIGIT(c) ((c) >= '0' && (c) <= '9')
diff --git a/xdiff/xprepare.c b/xdiff/xprepare.c
index abeb8fb84e..7436064498 100644
--- a/xdiff/xprepare.c
+++ b/xdiff/xprepare.c
@@ -451,7 +451,7 @@ static int xdl_trim_ends(xdfile_t *xdf1, xdfile_t *xdf2) {
 
recs1 = xdf1->recs;
recs2 = xdf2->recs;
-   for (i = 0, lim = XDL_MIN(xdf1->nrec, xdf2->nrec); i < lim;
+   for (i = 0, lim = MIN(xdf1->nrec, xdf2->nrec); i < lim;
 i++, recs1++, recs2++)
if ((*recs1)->ha != (*recs2)->ha)
break;
-- 
2.19.1



[PATCH] test: avoid failures when USE_NED_ALLOCATOR

2018-10-23 Thread Carlo Marcelo Arenas Belón
contrib/nedmalloc doesn't support MALLOC_CHECK_ or MALLOC_PERTURB_
so add it to the same exception that is being used with valgrind

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 t/test-lib.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index 44288cbb59..2ad9e6176c 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -143,7 +143,7 @@ fi
 # Add libc MALLOC and MALLOC_PERTURB test
 # only if we are not executing the test with valgrind
 if expr " $GIT_TEST_OPTS " : ".* --valgrind " >/dev/null ||
-   test -n "$TEST_NO_MALLOC_CHECK"
+   test -n "$TEST_NO_MALLOC_CHECK" || test -n "$USE_NED_ALLOCATOR"
 then
setup_malloc_check () {
: nothing
-- 
2.19.1



[PATCH] khash: silence -Wunused-function

2018-10-23 Thread Carlo Marcelo Arenas Belón
after 36da893114 ("config.mak.dev: enable -Wunused-function", 2018-10-18)
macro generated code should use a similar solution than commit-slab to
silence the compiler.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 khash.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/khash.h b/khash.h
index d10caa0c35..39c2833877 100644
--- a/khash.h
+++ b/khash.h
@@ -234,7 +234,7 @@ static const double __ac_HASH_UPPER = 0.77;
__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, 
__hash_equal)
 
 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, 
__hash_equal) \
-   KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, 
__hash_func, __hash_equal)
+   KHASH_INIT2(name, __attribute__((__unused__)) static inline, khkey_t, 
khval_t, kh_is_map, __hash_func, __hash_equal)
 
 /* Other convenient macros... */
 
-- 
2.19.1



[PATCH v2] compat: make sure git_mmap is not expected to write

2018-10-23 Thread Carlo Marcelo Arenas Belón
in f48000fc ("Yank writing-back support from gitfakemmap.", 2005-10-08)
support for writting back changes was removed but the specific prot
flag that would be used was not checked for

Acked-by: Johannes Schindelin 
Signed-off-by: Carlo Marcelo Arenas Belón 
---
Changes in v2:

* reset-author to match signature
* cleanup commit message and add ACK

 compat/mmap.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/compat/mmap.c b/compat/mmap.c
index 7f662fef7b..14d31010df 100644
--- a/compat/mmap.c
+++ b/compat/mmap.c
@@ -4,7 +4,7 @@ void *git_mmap(void *start, size_t length, int prot, int flags, 
int fd, off_t of
 {
size_t n = 0;
 
-   if (start != NULL || !(flags & MAP_PRIVATE))
+   if (start != NULL || flags != MAP_PRIVATE || prot != PROT_READ)
die("Invalid usage of mmap when built with NO_MMAP");
 
start = xmalloc(length);
-- 
2.19.1



[PATCH v2 1/2] commit-slabs: move MAYBE_UNUSED out

2018-10-23 Thread Carlo Marcelo Arenas Belón
after 36da893114 ("config.mak.dev: enable -Wunused-function", 2018-10-18)
it is expected to be used to prevent -Wunused-function warnings for code
that was macro generated

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 commit-slab-impl.h | 4 ++--
 git-compat-util.h  | 2 ++
 2 files changed, 4 insertions(+), 2 deletions(-)

diff --git a/commit-slab-impl.h b/commit-slab-impl.h
index ac1e6d409a..5c0eb91a5d 100644
--- a/commit-slab-impl.h
+++ b/commit-slab-impl.h
@@ -1,10 +1,10 @@
 #ifndef COMMIT_SLAB_IMPL_H
 #define COMMIT_SLAB_IMPL_H
 
-#define MAYBE_UNUSED __attribute__((__unused__))
+#include "git-compat-util.h"
 
 #define implement_static_commit_slab(slabname, elemtype) \
-   implement_commit_slab(slabname, elemtype, static MAYBE_UNUSED)
+   implement_commit_slab(slabname, elemtype, MAYBE_UNUSED static)
 
 #define implement_shared_commit_slab(slabname, elemtype) \
implement_commit_slab(slabname, elemtype, )
diff --git a/git-compat-util.h b/git-compat-util.h
index 9a64998b24..e4d3967a23 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -408,6 +408,8 @@ static inline char *git_find_last_dir_sep(const char *path)
 #define LAST_ARG_MUST_BE_NULL
 #endif
 
+#define MAYBE_UNUSED __attribute__((__unused__))
+
 #include "compat/bswap.h"
 
 #include "wildmatch.h"
-- 
2.19.1



[PATCH v2 2/2] khash: silence -Wunused-function for delta-islands

2018-10-23 Thread Carlo Marcelo Arenas Belón
showing the following when compiled with latest clang (OpenBSD, Fedors
and macOS):

delta-islands.c:23:1: warning: unused function 'kh_destroy_str'
  [-Wunused-function]
delta-islands.c:23:1: warning: unused function 'kh_clear_str'
  [-Wunused-function]
delta-islands.c:23:1: warning: unused function 'kh_del_str' [-Wunused-function]

Reported-by: René Scharfe 
Suggested-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: Carlo Marcelo Arenas Belón 
---
 khash.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/khash.h b/khash.h
index c0da40daa7..fb71c92547 100644
--- a/khash.h
+++ b/khash.h
@@ -226,7 +226,7 @@ static const double __ac_HASH_UPPER = 0.77;
__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, 
__hash_equal)
 
 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, 
__hash_equal) \
-   KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, 
__hash_func, __hash_equal)
+   KHASH_INIT2(name, MAYBE_UNUSED static inline, khkey_t, khval_t, 
kh_is_map, __hash_func, __hash_equal)
 
 /* Other convenient macros... */
 
-- 
2.19.1



[PATCH v2 0/2] delta-islands: avoid unused function messages

2018-10-23 Thread Carlo Marcelo Arenas Belón
the macro generated code from delta-islands (using khash) triggers
some unused function warnings in macOS, OpenBSD and some linux with a
newer version of clang

Carlo Marcelo Arenas Belón (2):
  commit-slabs: move MAYBE_UNUSED out
  khash: silence -Wunused-function for delta-islands

 commit-slab-impl.h | 4 ++--
 git-compat-util.h  | 2 ++
 khash.h| 2 +-
 3 files changed, 5 insertions(+), 3 deletions(-)

-- 
2.19.1



[PATCH] sequencer: cleanup for gcc 8.2.1 warning

2018-10-24 Thread Carlo Marcelo Arenas Belón
sequencer.c: In function ‘write_basic_state’:
sequencer.c:2392:37: warning: zero-length gnu_printf format string 
[-Wformat-zero-length]
   write_file(rebase_path_verbose(), "");

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 sequencer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sequencer.c b/sequencer.c
index 8dd6db5a01..358e83bf6b 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -2335,7 +2335,7 @@ int write_basic_state(struct replay_opts *opts, const 
char *head_name,
write_file(rebase_path_quiet(), "\n");
 
if (opts->verbose)
-   write_file(rebase_path_verbose(), "");
+   write_file(rebase_path_verbose(), "\n");
if (opts->strategy)
write_file(rebase_path_strategy(), "%s\n", opts->strategy);
if (opts->xopts_nr > 0)
-- 
2.19.1



[PATCH v2] sequencer: cleanup for gcc warning in non developer mode

2018-10-25 Thread Carlo Marcelo Arenas Belón
as shown by:

  sequencer.c: In function ‘write_basic_state’:
  sequencer.c:2392:37: warning: zero-length gnu_printf format string 
[-Wformat-zero-length]
 write_file(rebase_path_verbose(), "");

where write_file will create an empty file if told to write an empty string
as can be inferred by the previous call

the somehow more convoluted syntax works around the issue by providing a non
empty format string and is already being used for the abort safety file since
1e41229d96 ("sequencer: make sequencer abort safer", 2016-12-07)

Signed-off-by: Carlo Marcelo Arenas Belón 
---

Notes:
Changes in v2

 - Avoid change of behaviour as suggested by Junio

 sequencer.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/sequencer.c b/sequencer.c
index 8dd6db5a01..10f602c4d4 100644
--- a/sequencer.c
+++ b/sequencer.c
@@ -2335,7 +2335,7 @@ int write_basic_state(struct replay_opts *opts, const 
char *head_name,
write_file(rebase_path_quiet(), "\n");
 
if (opts->verbose)
-   write_file(rebase_path_verbose(), "");
+   write_file(rebase_path_verbose(), "%s", "");
if (opts->strategy)
write_file(rebase_path_strategy(), "%s\n", opts->strategy);
if (opts->xopts_nr > 0)
-- 
2.19.1



[PATCH v3 1/3] commit-slab: move MAYBE_UNUSED into git-compat-util

2018-10-25 Thread Carlo Marcelo Arenas Belón
after 36da893114 ("config.mak.dev: enable -Wunused-function", 2018-10-18)
it is expected to be used to prevent -Wunused-function warnings for code
that was macro generated

Signed-off-by: Carlo Marcelo Arenas Belón 
Signed-off-by: Junio C Hamano 
---
 commit-slab-impl.h | 4 +---
 git-compat-util.h  | 2 ++
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/commit-slab-impl.h b/commit-slab-impl.h
index ac1e6d409a..e352c2f8c1 100644
--- a/commit-slab-impl.h
+++ b/commit-slab-impl.h
@@ -1,10 +1,8 @@
 #ifndef COMMIT_SLAB_IMPL_H
 #define COMMIT_SLAB_IMPL_H
 
-#define MAYBE_UNUSED __attribute__((__unused__))
-
 #define implement_static_commit_slab(slabname, elemtype) \
-   implement_commit_slab(slabname, elemtype, static MAYBE_UNUSED)
+   implement_commit_slab(slabname, elemtype, MAYBE_UNUSED static)
 
 #define implement_shared_commit_slab(slabname, elemtype) \
implement_commit_slab(slabname, elemtype, )
diff --git a/git-compat-util.h b/git-compat-util.h
index 48c955541e..8121b73b4a 100644
--- a/git-compat-util.h
+++ b/git-compat-util.h
@@ -408,6 +408,8 @@ static inline char *git_find_last_dir_sep(const char *path)
 #define LAST_ARG_MUST_BE_NULL
 #endif
 
+#define MAYBE_UNUSED __attribute__((__unused__))
+
 #include "compat/bswap.h"
 
 #include "wildmatch.h"
-- 
2.19.1



[PATCH v3 0/3] delta-islands: avoid unused function messages

2018-10-25 Thread Carlo Marcelo Arenas Belón
the macro generated code from delta-islands (using khash) triggers
some unused function warnings in macOS, OpenBSD and some linux with a
newer version of clang because of its use of static functions.

Changes from v2:
* Relay in the C code including git-compat-util as suggested by Jeff
* Commit message cleanup
* Make changes hdr-check clean

Changes from v1:
* Use MAYBE_UNUSED for all cases as suggested by Duy

Carlo Marcelo Arenas Belón (3):
  commit-slab: move MAYBE_UNUSED into git-compat-util
  khash: silence -Wunused-function in delta-islands from khash
  commit-slab: missing definitions and forward declarations (hdr-check)

 commit-slab-impl.h | 4 ++--
 commit-slab.h  | 2 ++
 git-compat-util.h  | 2 ++
 khash.h| 2 +-
 4 files changed, 7 insertions(+), 3 deletions(-)

-- 
2.19.1



[PATCH v3 2/3] khash: silence -Wunused-function in delta-islands from khash

2018-10-25 Thread Carlo Marcelo Arenas Belón
showing the following when compiled with latest clang (OpenBSD, Fedora
and macOS):

delta-islands.c:23:1: warning: unused function 'kh_destroy_str'
  [-Wunused-function]
delta-islands.c:23:1: warning: unused function 'kh_clear_str'
  [-Wunused-function]
delta-islands.c:23:1: warning: unused function 'kh_del_str' [-Wunused-function]

Reported-by: René Scharfe 
Suggested-by: Nguyễn Thái Ngọc Duy 
Signed-off-by: Carlo Marcelo Arenas Belón 
Signed-off-by: Junio C Hamano 
---
 khash.h | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/khash.h b/khash.h
index d10caa0c35..532109c87f 100644
--- a/khash.h
+++ b/khash.h
@@ -234,7 +234,7 @@ static const double __ac_HASH_UPPER = 0.77;
__KHASH_IMPL(name, SCOPE, khkey_t, khval_t, kh_is_map, __hash_func, 
__hash_equal)
 
 #define KHASH_INIT(name, khkey_t, khval_t, kh_is_map, __hash_func, 
__hash_equal) \
-   KHASH_INIT2(name, static inline, khkey_t, khval_t, kh_is_map, 
__hash_func, __hash_equal)
+   KHASH_INIT2(name, MAYBE_UNUSED static inline, khkey_t, khval_t, 
kh_is_map, __hash_func, __hash_equal)
 
 /* Other convenient macros... */
 
-- 
2.19.1



[PATCH v3 3/3] commit-slab: missing definitions and forward declarations (hdr-check)

2018-10-25 Thread Carlo Marcelo Arenas Belón
struct commmit needs to be defined before commit-slab can generate
working code, object_id should be at least known through a forward
declaration

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 commit-slab-impl.h | 2 ++
 commit-slab.h  | 2 ++
 2 files changed, 4 insertions(+)

diff --git a/commit-slab-impl.h b/commit-slab-impl.h
index e352c2f8c1..db7cf3f19b 100644
--- a/commit-slab-impl.h
+++ b/commit-slab-impl.h
@@ -1,6 +1,8 @@
 #ifndef COMMIT_SLAB_IMPL_H
 #define COMMIT_SLAB_IMPL_H
 
+#include "commit.h"
+
 #define implement_static_commit_slab(slabname, elemtype) \
implement_commit_slab(slabname, elemtype, MAYBE_UNUSED static)
 
diff --git a/commit-slab.h b/commit-slab.h
index 69bf0c807c..722252de61 100644
--- a/commit-slab.h
+++ b/commit-slab.h
@@ -1,6 +1,8 @@
 #ifndef COMMIT_SLAB_H
 #define COMMIT_SLAB_H
 
+struct object_id;
+
 #include "commit-slab-decl.h"
 #include "commit-slab-impl.h"
 
-- 
2.19.1



[PATCH] multi-pack-index: make code -Wunused-parameter clean

2018-11-03 Thread Carlo Marcelo Arenas Belón
introduced in 662148c435 ("midx: write object offsets", 2018-07-12)
but included on all previous versions as well.

midx.c:713:54: warning: unused parameter 'nr_objects' [-Wunused-parameter]

likely an oversight as the information needed to iterate over is
embedded in nr_large_offset

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 midx.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/midx.c b/midx.c
index 4fac0cd08a..a2c17e3108 100644
--- a/midx.c
+++ b/midx.c
@@ -710,7 +710,7 @@ static size_t write_midx_object_offsets(struct hashfile *f, 
int large_offset_nee
 }
 
 static size_t write_midx_large_offsets(struct hashfile *f, uint32_t 
nr_large_offset,
-  struct pack_midx_entry *objects, 
uint32_t nr_objects)
+  struct pack_midx_entry *objects)
 {
struct pack_midx_entry *list = objects;
size_t written = 0;
@@ -880,7 +880,7 @@ int write_midx_file(const char *object_dir)
break;
 
case MIDX_CHUNKID_LARGEOFFSETS:
-   written += write_midx_large_offsets(f, 
num_large_offsets, entries, nr_entries);
+   written += write_midx_large_offsets(f, 
num_large_offsets, entries);
break;
 
default:
-- 
2.19.1.816.gcd69ec8cd



[PATCH] parse-options: deprecate OPT_DATE

2018-11-05 Thread Carlo Marcelo Arenas Belón
Signed-off-by: Carlo Marcelo Arenas Belón 
---
 Documentation/technical/api-parse-options.txt | 4 
 1 file changed, 4 deletions(-)

diff --git a/Documentation/technical/api-parse-options.txt 
b/Documentation/technical/api-parse-options.txt
index 829b558110..2b036d7838 100644
--- a/Documentation/technical/api-parse-options.txt
+++ b/Documentation/technical/api-parse-options.txt
@@ -183,10 +183,6 @@ There are some macros to easily define options:
scale the provided value by 1024, 1024^2 or 1024^3 respectively.
The scaled value is put into `unsigned_long_var`.
 
-`OPT_DATE(short, long, ×tamp_t_var, description)`::
-   Introduce an option with date argument, see `approxidate()`.
-   The timestamp is put into `timestamp_t_var`.
-
 `OPT_EXPIRY_DATE(short, long, ×tamp_t_var, description)`::
Introduce an option with expiry date argument, see 
`parse_expiry_date()`.
The timestamp is put into `timestamp_t_var`.
-- 
2.19.1.816.gcd69ec8cd



[PATCH] builtin/notes: remove unnecessary free

2018-11-11 Thread Carlo Marcelo Arenas Belón
511726e4b1 ("builtin/notes: fix premature failure when trying to add
the empty blob", 2014-11-09) removed the check for !len but left a
call to free the buffer that will be otherwise NULL

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 builtin/notes.c | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/builtin/notes.c b/builtin/notes.c
index c05cd004ab..68062f7475 100644
--- a/builtin/notes.c
+++ b/builtin/notes.c
@@ -255,10 +255,8 @@ static int parse_reuse_arg(const struct option *opt, const 
char *arg, int unset)
 
if (get_oid(arg, &object))
die(_("failed to resolve '%s' as a valid ref."), arg);
-   if (!(buf = read_object_file(&object, &type, &len))) {
-   free(buf);
+   if (!(buf = read_object_file(&object, &type, &len)))
die(_("failed to read object '%s'."), arg);
-   }
if (type != OBJ_BLOB) {
free(buf);
die(_("cannot read note data from non-blob object '%s'."), arg);
-- 
2.19.1.856.g8858448bb



[PATCH 0/2] avoid unsigned long for timestamps

2018-11-12 Thread Carlo Marcelo Arenas Belón
specially problematic in Windows where unsigned long is only 32bit wide
and therefore the assumption that a time_t would fit will lead to loss
of precision in a 64bit OS.

 builtin/commit.c | 4 ++--
 read-cache.c | 6 +++---
 2 files changed, 5 insertions(+), 5 deletions(-)




[PATCH 1/2] builtin/commit: use timestamp_t in parse_force_date

2018-11-12 Thread Carlo Marcelo Arenas Belón
when dddbad728c ("timestamp_t: a new data type for timestamps", 2017-04-26)
was introduced, the fallback to use approxidate that was introduced in
14ac2864dc ("commit: accept more date formats for "--date"", 2014-05-01)
was not updated to use the new type instead of unsigned long.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 builtin/commit.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/builtin/commit.c b/builtin/commit.c
index 0d9828e29e..a447e08f62 100644
--- a/builtin/commit.c
+++ b/builtin/commit.c
@@ -537,10 +537,10 @@ static int parse_force_date(const char *in, struct strbuf 
*out)
 
if (parse_date(in, out) < 0) {
int errors = 0;
-   unsigned long t = approxidate_careful(in, &errors);
+   timestamp_t t = approxidate_careful(in, &errors);
if (errors)
return -1;
-   strbuf_addf(out, "%lu", t);
+   strbuf_addf(out, "%"PRItime, t);
}
 
return 0;
-- 
2.19.1.856.g8858448bb



[PATCH 2/2] read-cache: use time_t instead of unsigned long

2018-11-12 Thread Carlo Marcelo Arenas Belón
b968372279 ("read-cache: unlink old sharedindex files", 2017-03-06)
introduced get_shared_index_expire_date using unsigned long to track
the modification times of a shared index.

dddbad728c ("timestamp_t: a new data type for timestamps", 2017-04-26)
shows why that might problematic so move to time_t instead.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 read-cache.c | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 7b1354d759..5525d8e679 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2625,9 +2625,9 @@ static int write_split_index(struct index_state *istate,
 
 static const char *shared_index_expire = "2.weeks.ago";
 
-static unsigned long get_shared_index_expire_date(void)
+static time_t get_shared_index_expire_date(void)
 {
-   static unsigned long shared_index_expire_date;
+   static time_t shared_index_expire_date;
static int shared_index_expire_date_prepared;
 
if (!shared_index_expire_date_prepared) {
@@ -2643,7 +2643,7 @@ static unsigned long get_shared_index_expire_date(void)
 static int should_delete_shared_index(const char *shared_index_path)
 {
struct stat st;
-   unsigned long expiration;
+   time_t expiration;
 
/* Check timestamp */
expiration = get_shared_index_expire_date();
-- 
2.19.1.856.g8858448bb



[PATCH 2/2] read-cache: use time_t instead of unsigned long

2018-11-12 Thread Carlo Marcelo Arenas Belón
There are still some more possible improvements around this code but
they are orthogonal to this change :

* migrate to approxidate_careful or parse_expiry_date
* maybe make sure only approxidate are used for expiration

Changes in v2:
* improved commit message as suggested by Eric
* failsafe against time_t truncation as suggested by Junio

-- >8 --
Subject: [PATCH v2 2/2] read-cache: use time specific types instead of
 unsigned long

b968372279 ("read-cache: unlink old sharedindex files", 2017-03-06)
introduced get_shared_index_expire_date using unsigned long to track
the modification times of a shared index.

dddbad728c ("timestamp_t: a new data type for timestamps", 2017-04-26)
shows why that might be problematic so move to timestamp_t/time_t.

if time_t can't represent a valid time keep the indexes for failsafe

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 read-cache.c | 12 ++--
 1 file changed, 6 insertions(+), 6 deletions(-)

diff --git a/read-cache.c b/read-cache.c
index 7b1354d759..7d322f11c8 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2625,9 +2625,9 @@ static int write_split_index(struct index_state *istate,
 
 static const char *shared_index_expire = "2.weeks.ago";
 
-static unsigned long get_shared_index_expire_date(void)
+static timestamp_t get_shared_index_expire_date(void)
 {
-   static unsigned long shared_index_expire_date;
+   static timestamp_t shared_index_expire_date;
static int shared_index_expire_date_prepared;
 
if (!shared_index_expire_date_prepared) {
@@ -2643,12 +2643,12 @@ static unsigned long get_shared_index_expire_date(void)
 static int should_delete_shared_index(const char *shared_index_path)
 {
struct stat st;
-   unsigned long expiration;
+   time_t expiration;
+   timestamp_t t = get_shared_index_expire_date();
 
-   /* Check timestamp */
-   expiration = get_shared_index_expire_date();
-   if (!expiration)
+   if (!t || date_overflows(t))
return 0;
+   expiration = t;
if (stat(shared_index_path, &st))
return error_errno(_("could not stat '%s'"), shared_index_path);
if (st.st_mtime > expiration)
-- 
2.19.1.856.g8858448bb



[PATCH v5] clone: report duplicate entries on case-insensitive filesystems

2018-11-19 Thread Carlo Marcelo Arenas Belón
While I don't have an HFS+ volume to test, I suspect this patch should be
needed for both, even if I have to say thay even the broken output was
better than the current state.

Travis seems to be using a case sensitive filesystem so wouldn't catch this.

Was windows/cygwin tested?

Carlo
-- >8 --
Subject: [PATCH] entry: fix t5061 on macOS

b878579ae7 ("clone: report duplicate entries on case-insensitive filesystems",
2018-08-17) was tested on Linux with an excemption for Windows that needs
to be expanded for macOS (using APFS), which then would show :

$ git clone git://git.kernel.org/pub/scm/docs/man-pages/man-pages.git
warning: the following paths have collided (e.g. case-sensitive paths
on a case-insensitive filesystem) and only one from the same
colliding group is in the working tree:

  'man2/_Exit.2'
  'man2/_exit.2'
  'man3/NAN.3'
  'man3/nan.3'

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 entry.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/entry.c b/entry.c
index 5d136c5d55..3845f570f7 100644
--- a/entry.c
+++ b/entry.c
@@ -404,7 +404,7 @@ static void mark_colliding_entries(const struct checkout 
*state,
 {
int i, trust_ino = check_stat;
 
-#if defined(GIT_WINDOWS_NATIVE)
+#if defined(GIT_WINDOWS_NATIVE) || defined(__APPLE__)
trust_ino = 0;
 #endif
 
-- 
2.20.0.rc0



[PATCH] t5562: skip if NO_CURL is enabled

2018-11-19 Thread Carlo Marcelo Arenas Belón
6c213e863a ("http-backend: respect CONTENT_LENGTH for receive-pack", 2018-07-27)
introduced all tests but without a check for CURL support from git.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 t/t5562-http-backend-content-length.sh | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/t/t5562-http-backend-content-length.sh 
b/t/t5562-http-backend-content-length.sh
index b24d8b05a4..7594899471 100755
--- a/t/t5562-http-backend-content-length.sh
+++ b/t/t5562-http-backend-content-length.sh
@@ -3,6 +3,12 @@
 test_description='test git-http-backend respects CONTENT_LENGTH'
 . ./test-lib.sh
 
+if test -n "$NO_CURL"
+then
+   skip_all='skipping test, git built without http support'
+   test_done
+fi
+
 test_lazy_prereq GZIP 'gzip --version'
 
 verify_http_result() {
-- 
2.20.0.rc0



[PATCH v1 1/1] t5601-99: Enable colliding file detection for MINGW

2018-11-22 Thread Carlo Marcelo Arenas Belón
Which FS was this tested on?, is Git LFS I keep hearing about also considered
a "filesystem" for git?

Could you also test with the following applied on top?

Carlo
-- >8 --
Subject: [PATCH] entry: remove windows fallback to inode checking

this test is really FS specific, so is better to avoid any compiled
assumptions about the platform and let the user drive the fallback
through core.checkStat instead

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 entry.c | 4 
 1 file changed, 4 deletions(-)

diff --git a/entry.c b/entry.c
index 0a3c451f5f..5ae74856e6 100644
--- a/entry.c
+++ b/entry.c
@@ -404,10 +404,6 @@ static void mark_colliding_entries(const struct checkout 
*state,
 {
int i, trust_ino = check_stat;
 
-#if defined(GIT_WINDOWS_NATIVE) || defined(__CYGWIN__)
-   trust_ino = 0;
-#endif
-
ce->ce_flags |= CE_MATCHED;
 
for (i = 0; i < state->istate->cache_nr; i++) {
-- 
2.20.0.rc1



[RFC PATCH 1/7] Documentation: update INSTALL for NetBSD

2018-11-25 Thread Carlo Marcelo Arenas Belón
NetBSD added a BSD licensed reimplementation of GNU libintl to
its base at least since release 4.0 (mid 2012) and git can be
configured to build with it.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 INSTALL | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/INSTALL b/INSTALL
index c39006e8e7..100718989f 100644
--- a/INSTALL
+++ b/INSTALL
@@ -154,11 +154,11 @@ Issues of note:
  git-gui, you can use NO_TCLTK.
 
- A gettext library is used by default for localizing Git. The
- primary target is GNU libintl, but the Solaris gettext
- implementation also works.
+ primary target is GNU libintl, but the Solaris and NetBSD gettext
+ implementations also work.
 
  We need a gettext.h on the system for C code, gettext.sh (or
- Solaris gettext(1)) for shell scripts, and libintl-perl for Perl
+ gettext(1) utility) for shell scripts, and libintl-perl for Perl
  programs.
 
  Set NO_GETTEXT to disable localization support and make Git only
-- 
2.20.0.rc1



[RFC PATCH 0/7] test: NetBSD support

2018-11-25 Thread Carlo Marcelo Arenas Belón
Likely still missing changes as it only completes a run with a minimal
number of dependencies but open for feedback

Requires pkgsrc packages for gmake, perl, bash and curl and completes a run

  $ gmake SHELL_PATH=/usr/pkg/bin/bash NO_PYTHON=1 CURL_DIR=/usr/pkg test

Carlo Marcelo Arenas Belón (7):
  Documentation: update INSTALL for NetBSD
  t0301: remove trailing / for dir creation
  config.mak.uname: NetBSD uses BSD semantics with fread for directories
  config.mak.uname: NetBSD uses old iconv interface
  test-lib: use pkgsrc provided unzip for NetBSD
  t5004: use GNU tar to avoid known issues with BSD tar
  config.mak.uname: use pkgsrc perl for NetBSD

 INSTALL | 6 +++---
 config.mak.uname| 3 +++
 t/t0301-credential-cache.sh | 8 
 t/t5004-archive-corner-cases.sh | 2 ++
 t/test-lib.sh   | 1 +
 5 files changed, 13 insertions(+), 7 deletions(-)

-- 
2.20.0.rc1



[RFC PATCH 2/7] t0301: remove trailing / for dir creation

2018-11-25 Thread Carlo Marcelo Arenas Belón
the semantics of how mkdir -p should work, specially when using -m are
not standard and in this case NetBSD will assume that the permision
should not be changed, breaking the test

-p is technically not needed either, but will be cleared in a future
patch eventhough it could be considered an alternative fix

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 t/t0301-credential-cache.sh | 8 
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/t/t0301-credential-cache.sh b/t/t0301-credential-cache.sh
index fd92533acf..9529c612af 100755
--- a/t/t0301-credential-cache.sh
+++ b/t/t0301-credential-cache.sh
@@ -77,9 +77,9 @@ test_expect_success "use custom XDG_CACHE_HOME even if xdg 
socket exists" '
 test_expect_success 'use user socket if user directory exists' '
test_when_finished "
git credential-cache exit &&
-   rmdir \"\$HOME/.git-credential-cache/\"
+   rmdir \"\$HOME/.git-credential-cache\"
" &&
-   mkdir -p -m 700 "$HOME/.git-credential-cache/" &&
+   mkdir -p -m 700 "$HOME/.git-credential-cache" &&
check approve cache <<-\EOF &&
protocol=https
host=example.com
@@ -92,10 +92,10 @@ test_expect_success 'use user socket if user directory 
exists' '
 test_expect_success SYMLINKS 'use user socket if user directory is a symlink 
to a directory' '
test_when_finished "
git credential-cache exit &&
-   rmdir \"\$HOME/dir/\" &&
+   rmdir \"\$HOME/dir\" &&
rm \"\$HOME/.git-credential-cache\"
" &&
-   mkdir -p -m 700 "$HOME/dir/" &&
+   mkdir -p -m 700 "$HOME/dir" &&
ln -s "$HOME/dir" "$HOME/.git-credential-cache" &&
check approve cache <<-\EOF &&
protocol=https
-- 
2.20.0.rc1



[RFC PATCH 4/7] config.mak.uname: NetBSD uses old iconv interface

2018-11-25 Thread Carlo Marcelo Arenas Belón
prevents the following warning :

utf8.c: In function 'reencode_string_iconv':
utf8.c:486:28: warning: passing argument 2 of 'iconv' from incompatible pointer 
type [-Wincompatible-pointer-types]
   size_t cnt = iconv(conv, &cp, &insz, &outpos, &outsz);
^
In file included from git-compat-util.h:275:0,
 from utf8.c:1:
/usr/include/iconv.h:46:8: note: expected 'const char ** restrict' but argument 
is of type 'char **'
 size_t iconv(iconv_t, const char ** __restrict,
^

it is set by optional configure at least since NetBSD 6.0

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 config.mak.uname | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config.mak.uname b/config.mak.uname
index 36c973c7e6..59ce03819b 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -246,6 +246,7 @@ ifeq ($(uname_S),NetBSD)
ifeq ($(shell expr "$(uname_R)" : '[01]\.'),2)
NEEDS_LIBICONV = YesPlease
endif
+   OLD_ICONV = UnfortunatelyYes
BASIC_CFLAGS += -I/usr/pkg/include
BASIC_LDFLAGS += -L/usr/pkg/lib $(CC_LD_DYNPATH)/usr/pkg/lib
USE_ST_TIMESPEC = YesPlease
-- 
2.20.0.rc1



[RFC PATCH 3/7] config.mak.uname: NetBSD uses BSD semantics with fread for directories

2018-11-25 Thread Carlo Marcelo Arenas Belón
this "fixes" test 23 (proper error on directory "files") from t1308

other BSD (OpenBSD, MirBSD) likely also affected but they will be
fixed in a different series

the optional 'configure' sets this automatically and is probably what
most users from this platform had been doing as a workaround

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 config.mak.uname | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config.mak.uname b/config.mak.uname
index 3ee7da0e23..36c973c7e6 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -253,6 +253,7 @@ ifeq ($(uname_S),NetBSD)
HAVE_BSD_SYSCTL = YesPlease
HAVE_BSD_KERN_PROC_SYSCTL = YesPlease
PROCFS_EXECUTABLE_PATH = /proc/curproc/exe
+   FREAD_READS_DIRECTORIES = UnfortunatelyYes
 endif
 ifeq ($(uname_S),AIX)
DEFAULT_PAGER = more
-- 
2.20.0.rc1



[RFC PATCH 6/7] t5004: use GNU tar to avoid known issues with BSD tar

2018-11-25 Thread Carlo Marcelo Arenas Belón
56ee96572a ("t5004: resurrect original empty tar archive test", 2013-05-09)
added a test to try to detect and workaround issues with the standard tar
from BSD, but at least in NetBSD would be better to instead require GNU tar
which is available from pkgsrc

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 t/t5004-archive-corner-cases.sh | 2 ++
 1 file changed, 2 insertions(+)

diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
index ced44355ca..baafc553f8 100755
--- a/t/t5004-archive-corner-cases.sh
+++ b/t/t5004-archive-corner-cases.sh
@@ -31,6 +31,8 @@ test_lazy_prereq UNZIP_ZIP64_SUPPORT '
"$GIT_UNZIP" -v | grep ZIP64_SUPPORT
 '
 
+test $uname_s = NetBSD && TAR="gtar"
+
 # bsdtar/libarchive versions before 3.1.3 consider a tar file with a
 # global pax header that is not followed by a file record as corrupt.
 if "$TAR" tf "$TEST_DIRECTORY"/t5004/empty-with-pax-header.tar >/dev/null 2>&1
-- 
2.20.0.rc1



[RFC PATCH 5/7] test-lib: use pkgsrc provided unzip for NetBSD

2018-11-25 Thread Carlo Marcelo Arenas Belón
d98b2c5fce ("test-lib: on FreeBSD, look for unzip(1) in /usr/local/bin/",
2016-07-21) added an exception to the test suite for FreeBSD because the
tests assume functionality not provided by its base unzip tool.

NetBSD shares that limitation and provides a package that could be used
instead so all tests from t5003 succeed

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 t/test-lib.sh | 1 +
 1 file changed, 1 insertion(+)

diff --git a/t/test-lib.sh b/t/test-lib.sh
index 6c6c0af7a1..2acb35f277 100644
--- a/t/test-lib.sh
+++ b/t/test-lib.sh
@@ -1244,6 +1244,7 @@ test_lazy_prereq SANITY '
 '
 
 test FreeBSD != $uname_s || GIT_UNZIP=${GIT_UNZIP:-/usr/local/bin/unzip}
+test $uname_s = NetBSD && GIT_UNZIP=${GIT_UNZIP:-/usr/pkg/bin/unzip}
 GIT_UNZIP=${GIT_UNZIP:-unzip}
 test_lazy_prereq UNZIP '
"$GIT_UNZIP" -v
-- 
2.20.0.rc1



[RFC PATCH 7/7] config.mak.uname: use pkgsrc perl for NetBSD

2018-11-25 Thread Carlo Marcelo Arenas Belón
otherwise will default to /usr/bin/perl which wouldn't normally exist

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 config.mak.uname | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config.mak.uname b/config.mak.uname
index 59ce03819b..d2edb723f4 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -249,6 +249,7 @@ ifeq ($(uname_S),NetBSD)
OLD_ICONV = UnfortunatelyYes
BASIC_CFLAGS += -I/usr/pkg/include
BASIC_LDFLAGS += -L/usr/pkg/lib $(CC_LD_DYNPATH)/usr/pkg/lib
+   PERL_PATH = /usr/pkg/bin/perl
USE_ST_TIMESPEC = YesPlease
HAVE_PATHS_H = YesPlease
HAVE_BSD_SYSCTL = YesPlease
-- 
2.20.0.rc1



[PATCH] config.mak.dev: enable -Wpedantic in clang

2018-11-27 Thread Carlo Marcelo Arenas Belón
DEVOPTS=pedantic adds -pedantic to the compiler flags, but misses on some
diagnostics when using clang, and that are only enabled with -Wpedantic

46c0eb5843 ("files-backend.c: fix build error on Solaris", 2018-11-25)
fixes an issue that was visible also with gcc but not clang so correct
that with the hope that in the future CI could be used for early detection
of similar issues

-Wpedantic is only enabled for clang 10 or higher (only available in macOS
with latest Xcode) but this restriction should be relaxed further as more
environments are tested

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 config.mak.dev | 11 +++
 1 file changed, 7 insertions(+), 4 deletions(-)

diff --git a/config.mak.dev b/config.mak.dev
index bbeeff44fe..ad25beacd8 100644
--- a/config.mak.dev
+++ b/config.mak.dev
@@ -1,8 +1,15 @@
+ifndef COMPILER_FEATURES
+COMPILER_FEATURES := $(shell ./detect-compiler $(CC))
+endif
+
 ifeq ($(filter no-error,$(DEVOPTS)),)
 CFLAGS += -Werror
 endif
 ifneq ($(filter pedantic,$(DEVOPTS)),)
 CFLAGS += -pedantic
+ifneq ($(filter clang10,$(COMPILER_FEATURES)),)
+CFLAGS += -Wpedantic
+endif
 # don't warn for each N_ use
 CFLAGS += -DUSE_PARENS_AROUND_GETTEXT_N=0
 endif
@@ -16,10 +23,6 @@ CFLAGS += -Wstrict-prototypes
 CFLAGS += -Wunused
 CFLAGS += -Wvla
 
-ifndef COMPILER_FEATURES
-COMPILER_FEATURES := $(shell ./detect-compiler $(CC))
-endif
-
 ifneq ($(filter clang4,$(COMPILER_FEATURES)),)
 CFLAGS += -Wtautological-constant-out-of-range-compare
 endif
-- 
2.20.0.rc1



[PATCH] t6036: avoid "cp -a"

2018-11-30 Thread Carlo Marcelo Arenas Belón
b8cd1bb713 ("t6036, t6043: increase code coverage for file collision handling", 
2018-11-07) uses this GNU extension that is not available in a POSIX complaint
cp; use cp -R instead

Signed-off-by: Carlo Marcelo Arenas Belón 
---
to be applied on top of en/merge-path-collision for next

 t/t6036-recursive-corner-cases.sh | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/t/t6036-recursive-corner-cases.sh 
b/t/t6036-recursive-corner-cases.sh
index b7488b00c0..fdb120d0dc 100755
--- a/t/t6036-recursive-corner-cases.sh
+++ b/t/t6036-recursive-corner-cases.sh
@@ -1631,7 +1631,7 @@ test_expect_success 'check nested conflicts' '
 
# Compare m to expected contents
>empty &&
-   cp -a m_stage_2 expected_final_m &&
+   cp -R m_stage_2 expected_final_m &&
test_must_fail git merge-file --diff3 \
-L "HEAD" \
-L "merged common ancestors"  \
-- 
2.20.0.rc1.6.ga1598010f



Re: [PATCH] t6036: avoid "cp -a"

2018-12-01 Thread Carlo Marcelo Arenas Belón
Thanks both. Agree with Junio it would be better if squashed; apologize
for not catching it earlier, but the following might help to make it
visible for anyone that care to run the linter:

  $ make test-lint-shell-syntax

Carlo
-- >8 --
From: =?UTF-8?q?Carlo=20Marcelo=20Arenas=20Bel=C3=B3n?= 
Subject: [PATCH] tests: add lint for non portable cp -a

cp -a, while a common flag isn't in POSIX and will therefore fail
on systems that don't have GNUish tools (like OpenBSD, AIX or Solaris)

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 t/check-non-portable-shell.pl | 1 +
 1 file changed, 1 insertion(+)

diff --git a/t/check-non-portable-shell.pl b/t/check-non-portable-shell.pl
index b45bdac688..8037eef777 100755
--- a/t/check-non-portable-shell.pl
+++ b/t/check-non-portable-shell.pl
@@ -35,6 +35,7 @@ sub err {
chomp;
}
 
+   /\bcp\s+-a/ and err 'cp -a is not portable';
/\bsed\s+-i/ and err 'sed -i is not portable';
/\becho\s+-[neE]/ and err 'echo with option is not portable (use 
printf)';
/^\s*declare\s+/ and err 'arrays/declare not portable';
-- 
2.20.0.rc2



[PATCH] t5004: avoid using tar for empty packages

2018-12-01 Thread Carlo Marcelo Arenas Belón
ea2d20d4c2 ("t5004: avoid using tar for checking emptiness of archive",
2013-05-09), introduced a fake empty tar archive to allow for portable
tests of emptiness without having to invoke tar

4318094047 ("archive: don't add empty directories to archives", 2017-09-13)
changed the expected result for its tests from one containing an empty
directory to a plain empty archive but the portable test wasn't updated
resulting on them failing again in (at least) NetBSD and OpenBSD

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 t/t5004-archive-corner-cases.sh | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
index ced44355ca..271eb5a1fd 100755
--- a/t/t5004-archive-corner-cases.sh
+++ b/t/t5004-archive-corner-cases.sh
@@ -3,8 +3,12 @@
 test_description='test corner cases of git-archive'
 . ./test-lib.sh
 
-test_expect_success 'create commit with empty tree' '
-   git commit --allow-empty -m foo
+# the 10knuls.tar file is used to test for an empty git generated tar
+# without having to invoke tar because an otherwise valid empty GNU tar
+# will be considered broken by {Open,Net}BSD tar
+test_expect_success 'create commit with empty tree and fake empty tar' '
+   git commit --allow-empty -m foo &&
+   perl -e "print \"\\0\" x 10240" >10knuls.tar
 '
 
 # Make a dir and clean it up afterwards
@@ -47,7 +51,6 @@ test_expect_success HEADER_ONLY_TAR_OK 'tar archive of commit 
with empty tree' '
 
 test_expect_success 'tar archive of empty tree is empty' '
git archive --format=tar HEAD: >empty.tar &&
-   perl -e "print \"\\0\" x 10240" >10knuls.tar &&
test_cmp_bin 10knuls.tar empty.tar
 '
 
@@ -106,16 +109,12 @@ test_expect_success 'create a commit with an empty 
subtree' '
 
 test_expect_success 'archive empty subtree with no pathspec' '
git archive --format=tar $root_tree >subtree-all.tar &&
-   make_dir extract &&
-   "$TAR" xf subtree-all.tar -C extract &&
-   check_dir extract
+   test_cmp_bin 10knuls.tar subtree-all.tar
 '
 
 test_expect_success 'archive empty subtree by direct pathspec' '
git archive --format=tar $root_tree -- sub >subtree-path.tar &&
-   make_dir extract &&
-   "$TAR" xf subtree-path.tar -C extract &&
-   check_dir extract
+   test_cmp_bin 10knuls.tar subtree-path.tar
 '
 
 ZIPINFO=zipinfo
-- 
2.20.0.rc2



[PATCH] t5004: avoid using tar for empty packages

2018-12-01 Thread Carlo Marcelo Arenas Belón
ea2d20d4c2 ("t5004: avoid using tar for checking emptiness of archive",
2013-05-09), introduced a fake empty tar archive to allow for portable
tests of emptiness without having to invoke tar

4318094047 ("archive: don't add empty directories to archives", 2017-09-13)
changed the expected result for its tests from one containing an empty
directory to a plain empty archive but the portable test wasn't updated
resulting on them failing again in (at least) NetBSD and OpenBSD

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 t/t5004-archive-corner-cases.sh | 17 -
 1 file changed, 8 insertions(+), 9 deletions(-)

diff --git a/t/t5004-archive-corner-cases.sh b/t/t5004-archive-corner-cases.sh
index ced44355ca..271eb5a1fd 100755
--- a/t/t5004-archive-corner-cases.sh
+++ b/t/t5004-archive-corner-cases.sh
@@ -3,8 +3,12 @@
 test_description='test corner cases of git-archive'
 . ./test-lib.sh
 
-test_expect_success 'create commit with empty tree' '
-   git commit --allow-empty -m foo
+# the 10knuls.tar file is used to test for an empty git generated tar
+# without having to invoke tar because an otherwise valid empty GNU tar
+# will be considered broken by {Open,Net}BSD tar
+test_expect_success 'create commit with empty tree and fake empty tar' '
+   git commit --allow-empty -m foo &&
+   perl -e "print \"\\0\" x 10240" >10knuls.tar
 '
 
 # Make a dir and clean it up afterwards
@@ -47,7 +51,6 @@ test_expect_success HEADER_ONLY_TAR_OK 'tar archive of commit 
with empty tree' '
 
 test_expect_success 'tar archive of empty tree is empty' '
git archive --format=tar HEAD: >empty.tar &&
-   perl -e "print \"\\0\" x 10240" >10knuls.tar &&
test_cmp_bin 10knuls.tar empty.tar
 '
 
@@ -106,16 +109,12 @@ test_expect_success 'create a commit with an empty 
subtree' '
 
 test_expect_success 'archive empty subtree with no pathspec' '
git archive --format=tar $root_tree >subtree-all.tar &&
-   make_dir extract &&
-   "$TAR" xf subtree-all.tar -C extract &&
-   check_dir extract
+   test_cmp_bin 10knuls.tar subtree-all.tar
 '
 
 test_expect_success 'archive empty subtree by direct pathspec' '
git archive --format=tar $root_tree -- sub >subtree-path.tar &&
-   make_dir extract &&
-   "$TAR" xf subtree-path.tar -C extract &&
-   check_dir extract
+   test_cmp_bin 10knuls.tar subtree-path.tar
 '
 
 ZIPINFO=zipinfo
-- 
2.20.0.rc2



[PATCH 1/2] config.mak.uname: OpenBSD uses BSD semantics with fread for directories

2018-12-01 Thread Carlo Marcelo Arenas Belón
this "fixes" test 23 (proper error on directory "files") from t1308

MirBSD likely also affected but this was only tested with OpenBSD and
therefore this specific change only affects that platform

the optional 'configure' sets this automatically (tested with 6.1 to 6.4)
but considering this is a legacy feature it is likely that it affected
all old versions and is probably what most users had been using as a
workaround

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 config.mak.uname | 1 +
 1 file changed, 1 insertion(+)

diff --git a/config.mak.uname b/config.mak.uname
index 3ee7da0e23..378ca0a582 100644
--- a/config.mak.uname
+++ b/config.mak.uname
@@ -233,6 +233,7 @@ ifeq ($(uname_S),OpenBSD)
HAVE_BSD_SYSCTL = YesPlease
HAVE_BSD_KERN_PROC_SYSCTL = YesPlease
PROCFS_EXECUTABLE_PATH = /proc/curproc/file
+   FREAD_READS_DIRECTORIES = UnfortunatelyYes
 endif
 ifeq ($(uname_S),MirBSD)
NO_STRCASESTR = YesPlease
-- 
2.20.0.rc2



[PATCH] unpack-trees: avoid dead store for struct progress

2018-10-18 Thread Carlo Marcelo Arenas Belón
it is unconditionally initialized a few lines below

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 unpack-trees.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/unpack-trees.c b/unpack-trees.c
index f25089b878..88dc9a615e 100644
--- a/unpack-trees.c
+++ b/unpack-trees.c
@@ -340,7 +340,7 @@ static int check_updates(struct unpack_trees_options *o)
 {
unsigned cnt = 0;
int errs = 0;
-   struct progress *progress = NULL;
+   struct progress *progress;
struct index_state *index = &o->result;
struct checkout state = CHECKOUT_INIT;
int i;
-- 
2.19.1



[PATCH] multi-pack-index: avoid dead store for struct progress

2018-10-18 Thread Carlo Marcelo Arenas Belón
it is initialized unconditionally by a call to start_progress
below.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 midx.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/midx.c b/midx.c
index ea2f3ffe2e..4fac0cd08a 100644
--- a/midx.c
+++ b/midx.c
@@ -941,7 +941,7 @@ static void midx_report(const char *fmt, ...)
 int verify_midx_file(const char *object_dir)
 {
uint32_t i;
-   struct progress *progress = NULL;
+   struct progress *progress;
struct multi_pack_index *m = load_multi_pack_index(object_dir, 1);
verify_midx_error = 0;
 
-- 
2.19.1



[PATCH] builtin/receive-pack: dead initializer for retval in check_nonce

2018-10-20 Thread Carlo Marcelo Arenas Belón
NONCE_BAD is explicitly set when needed with the fallback
instead as NONCE_SLOP

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 builtin/receive-pack.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/builtin/receive-pack.c b/builtin/receive-pack.c
index 95740f4f0e..ecce3d4043 100644
--- a/builtin/receive-pack.c
+++ b/builtin/receive-pack.c
@@ -507,7 +507,7 @@ static const char *check_nonce(const char *buf, size_t len)
char *nonce = find_header(buf, len, "nonce", NULL);
timestamp_t stamp, ostamp;
char *bohmac, *expect = NULL;
-   const char *retval = NONCE_BAD;
+   const char *retval;
 
if (!nonce) {
retval = NONCE_MISSING;
-- 
2.19.1



[PATCH] read-cache: use of memory after it is freed

2018-10-20 Thread Carlo Marcelo Arenas Belón
introduced with c46c406ae1e (trace.h: support nested performance tracing)
on Aug 18, 2018 but not affecting maint

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 read-cache.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/read-cache.c b/read-cache.c
index 1df5c16dbc..78f47d2f50 100644
--- a/read-cache.c
+++ b/read-cache.c
@@ -2297,8 +2297,8 @@ int read_index_from(struct index_state *istate, const 
char *path,
freshen_shared_index(base_path, 0);
merge_base_index(istate);
post_read_index_from(istate);
-   free(base_path);
trace_performance_leave("read cache %s", base_path);
+   free(base_path);
return ret;
 }
 
-- 
2.19.1



[PATCH] cc5e1bf992 (gettext: avoid initialization if the locale dir is not present, 2018-04-21) changed the way the gettext initialization is done skipping most of it for performance reasons if the lo

2019-08-07 Thread Carlo Marcelo Arenas Belón
in environments where the build running wasn't installed and wasn't
using NO_GETTEXT the initialization of charset will be skipped, breaking
is_utf_locale()

Split the init function on two, so the initialization of charset could
be done before a decision to abort was made and therefore keeping most
of the performance improvement.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 gettext.c | 14 +++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/gettext.c b/gettext.c
index d4021d690c..3ecf456f74 100644
--- a/gettext.c
+++ b/gettext.c
@@ -69,7 +69,14 @@ static int test_vsnprintf(const char *fmt, ...)
return ret;
 }
 
-static void init_gettext_charset(const char *domain)
+static void init_gettext_charset(void)
+{
+   const char *current = setlocale(LC_CTYPE, "");
+   charset = locale_charset();
+   setlocale(LC_CTYPE, current);
+}
+
+static void bind_gettext_charset(const char *domain)
 {
/*
   This trick arranges for messages to be emitted in the user's
@@ -150,7 +157,7 @@ static void init_gettext_charset(const char *domain)
   2. E.g. "Content-Type: text/plain; charset=UTF-8\n" in po/is.po
*/
setlocale(LC_CTYPE, "");
-   charset = locale_charset();
+   /* charset was already initialized in init_gettext_charset() */
bind_textdomain_codeset(domain, charset);
/* the string is taken from v0.99.6~1 */
if (test_vsnprintf("%.*s", 13, "David_K\345gedal") < 0)
@@ -166,6 +173,7 @@ void git_setup_gettext(void)
podir = p = system_path(GIT_LOCALE_PATH);
 
use_gettext_poison(); /* getenv() reentrancy paranoia */
+   init_gettext_charset();
 
if (!is_directory(podir)) {
free(p);
@@ -175,7 +183,7 @@ void git_setup_gettext(void)
bindtextdomain("git", podir);
setlocale(LC_MESSAGES, "");
setlocale(LC_TIME, "");
-   init_gettext_charset("git");
+   bind_gettext_charset("git");
textdomain("git");
 
free(p);
-- 
2.23.0.rc1



[RFC PATCH v4 1/3] grep: make PCRE1 aware of custom allocator

2019-08-07 Thread Carlo Marcelo Arenas Belón
63e7e9d8b6 ("git-grep: Learn PCRE", 2011-05-09) didn't include a way
to override the system alocator, and so it is incompatible with
USE_NED_ALLOCATOR as reported by Dscho[1] (in similar code from PCRE2)

Make the minimum change possible to ensure this combination is supported
by extending grep_init to set the PCRE1 specific functions to the NED
versions (using the aliased names though) and therefore making sure all
alocations are done inside PCRE1 with the same allocator than the rest
of git.

This change might have performance impact (hopefully for the best) and
so will be good idea to test it in a platform where NED might have a
positive impact (ex: Windows 7)

[1] https://public-inbox.org/git/pull.306.git.gitgitgad...@gmail.com

Signed-off-by: Carlo Marcelo Arenas Belón 
Signed-off-by: Junio C Hamano 
---
 Makefile |  2 +-
 grep.c   | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index bd246f2989..4b384f3759 100644
--- a/Makefile
+++ b/Makefile
@@ -1764,7 +1764,7 @@ ifdef NATIVE_CRLF
 endif
 
 ifdef USE_NED_ALLOCATOR
-   COMPAT_CFLAGS += -Icompat/nedmalloc
+   COMPAT_CFLAGS += -DUSE_NED_ALLOCATOR -Icompat/nedmalloc
COMPAT_OBJS += compat/nedmalloc/nedmalloc.o
OVERRIDE_STRDUP = YesPlease
 endif
diff --git a/grep.c b/grep.c
index cd952ef5d3..0154998695 100644
--- a/grep.c
+++ b/grep.c
@@ -150,12 +150,22 @@ int grep_config(const char *var, const char *value, void 
*cb)
  * Initialize one instance of grep_opt and copy the
  * default values from the template we read the configuration
  * information in an earlier call to git_config(grep_config).
+ *
+ * If using PCRE make sure that the library is configured
+ * to use the right allocator (ex: NED)
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char 
*prefix)
 {
struct grep_opt *def = &grep_defaults;
int i;
 
+#ifdef USE_NED_ALLOCATOR
+#ifdef USE_LIBPCRE1
+   pcre_malloc = malloc;
+   pcre_free = free;
+#endif
+#endif
+
memset(opt, 0, sizeof(*opt));
opt->repo = repo;
opt->prefix = prefix;
-- 
2.23.0.rc1



[RFC PATCH v4 3/3] grep: avoid leak of chartables in PCRE2

2019-08-07 Thread Carlo Marcelo Arenas Belón
94da9193a6 ("grep: add support for PCRE v2", 2017-06-01) introduced
a small memory leak visible with valgrind in t7813.

Complete the creation of a PCRE2 specific variable that was missing from
the original change and free the generated table just like it is done
for PCRE1.

Signed-off-by: Carlo Marcelo Arenas Belón 
Signed-off-by: Junio C Hamano 
---
 grep.c | 7 ---
 grep.h | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/grep.c b/grep.c
index 8ee982e2e8..ccb6ab38a3 100644
--- a/grep.c
+++ b/grep.c
@@ -541,7 +541,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
PCRE2_UCHAR errbuf[256];
PCRE2_SIZE erroffset;
int options = PCRE2_MULTILINE;
-   const uint8_t *character_tables = NULL;
int jitret;
int patinforet;
size_t jitsizearg;
@@ -557,9 +556,10 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
if (!pcre2_global_context)
BUG("pcre2_global_context uninitialized");
 #endif
-   character_tables = 
pcre2_maketables(pcre2_global_context);
+   p->pcre2_tables = 
pcre2_maketables(pcre2_global_context);
p->pcre2_compile_context = 
pcre2_compile_context_create(NULL);
-   pcre2_set_character_tables(p->pcre2_compile_context, 
character_tables);
+   pcre2_set_character_tables(p->pcre2_compile_context,
+   p->pcre2_tables);
}
options |= PCRE2_CASELESS;
}
@@ -663,6 +663,7 @@ static void free_pcre2_pattern(struct grep_pat *p)
pcre2_match_data_free(p->pcre2_match_data);
pcre2_jit_stack_free(p->pcre2_jit_stack);
pcre2_match_context_free(p->pcre2_match_context);
+   free((void *)p->pcre2_tables);
 }
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt 
*opt)
diff --git a/grep.h b/grep.h
index 526c2db9ef..533165c21a 100644
--- a/grep.h
+++ b/grep.h
@@ -96,6 +96,7 @@ struct grep_pat {
pcre2_compile_context *pcre2_compile_context;
pcre2_match_context *pcre2_match_context;
pcre2_jit_stack *pcre2_jit_stack;
+   const uint8_t *pcre2_tables;
uint32_t pcre2_jit_on;
kwset_t kws;
unsigned fixed:1;
-- 
2.23.0.rc1



[RFC PATCH v4 2/3] grep: make PCRE2 aware of custom allocator

2019-08-07 Thread Carlo Marcelo Arenas Belón
94da9193a6 (grep: add support for PCRE v2, 2017-06-01) didn't include
a way to override the system allocator, and so it is incompatible with
USE_NED_ALLOCATOR.  The problem was made visible when an attempt to
avoid a leak in a data structure that is created by the library was
passed to NED's free for disposal triggering a segfault in Windows.

PCRE2 requires the use of a general context to override the allocator
and therefore, there is a lot more code needed than in PCRE1, including
a couple of wrapper functions.

Extend the grep API with a "destructor" that could be called to cleanup
any objects that were created and used globally.

Update builtin/{grep,log} to use that new API, but any other future
users should make sure to have matching grep_init/grep_destroy calls
if they are using the pattern matching functionality (currently only
relevant when using both NED and PCRE2)

Move the logic to decide if a general context will be needed to an
earlier phase so it will only be done once per pattern (instead of
at least once per worker thread) avoiding then the need for locking.

This change does the minimum change required to hopefully solve the
crash, with the rest of the users of it added later.

Helped-by: René Scharfe 
Reported-by: Johannes Schindelin 
Signed-off-by: Carlo Marcelo Arenas Belón 
---
V4:
* use xmalloc instead as suggested by René and Junio
* "fix" for regression in t7816 as reported by René

 builtin/grep.c |  1 +
 builtin/log.c  |  1 +
 grep.c | 62 --
 grep.h |  1 +
 4 files changed, 58 insertions(+), 7 deletions(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 560051784e..e49c20df60 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1145,5 +1145,6 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
run_pager(&opt, prefix);
clear_pathspec(&pathspec);
free_grep_patterns(&opt);
+   grep_destroy();
return !hit;
 }
diff --git a/builtin/log.c b/builtin/log.c
index 1cf9e37736..139b8770e7 100644
--- a/builtin/log.c
+++ b/builtin/log.c
@@ -2146,6 +2146,7 @@ int cmd_cherry(int argc, const char **argv, const char 
*prefix)
list = list->next;
}
 
+   grep_destroy();
free_patch_ids(&ids);
return 0;
 }
diff --git a/grep.c b/grep.c
index 0154998695..8ee982e2e8 100644
--- a/grep.c
+++ b/grep.c
@@ -16,6 +16,22 @@ static int grep_source_is_binary(struct grep_source *gs,
 
 static struct grep_opt grep_defaults;
 
+#ifdef USE_LIBPCRE2
+static pcre2_general_context *pcre2_global_context;
+
+#ifdef USE_NED_ALLOCATOR
+static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
+{
+   return xmalloc(size); /* will use nedalloc underneath */
+}
+
+static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
+{
+   return free(pointer);
+}
+#endif
+#endif
+
 static const char *color_grep_slots[] = {
[GREP_COLOR_CONTEXT]= "context",
[GREP_COLOR_FILENAME]   = "filename",
@@ -153,6 +169,7 @@ int grep_config(const char *var, const char *value, void 
*cb)
  *
  * If using PCRE make sure that the library is configured
  * to use the right allocator (ex: NED)
+ * if any object is created it should be cleaned up in grep_destroy()
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char 
*prefix)
 {
@@ -188,6 +205,13 @@ void grep_init(struct grep_opt *opt, struct repository 
*repo, const char *prefix
color_set(opt->colors[i], def->colors[i]);
 }
 
+void grep_destroy(void)
+{
+#ifdef USE_LIBPCRE2
+   pcre2_general_context_free(pcre2_global_context);
+#endif
+}
+
 static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, 
struct grep_opt *opt)
 {
/*
@@ -254,12 +278,29 @@ void grep_commit_pattern_type(enum grep_pattern_type 
pattern_type, struct grep_o
grep_set_pattern_type_option(GREP_PATTERN_TYPE_ERE, opt);
 }
 
-static struct grep_pat *create_grep_pat(const char *pat, size_t patlen,
+static struct grep_pat *create_grep_pat(struct grep_opt *opt,
+   const char *pat, size_t patlen,
const char *origin, int no,
enum grep_pat_token t,
enum grep_header_field field)
 {
struct grep_pat *p = xcalloc(1, sizeof(*p));
+
+#if defined(USE_LIBPCRE2) && defined(USE_NED_ALLOCATOR)
+   /* BUG: this is technically not needed if we will do UTF matching
+*  but UTF locale detection is currently broken  */
+   /* SMELL: opt shouldn't be needed at this level but it is used
+*here to keep the way we were detecting the need for
+*the chartable consistent and until it can be refactored
+*properly.  the NULL check is 

[RFC PATCH v4 0/3] grep: no leaks or crashes (windows testing needed)

2019-08-07 Thread Carlo Marcelo Arenas Belón
This series is a candidate reroll for cb/pcre2-chartables-leakfix, that
hopefully addresses the root cause of the problem reported by Dscho in
Windows, where the PCRE2 library wasn't aware of the custom allocator and
was returning a pointer created with the system malloc but passing it to
NED's free, resulting in a segfault.

The reason why it was triggered by the original leak fix is the layering
violation reported by René and that is exclusive to PCRE2 (hence why it
hasn't been reported with PCRE1).  Additional work might be available
in a future release of PCRE2 to address that as detailed in the upstream
bug[1] report.

Eitherway, since I am unable to replicate the original bug or take
performance numbers in a representative environment without Windows
this is only published as an RFC,

Changes since v3 (mostly in patch 2):

* git log also calls the "destructor" for grep API
* no more "bug" being triggered by `make test`, sorry René
* hopefully no more crashes in windows (I was expecting at most a BUG)

Future work (other than the needed refactoring explained in the
second patch) and adjacent bugs, includes:

* tracking more possible users of the grep API that might need to call
  grep_destroy()
* completely moving PCRE2 to use NED (as is done with PCRE1 and was
  proposed on the original patch[2] this is based on
* build on top of the new API so that other work could be shared
  (for example the chartables that started this whole mess)

or (hopefully not)

* ignore the original leak (maybe with an UNLEAK) as René suggested [3]
* discard this work and just use Dscho's fix (probably with some improvements)

Carlo Marcelo Arenas Belón (3):
  grep: make PCRE1 aware of custom allocator
  grep: make PCRE2 aware of custom allocator
  grep: avoid leak of chartables in PCRE2

 Makefile   |  2 +-
 builtin/grep.c |  1 +
 builtin/log.c  |  1 +
 grep.c | 77 --
 grep.h |  2 ++
 5 files changed, 73 insertions(+), 10 deletions(-)

[1] https://bugs.exim.org/show_bug.cgi?id=2429
[2] 
https://public-inbox.org/git/3397e6797f872aedd18c6d795f4976e1c579514b.1565005867.git.gitgitgad...@gmail.com/
[3] https://public-inbox.org/git/7ec60d57-9940-35f2-f7b5-c87d4dc7c...@web.de/

base-commit: 51cf315870bbb7254ddf06c84fe03b41bc48eebd
-- 
2.23.0.rc1


[RFC PATCH v5 0/3] grep: almost no more leaks, hopefully no crashes

2019-08-08 Thread Carlo Marcelo Arenas Belón
This series is a candidate reroll for cb/pcre2-chartables-leakfix, that
hopefully addresses the root cause of the problem reported by Dscho in
Windows, where the PCRE2 library wasn't aware of the custom allocator and
was returning a pointer created with the system malloc but passing it to
NED's free, resulting in a segfault.

The most likely reason why it was triggered by the original leak fix is
the layering violation reported by René and that is likely exclusive to PCRE2
(hence why it hasn't been reported with PCRE1). Additional work might be
available in a future release of PCRE2 to address that as detailed in an
upstream bug[1] report.

Changes since v4 (only in patch 2):

* git log change reverted, still not sure where it will fit better and worst
  case will leak a few bytes when -P is used.
  since the users of this API are doing it indirectly it might be problematic
  long term though, but luckily since it is most of the tine a NOOP and can
  be called multiple times might be ok to do it unconditionally
* slightly better looking code

Changes since v3 (mostly in patch 2):

* git log also calls the "destructor" for grep API
* no more "bug" being triggered by make test, sorry René
* hopefully no more crashes in windows (I was expecting at most a BUG)

Future work (other than the needed refactoring explained in the
second patch) and adjacent bugs, includes:

* tracking more possible users of the grep API that might need to call
  grep_destroy()
* completely moving PCRE2 to use NED (as is done with PCRE1 and was
  proposed on the original patch[2] this is based on
* build on top of the new API so that other work could be shared
  (for example the chartables that started this whole mess)

or (hopefully not)

* ignore the original leak (maybe with an UNLEAK) as René suggested [3]
* discard this work and just use Dscho's fix (with some improvements,
  like using xmalloc)

[1] https://bugs.exim.org/show_bug.cgi?id=2429
[2] 
https://public-inbox.org/git/3397e6797f872aedd18c6d795f4976e1c579514b.1565005867.git.gitgitgad...@gmail.com/
[3] https://public-inbox.org/git/7ec60d57-9940-35f2-f7b5-c87d4dc7c...@web.de/

Carlo Marcelo Arenas Belón (3):
  grep: make PCRE1 aware of custom allocator
  grep: make PCRE2 aware of custom allocator
  grep: avoid leak of chartables in PCRE2

 Makefile   |  2 +-
 builtin/grep.c |  1 +
 grep.c | 71 +++---
 grep.h |  2 ++
 4 files changed, 72 insertions(+), 4 deletions(-)

base-commit: 51cf315870bbb7254ddf06c84fe03b41bc48eebd
-- 
2.23.0.rc1


[RFC PATCH v5 2/3] grep: make PCRE2 aware of custom allocator

2019-08-08 Thread Carlo Marcelo Arenas Belón
94da9193a6 (grep: add support for PCRE v2, 2017-06-01) didn't include
a way to override the system allocator, and so it is incompatible with
USE_NED_ALLOCATOR.  The problem was made visible when an attempt to
avoid a leak in a data structure that is created by the library was
passed to NED's free for disposal triggering a segfault in Windows.

PCRE2 requires the use of a general context to override the allocator
and therefore, there is a lot more code needed than in PCRE1, including
a couple of wrapper functions.

Extend the grep API with a "destructor" that could be called to cleanup
any objects that were created and used globally.

Update builtin/grep to use that new API, but any other future
users should make sure to have matching grep_init/grep_destroy calls
if they are using the pattern matching functionality (currently only
relevant when using both NED and PCRE2)

Move the logic to decide if a general context will be needed to an
earlier phase so it will only be done once per pattern (instead of
at least once per worker thread) avoiding then the need for locking.

This change does the minimum change required to hopefully solve the
crash, with the rest of the users of it added later.

Helped-by: René Scharfe 
Reported-by: Johannes Schindelin 
Signed-off-by: Carlo Marcelo Arenas Belón 
---
 builtin/grep.c |  1 +
 grep.c | 56 +-
 grep.h |  1 +
 3 files changed, 57 insertions(+), 1 deletion(-)

diff --git a/builtin/grep.c b/builtin/grep.c
index 560051784e..e49c20df60 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1145,5 +1145,6 @@ int cmd_grep(int argc, const char **argv, const char 
*prefix)
run_pager(&opt, prefix);
clear_pathspec(&pathspec);
free_grep_patterns(&opt);
+   grep_destroy();
return !hit;
 }
diff --git a/grep.c b/grep.c
index 0154998695..8e0b838db0 100644
--- a/grep.c
+++ b/grep.c
@@ -16,6 +16,44 @@ static int grep_source_is_binary(struct grep_source *gs,
 
 static struct grep_opt grep_defaults;
 
+#ifdef USE_LIBPCRE2
+static pcre2_general_context *pcre2_global_context;
+
+#ifdef USE_NED_ALLOCATOR
+static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
+{
+   return xmalloc(size); /* will use nedalloc underneath */
+}
+
+static void pcre2_free(void *pointer, MAYBE_UNUSED void *memory_data)
+{
+   return free(pointer);
+}
+
+/*
+ * BUG: this is technically not needed if we will do UTF matching
+ *  but UTF locale detection is currently broken
+ * TODO: has_non_ascii() doesn't support NUL in pattern
+ */
+void setup_pcre2_as_needed(struct grep_opt *opt, const char *pat)
+{
+   if (!pcre2_global_context && opt->ignore_case &&
+   has_non_ascii(pat))
+   pcre2_global_context = pcre2_general_context_create(
+   pcre2_malloc, pcre2_free, NULL);
+}
+
+static void cleanup_pcre2_as_needed(void)
+{
+   pcre2_general_context_free(pcre2_global_context);
+}
+
+#else
+#define setup_pcre2_as_needed(opt, pat)
+#define cleanup_pcre2_as_needed()
+#endif
+#endif
+
 static const char *color_grep_slots[] = {
[GREP_COLOR_CONTEXT]= "context",
[GREP_COLOR_FILENAME]   = "filename",
@@ -153,6 +191,7 @@ int grep_config(const char *var, const char *value, void 
*cb)
  *
  * If using PCRE make sure that the library is configured
  * to use the right allocator (ex: NED)
+ * if any object is created it should be cleaned up in grep_destroy()
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char 
*prefix)
 {
@@ -188,6 +227,11 @@ void grep_init(struct grep_opt *opt, struct repository 
*repo, const char *prefix
color_set(opt->colors[i], def->colors[i]);
 }
 
+void grep_destroy(void)
+{
+   cleanup_pcre2_as_needed();
+}
+
 static void grep_set_pattern_type_option(enum grep_pattern_type pattern_type, 
struct grep_opt *opt)
 {
/*
@@ -326,6 +370,7 @@ void append_grep_pat(struct grep_opt *opt, const char *pat, 
size_t patlen,
 const char *origin, int no, enum grep_pat_token t)
 {
struct grep_pat *p = create_grep_pat(pat, patlen, origin, no, t, 0);
+   setup_pcre2_as_needed(opt, pat);
do_append_grep_pat(&opt->pattern_tail, p);
 }
 
@@ -507,9 +552,18 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
 
p->pcre2_compile_context = NULL;
 
+   /*
+* pcre2_global_context is initialized in append_grep_pat()
+* with logic from setup_pcre2_as_needed() that mimics what
+* is used here and with the BUG() to protect from mismatches
+*/
if (opt->ignore_case) {
if (has_non_ascii(p->pattern)) {
-   character_tables = pcre2_maketables(NULL);
+#ifdef USE_NED_ALLOCATOR
+   

[RFC PATCH v5 1/3] grep: make PCRE1 aware of custom allocator

2019-08-08 Thread Carlo Marcelo Arenas Belón
63e7e9d8b6 ("git-grep: Learn PCRE", 2011-05-09) didn't include a way
to override the system alocator, and so it is incompatible with
USE_NED_ALLOCATOR as reported by Dscho[1] (in similar code from PCRE2)

Make the minimum change possible to ensure this combination is supported
by extending grep_init to set the PCRE1 specific functions to the NED
versions (using the aliased names though) and therefore making sure all
alocations are done inside PCRE1 with the same allocator than the rest
of git.

This change might have performance impact (hopefully for the best) and
so will be good idea to test it in a platform where NED might have a
positive impact (ex: Windows 7)

[1] https://public-inbox.org/git/pull.306.git.gitgitgad...@gmail.com

Signed-off-by: Carlo Marcelo Arenas Belón 
Signed-off-by: Junio C Hamano 
---
 Makefile |  2 +-
 grep.c   | 10 ++
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index bd246f2989..4b384f3759 100644
--- a/Makefile
+++ b/Makefile
@@ -1764,7 +1764,7 @@ ifdef NATIVE_CRLF
 endif
 
 ifdef USE_NED_ALLOCATOR
-   COMPAT_CFLAGS += -Icompat/nedmalloc
+   COMPAT_CFLAGS += -DUSE_NED_ALLOCATOR -Icompat/nedmalloc
COMPAT_OBJS += compat/nedmalloc/nedmalloc.o
OVERRIDE_STRDUP = YesPlease
 endif
diff --git a/grep.c b/grep.c
index cd952ef5d3..0154998695 100644
--- a/grep.c
+++ b/grep.c
@@ -150,12 +150,22 @@ int grep_config(const char *var, const char *value, void 
*cb)
  * Initialize one instance of grep_opt and copy the
  * default values from the template we read the configuration
  * information in an earlier call to git_config(grep_config).
+ *
+ * If using PCRE make sure that the library is configured
+ * to use the right allocator (ex: NED)
  */
 void grep_init(struct grep_opt *opt, struct repository *repo, const char 
*prefix)
 {
struct grep_opt *def = &grep_defaults;
int i;
 
+#ifdef USE_NED_ALLOCATOR
+#ifdef USE_LIBPCRE1
+   pcre_malloc = malloc;
+   pcre_free = free;
+#endif
+#endif
+
memset(opt, 0, sizeof(*opt));
opt->repo = repo;
opt->prefix = prefix;
-- 
2.23.0.rc1



[RFC PATCH v5 3/3] grep: avoid leak of chartables in PCRE2

2019-08-08 Thread Carlo Marcelo Arenas Belón
94da9193a6 ("grep: add support for PCRE v2", 2017-06-01) introduced
a small memory leak visible with valgrind in t7813.

Complete the creation of a PCRE2 specific variable that was missing from
the original change and free the generated table just like it is done
for PCRE1.

Signed-off-by: Carlo Marcelo Arenas Belón 
Signed-off-by: Junio C Hamano 
---
 grep.c | 7 ---
 grep.h | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/grep.c b/grep.c
index 8e0b838db0..146891e2bf 100644
--- a/grep.c
+++ b/grep.c
@@ -543,7 +543,6 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
PCRE2_UCHAR errbuf[256];
PCRE2_SIZE erroffset;
int options = PCRE2_MULTILINE;
-   const uint8_t *character_tables = NULL;
int jitret;
int patinforet;
size_t jitsizearg;
@@ -563,9 +562,10 @@ static void compile_pcre2_pattern(struct grep_pat *p, 
const struct grep_opt *opt
if (!pcre2_global_context)
BUG("pcre2_global_context uninitialized");
 #endif
-   character_tables = 
pcre2_maketables(pcre2_global_context);
+   p->pcre2_tables = 
pcre2_maketables(pcre2_global_context);
p->pcre2_compile_context = 
pcre2_compile_context_create(NULL);
-   pcre2_set_character_tables(p->pcre2_compile_context, 
character_tables);
+   pcre2_set_character_tables(p->pcre2_compile_context,
+   p->pcre2_tables);
}
options |= PCRE2_CASELESS;
}
@@ -669,6 +669,7 @@ static void free_pcre2_pattern(struct grep_pat *p)
pcre2_match_data_free(p->pcre2_match_data);
pcre2_jit_stack_free(p->pcre2_jit_stack);
pcre2_match_context_free(p->pcre2_match_context);
+   free((void *)p->pcre2_tables);
 }
 #else /* !USE_LIBPCRE2 */
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt 
*opt)
diff --git a/grep.h b/grep.h
index 526c2db9ef..533165c21a 100644
--- a/grep.h
+++ b/grep.h
@@ -96,6 +96,7 @@ struct grep_pat {
pcre2_compile_context *pcre2_compile_context;
pcre2_match_context *pcre2_match_context;
pcre2_jit_stack *pcre2_jit_stack;
+   const uint8_t *pcre2_tables;
uint32_t pcre2_jit_on;
kwset_t kws;
unsigned fixed:1;
-- 
2.23.0.rc1



[PATCH] SQUASH

2019-08-09 Thread Carlo Marcelo Arenas Belón
Make using a general context (that is only needed with NED) to depend
on NED being selected at compile time.

the compile_context could be also make conditional but it gets ugly
really fasts with #ifdef
---
 Makefile | 2 +-
 grep.c   | 4 
 2 files changed, 5 insertions(+), 1 deletion(-)

diff --git a/Makefile b/Makefile
index 8a7e235352..995e6c9351 100644
--- a/Makefile
+++ b/Makefile
@@ -1774,7 +1774,7 @@ ifdef NATIVE_CRLF
 endif
 
 ifdef USE_NED_ALLOCATOR
-   COMPAT_CFLAGS += -Icompat/nedmalloc
+   COMPAT_CFLAGS += -DUSE_NED_ALLOCATOR -Icompat/nedmalloc
COMPAT_OBJS += compat/nedmalloc/nedmalloc.o
OVERRIDE_STRDUP = YesPlease
 endif
diff --git a/grep.c b/grep.c
index 8255ec956e..233072ed80 100644
--- a/grep.c
+++ b/grep.c
@@ -482,6 +482,7 @@ static void free_pcre1_regexp(struct grep_pat *p)
 #endif /* !USE_LIBPCRE1 */
 
 #ifdef USE_LIBPCRE2
+#ifdef USE_NED_ALLOCATOR
 static void *pcre2_malloc(PCRE2_SIZE size, MAYBE_UNUSED void *memory_data)
 {
return xmalloc(size);
@@ -491,6 +492,7 @@ static void pcre2_free(void *pointer, MAYBE_UNUSED void 
*memory_data)
 {
free(pointer);
 }
+#endif
 
 static void compile_pcre2_pattern(struct grep_pat *p, const struct grep_opt 
*opt)
 {
@@ -505,7 +507,9 @@ static void compile_pcre2_pattern(struct grep_pat *p, const 
struct grep_opt *opt
 
assert(opt->pcre2);
 
+#ifdef USE_NED_ALLOCATOR
p->pcre2_general_context = pcre2_general_context_create(pcre2_malloc, 
pcre2_free, NULL);
+#endif
p->pcre2_compile_context = 
pcre2_compile_context_create(p->pcre2_general_context);
 
if (opt->ignore_case) {
-- 
2.23.0.rc2



[RFC PATCH] http: use xmalloc with cURL

2019-08-10 Thread Carlo Marcelo Arenas Belón
4a30976e28 ([PATCH] support older versions of libcurl, 2005-07-28) added
support for conditionally initializing cURL but when f0ed8226c9
(Add custom memory allocator to MinGW and MacOS builds, 2009-05-31) that
support wasn't updated to make sure cURL will use the same allocator than
git if compiled with USE_NED_ALLOCATOR=YesPlease (usually done in Windows)

tested in macOS 10.14.6 with the system provided cURL (7.54.0)
and latest (7.65.3) and while the API used should be added starting around
7.12.0 (mid 2014). couldn't get a release that old to build and therefore
the current mismatch is unlikely to be found while testing because of that.

cURL is very strict about its allocator being thread safe and so that might
be an issue to look for.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 http.h | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/http.h b/http.h
index b429f1cf04..59ec4cbd30 100644
--- a/http.h
+++ b/http.h
@@ -27,6 +27,9 @@
 #endif
 #if LIBCURL_VERSION_NUM < 0x070800
 #define curl_global_init(a) do { /* nothing */ } while (0)
+#else
+#define curl_global_init(a) curl_global_init_mem(a, xmalloc, free, \
+   xrealloc, xstrdup, xcalloc)
 #endif
 
 #if (LIBCURL_VERSION_NUM < 0x070c04) || (LIBCURL_VERSION_NUM == 0x071000)
-- 
2.23.0.rc2



[PATCH] http: use xmalloc with cURL

2019-08-15 Thread Carlo Marcelo Arenas Belón
f0ed8226c9 (Add custom memory allocator to MinGW and MacOS builds, 2009-05-31)
never told cURL about it.

Correct that by using the cURL initializer available since version 7.12 to
point to xmalloc and friends for consistency which then will pass the
allocation requests along when USE_NED_ALLOCATOR=YesPlease is used (most
likely in Windows)

Signed-off-by: Carlo Marcelo Arenas Belón 
---
This doesn't conflict with anything and was originally based on maint (so it
applies cleanly also to master and next), but is now rebased on top of
jk/drop-release-pack-memory so the final product wouldn't have any chance to
introduce problems (thanks Peff)

it has been built and tested in Windows through Azure Pipelines (thanks Dscho)
and shouldn't introduce any build problems even with ancient versions or cURL
(thanks Daniel) and while not strictly needed is a nice thing to have for
consistency (thanks Junio)

 http.h | 6 ++
 1 file changed, 6 insertions(+)

diff --git a/http.h b/http.h
index b429f1cf04..e5f075dcbf 100644
--- a/http.h
+++ b/http.h
@@ -22,9 +22,15 @@
 #define DEFAULT_MAX_REQUESTS 5
 #endif
 
+#if LIBCURL_VERSION_NUM >= 0x070c00
+#define curl_global_init(a) curl_global_init_mem(a, xmalloc, free, \
+   xrealloc, xstrdup, xcalloc)
+#endif
+
 #if LIBCURL_VERSION_NUM < 0x070704
 #define curl_global_cleanup() do { /* nothing */ } while (0)
 #endif
+
 #if LIBCURL_VERSION_NUM < 0x070800
 #define curl_global_init(a) do { /* nothing */ } while (0)
 #endif

base-commit: 9827d4c185e4da728f51cd77c54a38c9de62495f
-- 
2.23.0.rc2



[PATCH v2] http: use xmalloc with cURL

2019-08-15 Thread Carlo Marcelo Arenas Belón
f0ed8226c9 (Add custom memory allocator to MinGW and MacOS builds, 2009-05-31)
never told cURL about it.

Correct that by using the cURL initializer available since version 7.12 to
point to xmalloc and friends for consistency which then will pass the
allocation requests along when USE_NED_ALLOCATOR=YesPlease is used (most
likely in Windows)

Signed-off-by: Carlo Marcelo Arenas Belón 
---

Notes:
v2: keep all global_init ifdefs together (as suggested by Junio)

 http.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/http.h b/http.h
index b429f1cf04..20a2030c94 100644
--- a/http.h
+++ b/http.h
@@ -25,8 +25,12 @@
 #if LIBCURL_VERSION_NUM < 0x070704
 #define curl_global_cleanup() do { /* nothing */ } while (0)
 #endif
+
 #if LIBCURL_VERSION_NUM < 0x070800
 #define curl_global_init(a) do { /* nothing */ } while (0)
+#elseif LIBCURL_VERSION_NUM >= 0x070c00
+#define curl_global_init(a) curl_global_init_mem(a, xmalloc, free, \
+   xrealloc, xstrdup, xcalloc)
 #endif
 
 #if (LIBCURL_VERSION_NUM < 0x070c04) || (LIBCURL_VERSION_NUM == 0x071000)

base-commit: 9827d4c185e4da728f51cd77c54a38c9de62495f
-- 
2.23.0.rc2



[PATCH v3] http: use xmalloc with cURL

2019-08-15 Thread Carlo Marcelo Arenas Belón
f0ed8226c9 (Add custom memory allocator to MinGW and MacOS builds,
2009-05-31) never told cURL about it.

Correct that by using the cURL initializer available since version 7.12 to
point to xmalloc and friends for consistency which then will pass the
allocation requests along when USE_NED_ALLOCATOR=YesPlease is used (most
likely in Windows)

Signed-off-by: Carlo Marcelo Arenas Belón 
---
v3: proper use of #elif (Thanks Junio)
v2: keep all curl_global_init ifdefs together (as suggested by Junio)

 http.h | 4 
 1 file changed, 4 insertions(+)

diff --git a/http.h b/http.h
index b429f1cf04..20a2030c94 100644
--- a/http.h
+++ b/http.h
@@ -25,8 +25,12 @@
 #if LIBCURL_VERSION_NUM < 0x070704
 #define curl_global_cleanup() do { /* nothing */ } while (0)
 #endif
+
 #if LIBCURL_VERSION_NUM < 0x070800
 #define curl_global_init(a) do { /* nothing */ } while (0)
+#elif LIBCURL_VERSION_NUM >= 0x070c00
+#define curl_global_init(a) curl_global_init_mem(a, xmalloc, free, \
+   xrealloc, xstrdup, xcalloc)
 #endif
 
 #if (LIBCURL_VERSION_NUM < 0x070c04) || (LIBCURL_VERSION_NUM == 0x071000)

base-commit: 9827d4c185e4da728f51cd77c54a38c9de62495f
-- 
2.23.0.rc2


[PATCH 1/2] grep: make sure NO_LIBPCRE1_JIT disable JIT in PCRE1

2019-08-25 Thread Carlo Marcelo Arenas Belón
e87de7cab4 ("grep: un-break building with PCRE < 8.32", 2017-05-25)
added a restriction for JIT support that is no longer needed after
pcre_jit_exec() calls were removed.

Reorganize the definitions in grep.h so that JIT support could be
detected early and NO_LIBPCRE1_JIT could be used reliably to enforce
JIT doesn't get used.

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 Makefile | 9 ++---
 grep.h   | 4 +---
 2 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/Makefile b/Makefile
index f58bf14c7b..3f78ef942f 100644
--- a/Makefile
+++ b/Makefile
@@ -34,13 +34,8 @@ all::
 # library. Support for version 1 will likely be removed in some future
 # release of Git, as upstream has all but abandoned it.
 #
-# When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if the PCRE v1
-# library is compiled without --enable-jit. We will auto-detect
-# whether the version of the PCRE v1 library in use has JIT support at
-# all, but we unfortunately can't auto-detect whether JIT support
-# hasn't been compiled in in an otherwise JIT-supporting version. If
-# you have link-time errors about a missing `pcre_jit_exec` define
-# this, or recompile PCRE v1 with --enable-jit.
+# When using USE_LIBPCRE1, define NO_LIBPCRE1_JIT if you want to
+# disable JIT even if supported by your library.
 #
 # Define LIBPCREDIR=/foo/bar if your PCRE header and library files are
 # in /foo/bar/include and /foo/bar/lib directories. Which version of
diff --git a/grep.h b/grep.h
index c0c71eb4a9..1a044c501e 100644
--- a/grep.h
+++ b/grep.h
@@ -3,14 +3,12 @@
 #include "color.h"
 #ifdef USE_LIBPCRE1
 #include 
-#ifdef PCRE_CONFIG_JIT
-#if PCRE_MAJOR >= 8 && PCRE_MINOR >= 32
 #ifndef NO_LIBPCRE1_JIT
+#ifdef PCRE_CONFIG_JIT
 #define GIT_PCRE1_USE_JIT
 #define GIT_PCRE_STUDY_JIT_COMPILE PCRE_STUDY_JIT_COMPILE
 #endif
 #endif
-#endif
 #ifndef GIT_PCRE_STUDY_JIT_COMPILE
 #define GIT_PCRE_STUDY_JIT_COMPILE 0
 #endif
-- 
2.23.0



[PATCH 2/2] grep: refactor and simplify PCRE1 support

2019-08-25 Thread Carlo Marcelo Arenas Belón
The code used both a macro and a variable to keep track if JIT
support was desired and relied on the fact that a non JIT
enabled library will ignore a request for JIT compilation
(as defined by the second parameter of the call to pcre_study)

Cleanup the multiple levels of macros used and call pcre_study
with the right parameter after JIT support has been confirmed
and unless it was requested to be disabled with NO_LIBPCRE1_JIT

Signed-off-by: Carlo Marcelo Arenas Belón 
---
 grep.c | 16 ++--
 grep.h |  9 -
 2 files changed, 10 insertions(+), 15 deletions(-)

diff --git a/grep.c b/grep.c
index 76088784e3..05d31c2bcc 100644
--- a/grep.c
+++ b/grep.c
@@ -374,6 +374,7 @@ static void compile_pcre1_regexp(struct grep_pat *p, const 
struct grep_opt *opt)
const char *error;
int erroffset;
int options = PCRE_MULTILINE;
+   int study_options = 0;
 
if (opt->ignore_case) {
if (!opt->ignore_locale && has_non_ascii(p->pattern))
@@ -388,15 +389,18 @@ static void compile_pcre1_regexp(struct grep_pat *p, 
const struct grep_opt *opt)
if (!p->pcre1_regexp)
compile_regexp_failed(p, error);
 
-   p->pcre1_extra_info = pcre_study(p->pcre1_regexp, 
GIT_PCRE_STUDY_JIT_COMPILE, &error);
-   if (!p->pcre1_extra_info && error)
-   die("%s", error);
-
-#ifdef GIT_PCRE1_USE_JIT
+#if defined(PCRE_CONFIG_JIT) && !defined(NO_LIBPCRE1_JIT)
pcre_config(PCRE_CONFIG_JIT, &p->pcre1_jit_on);
if (opt->debug)
fprintf(stderr, "pcre1_jit_on=%d\n", p->pcre1_jit_on);
+
+   if (p->pcre1_jit_on)
+   study_options = PCRE_STUDY_JIT_COMPILE;
 #endif
+
+   p->pcre1_extra_info = pcre_study(p->pcre1_regexp, study_options, 
&error);
+   if (!p->pcre1_extra_info && error)
+   die("%s", error);
 }
 
 static int pcre1match(struct grep_pat *p, const char *line, const char *eol,
@@ -425,7 +429,7 @@ static int pcre1match(struct grep_pat *p, const char *line, 
const char *eol,
 static void free_pcre1_regexp(struct grep_pat *p)
 {
pcre_free(p->pcre1_regexp);
-#ifdef GIT_PCRE1_USE_JIT
+#ifdef PCRE_CONFIG_JIT
if (p->pcre1_jit_on)
pcre_free_study(p->pcre1_extra_info);
else
diff --git a/grep.h b/grep.h
index 1a044c501e..ff620d784a 100644
--- a/grep.h
+++ b/grep.h
@@ -3,15 +3,6 @@
 #include "color.h"
 #ifdef USE_LIBPCRE1
 #include 
-#ifndef NO_LIBPCRE1_JIT
-#ifdef PCRE_CONFIG_JIT
-#define GIT_PCRE1_USE_JIT
-#define GIT_PCRE_STUDY_JIT_COMPILE PCRE_STUDY_JIT_COMPILE
-#endif
-#endif
-#ifndef GIT_PCRE_STUDY_JIT_COMPILE
-#define GIT_PCRE_STUDY_JIT_COMPILE 0
-#endif
 #else
 typedef int pcre;
 typedef int pcre_extra;
-- 
2.23.0



  1   2   >