On Sun, May 21, 2017 at 1:50 AM, Junio C Hamano <[email protected]> wrote:
> Ævar Arnfjörð Bjarmason <[email protected]> writes:
>
>> Make the --regexp-ignore-case option work with --perl-regexp. This
>> never worked, and there was no test for this. Fix the bug and add a
>> test.
>>
>> When PCRE support was added in commit 63e7e9d8b6 ("git-grep: Learn
>> PCRE", 2011-05-09) compile_pcre_regexp() would only check
>> opt->ignore_case, but when the --perl-regexp option was added in
>> commit 727b6fc3ed ("log --grep: accept --basic-regexp and
>> --perl-regexp", 2012-10-03) the code didn't set the opt->ignore_case.
>>
>> Change the test suite to test for -i and --invert-regexp with
>> basic/extended/perl patterns in addition to fixed, which was the only
>> patternType that was tested for before in combination with those
>> options.
>>
>> Signed-off-by: Ævar Arnfjörð Bjarmason <[email protected]>
>> ---
>> revision.c | 1 +
>> t/t4202-log.sh | 60
>> +++++++++++++++++++++++++++++++++++++++++++++++++++++-----
>> 2 files changed, 56 insertions(+), 5 deletions(-)
>>
>> diff --git a/revision.c b/revision.c
>> index 8a8c1789c7..4883cdd2d0 100644
>> --- a/revision.c
>> +++ b/revision.c
>> @@ -1991,6 +1991,7 @@ static int handle_revision_opt(struct rev_info *revs,
>> int argc, const char **arg
>> } else if (!strcmp(arg, "--extended-regexp") || !strcmp(arg, "-E")) {
>> revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE;
>> } else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i"))
>> {
>> + revs->grep_filter.ignore_case = 1;
>> revs->grep_filter.regflags |= REG_ICASE;
>> DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
>> } else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
>
> Looks good.
>
> I however wonder if it is a better approach in the longer term to
> treat the .ignore_case field just like .extended_regexp_option
> field, i.e. not committing immediately to .regflags but commit it
> after config and command line parsing is done, just like we make the
> "BRE? ERE?" decision in grep_commit_pattern_type().
I started hacking up a patch to fix the root cause of this, i.e. users
of the grep API should only need to set `.ignore_case = 1` and not
care about setting regflags themselves. It turned out to be more than
a trivial change, so I didn't include it in this series:
diff --git a/builtin/grep.c b/builtin/grep.c
index 3ffb5b4e81..be28c37265 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1151,8 +1151,6 @@ int cmd_grep(int argc, const char **argv, const
char *prefix)
if (!opt.pattern_list)
die(_("no pattern given."));
- if (!opt.fixed && opt.ignore_case)
- opt.regflags |= REG_ICASE;
compile_grep_patterns(&opt);
diff --git a/grep.c b/grep.c
index 47cee45067..7b13ee1043 100644
--- a/grep.c
+++ b/grep.c
@@ -435,12 +435,11 @@ static void compile_fixed_regexp(struct grep_pat
*p, struct grep_opt *opt)
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
- int icase, ascii_only;
+ int ascii_only;
int err;
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
- icase = opt->regflags & REG_ICASE || p->ignore_case;
ascii_only = !has_non_ascii(p->pattern);
/*
@@ -456,12 +455,12 @@ static void compile_regexp(struct grep_pat *p,
struct grep_opt *opt)
* want to use kws.
*/
if (opt->fixed || is_fixed(p->pattern, p->patternlen))
- p->fixed = !icase || ascii_only;
+ p->fixed = !p->ignore_case || ascii_only;
else
p->fixed = 0;
if (p->fixed) {
- p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL);
+ p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
kwsincr(p->kws, p->pattern, p->patternlen);
kwsprep(p->kws);
return;
@@ -480,6 +479,8 @@ static void compile_regexp(struct grep_pat *p,
struct grep_opt *opt)
return;
}
+ if (p->ignore_case)
+ opt->regflags |= REG_ICASE;
err = regcomp(&p->regexp, p->pattern, opt->regflags);
if (err) {
char errbuf[1024];
diff --git a/revision.c b/revision.c
index 4883cdd2d0..30c23a1098 100644
--- a/revision.c
+++ b/revision.c
@@ -1992,7 +1992,6 @@ static int handle_revision_opt(struct rev_info
*revs, int argc, const char **arg
revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE;
} else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
revs->grep_filter.ignore_case = 1;
- revs->grep_filter.regflags |= REG_ICASE;
DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
} else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_FIXED;
But an even better solution is to get rid of the regflags field in
grep_opt entirely. This, however, conflicts with some of my later
patches:
diff --git a/builtin/grep.c b/builtin/grep.c
index 3ffb5b4e81..be28c37265 100644
--- a/builtin/grep.c
+++ b/builtin/grep.c
@@ -1151,8 +1151,6 @@ int cmd_grep(int argc, const char **argv, const
char *prefix)
if (!opt.pattern_list)
die(_("no pattern given."));
- if (!opt.fixed && opt.ignore_case)
- opt.regflags |= REG_ICASE;
compile_grep_patterns(&opt);
diff --git a/grep.c b/grep.c
index 47cee45067..1bde7037ba 100644
--- a/grep.c
+++ b/grep.c
@@ -34,7 +34,6 @@ void init_grep_defaults(void)
memset(opt, 0, sizeof(*opt));
opt->relative = 1;
opt->pathname = 1;
- opt->regflags = REG_NEWLINE;
opt->max_depth = -1;
opt->pattern_type_option = GREP_PATTERN_TYPE_UNSPECIFIED;
opt->extended_regexp_option = 0;
@@ -156,7 +155,6 @@ void grep_init(struct grep_opt *opt, const char *prefix)
opt->linenum = def->linenum;
opt->max_depth = def->max_depth;
opt->pathname = def->pathname;
- opt->regflags = def->regflags;
opt->relative = def->relative;
opt->output = def->output;
@@ -179,25 +177,25 @@ static void grep_set_pattern_type_option(enum
grep_pattern_type pattern_type, st
case GREP_PATTERN_TYPE_BRE:
opt->fixed = 0;
opt->pcre = 0;
- opt->regflags &= ~REG_EXTENDED;
+ opt->extended = 0;
break;
case GREP_PATTERN_TYPE_ERE:
opt->fixed = 0;
opt->pcre = 0;
- opt->regflags |= REG_EXTENDED;
+ opt->extended = 1;
break;
case GREP_PATTERN_TYPE_FIXED:
opt->fixed = 1;
opt->pcre = 0;
- opt->regflags &= ~REG_EXTENDED;
+ opt->extended = 0;
break;
case GREP_PATTERN_TYPE_PCRE:
opt->fixed = 0;
opt->pcre = 1;
- opt->regflags &= ~REG_EXTENDED;
+ opt->extended = 0;
break;
}
}
@@ -415,10 +413,9 @@ static void compile_fixed_regexp(struct grep_pat
*p, struct grep_opt *opt)
{
struct strbuf sb = STRBUF_INIT;
int err;
- int regflags;
+ int regflags = REG_NEWLINE;
basic_regex_quote_buf(&sb, p->pattern);
- regflags = opt->regflags & ~REG_EXTENDED;
if (opt->ignore_case)
regflags |= REG_ICASE;
err = regcomp(&p->regexp, sb.buf, regflags);
@@ -435,12 +432,12 @@ static void compile_fixed_regexp(struct grep_pat
*p, struct grep_opt *opt)
static void compile_regexp(struct grep_pat *p, struct grep_opt *opt)
{
- int icase, ascii_only;
+ int ascii_only;
int err;
+ int regflags = REG_NEWLINE;
p->word_regexp = opt->word_regexp;
p->ignore_case = opt->ignore_case;
- icase = opt->regflags & REG_ICASE || p->ignore_case;
ascii_only = !has_non_ascii(p->pattern);
/*
@@ -456,12 +453,12 @@ static void compile_regexp(struct grep_pat *p,
struct grep_opt *opt)
* want to use kws.
*/
if (opt->fixed || is_fixed(p->pattern, p->patternlen))
- p->fixed = !icase || ascii_only;
+ p->fixed = !p->ignore_case || ascii_only;
else
p->fixed = 0;
if (p->fixed) {
- p->kws = kwsalloc(icase ? tolower_trans_tbl : NULL);
+ p->kws = kwsalloc(p->ignore_case ? tolower_trans_tbl : NULL);
kwsincr(p->kws, p->pattern, p->patternlen);
kwsprep(p->kws);
return;
@@ -480,7 +477,11 @@ static void compile_regexp(struct grep_pat *p,
struct grep_opt *opt)
return;
}
- err = regcomp(&p->regexp, p->pattern, opt->regflags);
+ if (p->ignore_case)
+ regflags |= REG_ICASE;
+ if (opt->extended)
+ regflags |= REG_EXTENDED;
+ err = regcomp(&p->regexp, p->pattern, regflags);
if (err) {
char errbuf[1024];
regerror(err, &p->regexp, errbuf, 1024);
diff --git a/grep.h b/grep.h
index 267534ca24..d9d603deb1 100644
--- a/grep.h
+++ b/grep.h
@@ -129,7 +129,6 @@ struct grep_opt {
char color_match_selected[COLOR_MAXLEN];
char color_selected[COLOR_MAXLEN];
char color_sep[COLOR_MAXLEN];
- int regflags;
unsigned pre_context;
unsigned post_context;
unsigned last_shown;
diff --git a/revision.c b/revision.c
index 4883cdd2d0..67240d38af 100644
--- a/revision.c
+++ b/revision.c
@@ -1362,7 +1362,6 @@ void init_revisions(struct rev_info *revs, const
char *prefix)
init_grep_defaults();
grep_init(&revs->grep_filter, prefix);
revs->grep_filter.status_only = 1;
- revs->grep_filter.regflags = REG_NEWLINE;
diff_setup(&revs->diffopt);
if (prefix && !revs->diffopt.prefix) {
@@ -1992,7 +1991,6 @@ static int handle_revision_opt(struct rev_info
*revs, int argc, const char **arg
revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_ERE;
} else if (!strcmp(arg, "--regexp-ignore-case") || !strcmp(arg, "-i")) {
revs->grep_filter.ignore_case = 1;
- revs->grep_filter.regflags |= REG_ICASE;
DIFF_OPT_SET(&revs->diffopt, PICKAXE_IGNORE_CASE);
} else if (!strcmp(arg, "--fixed-strings") || !strcmp(arg, "-F")) {
revs->grep_filter.pattern_type_option = GREP_PATTERN_TYPE_FIXED;
But none of this code cleanup is needed to fix this bug, and I'd
really like to get this series merged into next/master ASAP so I can
start submitting the grep/pcre patches that are actually interesting.
So let's leave this orthogonal code cleanup for now.