Hi Kevin,

On 2026-05-21T09:47:13+0800, Kevin J. McCarthy wrote:
> REG_NEWLINE causes ^ to match both BOS and after a newline, and $ to
> match both EOS and just before a newline.
> 
> This leads to weird behavior with a pager search such as /^a*$/.  This
> will end up matching every line:
>   - ^ matches after the newline
>   - a* matches nothing
>   - $ matches EOS
> 
> Removing the newlines would be more painful, because the parser
> requires them in several places, for things like header to body
> switching, matching signatures, and breaking out of some loops.
> 
> Instead do the same thing that match_body_patterns() does: temporarily
> remove the newlines during the regex search.
> 
> Then, remove the REG_NEWLINE flag from all the REGCOMP calls.
> ---
> 
> I discovered this issue trying to help someone on IRC make a macro to
> skip to empty (or whitespace only) lines.
> 
> /^[[:space:]]*$ was not working: it matches every line in the email:
> ^ matches after the newline, [[:space:]]* matches nothing, $ matches
> EOS.
> 
> But /^[[:space:]]+$ worked:
> ^ matches the BOS,  [[:space:]]+ matches the \n, $ matches the EOS.
> 
> Turns out the problem is the REG_NEWLINE, and also that mutt leaves the
> \n in the lines.
> 
> I think the intention of REG_NEWLINE was to allow searching for /foo$
> to match a line ending with "foo".  However it behavior confusingly.
> 
> This patch removes both the REG_NEWLINE and the \n from the lines when
> searching.  This allows searches to make sense.
> 
>  pager.c | 69 ++++++++++++++++++++++++++++++++++++---------------------
>  1 file changed, 44 insertions(+), 25 deletions(-)
> 
> diff --git a/pager.c b/pager.c
> index 92fa152a..75221643 100644
> --- a/pager.c
> +++ b/pager.c
> @@ -1268,6 +1268,47 @@ bail:
>    return rc;
>  }
>  
> +static void resolve_search(struct line_t *lineInfo, int n, char *fmt,
> +                           regex_t *SearchRE)
> +{
> +  size_t fmtlen;
> +  int has_nl = 0, offset = 0;
> +  regmatch_t pmatch[1];
> +
> +  /* don't consider line endings part of the buffer
> +   * for regex matching */
> +  if ((fmtlen = mutt_strlen(fmt)) > 0 && fmt[fmtlen - 1] == '\n')

I would very much rewrite the above in two lines.  I don't see a good
reason to embed the assignment in the conditional (and that often leads
to bugs due to accidents).  I suggest:

        fmtlen = mutt_strlen(fmt);
        if (fmtlen > 0 && fmt[fmtlen - 1] == '\n')

Having a hypothetical strsuffix() function[1], the entire conditional
could be further rewritten as:

        char  *nl;

        if (fmt != NULL && NULL != (nl = strsuffix(fmt, "\n")))
          *nl = '\0';

And at the end of the function:

        if (nl)
          *nl = '\n';

[1]: <https://www.open-std.org/jtc1/sc22/wg14/www/docs/n3613.txt>


> +  {
> +    has_nl = 1;
> +    fmt[fmtlen - 1] = 0;
> +  }
> +
> +  offset = 0;
> +  lineInfo[n].search_cnt = 0;
> +  while (regexec(SearchRE, (char *) fmt + offset, 1, pmatch, (offset ? 
> REG_NOTBOL : 0)) == 0)

What's the point of the cast?  fmt is already char*, right?

> +  {
> +    if (++(lineInfo[n].search_cnt) > 1)
> +      safe_realloc(&(lineInfo[n].search),
> +                   (lineInfo[n].search_cnt) * sizeof(struct syntax_t));

We should probably have a safe_reallocarray() that wraps
reallocarray(3).  That would do the multiplication internally, avoiding
any possible unsigned integer wrap-arounds.

> +    else
> +      lineInfo[n].search = safe_malloc(sizeof(struct syntax_t));

I not familiar with mutt(1)'s safe_realloc() and safe_malloc(), but
realloc(3) allows one to do the initial allocation too.

        p = realloc(NULL, n);  // equivalent to malloc(n);

From what I can see in mutt(1)'s safe_realloc(), it seems to also handle
that case (in fact, it even has a workaround for SunOS 4.1.x).

Do we really need this conditional?

> +    pmatch[0].rm_so += offset;
> +    pmatch[0].rm_eo += offset;
> +    (lineInfo[n].search)[lineInfo[n].search_cnt - 1].first = pmatch[0].rm_so;
> +    (lineInfo[n].search)[lineInfo[n].search_cnt - 1].last = pmatch[0].rm_eo;
> +
> +    if (pmatch[0].rm_eo == pmatch[0].rm_so)
> +      offset++; /* avoid degenerate cases */
> +    else
> +      offset = pmatch[0].rm_eo;
> +    if (!fmt[offset])
> +      break;
> +  }
> +
> +  if (has_nl)
> +    fmt[fmtlen - 1] = '\n';

See above.

> +}
> +
>  static int format_line(struct line_t **lineInfo, int n, unsigned char *buf,
>                         int flags, ansi_attr *pa, int cnt,
>                         int *pspace, int *pvch, int *pcol, int *pspecial,
> @@ -1483,12 +1524,10 @@ display_line(FILE *f, LOFF_T *last_pos, struct line_t 
> **lineInfo, int n,
>    int ch, vch, col, cnt, b_read;
>    int buf_ready = 0, change_last = 0;
>    int special;
> -  int offset;
>    COLOR_ATTR def_color;
>    int m;
>    int rc = -1;
>    ansi_attr a = {0,-1,-1,-1};
> -  regmatch_t pmatch[1];
>  
>    if (n == *last)
>    {
> @@ -1567,27 +1606,7 @@ display_line(FILE *f, LOFF_T *last_pos, struct line_t 
> **lineInfo, int n,
>        goto out;
>      }
>  
> -    offset = 0;
> -    (*lineInfo)[n].search_cnt = 0;
> -    while (regexec(SearchRE, (char *) fmt + offset, 1, pmatch, (offset ? 
> REG_NOTBOL : 0)) == 0)

Ohhh, I see.  You've just moved the cast from here.  I wonder why it was
there.

> -    {
> -      if (++((*lineInfo)[n].search_cnt) > 1)
> -        safe_realloc(&((*lineInfo)[n].search),
> -                     ((*lineInfo)[n].search_cnt) * sizeof(struct syntax_t));
> -      else
> -        (*lineInfo)[n].search = safe_malloc(sizeof(struct syntax_t));

Same with this conditional.

I guess it's better to just move it here, and then consider improving it
in a separate commit.


Have a lovely day!
Alex

> -      pmatch[0].rm_so += offset;
> -      pmatch[0].rm_eo += offset;
> -      ((*lineInfo)[n].search)[(*lineInfo)[n].search_cnt - 1].first = 
> pmatch[0].rm_so;
> -      ((*lineInfo)[n].search)[(*lineInfo)[n].search_cnt - 1].last = 
> pmatch[0].rm_eo;
> -
> -      if (pmatch[0].rm_eo == pmatch[0].rm_so)
> -        offset++; /* avoid degenerate cases */
> -      else
> -        offset = pmatch[0].rm_eo;
> -      if (!fmt[offset])
> -        break;
> -    }
> +    resolve_search(*lineInfo, n, (char *) fmt, SearchRE);
>    }
>  
>    if (!(flags & MUTT_SHOW) && (*lineInfo)[n+1].offset > 0)
> @@ -1859,7 +1878,7 @@ static void pager_menu_redraw(MUTTMENU *pager_menu)
>        if ((rd->SearchCompiled = Resize->SearchCompiled))
>        {
>          if ((err = REGCOMP(&rd->SearchRE, rd->searchbuf,
> -                           REG_NEWLINE | mutt_which_case(rd->searchbuf))) != 
> 0)
> +                           mutt_which_case(rd->searchbuf))) != 0)
>          {
>            regerror(err, &rd->SearchRE, buffer, sizeof(buffer));
>            mutt_error("%s", buffer);
> @@ -2454,7 +2473,7 @@ search_next:
>            }
>          }
>  
> -        if ((err = REGCOMP(&rd.SearchRE, searchbuf, REG_NEWLINE | 
> mutt_which_case(searchbuf))) != 0)
> +        if ((err = REGCOMP(&rd.SearchRE, searchbuf, 
> mutt_which_case(searchbuf))) != 0)
>          {
>            regerror(err, &rd.SearchRE, buffer, sizeof(buffer));
>            mutt_error("%s", buffer);
> -- 
> 2.54.0
> 

-- 
<https://www.alejandro-colomar.es>

Attachment: signature.asc
Description: PGP signature

Reply via email to