Hi Kevin,

On Sun, Jan 25, 2026 at 06:09:39PM +0800, Kevin J. McCarthy wrote:
> From: Kevin McCarthy <[email protected]>
> 
> See gitlab issue 433.
> 
> This changes the From line parser to work in reverse.  When it gets to
> the return path, it just slurps it all in, allowing the address parser
> to make sense of it separately.
> ---
>  from.c | 231 +++++++++++++++++++++++++++++++++++++++++++++------------
>  init.h |   6 ++
>  mutt.h |   1 +
>  3 files changed, 190 insertions(+), 48 deletions(-)
> 
> diff --git a/from.c b/from.c
> index 47f5c76d..45ba7d67 100644
> --- a/from.c
> +++ b/from.c
> @@ -34,14 +34,15 @@ static const char *next_word (const char *s)
>    return s;
>  }
>  
> -int mutt_check_month (const char *s)
> +static const char *prev_word (const char * const bos, const char *cur)
>  {
> -  int i;
> -
> -  for (i = 0; i < 12; i++)
> -    if (mutt_strncasecmp (s, Months[i], 3) == 0)
> -      return (i);
> -  return (-1); /* error */
> +  if (cur <= bos)
> +    return bos;
> +  while (cur > bos && *(cur-1) && ISSPACE (*(cur-1)))
> +    cur--;
> +  while (cur > bos && *(cur-1) && !ISSPACE (*(cur-1)))
> +    cur--;
> +  return cur;
>  }
>  
>  static int is_day_name (const char *s)
> @@ -56,29 +57,43 @@ static int is_day_name (const char *s)
>    return 0;
>  }
>  
> -/*
> - * A valid message separator looks like:
> - *
> - * From [ <return-path> ] <weekday> <month> <day> <time> [ <timezone> ] 
> <year>
> - */
> -
> -int is_from (const char *s, char *path, size_t pathlen, time_t *tp)
> +int mutt_check_month (const char *s)
> +{
> +  int i;
> +
> +  for (i = 0; i < 12; i++)
> +    if (mutt_strncasecmp (s, Months[i], 3) == 0)
> +      return (i);
> +  return (-1); /* error */
> +}
> +
> +static int check_time (const char *s, struct tm *tm)
> +{
> +  int rv = 0;
> +  /* Accept either HH:MM or HH:MM:SS */
> +  if (sscanf (s, "%d:%d:%d", &tm->tm_hour, &tm->tm_min, &tm->tm_sec) == 3)
> +    ;
> +  else if (sscanf (s, "%d:%d", &tm->tm_hour, &tm->tm_min) == 2)
> +    tm->tm_sec = 0;
> +  else
> +    rv = -1;
> +
> +  return rv;
> +}
> +
> +static int check_year (const char *s)
>  {

It's a bit difficult to review this patch, as the diff merges refactors
with behavior changes.  It might be better to first have a patch that
splits code into helper functions and other similar refactors, and then
have the patch that changes behavior be minimal.


Have a lovely day!
Alex

> -  struct tm tm;
>    int yr;
>  
> -  if (path)
> -    *path = 0;
> -
> -  if (mutt_strncmp ("From ", s, 5) != 0)
> -    return 0;
> -
> -  s = next_word (s); /* skip over the From part. */
> -  if (!*s)
> -    return 0;
> -
> -  dprint (3, (debugfile, "\nis_from(): parsing: %s", s));
> +  if (sscanf (s, "%d", &yr) != 1 || yr < 0)
> +    return -1;
> +  return yr > 1900 ? yr - 1900 : (yr < 70 ? yr + 100 : yr);
> +}
>  
> +static int is_from_forward_scan (const char *s,
> +                                 char *path, size_t pathlen,
> +                                 struct tm *tm)
> +{
>    if (!is_day_name (s))
>    {
>      const char *p;
> @@ -106,7 +121,7 @@ int is_from (const char *s, char *path, size_t pathlen, 
> time_t *tp)
>        p = strchr(p + 4, ' ');
>        if (!p)
>        {
> -     dprint (1, (debugfile, "is_from(): error parsing what appears to be a 
> pipermail-style obscured return_path: %s\n", s));
> +     dprint (1, (debugfile, "is_from_forward_scan(): error parsing what 
> appears to be a pipermail-style obscured return_path: %s\n", s));
>       return 0;
>        }
>      }
> @@ -118,7 +133,7 @@ int is_from (const char *s, char *path, size_t pathlen, 
> time_t *tp)
>       len = pathlen - 1;
>        memcpy (path, s, len);
>        path[len] = 0;
> -      dprint (3, (debugfile, "is_from(): got return path: %s\n", path));
> +      dprint (3, (debugfile, "is_from_forward_scan(): got return path: 
> %s\n", path));
>      }
>  
>      s = p + 1;
> @@ -128,7 +143,7 @@ int is_from (const char *s, char *path, size_t pathlen, 
> time_t *tp)
>  
>      if (!is_day_name (s))
>      {
> -      dprint(1, (debugfile, "is_from():  expected weekday, got: %s\n", s));
> +      dprint(1, (debugfile, "is_from_forward_scan():  expected weekday, got: 
> %s\n", s));
>        return 0;
>      }
>    }
> @@ -147,28 +162,27 @@ int is_from (const char *s, char *path, size_t pathlen, 
> time_t *tp)
>    }
>  
>    /* now we should be on the month. */
> -  if ((tm.tm_mon = mutt_check_month (s)) < 0) return 0;
> +  if ((tm->tm_mon = mutt_check_month (s)) < 0)
> +    return 0;
>  
>    /* day */
>    s = next_word (s);
> -  if (!*s) return 0;
> -  if (sscanf (s, "%d", &tm.tm_mday) != 1) return 0;
> +  if (!*s)
> +    return 0;
> +  if (sscanf (s, "%d", &tm->tm_mday) != 1)
> +    return 0;
>  
>    /* time */
>    s = next_word (s);
> -  if (!*s) return 0;
> -
> -  /* Accept either HH:MM or HH:MM:SS */
> -  if (sscanf (s, "%d:%d:%d", &tm.tm_hour, &tm.tm_min, &tm.tm_sec) == 3);
> -  else if (sscanf (s, "%d:%d", &tm.tm_hour, &tm.tm_min) == 2)
> -    tm.tm_sec = 0;
> -  else
> +  if (!*s)
> +    return 0;
> +  if (check_time (s, tm) < 0)
>      return 0;
>  
> -  s = next_word (s);
> -  if (!*s) return 0;
> -
>    /* timezone? */
> +  s = next_word (s);
> +  if (!*s)
> +    return 0;
>    if (isalpha ((unsigned char) *s) || *s == '+' || *s == '-')
>    {
>      s = next_word (s);
> @@ -186,15 +200,136 @@ int is_from (const char *s, char *path, size_t 
> pathlen, time_t *tp)
>    }
>  
>    /* year */
> -  if (sscanf (s, "%d", &yr) != 1 || yr < 0) return 0;
> -  tm.tm_year = yr > 1900 ? yr - 1900 : (yr < 70 ? yr + 100 : yr);
> +  if ((tm->tm_year = check_year (s)) < 0)
> +    return 0;
>  
> -  dprint (3,(debugfile, "is_from(): month=%d, day=%d, hr=%d, min=%d, sec=%d, 
> yr=%d.\n",
> -          tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, 
> tm.tm_year));
> +  return 1;
> +}
>  
> -  tm.tm_isdst = -1;
> +static int is_from_reverse_scan (const char * const bos,
> +                                 char *path, size_t pathlen,
> +                                 struct tm *tm)
> +{
> +  const char *cur = bos;
> +
> +  cur = strchr (bos, '\0');
> +
> +  /* year */
> +  cur = prev_word (bos, cur);
> +  if (cur == bos)
> +    return 0;
> +  if ((tm->tm_year = check_year (cur)) < 0)
> +    return 0;
> +
> +  /* timezone? */
> +  cur = prev_word (bos, cur);
> +  if (cur == bos)
> +    return 0;
> +  if (isalpha ((unsigned char) *cur) || *cur == '+' || *cur == '-')
> +  {
> +    cur = prev_word (bos, cur);
> +    if (cur == bos)
> +      return 0;
> +
> +    /*
> +     * some places have two timezone fields after the time, e.g.
> +     *      From [email protected] Wed Aug  2 00:39:12 MET DST 1995
> +     */
> +    if (isalpha ((unsigned char) *cur))
> +    {
> +      cur = prev_word (bos, cur);
> +      if (cur == bos)
> +        return 0;
> +    }
> +  }
> +
> +  /* time */
> +  if (check_time (cur, tm) < 0)
> +    return 0;
> +
> +  /* day */
> +  cur = prev_word (bos, cur);
> +  if (cur == bos)
> +    return 0;
> +  if (sscanf (cur, "%d", &tm->tm_mday) != 1)
> +    return 0;
> +
> +  /* month */
> +  cur = prev_word (bos, cur);
> +  if (cur == bos)
> +    return 0;
> +  if ((tm->tm_mon = mutt_check_month (cur)) < 0)
> +    return 0;
> +
> +  /* day name */
> +  cur = prev_word (bos, cur);
> +  if (!is_day_name (cur))
> +  {
> +    dprint(1, (debugfile,
> +               "is_from_reverse_scan():  expected weekday, got: %s\n", cur));
> +    return 0;
> +  }
> +
> +  /* return path? */
> +  while (cur > bos && *(cur-1) && ISSPACE (*(cur-1)))
> +    cur--;
> +  if (cur != bos && path)
> +  {
> +    size_t len;
> +
> +    len = (size_t) (cur - bos);
> +    if (len + 1 > pathlen)
> +      len = pathlen - 1;
> +    memcpy (path, bos, len);
> +    path[len] = 0;
> +    dprint (3, (debugfile, "is_from_reverse_scan(): got return path: %s\n", 
> path));
> +  }
> +
> +  return 1;
> +}
> +
> +
> +/*
> + * A valid message separator looks like:
> + *
> + * From [ <return-path> ] <weekday> <month> <day> <time> [ <timezone> ] 
> <year>
> + */
> +
> +int is_from (const char *s, char *path, size_t pathlen, time_t *tp)
> +{
> +  struct tm tm;
> +  int rv;
> +
> +  if (path)
> +    *path = 0;
> +  if (tp)
> +    *tp = 0;
> +
> +  if (mutt_strncmp ("From ", s, 5) != 0)
> +    return 0;
> +
> +  s = next_word (s); /* skip over the From part. */
> +  if (!*s)
> +    return 0;
> +
> +  dprint (3, (debugfile, "\nis_from(): parsing: %s", s));
> +
> +  rv = option (OPTMBOXLAXPARSE) ?
> +    is_from_reverse_scan (s, path, pathlen, &tm) :
> +    is_from_forward_scan (s, path, pathlen, &tm);
> +  if (!rv)
> +    return 0;
> +
> +  dprint (3, (debugfile,
> +              "is_from(): month=%d, day=%d, hr=%d, min=%d, sec=%d, yr=%d.\n",
> +              tm.tm_mon, tm.tm_mday, tm.tm_hour, tm.tm_min, tm.tm_sec, 
> tm.tm_year));
> +
> +  if (tp)
> +  {
> +    tm.tm_isdst = -1;
> +    *tp = mutt_mktime (&tm, 0);
> +  }
>  
> -  if (tp) *tp = mutt_mktime (&tm, 0);
>    return 1;
>  }
>  
> diff --git a/init.h b/init.h
> index 63684916..237afc7b 100644
> --- a/init.h
> +++ b/init.h
> @@ -2139,6 +2139,12 @@ struct option_t MuttVars[] = {
>    ** .pp
>    ** Also see the $$move variable.
>    */
> +  { "mbox_lax_parse",        DT_BOOL, R_NONE, {.l=OPTMBOXLAXPARSE}, {.l=1} },
> +  /*
> +  ** .pp
> +  ** This version of the mbox parser allows more loose return path values in 
> the
> +  ** From_ line.
> +  */
>    { "mbox_type",     DT_MAGIC,R_NONE, {.p=&DefaultMagic}, {.l=MUTT_MBOX} },
>    /*
>    ** .pp
> diff --git a/mutt.h b/mutt.h
> index 97c02653..be3dc603 100644
> --- a/mutt.h
> +++ b/mutt.h
> @@ -502,6 +502,7 @@ enum
>    OPTMAILDIRCHECKCUR,
>    OPTMARKERS,
>    OPTMARKOLD,
> +  OPTMBOXLAXPARSE,
>    OPTMENUSCROLL,     /* scroll menu instead of implicit next-page */
>    OPTMENUMOVEOFF,    /* allow menu to scroll past last entry */
>  #if defined(USE_IMAP) || defined(USE_POP)
> -- 
> 2.52.0
> 

-- 
<https://www.alejandro-colomar.es>

Attachment: signature.asc
Description: PGP signature

Reply via email to