Re: smtpd: improve message parser

2018-08-06 Thread Gilles Chehade
On Thu, Jul 26, 2018 at 09:08:02PM +0200, Eric Faurot wrote:
> Hi,
> 
> The message parser was introduced for different reasons, when it
> turned out that altering the incoming message was actually needed.
> It is used to add required "Date" and "Message-ID" headers if missing,
> to append the local domain to incomplete addresses found in "To",
> "From" and "Cc" headers, and to provide basic sender masquerading.
> 
> The current implementation was a best-effort, but it has some
> short-comings, especially when it comes to the handling of long lines.
> So this diff provides a replacement for the whole parser, with the
> following intended improvements:
> 
> - Use a more straightforward interface rather than the callback approach.
> - Avoid unnecessary string copy.
> - Stop using fixed-size string buffers, especially on the stack.
> 
> This is a step towards better handling of message line length in the daemon.
> 
> Please test and report success/issues.
> 

it would be nice to get feedback that no one experiences regressions,
particularly because this makes it possible to solve some issues that
we have currently only worked around (long lines to name one).

eric@ has been sitting on this for months now and if we don't commit,
we'll still have this diff off-tree in 2019.

i'll commit in a week unless someone objects or eric commits first.


> Index: rfc5322.c
> ===
> RCS file: rfc5322.c
> diff -N rfc5322.c
> --- /dev/null 1 Jan 1970 00:00:00 -
> +++ rfc5322.c 26 Jul 2018 14:40:56 -
> @@ -0,0 +1,264 @@
> +/*   $OpenBSD: rfc5322.c,v 1.7 2016/02/04 22:35:17 eric Exp $*/
> +
> +/*
> + * Copyright (c) 2018 Eric Faurot 
> + *
> + * Permission to use, copy, modify, and distribute this software for any
> + * purpose with or without fee is hereby granted, provided that the above
> + * copyright notice and this permission notice appear in all copies.
> + *
> + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
> + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
> + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
> + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
> + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
> + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
> + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
> + */
> +
> +#include 
> +#include 
> +#include 
> +#include 
> +#include 
> +
> +#include "rfc5322.h"
> +
> +struct buf {
> + char*buf;
> + size_t   bufsz;
> + size_t   buflen;
> + size_t   bufmax;
> +};
> +
> +static int buf_alloc(struct buf *, size_t);
> +static int buf_grow(struct buf *, size_t);
> +static int buf_cat(struct buf *, const char *);
> +
> +struct rfc5322_parser {
> + const char  *line;
> + int  state; /* last parser state */
> + int  next;  /* parser needs data */
> + int  unfold;
> + const char  *currhdr;
> + struct buf   hdr;
> + struct buf   val;
> +};
> +
> +struct rfc5322_parser *
> +rfc5322_parser_new(void)
> +{
> + struct rfc5322_parser *parser;
> +
> + parser = calloc(1, sizeof(*parser));
> + if (parser == NULL)
> + return NULL;
> +
> + rfc5322_clear(parser);
> + parser->hdr.bufmax = 1024;
> + parser->val.bufmax = 65536;
> +
> + return parser;
> +}
> +
> +void
> +rfc5322_free(struct rfc5322_parser *parser)
> +{
> + free(parser->hdr.buf);
> + free(parser->val.buf);
> + free(parser);
> +}
> +
> +void
> +rfc5322_clear(struct rfc5322_parser *parser)
> +{
> + parser->line = NULL;
> + parser->state = RFC5322_NONE;
> + parser->next = 0;
> + parser->hdr.buflen = 0;
> + parser->val.buflen = 0;
> +}
> +
> +int
> +rfc5322_push(struct rfc5322_parser *parser, const char *line)
> +{
> + if (parser->line) {
> + errno = EALREADY;
> + return -1;
> + }
> +
> + parser->line = line;
> + parser->next = 0;
> +
> + return 0;
> +}
> +
> +int
> +rfc5322_unfold_header(struct rfc5322_parser *parser)
> +{
> + if (parser->unfold) {
> + errno = EALREADY;
> + return -1;
> + }
> +
> + if (parser->currhdr == NULL) {
> + errno = EOPNOTSUPP;
> + return -1;
> + }
> +
> + if (buf_cat(>val, parser->currhdr) == -1)
> + return -1;
> +
> + parser->currhdr = NULL;
> + parser->unfold = 1;
> +
> + return 0;
> +}
> +
> +static int
> +_rfc5322_next(struct rfc5322_parser *parser, struct rfc5322_result *res)
> +{
> + size_t len;
> + const char *pos, *line;
> +
> + line = parser->line;
> +
> + switch(parser->state) {
> +
> + case RFC5322_HEADER_START:
> + case RFC5322_HEADER_CONT:
> + res->hdr = parser->hdr.buf;
> +
> + if 

smtpd: improve message parser

2018-07-26 Thread Eric Faurot
Hi,

The message parser was introduced for different reasons, when it
turned out that altering the incoming message was actually needed.
It is used to add required "Date" and "Message-ID" headers if missing,
to append the local domain to incomplete addresses found in "To",
"From" and "Cc" headers, and to provide basic sender masquerading.

The current implementation was a best-effort, but it has some
short-comings, especially when it comes to the handling of long lines.
So this diff provides a replacement for the whole parser, with the
following intended improvements:

- Use a more straightforward interface rather than the callback approach.
- Avoid unnecessary string copy.
- Stop using fixed-size string buffers, especially on the stack.

This is a step towards better handling of message line length in the daemon.

Please test and report success/issues.

Eric.


Index: rfc5322.c
===
RCS file: rfc5322.c
diff -N rfc5322.c
--- /dev/null   1 Jan 1970 00:00:00 -
+++ rfc5322.c   26 Jul 2018 14:40:56 -
@@ -0,0 +1,264 @@
+/* $OpenBSD: rfc5322.c,v 1.7 2016/02/04 22:35:17 eric Exp $*/
+
+/*
+ * Copyright (c) 2018 Eric Faurot 
+ *
+ * Permission to use, copy, modify, and distribute this software for any
+ * purpose with or without fee is hereby granted, provided that the above
+ * copyright notice and this permission notice appear in all copies.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+ * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+ * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+ * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+ * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+ * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+ * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+ */
+
+#include 
+#include 
+#include 
+#include 
+#include 
+
+#include "rfc5322.h"
+
+struct buf {
+   char*buf;
+   size_t   bufsz;
+   size_t   buflen;
+   size_t   bufmax;
+};
+
+static int buf_alloc(struct buf *, size_t);
+static int buf_grow(struct buf *, size_t);
+static int buf_cat(struct buf *, const char *);
+
+struct rfc5322_parser {
+   const char  *line;
+   int  state; /* last parser state */
+   int  next;  /* parser needs data */
+   int  unfold;
+   const char  *currhdr;
+   struct buf   hdr;
+   struct buf   val;
+};
+
+struct rfc5322_parser *
+rfc5322_parser_new(void)
+{
+   struct rfc5322_parser *parser;
+
+   parser = calloc(1, sizeof(*parser));
+   if (parser == NULL)
+   return NULL;
+
+   rfc5322_clear(parser);
+   parser->hdr.bufmax = 1024;
+   parser->val.bufmax = 65536;
+
+   return parser;
+}
+
+void
+rfc5322_free(struct rfc5322_parser *parser)
+{
+   free(parser->hdr.buf);
+   free(parser->val.buf);
+   free(parser);
+}
+
+void
+rfc5322_clear(struct rfc5322_parser *parser)
+{
+   parser->line = NULL;
+   parser->state = RFC5322_NONE;
+   parser->next = 0;
+   parser->hdr.buflen = 0;
+   parser->val.buflen = 0;
+}
+
+int
+rfc5322_push(struct rfc5322_parser *parser, const char *line)
+{
+   if (parser->line) {
+   errno = EALREADY;
+   return -1;
+   }
+
+   parser->line = line;
+   parser->next = 0;
+
+   return 0;
+}
+
+int
+rfc5322_unfold_header(struct rfc5322_parser *parser)
+{
+   if (parser->unfold) {
+   errno = EALREADY;
+   return -1;
+   }
+
+   if (parser->currhdr == NULL) {
+   errno = EOPNOTSUPP;
+   return -1;
+   }
+
+   if (buf_cat(>val, parser->currhdr) == -1)
+   return -1;
+
+   parser->currhdr = NULL;
+   parser->unfold = 1;
+
+   return 0;
+}
+
+static int
+_rfc5322_next(struct rfc5322_parser *parser, struct rfc5322_result *res)
+{
+   size_t len;
+   const char *pos, *line;
+
+   line = parser->line;
+
+   switch(parser->state) {
+
+   case RFC5322_HEADER_START:
+   case RFC5322_HEADER_CONT:
+   res->hdr = parser->hdr.buf;
+
+   if (line && (line[0] == ' ' || line[0] == '\t')) {
+   parser->line = NULL;
+   parser->next = 1;
+   if (parser->unfold) {
+   if (buf_cat(>val, "\n") == -1 ||
+   buf_cat(>val, line) == -1)
+   return -1;
+   }
+   res->value = line;
+   return RFC5322_HEADER_CONT;
+   }
+
+   if (parser->unfold) {
+   parser->val.buflen = 0;
+   parser->unfold = 0;
+