Hi, The message parser was introduced for different reasons, when it turned out that altering the incoming message was actually needed. It is used to add required "Date" and "Message-ID" headers if missing, to append the local domain to incomplete addresses found in "To", "From" and "Cc" headers, and to provide basic sender masquerading.
The current implementation was a best-effort, but it has some short-comings, especially when it comes to the handling of long lines. So this diff provides a replacement for the whole parser, with the following intended improvements: - Use a more straightforward interface rather than the callback approach. - Avoid unnecessary string copy. - Stop using fixed-size string buffers, especially on the stack. This is a step towards better handling of message line length in the daemon. Please test and report success/issues. Eric. Index: rfc5322.c =================================================================== RCS file: rfc5322.c diff -N rfc5322.c --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ rfc5322.c 26 Jul 2018 14:40:56 -0000 @@ -0,0 +1,264 @@ +/* $OpenBSD: rfc5322.c,v 1.7 2016/02/04 22:35:17 eric Exp $ */ + +/* + * Copyright (c) 2018 Eric Faurot <[email protected]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +#include <ctype.h> +#include <errno.h> +#include <limits.h> +#include <stdlib.h> +#include <string.h> + +#include "rfc5322.h" + +struct buf { + char *buf; + size_t bufsz; + size_t buflen; + size_t bufmax; +}; + +static int buf_alloc(struct buf *, size_t); +static int buf_grow(struct buf *, size_t); +static int buf_cat(struct buf *, const char *); + +struct rfc5322_parser { + const char *line; + int state; /* last parser state */ + int next; /* parser needs data */ + int unfold; + const char *currhdr; + struct buf hdr; + struct buf val; +}; + +struct rfc5322_parser * +rfc5322_parser_new(void) +{ + struct rfc5322_parser *parser; + + parser = calloc(1, sizeof(*parser)); + if (parser == NULL) + return NULL; + + rfc5322_clear(parser); + parser->hdr.bufmax = 1024; + parser->val.bufmax = 65536; + + return parser; +} + +void +rfc5322_free(struct rfc5322_parser *parser) +{ + free(parser->hdr.buf); + free(parser->val.buf); + free(parser); +} + +void +rfc5322_clear(struct rfc5322_parser *parser) +{ + parser->line = NULL; + parser->state = RFC5322_NONE; + parser->next = 0; + parser->hdr.buflen = 0; + parser->val.buflen = 0; +} + +int +rfc5322_push(struct rfc5322_parser *parser, const char *line) +{ + if (parser->line) { + errno = EALREADY; + return -1; + } + + parser->line = line; + parser->next = 0; + + return 0; +} + +int +rfc5322_unfold_header(struct rfc5322_parser *parser) +{ + if (parser->unfold) { + errno = EALREADY; + return -1; + } + + if (parser->currhdr == NULL) { + errno = EOPNOTSUPP; + return -1; + } + + if (buf_cat(&parser->val, parser->currhdr) == -1) + return -1; + + parser->currhdr = NULL; + parser->unfold = 1; + + return 0; +} + +static int +_rfc5322_next(struct rfc5322_parser *parser, struct rfc5322_result *res) +{ + size_t len; + const char *pos, *line; + + line = parser->line; + + switch(parser->state) { + + case RFC5322_HEADER_START: + case RFC5322_HEADER_CONT: + res->hdr = parser->hdr.buf; + + if (line && (line[0] == ' ' || line[0] == '\t')) { + parser->line = NULL; + parser->next = 1; + if (parser->unfold) { + if (buf_cat(&parser->val, "\n") == -1 || + buf_cat(&parser->val, line) == -1) + return -1; + } + res->value = line; + return RFC5322_HEADER_CONT; + } + + if (parser->unfold) { + parser->val.buflen = 0; + parser->unfold = 0; + res->value = parser->val.buf; + } + return RFC5322_HEADER_END; + + case RFC5322_NONE: + case RFC5322_HEADER_END: + if (line && (pos = strchr(line, ':'))) { + len = pos - line; + if (buf_grow(&parser->hdr, len + 1) == -1) + return -1; + (void)memcpy(parser->hdr.buf, line, len); + parser->hdr.buf[len] = '\0'; + parser->hdr.buflen = len + 1; + parser->line = NULL; + parser->next = 1; + parser->currhdr = pos + 1; + res->hdr = parser->hdr.buf; + res->value = pos + 1; + return RFC5322_HEADER_START; + } + + return RFC5322_END_OF_HEADERS; + + case RFC5322_END_OF_HEADERS: + if (line == NULL) + return RFC5322_END_OF_MESSAGE; + + if (line[0] == '\0') { + parser->line = NULL; + parser->next = 1; + res->value = line; + return RFC5322_BODY_START; + } + + errno = EFTYPE; + return -1; + + case RFC5322_BODY_START: + case RFC5322_BODY: + if (line == NULL) + return RFC5322_END_OF_MESSAGE; + + parser->line = NULL; + parser->next = 1; + res->value = line; + return RFC5322_BODY; + + case RFC5322_END_OF_MESSAGE: + errno = ENOMSG; + return -1; + + default: + errno = EINVAL; + return -1; + } +} + +int +rfc5322_next(struct rfc5322_parser *parser, struct rfc5322_result *res) +{ + memset(res, 0, sizeof(*res)); + + if (parser->next) + return RFC5322_NONE; + + return (parser->state = _rfc5322_next(parser, res)); +} + +static int +buf_alloc(struct buf *b, size_t need) +{ + char *buf; + size_t alloc; + + if (b->buf && b->bufsz >= need) + return 0; + + if (need >= b->bufmax) { + errno = ERANGE; + return -1; + } + +#define N 256 + alloc = N * (need / N) + ((need % N) ? N : 0); +#undef N + buf = reallocarray(b->buf, alloc, 1); + if (buf == NULL) + return -1; + + b->buf = buf; + b->bufsz = alloc; + + return 0; +} + +static int +buf_grow(struct buf *b, size_t sz) +{ + if (SIZE_T_MAX - b->buflen <= sz) { + errno = ERANGE; + return -1; + } + + return buf_alloc(b, b->buflen + sz); +} + +static int +buf_cat(struct buf *b, const char *s) +{ + size_t len = strlen(s); + + if (buf_grow(b, len + 1) == -1) + return -1; + + (void)memmove(b->buf + b->buflen, s, len + 1); + b->buflen += len; + return 0; +} Index: rfc5322.h =================================================================== RCS file: rfc5322.h diff -N rfc5322.h --- /dev/null 1 Jan 1970 00:00:00 -0000 +++ rfc5322.h 26 Jul 2018 14:40:56 -0000 @@ -0,0 +1,41 @@ +/* $OpenBSD: rfc5322.h,v 1.4 2015/11/05 08:55:09 gilles Exp $ */ + +/* + * Copyright (c) 2018 Eric Faurot <[email protected]> + * + * Permission to use, copy, modify, and distribute this software for any + * purpose with or without fee is hereby granted, provided that the above + * copyright notice and this permission notice appear in all copies. + * + * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES + * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF + * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR + * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES + * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN + * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF + * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE. + */ + +struct rfc5322_result { + const char *hdr; + const char *value; +}; + +#define RFC5322_ERR -1 +#define RFC5322_NONE 0 +#define RFC5322_HEADER_START 1 +#define RFC5322_HEADER_CONT 2 +#define RFC5322_HEADER_END 3 +#define RFC5322_END_OF_HEADERS 4 +#define RFC5322_BODY_START 5 +#define RFC5322_BODY 6 +#define RFC5322_END_OF_MESSAGE 7 + +struct rfc5322_parser; + +struct rfc5322_parser *rfc5322_parser_new(void); +void rfc5322_free(struct rfc5322_parser *); +void rfc5322_clear(struct rfc5322_parser *); +int rfc5322_push(struct rfc5322_parser *, const char *); +int rfc5322_next(struct rfc5322_parser *, struct rfc5322_result *); +int rfc5322_unfold_header(struct rfc5322_parser *); Index: smtp_session.c =================================================================== RCS file: /cvs/src/usr.sbin/smtpd/smtp_session.c,v retrieving revision 1.334 diff -u -p -r1.334 smtp_session.c --- smtp_session.c 25 Jul 2018 16:00:48 -0000 1.334 +++ smtp_session.c 26 Jul 2018 14:40:57 -0000 @@ -44,6 +44,7 @@ #include "smtpd.h" #include "log.h" #include "ssl.h" +#include "rfc5322.h" #define SMTP_LINE_MAX 16384 #define DATA_HIWAT 65535 @@ -79,7 +80,8 @@ enum { TX_ERROR_IO, TX_ERROR_LOOP, TX_ERROR_MALFORMED, - TX_ERROR_RESOURCES + TX_ERROR_RESOURCES, + TX_ERROR_INTERNAL, }; enum smtp_command { @@ -117,10 +119,10 @@ struct smtp_tx { size_t datain; size_t odatalen; FILE *ofile; - int hdrdone; + struct rfc5322_parser *parser; int rcvcount; - int skiphdr; - struct rfc2822_parser rfc2822_parser; + int has_date; + int has_message_id; }; struct smtp_session { @@ -215,33 +217,6 @@ static struct tree wait_ssl_init; static struct tree wait_ssl_verify; static void -header_default_callback(const struct rfc2822_header *hdr, void *arg) -{ - struct smtp_tx *tx = arg; - struct rfc2822_line *l; - - if (smtp_message_printf(tx, "%s:", hdr->name) == -1) - return; - - TAILQ_FOREACH(l, &hdr->lines, next) - if (smtp_message_printf(tx, "%s\n", l->buffer) == -1) - return; -} - -static void -dataline_callback(const char *line, void *arg) -{ - struct smtp_tx *tx = arg; - - smtp_message_printf(tx, "%s\n", line); -} - -static void -header_bcc_callback(const struct rfc2822_header *hdr, void *arg) -{ -} - -static void header_append_domain_buffer(char *buffer, char *domain, size_t len) { size_t i; @@ -404,45 +379,50 @@ header_address_rewrite_buffer(char *buff } static void -header_domain_append_callback(const struct rfc2822_header *hdr, void *arg) +header_domain_append_callback(struct smtp_tx *tx, const char *hdr, + const char *val) { - struct smtp_session *s; - struct smtp_tx *tx = arg; - struct rfc2822_line *l; - size_t i, j; + size_t i, j, linelen; int escape, quote, comment, skip; char buffer[APPEND_DOMAIN_BUFFER_SIZE]; + const char *line, *end; - s = tx->session; - - if (smtp_message_printf(tx, "%s:", hdr->name) == -1) + if (smtp_message_printf(tx, "%s:", hdr) == -1) return; j = 0; escape = quote = comment = skip = 0; memset(buffer, 0, sizeof buffer); - TAILQ_FOREACH(l, &hdr->lines, next) { - for (i = 0; i < strlen(l->buffer); ++i) { - if (l->buffer[i] == '(' && !escape && !quote) + for (line = val; line; line = end) { + end = strchr(line, '\n'); + if (end) { + linelen = end - line; + end++; + } + else + linelen = strlen(line); + + for (i = 0; i < linelen; ++i) { + if (line[i] == '(' && !escape && !quote) comment++; - if (l->buffer[i] == '"' && !escape && !comment) + if (line[i] == '"' && !escape && !comment) quote = !quote; - if (l->buffer[i] == ')' && !escape && !quote && comment) + if (line[i] == ')' && !escape && !quote && comment) comment--; - if (l->buffer[i] == '\\' && !escape && !comment && !quote) + if (line[i] == '\\' && !escape && !comment && !quote) escape = 1; else escape = 0; /* found a separator, buffer contains a full address */ - if (l->buffer[i] == ',' && !escape && !quote && !comment) { - if (!skip && j + strlen(s->listener->hostname) + 1 < sizeof buffer) { - header_append_domain_buffer(buffer, s->listener->hostname, sizeof buffer); - if (s->flags & SF_AUTHENTICATED && - s->listener->sendertable[0] && - s->listener->flags & F_MASQUERADE && - !(strcasecmp(hdr->name, "From"))) + if (line[i] == ',' && !escape && !quote && !comment) { + if (!skip && j + strlen(tx->session->listener->hostname) + 1 < sizeof buffer) { + header_append_domain_buffer(buffer, tx->session->listener->hostname, sizeof buffer); + if (tx->session->flags & SF_AUTHENTICATED && + tx->session->listener->sendertable[0] && + tx->session->listener->flags & F_MASQUERADE && + !(strcasecmp(hdr, "From"))) header_address_rewrite_buffer(buffer, mailaddr_to_text(&tx->evp.sender), sizeof buffer); } @@ -454,11 +434,11 @@ header_domain_append_callback(const stru } else { if (skip) { - if (smtp_message_printf(tx, "%c", l->buffer[i]) == -1) + if (smtp_message_printf(tx, "%c", line[i]) == -1) return; } else { - buffer[j++] = l->buffer[i]; + buffer[j++] = line[i]; if (j == sizeof (buffer) - 1) { if (smtp_message_printf(tx, "%s", buffer) == -1) return; @@ -487,12 +467,12 @@ header_domain_append_callback(const stru /* end of header, if buffer is not empty we'll process it */ if (buffer[0]) { - if (j + strlen(s->listener->hostname) + 1 < sizeof buffer) { - header_append_domain_buffer(buffer, s->listener->hostname, sizeof buffer); - if (s->flags & SF_AUTHENTICATED && - s->listener->sendertable[0] && - s->listener->flags & F_MASQUERADE && - !(strcasecmp(hdr->name, "From"))) + if (j + strlen(tx->session->listener->hostname) + 1 < sizeof buffer) { + header_append_domain_buffer(buffer, tx->session->listener->hostname, sizeof buffer); + if (tx->session->flags & SF_AUTHENTICATED && + tx->session->listener->sendertable[0] && + tx->session->listener->flags & F_MASQUERADE && + !(strcasecmp(hdr, "From"))) header_address_rewrite_buffer(buffer, mailaddr_to_text(&tx->evp.sender), sizeof buffer); } @@ -501,19 +481,6 @@ header_domain_append_callback(const stru } static void -header_missing_callback(const char *header, void *arg) -{ - struct smtp_tx *tx = arg; - - if (strcasecmp(header, "message-id") == 0) - smtp_message_printf(tx, "Message-Id: <%016"PRIx64"@%s>\n", - generate_uid(), tx->session->listener->hostname); - - if (strcasecmp(header, "date") == 0) - smtp_message_printf(tx, "Date: %s\n", time_to_text(tx->time)); -} - -static void smtp_session_init(void) { static int init = 0; @@ -1848,26 +1815,9 @@ smtp_tx(struct smtp_session *s) if (s->flags & SF_AUTHENTICATED) tx->evp.flags |= EF_AUTHENTICATED; - /* Setup parser and callbacks */ - rfc2822_parser_init(&tx->rfc2822_parser); - rfc2822_header_default_callback(&tx->rfc2822_parser, - header_default_callback, tx); - rfc2822_header_callback(&tx->rfc2822_parser, "bcc", - header_bcc_callback, tx); - rfc2822_header_callback(&tx->rfc2822_parser, "from", - header_domain_append_callback, tx); - rfc2822_header_callback(&tx->rfc2822_parser, "to", - header_domain_append_callback, tx); - rfc2822_header_callback(&tx->rfc2822_parser, "cc", - header_domain_append_callback, tx); - rfc2822_body_callback(&tx->rfc2822_parser, - dataline_callback, tx); - - if (s->listener->local || s->listener->port == htons(587)) { - rfc2822_missing_header_callback(&tx->rfc2822_parser, "date", - header_missing_callback, tx); - rfc2822_missing_header_callback(&tx->rfc2822_parser, "message-id", - header_missing_callback, tx); + if ((tx->parser = rfc5322_parser_new()) == NULL) { + free(tx); + return 0; } return 1; @@ -1878,7 +1828,7 @@ smtp_tx_free(struct smtp_tx *tx) { struct smtp_rcpt *rcpt; - rfc2822_parser_release(&tx->rfc2822_parser); + rfc5322_free(tx->parser); while ((rcpt = TAILQ_FIRST(&tx->rcpts))) { TAILQ_REMOVE(&tx->rcpts, rcpt, entry); @@ -2066,66 +2016,133 @@ smtp_tx_rollback(struct smtp_tx *tx) static int smtp_tx_dataline(struct smtp_tx *tx, const char *line) { - int ret; + struct rfc5322_result res; + int r; log_trace(TRACE_SMTP, "<<< [MSG] %s", line); if (!strcmp(line, ".")) { log_trace(TRACE_SMTP, "<<< [EOM]"); - if (!tx->error) - rfc2822_parser_flush(&tx->rfc2822_parser); - return 1; + if (tx->error) + return 1; + line = NULL; } + else { + /* ignore data line if an error is set */ + if (tx->error) + return 0; - /* ignore data line if an error is set */ - if (tx->error) - return 0; + /* escape lines starting with a '.' */ + if (line[0] == '.') + line += 1; + + /* account for newline */ + tx->datain += strlen(line) + 1; + if (tx->datain > env->sc_maxsize) { + tx->error = TX_ERROR_SIZE; + return 0; + } + } - /* escape lines starting with a '.' */ - if (line[0] == '.') - line += 1; - - /* account for newline */ - tx->datain += strlen(line) + 1; - if (tx->datain > env->sc_maxsize) { - tx->error = TX_ERROR_SIZE; + if (rfc5322_push(tx->parser, line) == -1) { + log_warnx("failed to push dataline"); + tx->error = TX_ERROR_INTERNAL; return 0; } - if (!tx->hdrdone) { - - /* folded header that must be skipped */ - if (isspace((unsigned char)line[0]) && tx->skiphdr) + for(;;) { + r = rfc5322_next(tx->parser, &res); + switch (r) { + case -1: + if (errno == ENOMEM) + tx->error = TX_ERROR_INTERNAL; + else + tx->error = TX_ERROR_MALFORMED; return 0; - tx->skiphdr = 0; - /* BCC should be stripped from headers */ - if (strncasecmp("bcc:", line, 4) == 0) { - tx->skiphdr = 1; + case RFC5322_NONE: + /* Need more data */ return 0; - } - /* check for loop */ - if (strncasecmp("Received: ", line, 10) == 0) - tx->rcvcount++; - if (tx->rcvcount == MAX_HOPS_COUNT) { - tx->error = TX_ERROR_LOOP; - log_warnx("warn: loop detected"); - return 0; - } + case RFC5322_HEADER_START: + /* ignore bcc */ + if (!strcasecmp("Bcc", res.hdr)) + continue; + + if (!strcasecmp("To", res.hdr) || + !strcasecmp("Cc", res.hdr) || + !strcasecmp("From", res.hdr)) { + rfc5322_unfold_header(tx->parser); + continue; + } - if (line[0] == '\0') - tx->hdrdone = 1; - } + if (!strcasecmp("Received", res.hdr)) { + if (++tx->rcvcount >= MAX_HOPS_COUNT) { + log_warnx("warn: loop detected"); + tx->error = TX_ERROR_LOOP; + return 0; + } + } + else if (!tx->has_date && !strcasecmp("Date", res.hdr)) + tx->has_date = 1; + else if (!tx->has_message_id && + !strcasecmp("Message-Id", res.hdr)) + tx->has_message_id = 1; + + smtp_message_printf(tx, "%s:%s\n", res.hdr, res.value); + break; + + case RFC5322_HEADER_CONT: - ret = rfc2822_parser_feed(&tx->rfc2822_parser, line); - if (ret == -1) - tx->error = TX_ERROR_RESOURCES; + if (!strcasecmp("Bcc", res.hdr) || + !strcasecmp("To", res.hdr) || + !strcasecmp("Cc", res.hdr) || + !strcasecmp("From", res.hdr)) + continue; - if (ret == 0) - tx->error = TX_ERROR_MALFORMED; + smtp_message_printf(tx, "%s\n", res.value); + break; + + case RFC5322_HEADER_END: + if (!strcasecmp("To", res.hdr) || + !strcasecmp("Cc", res.hdr) || + !strcasecmp("From", res.hdr)) + header_domain_append_callback(tx, res.hdr, + res.value); + break; + + case RFC5322_END_OF_HEADERS: + if (tx->session->listener->local || + tx->session->listener->port == 587) { - return 0; + if (!tx->has_date) { + log_debug("debug: %p: adding Date", tx); + smtp_message_printf(tx, "Date: %s\n", + time_to_text(tx->time)); + } + + if (!tx->has_message_id) { + log_debug("debug: %p: adding Message-ID", tx); + smtp_message_printf(tx, + "Message-ID: <%016"PRIx64"@%s>\n", + generate_uid(), + tx->session->listener->hostname); + } + } + break; + + case RFC5322_BODY_START: + case RFC5322_BODY: + smtp_message_printf(tx, "%s\n", res.value); + break; + + case RFC5322_END_OF_MESSAGE: + return 1; + + default: + fatalx("%s", __func__); + } + } } static void Index: smtpd.h =================================================================== RCS file: /cvs/src/usr.sbin/smtpd/smtpd.h,v retrieving revision 1.556 diff -u -p -r1.556 smtpd.h --- smtpd.h 25 Jul 2018 16:00:48 -0000 1.556 +++ smtpd.h 26 Jul 2018 14:40:57 -0000 @@ -30,8 +30,6 @@ #include "smtpd-api.h" #include "ioev.h" -#include "rfc2822.h" - #define CHECK_IMSG_DATA_SIZE(imsg, expected_sz) do { \ if ((imsg)->hdr.len - IMSG_HEADER_SIZE != (expected_sz)) \ fatalx("smtpd: imsg %d: data size expected %zd got %zd",\ Index: smtpd/Makefile =================================================================== RCS file: /cvs/src/usr.sbin/smtpd/smtpd/Makefile,v retrieving revision 1.92 diff -u -p -r1.92 Makefile --- smtpd/Makefile 25 Jul 2018 16:00:48 -0000 1.92 +++ smtpd/Makefile 26 Jul 2018 14:40:57 -0000 @@ -36,6 +36,7 @@ SRCS+= pony.c SRCS+= queue.c SRCS+= queue_backend.c SRCS+= resolver.c +SRCS+= rfc5322.c SRCS+= ruleset.c SRCS+= runq.c SRCS+= scheduler.c @@ -52,9 +53,6 @@ SRCS+= to.c SRCS+= tree.c SRCS+= util.c SRCS+= waitq.c - -# RFC parsers -SRCS+= rfc2822.c # backends SRCS+= compress_gzip.c
