When this new version detects invalid UTF-8 in the message body, it saves the message to dead.letter, prints the following message, and exits.
$ mail -s hello user < invalid_utf8.txt Invalid or incomplete multibyte or wide character . . . message not sent. Index: send.c =================================================================== RCS file: /cvs/src/usr.bin/mail/send.c,v retrieving revision 1.26 diff -u -p -r1.26 send.c --- send.c 8 Mar 2023 04:43:11 -0000 1.26 +++ send.c 25 Sep 2023 13:07:17 -0000 @@ -32,6 +32,11 @@ #include "rcv.h" #include "extern.h" +#include "locale.h" + +/* nutf8 holds the result of not_utf8() for the message body: 0 and 1 select the MIME headers written by puthead(), and a value above 1 makes mail1() abort the send. NOTE(review): locale.h is included with quotes rather than angle brackets -- confirm this is intended for a system header. */ +static char nutf8; +static int not_utf8(FILE *s, int len); static volatile sig_atomic_t sendsignal; /* Interrupted by a signal? */ @@ -341,6 +346,17 @@ mail1(struct header *hp, int printheader else puts("Null message body; hope that's ok"); } + + /* Classify the body with not_utf8() and rewind mtf; on a result above 1, pass the message to savedeadletter() and exit(1) without sending. NOTE(review): the first diagnostic is written to stdout by puts() and the second to stderr -- confirm this split is intended. */ + nutf8 = not_utf8(mtf, fsize(mtf)); + rewind(mtf); + if (nutf8 > 1) { + savedeadletter(mtf); + puts("Invalid or incomplete multibyte or wide character"); + fputs(". . . message not sent.\n", stderr); + exit(1); + } + /* * Now, take the user names from the combined * to and cc lists and do all the alias @@ -520,15 +536,30 @@ puthead(struct header *hp, FILE *fo, int gotcha = 0; from = hp->h_from ? 
hp->h_from : value("from"); if (from != NULL) - fprintf(fo, "From: %s\n", from), gotcha++; + fprintf(fo, "From: %s\n", from), + gotcha++; if (hp->h_to != NULL && w & GTO) - fmt("To:", hp->h_to, fo, w&GCOMMA), gotcha++; + fmt("To:", hp->h_to, fo, w&GCOMMA), + gotcha++; if (hp->h_subject != NULL && w & GSUBJECT) - fprintf(fo, "Subject: %s\n", hp->h_subject), gotcha++; + fprintf(fo, "Subject: %s\n", hp->h_subject), + gotcha++; + if (nutf8 == 0) + fprintf(fo, "MIME-Version: 1.0\n" + "Content-Type: text/plain; charset=us-ascii\n" + "Content-Transfer-Encoding: 7bit\n"), + gotcha++; + else if (nutf8 == 1) + fprintf(fo, "MIME-Version: 1.0\n" + "Content-Type: text/plain; charset=utf-8\n" + "Content-Transfer-Encoding: 8bit\n"), + gotcha++; if (hp->h_cc != NULL && w & GCC) - fmt("Cc:", hp->h_cc, fo, w&GCOMMA), gotcha++; + fmt("Cc:", hp->h_cc, fo, w&GCOMMA), + gotcha++; if (hp->h_bcc != NULL && w & GBCC) - fmt("Bcc:", hp->h_bcc, fo, w&GCOMMA), gotcha++; + fmt("Bcc:", hp->h_bcc, fo, w&GCOMMA), + gotcha++; if (gotcha && w & GNL) (void)putc('\n', fo); return(0); @@ -609,4 +640,44 @@ sendint(int s) { sendsignal = s; +} + +/* Classify the body read from `message`. The stream is read with getc() until EOF into a NUL-terminated heap buffer, then mbstowcs(NULL, s, 0) counts its characters after setlocale() requests the en_US.UTF-8 LC_CTYPE locale. Returns 0 when the count equals `len` (the caller passes fsize(mtf)), 2 when the count is negative (mbstowcs() reported a conversion error), and 1 when the count is smaller than `len`. NOTE(review): the buffer starts at `jump` (100) bytes and only grows at the top of a loop iteration, so a body of exactly 100 bytes seems to reach s[i] = '\0' with i == 100, one past the allocation -- confirm and size the first malloc() as jump + 1. NOTE(review): `n` is returned unassigned if the count exceeds `len`; `resize` is unused; the setlocale() result is unchecked and the previous locale is not restored here. NOTE(review): presumably an embedded NUL byte in the body ends the mbstowcs() count early and yields 1 -- verify. */ +static int +not_utf8(FILE *message, int len) +{ + int c, count, n, ulen; + size_t i, resize; + size_t jump = 100; + unsigned char *s = NULL; + + setlocale(LC_CTYPE, "en_US.UTF-8"); + + if (s == NULL && (s = malloc(jump)) == NULL) + err(1, NULL); + + i = count = 0; + while ((c = getc(message)) != EOF) { + if (s == NULL || count == jump) { + if ((s = realloc(s, i + jump + 1)) == NULL) + err(1, NULL); + count = 0; + } + s[i++] = c; + count++; + } + + s[i] = '\0'; + + ulen = mbstowcs(NULL, s, 0); + + if (ulen == len) + n = 0; + else if (ulen < 0) + n = 2; + else if (ulen < len) + n = 1; + + free(s); + return n; } -- Walter