> On Thu, Sep 21, 2023 at 02:12:50PM +0200, Stefan Sperling wrote: > > Your implementation lacks proper bounds checking. It accesses > > s[i + 3] based purely on the contents of the input string, without > > checking whether len < i + 3. Entering the while (i != len) loop with
You surely meant "len > i + 3" (greater than). The patch below is wrong. I know it doesn't matter anymore but I'm still clarifying so that no one wastes time trying the patch. > > > > Index: send.c > =================================================================== > RCS file: /cvs/src/usr.bin/mail/send.c,v > retrieving revision 1.26 > diff -u -p -r1.26 send.c > --- send.c 8 Mar 2023 04:43:11 -0000 1.26 > +++ send.c 21 Sep 2023 14:16:08 -0000 > @@ -33,6 +33,10 @@ > #include "rcv.h" > #include "extern.h" > > +/* To check charset of the message and add the appropiate MIME headers */ > +static char nutf8; > +static int not_utf8(FILE *s, int len); > + > static volatile sig_atomic_t sendsignal; /* Interrupted by a signal? */ > > /* > @@ -341,6 +345,11 @@ mail1(struct header *hp, int printheader > else > puts("Null message body; hope that's ok"); > } > + > + /* Check non valid UTF-8 characters in the message */ > + nutf8 = not_utf8(mtf, fsize(mtf)); > + rewind(mtf); > + > /* > * Now, take the user names from the combined > * to and cc lists and do all the alias > @@ -525,6 +534,14 @@ puthead(struct header *hp, FILE *fo, int > fmt("To:", hp->h_to, fo, w&GCOMMA), gotcha++; > if (hp->h_subject != NULL && w & GSUBJECT) > fprintf(fo, "Subject: %s\n", hp->h_subject), gotcha++; > + if (nutf8 == 0) > + fprintf(fo, "MIME-Version: 1.0\n" > + "Content-Type: text/plain; charset=us-ascii\n" > + "Content-Transfer-Encoding: 7bit\n"), gotcha++; > + else if (nutf8 == 1) > + fprintf(fo, "MIME-Version: 1.0\n" > + "Content-Type: text/plain; charset=utf-8\n" > + "Content-Transfer-Encoding: 8bit\n"), gotcha++; > if (hp->h_cc != NULL && w & GCC) > fmt("Cc:", hp->h_cc, fo, w&GCOMMA), gotcha++; > if (hp->h_bcc != NULL && w & GBCC) > @@ -609,4 +626,60 @@ sendint(int s) > { > > sendsignal = s; > +} > + > +/* Search non valid UTF-8 characters in the message */ > +static int > +not_utf8(FILE *message, int len) > +{ > + int i, n, nonascii; > + char c; > + unsigned char s[len + 1]; > + > + i = 0; > + 
while ((c = getc(message)) != EOF) > + s[i++] = c; > + > + s[i] = '\0'; > + > + i = n = nonascii = 0; > + while (i != len) > + if (s[i] <= 0x7f) { > + i++; > + /* Two bytes case */ > + } else if (len < i + 1 && s[i] >= 0xc2 && s[i] < 0xe0 && > + s[i + 1] >= 0x80 && s[i + 1] <= 0xbf) { > + i += 2; > + nonascii++; > + /* Special three bytes case */ > + } else if ((len < i + 2 && s[i] == 0xe0 && > + s[i + 1] >= 0xa0 && s[i + 1] <= 0xbf && > + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf) || > + /* Three bytes case */ > + (len < i + 2 && s[i] > 0xe0 && s[i] < 0xf0 && > + s[i + 1] >= 0x80 && s[i + 1] <= 0xbf && > + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf)) { > + i += 3; > + nonascii++; > + /* Special four bytes case */ > + } else if ((len < i + 3 && s[i] == 0xf0 && > + s[i + 1] >= 0x90 && s[i + 1] <= 0xbf && > + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf && > + s[i + 3] >= 0x80 && s[i + 3] <= 0xbf) || > + /* Four bytes case */ > + (len < i + 3 && s[i] > 0xf0 && > + s[i + 1] >= 0x80 && s[i + 1] <= 0xbf && > + s[i + 2] >= 0x80 && s[i + 2] <= 0xbf && > + s[i + 3] >= 0x80 && s[i + 3] <= 0xbf)) { > + i += 4; > + nonascii++; > + } else { > + n = i + 1; > + break; > + } > + > + if (nonascii) > + n++; > + > + return n; > } > > > -- > Walter -- Walter