Opening a new clean thread.

First of all, I'm sending this message from my patched mail(1), take a
look at the headers. :-)

I tried what each and everyone suggested to me, the only way to know for
sure what works and what doesn't.  If you decide that this modification
is too much for a simple application as mail, that's your decision, my
work could still be useful if in the future you change your mind.
Anyway, it was useful for me to learn.

I renamed my function "isutf8" to make the return values more logical
and easy to understand.  I abused Otto Moerbeek's kindness asking him
questions about malloc to the point he sent me a vacation email.  I'll
be banned from his email server for at least a couple of years.  Sorry
Otto! :-)  He helped by showing me more readable ways of using realloc, the
realloc loop in the code is from him.  As Omar Polo did, Otto also
advised me not to use variable length arrays (I also did some research
about it.)  He also told me that the fsize() function in mail(1) is
likely to fail when reading from stdin; besides, the file can grow while
it is being read.  So, I removed the "int len" from my function
arguments and fed mbstowcs() with the new read size (i.e. "i").

Otto also showed me the "DEBUG=-g make" trick to get the useful
addr2line reports with kdump.  According to these tests, it seems that my
patch doesn't add leaks:

******** Start dump mail *******
M=8 I=1 F=1 U=1 J=2 R=0 X=0 C=-935363880 cache=0 G=4096
Leak report:
              f     sum      #    avg
            0x0   46464    327    142 addr2line -e . 0x0
  0x34f10a4b153   20480      1  20480 addr2line -e /usr/lib/libc.so.97.1 0x4d153
  0x34f10a961dc   55910      1  55910 addr2line -e /usr/lib/libc.so.97.1 0x981dc
  0x34f10a96470  410576     25  16423 addr2line -e /usr/lib/libc.so.97.1 0x98470
  0x34f10abb562   22376      1  22376 addr2line -e /usr/lib/libc.so.97.1 0xbd562

******** End dump mail *******

$ addr2line -e /usr/lib/libc.so.97.1 0x4d153
/usr/src/lib/libc/stdio/makebuf.c:62
$ addr2line -e /usr/lib/libc.so.97.1 0x981dc
/usr/src/lib/libc/locale/rune.c:258
$ addr2line -e /usr/lib/libc.so.97.1 0x98470
/usr/src/lib/libc/locale/rune.c:137
$ addr2line -e /usr/lib/libc.so.97.1 0xbd562
/usr/src/lib/libc/time/localtime.c:1121


I decided to add another header, "Message-ID".  Sending mails from my
patched mail(1), I realized that it's convenient for the MUA itself to
add the Message-ID (the one in this message was generated by my patch).
If you relegate this to the MTA, your MUA will save the local copy
without that header; then, if you later wish to read your mail with a
thread-capable MUA (e.g. Mutt), those messages won't be in the right
place.


       Summary of what OpenBSD mail(1) does with this patch
       ----------------------------------------------------

  The string used as Message-ID is equivalent to this shell command:

     $ echo "Message-ID: <$(date +%Y%m%d.%H%M%S@$(hostname))>"

  When the body is all ASCII, mail(1) adds these headers:

    MIME-Version: 1.0
    Content-Type: text/plain; charset=us-ascii
    Content-Transfer-Encoding: 7bit

  When valid UTF-8 is detected in the body, mail(1) adds these headers:

    MIME-Version: 1.0
    Content-Type: text/plain; charset=utf-8
    Content-Transfer-Encoding: 8bit

  When invalid UTF-8 characters are found in the body, it adds only
  the Message-ID.



Index: send.c
===================================================================
RCS file: /cvs/src/usr.bin/mail/send.c,v
retrieving revision 1.26
diff -u -p -r1.26 send.c
--- send.c      8 Mar 2023 04:43:11 -0000       1.26
+++ send.c      1 Oct 2023 07:47:30 -0000
@@ -32,7 +32,10 @@
 
 #include "rcv.h"
 #include "extern.h"
+#include "locale.h"
 
+static int utf8body;
+static int isutf8(FILE *s);                    /* UTF-8 check  */
 static volatile sig_atomic_t sendsignal;       /* Interrupted by a signal? */
 
 /*
@@ -341,6 +344,14 @@ mail1(struct header *hp, int printheader
                else
                        puts("Null message body; hope that's ok");
        }
+
+       /* UTF-8 check */
+       setlocale(LC_CTYPE, "en_US.UTF-8");
+       if (fsize(mtf) != 0) {
+               utf8body = isutf8(mtf);
+               rewind(mtf);
+       }
+
        /*
         * Now, take the user names from the combined
         * to and cc lists and do all the alias
@@ -516,19 +527,42 @@ puthead(struct header *hp, FILE *fo, int
 {
        int gotcha;
        char *from;
+       time_t t = time(NULL);
+       struct tm tm = *localtime(&t);
+       char hostname[1024];
+       gethostname(hostname, 1023);
 
        gotcha = 0;
        from = hp->h_from ? hp->h_from : value("from");
        if (from != NULL)
-               fprintf(fo, "From: %s\n", from), gotcha++;
+               fprintf(fo, "From: %s\n", from),
+                   gotcha++;
        if (hp->h_to != NULL && w & GTO)
-               fmt("To:", hp->h_to, fo, w&GCOMMA), gotcha++;
+               fmt("To:", hp->h_to, fo, w&GCOMMA),
+                   gotcha++;
        if (hp->h_subject != NULL && w & GSUBJECT)
-               fprintf(fo, "Subject: %s\n", hp->h_subject), gotcha++;
+               fprintf(fo, "Subject: %s\n", hp->h_subject),
+                   gotcha++;
+       fprintf(fo, "Message-ID: <%d%02d%02d.%02d%02d%02d@%s>\n",
+           tm.tm_year + 1900, tm.tm_mon + 1, tm.tm_mday,
+           tm.tm_hour, tm.tm_min, tm.tm_sec, hostname),
+                   gotcha++;
+       if (utf8body > 0)
+               fprintf(fo, "MIME-Version: 1.0\n"
+                   "Content-Type: text/plain; charset=utf-8\n"
+                   "Content-Transfer-Encoding: 8bit\n"),
+                   gotcha++;
+       else if (utf8body == 0)
+               fprintf(fo, "MIME-Version: 1.0\n"
+                   "Content-Type: text/plain; charset=us-ascii\n"
+                   "Content-Transfer-Encoding: 7bit\n"),
+                   gotcha++;
        if (hp->h_cc != NULL && w & GCC)
-               fmt("Cc:", hp->h_cc, fo, w&GCOMMA), gotcha++;
+               fmt("Cc:", hp->h_cc, fo, w&GCOMMA),
+                   gotcha++;
        if (hp->h_bcc != NULL && w & GBCC)
-               fmt("Bcc:", hp->h_bcc, fo, w&GCOMMA), gotcha++;
+               fmt("Bcc:", hp->h_bcc, fo, w&GCOMMA),
+                   gotcha++;
        if (gotcha && w & GNL)
                (void)putc('\n', fo);
        return(0);
@@ -607,6 +641,44 @@ savemail(char *name, FILE *fi)
 void
 sendint(int s)
 {
-
        sendsignal = s;
+}
+
+/* UTF-8 check: reads fp to EOF; returns 0 (ASCII), 1 (UTF-8), <0 (invalid). */
+static int
+isutf8(FILE *fp)
+{
+       unsigned char *p = NULL;
+       size_t size = 0;
+       size_t i = 0;
+       int c, n, len;
+
+       setlocale(LC_CTYPE, "en_US.UTF-8");
+
+       while ((c = getc(fp)) != EOF) {
+               if (i == size) {        /* buffer full: grow by 100 bytes */
+                       p = realloc(p, size + 100);
+                       if (p == NULL)
+                               err(1, NULL);
+                       size += 100;
+               }
+               p[i++] = c;
+       }
+       if (i == size) {        /* no spare byte left for the terminator */
+               p = realloc(p, size + 1);
+               if (p == NULL)
+                       err(1, NULL);
+       }
+       p[i] = '\0';    /* mbstowcs() needs a NUL-terminated string */
+
+       len = mbstowcs(NULL, p, 0);     /* NULL dst: only count the characters */
+       if (len == i)
+               n = 0;          /* as many characters as bytes: ASCII */
+       else if (len < i)
+               n = 1;          /* fewer characters than bytes: UTF-8 */
+       if (len < 0)
+               n = len;        /* mbstowcs() failed: invalid UTF-8 */
+
+       free(p);
+       return n;
 }


Reply via email to