decodeBase64() input need not be NUL-terminated [patch]

Stephen Gildea Wed, 04 Jun 2025 10:08:34 -0700

Change decodeBase64() to accept a length for the encoded string and
not assume it is NUL-terminated.


In all cases, I found the length of the encoded string was already
available to the caller.  That let me remove a strlen() in
decodeBase64 and not add a strlen() to any call site.  One call site
even got simpler: in mhparse.c I was able to remove an apologetic
comment.

This change is part of an effort to move the nmh code base away
from assuming C strings and towards using a length for all data.
A comment in popsbr.c says this is a good idea.

And that effort is also deep background for fixing bug #66407, inc
long lines.  The fix there is complicated by how awkward it is for
the current code to deal with an arbitrary sequence of bytes from
the middle of a stream.

diff --git a/sbr/base64.h b/sbr/base64.h
index aea3bdde..64547c25 100644
--- a/sbr/base64.h
+++ b/sbr/base64.h
@@ -8,7 +8,7 @@
 int writeBase64aux(FILE *, FILE *, int);
 int writeBase64(const unsigned char *, size_t, unsigned char *);
 int writeBase64raw(const unsigned char *, size_t, unsigned char *);
-int decodeBase64(const char *, unsigned char **, size_t *, int);
+int decodeBase64(const char *, size_t, unsigned char **, size_t *, int);
 void hexify(const unsigned char *, size_t, char **);
 
 /* Includes trailing NUL. */
diff --git a/sbr/base64.c b/sbr/base64.c
index 5548a684..153b954f 100644
--- a/sbr/base64.c
+++ b/sbr/base64.c
@@ -248,21 +248,21 @@ static const unsigned char b642nib[0x80] = {
  *                skipped
  */
 int
-decodeBase64 (const char *encoded, unsigned char **decoded, size_t *len,
-	      int skip_crs)
+decodeBase64 (const char *encoded, size_t encoded_len,
+              unsigned char **decoded, size_t *len, int skip_crs)
 {
     const char *cp = encoded;
     int bitno, skip;
     uint32_t bits;
     /* Size the decoded string very conservatively. */
-    charstring_t decoded_c = charstring_create (strlen (encoded));
+    charstring_t decoded_c = charstring_create (encoded_len);
 
     bitno = 18;
     bits = 0L;
     skip = 0;
 
     bool self_delimiting = false;
-    for (; *cp; ++cp) {
+    for (; encoded_len > 0; --encoded_len, ++cp) {
         switch (*cp) {
             unsigned char value;
 
@@ -272,8 +272,9 @@ decodeBase64 (const char *encoded, unsigned char **decoded, size_t *len,
                 }
                 if (skip  ||  (((unsigned char) *cp) & 0x80)  ||
                     (value = b642nib[((unsigned char) *cp) & 0x7f]) > 0x3f) {
-                    inform("invalid base64 byte %#x: %.42s",
-                        *(unsigned char *)cp, cp);
+                    inform("invalid base64 byte %#x: %.*s",
+                           *(unsigned char *)cp, (int) min(encoded_len, 42),
+                           cp);
                     charstring_free (decoded_c);
                     *decoded = NULL;
 
@@ -317,8 +318,11 @@ test_end:
 
     if (! self_delimiting  &&  bitno != 18) {
         /* Show some context for the error. */
-        cp -= min(cp - encoded, 20);
-        inform("premature ending (bitno %d) near %s", bitno, cp);
+        int context = min(cp - encoded, 20);
+        cp -= context;
+        encoded_len += context;
+        inform("premature ending (bitno %d) near %.*s",
+               bitno, (int) encoded_len, cp);
         charstring_free (decoded_c);
         *decoded = NULL;
 
diff --git a/mts/smtp/smtp.c b/mts/smtp/smtp.c
index a9adfcc5..d5ed03b6 100644
--- a/mts/smtp/smtp.c
+++ b/mts/smtp/smtp.c
@@ -1055,7 +1055,7 @@ sm_sasl_callback(enum sasl_message_type mtype, unsigned const char *indata,
 	    *outdata = NULL;
 	    *outdatalen = 0;
 	} else {
-	    rc = decodeBase64(line + 4, outdata, &len, 0);
+	    rc = decodeBase64(line + 4, len - 4, outdata, &len, 0);
 	    if (rc != OK) {
 		netsec_err(errstr, "Unable to decode base64 response");
 		return NOTOK;
diff --git a/sbr/netsec.c b/sbr/netsec.c
index bf0ff9cf..9e2f31d4 100644
--- a/sbr/netsec.c
+++ b/sbr/netsec.c
@@ -323,7 +323,7 @@ netsec_b64_snoop_decoder(netsec_context *nsc, const char *string, size_t len,
 	len -= offset;
     }
 
-    if (decodeBase64(string, &decoded, &decodedlen, 1) == OK) {
+    if (decodeBase64(string, len, &decoded, &decodedlen, 1) == OK) {
 	/*
 	 * Some mechanisms produce large binary tokens, which aren't really
 	 * readable.  So let's do a simple heuristic.  If the token is greater
diff --git a/uip/imaptest.c b/uip/imaptest.c
index 9e19aac6..254c1a9c 100644
--- a/uip/imaptest.c
+++ b/uip/imaptest.c
@@ -651,7 +651,7 @@ imap_sasl_callback(enum sasl_message_type mtype, unsigned const char *indata,
 	    *outdata = NULL;
 	    *outdatalen = 0;
 	} else {
-	    rc = decodeBase64(line + 2, outdata, &len, 0);
+	    rc = decodeBase64(line + 2, len - 2, outdata, &len, 0);
 	    *outdatalen = len;
 	    if (rc != OK) {
 		netsec_err(errstr, "Unable to decode base64 response");
diff --git a/uip/mhparse.c b/uip/mhparse.c
index 7f3fac8d..052eceb6 100644
--- a/uip/mhparse.c
+++ b/uip/mhparse.c
@@ -1832,10 +1832,7 @@ openBase64 (CT ct, char **file)
         }
     }
 
-    /* decodeBase64() requires null-terminated input. */
-    *cp = '\0';
-
-    if (decodeBase64 (buffer, &decoded, &decoded_len,
+    if (decodeBase64 (buffer, cp - buffer, &decoded, &decoded_len,
 		      ct->c_type == CT_TEXT) != OK)
         goto clean_up;
 
diff --git a/uip/popsbr.c b/uip/popsbr.c
index 89fc4cac..79376a05 100644
--- a/uip/popsbr.c
+++ b/uip/popsbr.c
@@ -464,7 +464,7 @@ pop_sasl_callback(enum sasl_message_type mtype, unsigned const char *indata,
 	    *outdata = NULL;
 	    *outdatalen = 0;
 	} else {
-	    rc = decodeBase64(line + 2, outdata, &len, 0);
+	    rc = decodeBase64(line + 2, len - 2, outdata, &len, 0);
 	    *outdatalen = len;
 	    if (rc != OK) {
 		netsec_err(errstr, "Unable to decode base64 response");

decodeBase64() input need not be NUL-terminated [patch]

Reply via email to