bug#66265: Make padding optional with base64

Pádraig Brady Fri, 06 Oct 2023 08:37:05 -0700

On 29/09/2023 15:11, Pádraig Brady wrote:

On 29/09/2023 10:46, Paul Millar wrote:

Hi,


RFC 4648 says[1]:
   >   In some circumstances, the use of padding ("=") in base-encoded data
   >   is not required or used.

Currently, the 'base64' application always includes the padding when
encoding, and prints an warning/error message (on stderr) if padding is
omitted when decoding.  Decoding is nonetheless successful (the correct
data is emitted on stdout) if the base64-encoded data omits the padding.

I think the base64 application should be updated to support
base64-encoded data without padding.

My suggestion would be to add an option to base64 to control whether
padding is added when encoding.  For decoding, it might make sense to
add an option to control whether padding is expected.

(although, other approaches might be possible)

Cheers,
Paul.

[1] https://datatracker.ietf.org/doc/html/rfc4648#section-3.2


I agree with this actually.

The main advantage of padding as I see it is when concatenating encoded data.
I.e. that's the only use case where there could be ambiguity in the received
encoded data, as otherwise one can auto assume appropriate padding based on
the input length. I can't see `base64` or `basenc --base64` being used in a
problematic streaming context like that. If the utils themselves read from
a stream in a _single invocation_ they'll deal with partial blocks 
appropriately.
Looking at it another way, I don't see how base64 data over _separate 
invocations_
of these utils could be handled now, as they just give errors in this case 
anyway.
Now there would be a slight loss in error detection as truncated data would
not be noticed, however that is the case for a third of truncated sizes already.

So on input I'm inclined to auto pad.

On output one can trivially remove padding with `tr -d =`,
so I'm inclined to leave that as is.

I.e. we shouldn't need a new option for any of this.


Implementation attached

cheers,
Pádraig

From 6b00e41e5ae6de8e9f269172e62cb40d73fce5c9 Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?P=C3=A1draig=20Brady?= <p...@draigbrady.com>
Date: Thu, 5 Oct 2023 17:00:51 +0100
Subject: [PATCH] basenc: auto pad base32 and base64 inputs when decoding

Padding of encoded data is useful in cases where
base64 encoded data is concatenated / streamed.
I.e. where there are padding chars _within_ the stream.
In other cases padding is optional and can be inferred.
Note we continue to treat partially padded encoded data
as invalid, as that may be indicative of truncation.

* src/basenc.c (do_decode): Auto pad the end of the input.
* NEWS: Mention the change in behavior.
* tests/misc/base64.pl: Adjust to not fail for missing padding.
Addresses https://bugs.gnu.org/66265
---
 NEWS                 |  3 +++
 src/basenc.c         | 62 +++++++++++++++++++++++++++++++++++++++++---
 tests/misc/base64.pl |  6 ++---
 3 files changed, 63 insertions(+), 8 deletions(-)

diff --git a/NEWS b/NEWS
index 18f80cb4c..93f98b99d 100644
--- a/NEWS
+++ b/NEWS
@@ -16,6 +16,9 @@ GNU coreutils NEWS                                    -*- outline -*-
 
 ** Changes in behavior
 
+  base32 and base64 no longer require padding when decoding.
+  Previously an error was given for non padded encoded data.
+
   ls --dired now implies long format output without hyperlinks enabled,
   and will take precedence over previously specified formats or hyperlink mode.
 
diff --git a/src/basenc.c b/src/basenc.c
index ce259c482..12021e900 100644
--- a/src/basenc.c
+++ b/src/basenc.c
@@ -23,6 +23,7 @@
 #include <sys/types.h>
 
 #include "system.h"
+#include "assure.h"
 #include "c-ctype.h"
 #include "fadvise.h"
 #include "quote.h"
@@ -172,10 +173,37 @@ from any other non-alphabet bytes in the encoded stream.\n"),
   exit (status);
 }
 
+#if BASE_TYPE != 64
+static int
+base32_required_padding (int len)
+{
+  int partial = len % 8;
+  return partial ? 8 - partial : 0;
+}
+#endif
+
+#if BASE_TYPE != 32
+static int
+base64_required_padding (int len)
+{
+  int partial = len % 4;
+  return partial ? 4 - partial : 0;
+}
+#endif
+
+#if BASE_TYPE == 42
+static int
+no_required_padding (int len)
+{
+  return 0;
+}
+#endif
+
 #define ENC_BLOCKSIZE (1024 * 3 * 10)
 
 #if BASE_TYPE == 32
 # define BASE_LENGTH BASE32_LENGTH
+# define REQUIRED_PADDING base32_required_padding
 /* Note that increasing this may decrease performance if --ignore-garbage
    is used, because of the memmove operation below.  */
 # define DEC_BLOCKSIZE (1024 * 5)
@@ -191,6 +219,7 @@ static_assert (DEC_BLOCKSIZE % 40 == 0); /* Complete encoded blocks are used. */
 # define isbase isbase32
 #elif BASE_TYPE == 64
 # define BASE_LENGTH BASE64_LENGTH
+# define REQUIRED_PADDING base64_required_padding
 /* Note that increasing this may decrease performance if --ignore-garbage
    is used, because of the memmove operation below.  */
 # define DEC_BLOCKSIZE (1024 * 3)
@@ -208,6 +237,7 @@ static_assert (DEC_BLOCKSIZE % 12 == 0); /* Complete encoded blocks are used. */
 
 
 # define BASE_LENGTH base_length
+# define REQUIRED_PADDING required_padding
 
 /* Note that increasing this may decrease performance if --ignore-garbage
    is used, because of the memmove operation below.  */
@@ -216,6 +246,7 @@ static_assert (DEC_BLOCKSIZE % 40 == 0); /* complete encoded blocks for base32*/
 static_assert (DEC_BLOCKSIZE % 12 == 0); /* complete encoded blocks for base64*/
 
 static int (*base_length) (int i);
+static int (*required_padding) (int i);
 static bool (*isbase) (char ch);
 static void (*base_encode) (char const *restrict in, idx_t inlen,
                             char *restrict out, idx_t outlen);
@@ -486,7 +517,6 @@ base32hex_decode_ctx_wrapper (struct base_decode_context *ctx,
   return b;
 }
 
-
 static bool
 isbase16 (char ch)
 {
@@ -1011,6 +1041,7 @@ do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
   idx_t sum;
   struct base_decode_context ctx;
 
+  char padbuf[8] = "========";
   inbuf = xmalloc (BASE_LENGTH (DEC_BLOCKSIZE));
   outbuf = xmalloc (DEC_BLOCKSIZE);
 
@@ -1053,10 +1084,25 @@ do_decode (FILE *in, char const *infile, FILE *out, bool ignore_garbage)
          telling it to flush what is in CTX.  */
       for (int k = 0; k < 1 + !!feof (in); k++)
         {
-          if (k == 1 && ctx.i == 0)
-            break;
+          if (k == 1)
+            {
+              if (ctx.i == 0)
+                break;
+
+              /* auto pad input (at eof).  */
+              idx_t auto_padding = REQUIRED_PADDING (ctx.i);
+              if (auto_padding && (sum == 0 || inbuf[sum - 1] != '='))
+                {
+                  affirm (auto_padding <= sizeof (padbuf));
+                  IF_LINT (free (inbuf));
+                  sum = auto_padding;
+                  inbuf = padbuf;
+                }
+              else
+                sum = 0;  /* process ctx buffer only */
+            }
           idx_t n = DEC_BLOCKSIZE;
-          ok = base_decode_ctx (&ctx, inbuf, (k == 0 ? sum : 0), outbuf, &n);
+          ok = base_decode_ctx (&ctx, inbuf, sum, outbuf, &n);
 
           if (fwrite (outbuf, 1, n, out) < n)
             write_error ();
@@ -1145,6 +1191,7 @@ main (int argc, char **argv)
     {
     case BASE64_OPTION:
       base_length = base64_length_wrapper;
+      required_padding = base64_required_padding;
       isbase = isbase64;
       base_encode = base64_encode;
       base_decode_ctx_init = base64_decode_ctx_init_wrapper;
@@ -1153,6 +1200,7 @@ main (int argc, char **argv)
 
     case BASE64URL_OPTION:
       base_length = base64_length_wrapper;
+      required_padding = base64_required_padding;
       isbase = isbase64url;
       base_encode = base64url_encode;
       base_decode_ctx_init = base64url_decode_ctx_init_wrapper;
@@ -1161,6 +1209,7 @@ main (int argc, char **argv)
 
     case BASE32_OPTION:
       base_length = base32_length_wrapper;
+      required_padding = base32_required_padding;
       isbase = isbase32;
       base_encode = base32_encode;
       base_decode_ctx_init = base32_decode_ctx_init_wrapper;
@@ -1169,6 +1218,7 @@ main (int argc, char **argv)
 
     case BASE32HEX_OPTION:
       base_length = base32_length_wrapper;
+      required_padding = base32_required_padding;
       isbase = isbase32hex;
       base_encode = base32hex_encode;
       base_decode_ctx_init = base32hex_decode_ctx_init_wrapper;
@@ -1177,6 +1227,7 @@ main (int argc, char **argv)
 
     case BASE16_OPTION:
       base_length = base16_length;
+      required_padding = no_required_padding;
       isbase = isbase16;
       base_encode = base16_encode;
       base_decode_ctx_init = base16_decode_ctx_init;
@@ -1185,6 +1236,7 @@ main (int argc, char **argv)
 
     case BASE2MSBF_OPTION:
       base_length = base2_length;
+      required_padding = no_required_padding;
       isbase = isbase2;
       base_encode = base2msbf_encode;
       base_decode_ctx_init = base2_decode_ctx_init;
@@ -1193,6 +1245,7 @@ main (int argc, char **argv)
 
     case BASE2LSBF_OPTION:
       base_length = base2_length;
+      required_padding = no_required_padding;
       isbase = isbase2;
       base_encode = base2lsbf_encode;
       base_decode_ctx_init = base2_decode_ctx_init;
@@ -1201,6 +1254,7 @@ main (int argc, char **argv)
 
     case Z85_OPTION:
       base_length = z85_length;
+      required_padding = no_required_padding;
       isbase = isz85;
       base_encode = z85_encode;
       base_decode_ctx_init = z85_decode_ctx_init;
diff --git a/tests/misc/base64.pl b/tests/misc/base64.pl
index 63e6c6b44..40c6c3d07 100755
--- a/tests/misc/base64.pl
+++ b/tests/misc/base64.pl
@@ -124,10 +124,8 @@ sub gen_tests($)
         push @Tests, (
           ['baddecode', '--decode', {IN=>'a'}, {OUT=>""},
           {ERR_SUBST => 's/.*: invalid input//'}, {ERR => "\n"}, {EXIT => 1}],
-          ['baddecode2', '--decode', {IN=>'ab'}, {OUT=>"i"},
-          {ERR_SUBST => 's/.*: invalid input//'}, {ERR => "\n"}, {EXIT => 1}],
-          ['baddecode3', '--decode', {IN=>'Zzz'}, {OUT=>"g<"},
-          {ERR_SUBST => 's/.*: invalid input//'}, {ERR => "\n"}, {EXIT => 1}],
+          ['paddecode2', '--decode', {IN=>'ab'}, {OUT=>"i"}],
+          ['paddecode3', '--decode', {IN=>'Zzz'}, {OUT=>"g<"}],
           ['baddecode4', '--decode', {IN=>'Zz='}, {OUT=>"g"},
           {ERR_SUBST => 's/.*: invalid input//'}, {ERR => "\n"}, {EXIT => 1}],
           ['baddecode5', '--decode', {IN=>'Z==='}, {OUT=>""},
-- 
2.41.0

bug#66265: Make padding optional with base64

Reply via email to