base64: Optimize base64_decode() with reverse lookup tables

Guan-Chun Wu Fri, 17 Oct 2025 13:52:06 -0700

On Sun, Oct 05, 2025 at 06:18:03PM +0100, David Laight wrote:
> On Wed, 1 Oct 2025 09:20:27 -0700
> Caleb Sander Mateos <[email protected]> wrote:
> 
> > On Wed, Oct 1, 2025 at 3:18 AM Guan-Chun Wu <[email protected]> 
> > wrote:
> > >
> > > On Fri, Sep 26, 2025 at 04:33:12PM -0700, Caleb Sander Mateos wrote:  
> > > > On Thu, Sep 25, 2025 at 11:59 PM Guan-Chun Wu 
> > > > <[email protected]> wrote:  
> > > > >
> > > > > From: Kuan-Wei Chiu <[email protected]>
> > > > >
> > > > > Replace the use of strchr() in base64_decode() with precomputed 
> > > > > reverse
> > > > > lookup tables for each variant. This avoids repeated string scans and
> > > > > improves performance. Use -1 in the tables to mark invalid characters.
> > > > >
> > > > > Decode:
> > > > >   64B   ~1530ns  ->  ~75ns    (~20.4x)
> > > > >   1KB  ~27726ns  -> ~1165ns   (~23.8x)
> > > > >
> > > > > Signed-off-by: Kuan-Wei Chiu <[email protected]>
> > > > > Co-developed-by: Guan-Chun Wu <[email protected]>
> > > > > Signed-off-by: Guan-Chun Wu <[email protected]>
> > > > > ---
> > > > >  lib/base64.c | 66 
> > > > > ++++++++++++++++++++++++++++++++++++++++++++++++----
> > > > >  1 file changed, 61 insertions(+), 5 deletions(-)
> > > > >
> > > > > diff --git a/lib/base64.c b/lib/base64.c
> > > > > index 1af557785..b20fdf168 100644
> > > > > --- a/lib/base64.c
> > > > > +++ b/lib/base64.c
> > > > > @@ -21,6 +21,63 @@ static const char base64_tables[][65] = {
> > > > >         [BASE64_IMAP] = 
> > > > > "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+,",
> > > > >  };
> > > > >
> > > > > +static const s8 base64_rev_tables[][256] = {
> > > > > +       [BASE64_STD] = {
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  
> > > > > -1,  -1,  -1,  63,
> > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  
> > > > > 11,  12,  13,  14,
> > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  
> > > > > 37,  38,  39,  40,
> > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +       },
> > > > > +       [BASE64_URLSAFE] = {
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  62,  -1,  -1,
> > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  
> > > > > 11,  12,  13,  14,
> > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  
> > > > > -1,  -1,  -1,  63,
> > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  
> > > > > 37,  38,  39,  40,
> > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +       },
> > > > > +       [BASE64_IMAP] = {
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  62,  
> > > > > 63,  -1,  -1,  -1,
> > > > > +        52,  53,  54,  55,  56,  57,  58,  59,  60,  61,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,   0,   1,   2,   3,   4,   5,   6,   7,   8,   9,  10,  
> > > > > 11,  12,  13,  14,
> > > > > +        15,  16,  17,  18,  19,  20,  21,  22,  23,  24,  25,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  26,  27,  28,  29,  30,  31,  32,  33,  34,  35,  36,  
> > > > > 37,  38,  39,  40,
> > > > > +        41,  42,  43,  44,  45,  46,  47,  48,  49,  50,  51,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +        -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  -1,  
> > > > > -1,  -1,  -1,  -1,
> > > > > +       },  
> > > >
> > > > Do we actually need 3 separate lookup tables? It looks like all 3
> > > > variants agree on the value of any characters they have in common. So
> > > > we could combine them into a single lookup table that would work for a
> > > > valid base64 string of any variant. The only downside I can see is
> > > > that base64 strings which are invalid in some variants might no longer
> > > > be rejected by base64_decode().
> > > >  
> > >
> > > In addition to the approach David mentioned, maybe we can use a common
> > > lookup table for A–Z, a–z, and 0–9, and then handle the variant-specific
> > > symbols with a switch.
> 
> It is certainly possible to generate the initialiser from a #define to
> avoid all the replicated source.
> 
> > >
> > > For example:
> > >
> > > static const s8 base64_rev_common[256] = {
> > >     [0 ... 255] = -1,
> > >     ['A'] = 0, ['B'] = 1, /* ... */, ['Z'] = 25,
> 
> If you assume ASCII (I doubt Linux runs on any EBCDIC systems) you
> can assume the characters are sequential and miss ['B'] = etc to
> reduce the the line lengths.
> (Even EBCDIC has A-I J-R S-Z and 0-9 as adjacent values)
> 
> > >     ['a'] = 26, /* ... */, ['z'] = 51,
> > >     ['0'] = 52, /* ... */, ['9'] = 61,
> > > };
> > >
> > > static inline int base64_rev_lookup(u8 c, enum base64_variant variant) {
> > >     s8 v = base64_rev_common[c];
> > >     if (v != -1)
> > >         return v;
> > >
> > >     switch (variant) {
> > >     case BASE64_STD:
> > >         if (c == '+') return 62;
> > >         if (c == '/') return 63;
> > >         break;
> > >     case BASE64_IMAP:
> > >         if (c == '+') return 62;
> > >         if (c == ',') return 63;
> > >         break;
> > >     case BASE64_URLSAFE:
> > >         if (c == '-') return 62;
> > >         if (c == '_') return 63;
> > >         break;
> > >     }
> > >     return -1;
> > > }
> > >
> > > What do you think?  
> > 
> > That adds several branches in the hot loop, at least 2 of which are
> > unpredictable for valid base64 input of a given variant (v != -1 as
> > well as the first c check in the applicable switch case).
> 
> I'd certainly pass in the character values for 62 and 63 so they are
> determined well outside the inner loop.
> Possibly even going as far as #define BASE64_STD ('+' << 8 | '/').
> 
> > That seems like it would hurt performance, no?
> > I think having 3 separate tables
> > would be preferable to making the hot loop more branchy.
> 
> Depends how common you think 62 and 63 are...
> I guess 63 comes from 0xff bytes - so might be quite common.
> 
> One thing I think you've missed is that the decode converts 4 characters
> into 24 bits - which then need carefully writing into the output buffer.
> There is no need to check whether each character is valid.
> After:
>       val_24 = t[b[0]] | t[b[1]] << 6 | t[b[2]] << 12 | t[b[3]] << 18;
> val_24 will be negative iff one of b[0..3] is invalid.
> So you only need to check every 4 input characters, not for every one.
> That does require separate tables.
> (Or have a decoder that always maps "+-" to 62 and "/,_" to 63.)
> 
>       David
>


Thanks for the feedback.
For the next revision, we’ll use a single lookup table that maps both +
and - to 62, and /, _, and , to 63.
Does this approach sound good to everyone?

Best regards,
Guan-Chun

> > 
> > Best,
> > Caleb
> > 
>

Re: [PATCH v3 2/6] lib/base64: Optimize base64_decode() with reverse lookup tables

Reply via email to