On Fri, 14 Nov 2025 14:01:32 +0800 Guan-Chun Wu <[email protected]> wrote:
> The old base64 implementation relied on a bit-accumulator loop, which was
> slow for larger inputs and too permissive in validation. It would accept
> extra '=', missing '=', or even '=' appearing in the middle of the input,
> allowing malformed strings to pass. This patch reworks the internals to
> improve performance and enforce stricter validation.
>
> Changes:
>  - Encoder:
>    * Process input in 3-byte blocks, mapping 24 bits into four 6-bit
>      symbols, avoiding bit-by-bit shifting and reducing loop iterations.
>    * Handle the final 1-2 leftover bytes explicitly and emit '=' only when
>      requested.
>  - Decoder:
>    * Based on the reverse lookup tables from the previous patch, decode
>      input in 4-character groups.
>    * Each group is looked up directly, converted into numeric values, and
>      combined into 3 output bytes.
>    * Explicitly handle padded and unpadded forms:
>       - With padding: input length must be a multiple of 4, and '=' is
>         allowed only in the last two positions. Reject stray or early '='.
>       - Without padding: validate tail lengths (2 or 3 chars) and require
>         unused low bits to be zero.
>    * Removed the bit-accumulator style loop to reduce loop iterations.
>
> Performance (x86_64, Intel Core i7-10700 @ 2.90GHz, avg over 1000 runs,
> KUnit):
>
> Encode:
>   64B   ~90ns   -> ~32ns   (~2.8x)
>   1KB  ~1332ns  -> ~510ns  (~2.6x)
>
> Decode:
>   64B  ~1530ns  -> ~35ns   (~43.7x)
>   1KB ~27726ns  -> ~530ns  (~52.3x)
>
> Co-developed-by: Kuan-Wei Chiu <[email protected]>
> Signed-off-by: Kuan-Wei Chiu <[email protected]>
> Co-developed-by: Yu-Sheng Huang <[email protected]>
> Signed-off-by: Yu-Sheng Huang <[email protected]>
> Signed-off-by: Guan-Chun Wu <[email protected]>

Reviewed-by: David Laight <[email protected]>

But see minor nit below.

> ---
>  lib/base64.c | 109 ++++++++++++++++++++++++++++++++-------------------
>  1 file changed, 68 insertions(+), 41 deletions(-)
>
> diff --git a/lib/base64.c b/lib/base64.c
> index 9d1074bb821c..1a6d8fe37eda 100644
> --- a/lib/base64.c
> +++ b/lib/base64.c
> @@ -79,28 +79,38 @@ static const s8 base64_rev_maps[][256] = {
>  int base64_encode(const u8 *src, int srclen, char *dst, bool padding, enum
> base64_variant variant)
>  {
>  	u32 ac = 0;
> -	int bits = 0;
> -	int i;
>  	char *cp = dst;
>  	const char *base64_table = base64_tables[variant];
>
> -	for (i = 0; i < srclen; i++) {
> -		ac = (ac << 8) | src[i];
> -		bits += 8;
> -		do {
> -			bits -= 6;
> -			*cp++ = base64_table[(ac >> bits) & 0x3f];
> -		} while (bits >= 6);
> -	}
> -	if (bits) {
> -		*cp++ = base64_table[(ac << (6 - bits)) & 0x3f];
> -		bits -= 6;
> +	while (srclen >= 3) {
> +		ac = (u32)src[0] << 16 | (u32)src[1] << 8 | (u32)src[2];

There is no need for the (u32) casts.
All char/short values are promoted to 'int' prior to any maths.

> +		*cp++ = base64_table[ac >> 18];
> +		*cp++ = base64_table[(ac >> 12) & 0x3f];
> +		*cp++ = base64_table[(ac >> 6) & 0x3f];
> +		*cp++ = base64_table[ac & 0x3f];
> +
> +		src += 3;
> +		srclen -= 3;
>  	}
> -	if (padding) {
> -		while (bits < 0) {
> +
> +	switch (srclen) {
> +	case 2:
> +		ac = (u32)src[0] << 16 | (u32)src[1] << 8;
> +		*cp++ = base64_table[ac >> 18];
> +		*cp++ = base64_table[(ac >> 12) & 0x3f];
> +		*cp++ = base64_table[(ac >> 6) & 0x3f];
> +		if (padding)
> +			*cp++ = '=';
> +		break;
> +	case 1:
> +		ac = (u32)src[0] << 16;
> +		*cp++ = base64_table[ac >> 18];
> +		*cp++ = base64_table[(ac >> 12) & 0x3f];
> +		if (padding) {
> +			*cp++ = '=';
>  			*cp++ = '=';
> -			bits += 2;
>  		}
> +		break;
>  	}
>  	return cp - dst;
>  }
> @@ -116,41 +126,58 @@ EXPORT_SYMBOL_GPL(base64_encode);
>   *
>   * Decodes a string using the selected Base64 variant.
>   *
> - * This implementation hasn't been optimized for performance.
> - *
>   * Return: the length of the resulting decoded binary data in bytes,
>   *	   or -1 if the string isn't a valid Base64 string.
>   */
>  int base64_decode(const char *src, int srclen, u8 *dst, bool padding, enum
> base64_variant variant)
>  {
> -	u32 ac = 0;
> -	int bits = 0;
> -	int i;
>  	u8 *bp = dst;
> -	s8 ch;
> +	s8 input[4];
> +	s32 val;
> +	const u8 *s = (const u8 *)src;
> +	const s8 *base64_rev_tables = base64_rev_maps[variant];
>
> -	for (i = 0; i < srclen; i++) {
> -		if (padding) {
> -			if (src[i] == '=') {
> -				ac = (ac << 6);
> -				bits += 6;
> -				if (bits >= 8)
> -					bits -= 8;
> -				continue;
> -			}
> -		}
> -		ch = base64_rev_maps[variant][(u8)src[i]];
> -		if (ch == -1)
> -			return -1;
> -		ac = (ac << 6) | ch;
> -		bits += 6;
> -		if (bits >= 8) {
> -			bits -= 8;
> -			*bp++ = (u8)(ac >> bits);
> +	while (srclen >= 4) {
> +		input[0] = base64_rev_tables[s[0]];
> +		input[1] = base64_rev_tables[s[1]];
> +		input[2] = base64_rev_tables[s[2]];
> +		input[3] = base64_rev_tables[s[3]];
> +
> +		val = input[0] << 18 | input[1] << 12 | input[2] << 6 |
> input[3];
> +
> +		if (unlikely(val < 0)) {
> +			if (!padding || srclen != 4 || s[3] != '=')
> +				return -1;
> +			padding = 0;
> +			srclen = s[2] == '=' ? 2 : 3;
> +			break;
>  		}
> +
> +		*bp++ = val >> 16;
> +		*bp++ = val >> 8;
> +		*bp++ = val;
> +
> +		s += 4;
> +		srclen -= 4;
>  	}
> -	if (ac & ((1 << bits) - 1))
> +
> +	if (likely(!srclen))
> +		return bp - dst;
> +	if (padding || srclen == 1)
>  		return -1;
> +
> +	val = (base64_rev_tables[s[0]] << 12) | (base64_rev_tables[s[1]] << 6);
> +	*bp++ = val >> 10;
> +
> +	if (srclen == 2) {
> +		if (val & 0x800003ff)
> +			return -1;
> +	} else {
> +		val |= base64_rev_tables[s[2]];
> +		if (val & 0x80000003)
> +			return -1;
> +		*bp++ = val >> 2;
> +	}
>  	return bp - dst;
>  }
>  EXPORT_SYMBOL_GPL(base64_decode);
