From 3398db2e87d8e4658655aa8cacc6a94974fa1b2d Mon Sep 17 00:00:00 2001
From: Florents Tselai <florents.tselai@gmail.com>
Date: Sat, 12 Jul 2025 15:12:17 -0400
Subject: [PATCH v4] Add base64url format to encode() and decode()

---
 doc/src/sgml/func.sgml                |  19 +++
 src/backend/utils/adt/encode.c        | 168 +++++++++++++++++++++-----
 src/test/regress/expected/strings.out | 150 +++++++++++++++++++++++
 src/test/regress/sql/strings.sql      |  54 +++++++++
 4 files changed, 359 insertions(+), 32 deletions(-)

diff --git a/doc/src/sgml/func.sgml b/doc/src/sgml/func.sgml
index c28aa71f570..34c8d4990c2 100644
--- a/doc/src/sgml/func.sgml
+++ b/doc/src/sgml/func.sgml
@@ -4999,6 +4999,7 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
        Encodes binary data into a textual representation; supported
        <parameter>format</parameter> values are:
        <link linkend="encode-format-base64"><literal>base64</literal></link>,
+       <link linkend="encode-format-base64url"><literal>base64url</literal></link>,
        <link linkend="encode-format-escape"><literal>escape</literal></link>,
        <link linkend="encode-format-hex"><literal>hex</literal></link>.
       </para>
@@ -5056,6 +5057,24 @@ SELECT format('Testing %3$s, %2$s, %s', 'one', 'two', 'three');
      </listitem>
     </varlistentry>
 
+    <varlistentry id="encode-format-base64url">
+     <term>base64url
+      <indexterm>
+       <primary>base64url format</primary>
+      </indexterm></term>
+     <listitem>
+      <para>
+       The <literal>base64url</literal> format is that of
+       <ulink url="https://datatracker.ietf.org/doc/html/rfc4648#section-5">RFC 4648
+       Section 5</ulink>, a URL- and filename-safe variant of
+       <literal>base64</literal> that uses <literal>'-'</literal> in place of
+       <literal>'+'</literal> and <literal>'_'</literal> in place of
+       <literal>'/'</literal>. Encoded output omits the trailing
+       <literal>'='</literal> padding and is not broken into lines.
+      </para>
+     </listitem>
+    </varlistentry>
+
     <varlistentry id="encode-format-escape">
      <term>escape
      <indexterm>
diff --git a/src/backend/utils/adt/encode.c b/src/backend/utils/adt/encode.c
index 4ccaed815d1..9359800ff14 100644
--- a/src/backend/utils/adt/encode.c
+++ b/src/backend/utils/adt/encode.c
@@ -273,6 +273,9 @@ hex_dec_len(const char *src, size_t srclen)
 static const char _base64[] =
 "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/";
 
+static const char _base64url[] =
+"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-_";
+
 static const int8 b64lookup[128] = {
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
 	-1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
@@ -285,17 +288,15 @@ static const int8 b64lookup[128] = {
 };
 
 static uint64
-pg_base64_encode(const char *src, size_t len, char *dst)
+pg_base64_encode_internal(const char *src, size_t len, char *dst, bool url)
 {
-	char	   *p,
-			   *lend = dst + 76;
-	const char *s,
-			   *end = src + len;
+	const char *alphabet = url ? _base64url : _base64;
+	const char *end = src + len;
+	const char *s = src;
+	char	   *p = dst;
 	int			pos = 2;
 	uint32		buf = 0;
-
-	s = src;
-	p = dst;
+	char	   *lend = dst + 76;
 
 	while (s < end)
 	{
@@ -306,53 +307,81 @@ pg_base64_encode(const char *src, size_t len, char *dst)
 		/* write it out */
 		if (pos < 0)
 		{
-			*p++ = _base64[(buf >> 18) & 0x3f];
-			*p++ = _base64[(buf >> 12) & 0x3f];
-			*p++ = _base64[(buf >> 6) & 0x3f];
-			*p++ = _base64[buf & 0x3f];
+			*p++ = alphabet[(buf >> 18) & 0x3f];
+			*p++ = alphabet[(buf >> 12) & 0x3f];
+			*p++ = alphabet[(buf >> 6) & 0x3f];
+			*p++ = alphabet[buf & 0x3f];
 
 			pos = 2;
 			buf = 0;
-		}
-		if (p >= lend)
-		{
-			*p++ = '\n';
-			lend = p + 76;
+
+			if (!url && p >= lend)
+			{
+				*p++ = '\n';
+				lend = p + 76;
+			}
 		}
 	}
+
+	/* handle remainder */
 	if (pos != 2)
 	{
-		*p++ = _base64[(buf >> 18) & 0x3f];
-		*p++ = _base64[(buf >> 12) & 0x3f];
-		*p++ = (pos == 0) ? _base64[(buf >> 6) & 0x3f] : '=';
-		*p++ = '=';
+		*p++ = alphabet[(buf >> 18) & 0x3f];
+		*p++ = alphabet[(buf >> 12) & 0x3f];
+
+		if (pos == 0)
+		{
+			*p++ = alphabet[(buf >> 6) & 0x3f];
+			if (!url)
+				*p++ = '=';
+		}
+		else if (!url)
+		{
+			*p++ = '=';
+			*p++ = '=';
+		}
 	}
 
 	return p - dst;
 }
 
 static uint64
-pg_base64_decode(const char *src, size_t len, char *dst)
+pg_base64_encode(const char *src, size_t len, char *dst)
 {
-	const char *srcend = src + len,
-			   *s = src;
+	return pg_base64_encode_internal(src, len, dst, false);
+}
+
+static uint64
+pg_base64_decode_internal(const char *src, size_t len, char *dst, bool url)
+{
+	const char *srcend = src + len;
+	const char *s = src;
 	char	   *p = dst;
 	char		c;
 	int			b = 0;
 	uint32		buf = 0;
-	int			pos = 0,
-				end = 0;
+	int			pos = 0;
+	int			end = 0;
 
 	while (s < srcend)
 	{
 		c = *s++;
 
+		/* skip whitespace */
 		if (c == ' ' || c == '\t' || c == '\n' || c == '\r')
 			continue;
 
+		/* convert Base64URL to Base64 if needed */
+		if (url)
+		{
+			if (c == '-')
+				c = '+';
+			else if (c == '_')
+				c = '/';
+		}
+
 		if (c == '=')
 		{
-			/* end sequence */
 			if (!end)
 			{
 				if (pos == 2)
@@ -377,30 +406,49 @@ pg_base64_decode(const char *src, size_t len, char *dst)
 						 errmsg("invalid symbol \"%.*s\" found while decoding base64 sequence",
 								pg_mblen(s - 1), s - 1)));
 		}
-		/* add it to buffer */
+
 		buf = (buf << 6) + b;
 		pos++;
+
 		if (pos == 4)
 		{
-			*p++ = (buf >> 16) & 255;
+			*p++ = (buf >> 16) & 0xFF;
 			if (end == 0 || end > 1)
-				*p++ = (buf >> 8) & 255;
+				*p++ = (buf >> 8) & 0xFF;
 			if (end == 0 || end > 2)
-				*p++ = buf & 255;
+				*p++ = buf & 0xFF;
 			buf = 0;
 			pos = 0;
 		}
 	}
 
-	if (pos != 0)
+	if (pos == 2)
+	{
+		buf <<= 12;				/* 2 * 6 = 12 bits, pad remaining to 24 */
+		*p++ = (buf >> 16) & 0xFF;
+	}
+	else if (pos == 3)
+	{
+		buf <<= 6;				/* 3 * 6 = 18 bits */
+		*p++ = (buf >> 16) & 0xFF;
+		*p++ = (buf >> 8) & 0xFF;
+	}
+	else if (pos != 0)
+	{
 		ereport(ERROR,
 				(errcode(ERRCODE_INVALID_PARAMETER_VALUE),
 				 errmsg("invalid base64 end sequence"),
 				 errhint("Input data is missing padding, is truncated, or is otherwise corrupted.")));
+	}
 
 	return p - dst;
 }
 
+static uint64
+pg_base64_decode(const char *src, size_t len, char *dst)
+{
+	return pg_base64_decode_internal(src, len, dst, false);	/* base64 */
+}
 
 static uint64
 pg_base64_enc_len(const char *src, size_t srclen)
@@ -415,6 +463,56 @@ pg_base64_dec_len(const char *src, size_t srclen)
 	return ((uint64) srclen * 3) >> 2;
 }
 
+/*
+ * Upper bound on the length of the base64url encoding of srclen input bytes.
+ *
+ * "src" is unused; it is present only to match the signature of the other
+ * encoding-length functions.
+ */
+static uint64
+pg_base64url_enc_len(const char *src, size_t srclen)
+{
+	/*
+	 * Every group of up to 3 input bytes yields at most 4 output characters,
+	 * hence ceil(srclen / 3) * 4.  base64url emits neither '=' padding nor
+	 * line breaks, so the encoder may write up to 2 characters fewer than
+	 * this; the value is an upper bound, not an exact length.
+	 *
+	 * Do the arithmetic in uint64, as pg_base64_dec_len() does, so that
+	 * "srclen + 2" cannot wrap on platforms where size_t is 32 bits wide.
+	 */
+
+	return ((uint64) srclen + 2) / 3 * 4;
+}
+
+static uint64
+pg_base64url_dec_len(const char *src, size_t srclen)
+{
+	/*
+	 * Upper bound on the decoded size: every 4 input characters yield at
+	 * most 3 bytes.  Unpadded base64url input need not be a multiple of 4
+	 * characters long, so round the length up to a whole group first.
+	 *
+	 * Compute in uint64, as pg_base64_dec_len() does, so that neither the
+	 * rounding nor the multiplication can wrap a 32-bit size_t.
+	 */
+	uint64		padded_len = ((uint64) srclen + 3) / 4 * 4;
+
+	return padded_len * 3 / 4;
+}
+
+static uint64
+pg_base64url_encode(const char *src, size_t len, char *dst)
+{
+	return pg_base64_encode_internal(src, len, dst, true);	/* base64url */
+}
+
+static uint64
+pg_base64url_decode(const char *src, size_t len, char *dst)
+{
+	return pg_base64_decode_internal(src, len, dst, true);	/* base64url */
+}
+
 /*
  * Escape
  * Minimally escape bytea to text.
@@ -606,6 +704,12 @@ static const struct
 			pg_base64_enc_len, pg_base64_dec_len, pg_base64_encode, pg_base64_decode
 		}
 	},
+	{
+		"base64url",
+		{
+			pg_base64url_enc_len, pg_base64url_dec_len, pg_base64url_encode, pg_base64url_decode
+		}
+	},
 	{
 		"escape",
 		{
diff --git a/src/test/regress/expected/strings.out b/src/test/regress/expected/strings.out
index 788844abd20..ae5da7bde82 100644
--- a/src/test/regress/expected/strings.out
+++ b/src/test/regress/expected/strings.out
@@ -2462,6 +2462,156 @@ SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
  \x1234567890abcdef00
 (1 row)
 
+--
+-- Base64URL encoding/decoding
+--
+SET bytea_output TO hex;
+-- Simple encoding/decoding
+SELECT encode('\x69b73eff', 'base64url');  -- abc-_w
+ encode 
+--------
+ abc-_w
+(1 row)
+
+SELECT decode('abc-_w', 'base64url');      -- \x69b73eff
+   decode   
+------------
+ \x69b73eff
+(1 row)
+
+-- Round-trip: decode(encode(x)) = x
+SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url');  -- \x1234567890abcdef00
+        decode        
+----------------------
+ \x1234567890abcdef00
+(1 row)
+
+-- Empty input
+SELECT encode('', 'base64url');  -- ''
+ encode 
+--------
+ 
+(1 row)
+
+SELECT decode('', 'base64url');  -- ''
+ decode 
+--------
+ \x
+(1 row)
+
+-- 1 byte input
+SELECT encode('\x01', 'base64url');  -- AQ
+ encode 
+--------
+ AQ
+(1 row)
+
+SELECT decode('AQ', 'base64url');    -- \x01
+ decode 
+--------
+ \x01
+(1 row)
+
+-- 2 byte input
+SELECT encode('\x0102'::bytea, 'base64url');  -- AQI
+ encode 
+--------
+ AQI
+(1 row)
+
+SELECT decode('AQI', 'base64url');            -- \x0102
+ decode 
+--------
+ \x0102
+(1 row)
+
+-- 3 byte input (no padding needed)
+SELECT encode('\x010203'::bytea, 'base64url');  -- AQID
+ encode 
+--------
+ AQID
+(1 row)
+
+SELECT decode('AQID', 'base64url');             -- \x010203
+  decode  
+----------
+ \x010203
+(1 row)
+
+-- 4 byte input (results in 6 base64 chars)
+SELECT encode('\xdeadbeef'::bytea, 'base64url');  -- 3q2-7w
+ encode 
+--------
+ 3q2-7w
+(1 row)
+
+SELECT decode('3q2-7w', 'base64url');             -- \xdeadbeef
+   decode   
+------------
+ \xdeadbeef
+(1 row)
+
+-- Round-trip test for all lengths from 0–4
+SELECT encode(decode(encode(E'\\x', 'base64url'), 'base64url'), 'base64url');
+ encode 
+--------
+ 
+(1 row)
+
+SELECT encode(decode(encode(E'\\x00', 'base64url'), 'base64url'), 'base64url');
+ encode 
+--------
+ AA
+(1 row)
+
+SELECT encode(decode(encode(E'\\x0001', 'base64url'), 'base64url'), 'base64url');
+ encode 
+--------
+ AAE
+(1 row)
+
+SELECT encode(decode(encode(E'\\x000102', 'base64url'), 'base64url'), 'base64url');
+ encode 
+--------
+ AAEC
+(1 row)
+
+SELECT encode(decode(encode(E'\\x00010203', 'base64url'), 'base64url'), 'base64url');
+ encode 
+--------
+ AAECAw
+(1 row)
+
+-- Invalid inputs (should ERROR)
+-- invalid character '@'
+SELECT decode('QQ@=', 'base64url');
+ERROR:  invalid symbol "@" found while decoding base64 sequence
+-- missing characters (incomplete group)
+SELECT decode('QQ', 'base64url');  -- ok (1 byte)
+ decode 
+--------
+ \x41
+(1 row)
+
+SELECT decode('QQI', 'base64url'); -- ok (2 bytes)
+ decode 
+--------
+ \x4102
+(1 row)
+
+SELECT decode('QQIDQ', 'base64url'); -- ERROR: invalid base64 end sequence
+ERROR:  invalid base64 end sequence
+HINT:  Input data is missing padding, is truncated, or is otherwise corrupted.
+-- unexpected '=' at start
+SELECT decode('=QQQ', 'base64url');
+ERROR:  unexpected "=" while decoding base64 sequence
+-- valid base64 padding in base64url (optional, but accepted)
+SELECT decode('abc-_w==', 'base64url');  -- should decode to \x69b73eff
+   decode   
+------------
+ \x69b73eff
+(1 row)
+
 --
 -- get_bit/set_bit etc
 --
diff --git a/src/test/regress/sql/strings.sql b/src/test/regress/sql/strings.sql
index 2577a42987d..fb49f564936 100644
--- a/src/test/regress/sql/strings.sql
+++ b/src/test/regress/sql/strings.sql
@@ -774,6 +774,60 @@ SELECT decode(encode(('\x' || repeat('1234567890abcdef0001', 7))::bytea,
 SELECT encode('\x1234567890abcdef00', 'escape');
 SELECT decode(encode('\x1234567890abcdef00', 'escape'), 'escape');
 
+--
+-- Base64URL encoding/decoding
+--
+SET bytea_output TO hex;
+
+-- Simple encoding/decoding
+SELECT encode('\x69b73eff', 'base64url');  -- abc-_w
+SELECT decode('abc-_w', 'base64url');      -- \x69b73eff
+
+-- Round-trip: decode(encode(x)) = x
+SELECT decode(encode('\x1234567890abcdef00', 'base64url'), 'base64url');  -- \x1234567890abcdef00
+
+-- Empty input
+SELECT encode('', 'base64url');  -- ''
+SELECT decode('', 'base64url');  -- ''
+
+-- 1 byte input
+SELECT encode('\x01', 'base64url');  -- AQ
+SELECT decode('AQ', 'base64url');    -- \x01
+
+-- 2 byte input
+SELECT encode('\x0102'::bytea, 'base64url');  -- AQI
+SELECT decode('AQI', 'base64url');            -- \x0102
+
+-- 3 byte input (no padding needed)
+SELECT encode('\x010203'::bytea, 'base64url');  -- AQID
+SELECT decode('AQID', 'base64url');             -- \x010203
+
+-- 4 byte input (results in 6 base64 chars)
+SELECT encode('\xdeadbeef'::bytea, 'base64url');  -- 3q2-7w
+SELECT decode('3q2-7w', 'base64url');             -- \xdeadbeef
+
+-- Round-trip test for all lengths from 0–4
+SELECT encode(decode(encode(E'\\x', 'base64url'), 'base64url'), 'base64url');
+SELECT encode(decode(encode(E'\\x00', 'base64url'), 'base64url'), 'base64url');
+SELECT encode(decode(encode(E'\\x0001', 'base64url'), 'base64url'), 'base64url');
+SELECT encode(decode(encode(E'\\x000102', 'base64url'), 'base64url'), 'base64url');
+SELECT encode(decode(encode(E'\\x00010203', 'base64url'), 'base64url'), 'base64url');
+
+-- Invalid inputs (should ERROR)
+-- invalid character '@'
+SELECT decode('QQ@=', 'base64url');
+
+-- missing characters (incomplete group)
+SELECT decode('QQ', 'base64url');  -- ok (1 byte)
+SELECT decode('QQI', 'base64url'); -- ok (2 bytes)
+SELECT decode('QQIDQ', 'base64url'); -- ERROR: invalid base64 end sequence
+
+-- unexpected '=' at start
+SELECT decode('=QQQ', 'base64url');
+
+-- valid base64 padding in base64url (optional, but accepted)
+SELECT decode('abc-_w==', 'base64url');  -- should decode to \x69b73eff
+
 --
 -- get_bit/set_bit etc
 --
-- 
2.49.0

