This is an automated email from the ASF dual-hosted git repository.
garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git
The following commit(s) were added to refs/heads/master by this push:
new f890da3c [CODEC-341] Fix Base16 custom alphabet decode table (#434)
f890da3c is described below
commit f890da3cb5320745009160065ffe95aeb23b91a1
Author: OldTruckDriver <[email protected]>
AuthorDate: Thu Jun 18 11:58:19 2026 +1000
[CODEC-341] Fix Base16 custom alphabet decode table (#434)
* [CODEC-341] Fix Base16 custom alphabet decode table
Derive Base16 decode tables from custom encode tables so a configured codec
can decode its own output. Reject encode tables that do not contain exactly 16
unique byte values.
Reviewed-by: OpenAI Codex
Reviewed-by: Anthropic Claude Code
* Update Base16Test.java
* Remove test clutter.
---------
Co-authored-by: Gary Gregory <[email protected]>
---
src/changes/changes.xml | 1 +
.../org/apache/commons/codec/binary/Base16.java | 51 ++++++++++++++++++----
.../apache/commons/codec/binary/Base16Test.java | 33 ++++++++++++++
3 files changed, 77 insertions(+), 8 deletions(-)
diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 6bfe8353..f43f0329 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,6 +45,7 @@ The <action> type attribute can be add,update,fix,remove.
<body>
<release version="1.22.1" date="YYYY-MM-DD" description="This is a feature
and maintenance release. Java 8 or later is required.">
<!-- FIX -->
+ <action type="fix" issue="CODEC-341" dev="ggregory" due-to="Ruiqi Dong,
Gary Gregory">Base16.Builder.setEncodeTable(byte...) can create a codec that
cannot decode its own output.</action>
<action type="fix" issue="CODEC-339" dev="ggregory" due-to="Ruiqi Dong,
Gary Gregory">URLCodec.encodeUrl(BitSet, byte[]) allows custom safe sets to
emit URL encoding control characters.</action>
<action type="fix" issue="CODEC-338" dev="ggregory" due-to="Ruiqi Dong,
Gary Gregory">PercentCodec loses literal '+' when plusForSpace is
enabled.</action>
<action type="add" issue="CODEC-337" dev="pkarwasz" due-to="Ruiqi Dong,
Gary Gregory">Digest ALL reuses System.in, so only the first algorithm sees the
real input (#431).</action>
diff --git a/src/main/java/org/apache/commons/codec/binary/Base16.java b/src/main/java/org/apache/commons/codec/binary/Base16.java
index cd4b8938..c8f94501 100644
--- a/src/main/java/org/apache/commons/codec/binary/Base16.java
+++ b/src/main/java/org/apache/commons/codec/binary/Base16.java
@@ -33,7 +33,7 @@ import org.apache.commons.codec.CodecPolicy;
* </p>
* <p>
* The only additional feature above those specified in RFC 4648 is support
for working with a lower-case alphabet in addition to the default upper-case
- * alphabet.
+ * alphabet, and configuring a custom 16-byte alphabet with {@link
Builder#setEncodeTable(byte...)}.
* </p>
*
* @see Base16InputStream
@@ -78,10 +78,18 @@ public class Base16 extends BaseNCodec {
return new Base16(this);
}
+ /**
+ * Sets the Base16 encode table and derives the matching decode table.
+ *
+ * @param encodeTable 16 unique bytes, null resets to the default
upper-case table.
+ * @return {@code this} instance.
+ * @throws IllegalArgumentException if {@code encodeTable} does not
contain 16 unique bytes.
+ */
@Override
public Builder setEncodeTable(final byte... encodeTable) {
- super.setDecodeTableRaw(Arrays.equals(encodeTable,
LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
- return super.setEncodeTable(encodeTable);
+ final byte[] table = encodeTable != null ? encodeTable :
UPPER_CASE_ENCODE_TABLE;
+ super.setDecodeTableRaw(toDecodeTable(table));
+ return super.setEncodeTable(table);
}
/**
@@ -91,8 +99,7 @@ public class Base16 extends BaseNCodec {
* @return {@code this} instance.
*/
public Builder setLowerCase(final boolean lowerCase) {
- setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE :
UPPER_CASE_ENCODE_TABLE);
- return asThis();
+ return setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE :
UPPER_CASE_ENCODE_TABLE);
}
}
@@ -152,6 +159,32 @@ public class Base16 extends BaseNCodec {
/** Mask used to extract 4 bits, used when decoding character. */
private static final int MASK_4_BITS = 0x0f;
+ private static byte[] toDecodeTable(final byte[] encodeTable) {
+ if (Arrays.equals(encodeTable, UPPER_CASE_ENCODE_TABLE)) {
+ return UPPER_CASE_DECODE_TABLE;
+ }
+ if (Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE)) {
+ return LOWER_CASE_DECODE_TABLE;
+ }
+ if (encodeTable.length != 1 << BITS_PER_ENCODED_BYTE) {
+ throw new IllegalArgumentException("Base16 encode table must contain 16 entries.");
+ }
+ int max = -1;
+ for (final byte b : encodeTable) {
+ max = Math.max(max, b & 0xff);
+ }
+ final byte[] decodeTable = new byte[max + 1];
+ Arrays.fill(decodeTable, (byte) -1);
+ for (int i = 0; i < encodeTable.length; i++) {
+ final int b = encodeTable[i] & 0xff;
+ if (decodeTable[b] != -1) {
+ throw new IllegalArgumentException("Duplicate value in Base16 encode table: " + b);
+ }
+ decodeTable[b] = (byte) i;
+ }
+ return decodeTable;
+ }
+
/**
* Constructs a new builder.
*
@@ -241,8 +274,9 @@ public class Base16 extends BaseNCodec {
private int decodeOctet(final byte octet) {
int decoded = -1;
- if ((octet & 0xff) < decodeTable.length) {
- decoded = decodeTable[octet];
+ final int b = octet & 0xff;
+ if (b < decodeTable.length) {
+ decoded = decodeTable[b];
}
if (decoded == -1) {
throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
@@ -282,7 +316,8 @@ public class Base16 extends BaseNCodec {
*/
@Override
public boolean isInAlphabet(final byte octet) {
- return isInAlphabet((byte) (octet & 0xff), decodeTable);
+ final int b = octet & 0xff;
+ return b < decodeTable.length && decodeTable[b] != -1;
}
/**
diff --git a/src/test/java/org/apache/commons/codec/binary/Base16Test.java b/src/test/java/org/apache/commons/codec/binary/Base16Test.java
index 7ef4cc6f..46a356bd 100644
--- a/src/test/java/org/apache/commons/codec/binary/Base16Test.java
+++ b/src/test/java/org/apache/commons/codec/binary/Base16Test.java
@@ -149,6 +149,39 @@ class Base16Test {
new Base16(false, CodecPolicy.STRICT);
}
+ @Test
+ void testCustomEncodeTableAffectsDecodeTable() {
+ final byte[] encodeTable =
"0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII);
+ final byte tmp = encodeTable[0];
+ encodeTable[0] = encodeTable[1];
+ encodeTable[1] = tmp;
+ final Base16 base16 =
Base16.builder().setEncodeTable(encodeTable).get();
+ final byte[] encoded = base16.encode(new byte[] { 1 });
+ assertEquals("10", new String(encoded, StandardCharsets.US_ASCII));
+ assertArrayEquals(new byte[] { 1 }, base16.decode(encoded));
+ }
+
+ @Test
+ void testCustomEncodeTableRejectsDuplicates() {
+ final byte[] encodeTable =
"00123456789ABCDE".getBytes(StandardCharsets.US_ASCII);
+ assertThrows(IllegalArgumentException.class, () ->
Base16.builder().setEncodeTable(encodeTable));
+ }
+
+ @Test
+ void testCustomEncodeTableRejectsInvalidLength() {
+ assertThrows(IllegalArgumentException.class,
+ () ->
Base16.builder().setEncodeTable("0123456789ABCDE".getBytes(StandardCharsets.US_ASCII)));
+ }
+
+ @Test
+ void testBuilderSetLowerCaseDecodesOwnOutput() {
+ final Base16 base16 = Base16.builder().setLowerCase(true).get();
+ final byte[] data = { (byte) 0xab };
+ final byte[] encoded = base16.encode(data);
+ assertEquals("ab", new String(encoded, StandardCharsets.US_ASCII));
+ assertArrayEquals(data, base16.decode(encoded));
+ }
+
@Test
void testDecodeSingleBytes() {
final String encoded = "556E74696C206E6578742074696D6521";