This is an automated email from the ASF dual-hosted git repository.

garydgregory pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/commons-codec.git


The following commit(s) were added to refs/heads/master by this push:
     new f890da3c [CODEC-341] Fix Base16 custom alphabet decode table (#434)
f890da3c is described below

commit f890da3cb5320745009160065ffe95aeb23b91a1
Author: OldTruckDriver <[email protected]>
AuthorDate: Thu Jun 18 11:58:19 2026 +1000

    [CODEC-341] Fix Base16 custom alphabet decode table (#434)
    
    * [CODEC-341] Fix Base16 custom alphabet decode table
    
    Derive Base16 decode tables from custom encode tables so a configured codec can decode its own output. Reject encode tables that do not contain exactly 16 unique byte values.
    
    Reviewed-by: OpenAI Codex
    Reviewed-by: Anthropic Claude Code
    
    * Update Base16Test.java
    
    * Remove test clutter.
    
    ---------
    
    Co-authored-by: Gary Gregory <[email protected]>
---
 src/changes/changes.xml                            |  1 +
 .../org/apache/commons/codec/binary/Base16.java    | 51 ++++++++++++++++++----
 .../apache/commons/codec/binary/Base16Test.java    | 33 ++++++++++++++
 3 files changed, 77 insertions(+), 8 deletions(-)

diff --git a/src/changes/changes.xml b/src/changes/changes.xml
index 6bfe8353..f43f0329 100644
--- a/src/changes/changes.xml
+++ b/src/changes/changes.xml
@@ -45,6 +45,7 @@ The <action> type attribute can be add,update,fix,remove.
   <body>
     <release version="1.22.1" date="YYYY-MM-DD" description="This is a feature and maintenance release. Java 8 or later is required.">
       <!-- FIX -->
+      <action type="fix" issue="CODEC-341" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">Base16.Builder.setEncodeTable(byte...) can create a codec that cannot decode its own output.</action>
       <action type="fix" issue="CODEC-339" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">URLCodec.encodeUrl(BitSet, byte[]) allows custom safe sets to emit URL encoding control characters.</action>
       <action type="fix" issue="CODEC-338" dev="ggregory" due-to="Ruiqi Dong, Gary Gregory">PercentCodec loses literal '+' when plusForSpace is enabled.</action>
       <action type="add" issue="CODEC-337" dev="pkarwasz" due-to="Ruiqi Dong, Gary Gregory">Digest ALL reuses System.in, so only the first algorithm sees the real input (#431).</action>
diff --git a/src/main/java/org/apache/commons/codec/binary/Base16.java b/src/main/java/org/apache/commons/codec/binary/Base16.java
index cd4b8938..c8f94501 100644
--- a/src/main/java/org/apache/commons/codec/binary/Base16.java
+++ b/src/main/java/org/apache/commons/codec/binary/Base16.java
@@ -33,7 +33,7 @@ import org.apache.commons.codec.CodecPolicy;
  * </p>
  * <p>
  * The only additional feature above those specified in RFC 4648 is support for working with a lower-case alphabet in addition to the default upper-case
- * alphabet.
+ * alphabet, and configuring a custom 16-byte alphabet with {@link Builder#setEncodeTable(byte...)}.
  * </p>
  *
  * @see Base16InputStream
@@ -78,10 +78,18 @@ public class Base16 extends BaseNCodec {
             return new Base16(this);
         }
 
+        /**
+         * Sets the Base16 encode table and derives the matching decode table.
+         *
+         * @param encodeTable 16 unique bytes, null resets to the default upper-case table.
+         * @return {@code this} instance.
+         * @throws IllegalArgumentException if {@code encodeTable} does not contain 16 unique bytes.
+         */
         @Override
         public Builder setEncodeTable(final byte... encodeTable) {
-            super.setDecodeTableRaw(Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE) ? LOWER_CASE_DECODE_TABLE : UPPER_CASE_DECODE_TABLE);
-            return super.setEncodeTable(encodeTable);
+            final byte[] table = encodeTable != null ? encodeTable : UPPER_CASE_ENCODE_TABLE;
+            super.setDecodeTableRaw(toDecodeTable(table));
+            return super.setEncodeTable(table);
         }
 
         /**
@@ -91,8 +99,7 @@ public class Base16 extends BaseNCodec {
          * @return {@code this} instance.
          */
         public Builder setLowerCase(final boolean lowerCase) {
-            setEncodeTableRaw(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
-            return asThis();
+            return setEncodeTable(lowerCase ? LOWER_CASE_ENCODE_TABLE : UPPER_CASE_ENCODE_TABLE);
         }
 
     }
@@ -152,6 +159,32 @@ public class Base16 extends BaseNCodec {
     /** Mask used to extract 4 bits, used when decoding character. */
     private static final int MASK_4_BITS = 0x0f;
 
+    private static byte[] toDecodeTable(final byte[] encodeTable) {
+        if (Arrays.equals(encodeTable, UPPER_CASE_ENCODE_TABLE)) {
+            return UPPER_CASE_DECODE_TABLE;
+        }
+        if (Arrays.equals(encodeTable, LOWER_CASE_ENCODE_TABLE)) {
+            return LOWER_CASE_DECODE_TABLE;
+        }
+        if (encodeTable.length != 1 << BITS_PER_ENCODED_BYTE) {
+            throw new IllegalArgumentException("Base16 encode table must contain 16 entries.");
+        }
+        int max = -1;
+        for (final byte b : encodeTable) {
+            max = Math.max(max, b & 0xff);
+        }
+        final byte[] decodeTable = new byte[max + 1];
+        Arrays.fill(decodeTable, (byte) -1);
+        for (int i = 0; i < encodeTable.length; i++) {
+            final int b = encodeTable[i] & 0xff;
+            if (decodeTable[b] != -1) {
+                throw new IllegalArgumentException("Duplicate value in Base16 encode table: " + b);
+            }
+            decodeTable[b] = (byte) i;
+        }
+        return decodeTable;
+    }
+
     /**
      * Constructs a new builder.
      *
@@ -241,8 +274,9 @@ public class Base16 extends BaseNCodec {
 
     private int decodeOctet(final byte octet) {
         int decoded = -1;
-        if ((octet & 0xff) < decodeTable.length) {
-            decoded = decodeTable[octet];
+        final int b = octet & 0xff;
+        if (b < decodeTable.length) {
+            decoded = decodeTable[b];
         }
         if (decoded == -1) {
             throw new IllegalArgumentException("Invalid octet in encoded value: " + (int) octet);
@@ -282,7 +316,8 @@ public class Base16 extends BaseNCodec {
      */
     @Override
     public boolean isInAlphabet(final byte octet) {
-        return isInAlphabet((byte) (octet & 0xff), decodeTable);
+        final int b = octet & 0xff;
+        return b < decodeTable.length && decodeTable[b] != -1;
     }
 
     /**
diff --git a/src/test/java/org/apache/commons/codec/binary/Base16Test.java b/src/test/java/org/apache/commons/codec/binary/Base16Test.java
index 7ef4cc6f..46a356bd 100644
--- a/src/test/java/org/apache/commons/codec/binary/Base16Test.java
+++ b/src/test/java/org/apache/commons/codec/binary/Base16Test.java
@@ -149,6 +149,39 @@ class Base16Test {
         new Base16(false, CodecPolicy.STRICT);
     }
 
+    @Test
+    void testCustomEncodeTableAffectsDecodeTable() {
+        final byte[] encodeTable = "0123456789ABCDEF".getBytes(StandardCharsets.US_ASCII);
+        final byte tmp = encodeTable[0];
+        encodeTable[0] = encodeTable[1];
+        encodeTable[1] = tmp;
+        final Base16 base16 = Base16.builder().setEncodeTable(encodeTable).get();
+        final byte[] encoded = base16.encode(new byte[] { 1 });
+        assertEquals("10", new String(encoded, StandardCharsets.US_ASCII));
+        assertArrayEquals(new byte[] { 1 }, base16.decode(encoded));
+    }
+
+    @Test
+    void testCustomEncodeTableRejectsDuplicates() {
+        final byte[] encodeTable = "00123456789ABCDE".getBytes(StandardCharsets.US_ASCII);
+        assertThrows(IllegalArgumentException.class, () -> Base16.builder().setEncodeTable(encodeTable));
+    }
+
+    @Test
+    void testCustomEncodeTableRejectsInvalidLength() {
+        assertThrows(IllegalArgumentException.class,
+                () -> Base16.builder().setEncodeTable("0123456789ABCDE".getBytes(StandardCharsets.US_ASCII)));
+    }
+
+    @Test
+    void testBuilderSetLowerCaseDecodesOwnOutput() {
+        final Base16 base16 = Base16.builder().setLowerCase(true).get();
+        final byte[] data = { (byte) 0xab };
+        final byte[] encoded = base16.encode(data);
+        assertEquals("ab", new String(encoded, StandardCharsets.US_ASCII));
+        assertArrayEquals(data, base16.decode(encoded));
+    }
+
     @Test
     void testDecodeSingleBytes() {
         final String encoded = "556E74696C206E6578742074696D6521";

Reply via email to