aherbert commented on a change in pull request #46: URL: https://github.com/apache/commons-codec/pull/46#discussion_r442066490
########## File path: src/main/java/org/apache/commons/codec/binary/Base16.java ########## @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.binary; + +import org.apache.commons.codec.CodecPolicy; + +/** + * Provides Base16 encoding and decoding. + * + * <p> + * This class is thread-safe. + * </p> + * + * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a> + * + * @since 1.15 + */ +public class Base16 extends BaseNCodec { + + /** + * BASE16 characters are 4 bits in length. + * They are formed by taking an 8-bit group, + * which is converted into two BASE16 characters. + */ + private static final int BITS_PER_ENCODED_BYTE = 4; + private static final int BYTES_PER_ENCODED_BLOCK = 2; + private static final int BYTES_PER_UNENCODED_BLOCK = 1; + + /** + * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified + * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. 
+ */ + private static final byte[] UPPER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, 10, 11, 12, 13, 14, 15 // 40-46 A-F + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * equivalents as specified in Table 5 of RFC 4648. + */ + private static final byte[] UPPER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F' + }; + + /** + * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" + * into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. + */ + private static final byte[] LOWER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f + -1, 10, 11, 12, 13, 14, 15 // 60-66 a-f + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * lower-case equivalents. 
+ */ + private static final byte[] LOWER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f' + }; + + /** Mask used to extract 4 bits, used when decoding character. */ + private static final int MASK_4BITS = 0x0f; + + /** + * Decode table to use. + */ + private final byte[] decodeTable; + + /** + * Encode table to use. + */ + private final byte[] encodeTable; + + /** + * Creates a Base16 codec used for decoding and encoding. + */ + public Base16() { + this(false); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + */ + public Base16(final boolean lowerCase) { + this(lowerCase, DECODING_POLICY_DEFAULT); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + * @param decodingPolicy Decoding policy. + */ + public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) { + super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, + PAD_DEFAULT, decodingPolicy); + if (lowerCase) { + this.encodeTable = LOWER_CASE_ENCODE_TABLE; + this.decodeTable = LOWER_CASE_DECODE_TABLE; + } else { + this.encodeTable = UPPER_CASE_ENCODE_TABLE; + this.decodeTable = UPPER_CASE_DECODE_TABLE; + } + } + + @Override + void decode(final byte[] data, int offset, final int length, final Context context) { + if (context.eof || length < 0) { + context.eof = true; + if (context.ibitWorkArea > 0) { Review comment: `ibitWorkArea != 0` ########## File path: src/test/java/org/apache/commons/codec/binary/HexTest.java ########## @@ -340,6 +341,18 @@ public void testDecodeHexCharArrayOddCharacters5() { checkDecodeHexCharArrayOddCharacters(new char[] { 'A', 'B', 'C', 'D', 'E' }); } + @Test(expected = DecoderException.class) Review comment: IIUC these tests target coverage in new methods added to Hex. 
If the changes are dropped then these tests can be dropped too. ########## File path: src/test/java/org/apache/commons/codec/binary/Base16Test.java ########## @@ -0,0 +1,625 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.binary; + +import org.apache.commons.codec.CodecPolicy; +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.EncoderException; +import org.apache.commons.lang3.ArrayUtils; +import org.junit.Assume; +import org.junit.Test; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Random; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Test cases for Base16 class. + * + * @since 1.15 + */ +public class Base16Test { + + private static final Charset CHARSET_UTF8 = StandardCharsets.UTF_8; + + private final Random random = new Random(); + + /** + * @return Returns the random. 
+ */ + public Random getRandom() { + return this.random; + } + + /** + * Test the Base16 implementation + */ + @Test + public void testBase16() { + final String content = "Hello World"; + final byte[] encodedBytes = new Base16().encode(StringUtils.getBytesUtf8(content)); + final String encodedContent = StringUtils.newStringUtf8(encodedBytes); + assertEquals("encoding hello world", "48656C6C6F20576F726C64", encodedContent); + + final byte[] decodedBytes = new Base16().decode(encodedBytes); + final String decodedContent = StringUtils.newStringUtf8(decodedBytes); + assertEquals("decoding hello world", content, decodedContent); + } + + @Test + public void testBase16AtBufferStart() { + testBase16InBuffer(0, 100); + } + + @Test + public void testBase16AtBufferEnd() { + testBase16InBuffer(100, 0); + } + + @Test + public void testBase16AtBufferMiddle() { + testBase16InBuffer(100, 100); + } + + private void testBase16InBuffer(final int startPasSize, final int endPadSize) { + final String content = "Hello World"; + String encodedContent; + final byte[] bytesUtf8 = StringUtils.getBytesUtf8(content); + byte[] buffer = ArrayUtils.addAll(bytesUtf8, new byte[endPadSize]); + buffer = ArrayUtils.addAll(new byte[startPasSize], buffer); + final byte[] encodedBytes = new Base16().encode(buffer, startPasSize, bytesUtf8.length); + encodedContent = StringUtils.newStringUtf8(encodedBytes); + assertEquals("encoding hello world", "48656C6C6F20576F726C64", encodedContent); + } + + /** + * isBase16 throws RuntimeException on some + * non-Base16 bytes + */ + @Test(expected=RuntimeException.class) + public void testCodec68() { + final byte[] x = new byte[] { 'n', 'H', '=', '=', (byte) 0x9c }; + final Base16 b16 = new Base16(); + b16.decode(x); + } + + @Test + public void testConstructors() { + new Base16(); + new Base16(false); + new Base16(true); + new Base16(false, CodecPolicy.LENIENT); + new Base16(false, CodecPolicy.STRICT); + } + + @Test + public void testConstructor_LowerCase() { + final 
Base16 Base16 = new Base16(true); Review comment: Replace `Base16 Base16` with `Base16 base16` ########## File path: src/main/java/org/apache/commons/codec/binary/BaseNCodec.java ########## @@ -567,7 +567,7 @@ public String encodeToString(final byte[] pArray) { */ protected byte[] ensureBufferSize(final int size, final Context context){ if (context.buffer == null) { - context.buffer = new byte[getDefaultBufferSize()]; + context.buffer = new byte[Math.max(size, getDefaultBufferSize())]; Review comment: Nice spot. Previously it was not relevant as the buffer is expanded continuously inside the decoding loop. Base32 and Base64 can (should) be similarly optimised. Something for another improvement. ########## File path: src/main/java/org/apache/commons/codec/binary/Hex.java ########## @@ -54,13 +54,13 @@ /** * Used to build output as Hex */ - private static final char[] DIGITS_LOWER = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', + static final char[] DIGITS_LOWER = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd', Review comment: All the changes to Hex are no longer required since Base16 no longer delegates encoding/decoding to the Hex class. The changes are to either package-private or protected scope and so will not be usable API. There is one public method to decode part of an array: ```java public static int decodeHex(final char[] data, final byte[] out, final int outOffset) ``` This perhaps should have been a protected/package-private utility method anyway. I think all changes in this class can be removed. ########## File path: src/main/java/org/apache/commons/codec/binary/Base16.java ########## @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.binary; + +import org.apache.commons.codec.CodecPolicy; + +/** + * Provides Base16 encoding and decoding. + * + * <p> + * This class is thread-safe. + * </p> + * + * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a> + * + * @since 1.15 + */ +public class Base16 extends BaseNCodec { + + /** + * BASE16 characters are 4 bits in length. + * They are formed by taking an 8-bit group, + * which is converted into two BASE16 characters. + */ + private static final int BITS_PER_ENCODED_BYTE = 4; + private static final int BYTES_PER_ENCODED_BLOCK = 2; + private static final int BYTES_PER_UNENCODED_BLOCK = 1; + + /** + * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified + * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. 
+ */ + private static final byte[] UPPER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, 10, 11, 12, 13, 14, 15 // 40-46 A-F + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * equivalents as specified in Table 5 of RFC 4648. + */ + private static final byte[] UPPER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F' + }; + + /** + * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" + * into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. + */ + private static final byte[] LOWER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f + -1, 10, 11, 12, 13, 14, 15 // 60-66 a-f + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * lower-case equivalents. 
+ */ + private static final byte[] LOWER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f' + }; + + /** Mask used to extract 4 bits, used when decoding character. */ + private static final int MASK_4BITS = 0x0f; + + /** + * Decode table to use. + */ + private final byte[] decodeTable; + + /** + * Encode table to use. + */ + private final byte[] encodeTable; + + /** + * Creates a Base16 codec used for decoding and encoding. + */ + public Base16() { + this(false); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + */ + public Base16(final boolean lowerCase) { + this(lowerCase, DECODING_POLICY_DEFAULT); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + * @param decodingPolicy Decoding policy. + */ + public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) { + super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, + PAD_DEFAULT, decodingPolicy); + if (lowerCase) { + this.encodeTable = LOWER_CASE_ENCODE_TABLE; + this.decodeTable = LOWER_CASE_DECODE_TABLE; + } else { + this.encodeTable = UPPER_CASE_ENCODE_TABLE; + this.decodeTable = UPPER_CASE_DECODE_TABLE; + } + } + + @Override + void decode(final byte[] data, int offset, final int length, final Context context) { + if (context.eof || length < 0) { + context.eof = true; + if (context.ibitWorkArea > 0) { + validateTrailingCharacter(); + } + return; + } + + final int dataLen = Math.min(data.length - offset, length); + final int availableChars = (context.ibitWorkArea > 0 ? 
1 : 0) + dataLen; + + // small optimisation to short-cut the rest of this method when it is fed byte-by-byte + if (availableChars == 1 && availableChars == dataLen) { + context.ibitWorkArea = decodeOctet(data[offset]) + 1; // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 + return; + } + + // we must have an even number of chars to decode + final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1; + + final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context); + + int result; + int i = 0; + if (dataLen < availableChars) { + // we have 1/2 byte from previous invocation to decode + result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE; + result |= decodeOctet(data[offset++]); + i = 2; + + buffer[context.pos++] = (byte)result; + + // reset to empty-value for next invocation! + context.ibitWorkArea = 0; + } + + while (i < charsToProcess) { + result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE; + result |= decodeOctet(data[offset++]); + i += 2; + buffer[context.pos++] = (byte)result; + } + + // we have one char of a hex-pair left over + if (i < dataLen) { + context.ibitWorkArea = decodeOctet(data[i]) + 1; // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 + } + } + + private int decodeOctet(final byte octet) { + int decoded = -1; + if (octet >= 0 && octet < decodeTable.length) { Review comment: Avoid the `>= 0` using `(octet & 0xff) < decodeTable.length` ########## File path: src/main/java/org/apache/commons/codec/binary/Base16.java ########## @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.binary; + +import org.apache.commons.codec.CodecPolicy; + +/** + * Provides Base16 encoding and decoding. + * + * <p> + * This class is thread-safe. + * </p> + * + * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a> + * + * @since 1.15 + */ +public class Base16 extends BaseNCodec { + + /** + * BASE16 characters are 4 bits in length. + * They are formed by taking an 8-bit group, + * which is converted into two BASE16 characters. + */ + private static final int BITS_PER_ENCODED_BYTE = 4; + private static final int BYTES_PER_ENCODED_BLOCK = 2; + private static final int BYTES_PER_UNENCODED_BLOCK = 1; + + /** + * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified + * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. 
+ */ + private static final byte[] UPPER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, 10, 11, 12, 13, 14, 15 // 40-46 A-F + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * equivalents as specified in Table 5 of RFC 4648. + */ + private static final byte[] UPPER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F' + }; + + /** + * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" + * into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. + */ + private static final byte[] LOWER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f + -1, 10, 11, 12, 13, 14, 15 // 60-66 a-f + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * lower-case equivalents. 
+ */ + private static final byte[] LOWER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f' + }; + + /** Mask used to extract 4 bits, used when decoding character. */ + private static final int MASK_4BITS = 0x0f; + + /** + * Decode table to use. + */ + private final byte[] decodeTable; + + /** + * Encode table to use. + */ + private final byte[] encodeTable; + + /** + * Creates a Base16 codec used for decoding and encoding. + */ + public Base16() { + this(false); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + */ + public Base16(final boolean lowerCase) { + this(lowerCase, DECODING_POLICY_DEFAULT); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + * @param decodingPolicy Decoding policy. + */ + public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) { + super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, + PAD_DEFAULT, decodingPolicy); + if (lowerCase) { + this.encodeTable = LOWER_CASE_ENCODE_TABLE; + this.decodeTable = LOWER_CASE_DECODE_TABLE; + } else { + this.encodeTable = UPPER_CASE_ENCODE_TABLE; + this.decodeTable = UPPER_CASE_DECODE_TABLE; + } + } + + @Override + void decode(final byte[] data, int offset, final int length, final Context context) { + if (context.eof || length < 0) { + context.eof = true; + if (context.ibitWorkArea > 0) { + validateTrailingCharacter(); + } + return; + } + + final int dataLen = Math.min(data.length - offset, length); + final int availableChars = (context.ibitWorkArea > 0 ? 
1 : 0) + dataLen; + + // small optimisation to short-cut the rest of this method when it is fed byte-by-byte + if (availableChars == 1 && availableChars == dataLen) { + context.ibitWorkArea = decodeOctet(data[offset]) + 1; // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 + return; + } + + // we must have an even number of chars to decode + final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1; + + final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context); + + int result; + int i = 0; + if (dataLen < availableChars) { + // we have 1/2 byte from previous invocation to decode + result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE; + result |= decodeOctet(data[offset++]); + i = 2; + + buffer[context.pos++] = (byte)result; + + // reset to empty-value for next invocation! + context.ibitWorkArea = 0; + } + + while (i < charsToProcess) { + result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE; + result |= decodeOctet(data[offset++]); + i += 2; + buffer[context.pos++] = (byte)result; + } + + // we have one char of a hex-pair left over + if (i < dataLen) { + context.ibitWorkArea = decodeOctet(data[i]) + 1; // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 + } + } + + private int decodeOctet(final byte octet) { + int decoded = -1; + if (octet >= 0 && octet < decodeTable.length) { + decoded = decodeTable[octet]; + } + + if (decoded == -1) { + throw new IllegalArgumentException("Invalid octet in encoded value: " + (int)octet); + } + + return decoded; + } + + @Override + void encode(final byte[] data, final int offset, final int length, final Context context) { + if (context.eof) { + return; + } + + if (length < 0) { + context.eof = true; + return; + } + + final byte[] buffer = ensureBufferSize(length * BYTES_PER_ENCODED_BLOCK, context); + + final int end = offset + length; + for (int i = offset; i < end; 
i++) { + final int value = data[i]; + final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS; + final int low = value & MASK_4BITS; + buffer[context.pos++] = encodeTable[high]; + buffer[context.pos++] = encodeTable[low]; + } + } + + /** + * Returns whether or not the {@code octet} is in the Base16 alphabet. + * + * @param octet The value to test. + * + * @return {@code true} if the value is defined in the the Base16 alphabet {@code false} otherwise. + */ + @Override + public boolean isInAlphabet(final byte octet) { + return octet >= 0 && octet < decodeTable.length && decodeTable[octet] != -1; Review comment: To avoid checking if the octet is not negative you could convert as unsigned: `(octet & 0xff) < decodeTable.length && decodeTable[octet] != -1` ########## File path: src/main/java/org/apache/commons/codec/binary/Base16.java ########## @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.binary; + +import org.apache.commons.codec.CodecPolicy; + +/** + * Provides Base16 encoding and decoding. + * + * <p> + * This class is thread-safe. + * </p> + * + * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. 
Base 16 Encoding</a> + * + * @since 1.15 + */ +public class Base16 extends BaseNCodec { + + /** + * BASE16 characters are 4 bits in length. + * They are formed by taking an 8-bit group, + * which is converted into two BASE16 characters. + */ + private static final int BITS_PER_ENCODED_BYTE = 4; + private static final int BYTES_PER_ENCODED_BLOCK = 2; + private static final int BYTES_PER_UNENCODED_BLOCK = 1; + + /** + * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified + * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. + */ + private static final byte[] UPPER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, 10, 11, 12, 13, 14, 15 // 40-46 A-F + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * equivalents as specified in Table 5 of RFC 4648. + */ + private static final byte[] UPPER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F' + }; + + /** + * This array is a lookup table that translates Unicode characters drawn from the a lower-case "Base16 Alphabet" + * into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. 
+ */ + private static final byte[] LOWER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f + -1, 10, 11, 12, 13, 14, 15 // 60-66 a-f + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * lower-case equivalents. + */ + private static final byte[] LOWER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f' + }; + + /** Mask used to extract 4 bits, used when decoding character. */ + private static final int MASK_4BITS = 0x0f; + + /** + * Decode table to use. + */ + private final byte[] decodeTable; + + /** + * Encode table to use. + */ + private final byte[] encodeTable; + + /** + * Creates a Base16 codec used for decoding and encoding. + */ + public Base16() { + this(false); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + */ + public Base16(final boolean lowerCase) { + this(lowerCase, DECODING_POLICY_DEFAULT); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + * @param decodingPolicy Decoding policy. 
+ */ + public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) { + super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, + PAD_DEFAULT, decodingPolicy); + if (lowerCase) { + this.encodeTable = LOWER_CASE_ENCODE_TABLE; + this.decodeTable = LOWER_CASE_DECODE_TABLE; + } else { + this.encodeTable = UPPER_CASE_ENCODE_TABLE; + this.decodeTable = UPPER_CASE_DECODE_TABLE; + } + } + + @Override + void decode(final byte[] data, int offset, final int length, final Context context) { + if (context.eof || length < 0) { + context.eof = true; + if (context.ibitWorkArea > 0) { + validateTrailingCharacter(); + } + return; + } + + final int dataLen = Math.min(data.length - offset, length); + final int availableChars = (context.ibitWorkArea > 0 ? 1 : 0) + dataLen; + + // small optimisation to short-cut the rest of this method when it is fed byte-by-byte + if (availableChars == 1 && availableChars == dataLen) { + context.ibitWorkArea = decodeOctet(data[offset]) + 1; // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 + return; + } + + // we must have an even number of chars to decode + final int charsToProcess = availableChars % BYTES_PER_ENCODED_BLOCK == 0 ? availableChars : availableChars - 1; + + final byte[] buffer = ensureBufferSize(charsToProcess / BYTES_PER_ENCODED_BLOCK, context); + + int result; + int i = 0; + if (dataLen < availableChars) { + // we have 1/2 byte from previous invocation to decode + result = (context.ibitWorkArea - 1) << BITS_PER_ENCODED_BYTE; + result |= decodeOctet(data[offset++]); + i = 2; + + buffer[context.pos++] = (byte)result; + + // reset to empty-value for next invocation! 
+ context.ibitWorkArea = 0; + } + + while (i < charsToProcess) { + result = decodeOctet(data[offset++]) << BITS_PER_ENCODED_BYTE; + result |= decodeOctet(data[offset++]); + i += 2; + buffer[context.pos++] = (byte)result; + } + + // we have one char of a hex-pair left over + if (i < dataLen) { + context.ibitWorkArea = decodeOctet(data[i]) + 1; // store 1/2 byte for next invocation of decode, we offset by +1 as empty-value is 0 + } + } + + private int decodeOctet(final byte octet) { + int decoded = -1; + if (octet >= 0 && octet < decodeTable.length) { + decoded = decodeTable[octet]; + } + + if (decoded == -1) { + throw new IllegalArgumentException("Invalid octet in encoded value: " + (int)octet); + } + + return decoded; + } + + @Override + void encode(final byte[] data, final int offset, final int length, final Context context) { + if (context.eof) { + return; + } + + if (length < 0) { + context.eof = true; + return; + } + + final byte[] buffer = ensureBufferSize(length * BYTES_PER_ENCODED_BLOCK, context); + + final int end = offset + length; + for (int i = offset; i < end; i++) { + final int value = data[i]; + final int high = (value >> BITS_PER_ENCODED_BYTE) & MASK_4BITS; + final int low = value & MASK_4BITS; + buffer[context.pos++] = encodeTable[high]; + buffer[context.pos++] = encodeTable[low]; + } + } + + /** + * Returns whether or not the {@code octet} is in the Base16 alphabet. + * + * @param octet The value to test. + * + * @return {@code true} if the value is defined in the Base16 alphabet {@code false} otherwise. + */ + @Override + public boolean isInAlphabet(final byte octet) { Review comment: I am not convinced by the two decode tables. Is there any reason that you would not want to support case-insensitive decoding? Currently if I want to decode some hex encoded data I have to know before decoding if the hex uses upper or lower case and create the correct decoder. Base32 supports case insensitive decoding when used in Hex mode.
Base64 supports decoding of both URL-safe and standard encoding by default. The Hex class which delegates to `Character.digit(ch, 16)` will handle mixed case input hex. The upside of forcing a case is that data encoded using both upper and lower case characters will be reported as invalid. Is this something in RFC 4648? ########## File path: src/test/java/org/apache/commons/codec/binary/Base16Test.java ########## @@ -0,0 +1,625 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.binary; + +import org.apache.commons.codec.CodecPolicy; +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.EncoderException; +import org.apache.commons.lang3.ArrayUtils; +import org.junit.Assume; +import org.junit.Test; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Random; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Test cases for Base16 class. 
+ * + * @since 1.15 + */ +public class Base16Test { + + private static final Charset CHARSET_UTF8 = StandardCharsets.UTF_8; + + private final Random random = new Random(); + + /** + * @return Returns the random. + */ + public Random getRandom() { + return this.random; + } + + /** + * Test the Base16 implementation + */ + @Test + public void testBase16() { + final String content = "Hello World"; + final byte[] encodedBytes = new Base16().encode(StringUtils.getBytesUtf8(content)); + final String encodedContent = StringUtils.newStringUtf8(encodedBytes); + assertEquals("encoding hello world", "48656C6C6F20576F726C64", encodedContent); + + final byte[] decodedBytes = new Base16().decode(encodedBytes); + final String decodedContent = StringUtils.newStringUtf8(decodedBytes); + assertEquals("decoding hello world", content, decodedContent); + } + + @Test + public void testBase16AtBufferStart() { + testBase16InBuffer(0, 100); + } + + @Test + public void testBase16AtBufferEnd() { + testBase16InBuffer(100, 0); + } + + @Test + public void testBase16AtBufferMiddle() { + testBase16InBuffer(100, 100); + } + + private void testBase16InBuffer(final int startPasSize, final int endPadSize) { + final String content = "Hello World"; + String encodedContent; + final byte[] bytesUtf8 = StringUtils.getBytesUtf8(content); + byte[] buffer = ArrayUtils.addAll(bytesUtf8, new byte[endPadSize]); + buffer = ArrayUtils.addAll(new byte[startPasSize], buffer); + final byte[] encodedBytes = new Base16().encode(buffer, startPasSize, bytesUtf8.length); + encodedContent = StringUtils.newStringUtf8(encodedBytes); + assertEquals("encoding hello world", "48656C6C6F20576F726C64", encodedContent); + } + + /** + * isBase16 throws RuntimeException on some + * non-Base16 bytes + */ + @Test(expected=RuntimeException.class) + public void testCodec68() { + final byte[] x = new byte[] { 'n', 'H', '=', '=', (byte) 0x9c }; + final Base16 b16 = new Base16(); + b16.decode(x); + } + + @Test + public void 
testConstructors() { + new Base16(); + new Base16(false); + new Base16(true); + new Base16(false, CodecPolicy.LENIENT); + new Base16(false, CodecPolicy.STRICT); + } + + @Test + public void testConstructor_LowerCase() { + final Base16 Base16 = new Base16(true); + final byte[] encoded = Base16.encode(Base16TestData.DECODED); + final String expectedResult = Base16TestData.ENCODED_UTF8_LOWERCASE; + final String result = StringUtils.newStringUtf8(encoded); + assertEquals("new Base16(true)", expectedResult, result); + } + + @Test + public void testConstructor_LowerCase_DecodingPolicy() { + final Base16 Base16 = new Base16(false, CodecPolicy.STRICT); + final byte[] encoded = Base16.encode(Base16TestData.DECODED); + final String expectedResult = Base16TestData.ENCODED_UTF8_UPPERCASE; + final String result = StringUtils.newStringUtf8(encoded); + assertEquals("new Base16(false, CodecPolicy.STRICT)", result, expectedResult); + } + + /** + * Test encode and decode of empty byte array. + */ + @Test + public void testEmptyBase16() { + byte[] empty = new byte[0]; + byte[] result = new Base16().encode(empty); + assertEquals("empty Base16 encode", 0, result.length); + assertEquals("empty Base16 encode", null, new Base16().encode(null)); + + empty = new byte[0]; + result = new Base16().decode(empty); + assertEquals("empty Base16 decode", 0, result.length); + assertEquals("empty Base16 encode", null, new Base16().decode((byte[]) null)); + } + + // encode/decode a large random array + @Test + public void testEncodeDecodeRandom() { + for (int i = 1; i < 5; i++) { + final int len = this.getRandom().nextInt(10000) + 1; + final byte[] data = new byte[len]; + this.getRandom().nextBytes(data); + final byte[] enc = new Base16().encode(data); + final byte[] data2 = new Base16().decode(enc); + assertArrayEquals(data, data2); + } + } + + // encode/decode random arrays from size 0 to size 11 + @Test + public void testEncodeDecodeSmall() { + for (int i = 0; i < 12; i++) { + final byte[] data = 
new byte[i]; + this.getRandom().nextBytes(data); + final byte[] enc = new Base16().encode(data); + final byte[] data2 = new Base16().decode(enc); + assertArrayEquals(toString(data) + " equals " + toString(data2), data, data2); + } + } + + @Test + public void testKnownDecodings() { + assertEquals("The quick brown fox jumped over the lazy dogs.", new String(new Base16(true).decode( + "54686520717569636b2062726f776e20666f78206a756d706564206f76657220746865206c617a7920646f67732e".getBytes(CHARSET_UTF8)))); + assertEquals("It was the best of times, it was the worst of times.", new String(new Base16(true).decode( + "497420776173207468652062657374206f662074696d65732c206974207761732074686520776f727374206f662074696d65732e".getBytes(CHARSET_UTF8)))); + assertEquals("http://jakarta.apache.org/commmons", new String( + new Base16(true).decode("687474703a2f2f6a616b617274612e6170616368652e6f72672f636f6d6d6d6f6e73".getBytes(CHARSET_UTF8)))); + assertEquals("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", new String(new Base16(true).decode( + "4161426243634464456546664767486849694a6a4b6b4c6c4d6d4e6e4f6f50705171527253735474557556765777587859795a7a".getBytes(CHARSET_UTF8)))); + assertEquals("{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }", + new String(new Base16(true).decode("7b20302c20312c20322c20332c20342c20352c20362c20372c20382c2039207d".getBytes(CHARSET_UTF8)))); + assertEquals("xyzzy!", new String(new Base16(true).decode("78797a7a7921".getBytes(CHARSET_UTF8)))); + } + + @Test + public void testKnownEncodings() { + assertEquals("54686520717569636b2062726f776e20666f78206a756d706564206f76657220746865206c617a7920646f67732e", new String( + new Base16(true).encode("The quick brown fox jumped over the lazy dogs.".getBytes(CHARSET_UTF8)))); + assertEquals("497420776173207468652062657374206f662074696d65732c206974207761732074686520776f727374206f662074696d65732e", new String( + new Base16(true).encode("It was the best of times, it was the worst of times.".getBytes(CHARSET_UTF8)))); + 
assertEquals("687474703a2f2f6a616b617274612e6170616368652e6f72672f636f6d6d6d6f6e73", + new String(new Base16(true).encode("http://jakarta.apache.org/commmons".getBytes(CHARSET_UTF8)))); + assertEquals("4161426243634464456546664767486849694a6a4b6b4c6c4d6d4e6e4f6f50705171527253735474557556765777587859795a7a", new String( + new Base16(true).encode("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz".getBytes(CHARSET_UTF8)))); + assertEquals("7b20302c20312c20322c20332c20342c20352c20362c20372c20382c2039207d", + new String(new Base16(true).encode("{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }".getBytes(CHARSET_UTF8)))); + assertEquals("78797a7a7921", new String(new Base16(true).encode("xyzzy!".getBytes(CHARSET_UTF8)))); + } + + @Test + public void testNonBase16Test() { + final byte[] invalidEncodedChars = { '/', ':', '@', 'G', '%', '`', 'g' }; + + final byte[] encoded = new byte[1]; + for (final byte invalidEncodedChar : invalidEncodedChars) { + try { + encoded[0] = invalidEncodedChar; + new Base16().decode(encoded); + fail("IllegalArgumentException should have been thrown when trying to decode invalid Base16 char: " + (char)invalidEncodedChar); + } catch (final Exception e) { + assertTrue(e instanceof IllegalArgumentException); + } + } + } + + @Test + public void testObjectDecodeWithInvalidParameter() { + final Base16 b16 = new Base16(); + + try { + b16.decode(Integer.valueOf(5)); + fail("decode(Object) didn't throw an exception when passed an Integer object"); + } catch (final DecoderException e) { + // ignored + } + + } + + @Test + public void testObjectDecodeWithValidParameter() throws Exception { + final String original = "Hello World!"; + final Object o = new Base16().encode(original.getBytes(CHARSET_UTF8)); + + final Base16 b16 = new Base16(); + final Object oDecoded = b16.decode(o); + final byte[] baDecoded = (byte[]) oDecoded; + final String dest = new String(baDecoded); + + assertEquals("dest string does not equal original", original, dest); + } + + @Test + public void 
testObjectEncodeWithInvalidParameter() { + final Base16 b16 = new Base16(); + try { + b16.encode("Yadayadayada"); + fail("encode(Object) didn't throw an exception when passed a String object"); + } catch (final EncoderException e) { + // Expected + } + } + + @Test + public void testObjectEncodeWithValidParameter() throws Exception { + final String original = "Hello World!"; + final Object origObj = original.getBytes(CHARSET_UTF8); + + final Object oEncoded = new Base16().encode(origObj); + final byte[] bArray = new Base16().decode((byte[]) oEncoded); + final String dest = new String(bArray); + + assertEquals("dest string does not equal original", original, dest); + } + + @Test + public void testObjectEncode() { + final Base16 b16 = new Base16(); + assertEquals("48656C6C6F20576F726C64", new String(b16.encode("Hello World".getBytes(CHARSET_UTF8)))); + } + + @Test + public void testPairs() { + assertEquals("0000", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0 }))); + assertEquals("0001", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 1 }))); + assertEquals("0002", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 2 }))); + assertEquals("0003", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 3 }))); + assertEquals("0004", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 4 }))); + assertEquals("0005", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 5 }))); + assertEquals("0006", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 6 }))); + assertEquals("0007", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 7 }))); + assertEquals("0008", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 8 }))); + assertEquals("0009", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 9 }))); + assertEquals("000A", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 10 }))); + assertEquals("000B", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 
11 }))); + assertEquals("000C", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 12 }))); + assertEquals("000D", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 13 }))); + assertEquals("000E", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 14 }))); + assertEquals("000F", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 15 }))); + assertEquals("0010", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 16 }))); + assertEquals("0011", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 17 }))); + for (int i = -128; i <= 127; i++) { + final byte test[] = { (byte) i, (byte) i }; + assertArrayEquals(test, new Base16().decode(new Base16().encode(test))); + } + } + + @Test + public void testSingletons() { + assertEquals("00", new String(new Base16().encode(new byte[] { (byte) 0 }))); + assertEquals("01", new String(new Base16().encode(new byte[] { (byte) 1 }))); + assertEquals("02", new String(new Base16().encode(new byte[] { (byte) 2 }))); + assertEquals("03", new String(new Base16().encode(new byte[] { (byte) 3 }))); + assertEquals("04", new String(new Base16().encode(new byte[] { (byte) 4 }))); + assertEquals("05", new String(new Base16().encode(new byte[] { (byte) 5 }))); + assertEquals("06", new String(new Base16().encode(new byte[] { (byte) 6 }))); + assertEquals("07", new String(new Base16().encode(new byte[] { (byte) 7 }))); + assertEquals("08", new String(new Base16().encode(new byte[] { (byte) 8 }))); + assertEquals("09", new String(new Base16().encode(new byte[] { (byte) 9 }))); + assertEquals("0A", new String(new Base16().encode(new byte[] { (byte) 10 }))); + assertEquals("0B", new String(new Base16().encode(new byte[] { (byte) 11 }))); + assertEquals("0C", new String(new Base16().encode(new byte[] { (byte) 12 }))); + assertEquals("0D", new String(new Base16().encode(new byte[] { (byte) 13 }))); + assertEquals("0E", new String(new Base16().encode(new byte[] { (byte) 14 }))); + 
assertEquals("0F", new String(new Base16().encode(new byte[] { (byte) 15 }))); + assertEquals("10", new String(new Base16().encode(new byte[] { (byte) 16 }))); + assertEquals("11", new String(new Base16().encode(new byte[] { (byte) 17 }))); + assertEquals("12", new String(new Base16().encode(new byte[] { (byte) 18 }))); + assertEquals("13", new String(new Base16().encode(new byte[] { (byte) 19 }))); + assertEquals("14", new String(new Base16().encode(new byte[] { (byte) 20 }))); + assertEquals("15", new String(new Base16().encode(new byte[] { (byte) 21 }))); + assertEquals("16", new String(new Base16().encode(new byte[] { (byte) 22 }))); + assertEquals("17", new String(new Base16().encode(new byte[] { (byte) 23 }))); + assertEquals("18", new String(new Base16().encode(new byte[] { (byte) 24 }))); + assertEquals("19", new String(new Base16().encode(new byte[] { (byte) 25 }))); + assertEquals("1A", new String(new Base16().encode(new byte[] { (byte) 26 }))); + assertEquals("1B", new String(new Base16().encode(new byte[] { (byte) 27 }))); + assertEquals("1C", new String(new Base16().encode(new byte[] { (byte) 28 }))); + assertEquals("1D", new String(new Base16().encode(new byte[] { (byte) 29 }))); + assertEquals("1E", new String(new Base16().encode(new byte[] { (byte) 30 }))); + assertEquals("1F", new String(new Base16().encode(new byte[] { (byte) 31 }))); + assertEquals("20", new String(new Base16().encode(new byte[] { (byte) 32 }))); + assertEquals("21", new String(new Base16().encode(new byte[] { (byte) 33 }))); + assertEquals("22", new String(new Base16().encode(new byte[] { (byte) 34 }))); + assertEquals("23", new String(new Base16().encode(new byte[] { (byte) 35 }))); + assertEquals("24", new String(new Base16().encode(new byte[] { (byte) 36 }))); + assertEquals("25", new String(new Base16().encode(new byte[] { (byte) 37 }))); + assertEquals("26", new String(new Base16().encode(new byte[] { (byte) 38 }))); + assertEquals("27", new String(new Base16().encode(new 
byte[] { (byte) 39 }))); + assertEquals("28", new String(new Base16().encode(new byte[] { (byte) 40 }))); + assertEquals("29", new String(new Base16().encode(new byte[] { (byte) 41 }))); + assertEquals("2A", new String(new Base16().encode(new byte[] { (byte) 42 }))); + assertEquals("2B", new String(new Base16().encode(new byte[] { (byte) 43 }))); + assertEquals("2C", new String(new Base16().encode(new byte[] { (byte) 44 }))); + assertEquals("2D", new String(new Base16().encode(new byte[] { (byte) 45 }))); + assertEquals("2E", new String(new Base16().encode(new byte[] { (byte) 46 }))); + assertEquals("2F", new String(new Base16().encode(new byte[] { (byte) 47 }))); + assertEquals("30", new String(new Base16().encode(new byte[] { (byte) 48 }))); + assertEquals("31", new String(new Base16().encode(new byte[] { (byte) 49 }))); + assertEquals("32", new String(new Base16().encode(new byte[] { (byte) 50 }))); + assertEquals("33", new String(new Base16().encode(new byte[] { (byte) 51 }))); + assertEquals("34", new String(new Base16().encode(new byte[] { (byte) 52 }))); + assertEquals("35", new String(new Base16().encode(new byte[] { (byte) 53 }))); + assertEquals("36", new String(new Base16().encode(new byte[] { (byte) 54 }))); + assertEquals("37", new String(new Base16().encode(new byte[] { (byte) 55 }))); + assertEquals("38", new String(new Base16().encode(new byte[] { (byte) 56 }))); + assertEquals("39", new String(new Base16().encode(new byte[] { (byte) 57 }))); + assertEquals("3A", new String(new Base16().encode(new byte[] { (byte) 58 }))); + assertEquals("3B", new String(new Base16().encode(new byte[] { (byte) 59 }))); + assertEquals("3C", new String(new Base16().encode(new byte[] { (byte) 60 }))); + assertEquals("3D", new String(new Base16().encode(new byte[] { (byte) 61 }))); + assertEquals("3E", new String(new Base16().encode(new byte[] { (byte) 62 }))); + assertEquals("3F", new String(new Base16().encode(new byte[] { (byte) 63 }))); + assertEquals("40", new 
String(new Base16().encode(new byte[] { (byte) 64 }))); + assertEquals("41", new String(new Base16().encode(new byte[] { (byte) 65 }))); + assertEquals("42", new String(new Base16().encode(new byte[] { (byte) 66 }))); + assertEquals("43", new String(new Base16().encode(new byte[] { (byte) 67 }))); + assertEquals("44", new String(new Base16().encode(new byte[] { (byte) 68 }))); + assertEquals("45", new String(new Base16().encode(new byte[] { (byte) 69 }))); + assertEquals("46", new String(new Base16().encode(new byte[] { (byte) 70 }))); + assertEquals("47", new String(new Base16().encode(new byte[] { (byte) 71 }))); + assertEquals("48", new String(new Base16().encode(new byte[] { (byte) 72 }))); + assertEquals("49", new String(new Base16().encode(new byte[] { (byte) 73 }))); + assertEquals("4A", new String(new Base16().encode(new byte[] { (byte) 74 }))); + assertEquals("4B", new String(new Base16().encode(new byte[] { (byte) 75 }))); + assertEquals("4C", new String(new Base16().encode(new byte[] { (byte) 76 }))); + assertEquals("4D", new String(new Base16().encode(new byte[] { (byte) 77 }))); + assertEquals("4E", new String(new Base16().encode(new byte[] { (byte) 78 }))); + assertEquals("4F", new String(new Base16().encode(new byte[] { (byte) 79 }))); + assertEquals("50", new String(new Base16().encode(new byte[] { (byte) 80 }))); + assertEquals("51", new String(new Base16().encode(new byte[] { (byte) 81 }))); + assertEquals("52", new String(new Base16().encode(new byte[] { (byte) 82 }))); + assertEquals("53", new String(new Base16().encode(new byte[] { (byte) 83 }))); + assertEquals("54", new String(new Base16().encode(new byte[] { (byte) 84 }))); + assertEquals("55", new String(new Base16().encode(new byte[] { (byte) 85 }))); + assertEquals("56", new String(new Base16().encode(new byte[] { (byte) 86 }))); + assertEquals("57", new String(new Base16().encode(new byte[] { (byte) 87 }))); + assertEquals("58", new String(new Base16().encode(new byte[] { (byte) 88 }))); 
+ assertEquals("59", new String(new Base16().encode(new byte[] { (byte) 89 }))); + assertEquals("5A", new String(new Base16().encode(new byte[] { (byte) 90 }))); + assertEquals("5B", new String(new Base16().encode(new byte[] { (byte) 91 }))); + assertEquals("5C", new String(new Base16().encode(new byte[] { (byte) 92 }))); + assertEquals("5D", new String(new Base16().encode(new byte[] { (byte) 93 }))); + assertEquals("5E", new String(new Base16().encode(new byte[] { (byte) 94 }))); + assertEquals("5F", new String(new Base16().encode(new byte[] { (byte) 95 }))); + assertEquals("60", new String(new Base16().encode(new byte[] { (byte) 96 }))); + assertEquals("61", new String(new Base16().encode(new byte[] { (byte) 97 }))); + assertEquals("62", new String(new Base16().encode(new byte[] { (byte) 98 }))); + assertEquals("63", new String(new Base16().encode(new byte[] { (byte) 99 }))); + assertEquals("64", new String(new Base16().encode(new byte[] { (byte) 100 }))); + assertEquals("65", new String(new Base16().encode(new byte[] { (byte) 101 }))); + assertEquals("66", new String(new Base16().encode(new byte[] { (byte) 102 }))); + assertEquals("67", new String(new Base16().encode(new byte[] { (byte) 103 }))); + assertEquals("68", new String(new Base16().encode(new byte[] { (byte) 104 }))); + for (int i = -128; i <= 127; i++) { + final byte test[] = { (byte) i }; + assertTrue(Arrays.equals(test, new Base16().decode(new Base16().encode(test)))); + } + } + + @Test + public void testTriplets() { + assertEquals("000000", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 0 }))); + assertEquals("000001", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 1 }))); + assertEquals("000002", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 2 }))); + assertEquals("000003", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 3 }))); + assertEquals("000004", new String(new Base16().encode(new byte[] { (byte) 
0, (byte) 0, (byte) 4 }))); + assertEquals("000005", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 5 }))); + assertEquals("000006", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 6 }))); + assertEquals("000007", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 7 }))); + assertEquals("000008", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 8 }))); + assertEquals("000009", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 9 }))); + assertEquals("00000A", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 10 }))); + assertEquals("00000B", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 11 }))); + assertEquals("00000C", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 12 }))); + assertEquals("00000D", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 13 }))); + assertEquals("00000E", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 14 }))); + assertEquals("00000F", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 15 }))); + } + + @Test + public void testByteToStringVariations() throws DecoderException { + final Base16 Base16 = new Base16(); + final byte[] b1 = StringUtils.getBytesUtf8("Hello World"); + final byte[] b2 = new byte[0]; + final byte[] b3 = null; + + assertEquals("byteToString Hello World", "48656C6C6F20576F726C64", Base16.encodeToString(b1)); + assertEquals("byteToString static Hello World", "48656C6C6F20576F726C64", StringUtils.newStringUtf8(new Base16().encode(b1))); + assertEquals("byteToString \"\"", "", Base16.encodeToString(b2)); + assertEquals("byteToString static \"\"", "", StringUtils.newStringUtf8(new Base16().encode(b2))); + assertEquals("byteToString null", null, Base16.encodeToString(b3)); + assertEquals("byteToString static null", null, StringUtils.newStringUtf8(new Base16().encode(b3))); + } + + 
@Test + public void testStringToByteVariations() throws DecoderException { + final Base16 Base16 = new Base16(); + final String s1 = "48656C6C6F20576F726C64"; + final String s2 = ""; + final String s3 = null; + + assertEquals("StringToByte Hello World", "Hello World", StringUtils.newStringUtf8(Base16.decode(s1))); + assertEquals("StringToByte Hello World", "Hello World", + StringUtils.newStringUtf8((byte[]) new Base16().decode((Object) s1))); + assertEquals("StringToByte static Hello World", "Hello World", + StringUtils.newStringUtf8(new Base16().decode(s1))); + assertEquals("StringToByte \"\"", "", StringUtils.newStringUtf8(new Base16().decode(s2))); + assertEquals("StringToByte static \"\"", "", StringUtils.newStringUtf8(new Base16().decode(s2))); + assertEquals("StringToByte null", null, StringUtils.newStringUtf8(new Base16().decode(s3))); + assertEquals("StringToByte static null", null, StringUtils.newStringUtf8(new Base16().decode(s3))); + } + + private String toString(final byte[] data) { + final StringBuilder buf = new StringBuilder(); + for (int i = 0; i < data.length; i++) { + buf.append(data[i]); + if (i != data.length - 1) { + buf.append(","); + } + } + return buf.toString(); + } + + /** + * Test for CODEC-265: Encode a ~1GiB file. + * + * @see <a href="https://issues.apache.org/jira/projects/CODEC/issues/CODEC-265">CODEC-265</a> + */ + public void testCodec265_over() { + // almost 1GiB file to encode: 2^29 bytes + final int size1GiB = 1 << 29; + + // Expecting a size of 2 output bytes per 1 input byte + final int blocks = size1GiB; + final int expectedLength = 2 * blocks; + + // This test is memory hungry. Check we can run it. 
+ final long presumableFreeMemory = BaseNCodecTest.getPresumableFreeMemory(); + + // Estimate the maximum memory required: + // 1GiB + 1GiB + ~2GiB + ~1.33GiB + 32 KiB = ~5.33GiB + // + // 1GiB: Input buffer to encode + // 1GiB: Existing working buffer (due to doubling of default buffer size of 8192) + // ~2GiB: New working buffer to allocate (due to doubling) + // ~1.33GiB: Expected output size (since the working buffer is copied at the end) + // 32KiB: Some head room + final long estimatedMemory = (long) size1GiB * 4 + expectedLength + 32 * 1024; + Assume.assumeTrue("Not enough free memory for the test", presumableFreeMemory > estimatedMemory); + + final byte[] bytes = new byte[size1GiB]; + final byte[] encoded = new Base16().encode(bytes); + assertEquals(expectedLength, encoded.length); + } + + @Test + public void testIsInAlphabet() { + // invalid bounds + Base16 b16 = new Base16(true); + assertFalse(b16.isInAlphabet((byte)0)); + assertFalse(b16.isInAlphabet((byte)1)); + assertFalse(b16.isInAlphabet((byte)-1)); + assertFalse(b16.isInAlphabet((byte)-15)); + assertFalse(b16.isInAlphabet((byte)-16)); + assertFalse(b16.isInAlphabet((byte)128)); + assertFalse(b16.isInAlphabet((byte)255)); + + // lower-case + b16 = new Base16(true); + for (char c = '0'; c <= '9'; c++) { + assertTrue(b16.isInAlphabet((byte) c)); + } + for (char c = 'a'; c <= 'f'; c++) { + assertTrue(b16.isInAlphabet((byte) c)); + } + for (char c = 'A'; c <= 'F'; c++) { + assertFalse(b16.isInAlphabet((byte) c)); + } + assertFalse(b16.isInAlphabet((byte) ('0' - 1))); + assertFalse(b16.isInAlphabet((byte) ('9' + 1))); + assertFalse(b16.isInAlphabet((byte) ('a' - 1))); + assertFalse(b16.isInAlphabet((byte) ('z' + 1))); + + // upper-case + b16 = new Base16(false); + for (char c = '0'; c <= '9'; c++) { + assertTrue(b16.isInAlphabet((byte) c)); + } + for (char c = 'a'; c <= 'f'; c++) { + assertFalse(b16.isInAlphabet((byte) c)); + } + for (char c = 'A'; c <= 'F'; c++) { + assertTrue(b16.isInAlphabet((byte) 
c)); + } + assertFalse(b16.isInAlphabet((byte) ('0' - 1))); + assertFalse(b16.isInAlphabet((byte) ('9' + 1))); + assertFalse(b16.isInAlphabet((byte) ('A' - 1))); + assertFalse(b16.isInAlphabet((byte) ('F' + 1))); + } + + @Test + public void testDecodeSingleBytes() { + final String encoded = "556E74696C206E6578742074696D6521"; + + final BaseNCodec.Context context = new BaseNCodec.Context(); + final Base16 b16 = new Base16(); + + final byte[] encocdedBytes = StringUtils.getBytesUtf8(encoded); + + // decode byte-by-byte + b16.decode(encocdedBytes, 0, 1, context); + b16.decode(encocdedBytes, 1, 1, context); // yields "U" + b16.decode(encocdedBytes, 2, 1, context); + b16.decode(encocdedBytes, 3, 1, context); // yields "n" + + // decode split hex-pairs + b16.decode(encocdedBytes, 4, 3, context); // yields "t" + b16.decode(encocdedBytes, 7, 3, context); // yields "il" + b16.decode(encocdedBytes, 10, 3, context); // yields " " + + // decode remaining + b16.decode(encocdedBytes, 13, 19, context); // yields "next time!" + + final byte[] decodedBytes = new byte[context.pos]; + System.arraycopy(context.buffer, context.readPos, decodedBytes, 0, decodedBytes.length); + final String decoded = StringUtils.newStringUtf8(decodedBytes); + + assertEquals("Until next time!", decoded); + } + + @Test + public void testDecodeSingleBytesOptimisation() { + final BaseNCodec.Context context = new BaseNCodec.Context(); + context.ibitWorkArea = 0; Review comment: The codec relies on the fact that a new Context created in `BaseNCodec.decode(byte[])` has a zero work area. So perhaps this should be checked. I'd also change the -17 to the hex byte representation for clarity. 
```java final BaseNCodec.Context context = new BaseNCodec.Context(); assertEquals(0, context.ibitWorkArea); final byte[] data = new byte[1]; final Base16 b16 = new Base16(); data[0] = (byte) 'E'; b16.decode(data, 0, 1, context); assertEquals(15, context.ibitWorkArea); data[0] = (byte) 'F'; b16.decode(data, 0, 1, context); assertEquals(0, context.ibitWorkArea); assertEquals((byte) 0xEF, context.buffer[0]); ``` ########## File path: src/main/java/org/apache/commons/codec/binary/Base16.java ########## @@ -0,0 +1,251 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.binary; + +import org.apache.commons.codec.CodecPolicy; + +/** + * Provides Base16 encoding and decoding. + * + * <p> + * This class is thread-safe. + * </p> + * + * @see <a href="https://tools.ietf.org/html/rfc4648#section-8">RFC 4648 - 8. Base 16 Encoding</a> + * + * @since 1.15 + */ +public class Base16 extends BaseNCodec { + + /** + * BASE16 characters are 4 bits in length. + * They are formed by taking an 8-bit group, + * which is converted into two BASE16 characters. 
+ */ + private static final int BITS_PER_ENCODED_BYTE = 4; + private static final int BYTES_PER_ENCODED_BLOCK = 2; + private static final int BYTES_PER_UNENCODED_BLOCK = 1; + + /** + * This array is a lookup table that translates Unicode characters drawn from the "Base16 Alphabet" (as specified + * in Table 5 of RFC 4648) into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. + */ + private static final byte[] UPPER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, 10, 11, 12, 13, 14, 15 // 40-46 A-F + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * equivalents as specified in Table 5 of RFC 4648. + */ + private static final byte[] UPPER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'A', 'B', 'C', 'D', 'E', 'F' + }; + + /** + * This array is a lookup table that translates Unicode characters drawn from a lower-case "Base16 Alphabet" + * into their 4-bit positive integer equivalents. Characters that are not in the Base16 + * alphabet but fall within the bounds of the array are translated to -1. 
+ */ + private static final byte[] LOWER_CASE_DECODE_TABLE = { + // 0 1 2 3 4 5 6 7 8 9 A B C D E F + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 00-0f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 10-1f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 20-2f + 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, -1, -1, -1, -1, -1, -1, // 30-3f 0-9 + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 40-4f + -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, // 50-5f + -1, 10, 11, 12, 13, 14, 15 // 60-66 a-f + }; + + /** + * This array is a lookup table that translates 4-bit positive integer index values into their "Base16 Alphabet" + * lower-case equivalents. + */ + private static final byte[] LOWER_CASE_ENCODE_TABLE = { + '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', + 'a', 'b', 'c', 'd', 'e', 'f' + }; + + /** Mask used to extract 4 bits, used when decoding character. */ + private static final int MASK_4BITS = 0x0f; + + /** + * Decode table to use. + */ + private final byte[] decodeTable; + + /** + * Encode table to use. + */ + private final byte[] encodeTable; + + /** + * Creates a Base16 codec used for decoding and encoding. + */ + public Base16() { + this(false); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + */ + public Base16(final boolean lowerCase) { + this(lowerCase, DECODING_POLICY_DEFAULT); + } + + /** + * Creates a Base16 codec used for decoding and encoding. + * + * @param lowerCase if {@code true} then use a lower-case Base16 alphabet. + * @param decodingPolicy Decoding policy. 
+ */ + public Base16(final boolean lowerCase, final CodecPolicy decodingPolicy) { + super(BYTES_PER_UNENCODED_BLOCK, BYTES_PER_ENCODED_BLOCK, 0, 0, + PAD_DEFAULT, decodingPolicy); + if (lowerCase) { + this.encodeTable = LOWER_CASE_ENCODE_TABLE; + this.decodeTable = LOWER_CASE_DECODE_TABLE; + } else { + this.encodeTable = UPPER_CASE_ENCODE_TABLE; + this.decodeTable = UPPER_CASE_DECODE_TABLE; + } + } + + @Override + void decode(final byte[] data, int offset, final int length, final Context context) { + if (context.eof || length < 0) { + context.eof = true; + if (context.ibitWorkArea > 0) { + validateTrailingCharacter(); + } + return; + } + + final int dataLen = Math.min(data.length - offset, length); + final int availableChars = (context.ibitWorkArea > 0 ? 1 : 0) + dataLen; Review comment: Could change to `ibitWorkArea != 0` ########## File path: src/test/java/org/apache/commons/codec/binary/Base16TestData.java ########## @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.binary; + +import java.io.ByteArrayOutputStream; +import java.io.IOException; +import java.io.InputStream; +import java.util.Random; + +/** + * This random data was encoded by OpenSSL. 
Java had nothing to do with it. This data helps us test interop between + * Commons-Codec and OpenSSL. + * + * @since 1.15 + */ +public class Base16TestData { + + final static String ENCODED_UTF8_LOWERCASE + = "f483cd2b052f74b888029e9cb73d764a2426386b2d5b262f48f79ebee7c386bcdac2ceb9be8ca42a36c88f7dd85936bdc40edcfc51f2a56725ad9850ee89df737244f77049e5f4f847dcc011d8db8f2d61bf8658367113e1625e5cc2c9ff9a7ea81a53b0fa5ea56f03355632d5cd36ff5c320be92003a0af45477d712aff96df3c00476c4d5e063029f5f84c2e02261d8afc6ece7f9c2ccf2ada37b0aa5239dad3fd27b0acf2fa86ef5b3af960042cabe6fd4a2fbf268e8be39d3147e343424b88b907bbaa7d3b0520bd0aa20cacc4bff02e828d1d4cf67360613208fe4656b95edd041d81c8881e7a5d7785544cf19151b5de29b87ef94ddf6c922783fb105ccdc8601c8c67bc6df47592d4b51c45d43b3ebc27fc89505b138c7aaf8a6494a802f896b5db1ead7c876c811afff866af8a7f8fcd24d20f6adf986ad5ac8633df7ce002d349a67c59ec856d9c750b10bf428a9ff76507ff2941467489367e2c4b4a1adee51b36f3a7a640780fd57b52df5ab629c226bc3ec2225c325fbd929db9d48531046038714c61d1e6b19373835a7c08f79124b6658e2b00923f4c635b020cc438f2830006e51f1f93d1fd6d58b5b6131abe6e270dce2f68da1213546764d6306e25159553ccf44725bc959359dea2816780d0cc47000f2238ce559e6aa9fd618ced40eae7dac121d3500a879325a02412d02e2cbe73a2039ae5ef8ccd58ef0793184253a569a4ed42b440a576b967855f11a9eef542b67e2d53f2324f142d8f99779bc69d1b9cef955be6e0c8474822421009a66ad4c2960b72ae88e4bd049d6deced00d892892afa04e1436e9735764c6082f8a0279a346a401a979499a08c7400a0735928e99afee2103acbdf0e7aa271874399fc8262e57cf478c0d47f2d88327ce557ecac51dd71b3c0a0d0a98bae5ac094877dd3ced4d0ce34e2bef92ed8d155d4828618ac157863b61b0bcc20fe7a9c8a6af7966ded472a7e42c80acdd4446df3519341793cbc8b7ec50495ea63b3541d5777c45752c9b62cb0037b9b54d40947185b6316bf0bf5f2daac699babef74bf76977ab571c7afdf921c68e892c7b226fbf1b157695529d68ebd85460c3d730517ba6360b041bf69d3cf25dfff77a86479e7ddcd0a2b8f3d672c0d6c144a13351325b414d5f517c6e7cc79d88a3a3213f8dfe2cfe5f5b05def225d456442ca287659cca902238812406c1fe7424e06a5bbcb110e8321e1920105d8077e8654c9c2ea45e4052d96784
a5e334a6cede6f43140584429bf7e7dfff8adbd4a028eb089f7a783155f22c6b66f99635f302a5ecec990fbe60ba7daed7e19667751a246b1625bb7720f0e57eb86ffa6001d98a5a3c6add0ea6490d74d16e870b8696406aad928d523fe04a2618734ea017faffcfa896023a572515b5aaadc22d95dd64567f56ba977958ed380bb60"; + + final static String ENCODED_UTF8_UPPERCASE + = "F483CD2B052F74B888029E9CB73D764A2426386B2D5B262F48F79EBEE7C386BCDAC2CEB9BE8CA42A36C88F7DD85936BDC40EDCFC51F2A56725AD9850EE89DF737244F77049E5F4F847DCC011D8DB8F2D61BF8658367113E1625E5CC2C9FF9A7EA81A53B0FA5EA56F03355632D5CD36FF5C320BE92003A0AF45477D712AFF96DF3C00476C4D5E063029F5F84C2E02261D8AFC6ECE7F9C2CCF2ADA37B0AA5239DAD3FD27B0ACF2FA86EF5B3AF960042CABE6FD4A2FBF268E8BE39D3147E343424B88B907BBAA7D3B0520BD0AA20CACC4BFF02E828D1D4CF67360613208FE4656B95EDD041D81C8881E7A5D7785544CF19151B5DE29B87EF94DDF6C922783FB105CCDC8601C8C67BC6DF47592D4B51C45D43B3EBC27FC89505B138C7AAF8A6494A802F896B5DB1EAD7C876C811AFFF866AF8A7F8FCD24D20F6ADF986AD5AC8633DF7CE002D349A67C59EC856D9C750B10BF428A9FF76507FF2941467489367E2C4B4A1ADEE51B36F3A7A640780FD57B52DF5AB629C226BC3EC2225C325FBD929DB9D48531046038714C61D1E6B19373835A7C08F79124B6658E2B00923F4C635B020CC438F2830006E51F1F93D1FD6D58B5B6131ABE6E270DCE2F68DA1213546764D6306E25159553CCF44725BC959359DEA2816780D0CC47000F2238CE559E6AA9FD618CED40EAE7DAC121D3500A879325A02412D02E2CBE73A2039AE5EF8CCD58EF0793184253A569A4ED42B440A576B967855F11A9EEF542B67E2D53F2324F142D8F99779BC69D1B9CEF955BE6E0C8474822421009A66AD4C2960B72AE88E4BD049D6DECED00D892892AFA04E1436E9735764C6082F8A0279A346A401A979499A08C7400A0735928E99AFEE2103ACBDF0E7AA271874399FC8262E57CF478C0D47F2D88327CE557ECAC51DD71B3C0A0D0A98BAE5AC094877DD3CED4D0CE34E2BEF92ED8D155D4828618AC157863B61B0BCC20FE7A9C8A6AF7966DED472A7E42C80ACDD4446DF3519341793CBC8B7EC50495EA63B3541D5777C45752C9B62CB0037B9B54D40947185B6316BF0BF5F2DAAC699BABEF74BF76977AB571C7AFDF921C68E892C7B226FBF1B157695529D68EBD85460C3D730517BA6360B041BF69D3CF25DFFF77A86479E7DDCD0A2B8F3D672C0D6C144A13351325B414D5F517C6E7CC79D88A3A3213F8DFE2CF
E5F5B05DEF225D456442CA287659CCA902238812406C1FE7424E06A5BBCB110E8321E1920105D8077E8654C9C2EA45E4052D96784A5E334A6CEDE6F43140584429BF7E7DFFF8ADBD4A028EB089F7A783155F22C6B66F99635F302A5ECEC990FBE60BA7DAED7E19667751A246B1625BB7720F0E57EB86FFA6001D98A5A3C6ADD0EA6490D74D16E870B8696406AAD928D523FE04A2618734EA017FAFFCFA896023A572515B5AAADC22D95DD64567F56BA977958ED380BB60"; + + final static byte[] DECODED + = {-12, -125, -51, 43, 5, 47, 116, -72, -120, 2, -98, -100, -73, 61, 118, 74, 36, 38, 56, 107, 45, 91, 38, + 47, 72, -9, -98, -66, -25, -61, -122, -68, -38, -62, -50, -71, -66, -116, -92, 42, 54, -56, -113, 125, + -40, 89, 54, -67, -60, 14, -36, -4, 81, -14, -91, 103, 37, -83, -104, 80, -18, -119, -33, 115, 114, 68, + -9, 112, 73, -27, -12, -8, 71, -36, -64, 17, -40, -37, -113, 45, 97, -65, -122, 88, 54, 113, 19, -31, 98, + 94, 92, -62, -55, -1, -102, 126, -88, 26, 83, -80, -6, 94, -91, 111, 3, 53, 86, 50, -43, -51, 54, -1, 92, + 50, 11, -23, 32, 3, -96, -81, 69, 71, 125, 113, 42, -1, -106, -33, 60, 0, 71, 108, 77, 94, 6, 48, 41, -11, + -8, 76, 46, 2, 38, 29, -118, -4, 110, -50, 127, -100, 44, -49, 42, -38, 55, -80, -86, 82, 57, -38, -45, + -3, 39, -80, -84, -14, -6, -122, -17, 91, 58, -7, 96, 4, 44, -85, -26, -3, 74, 47, -65, 38, -114, -117, + -29, -99, 49, 71, -29, 67, 66, 75, -120, -71, 7, -69, -86, 125, 59, 5, 32, -67, 10, -94, 12, -84, -60, -65, + -16, 46, -126, -115, 29, 76, -10, 115, 96, 97, 50, 8, -2, 70, 86, -71, 94, -35, 4, 29, -127, -56, -120, + 30, 122, 93, 119, -123, 84, 76, -15, -111, 81, -75, -34, 41, -72, 126, -7, 77, -33, 108, -110, 39, -125, + -5, 16, 92, -51, -56, 96, 28, -116, 103, -68, 109, -12, 117, -110, -44, -75, 28, 69, -44, 59, 62, -68, + 39, -4, -119, 80, 91, 19, -116, 122, -81, -118, 100, -108, -88, 2, -8, -106, -75, -37, 30, -83, 124, -121, + 108, -127, 26, -1, -8, 102, -81, -118, 127, -113, -51, 36, -46, 15, 106, -33, -104, 106, -43, -84, -122, + 51, -33, 124, -32, 2, -45, 73, -90, 124, 89, -20, -123, 109, -100, 117, 11, 16, -65, 66, -118, 
-97, -9, + 101, 7, -1, 41, 65, 70, 116, -119, 54, 126, 44, 75, 74, 26, -34, -27, 27, 54, -13, -89, -90, 64, 120, 15, + -43, 123, 82, -33, 90, -74, 41, -62, 38, -68, 62, -62, 34, 92, 50, 95, -67, -110, -99, -71, -44, -123, + 49, 4, 96, 56, 113, 76, 97, -47, -26, -79, -109, 115, -125, 90, 124, 8, -9, -111, 36, -74, 101, -114, 43, + 0, -110, 63, 76, 99, 91, 2, 12, -60, 56, -14, -125, 0, 6, -27, 31, 31, -109, -47, -3, 109, 88, -75, -74, + 19, 26, -66, 110, 39, 13, -50, 47, 104, -38, 18, 19, 84, 103, 100, -42, 48, 110, 37, 21, -107, 83, -52, + -12, 71, 37, -68, -107, -109, 89, -34, -94, -127, 103, -128, -48, -52, 71, 0, 15, 34, 56, -50, 85, -98, + 106, -87, -3, 97, -116, -19, 64, -22, -25, -38, -63, 33, -45, 80, 10, -121, -109, 37, -96, 36, 18, -48, + 46, 44, -66, 115, -94, 3, -102, -27, -17, -116, -51, 88, -17, 7, -109, 24, 66, 83, -91, 105, -92, -19, + 66, -76, 64, -91, 118, -71, 103, -123, 95, 17, -87, -18, -11, 66, -74, 126, 45, 83, -14, 50, 79, 20, 45, + -113, -103, 119, -101, -58, -99, 27, -100, -17, -107, 91, -26, -32, -56, 71, 72, 34, 66, 16, 9, -90, 106, + -44, -62, -106, 11, 114, -82, -120, -28, -67, 4, -99, 109, -20, -19, 0, -40, -110, -119, 42, -6, 4, -31, + 67, 110, -105, 53, 118, 76, 96, -126, -8, -96, 39, -102, 52, 106, 64, 26, -105, -108, -103, -96, -116, + 116, 0, -96, 115, 89, 40, -23, -102, -2, -30, 16, 58, -53, -33, 14, 122, -94, 113, -121, 67, -103, -4, + -126, 98, -27, 124, -12, 120, -64, -44, 127, 45, -120, 50, 124, -27, 87, -20, -84, 81, -35, 113, -77, + -64, -96, -48, -87, -117, -82, 90, -64, -108, -121, 125, -45, -50, -44, -48, -50, 52, -30, -66, -7, 46, + -40, -47, 85, -44, -126, -122, 24, -84, 21, 120, 99, -74, 27, 11, -52, 32, -2, 122, -100, -118, 106, -9, + -106, 109, -19, 71, 42, 126, 66, -56, 10, -51, -44, 68, 109, -13, 81, -109, 65, 121, 60, -68, -117, 126, + -59, 4, -107, -22, 99, -77, 84, 29, 87, 119, -60, 87, 82, -55, -74, 44, -80, 3, 123, -101, 84, -44, 9, 71, + 24, 91, 99, 22, -65, 11, -11, -14, -38, -84, 105, -101, -85, -17, 116, 
-65, 118, -105, 122, -75, 113, + -57, -81, -33, -110, 28, 104, -24, -110, -57, -78, 38, -5, -15, -79, 87, 105, 85, 41, -42, -114, -67, + -123, 70, 12, 61, 115, 5, 23, -70, 99, 96, -80, 65, -65, 105, -45, -49, 37, -33, -1, 119, -88, 100, 121, + -25, -35, -51, 10, 43, -113, 61, 103, 44, 13, 108, 20, 74, 19, 53, 19, 37, -76, 20, -43, -11, 23, -58, -25, + -52, 121, -40, -118, 58, 50, 19, -8, -33, -30, -49, -27, -11, -80, 93, -17, 34, 93, 69, 100, 66, -54, 40, + 118, 89, -52, -87, 2, 35, -120, 18, 64, 108, 31, -25, 66, 78, 6, -91, -69, -53, 17, 14, -125, 33, -31, -110, + 1, 5, -40, 7, 126, -122, 84, -55, -62, -22, 69, -28, 5, 45, -106, 120, 74, 94, 51, 74, 108, -19, -26, -12, + 49, 64, 88, 68, 41, -65, 126, 125, -1, -8, -83, -67, 74, 2, -114, -80, -119, -9, -89, -125, 21, 95, 34, + -58, -74, 111, -103, 99, 95, 48, 42, 94, -50, -55, -112, -5, -26, 11, -89, -38, -19, 126, 25, 102, 119, + 81, -94, 70, -79, 98, 91, -73, 114, 15, 14, 87, -21, -122, -1, -90, 0, 29, -104, -91, -93, -58, -83, -48, + -22, 100, -112, -41, 77, 22, -24, 112, -72, 105, 100, 6, -86, -39, 40, -43, 35, -2, 4, -94, 97, -121, 52, + -22, 1, 127, -81, -4, -6, -119, 96, 35, -91, 114, 81, 91, 90, -86, -36, 34, -39, 93, -42, 69, 103, -11, + 107, -87, 119, -107, -114, -45, -128, -69, 96}; + + static byte[] streamToBytes(final InputStream is) throws IOException { + final ByteArrayOutputStream os = new ByteArrayOutputStream(); + final byte[] buf = new byte[4096]; + int read; + while ((read = is.read(buf)) > -1) { + os.write(buf, 0, read); + } + return os.toByteArray(); + } + + /** + * Returns an encoded and decoded copy of the same random data. + * + * @param size amount of random data to generate and encode + * @return two byte[] arrays: [0] = decoded, [1] = encoded + */ + static byte[][] randomData(final int size) { + final Random r = new Random(); + final byte[] decoded = new byte[size]; + r.nextBytes(decoded); Review comment: Although this is only test data, any use of Random should be discouraged. 
ThreadLocalRandom or SplittableRandom are far more robust statistically and also faster (due to lack of synchronisation). ``` ThreadLocalRandom.current().nextBytes(decoded); ``` ########## File path: src/test/java/org/apache/commons/codec/binary/Base16Test.java ########## @@ -0,0 +1,625 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.commons.codec.binary; + +import org.apache.commons.codec.CodecPolicy; +import org.apache.commons.codec.DecoderException; +import org.apache.commons.codec.EncoderException; +import org.apache.commons.lang3.ArrayUtils; +import org.junit.Assume; +import org.junit.Test; + +import java.nio.charset.Charset; +import java.nio.charset.StandardCharsets; +import java.util.Arrays; +import java.util.Random; + +import static org.junit.Assert.assertArrayEquals; +import static org.junit.Assert.assertEquals; +import static org.junit.Assert.assertFalse; +import static org.junit.Assert.assertTrue; +import static org.junit.Assert.fail; + +/** + * Test cases for Base16 class. 
+ * + * @since 1.15 + */ +public class Base16Test { + + private static final Charset CHARSET_UTF8 = StandardCharsets.UTF_8; + + private final Random random = new Random(); + + /** + * @return Returns the random. + */ + public Random getRandom() { + return this.random; + } + + /** + * Test the Base16 implementation + */ + @Test + public void testBase16() { + final String content = "Hello World"; + final byte[] encodedBytes = new Base16().encode(StringUtils.getBytesUtf8(content)); + final String encodedContent = StringUtils.newStringUtf8(encodedBytes); + assertEquals("encoding hello world", "48656C6C6F20576F726C64", encodedContent); + + final byte[] decodedBytes = new Base16().decode(encodedBytes); + final String decodedContent = StringUtils.newStringUtf8(decodedBytes); + assertEquals("decoding hello world", content, decodedContent); + } + + @Test + public void testBase16AtBufferStart() { + testBase16InBuffer(0, 100); + } + + @Test + public void testBase16AtBufferEnd() { + testBase16InBuffer(100, 0); + } + + @Test + public void testBase16AtBufferMiddle() { + testBase16InBuffer(100, 100); + } + + private void testBase16InBuffer(final int startPasSize, final int endPadSize) { + final String content = "Hello World"; + String encodedContent; + final byte[] bytesUtf8 = StringUtils.getBytesUtf8(content); + byte[] buffer = ArrayUtils.addAll(bytesUtf8, new byte[endPadSize]); + buffer = ArrayUtils.addAll(new byte[startPasSize], buffer); + final byte[] encodedBytes = new Base16().encode(buffer, startPasSize, bytesUtf8.length); + encodedContent = StringUtils.newStringUtf8(encodedBytes); + assertEquals("encoding hello world", "48656C6C6F20576F726C64", encodedContent); + } + + /** + * isBase16 throws RuntimeException on some + * non-Base16 bytes + */ + @Test(expected=RuntimeException.class) + public void testCodec68() { + final byte[] x = new byte[] { 'n', 'H', '=', '=', (byte) 0x9c }; + final Base16 b16 = new Base16(); + b16.decode(x); + } + + @Test + public void 
testConstructors() { + new Base16(); + new Base16(false); + new Base16(true); + new Base16(false, CodecPolicy.LENIENT); + new Base16(false, CodecPolicy.STRICT); + } + + @Test + public void testConstructor_LowerCase() { + final Base16 Base16 = new Base16(true); + final byte[] encoded = Base16.encode(Base16TestData.DECODED); + final String expectedResult = Base16TestData.ENCODED_UTF8_LOWERCASE; + final String result = StringUtils.newStringUtf8(encoded); + assertEquals("new Base16(true)", expectedResult, result); + } + + @Test + public void testConstructor_LowerCase_DecodingPolicy() { + final Base16 Base16 = new Base16(false, CodecPolicy.STRICT); + final byte[] encoded = Base16.encode(Base16TestData.DECODED); + final String expectedResult = Base16TestData.ENCODED_UTF8_UPPERCASE; + final String result = StringUtils.newStringUtf8(encoded); + assertEquals("new Base16(false, CodecPolicy.STRICT)", result, expectedResult); + } + + /** + * Test encode and decode of empty byte array. + */ + @Test + public void testEmptyBase16() { + byte[] empty = new byte[0]; + byte[] result = new Base16().encode(empty); + assertEquals("empty Base16 encode", 0, result.length); + assertEquals("empty Base16 encode", null, new Base16().encode(null)); + + empty = new byte[0]; + result = new Base16().decode(empty); + assertEquals("empty Base16 decode", 0, result.length); + assertEquals("empty Base16 encode", null, new Base16().decode((byte[]) null)); + } + + // encode/decode a large random array + @Test + public void testEncodeDecodeRandom() { + for (int i = 1; i < 5; i++) { + final int len = this.getRandom().nextInt(10000) + 1; + final byte[] data = new byte[len]; + this.getRandom().nextBytes(data); + final byte[] enc = new Base16().encode(data); + final byte[] data2 = new Base16().decode(enc); + assertArrayEquals(data, data2); + } + } + + // encode/decode random arrays from size 0 to size 11 + @Test + public void testEncodeDecodeSmall() { + for (int i = 0; i < 12; i++) { + final byte[] data = 
new byte[i]; + this.getRandom().nextBytes(data); + final byte[] enc = new Base16().encode(data); + final byte[] data2 = new Base16().decode(enc); + assertArrayEquals(toString(data) + " equals " + toString(data2), data, data2); + } + } + + @Test + public void testKnownDecodings() { + assertEquals("The quick brown fox jumped over the lazy dogs.", new String(new Base16(true).decode( + "54686520717569636b2062726f776e20666f78206a756d706564206f76657220746865206c617a7920646f67732e".getBytes(CHARSET_UTF8)))); + assertEquals("It was the best of times, it was the worst of times.", new String(new Base16(true).decode( + "497420776173207468652062657374206f662074696d65732c206974207761732074686520776f727374206f662074696d65732e".getBytes(CHARSET_UTF8)))); + assertEquals("http://jakarta.apache.org/commmons", new String( + new Base16(true).decode("687474703a2f2f6a616b617274612e6170616368652e6f72672f636f6d6d6d6f6e73".getBytes(CHARSET_UTF8)))); + assertEquals("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz", new String(new Base16(true).decode( + "4161426243634464456546664767486849694a6a4b6b4c6c4d6d4e6e4f6f50705171527253735474557556765777587859795a7a".getBytes(CHARSET_UTF8)))); + assertEquals("{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }", + new String(new Base16(true).decode("7b20302c20312c20322c20332c20342c20352c20362c20372c20382c2039207d".getBytes(CHARSET_UTF8)))); + assertEquals("xyzzy!", new String(new Base16(true).decode("78797a7a7921".getBytes(CHARSET_UTF8)))); + } + + @Test + public void testKnownEncodings() { + assertEquals("54686520717569636b2062726f776e20666f78206a756d706564206f76657220746865206c617a7920646f67732e", new String( + new Base16(true).encode("The quick brown fox jumped over the lazy dogs.".getBytes(CHARSET_UTF8)))); + assertEquals("497420776173207468652062657374206f662074696d65732c206974207761732074686520776f727374206f662074696d65732e", new String( + new Base16(true).encode("It was the best of times, it was the worst of times.".getBytes(CHARSET_UTF8)))); + 
assertEquals("687474703a2f2f6a616b617274612e6170616368652e6f72672f636f6d6d6d6f6e73", + new String(new Base16(true).encode("http://jakarta.apache.org/commmons".getBytes(CHARSET_UTF8)))); + assertEquals("4161426243634464456546664767486849694a6a4b6b4c6c4d6d4e6e4f6f50705171527253735474557556765777587859795a7a", new String( + new Base16(true).encode("AaBbCcDdEeFfGgHhIiJjKkLlMmNnOoPpQqRrSsTtUuVvWwXxYyZz".getBytes(CHARSET_UTF8)))); + assertEquals("7b20302c20312c20322c20332c20342c20352c20362c20372c20382c2039207d", + new String(new Base16(true).encode("{ 0, 1, 2, 3, 4, 5, 6, 7, 8, 9 }".getBytes(CHARSET_UTF8)))); + assertEquals("78797a7a7921", new String(new Base16(true).encode("xyzzy!".getBytes(CHARSET_UTF8)))); + } + + @Test + public void testNonBase16Test() { + final byte[] invalidEncodedChars = { '/', ':', '@', 'G', '%', '`', 'g' }; + + final byte[] encoded = new byte[1]; + for (final byte invalidEncodedChar : invalidEncodedChars) { + try { + encoded[0] = invalidEncodedChar; + new Base16().decode(encoded); + fail("IllegalArgumentException should have been thrown when trying to decode invalid Base16 char: " + (char)invalidEncodedChar); + } catch (final Exception e) { + assertTrue(e instanceof IllegalArgumentException); + } + } + } + + @Test + public void testObjectDecodeWithInvalidParameter() { + final Base16 b16 = new Base16(); + + try { + b16.decode(Integer.valueOf(5)); + fail("decode(Object) didn't throw an exception when passed an Integer object"); + } catch (final DecoderException e) { + // ignored + } + + } + + @Test + public void testObjectDecodeWithValidParameter() throws Exception { + final String original = "Hello World!"; + final Object o = new Base16().encode(original.getBytes(CHARSET_UTF8)); + + final Base16 b16 = new Base16(); + final Object oDecoded = b16.decode(o); + final byte[] baDecoded = (byte[]) oDecoded; + final String dest = new String(baDecoded); + + assertEquals("dest string does not equal original", original, dest); + } + + @Test + public void 
testObjectEncodeWithInvalidParameter() { + final Base16 b16 = new Base16(); + try { + b16.encode("Yadayadayada"); + fail("encode(Object) didn't throw an exception when passed a String object"); + } catch (final EncoderException e) { + // Expected + } + } + + @Test + public void testObjectEncodeWithValidParameter() throws Exception { + final String original = "Hello World!"; + final Object origObj = original.getBytes(CHARSET_UTF8); + + final Object oEncoded = new Base16().encode(origObj); + final byte[] bArray = new Base16().decode((byte[]) oEncoded); + final String dest = new String(bArray); + + assertEquals("dest string does not equal original", original, dest); + } + + @Test + public void testObjectEncode() { + final Base16 b16 = new Base16(); + assertEquals("48656C6C6F20576F726C64", new String(b16.encode("Hello World".getBytes(CHARSET_UTF8)))); + } + + @Test + public void testPairs() { + assertEquals("0000", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0 }))); + assertEquals("0001", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 1 }))); + assertEquals("0002", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 2 }))); + assertEquals("0003", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 3 }))); + assertEquals("0004", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 4 }))); + assertEquals("0005", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 5 }))); + assertEquals("0006", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 6 }))); + assertEquals("0007", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 7 }))); + assertEquals("0008", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 8 }))); + assertEquals("0009", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 9 }))); + assertEquals("000A", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 10 }))); + assertEquals("000B", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 
11 }))); + assertEquals("000C", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 12 }))); + assertEquals("000D", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 13 }))); + assertEquals("000E", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 14 }))); + assertEquals("000F", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 15 }))); + assertEquals("0010", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 16 }))); + assertEquals("0011", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 17 }))); + for (int i = -128; i <= 127; i++) { + final byte test[] = { (byte) i, (byte) i }; + assertArrayEquals(test, new Base16().decode(new Base16().encode(test))); + } + } + + @Test + public void testSingletons() { + assertEquals("00", new String(new Base16().encode(new byte[] { (byte) 0 }))); + assertEquals("01", new String(new Base16().encode(new byte[] { (byte) 1 }))); + assertEquals("02", new String(new Base16().encode(new byte[] { (byte) 2 }))); + assertEquals("03", new String(new Base16().encode(new byte[] { (byte) 3 }))); + assertEquals("04", new String(new Base16().encode(new byte[] { (byte) 4 }))); + assertEquals("05", new String(new Base16().encode(new byte[] { (byte) 5 }))); + assertEquals("06", new String(new Base16().encode(new byte[] { (byte) 6 }))); + assertEquals("07", new String(new Base16().encode(new byte[] { (byte) 7 }))); + assertEquals("08", new String(new Base16().encode(new byte[] { (byte) 8 }))); + assertEquals("09", new String(new Base16().encode(new byte[] { (byte) 9 }))); + assertEquals("0A", new String(new Base16().encode(new byte[] { (byte) 10 }))); + assertEquals("0B", new String(new Base16().encode(new byte[] { (byte) 11 }))); + assertEquals("0C", new String(new Base16().encode(new byte[] { (byte) 12 }))); + assertEquals("0D", new String(new Base16().encode(new byte[] { (byte) 13 }))); + assertEquals("0E", new String(new Base16().encode(new byte[] { (byte) 14 }))); + 
assertEquals("0F", new String(new Base16().encode(new byte[] { (byte) 15 }))); + assertEquals("10", new String(new Base16().encode(new byte[] { (byte) 16 }))); + assertEquals("11", new String(new Base16().encode(new byte[] { (byte) 17 }))); + assertEquals("12", new String(new Base16().encode(new byte[] { (byte) 18 }))); + assertEquals("13", new String(new Base16().encode(new byte[] { (byte) 19 }))); + assertEquals("14", new String(new Base16().encode(new byte[] { (byte) 20 }))); + assertEquals("15", new String(new Base16().encode(new byte[] { (byte) 21 }))); + assertEquals("16", new String(new Base16().encode(new byte[] { (byte) 22 }))); + assertEquals("17", new String(new Base16().encode(new byte[] { (byte) 23 }))); + assertEquals("18", new String(new Base16().encode(new byte[] { (byte) 24 }))); + assertEquals("19", new String(new Base16().encode(new byte[] { (byte) 25 }))); + assertEquals("1A", new String(new Base16().encode(new byte[] { (byte) 26 }))); + assertEquals("1B", new String(new Base16().encode(new byte[] { (byte) 27 }))); + assertEquals("1C", new String(new Base16().encode(new byte[] { (byte) 28 }))); + assertEquals("1D", new String(new Base16().encode(new byte[] { (byte) 29 }))); + assertEquals("1E", new String(new Base16().encode(new byte[] { (byte) 30 }))); + assertEquals("1F", new String(new Base16().encode(new byte[] { (byte) 31 }))); + assertEquals("20", new String(new Base16().encode(new byte[] { (byte) 32 }))); + assertEquals("21", new String(new Base16().encode(new byte[] { (byte) 33 }))); + assertEquals("22", new String(new Base16().encode(new byte[] { (byte) 34 }))); + assertEquals("23", new String(new Base16().encode(new byte[] { (byte) 35 }))); + assertEquals("24", new String(new Base16().encode(new byte[] { (byte) 36 }))); + assertEquals("25", new String(new Base16().encode(new byte[] { (byte) 37 }))); + assertEquals("26", new String(new Base16().encode(new byte[] { (byte) 38 }))); + assertEquals("27", new String(new Base16().encode(new 
byte[] { (byte) 39 }))); + assertEquals("28", new String(new Base16().encode(new byte[] { (byte) 40 }))); + assertEquals("29", new String(new Base16().encode(new byte[] { (byte) 41 }))); + assertEquals("2A", new String(new Base16().encode(new byte[] { (byte) 42 }))); + assertEquals("2B", new String(new Base16().encode(new byte[] { (byte) 43 }))); + assertEquals("2C", new String(new Base16().encode(new byte[] { (byte) 44 }))); + assertEquals("2D", new String(new Base16().encode(new byte[] { (byte) 45 }))); + assertEquals("2E", new String(new Base16().encode(new byte[] { (byte) 46 }))); + assertEquals("2F", new String(new Base16().encode(new byte[] { (byte) 47 }))); + assertEquals("30", new String(new Base16().encode(new byte[] { (byte) 48 }))); + assertEquals("31", new String(new Base16().encode(new byte[] { (byte) 49 }))); + assertEquals("32", new String(new Base16().encode(new byte[] { (byte) 50 }))); + assertEquals("33", new String(new Base16().encode(new byte[] { (byte) 51 }))); + assertEquals("34", new String(new Base16().encode(new byte[] { (byte) 52 }))); + assertEquals("35", new String(new Base16().encode(new byte[] { (byte) 53 }))); + assertEquals("36", new String(new Base16().encode(new byte[] { (byte) 54 }))); + assertEquals("37", new String(new Base16().encode(new byte[] { (byte) 55 }))); + assertEquals("38", new String(new Base16().encode(new byte[] { (byte) 56 }))); + assertEquals("39", new String(new Base16().encode(new byte[] { (byte) 57 }))); + assertEquals("3A", new String(new Base16().encode(new byte[] { (byte) 58 }))); + assertEquals("3B", new String(new Base16().encode(new byte[] { (byte) 59 }))); + assertEquals("3C", new String(new Base16().encode(new byte[] { (byte) 60 }))); + assertEquals("3D", new String(new Base16().encode(new byte[] { (byte) 61 }))); + assertEquals("3E", new String(new Base16().encode(new byte[] { (byte) 62 }))); + assertEquals("3F", new String(new Base16().encode(new byte[] { (byte) 63 }))); + assertEquals("40", new 
String(new Base16().encode(new byte[] { (byte) 64 }))); + assertEquals("41", new String(new Base16().encode(new byte[] { (byte) 65 }))); + assertEquals("42", new String(new Base16().encode(new byte[] { (byte) 66 }))); + assertEquals("43", new String(new Base16().encode(new byte[] { (byte) 67 }))); + assertEquals("44", new String(new Base16().encode(new byte[] { (byte) 68 }))); + assertEquals("45", new String(new Base16().encode(new byte[] { (byte) 69 }))); + assertEquals("46", new String(new Base16().encode(new byte[] { (byte) 70 }))); + assertEquals("47", new String(new Base16().encode(new byte[] { (byte) 71 }))); + assertEquals("48", new String(new Base16().encode(new byte[] { (byte) 72 }))); + assertEquals("49", new String(new Base16().encode(new byte[] { (byte) 73 }))); + assertEquals("4A", new String(new Base16().encode(new byte[] { (byte) 74 }))); + assertEquals("4B", new String(new Base16().encode(new byte[] { (byte) 75 }))); + assertEquals("4C", new String(new Base16().encode(new byte[] { (byte) 76 }))); + assertEquals("4D", new String(new Base16().encode(new byte[] { (byte) 77 }))); + assertEquals("4E", new String(new Base16().encode(new byte[] { (byte) 78 }))); + assertEquals("4F", new String(new Base16().encode(new byte[] { (byte) 79 }))); + assertEquals("50", new String(new Base16().encode(new byte[] { (byte) 80 }))); + assertEquals("51", new String(new Base16().encode(new byte[] { (byte) 81 }))); + assertEquals("52", new String(new Base16().encode(new byte[] { (byte) 82 }))); + assertEquals("53", new String(new Base16().encode(new byte[] { (byte) 83 }))); + assertEquals("54", new String(new Base16().encode(new byte[] { (byte) 84 }))); + assertEquals("55", new String(new Base16().encode(new byte[] { (byte) 85 }))); + assertEquals("56", new String(new Base16().encode(new byte[] { (byte) 86 }))); + assertEquals("57", new String(new Base16().encode(new byte[] { (byte) 87 }))); + assertEquals("58", new String(new Base16().encode(new byte[] { (byte) 88 }))); 
+ assertEquals("59", new String(new Base16().encode(new byte[] { (byte) 89 }))); + assertEquals("5A", new String(new Base16().encode(new byte[] { (byte) 90 }))); + assertEquals("5B", new String(new Base16().encode(new byte[] { (byte) 91 }))); + assertEquals("5C", new String(new Base16().encode(new byte[] { (byte) 92 }))); + assertEquals("5D", new String(new Base16().encode(new byte[] { (byte) 93 }))); + assertEquals("5E", new String(new Base16().encode(new byte[] { (byte) 94 }))); + assertEquals("5F", new String(new Base16().encode(new byte[] { (byte) 95 }))); + assertEquals("60", new String(new Base16().encode(new byte[] { (byte) 96 }))); + assertEquals("61", new String(new Base16().encode(new byte[] { (byte) 97 }))); + assertEquals("62", new String(new Base16().encode(new byte[] { (byte) 98 }))); + assertEquals("63", new String(new Base16().encode(new byte[] { (byte) 99 }))); + assertEquals("64", new String(new Base16().encode(new byte[] { (byte) 100 }))); + assertEquals("65", new String(new Base16().encode(new byte[] { (byte) 101 }))); + assertEquals("66", new String(new Base16().encode(new byte[] { (byte) 102 }))); + assertEquals("67", new String(new Base16().encode(new byte[] { (byte) 103 }))); + assertEquals("68", new String(new Base16().encode(new byte[] { (byte) 104 }))); + for (int i = -128; i <= 127; i++) { + final byte test[] = { (byte) i }; + assertTrue(Arrays.equals(test, new Base16().decode(new Base16().encode(test)))); + } + } + + @Test + public void testTriplets() { + assertEquals("000000", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 0 }))); + assertEquals("000001", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 1 }))); + assertEquals("000002", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 2 }))); + assertEquals("000003", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 3 }))); + assertEquals("000004", new String(new Base16().encode(new byte[] { (byte) 
0, (byte) 0, (byte) 4 }))); + assertEquals("000005", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 5 }))); + assertEquals("000006", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 6 }))); + assertEquals("000007", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 7 }))); + assertEquals("000008", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 8 }))); + assertEquals("000009", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 9 }))); + assertEquals("00000A", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 10 }))); + assertEquals("00000B", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 11 }))); + assertEquals("00000C", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 12 }))); + assertEquals("00000D", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 13 }))); + assertEquals("00000E", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 14 }))); + assertEquals("00000F", new String(new Base16().encode(new byte[] { (byte) 0, (byte) 0, (byte) 15 }))); + } + + @Test + public void testByteToStringVariations() throws DecoderException { + final Base16 Base16 = new Base16(); + final byte[] b1 = StringUtils.getBytesUtf8("Hello World"); + final byte[] b2 = new byte[0]; + final byte[] b3 = null; + + assertEquals("byteToString Hello World", "48656C6C6F20576F726C64", Base16.encodeToString(b1)); + assertEquals("byteToString static Hello World", "48656C6C6F20576F726C64", StringUtils.newStringUtf8(new Base16().encode(b1))); + assertEquals("byteToString \"\"", "", Base16.encodeToString(b2)); + assertEquals("byteToString static \"\"", "", StringUtils.newStringUtf8(new Base16().encode(b2))); + assertEquals("byteToString null", null, Base16.encodeToString(b3)); + assertEquals("byteToString static null", null, StringUtils.newStringUtf8(new Base16().encode(b3))); + } + + 
@Test + public void testStringToByteVariations() throws DecoderException { + final Base16 Base16 = new Base16(); + final String s1 = "48656C6C6F20576F726C64"; + final String s2 = ""; + final String s3 = null; + + assertEquals("StringToByte Hello World", "Hello World", StringUtils.newStringUtf8(Base16.decode(s1))); + assertEquals("StringToByte Hello World", "Hello World", + StringUtils.newStringUtf8((byte[]) new Base16().decode((Object) s1))); + assertEquals("StringToByte static Hello World", "Hello World", + StringUtils.newStringUtf8(new Base16().decode(s1))); + assertEquals("StringToByte \"\"", "", StringUtils.newStringUtf8(new Base16().decode(s2))); + assertEquals("StringToByte static \"\"", "", StringUtils.newStringUtf8(new Base16().decode(s2))); + assertEquals("StringToByte null", null, StringUtils.newStringUtf8(new Base16().decode(s3))); + assertEquals("StringToByte static null", null, StringUtils.newStringUtf8(new Base16().decode(s3))); + } + + private String toString(final byte[] data) { + final StringBuilder buf = new StringBuilder(); + for (int i = 0; i < data.length; i++) { + buf.append(data[i]); + if (i != data.length - 1) { + buf.append(","); + } + } + return buf.toString(); + } + + /** + * Test for CODEC-265: Encode a ~1GiB file. + * + * @see <a href="https://issues.apache.org/jira/projects/CODEC/issues/CODEC-265">CODEC-265</a> + */ + public void testCodec265_over() { + // almost 1GiB file to encode: 2^29 bytes + final int size1GiB = 1 << 29; + + // Expecting a size of 2 output bytes per 1 input byte + final int blocks = size1GiB; + final int expectedLength = 2 * blocks; + + // This test is memory hungry. Check we can run it. + final long presumableFreeMemory = BaseNCodecTest.getPresumableFreeMemory(); + + // Estimate the maximum memory required: Review comment: This has been copied from the Base64Test, changed (to encode a 500MiB file) and then disabled (no `@Test` annotation). Codec 265 concerns encoding a 1GiB file. 
Using Base64 you get 4 characters per 3 bytes so the output is ~1.33GiB. With Base16 you get 2 characters per byte and it is not possible to create a 2 GiB char array.

Note: It will never run on Travis CI due to memory restrictions.

When I run this locally it passes with a 500MiB input (1 << 29) but fails with 1GiB (1 << 30). However, this is not due to an out-of-memory error (as expected) but an index out of bounds exception. With a 1 << 30 input, `length * BYTES_PER_ENCODED_BLOCK` overflows `int` and the requested size becomes negative. The exception is then due to the allocation of the buffer using:

```java
context.buffer = new byte[Math.max(size, getDefaultBufferSize())];
```

As noted in my review comment for `ensureBufferSize`, this has been changed to allow a single allocation of the entire array. Unfortunately, when size is negative this creates a buffer of the default size.

Given that `BaseNCodec.ensureBufferSize` is intended to be called only with a positive size, one suggestion is to update `Base16.encode`, changing this line:

```java
final byte[] buffer = ensureBufferSize(length * BYTES_PER_ENCODED_BLOCK, context);
```

to

```java
final int size = length * BYTES_PER_ENCODED_BLOCK;
if (size < 0) {
    throw new IllegalArgumentException("Input byte[] length exceeds maximum size for encoded data: " + length);
}
final byte[] buffer = ensureBufferSize(size, context);
```

A different approach with a less meaningful exception (a negative array size exception) would be to change `ensureBufferSize`:

```java
context.buffer = new byte[
    compareUnsigned(size, getDefaultBufferSize()) > 0 ? size : getDefaultBufferSize()];
```

Either way a test should be added to hit this exception.

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at: users@infra.apache.org
