This is an automated email from the ASF dual-hosted git repository.
soumyakanti3578 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 8b74e4597f5 HIVE-29514: Optimize UDF Unhex and improve its test coverage (#6471)
8b74e4597f5 is described below
commit 8b74e4597f57877e35fa340eb754edd365e9cb1a
Author: Tanishq Chugh <[email protected]>
AuthorDate: Thu May 28 07:55:13 2026 +0530
HIVE-29514: Optimize UDF Unhex and improve its test coverage (#6471)
---
.../org/apache/hadoop/hive/ql/udf/UDFUnhex.java | 53 ++++++++---
.../apache/hadoop/hive/ql/udf/TestUDFUnhex.java | 101 ++++++++++++++++++++-
2 files changed, 137 insertions(+), 17 deletions(-)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java
index a6a9f568e49..820d14b7ade 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java
@@ -42,32 +42,57 @@
public class UDFUnhex extends UDF {
/**
- * Convert every two hex digits in s into.
- *
+ * Convert every two hex digits in s into a byte.
*/
public byte[] evaluate(Text s) {
if (s == null) {
return null;
}
- // append a leading 0 if needed
- String str;
- if (s.getLength() % 2 == 1) {
- str = "0" + s.toString();
- } else {
- str = s.toString();
+ int len = s.getLength();
+ if (len == 0) {
+ return new byte[0];
}
- byte[] result = new byte[str.length() / 2];
- for (int i = 0; i < str.length(); i += 2) {
- try {
- result[i / 2] = ((byte) Integer.parseInt(str.substring(i, i + 2), 16));
- } catch (NumberFormatException e) {
- // invalid character present, return null
+ byte[] textBytes = s.getBytes();
+
+ // (len + 1) / 2 ensures right size for odd lengths
+ byte[] result = new byte[(len + 1) / 2];
+
+ int i = 0;
+ int resIdx = 0;
+
+ // If length is odd, the first character acts as the first byte avoiding adding "0" prefix
+ if (len % 2 != 0) {
+ int val = decodeHexChar(textBytes[i++]);
+ if (val == -1) {
+ return null;
+ }
+ result[resIdx++] = (byte) val;
+ }
+
+ while (i < len) {
+ int high, low;
+ if ((high = decodeHexChar(textBytes[i++])) == -1 ||
+ (low = decodeHexChar(textBytes[i++])) == -1) {
return null;
}
+ result[resIdx++] = (byte) ((high << 4) | low);
}
return result;
}
+
+ private int decodeHexChar(byte b) {
+ if (b >= '0' && b <= '9') {
+ return b - '0';
+ }
+ if (b >= 'a' && b <= 'f') {
+ return b - 'a' + 10;
+ }
+ if (b >= 'A' && b <= 'F') {
+ return b - 'A' + 10;
+ }
+ return -1;
+ }
}
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java
index 3cf665472c2..c7296262561 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java
@@ -21,7 +21,9 @@
import org.apache.hadoop.io.Text;
+import static org.junit.Assert.assertArrayEquals;
import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
import org.junit.Test;
/**
@@ -40,8 +42,101 @@ public void testUnhexConversion(){
UDFUnhex udf = new UDFUnhex();
byte[] output = udf.evaluate(hex);
assertEquals(expected.length,output.length);
- for (int i = 0; i < expected.length; i++){
- assertEquals(expected[i], output[i]);
- }
+ assertArrayEquals(expected, output);
+ }
+
+ @Test
+ public void testUnhexOddLength() {
+ UDFUnhex udf = new UDFUnhex();
+
+ Text hex1 = new Text("A");
+ byte[] expected1 = new byte[] {(byte) 0x0A};
+ assertArrayEquals(expected1, udf.evaluate(hex1));
+
+ Text hex2 = new Text("123");
+ byte[] expected2 = new byte[] {(byte) 0x01, (byte) 0x23};
+ assertArrayEquals(expected2, udf.evaluate(hex2));
+ }
+
+ @Test
+ public void testUnhexInvalidCharacters() {
+ UDFUnhex udf = new UDFUnhex();
+
+ Text hex = new Text("7374G9");
+ assertNull("Should return null for invalid hex characters", udf.evaluate(hex));
+
+ Text hexOddInvalid = new Text("12G");
+ assertNull("Should return null for invalid hex characters in odd length string", udf.evaluate(hexOddInvalid));
+
+ Text hexOddInvalidSingleChar = new Text("G");
+ assertNull("Should return null for invalid hex character in odd-length input",
+ udf.evaluate(hexOddInvalidSingleChar));
+
+ Text hexInvalidLow = new Text("0G");
+ assertNull("Should return null when low nibble is invalid", udf.evaluate(hexInvalidLow));
+
+ Text hexInvalidHigh = new Text("G0");
+ assertNull("Should return null when high nibble is invalid", udf.evaluate(hexInvalidHigh));
+ }
+
+ @Test
+ public void testUnhexNullEmptyCases() {
+ UDFUnhex udf = new UDFUnhex();
+
+ assertNull(udf.evaluate(null));
+
+ Text hexEmpty = new Text("");
+ byte[] expectedEmpty = new byte[0];
+ assertArrayEquals(expectedEmpty, udf.evaluate(hexEmpty));
+ }
+
+ @Test
+ public void testUnhexMixedCase() {
+ UDFUnhex udf = new UDFUnhex();
+
+ Text hex = new Text("aABb9");
+ byte[] expected = new byte[] {(byte) 0x0A, (byte) 0xAB, (byte) 0xB9};
+ assertArrayEquals(expected, udf.evaluate(hex));
+ }
+
+ @Test
+ public void testUnhexLowerCase() {
+ UDFUnhex udf = new UDFUnhex();
+
+ Text hexLowerPair = new Text("ab");
+ assertArrayEquals(new byte[] {(byte) 0xAB}, udf.evaluate(hexLowerPair));
+
+ Text hexLowerOddLength = new Text("abc");
+ assertArrayEquals(new byte[] {(byte) 0x0A, (byte) 0xBC}, udf.evaluate(hexLowerOddLength));
+
+ Text hexLowerDigits = new Text("0123456789abcdef");
+ byte[] expectedLowerDigits = new byte[] {
+ (byte) 0x01, (byte) 0x23, (byte) 0x45, (byte) 0x67,
+ (byte) 0x89, (byte) 0xAB, (byte) 0xCD, (byte) 0xEF
+ };
+ assertArrayEquals(expectedLowerDigits, udf.evaluate(hexLowerDigits));
+ }
+
+ @Test
+ public void testUnhexBoundaryValues() {
+ UDFUnhex udf = new UDFUnhex();
+
+ Text hexMinByte = new Text("00");
+ assertArrayEquals(new byte[] {(byte) 0x00}, udf.evaluate(hexMinByte));
+
+ Text hexMaxByteUpper = new Text("FF");
+ assertArrayEquals(new byte[] {(byte) 0xFF}, udf.evaluate(hexMaxByteUpper));
+
+ Text hexMaxByteLower = new Text("ff");
+ assertArrayEquals(new byte[] {(byte) 0xFF}, udf.evaluate(hexMaxByteLower));
+
+ Text hexOddMinDigit = new Text("0");
+ assertArrayEquals(new byte[] {(byte) 0x00}, udf.evaluate(hexOddMinDigit));
+
+ Text hexOddMaxUpper = new Text("F");
+ assertArrayEquals(new byte[] {(byte) 0x0F}, udf.evaluate(hexOddMaxUpper));
+
+ Text hexOddMaxLower = new Text("f");
+ assertArrayEquals(new byte[] {(byte) 0x0F}, udf.evaluate(hexOddMaxLower));
}
}