This is an automated email from the ASF dual-hosted git repository.

soumyakanti3578 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new 8b74e4597f5 HIVE-29514: Optimize UDF Unhex and improve its test coverage (#6471)
8b74e4597f5 is described below

commit 8b74e4597f57877e35fa340eb754edd365e9cb1a
Author: Tanishq Chugh <[email protected]>
AuthorDate: Thu May 28 07:55:13 2026 +0530

    HIVE-29514: Optimize UDF Unhex and improve its test coverage (#6471)
---
 .../org/apache/hadoop/hive/ql/udf/UDFUnhex.java    |  53 ++++++++---
 .../apache/hadoop/hive/ql/udf/TestUDFUnhex.java    | 101 ++++++++++++++++++++-
 2 files changed, 137 insertions(+), 17 deletions(-)

diff --git a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java
index a6a9f568e49..820d14b7ade 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/udf/UDFUnhex.java
@@ -42,32 +42,57 @@
 public class UDFUnhex extends UDF {
 
   /**
-   * Convert every two hex digits in s into.
-   *
+   * Convert every two hex digits in s into a byte.
    */
   public byte[] evaluate(Text s) {
     if (s == null) {
       return null;
     }
 
-    // append a leading 0 if needed
-    String str;
-    if (s.getLength() % 2 == 1) {
-      str = "0" + s.toString();
-    } else {
-      str = s.toString();
+    int len = s.getLength();
+    if (len == 0) {
+      return new byte[0];
     }
 
-    byte[] result = new byte[str.length() / 2];
-    for (int i = 0; i < str.length(); i += 2) {
-      try {
-        result[i / 2] = ((byte) Integer.parseInt(str.substring(i, i + 2), 16));
-      } catch (NumberFormatException e) {
-        // invalid character present, return null
+    byte[] textBytes = s.getBytes();
+
+    // (len + 1) / 2 ensures right size for odd lengths
+    byte[] result = new byte[(len + 1) / 2];
+
+    int i = 0;
+    int resIdx = 0;
+
+    // If length is odd, the first character acts as the first byte avoiding adding "0" prefix
+    if (len % 2 != 0) {
+      int val = decodeHexChar(textBytes[i++]);
+      if (val == -1) {
+        return null;
+      }
+      result[resIdx++] = (byte) val;
+    }
+
+    while (i < len) {
+      int high, low;
+      if ((high = decodeHexChar(textBytes[i++])) == -1 ||
+          (low = decodeHexChar(textBytes[i++])) == -1) {
         return null;
       }
+      result[resIdx++] = (byte) ((high << 4) | low);
     }
 
     return result;
   }
+
+  private int decodeHexChar(byte b) {
+    if (b >= '0' && b <= '9') {
+      return b - '0';
+    }
+    if (b >= 'a' && b <= 'f') {
+      return b - 'a' + 10;
+    }
+    if (b >= 'A' && b <= 'F') {
+      return b - 'A' + 10;
+    }
+    return -1;
+  }
 }
diff --git a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java
index 3cf665472c2..c7296262561 100644
--- a/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java
+++ b/ql/src/test/org/apache/hadoop/hive/ql/udf/TestUDFUnhex.java
@@ -21,7 +21,9 @@
 
 
 import org.apache.hadoop.io.Text;
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
 import org.junit.Test;
 
 /**
@@ -40,8 +42,101 @@ public void testUnhexConversion(){
     UDFUnhex udf = new UDFUnhex();
     byte[] output = udf.evaluate(hex);
     assertEquals(expected.length,output.length);
-    for (int i = 0; i < expected.length; i++){
-      assertEquals(expected[i], output[i]);
-    }
+    assertArrayEquals(expected, output);
+  }
+
+  @Test
+  public void testUnhexOddLength() {
+    UDFUnhex udf = new UDFUnhex();
+
+    Text hex1 = new Text("A");
+    byte[] expected1 = new byte[] {(byte) 0x0A};
+    assertArrayEquals(expected1, udf.evaluate(hex1));
+
+    Text hex2 = new Text("123");
+    byte[] expected2 = new byte[] {(byte) 0x01, (byte) 0x23};
+    assertArrayEquals(expected2, udf.evaluate(hex2));
+  }
+
+  @Test
+  public void testUnhexInvalidCharacters() {
+    UDFUnhex udf = new UDFUnhex();
+
+    Text hex = new Text("7374G9");
+    assertNull("Should return null for invalid hex characters", udf.evaluate(hex));
+
+    Text hexOddInvalid = new Text("12G");
+    assertNull("Should return null for invalid hex characters in odd length string", udf.evaluate(hexOddInvalid));
+
+    Text hexOddInvalidSingleChar = new Text("G");
+    assertNull("Should return null for invalid hex character in odd-length input",
+        udf.evaluate(hexOddInvalidSingleChar));
+
+    Text hexInvalidLow = new Text("0G");
+    assertNull("Should return null when low nibble is invalid", udf.evaluate(hexInvalidLow));
+
+    Text hexInvalidHigh = new Text("G0");
+    assertNull("Should return null when high nibble is invalid", udf.evaluate(hexInvalidHigh));
+  }
+
+  @Test
+  public void testUnhexNullEmptyCases() {
+    UDFUnhex udf = new UDFUnhex();
+
+    assertNull(udf.evaluate(null));
+
+    Text hexEmpty = new Text("");
+    byte[] expectedEmpty = new byte[0];
+    assertArrayEquals(expectedEmpty, udf.evaluate(hexEmpty));
+  }
+
+  @Test
+  public void testUnhexMixedCase() {
+    UDFUnhex udf = new UDFUnhex();
+
+    Text hex = new Text("aABb9");
+    byte[] expected = new byte[] {(byte) 0x0A, (byte) 0xAB, (byte) 0xB9};
+    assertArrayEquals(expected, udf.evaluate(hex));
+  }
+
+  @Test
+  public void testUnhexLowerCase() {
+    UDFUnhex udf = new UDFUnhex();
+
+    Text hexLowerPair = new Text("ab");
+    assertArrayEquals(new byte[] {(byte) 0xAB}, udf.evaluate(hexLowerPair));
+
+    Text hexLowerOddLength = new Text("abc");
+    assertArrayEquals(new byte[] {(byte) 0x0A, (byte) 0xBC}, udf.evaluate(hexLowerOddLength));
+
+    Text hexLowerDigits = new Text("0123456789abcdef");
+    byte[] expectedLowerDigits = new byte[] {
+        (byte) 0x01, (byte) 0x23, (byte) 0x45, (byte) 0x67,
+        (byte) 0x89, (byte) 0xAB, (byte) 0xCD, (byte) 0xEF
+    };
+    assertArrayEquals(expectedLowerDigits, udf.evaluate(hexLowerDigits));
+  }
+
+  @Test
+  public void testUnhexBoundaryValues() {
+    UDFUnhex udf = new UDFUnhex();
+
+    Text hexMinByte = new Text("00");
+    assertArrayEquals(new byte[] {(byte) 0x00}, udf.evaluate(hexMinByte));
+
+    Text hexMaxByteUpper = new Text("FF");
+    assertArrayEquals(new byte[] {(byte) 0xFF}, udf.evaluate(hexMaxByteUpper));
+
+    Text hexMaxByteLower = new Text("ff");
+    assertArrayEquals(new byte[] {(byte) 0xFF}, udf.evaluate(hexMaxByteLower));
+
+    Text hexOddMinDigit = new Text("0");
+    assertArrayEquals(new byte[] {(byte) 0x00}, udf.evaluate(hexOddMinDigit));
+
+    Text hexOddMaxUpper = new Text("F");
+    assertArrayEquals(new byte[] {(byte) 0x0F}, udf.evaluate(hexOddMaxUpper));
+
+    Text hexOddMaxLower = new Text("f");
+    assertArrayEquals(new byte[] {(byte) 0x0F}, udf.evaluate(hexOddMaxLower));
   }
 }

Reply via email to