This is an automated email from the ASF dual-hosted git repository.

dmollitor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git


The following commit(s) were added to refs/heads/master by this push:
     new eacb4f3  HIVE-22674: Replace Base64 in serde Package (David Mollitor, reviewed by Naveen Gangam)
eacb4f3 is described below

commit eacb4f344352b66009e8f84797d9db4c3ae0ade7
Author: belugabehr <12578579+belugab...@users.noreply.github.com>
AuthorDate: Wed Jul 22 09:42:55 2020 -0400

    HIVE-22674: Replace Base64 in serde Package (David Mollitor, reviewed by Naveen Gangam)
---
 .../clientpositive/llap/compute_stats_binary.q.out       |  2 +-
 .../org/apache/hadoop/hive/serde2/lazy/LazyBinary.java   | 16 +++++++++-------
 .../org/apache/hadoop/hive/serde2/lazy/LazyUtils.java    |  4 ++--
 .../hive/serde2/lazy/fast/LazySimpleSerializeWrite.java  |  6 +++---
 .../hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java     |  6 ++++--
 5 files changed, 19 insertions(+), 15 deletions(-)

diff --git a/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out b/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out
index 133c01e..fc90c89 100644
--- a/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out
+++ b/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out
@@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 16) from tab_binary
 POSTHOOK: type: QUERY
 POSTHOOK: Input: default@tab_binary
 #### A masked pattern was here ####
-{"columntype":"Binary","maxlength":36,"avglength":20.0,"countnulls":0}
+{"columntype":"Binary","maxlength":58,"avglength":32.5,"countnulls":0}
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java
index 8c594a8..6ce4906 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java
@@ -18,9 +18,11 @@
 
 package org.apache.hadoop.hive.serde2.lazy;
 
-import org.apache.commons.codec.binary.Base64;
 import org.slf4j.Logger;
 import org.slf4j.LoggerFactory;
+
+import java.util.Base64;
+
 import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector;
 import org.apache.hadoop.io.BytesWritable;
 
@@ -47,17 +49,17 @@ public class LazyBinary extends LazyPrimitive<LazyBinaryObjectInspector, BytesWr
     byte[] recv = new byte[length];
     System.arraycopy(bytes.getData(), start, recv, 0, length);
     byte[] decoded = decodeIfNeeded(recv);
-    // use the original bytes in case decoding should fail
-    decoded = decoded.length > 0 ? decoded : recv;
     data.set(decoded, 0, decoded.length);
   }
 
   // todo this should be configured in serde
   public static byte[] decodeIfNeeded(byte[] recv) {
-    boolean arrayByteBase64 = Base64.isArrayByteBase64(recv);
-    if (LOG.isDebugEnabled() && arrayByteBase64) {
-      LOG.debug("Data only contains Base64 alphabets only so try to decode the data.");
+    try {
+      return Base64.getDecoder().decode(recv);
+    } catch (IllegalArgumentException e) {
+      // use the original bytes in case decoding should fail
+      LOG.debug("Data does not contain only Base64 characters so return original byte array", e);
+      return recv;
     }
-    return arrayByteBase64 ? Base64.decodeBase64(recv) : recv;
   }
 }
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
index 544a668..65a76ac 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java
@@ -24,9 +24,9 @@ import java.io.OutputStream;
 import java.nio.ByteBuffer;
 import java.nio.charset.CharacterCodingException;
 import java.util.Arrays;
+import java.util.Base64;
 import java.util.Map;
 
-import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.hive.serde2.SerDeException;
 import org.apache.hadoop.hive.serde2.io.HiveCharWritable;
 import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable;
@@ -294,7 +294,7 @@ public final class LazyUtils {
       BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o);
       byte[] toEncode = new byte[bw.getLength()];
       System.arraycopy(bw.getBytes(), 0,toEncode, 0, bw.getLength());
-      byte[] toWrite = Base64.encodeBase64(toEncode);
+      byte[] toWrite = Base64.getEncoder().withoutPadding().encode(toEncode);
       out.write(toWrite, 0, toWrite.length);
       break;
     }
diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
index a42d6f4..4be9c40 100644
--- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
+++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java
@@ -21,11 +21,11 @@ package org.apache.hadoop.hive.serde2.lazy.fast;
 import java.io.IOException;
 import java.nio.ByteBuffer;
 import java.util.ArrayDeque;
+import java.util.Base64;
 import java.util.Deque;
 import java.util.List;
 import java.util.Map;
 
-import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.hive.common.type.Date;
 import org.apache.hadoop.hive.serde2.io.DateWritableV2;
 import org.slf4j.Logger;
@@ -277,7 +277,7 @@ public final class LazySimpleSerializeWrite implements SerializeWrite {
     beginPrimitive();
     byte[] toEncode = new byte[v.length];
     System.arraycopy(v, 0, toEncode, 0, v.length);
-    byte[] toWrite = Base64.encodeBase64(toEncode);
+    byte[] toWrite = Base64.getEncoder().withoutPadding().encode(toEncode);
     output.write(toWrite, 0, toWrite.length);
     finishPrimitive();
   }
@@ -287,7 +287,7 @@ public final class LazySimpleSerializeWrite implements SerializeWrite {
     beginPrimitive();
     byte[] toEncode = new byte[length];
     System.arraycopy(v, start, toEncode, 0, length);
-    byte[] toWrite = Base64.encodeBase64(toEncode);
+    byte[] toWrite = Base64.getEncoder().withoutPadding().encode(toEncode);
     output.write(toWrite, 0, toWrite.length);
     finishPrimitive();
   }
diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
index c697dcf..91857d2 100644
--- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
+++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java
@@ -18,13 +18,13 @@
 package org.apache.hadoop.hive.serde2.lazy;
 
 import java.io.IOException;
+import java.util.Base64;
 import java.util.List;
 import java.util.Properties;
 import java.util.Random;
 
 
 
-import org.apache.commons.codec.binary.Base64;
 import org.apache.hadoop.conf.Configuration;
 import org.apache.hadoop.hive.serde.serdeConstants;
 import org.apache.hadoop.hive.serde2.ByteStream;
@@ -76,7 +76,9 @@ public class TestLazySimpleSerDe {
       Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\tNULL\t");
       t.append(new byte[]{(byte)Integer.parseInt("10111111", 2)}, 0, 1);
       StringBuilder sb = new StringBuilder("123\t456\t789\t1000\t5.3\thive and hadoop\t1\tNULL\t");
-      String s = sb.append(new String(Base64.encodeBase64(new byte[]{(byte)Integer.parseInt("10111111", 2)}))).toString();
+      String s = sb.append(
+          Base64.getEncoder().withoutPadding().encodeToString(new byte[] { (byte) Integer.parseInt("10111111", 2) }))
+          .toString();
       Object[] expectedFieldsData = {new ByteWritable((byte) 123),
           new ShortWritable((short) 456), new IntWritable(789),
           new LongWritable(1000), new DoubleWritable(5.3),

Reply via email to