This is an automated email from the ASF dual-hosted git repository. dmollitor pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push: new eacb4f3 HIVE-22674: Replace Base64 in serde Package (David Mollitor, reviewed by Naveen Gangam) eacb4f3 is described below commit eacb4f344352b66009e8f84797d9db4c3ae0ade7 Author: belugabehr <12578579+belugab...@users.noreply.github.com> AuthorDate: Wed Jul 22 09:42:55 2020 -0400 HIVE-22674: Replace Base64 in serde Package (David Mollitor, reviewed by Naveen Gangam) --- .../clientpositive/llap/compute_stats_binary.q.out | 2 +- .../org/apache/hadoop/hive/serde2/lazy/LazyBinary.java | 16 +++++++++------- .../org/apache/hadoop/hive/serde2/lazy/LazyUtils.java | 4 ++-- .../hive/serde2/lazy/fast/LazySimpleSerializeWrite.java | 6 +++--- .../hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java | 6 ++++-- 5 files changed, 19 insertions(+), 15 deletions(-) diff --git a/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out b/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out index 133c01e..fc90c89 100644 --- a/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out +++ b/ql/src/test/results/clientpositive/llap/compute_stats_binary.q.out @@ -31,4 +31,4 @@ POSTHOOK: query: select compute_stats(a, 16) from tab_binary POSTHOOK: type: QUERY POSTHOOK: Input: default@tab_binary #### A masked pattern was here #### -{"columntype":"Binary","maxlength":36,"avglength":20.0,"countnulls":0} +{"columntype":"Binary","maxlength":58,"avglength":32.5,"countnulls":0} diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java index 8c594a8..6ce4906 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyBinary.java @@ -18,9 +18,11 @@ package org.apache.hadoop.hive.serde2.lazy; -import org.apache.commons.codec.binary.Base64; import org.slf4j.Logger; import org.slf4j.LoggerFactory; + +import java.util.Base64; + import org.apache.hadoop.hive.serde2.lazy.objectinspector.primitive.LazyBinaryObjectInspector; import org.apache.hadoop.io.BytesWritable; @@ -47,17 +49,17 @@ public class LazyBinary extends LazyPrimitive<LazyBinaryObjectInspector, BytesWr byte[] recv = new byte[length]; System.arraycopy(bytes.getData(), start, recv, 0, length); byte[] decoded = decodeIfNeeded(recv); - // use the original bytes in case decoding should fail - decoded = decoded.length > 0 ? decoded : recv; data.set(decoded, 0, decoded.length); } // todo this should be configured in serde public static byte[] decodeIfNeeded(byte[] recv) { - boolean arrayByteBase64 = Base64.isArrayByteBase64(recv); - if (LOG.isDebugEnabled() && arrayByteBase64) { - LOG.debug("Data only contains Base64 alphabets only so try to decode the data."); + try { + return Base64.getDecoder().decode(recv); + } catch (IllegalArgumentException e) { + // use the original bytes in case decoding should fail + LOG.debug("Data does not contain only Base64 characters so return original byte array", e); + return recv; } - return arrayByteBase64 ? Base64.decodeBase64(recv) : recv; } } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java index 544a668..65a76ac 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/LazyUtils.java @@ -24,9 +24,9 @@ import java.io.OutputStream; import java.nio.ByteBuffer; import java.nio.charset.CharacterCodingException; import java.util.Arrays; +import java.util.Base64; import java.util.Map; -import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.hive.serde2.SerDeException; import org.apache.hadoop.hive.serde2.io.HiveCharWritable; import org.apache.hadoop.hive.serde2.io.HiveDecimalWritable; @@ -294,7 +294,7 @@ public final class LazyUtils { BytesWritable bw = ((BinaryObjectInspector) oi).getPrimitiveWritableObject(o); byte[] toEncode = new byte[bw.getLength()]; System.arraycopy(bw.getBytes(), 0,toEncode, 0, bw.getLength()); - byte[] toWrite = Base64.encodeBase64(toEncode); + byte[] toWrite = Base64.getEncoder().withoutPadding().encode(toEncode); out.write(toWrite, 0, toWrite.length); break; } diff --git a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java index a42d6f4..4be9c40 100644 --- a/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java +++ b/serde/src/java/org/apache/hadoop/hive/serde2/lazy/fast/LazySimpleSerializeWrite.java @@ -21,11 +21,11 @@ package org.apache.hadoop.hive.serde2.lazy.fast; import java.io.IOException; import java.nio.ByteBuffer; import java.util.ArrayDeque; +import java.util.Base64; import java.util.Deque; import java.util.List; import java.util.Map; -import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.hive.common.type.Date; import org.apache.hadoop.hive.serde2.io.DateWritableV2; import org.slf4j.Logger; @@ -277,7 +277,7 @@ public final class LazySimpleSerializeWrite implements SerializeWrite { beginPrimitive(); byte[] toEncode = new byte[v.length]; System.arraycopy(v, 0, toEncode, 0, v.length); - byte[] toWrite = Base64.encodeBase64(toEncode); + byte[] toWrite = Base64.getEncoder().withoutPadding().encode(toEncode); output.write(toWrite, 0, toWrite.length); finishPrimitive(); } @@ -287,7 +287,7 @@ public final class LazySimpleSerializeWrite implements SerializeWrite { beginPrimitive(); byte[] toEncode = new byte[length]; System.arraycopy(v, start, toEncode, 0, length); - byte[] toWrite = Base64.encodeBase64(toEncode); + byte[] toWrite = Base64.getEncoder().withoutPadding().encode(toEncode); output.write(toWrite, 0, toWrite.length); finishPrimitive(); } diff --git a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java index c697dcf..91857d2 100644 --- a/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java +++ b/serde/src/test/org/apache/hadoop/hive/serde2/lazy/TestLazySimpleSerDe.java @@ -18,13 +18,13 @@ package org.apache.hadoop.hive.serde2.lazy; import java.io.IOException; +import java.util.Base64; import java.util.List; import java.util.Properties; import java.util.Random; -import org.apache.commons.codec.binary.Base64; import org.apache.hadoop.conf.Configuration; import org.apache.hadoop.hive.serde.serdeConstants; import org.apache.hadoop.hive.serde2.ByteStream; @@ -76,7 +76,9 @@ public class TestLazySimpleSerDe { Text t = new Text("123\t456\t789\t1000\t5.3\thive and hadoop\t1.\tNULL\t"); t.append(new byte[]{(byte)Integer.parseInt("10111111", 2)}, 0, 1); StringBuilder sb = new StringBuilder("123\t456\t789\t1000\t5.3\thive and hadoop\t1\tNULL\t"); - String s = sb.append(new String(Base64.encodeBase64(new byte[]{(byte)Integer.parseInt("10111111", 2)}))).toString(); + String s = sb.append( + Base64.getEncoder().withoutPadding().encodeToString(new byte[] { (byte) Integer.parseInt("10111111", 2) })) + .toString(); Object[] expectedFieldsData = {new ByteWritable((byte) 123), new ShortWritable((short) 456), new IntWritable(789), new LongWritable(1000), new DoubleWritable(5.3),