This is an automated email from the ASF dual-hosted git repository.
sbadhya pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new 855e4055675 HIVE-28148: Implement array_compact UDF to remove all
nulls from an array (#5161) (Taraka Rama Rao Lethavadla reviewed by Sourabh
Badhya)
855e4055675 is described below
commit 855e4055675e3c993a61f59501f783e641abaaa6
Author: tarak271 <[email protected]>
AuthorDate: Fri Apr 5 16:45:17 2024 +0530
HIVE-28148: Implement array_compact UDF to remove all nulls from an array
(#5161) (Taraka Rama Rao Lethavadla reviewed by Sourabh Badhya)
---
.../hadoop/hive/ql/exec/FunctionRegistry.java | 1 +
.../ql/udf/generic/GenericUDFArrayCompact.java | 56 +++++++++
.../ql/udf/generic/TestGenericUDFArrayCompact.java | 127 +++++++++++++++++++++
.../queries/clientnegative/udf_array_compact_1.q | 1 +
.../queries/clientpositive/udf_array_compact.q | 38 ++++++
.../clientnegative/udf_array_compact_1.q.out | 1 +
.../clientpositive/llap/show_functions.q.out | 2 +
.../clientpositive/llap/udf_array_compact.q.out | 112 ++++++++++++++++++
8 files changed, 338 insertions(+)
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
index 28f35c4a15f..c54a59f9516 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/exec/FunctionRegistry.java
@@ -617,6 +617,7 @@ public final class FunctionRegistry {
system.registerGenericUDF("array_remove", GenericUDFArrayRemove.class);
system.registerGenericUDF("array_position", GenericUDFArrayPosition.class);
system.registerGenericUDF("array_append", GenericUDFArrayAppend.class);
+ system.registerGenericUDF("array_compact", GenericUDFArrayCompact.class);
system.registerGenericUDF("deserialize", GenericUDFDeserialize.class);
system.registerGenericUDF("sentences", GenericUDFSentences.class);
system.registerGenericUDF("map_keys", GenericUDFMapKeys.class);
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayCompact.java
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayCompact.java
new file mode 100644
index 00000000000..71f5526e126
--- /dev/null
+++
b/ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayCompact.java
@@ -0,0 +1,56 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.List;
+import java.util.Objects;
+import java.util.stream.Collectors;
+
+/**
+ * GenericUDFArrayCompact.
+ * <p>
+ * Implements the {@code array_compact} UDF: takes one array argument and returns a new list
+ * holding only its non-NULL elements, in their original order. Each kept element is passed
+ * through the inherited {@code converter} before being collected. An empty input array yields
+ * an empty list; a negative list length reported by the object inspector yields {@code null}.
+ */
+@Description(name = "array_compact", value = "_FUNC_(array) - Removes NULL
elements from array.",
+ extended = "Example:\n" + " > SELECT _FUNC_(array(1,NULL,3,NULL,4)) FROM
src;\n" + " [1,3,4]")
+public class GenericUDFArrayCompact extends AbstractGenericUDFArrayBase {
+ private static final String FUNC_NAME = "ARRAY_COMPACT"; // handed to the base class; presumably the name shown in argument-check errors
+
+ public GenericUDFArrayCompact() {
+ super(FUNC_NAME, 1, 1, ObjectInspector.Category.LIST); // presumably (name, min args, max args, required category) - confirm in AbstractGenericUDFArrayBase
+ }
+
+ @Override
+ public Object evaluate(DeferredObject[] arguments) throws HiveException {
+ Object array = arguments[ARRAY_IDX].get();
+ int arrayLength = arrayOI.getListLength(array);
+ if (arrayLength == 0) { // nothing to filter: short-circuit with an empty list
+ return Collections.emptyList();
+ } else if (arrayLength < 0) { // NOTE(review): negative length presumably signals a NULL array - confirm against ListObjectInspector
+ return null;
+ }
+
+ List resultArray = new ArrayList<>(((ListObjectInspector)
argumentOIs[ARRAY_IDX]).getList(array)); // copy of the input elements
+ return resultArray.stream().filter(Objects::nonNull).map(o ->
converter.convert(o)).collect(Collectors.toList()); // drop nulls, convert the remaining elements
+ }
+}
diff --git
a/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayCompact.java
b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayCompact.java
new file mode 100644
index 00000000000..fb0ec6ed307
--- /dev/null
+++
b/ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayCompact.java
@@ -0,0 +1,127 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.common.type.Date;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.io.DateWritableV2;
+import org.apache.hadoop.hive.serde2.io.DoubleWritable;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import
org.apache.hadoop.hive.serde2.objectinspector.primitive.PrimitiveObjectInspectorFactory;
+import org.apache.hadoop.io.FloatWritable;
+import org.apache.hadoop.io.IntWritable;
+import org.apache.hadoop.io.Text;
+import org.junit.Test;
+
+import java.util.HashMap;
+import java.util.Map;
+
+import static java.util.Arrays.asList;
+
+public class TestGenericUDFArrayCompact extends TestGenericUDFArray { // unit tests for GenericUDFArrayCompact over several element types
+
+ public TestGenericUDFArrayCompact() {
+ super.udf = new GenericUDFArrayCompact(); // UDF under test, stored in the parent's udf field
+ }
+
+ @Test public void testPrimitive() throws HiveException { // arrays of primitive writables containing one null
+ ObjectInspector[] inputOIs = {
ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector) };
+ udf.initialize(inputOIs);
+
+ Object i1 = new IntWritable(3);
+ Object i2 = new IntWritable(1);
+ Object i3 = new IntWritable(2);
+ Object i4 = new IntWritable(4);
+ runAndVerify(asList(i1, i2, null, i3, i4), asList(i1, i2, i3, i4)); // presumably runAndVerify(input, expected) - confirm in TestGenericUDFArray
+
+ i1 = new FloatWritable(13.3f); // NOTE(review): udf is still initialized with the int element inspector here - confirm this case tests what is intended
+ i2 = new FloatWritable(1.1f);
+ i3 = new FloatWritable(3.3f);
+ i4 = new FloatWritable(2.20f);
+ runAndVerify(asList(i1, i2, i3, i4, null), asList(i1, i2, i3, i4));
+ }
+
+ @Test public void testList() throws HiveException { // array whose elements are lists of strings
+ ObjectInspector[] inputOIs = {
ObjectInspectorFactory.getStandardListObjectInspector(
+ ObjectInspectorFactory.getStandardListObjectInspector(
+ PrimitiveObjectInspectorFactory.writableStringObjectInspector)) };
+ udf.initialize(inputOIs);
+
+ Object i1 = asList(new Text("aa1"), new Text("dd"), new Text("cc"), new
Text("bb"));
+ Object i2 = asList(new Text("aa2"), new Text("cc"), new Text("ba"), new
Text("dd"));
+ Object i3 = asList(new Text("aa3"), new Text("cc"), new Text("dd"), new
Text("ee"), new Text("bb"));
+ Object i4 = asList(new Text("aa4"), new Text("cc"), new Text("ddd"), new
Text("bb"));
+ runAndVerify(asList(i1, i2, null, i3, null, i4), asList(i1, i2, i3, i4));
+ }
+
+ @Test public void testStruct() throws HiveException { // array of structs with fields (string, double, date, list of int)
+ ObjectInspector[] inputOIs = {
ObjectInspectorFactory.getStandardListObjectInspector(
+ ObjectInspectorFactory.getStandardStructObjectInspector(asList("f1",
"f2", "f3", "f4"),
+
asList(PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+ PrimitiveObjectInspectorFactory.writableDoubleObjectInspector,
+ PrimitiveObjectInspectorFactory.writableDateObjectInspector,
+ ObjectInspectorFactory.getStandardListObjectInspector(
+
PrimitiveObjectInspectorFactory.writableIntObjectInspector)))) };
+ udf.initialize(inputOIs);
+
+ Object i1 = asList(new Text("a"), new DoubleWritable(3.1415), new
DateWritableV2(Date.of(2015, 5, 26)),
+ asList(new IntWritable(1), new IntWritable(3), new IntWritable(2), new
IntWritable(4)));
+
+ Object i2 = asList(new Text("b"), new DoubleWritable(3.14), new
DateWritableV2(Date.of(2015, 5, 26)),
+ asList(new IntWritable(1), new IntWritable(3), new IntWritable(2), new
IntWritable(4)));
+
+ Object i3 = asList(new Text("a"), new DoubleWritable(3.1415), new
DateWritableV2(Date.of(2015, 5, 25)),
+ asList(new IntWritable(1), new IntWritable(3), new IntWritable(2), new
IntWritable(5)));
+
+ Object i4 = asList(new Text("a"), new DoubleWritable(3.1415), new
DateWritableV2(Date.of(2015, 5, 25)),
+ asList(new IntWritable(1), new IntWritable(3), new IntWritable(2), new
IntWritable(4)));
+
+ runAndVerify(asList(i1, null, null, i3, i4, i2), asList(i1, i3, i4, i2)); // two adjacent nulls in the middle of the input
+ }
+
+ @Test public void testMap() throws HiveException { // array of maps from string to int
+ ObjectInspector[] inputOIs = {
ObjectInspectorFactory.getStandardListObjectInspector(
+ ObjectInspectorFactory.getStandardMapObjectInspector(
+ PrimitiveObjectInspectorFactory.writableStringObjectInspector,
+ PrimitiveObjectInspectorFactory.writableIntObjectInspector)) };
+ udf.initialize(inputOIs);
+
+ Map<Text, IntWritable> m1 = new HashMap<>();
+ m1.put(new Text("a"), new IntWritable(4));
+ m1.put(new Text("b"), new IntWritable(3));
+ m1.put(new Text("c"), new IntWritable(1));
+ m1.put(new Text("d"), new IntWritable(2));
+
+ Map<Text, IntWritable> m2 = new HashMap<>();
+ m2.put(new Text("d"), new IntWritable(4));
+ m2.put(new Text("b"), new IntWritable(3));
+ m2.put(new Text("a"), new IntWritable(1));
+ m2.put(new Text("c"), new IntWritable(2));
+
+ Map<Text, IntWritable> m3 = new HashMap<>();
+ m3.put(new Text("d"), new IntWritable(4));
+ m3.put(new Text("b"), new IntWritable(3));
+ m3.put(new Text("a"), new IntWritable(1));
+
+ runAndVerify(asList(m1, m3, m2, null, null), asList(m1, m3, m2)); // expected list omits the two trailing nulls
+ }
+
+}
diff --git a/ql/src/test/queries/clientnegative/udf_array_compact_1.q
b/ql/src/test/queries/clientnegative/udf_array_compact_1.q
new file mode 100644
index 00000000000..fc5fb9e2096
--- /dev/null
+++ b/ql/src/test/queries/clientnegative/udf_array_compact_1.q
@@ -0,0 +1 @@
+SELECT array_compact(3);
\ No newline at end of file
diff --git a/ql/src/test/queries/clientpositive/udf_array_compact.q
b/ql/src/test/queries/clientpositive/udf_array_compact.q
new file mode 100644
index 00000000000..f8b6a42aadb
--- /dev/null
+++ b/ql/src/test/queries/clientpositive/udf_array_compact.q
@@ -0,0 +1,38 @@
+--! qt:dataset:src
+
+-- SORT_QUERY_RESULTS
+
+set hive.fetch.task.conversion=more;
+
+DESCRIBE FUNCTION array_compact;
+DESCRIBE FUNCTION EXTENDED array_compact;
+
+-- evaluates function for array of primitives
+SELECT array_compact(array(1, 2, 3, null,3,4)) FROM src tablesample (1 rows);
+
+SELECT array_compact(array()) FROM src tablesample (1 rows);
+
+SELECT array_compact(array(null)) FROM src tablesample (1 rows);
+
+SELECT array_compact(array(1.12, 2.23, 3.34, null,1.11,1.12,2.9)) FROM src
tablesample (1 rows);
+
+SELECT array_compact(array(1.1234567890, 2.234567890, 3.34567890, null,
3.3456789, 2.234567,1.1234567890)) FROM src tablesample (1 rows);
+
+SELECT array_compact(array(11234567890, 2234567890, 334567890, null,
11234567890, 2234567890, 334567890, null)) FROM src tablesample (1 rows);
+
+SELECT
array_compact(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")))
FROM src tablesample (1 rows);
+
+# handle null array cases
+
+dfs ${system:test.dfs.mkdir} ${system:test.tmp.dir}/test_null_array;
+
+dfs -copyFromLocal ../../data/files/test_null_array.csv
${system:test.tmp.dir}/test_null_array/;
+
+create external table test_null_array (id int, value Array<String>) ROW FORMAT
DELIMITED
+ FIELDS TERMINATED BY ':' collection items terminated by ',' location
'${system:test.tmp.dir}/test_null_array';
+
+select value from test_null_array;
+
+select array_compact(value) from test_null_array;
+
+dfs -rm -r ${system:test.tmp.dir}/test_null_array;
\ No newline at end of file
diff --git a/ql/src/test/results/clientnegative/udf_array_compact_1.q.out
b/ql/src/test/results/clientnegative/udf_array_compact_1.q.out
new file mode 100644
index 00000000000..30933b86378
--- /dev/null
+++ b/ql/src/test/results/clientnegative/udf_array_compact_1.q.out
@@ -0,0 +1 @@
+FAILED: SemanticException [Error 10016]: Line 1:21 Argument type mismatch '3':
"array" expected at function ARRAY_COMPACT, but "int" is found
diff --git a/ql/src/test/results/clientpositive/llap/show_functions.q.out
b/ql/src/test/results/clientpositive/llap/show_functions.q.out
index b764c89e94d..08f85658227 100644
--- a/ql/src/test/results/clientpositive/llap/show_functions.q.out
+++ b/ql/src/test/results/clientpositive/llap/show_functions.q.out
@@ -47,6 +47,7 @@ and
approx_distinct
array
array_append
+array_compact
array_contains
array_distinct
array_except
@@ -677,6 +678,7 @@ and
approx_distinct
array
array_append
+array_compact
array_contains
array_distinct
array_except
diff --git a/ql/src/test/results/clientpositive/llap/udf_array_compact.q.out
b/ql/src/test/results/clientpositive/llap/udf_array_compact.q.out
new file mode 100644
index 00000000000..633e21e30bf
--- /dev/null
+++ b/ql/src/test/results/clientpositive/llap/udf_array_compact.q.out
@@ -0,0 +1,112 @@
+PREHOOK: query: DESCRIBE FUNCTION array_compact
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION array_compact
+POSTHOOK: type: DESCFUNCTION
+array_compact(array) - Removes NULL elements from array.
+PREHOOK: query: DESCRIBE FUNCTION EXTENDED array_compact
+PREHOOK: type: DESCFUNCTION
+POSTHOOK: query: DESCRIBE FUNCTION EXTENDED array_compact
+POSTHOOK: type: DESCFUNCTION
+array_compact(array) - Removes NULL elements from array.
+Example:
+ > SELECT array_compact(array(1,NULL,3,NULL,4)) FROM src;
+ [1,3,4]
+Function class:org.apache.hadoop.hive.ql.udf.generic.GenericUDFArrayCompact
+Function type:BUILTIN
+PREHOOK: query: SELECT array_compact(array(1, 2, 3, null,3,4)) FROM src
tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(1, 2, 3, null,3,4)) FROM src
tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[1,2,3,3,4]
+PREHOOK: query: SELECT array_compact(array()) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array()) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[]
+PREHOOK: query: SELECT array_compact(array(null)) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(null)) FROM src tablesample (1
rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[]
+PREHOOK: query: SELECT array_compact(array(1.12, 2.23, 3.34,
null,1.11,1.12,2.9)) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(1.12, 2.23, 3.34,
null,1.11,1.12,2.9)) FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[1.12,2.23,3.34,1.11,1.12,2.9]
+PREHOOK: query: SELECT array_compact(array(1.1234567890, 2.234567890,
3.34567890, null, 3.3456789, 2.234567,1.1234567890)) FROM src tablesample (1
rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(1.1234567890, 2.234567890,
3.34567890, null, 3.3456789, 2.234567,1.1234567890)) FROM src tablesample (1
rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[1.123456789,2.23456789,3.3456789,3.3456789,2.234567,1.123456789]
+PREHOOK: query: SELECT array_compact(array(11234567890, 2234567890, 334567890,
null, 11234567890, 2234567890, 334567890, null)) FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT array_compact(array(11234567890, 2234567890,
334567890, null, 11234567890, 2234567890, 334567890, null)) FROM src
tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[11234567890,2234567890,334567890,11234567890,2234567890,334567890]
+PREHOOK: query: SELECT
array_compact(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")))
FROM src tablesample (1 rows)
+PREHOOK: type: QUERY
+PREHOOK: Input: default@src
+#### A masked pattern was here ####
+POSTHOOK: query: SELECT
array_compact(array(array("a","b","c","d"),array("a","b","c","d"),array("a","b","c","d","e"),null,array("e","a","b","c","d")))
FROM src tablesample (1 rows)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@src
+#### A masked pattern was here ####
+[["a","b","c","d"],["a","b","c","d"],["a","b","c","d","e"],["e","a","b","c","d"]]
+PREHOOK: query: create external table test_null_array (id int, value
Array<String>) ROW FORMAT DELIMITED
+#### A masked pattern was here ####
+PREHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_null_array
+POSTHOOK: query: create external table test_null_array (id int, value
Array<String>) ROW FORMAT DELIMITED
+#### A masked pattern was here ####
+POSTHOOK: type: CREATETABLE
+#### A masked pattern was here ####
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_null_array
+PREHOOK: query: select value from test_null_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_null_array
+#### A masked pattern was here ####
+POSTHOOK: query: select value from test_null_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_null_array
+#### A masked pattern was here ####
+["NULL"]
+["null","null"]
+[]
+PREHOOK: query: select array_compact(value) from test_null_array
+PREHOOK: type: QUERY
+PREHOOK: Input: default@test_null_array
+#### A masked pattern was here ####
+POSTHOOK: query: select array_compact(value) from test_null_array
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@test_null_array
+#### A masked pattern was here ####
+["NULL"]
+["null","null"]
+[]