[
https://issues.apache.org/jira/browse/HIVE-26754?focusedWorklogId=826917&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-826917
]
ASF GitHub Bot logged work on HIVE-26754:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 17/Nov/22 17:25
Start Date: 17/Nov/22 17:25
Worklog Time Spent: 10m
Work Description: scarlin-cloudera commented on code in PR #3777:
URL: https://github.com/apache/hive/pull/3777#discussion_r1025468514
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+ static final int ARRAY_IDX = 0;
+ static final int ARRAY2_IDX = 1;
+ static final int START_IDX = 1;
+ static final int LENGTH_IDX = 2;
+ static final int SEPARATOR_IDX = 1;
+ static final int REPLACE_NULL_IDX = 2;
+
+ int MIN_ARG_COUNT;
Review Comment:
Variables should be in lower camel case.
Also, in cases like this, my first preference is to declare these as
private and have derived classes retrieve them with getter methods. I don't oppose
allowing child classes to have access, but if I do that, usually I go with
"protected"...which doesn't have as much meaning in Java, but it still shows me
as a developer that children are gonna use it.
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+ static final int ARRAY_IDX = 0;
+ static final int ARRAY2_IDX = 1;
+ static final int START_IDX = 1;
+ static final int LENGTH_IDX = 2;
+ static final int SEPARATOR_IDX = 1;
+ static final int REPLACE_NULL_IDX = 2;
+
+ int MIN_ARG_COUNT;
+ int MAX_ARG_COUNT;
+
+ transient ListObjectInspector arrayOI;
+ transient ObjectInspector[] argumentOIs;
+
+ transient Converter converter;
+
+ enum FUNC_NAMES {
+ ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN,
ARRAY_EXCEPT, ARRAY_INTERSECT
+ }
+
+ FUNC_NAMES FUNC_NAME;
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+
+ // Check if wrong number of arguments were passed
+ checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT);
+
+ // Check if the argument is of category LIST or not
+ checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST,
FUNC_NAME,
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) {
+ checkArgCategory(arguments, ARRAY2_IDX,
ObjectInspector.Category.LIST, FUNC_NAME,
+
org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+ }
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) {
Review Comment:
Perhaps for this one, we can override the initialize, and check these
categories in the derived class?
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+ static final int ARRAY_IDX = 0;
+ static final int ARRAY2_IDX = 1;
+ static final int START_IDX = 1;
+ static final int LENGTH_IDX = 2;
+ static final int SEPARATOR_IDX = 1;
+ static final int REPLACE_NULL_IDX = 2;
+
+ int MIN_ARG_COUNT;
+ int MAX_ARG_COUNT;
+
+ transient ListObjectInspector arrayOI;
+ transient ObjectInspector[] argumentOIs;
+
+ transient Converter converter;
+
+ enum FUNC_NAMES {
+ ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN,
ARRAY_EXCEPT, ARRAY_INTERSECT
+ }
+
+ FUNC_NAMES FUNC_NAME;
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+
+ // Check if wrong number of arguments were passed
+ checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT);
+
+ // Check if the argument is of category LIST or not
+ checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST,
FUNC_NAME,
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) {
+ checkArgCategory(arguments, ARRAY2_IDX,
ObjectInspector.Category.LIST, FUNC_NAME,
+
org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+ }
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) {
+ PrimitiveObjectInspector startIndexObjectInspector =
(PrimitiveObjectInspector) arguments[START_IDX];
+ PrimitiveObjectInspector lengthObjectInspector =
(PrimitiveObjectInspector) arguments[LENGTH_IDX];
+ checkArgIntPrimitiveCategory(startIndexObjectInspector, FUNC_NAME,
2);
+ checkArgIntPrimitiveCategory(lengthObjectInspector, FUNC_NAME, 3);
+ }
+
+ arrayOI = (ListObjectInspector) arguments[ARRAY_IDX];
+ argumentOIs = arguments;
+
+ //return initialize(arguments);
+ return initListOI(arguments);
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ assert (children.length == MIN_ARG_COUNT);
+ return FUNC_NAME.toString().toLowerCase() + "(" + children[ARRAY_IDX]
+ ")";
+ }
+
+ List<Object> convertArray(List objects) {
+ List<Object> ret = new ArrayList<>();
+ for (Object o : objects) {
+ ret.add(converter.convert(o));
+ }
+ return ret;
+ }
+
+ void checkArgCategory(ObjectInspector[] arguments, int idx, Enum category,
+ FUNC_NAMES function_name, String typeName) throws
UDFArgumentTypeException {
+
+ if (!arguments[idx].getCategory().equals(category)) {
+ throw new UDFArgumentTypeException(idx,
+ "\"" + typeName + "\" "
+ + "expected at function " + function_name + ", but
"
+ + "\"" + arguments[idx].getTypeName() + "\" "
+ + "is found");
+ }
+ }
+
+ void checkArgIntPrimitiveCategory(PrimitiveObjectInspector objectInspector,
+ FUNC_NAMES function_name, int idx)
throws UDFArgumentTypeException {
Review Comment:
function_name should be camel case, i.e. functionName
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+ static final int ARRAY_IDX = 0;
+ static final int ARRAY2_IDX = 1;
+ static final int START_IDX = 1;
+ static final int LENGTH_IDX = 2;
+ static final int SEPARATOR_IDX = 1;
+ static final int REPLACE_NULL_IDX = 2;
+
+ int MIN_ARG_COUNT;
+ int MAX_ARG_COUNT;
+
+ transient ListObjectInspector arrayOI;
+ transient ObjectInspector[] argumentOIs;
+
+ transient Converter converter;
+
+ enum FUNC_NAMES {
+ ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN,
ARRAY_EXCEPT, ARRAY_INTERSECT
+ }
+
+ FUNC_NAMES FUNC_NAME;
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+
+ // Check if wrong number of arguments were passed
+ checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT);
+
+ // Check if the argument is of category LIST or not
+ checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST,
FUNC_NAME,
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) {
+ checkArgCategory(arguments, ARRAY2_IDX,
ObjectInspector.Category.LIST, FUNC_NAME,
+
org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+ }
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) {
+ PrimitiveObjectInspector startIndexObjectInspector =
(PrimitiveObjectInspector) arguments[START_IDX];
+ PrimitiveObjectInspector lengthObjectInspector =
(PrimitiveObjectInspector) arguments[LENGTH_IDX];
+ checkArgIntPrimitiveCategory(startIndexObjectInspector, FUNC_NAME,
2);
+ checkArgIntPrimitiveCategory(lengthObjectInspector, FUNC_NAME, 3);
+ }
+
+ arrayOI = (ListObjectInspector) arguments[ARRAY_IDX];
+ argumentOIs = arguments;
+
+ //return initialize(arguments);
+ return initListOI(arguments);
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ assert (children.length == MIN_ARG_COUNT);
+ return FUNC_NAME.toString().toLowerCase() + "(" + children[ARRAY_IDX]
+ ")";
+ }
+
+ List<Object> convertArray(List objects) {
+ List<Object> ret = new ArrayList<>();
+ for (Object o : objects) {
+ ret.add(converter.convert(o));
+ }
+ return ret;
+ }
+
+ void checkArgCategory(ObjectInspector[] arguments, int idx, Enum category,
+ FUNC_NAMES function_name, String typeName) throws
UDFArgumentTypeException {
+
+ if (!arguments[idx].getCategory().equals(category)) {
+ throw new UDFArgumentTypeException(idx,
+ "\"" + typeName + "\" "
+ + "expected at function " + function_name + ", but
"
+ + "\"" + arguments[idx].getTypeName() + "\" "
+ + "is found");
+ }
+ }
+
+ void checkArgIntPrimitiveCategory(PrimitiveObjectInspector objectInspector,
+ FUNC_NAMES function_name, int idx)
throws UDFArgumentTypeException {
+
+ switch (objectInspector.getPrimitiveCategory()) {
+ case SHORT:
+ case INT:
+ case LONG:
+ break;
+ default:
+ throw new UDFArgumentTypeException(0, "Argument " + idx
+ + " of function " + function_name + " must be \""
+ + serdeConstants.SMALLINT_TYPE_NAME + "\""
+ + " or \"" + serdeConstants.INT_TYPE_NAME + "\""
+ + " or \"" + serdeConstants.BIGINT_TYPE_NAME + "\",
but \""
+ + objectInspector.getTypeName() + "\" was found.");
+ }
+ }
+
+ boolean isListEmpty(Object array, ListObjectInspector listObjectInspector)
{
+
+ int arrayLength = listObjectInspector.getListLength(array);
+
+ // Check if array is null or empty or value is null
+ return array == null || arrayLength <= 0;
Review Comment:
optional nit: I would prefer this to be one line, as in
return array == null || listObjectInspector.getListLength(array) <= 0;
...and then get rid of all the blank lines above and below
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayDistinct.java:
##########
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.Description;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+
+import java.util.List;
+import java.util.stream.Collectors;
+
+/**
+ * Generic UDF for distinct array
+ * <code>ARRAY_DISTINCT(array(obj1, obj2, obj3...))</code>.
+ *
+ * @see org.apache.hadoop.hive.ql.udf.generic.GenericUDF
+ */
+@Description(name = "array_distinct",
+ value = "_FUNC_(array(obj1, obj2,...)) - "
+ + "The function returns an array of the same type as the input
argument where all duplicate"
+ + " values have been removed.",
+ extended = "Example:\n"
+ + " > SELECT _FUNC_(array('b', 'd', 'd', 'a')) FROM src LIMIT
1;\n"
+ + " 'b', 'd', 'a'")
+public class GenericUDFArrayDistinct extends AbstractGenericUDFArrayBase {
+
+ public GenericUDFArrayDistinct() {
+ FUNC_NAME = FUNC_NAMES.ARRAY_DISTINCT;
Review Comment:
Even if we do make these variables accessible from the child class, we should
only set the variables in the parent class. Please pass these up through the
constructor.
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+ static final int ARRAY_IDX = 0;
+ static final int ARRAY2_IDX = 1;
+ static final int START_IDX = 1;
+ static final int LENGTH_IDX = 2;
+ static final int SEPARATOR_IDX = 1;
+ static final int REPLACE_NULL_IDX = 2;
+
+ int MIN_ARG_COUNT;
+ int MAX_ARG_COUNT;
+
+ transient ListObjectInspector arrayOI;
+ transient ObjectInspector[] argumentOIs;
+
+ transient Converter converter;
+
+ enum FUNC_NAMES {
+ ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN,
ARRAY_EXCEPT, ARRAY_INTERSECT
+ }
+
+ FUNC_NAMES FUNC_NAME;
Review Comment:
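Same as the comment on line 47 (MIN_ARG_COUNT): use lower camel case, and prefer private (or at least protected) access.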
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+ static final int ARRAY_IDX = 0;
+ static final int ARRAY2_IDX = 1;
+ static final int START_IDX = 1;
+ static final int LENGTH_IDX = 2;
+ static final int SEPARATOR_IDX = 1;
+ static final int REPLACE_NULL_IDX = 2;
+
+ int MIN_ARG_COUNT;
+ int MAX_ARG_COUNT;
+
+ transient ListObjectInspector arrayOI;
+ transient ObjectInspector[] argumentOIs;
+
+ transient Converter converter;
+
+ enum FUNC_NAMES {
Review Comment:
I'm not a big fan of having the parent class being aware of the child
classes. Gonna comment below on how these might be replaced...
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+ static final int ARRAY_IDX = 0;
+ static final int ARRAY2_IDX = 1;
+ static final int START_IDX = 1;
+ static final int LENGTH_IDX = 2;
+ static final int SEPARATOR_IDX = 1;
+ static final int REPLACE_NULL_IDX = 2;
+
+ int MIN_ARG_COUNT;
+ int MAX_ARG_COUNT;
+
+ transient ListObjectInspector arrayOI;
+ transient ObjectInspector[] argumentOIs;
+
+ transient Converter converter;
+
+ enum FUNC_NAMES {
+ ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN,
ARRAY_EXCEPT, ARRAY_INTERSECT
+ }
+
+ FUNC_NAMES FUNC_NAME;
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+
+ // Check if wrong number of arguments were passed
+ checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT);
+
+ // Check if the argument is of category LIST or not
+ checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST,
FUNC_NAME,
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT
Review Comment:
Can we have an abstract supporter function, like "if (supportsTwoArgs())" (I'm
horrible with names), or something like that?
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+ static final int ARRAY_IDX = 0;
+ static final int ARRAY2_IDX = 1;
+ static final int START_IDX = 1;
+ static final int LENGTH_IDX = 2;
+ static final int SEPARATOR_IDX = 1;
+ static final int REPLACE_NULL_IDX = 2;
+
+ int MIN_ARG_COUNT;
+ int MAX_ARG_COUNT;
+
+ transient ListObjectInspector arrayOI;
+ transient ObjectInspector[] argumentOIs;
+
+ transient Converter converter;
+
+ enum FUNC_NAMES {
+ ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN,
ARRAY_EXCEPT, ARRAY_INTERSECT
+ }
+
+ FUNC_NAMES FUNC_NAME;
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+
+ // Check if wrong number of arguments were passed
+ checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT);
+
+ // Check if the argument is of category LIST or not
+ checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST,
FUNC_NAME,
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) {
+ checkArgCategory(arguments, ARRAY2_IDX,
ObjectInspector.Category.LIST, FUNC_NAME,
+
org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+ }
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) {
+ PrimitiveObjectInspector startIndexObjectInspector =
(PrimitiveObjectInspector) arguments[START_IDX];
+ PrimitiveObjectInspector lengthObjectInspector =
(PrimitiveObjectInspector) arguments[LENGTH_IDX];
+ checkArgIntPrimitiveCategory(startIndexObjectInspector, FUNC_NAME,
2);
+ checkArgIntPrimitiveCategory(lengthObjectInspector, FUNC_NAME, 3);
+ }
+
+ arrayOI = (ListObjectInspector) arguments[ARRAY_IDX];
+ argumentOIs = arguments;
+
+ //return initialize(arguments);
+ return initListOI(arguments);
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ assert (children.length == MIN_ARG_COUNT);
+ return FUNC_NAME.toString().toLowerCase() + "(" + children[ARRAY_IDX]
+ ")";
+ }
+
+ List<Object> convertArray(List objects) {
+ List<Object> ret = new ArrayList<>();
+ for (Object o : objects) {
+ ret.add(converter.convert(o));
+ }
+ return ret;
+ }
+
+ void checkArgCategory(ObjectInspector[] arguments, int idx, Enum category,
+ FUNC_NAMES function_name, String typeName) throws
UDFArgumentTypeException {
+
+ if (!arguments[idx].getCategory().equals(category)) {
+ throw new UDFArgumentTypeException(idx,
+ "\"" + typeName + "\" "
+ + "expected at function " + function_name + ", but
"
+ + "\"" + arguments[idx].getTypeName() + "\" "
+ + "is found");
+ }
+ }
+
+ void checkArgIntPrimitiveCategory(PrimitiveObjectInspector objectInspector,
+ FUNC_NAMES function_name, int idx)
throws UDFArgumentTypeException {
+
+ switch (objectInspector.getPrimitiveCategory()) {
+ case SHORT:
+ case INT:
+ case LONG:
+ break;
+ default:
+ throw new UDFArgumentTypeException(0, "Argument " + idx
+ + " of function " + function_name + " must be \""
+ + serdeConstants.SMALLINT_TYPE_NAME + "\""
+ + " or \"" + serdeConstants.INT_TYPE_NAME + "\""
+ + " or \"" + serdeConstants.BIGINT_TYPE_NAME + "\",
but \""
+ + objectInspector.getTypeName() + "\" was found.");
+ }
+ }
+
+ boolean isListEmpty(Object array, ListObjectInspector listObjectInspector)
{
+
+ int arrayLength = listObjectInspector.getListLength(array);
+
+ // Check if array is null or empty or value is null
+ return array == null || arrayLength <= 0;
+ }
+
+ ObjectInspector initListOI(ObjectInspector[] arguments) {
+
+ GenericUDFUtils.ReturnObjectInspectorResolver returnOIResolver =
+ new GenericUDFUtils.ReturnObjectInspectorResolver(true);
+
+ ObjectInspector elementObjectInspector =
+ ((ListObjectInspector)
(arguments[0])).getListElementObjectInspector();
+
+ ObjectInspector returnOI =
returnOIResolver.get(elementObjectInspector);
+ converter =
ObjectInspectorConverters.getConverter(elementObjectInspector, returnOI);
+ if(FUNC_NAME == FUNC_NAMES.ARRAY_MAX || FUNC_NAME ==
FUNC_NAMES.ARRAY_MIN){
Review Comment:
Not sure how to handle this one yet, but again, if we can move these down to
the derived class to avoid function names in the base class, I think that would
be good. Will think of a way later.
Issue Time Tracking
-------------------
Worklog Id: (was: 826917)
Time Spent: 20m (was: 10m)
> Implement array_distinct UDF to return an array after removing duplicates in
> it
> -------------------------------------------------------------------------------
>
> Key: HIVE-26754
> URL: https://issues.apache.org/jira/browse/HIVE-26754
> Project: Hive
> Issue Type: Sub-task
> Components: Hive
> Reporter: Taraka Rama Rao Lethavadla
> Assignee: Taraka Rama Rao Lethavadla
> Priority: Major
> Labels: pull-request-available
> Time Spent: 20m
> Remaining Estimate: 0h
>
> *array_distinct(array(obj1, obj2,...))* - The function returns an array of
> the same type as the input argument where all duplicate values have been
> removed.
> Example:
> > SELECT array_distinct(array('b', 'd', 'd', 'a')) FROM src LIMIT 1;
> ['b', 'd', 'a']
--
This message was sent by Atlassian Jira
(v8.20.10#820010)