[
https://issues.apache.org/jira/browse/HIVE-26754?focusedWorklogId=827569&page=com.atlassian.jira.plugin.system.issuetabpanels:worklog-tabpanel#worklog-827569
]
ASF GitHub Bot logged work on HIVE-26754:
-----------------------------------------
Author: ASF GitHub Bot
Created on: 21/Nov/22 13:01
Start Date: 21/Nov/22 13:01
Worklog Time Spent: 10m
Work Description: SourabhBadhya commented on code in PR #3777:
URL: https://github.com/apache/hive/pull/3777#discussion_r1028009527
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/AbstractGenericUDFArrayBase.java:
##########
@@ -0,0 +1,160 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.hadoop.hive.ql.udf.generic;
+
+import org.apache.hadoop.hive.ql.exec.UDFArgumentException;
+import org.apache.hadoop.hive.ql.exec.UDFArgumentTypeException;
+import org.apache.hadoop.hive.serde.serdeConstants;
+
+import org.apache.hadoop.hive.serde2.objectinspector.ListObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspector;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorFactory;
+import org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters;
+import
org.apache.hadoop.hive.serde2.objectinspector.ObjectInspectorConverters.Converter;
+import org.apache.hadoop.hive.serde2.objectinspector.PrimitiveObjectInspector;
+
+import java.util.ArrayList;
+import java.util.List;
+
+/**
+ * Abstract GenericUDF for array functions
+ */
+
+public abstract class AbstractGenericUDFArrayBase extends GenericUDF {
+
+ static final int ARRAY_IDX = 0;
+ static final int ARRAY2_IDX = 1;
+ static final int START_IDX = 1;
+ static final int LENGTH_IDX = 2;
+ static final int SEPARATOR_IDX = 1;
+ static final int REPLACE_NULL_IDX = 2;
+
+ int MIN_ARG_COUNT;
+ int MAX_ARG_COUNT;
+
+ transient ListObjectInspector arrayOI;
+ transient ObjectInspector[] argumentOIs;
+
+ transient Converter converter;
+
+ enum FUNC_NAMES {
+ ARRAY_MAX, ARRAY_MIN, ARRAY_DISTINCT, ARRAY_SLICE, ARRAY_JOIN,
ARRAY_EXCEPT, ARRAY_INTERSECT
+ }
+
+ FUNC_NAMES FUNC_NAME;
+
+ @Override
+ public ObjectInspector initialize(ObjectInspector[] arguments)
+ throws UDFArgumentException {
+
+ // Check if wrong number of arguments were passed
+ checkArgsSize(arguments, MIN_ARG_COUNT, MAX_ARG_COUNT);
+
+ // Check if the argument is of category LIST or not
+ checkArgCategory(arguments, ARRAY_IDX, ObjectInspector.Category.LIST,
FUNC_NAME,
+ org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_EXCEPT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_INTERSECT
+ || FUNC_NAME == FUNC_NAMES.ARRAY_JOIN) {
+ checkArgCategory(arguments, ARRAY2_IDX,
ObjectInspector.Category.LIST, FUNC_NAME,
+
org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME);
+ }
+
+ if (FUNC_NAME == FUNC_NAMES.ARRAY_SLICE) {
+ PrimitiveObjectInspector startIndexObjectInspector =
(PrimitiveObjectInspector) arguments[START_IDX];
+ PrimitiveObjectInspector lengthObjectInspector =
(PrimitiveObjectInspector) arguments[LENGTH_IDX];
+ checkArgIntPrimitiveCategory(startIndexObjectInspector, FUNC_NAME,
2);
+ checkArgIntPrimitiveCategory(lengthObjectInspector, FUNC_NAME, 3);
+ }
+
+ arrayOI = (ListObjectInspector) arguments[ARRAY_IDX];
+ argumentOIs = arguments;
+
+ //return initialize(arguments);
+ return initListOI(arguments);
+ }
+
+ @Override
+ public String getDisplayString(String[] children) {
+ assert (children.length == MIN_ARG_COUNT);
+ return FUNC_NAME.toString().toLowerCase() + "(" + children[ARRAY_IDX]
+ ")";
+ }
+
+ List<Object> convertArray(List objects) {
+ List<Object> ret = new ArrayList<>();
+ for (Object o : objects) {
+ ret.add(converter.convert(o));
+ }
+ return ret;
+ }
+
+ void checkArgCategory(ObjectInspector[] arguments, int idx, Enum category,
+ FUNC_NAMES function_name, String typeName) throws
UDFArgumentTypeException {
+
+ if (!arguments[idx].getCategory().equals(category)) {
+ throw new UDFArgumentTypeException(idx,
+ "\"" + typeName + "\" "
+ + "expected at function " + function_name + ", but
"
+ + "\"" + arguments[idx].getTypeName() + "\" "
+ + "is found");
+ }
+ }
+
+ void checkArgIntPrimitiveCategory(PrimitiveObjectInspector objectInspector,
+ FUNC_NAMES function_name, int idx)
throws UDFArgumentTypeException {
+
+ switch (objectInspector.getPrimitiveCategory()) {
+ case SHORT:
+ case INT:
+ case LONG:
+ break;
+ default:
+ throw new UDFArgumentTypeException(0, "Argument " + idx
+ + " of function " + function_name + " must be \""
+ + serdeConstants.SMALLINT_TYPE_NAME + "\""
+ + " or \"" + serdeConstants.INT_TYPE_NAME + "\""
+ + " or \"" + serdeConstants.BIGINT_TYPE_NAME + "\",
but \""
+ + objectInspector.getTypeName() + "\" was found.");
+ }
+ }
+
+ boolean isListEmpty(Object array, ListObjectInspector listObjectInspector)
{
+
+ int arrayLength = listObjectInspector.getListLength(array);
+
+ // Check if array is null or empty or value is null
+ return array == null || arrayLength <= 0;
Review Comment:
This can be simplified to:
`return listObjectInspector.getListLength(array) <= 0;`
The function `getListLength` already checks for a null list and returns -1 in
that case, so the explicit `array == null` check is redundant. See here -
https://github.com/apache/hive/blob/master/serde/src/java/org/apache/hadoop/hive/serde2/objectinspector/ListObjectInspector.java#L42-L45
Issue Time Tracking
-------------------
Worklog Id: (was: 827569)
Time Spent: 50m (was: 40m)
> Implement array_distinct UDF to return an array after removing duplicates in
> it
> -------------------------------------------------------------------------------
>
> Key: HIVE-26754
> URL: https://issues.apache.org/jira/browse/HIVE-26754
> Project: Hive
> Issue Type: Sub-task
> Components: Hive
> Reporter: Taraka Rama Rao Lethavadla
> Assignee: Taraka Rama Rao Lethavadla
> Priority: Major
> Labels: pull-request-available
> Time Spent: 50m
> Remaining Estimate: 0h
>
> *array_distinct(array(obj1, obj2,...))* - The function returns an array of
> the same type as the input argument where all duplicate values have been
> removed.
> Example:
> > SELECT array_distinct(array('b', 'd', 'd', 'a')) FROM src LIMIT 1;
> ['a', 'b', 'd']
--
This message was sent by Atlassian Jira
(v8.20.10#820010)