okumin commented on code in PR #4090:
URL: https://github.com/apache/hive/pull/4090#discussion_r1224999059
##########
ql/src/test/org/apache/hadoop/hive/ql/udf/generic/TestGenericUDFArrayExcept.java:
##########
@@ -75,27 +80,41 @@ public void testPrimitive() throws HiveException {
i6 = new FloatWritable(1.1f);
i7 = new FloatWritable(2.28f);
i8 = new FloatWritable(2.20f);
- List<Object> inputListf = new ArrayList<>();
- inputListf.add(i1);
- inputListf.add(i2);
- inputListf.add(i3);
- inputListf.add(i4);
-
- runAndVerify(new ArrayList<>(inputListf), asList(i5, i6, i7, i8),
asList(i3, i4));
+ List<Object> inputFloatList = new ArrayList<>();
+ inputFloatList.add(i1);
+ inputFloatList.add(i2);
+ inputFloatList.add(i3);
+ inputFloatList.add(i4);
- runAndVerify(new
ArrayList<>(inputListf),inputList,asList(i1,i2,i3,i4)); // Int & float arrays
+ udf.initialize(new ObjectInspector[] { floatObjectInspector,
floatObjectInspector });
+ runAndVerify(new ArrayList<>(inputFloatList), asList(i5, i6, i7, i8),
asList(i3, i4));
Object s1 = new Text("1");
Object s2 = new Text("2");
Object s3 = new Text("4");
Object s4 = new Text("5");
- List<Object> inputLists = new ArrayList<>();
- inputLists.add(s1);
- inputLists.add(s2);
- inputLists.add(s3);
- inputLists.add(s4);
+ List<Object> inputStringList = new ArrayList<>();
+ inputStringList.add(s1);
+ inputStringList.add(s2);
+ inputStringList.add(s3);
+ inputStringList.add(s4);
- runAndVerify(new
ArrayList<>(inputListf),inputLists,asList(i1,i2,i3,i4)); // float and string
arrays
+ udf.initialize(new ObjectInspector[] { stringObjectInspector,
stringObjectInspector });
+ runAndVerify(inputStringList,asList(s1,s3),asList(s2,s4));
+ // Empty array output
+ runAndVerify(inputStringList,inputStringList,asList());
+ runAndVerify(inputStringList,asList(),inputStringList);
+ // Empty input arrays
+ runAndVerify(asList(),asList(),asList());
Review Comment:
Great test cases!
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java:
##########
@@ -31,29 +34,47 @@
*/
@Description(name = "array_except", value = "_FUNC_(array1, array2) - Returns
an array of the elements in array1 but not in array2.", extended =
"Example:\n" + " > SELECT _FUNC_(array(1, 2, 3,4), array(2,3)) FROM src
LIMIT 1;\n"
- + " [1,4]") public class GenericUDFArrayExcept extends
AbstractGenericUDFArrayBase {
+ + " [1,4]")
+public class GenericUDFArrayExcept extends AbstractGenericUDFArrayBase {
static final int ARRAY2_IDX = 1;
private static final String FUNC_NAME = "ARRAY_EXCEPT";
+ static final String ERROR_NOT_COMPARABLE = "Input arrays are not comparable
to use ARRAY_EXCEPT udf";
+ private transient ListObjectInspector array2OI;
+ private transient ObjectInspector arrayElementOI;
+ private transient ObjectInspector array2ElementOI;
public GenericUDFArrayExcept() {
super(FUNC_NAME, 2, 2, ObjectInspector.Category.LIST);
}
@Override public ObjectInspector initialize(ObjectInspector[] arguments)
throws UDFArgumentException {
ObjectInspector defaultOI = super.initialize(arguments);
+ array2OI = (ListObjectInspector) arguments[ARRAY2_IDX];
Review Comment:
NITs:
- This line should be after the following `checkArgCategory` because it
validates the type and `checkArgCategory` throws a kind message
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java:
##########
@@ -31,29 +34,47 @@
*/
@Description(name = "array_except", value = "_FUNC_(array1, array2) - Returns
an array of the elements in array1 but not in array2.", extended =
"Example:\n" + " > SELECT _FUNC_(array(1, 2, 3,4), array(2,3)) FROM src
LIMIT 1;\n"
- + " [1,4]") public class GenericUDFArrayExcept extends
AbstractGenericUDFArrayBase {
+ + " [1,4]")
+public class GenericUDFArrayExcept extends AbstractGenericUDFArrayBase {
static final int ARRAY2_IDX = 1;
private static final String FUNC_NAME = "ARRAY_EXCEPT";
+ static final String ERROR_NOT_COMPARABLE = "Input arrays are not comparable
to use ARRAY_EXCEPT udf";
+ private transient ListObjectInspector array2OI;
+ private transient ObjectInspector arrayElementOI;
+ private transient ObjectInspector array2ElementOI;
public GenericUDFArrayExcept() {
super(FUNC_NAME, 2, 2, ObjectInspector.Category.LIST);
}
@Override public ObjectInspector initialize(ObjectInspector[] arguments)
throws UDFArgumentException {
ObjectInspector defaultOI = super.initialize(arguments);
+ array2OI = (ListObjectInspector) arguments[ARRAY2_IDX];
checkArgCategory(arguments, ARRAY2_IDX, ObjectInspector.Category.LIST,
FUNC_NAME,
org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); //Array1
is already getting validated in Parent class
+ arrayElementOI = arrayOI.getListElementObjectInspector();
+ array2ElementOI = array2OI.getListElementObjectInspector();
Review Comment:
NIT: `array2OI` can be a local variable, or we can simply do
`ObjectInspectorUtils.compareTypes(arrayOI, arguments[ARRAY2_IDX])` because it
seems to recursively check types of LIST, MAP, etc.
##########
ql/src/java/org/apache/hadoop/hive/ql/udf/generic/GenericUDFArrayExcept.java:
##########
@@ -31,29 +34,47 @@
*/
@Description(name = "array_except", value = "_FUNC_(array1, array2) - Returns
an array of the elements in array1 but not in array2.", extended =
"Example:\n" + " > SELECT _FUNC_(array(1, 2, 3,4), array(2,3)) FROM src
LIMIT 1;\n"
- + " [1,4]") public class GenericUDFArrayExcept extends
AbstractGenericUDFArrayBase {
+ + " [1,4]")
+public class GenericUDFArrayExcept extends AbstractGenericUDFArrayBase {
static final int ARRAY2_IDX = 1;
private static final String FUNC_NAME = "ARRAY_EXCEPT";
+ static final String ERROR_NOT_COMPARABLE = "Input arrays are not comparable
to use ARRAY_EXCEPT udf";
+ private transient ListObjectInspector array2OI;
+ private transient ObjectInspector arrayElementOI;
+ private transient ObjectInspector array2ElementOI;
public GenericUDFArrayExcept() {
super(FUNC_NAME, 2, 2, ObjectInspector.Category.LIST);
}
@Override public ObjectInspector initialize(ObjectInspector[] arguments)
throws UDFArgumentException {
ObjectInspector defaultOI = super.initialize(arguments);
+ array2OI = (ListObjectInspector) arguments[ARRAY2_IDX];
checkArgCategory(arguments, ARRAY2_IDX, ObjectInspector.Category.LIST,
FUNC_NAME,
org.apache.hadoop.hive.serde.serdeConstants.LIST_TYPE_NAME); //Array1
is already getting validated in Parent class
+ arrayElementOI = arrayOI.getListElementObjectInspector();
+ array2ElementOI = array2OI.getListElementObjectInspector();
+ if (!ObjectInspectorUtils.compareTypes(arrayElementOI, array2ElementOI)) {
// check if elements of arrays are comparable
+ throw new UDFArgumentTypeException(1, ERROR_NOT_COMPARABLE);
+ }
return defaultOI;
}
@Override public Object evaluate(DeferredObject[] arguments) throws
HiveException {
Object array = arguments[ARRAY_IDX].get();
- if (array == null || arrayOI.getListLength(array) <= 0) {
+ Object array2 = arguments[ARRAY2_IDX].get();
+ if (array == null) {
+ return null;
+ }
+
+ if (array2 == null) {
return null;
}
List<?> retArray3 = ((ListObjectInspector)
argumentOIs[ARRAY_IDX]).getList(array);
- retArray3.removeAll(((ListObjectInspector)
argumentOIs[ARRAY2_IDX]).getList(arguments[ARRAY2_IDX].get()));
- return retArray3.stream().distinct().map(o ->
converter.convert(o)).collect(Collectors.toList());
+ List inputArrayCopy = new ArrayList<>();
+ inputArrayCopy.addAll(retArray3);
+ inputArrayCopy.removeAll(((ListObjectInspector)
argumentOIs[ARRAY2_IDX]).getList(arguments[ARRAY2_IDX].get()));
+ return inputArrayCopy.stream().distinct().map(o ->
converter.convert(o)).collect(Collectors.toList());
Review Comment:
We got robustness. Thanks!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]