Github user ueshin commented on a diff in the pull request:
https://github.com/apache/spark/pull/21102#discussion_r207765490
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
@@ -3965,6 +4034,242 @@ object ArrayUnion {
}
}
+/**
+ * Returns an array of the elements in the intersect of x and y, without
duplicates
+ */
+@ExpressionDescription(
+ usage = """
+ _FUNC_(array1, array2) - Returns an array of the elements in the
intersection of array1 and
+ array2, without duplicates.
+ """,
+ examples = """
+ Examples:Fun
+ > SELECT _FUNC_(array(1, 2, 3), array(1, 3, 5));
+ array(1, 3)
+ """,
+ since = "2.4.0")
+case class ArrayIntersect(left: Expression, right: Expression) extends
ArraySetLike
+ with ComplexTypeMergingExpression {
+ override def dataType: DataType = {
+ dataTypeCheck
+ ArrayType(elementType,
+ left.dataType.asInstanceOf[ArrayType].containsNull &&
+ right.dataType.asInstanceOf[ArrayType].containsNull)
+ }
+
+ @transient lazy val evalIntersect: (ArrayData, ArrayData) => ArrayData =
{
+ if (elementTypeSupportEquals) {
+ (array1, array2) =>
+ val hs = new OpenHashSet[Any]
--- End diff --
How about short-circuiting and returning an empty array as soon as we find
that either of the two input arrays is empty?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]