kiszk commented on a change in pull request #30243:
URL: https://github.com/apache/spark/pull/30243#discussion_r528061382
##########
File path:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
##########
@@ -3957,3 +3957,194 @@ case class ArrayExcept(left: Expression, right:
Expression) extends ArrayBinaryL
override def prettyName: String = "array_except"
}
+
+/**
+ * Checks if the array (left) has the array (right)
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(array1, array2) - Returns true if the array1 contains the
array2.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(array(1, 2, 3), array(2));
+ true
+ """,
+ group = "array_funcs",
+ since = "3.1.0")
+case class ArrayContainsArray(left: Expression, right: Expression)
+ extends BinaryArrayExpressionWithImplicitCast with ArraySetLike with
NullIntolerant {
+
+ override def dataType: DataType = BooleanType
+
+ override def et: DataType = elementType
+
+ override def dt: DataType = dataType
+
+ override def checkInputDataTypes(): TypeCheckResult = {
+ val typeCheckResult = super.checkInputDataTypes()
+ if (typeCheckResult.isSuccess) {
+ TypeUtils.checkForOrderingExpr(et, s"function $prettyName")
+ } else {
+ typeCheckResult
+ }
+ }
+
+ @transient lazy val evalContains: (ArrayData, ArrayData) => Boolean = {
+ if (TypeUtils.typeWithProperEquals(elementType)) {
+ (array1, array2) =>
+ if (array2.numElements() == 0) {
+ true
+ } else if (array1.numElements() == 0) {
+ false
+ } else {
+ val hs = new OpenHashSet[Any]
+ var result = true
+ var foundNullElement = false
+ var i = 0
+ while (i < array1.numElements()) {
+ if (array1.isNullAt(i) && !foundNullElement) {
+ foundNullElement = true
Review comment:
I think that this is a bit more efficient since it avoids storing `null` in
the set.
```suggestion
if (array1.isNullAt(i)) {
if (!foundNullElement) {
foundNullElement = true
}
```
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]