Github user cloud-fan commented on a diff in the pull request: https://github.com/apache/spark/pull/21028#discussion_r186354007 --- Diff: sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala --- @@ -530,6 +560,155 @@ case class ArrayContains(left: Expression, right: Expression) override def prettyName: String = "array_contains" } +/** + * Checks if the two arrays contain at least one common element. + */ +// scalastyle:off line.size.limit +@ExpressionDescription( + usage = "_FUNC_(a1, a2) - Returns true if a1 contains at least an element present also in a2. If the arrays have no common element and either of them contains a null element null is returned, false otherwise.", + examples = """ + Examples: + > SELECT _FUNC_(array(1, 2, 3), array(3, 4, 5)); + true + """, since = "2.4.0") +// scalastyle:off line.size.limit +case class ArraysOverlap(left: Expression, right: Expression) + extends BinaryArrayExpressionWithImplicitCast { + + override def dataType: DataType = BooleanType + + override def nullable: Boolean = { + left.nullable || right.nullable || left.dataType.asInstanceOf[ArrayType].containsNull || + right.dataType.asInstanceOf[ArrayType].containsNull + } + + override def nullSafeEval(a1: Any, a2: Any): Any = { + var hasNull = false + val arr1 = a1.asInstanceOf[ArrayData] + val arr2 = a2.asInstanceOf[ArrayData] + val (biggestArr, smallestArr) = if (arr1.numElements() > arr2.numElements()) { + (arr1, arr2) + } else { + (arr2, arr1) + } + if (smallestArr.numElements() > 0) { + val smallestSet = new mutable.HashSet[Any] + smallestArr.foreach(elementType, (_, v) => + if (v == null) { + hasNull = true + } else { + smallestSet += v + }) + biggestArr.foreach(elementType, (_, v1) => + if (v1 == null) { + hasNull = true + } else if (smallestSet.contains(v1)) { + return true + } + ) + } else if (containsNull(biggestArr, right.dataType.asInstanceOf[ArrayType])) { + hasNull = true + } + if (hasNull) { + null + } else { + false + 
} + } + + def containsNull(arr: ArrayData, dt: ArrayType): Boolean = { + if (dt.containsNull) { + arr.foreach(elementType, (_, v) => --- End diff --

```
var i = 0
var hasNull = false
while (i < arr.numElements && !hasNull) {
  hasNull = arr.isNullAt(i)
  i += 1
}
hasNull
```
--- --------------------------------------------------------------------- To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org For additional commands, e-mail: reviews-h...@spark.apache.org