Github user mgaido91 commented on a diff in the pull request:
https://github.com/apache/spark/pull/21986#discussion_r207816072
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala
---
@@ -210,3 +221,66 @@ case class ArrayTransform(
override def prettyName: String = "transform"
}
+
+/**
+ * Filters entries in a map using the provided function.
+ */
+@ExpressionDescription(
+usage = "_FUNC_(expr, func) - Filters entries in a map using the
function.",
+examples = """
+ Examples:
+ > SELECT _FUNC_(map(1, 0, 2, 2, 3, -1), (k, v) -> k > v);
+ [1 -> 0, 3 -> -1]
+ """,
+since = "2.4.0")
+case class MapFilter(
+ input: Expression,
+ function: Expression)
+ extends MapBasedUnaryHigherOrderFunction with CodegenFallback {
+
+ @transient val (keyType, valueType, valueContainsNull) = input.dataType
match {
+ case MapType(kType, vType, vContainsNull) => (kType, vType,
vContainsNull)
+ case _ =>
+ val MapType(kType, vType, vContainsNull) =
MapType.defaultConcreteType
+ (kType, vType, vContainsNull)
+ }
+
+ @transient lazy val (keyVar, valueVar) = {
+ val args = function.asInstanceOf[LambdaFunction].arguments
+ (args.head.asInstanceOf[NamedLambdaVariable],
args.tail.head.asInstanceOf[NamedLambdaVariable])
+ }
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) =>
LambdaFunction): MapFilter = {
+ function match {
+ case LambdaFunction(_, _, _) =>
+ copy(function = f(function, (keyType, false) :: (valueType,
valueContainsNull) :: Nil))
+ }
+ }
+
+ override def nullable: Boolean = input.nullable
+
+ override def eval(input: InternalRow): Any = {
+ val m = this.input.eval(input).asInstanceOf[MapData]
+ if (m == null) {
+ null
+ } else {
+ val retKeys = new mutable.ListBuffer[Any]
+ val retValues = new mutable.ListBuffer[Any]
--- End diff --
But I just checked that in `ArrayFilter` you initialized it with the number
of incoming elements. So I think there is no difference in terms of
performance: by using an upper bound on the number of output elements, we can
be sure that no copy is performed.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]