Github user hvanhovell commented on a diff in the pull request:
https://github.com/apache/spark/pull/22013#discussion_r208136330
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala
---
@@ -365,3 +365,69 @@ case class ArrayAggregate(
override def prettyName: String = "aggregate"
}
+
+/**
+ * Transform Keys in a map using the transform_keys function.
+ */
+@ExpressionDescription(
+ usage = "_FUNC_(expr, func) - Transforms elements in a map using the
function.",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k,v) -> k + 1);
+ map(array(2, 3, 4), array(1, 2, 3))
+ > SELECT _FUNC_(map(array(1, 2, 3), array(1, 2, 3), (k, v) -> k + v);
+ map(array(2, 4, 6), array(1, 2, 3))
+ """,
+ since = "2.4.0")
+case class TransformKeys(
+ input: Expression,
+ function: Expression)
+ extends ArrayBasedHigherOrderFunction with CodegenFallback {
+
+ override def nullable: Boolean = input.nullable
+
+ override def dataType: DataType = {
+ val valueType = input.dataType.asInstanceOf[MapType].valueType
+ MapType(function.dataType, valueType, input.nullable)
+ }
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(MapType,
expectingFunctionType)
+
+ override def bind(f: (Expression, Seq[(DataType, Boolean)]) =>
LambdaFunction):
+ TransformKeys = {
+ val (keyElementType, valueElementType, containsNull) = input.dataType
match {
+ case MapType(keyType, valueType, containsNullValue) =>
+ (keyType, valueType, containsNullValue)
+ case _ =>
+ val MapType(keyType, valueType, containsNullValue) =
MapType.defaultConcreteType
+ (keyType, valueType, containsNullValue)
+ }
+ copy(function = f(function, (keyElementType, false) ::
(valueElementType, containsNull) :: Nil))
+ }
+
+ @transient lazy val (keyVar, valueVar) = {
+ val LambdaFunction(
+ _, (keyVar: NamedLambdaVariable) :: (valueVar: NamedLambdaVariable) ::
Nil, _) = function
+ (keyVar, valueVar)
+ }
+
+ override def eval(input: InternalRow): Any = {
+ val arr = this.input.eval(input).asInstanceOf[MapData]
+ if (arr == null) {
+ null
+ } else {
+ val f = functionForEval
+ val resultKeys = new GenericArrayData(new
Array[Any](arr.numElements))
+ var i = 0
+ while (i < arr.numElements) {
+ keyVar.value.set(arr.keyArray().get(i, keyVar.dataType))
+ valueVar.value.set(arr.valueArray().get(i, valueVar.dataType))
+ resultKeys.update(i, f.eval(input))
--- End diff --
This assumes that the transformation will return a unique key right? If it
doesn't you'll break the map semantics. For example: `map_key(some_map, (k, v)
-> 0)`
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]