Github user techaddict commented on a diff in the pull request:

    https://github.com/apache/spark/pull/22031#discussion_r210452329
  
    --- Diff: 
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/higherOrderFunctions.scala
 ---
    @@ -442,3 +442,91 @@ case class ArrayAggregate(
     
       override def prettyName: String = "aggregate"
     }
    +
    +// scalastyle:off line.size.limit
    +@ExpressionDescription(
    +  usage = "_FUNC_(left, right, func) - Merges the two given arrays, 
element-wise, into a single array using function. If one array is shorter, 
nulls are appended at the end to match the length of the longer array, before 
applying function.",
    +  examples = """
    +    Examples:
    +      > SELECT _FUNC_(array(1, 2, 3), array('a', 'b', 'c'), (x, y) -> (y, 
x));
    +       array(('a', 1), ('b', 2), ('c', 3))
    +      > SELECT _FUNC_(array(1, 2), array(3, 4), (x, y) -> x + y);
    +       array(4, 6)
    +      > SELECT _FUNC_(array('a', 'b', 'c'), array('d', 'e', 'f'), (x, y) 
-> concat(x, y));
    +       array('ad', 'be', 'cf')
    +  """,
    +  since = "2.4.0")
    +// scalastyle:on line.size.limit
    +case class ArraysZipWith(
    +    left: Expression,
    +    right: Expression,
    +    function: Expression)
    +  extends HigherOrderFunction with CodegenFallback with ExpectsInputTypes {
    +
    +  override def inputs: Seq[Expression] = List(left, right)
    +
    +  override def functions: Seq[Expression] = List(function)
    +
    +  def expectingFunctionType: AbstractDataType = AnyDataType
    +  @transient lazy val functionForEval: Expression = functionsForEval.head
    +
    +  override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType, 
ArrayType, expectingFunctionType)
    +
    +  override def nullable: Boolean = inputs.exists(_.nullable)
    +
    +  override def dataType: ArrayType = ArrayType(function.dataType, 
function.nullable)
    +
    +  override def bind(f: (Expression, Seq[(DataType, Boolean)]) => 
LambdaFunction): ArraysZipWith = {
    +    val (leftElementType, leftContainsNull) = left.dataType match {
    +      case ArrayType(elementType, containsNull) => (elementType, 
containsNull)
    +      case _ =>
    +        val ArrayType(elementType, containsNull) = 
ArrayType.defaultConcreteType
    +        (elementType, containsNull)
    +    }
    +    val (rightElementType, rightContainsNull) = right.dataType match {
    +      case ArrayType(elementType, containsNull) => (elementType, 
containsNull)
    +      case _ =>
    +        val ArrayType(elementType, containsNull) = 
ArrayType.defaultConcreteType
    +        (elementType, containsNull)
    +    }
    +    copy(function = f(function,
    +      (leftElementType, leftContainsNull) :: (rightElementType, 
rightContainsNull) :: Nil))
    --- End diff --
    
    @mn-mikke @ueshin "both arrays must be the same length" was how zip_with in 
Presto used to work; they have since moved to appending nulls and processing the 
arrays regardless of a length mismatch.


---

---------------------------------------------------------------------
To unsubscribe, e-mail: reviews-unsubscr...@spark.apache.org
For additional commands, e-mail: reviews-h...@spark.apache.org

Reply via email to