Github user DylanGuedes commented on a diff in the pull request:
https://github.com/apache/spark/pull/21045#discussion_r188283661
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/collectionOperations.scala
---
@@ -90,6 +90,110 @@ case class MapKeys(child: Expression)
override def prettyName: String = "map_keys"
}
+@ExpressionDescription(
+ usage = """_FUNC_(a1, a2) - Returns a merged array matching N-th element
of first
+ array with the N-th element of second.""",
+ examples = """
+ Examples:
+ > SELECT _FUNC_(array(1, 2, 3), array(2, 3, 4));
+ [[1, 2], [2, 3], [3, 4]]
+ """,
+ since = "2.4.0")
+case class Zip(left: Expression, right: Expression)
+ extends BinaryExpression with ExpectsInputTypes {
+
+ override def inputTypes: Seq[AbstractDataType] = Seq(ArrayType,
ArrayType)
+
+ override def dataType: DataType = ArrayType(StructType(
+ StructField("_1", left.dataType.asInstanceOf[ArrayType].elementType,
true) ::
+ StructField("_2", right.dataType.asInstanceOf[ArrayType].elementType,
true) ::
+ Nil))
+
+ override def prettyName: String = "zip"
+
+ override def doGenCode(ctx: CodegenContext, ev: ExprCode): ExprCode = {
+ nullSafeCodeGen(ctx, ev, (arr1, arr2) => {
+ val genericArrayData = classOf[GenericArrayData].getName
+ val genericInternalRow = classOf[GenericInternalRow].getName
+
+ val i = ctx.freshName("i")
+ val values = ctx.freshName("values")
+ val len1 = ctx.freshName("len1")
+ val len2 = ctx.freshName("len2")
+ val pair = ctx.freshName("pair")
+ val getValue1 = CodeGenerator.getValue(
+ arr1, left.dataType.asInstanceOf[ArrayType].elementType, i)
+ val getValue2 = CodeGenerator.getValue(
+ arr2, right.dataType.asInstanceOf[ArrayType].elementType, i)
+
+ s"""
+ |int $len1 = $arr1.numElements();
+ |int $len2 = $arr2.numElements();
+ |Object[] $values;
+ |Object[] $pair;
+ |if ($len1 > $len2) {
+ | $values = new Object[$len1];
+ | for (int $i = 0; $i < $len1; $i ++) {
+ | $pair = new Object[2];
+ | $pair[0] = $getValue1;
+ | if ($i >= $len2) {
+ | $pair[1] = null;
+ | } else {
+ | $pair[1] = $getValue2;
+ | }
+ | $values[$i] = new $genericInternalRow($pair);
+ | }
+ |} else {
+ | $values = new Object[$len2];
+ | for (int $i = 0; $i < $len2; $i ++) {
+ | $pair = new Object[2];
+ | $pair[1] = $getValue2;
+ | if ($i >= $len1) {
+ | $pair[0] = null;
+ | } else {
+ | $pair[0] = $getValue1;
+ | }
+ | $values[$i] = new $genericInternalRow($pair);
+ | }
+ |}
+ |${ev.value} = new $genericArrayData($values);
+ """.stripMargin
+ })
+ }
+
+ def extendWithNull(a1: Array[AnyRef], a2: Array[AnyRef]):
+ (Array[AnyRef], Array[AnyRef]) = {
+ val lens = (a1.length, a2.length)
+
+ var arr1 = a1
+ var arr2 = a2
+
+ val diff = lens._1 - lens._2
+ if (lens._1 > lens._2) {
+ arr2 = a2 ++ Array.fill(diff)(null)
+ }
+ if (lens._1 < lens._2) {
+ arr1 = a1 ++ Array.fill(-diff)(null)
+ }
+
+ (arr1, arr2)
+ }
+
+ override def nullSafeEval(a1: Any, a2: Any): Any = {
--- End diff --
Nice, I figured it out after seeing the coalesce function; it worked great.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]