Github user viirya commented on a diff in the pull request:
https://github.com/apache/spark/pull/21240#discussion_r186291554
--- Diff:
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/generators.scala
---
@@ -222,6 +222,51 @@ case class Stack(children: Seq[Expression]) extends
Generator {
}
}
+/**
+ * Replicate the row N times. N is specified as the first argument to the
function.
+ * {{{
+ * SELECT replicate_rows(2, "val1", "val2") ->
+ * 2 val1 val2
+ * 2 val1 val2
+ * }}}
+ */
+@ExpressionDescription(
+usage = "_FUNC_(n, expr1, ..., exprk) - Replicates `n`, `expr1`, ...,
`exprk` into `n` rows.",
+examples = """
+ Examples:
+ > SELECT _FUNC_(2, "val1", "val2");
+ 2 val1 val2
+ 2 val1 val2
+ """)
+case class ReplicateRows(children: Seq[Expression]) extends Generator with
CodegenFallback {
+ override def checkInputDataTypes(): TypeCheckResult = {
+ if (children.length < 2) {
+ TypeCheckResult.TypeCheckFailure(s"$prettyName requires at least 2
arguments.")
+ } else if (children.head.dataType != LongType) {
+ TypeCheckResult.TypeCheckFailure("The number of rows must be a
positive long value.")
+ } else {
+ TypeCheckResult.TypeCheckSuccess
+ }
+ }
+
+ override def elementSchema: StructType =
+ StructType(children.zipWithIndex.map {
+ case (e, index) => StructField(s"col$index", e.dataType)
+ })
+
+ override def eval(input: InternalRow): TraversableOnce[InternalRow] = {
+ val numRows = children.head.eval(input).asInstanceOf[Long]
+ val values = children.map(_.eval(input)).toArray
--- End diff --
`children.head` seems to be evaluated twice here; can we avoid that?
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]