EnricoMi commented on code in PR #37407:
URL: https://github.com/apache/spark/pull/37407#discussion_r974925706
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala:
##########
@@ -1374,32 +1374,104 @@ case class Pivot(
override protected def withNewChildInternal(newChild: LogicalPlan): Pivot =
copy(child = newChild)
}
+/**
+ * Expression for [[Unpivot]] for one unpivot value column (one or more expressions)
+ * and an optional alias. This node itself is neither evaluable nor resolvable.
+ * Only its children are to be resolved.
+ *
+ * @param exprs expressions to unpivot
+ * @param alias optional alias
+ */
+case class UnpivotExpr(exprs: Seq[NamedExpression], alias: Option[String])
extends Unevaluable {
+ // The wrapped expressions are this node's only children.
+ override val children: Seq[NamedExpression] = exprs
+ // This node reports no data type or nullability of its own: both accessors throw.
+ override def dataType: DataType = throw new UnresolvedException("dataType")
+ override def nullable: Boolean = throw new UnresolvedException("nullable")
+ // override lazy val resolved = false
+
+ override protected def withNewChildrenInternal(
+ newChildren: IndexedSeq[Expression]): Expression = {
+ // Keep children that are already named and wrap any other expression in
+ // UnresolvedAlias, so that `exprs` remains a Seq[NamedExpression].
+ copy(exprs = newChildren.map {
+ case ne: NamedExpression => ne
+ case e: Expression => UnresolvedAlias(e)
+ })
+ }
+}
+
/**
* A constructor for creating an Unpivot, which will later be converted to an
[[Expand]]
* during the query analysis.
*
- * An empty values array will be replaced during analysis with all resolved
outputs of child except
+ * Either ids or values array must be set. The ids array can be empty,
+ * the values array must not be empty if not None.
+ *
+ * A None ids array will be replaced during analysis with all resolved outputs of child except
+ * the values. This expansion makes it easy to select all non-value columns as id columns.
+ *
+ * A None values array will be replaced during analysis with all resolved
outputs of child except
* the ids. This expansion allows to easily unpivot all non-id columns.
*
* @see `org.apache.spark.sql.catalyst.analysis.Analyzer.ResolveUnpivot`
*
- * The type of the value column is derived from all value columns during
analysis once all values
- * are resolved. All values' types have to be compatible, otherwise the result
value column cannot
- * be assigned the individual values and an AnalysisException is thrown.
+ * Multiple columns can be unpivoted in one row by providing multiple value
column names
+ * and the same number of unpivot value expressions:
+ * {{{
+ * // one-dimensional value columns
+ * Unpivot(
+ * Some(Seq("id")),
+ * Some(Seq(
+ * (Seq("val1"), None),
+ * (Seq("val2"), None)
+ * )),
+ * "var",
+ * Seq("val")
+ * )
+ *
+ * // two-dimensional value columns
+ * Unpivot(
+ * Some(Seq("id")),
+ * Some(Seq(
+ * (Seq("val1.1", "val1.2"), None),
+ * (Seq("val2.1", "val2.2"), None)
+ * )),
+ * "var",
+ * Seq("val1", "val2")
+ * )
+ * }}}
+ *
+ * The variable column will contain the name of the unpivot value while the
value columns contain
+ * the unpivot values. Multi-dimensional unpivot values can be given `aliases`:
+ * {{{
+ * // two-dimensional value columns with aliases
+ * Unpivot(
+ * Some(Seq("id")),
+ * Some(Seq(
+ * (Seq("val1.1", "val1.2"), Some("val1")),
+ * (Seq("val2.1", "val2.2"), Some("val2"))
+ * )),
+ * "var",
+ * Seq("val1", "val2")
+ * )
+ * }}}
+ *
+ * All "value" columns must share a least common data type. Unless they are
the same data type,
+ * all "value" columns are cast to the nearest common data type. For instance,
+ * types `IntegerType` and `LongType` are cast to `LongType`, while
`IntegerType` and `StringType`
+ * do not have a common data type and `unpivot` fails with an
`AnalysisException`.
*
* @see
`org.apache.spark.sql.catalyst.analysis.TypeCoercionBase.UnpivotCoercion`
*
* @param ids Id columns
- * @param values Value columns to unpivot
+ * @param values Value column sets to unpivot with optional aliases
* @param variableColumnName Name of the variable column
- * @param valueColumnName Name of the value column
+ * @param valueColumnNames Names of the value columns
* @param child Child operator
*/
case class Unpivot(
- ids: Seq[NamedExpression],
- values: Seq[NamedExpression],
+ ids: Option[Seq[NamedExpression]],
+ values: Option[Seq[UnpivotExpr]],
variableColumnName: String,
- valueColumnName: String,
+ valueColumnNames: Seq[String],
child: LogicalPlan) extends UnaryNode {
override lazy val resolved = false // Unpivot will be replaced after being
resolved.
Review Comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]