[GitHub] [spark] EnricoMi commented on a diff in pull request #37407: [SPARK-39876][SQL] Add UNPIVOT to SQL syntax

GitBox Mon, 19 Sep 2022 23:18:15 -0700


EnricoMi commented on code in PR #37407:
URL: https://github.com/apache/spark/pull/37407#discussion_r974925706



##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/plans/logical/basicLogicalOperators.scala:
##########
@@ -1374,32 +1374,104 @@ case class Pivot(
   override protected def withNewChildInternal(newChild: LogicalPlan): Pivot = 
copy(child = newChild)
 }
 
+/**
+ * Expression for [[Unpivot]] describing one unpivot value column (one or more expressions)
+ * together with an optional alias. This node itself is neither evaluable nor resolvable.
+ * Only its children are to be resolved.
+ *
+ * @param exprs expressions to unpivot; these are also exposed as the children of this node
+ * @param alias optional alias
+ */
+case class UnpivotExpr(exprs: Seq[NamedExpression], alias: Option[String]) 
extends Unevaluable {
+  // The wrapped expressions are the only children of this node.
+  override val children: Seq[NamedExpression] = exprs
+  // Asking this node for its data type or nullability always throws an UnresolvedException.
+  override def dataType: DataType = throw new UnresolvedException("dataType")
+  override def nullable: Boolean = throw new UnresolvedException("nullable")
+  // NOTE(review): the override below is commented out; confirm whether it should be
+  // enabled or removed before merging.
+  // override lazy val resolved = false
+
+  // Rebuilds this node from replaced children, keeping the alias unchanged.
+  override protected def withNewChildrenInternal(
+      newChildren: IndexedSeq[Expression]): Expression = {
+    // `exprs` must hold named expressions: keep children that already are named and
+    // wrap every other expression in an UnresolvedAlias.
+    copy(exprs = newChildren.map {
+      case ne: NamedExpression => ne
+      case e: Expression => UnresolvedAlias(e)
+    })
+  }
+}
+
 /**
  * A constructor for creating an Unpivot, which will later be converted to an 
[[Expand]]
  * during the query analysis.
  *
- * An empty values array will be replaced during analysis with all resolved 
outputs of child except
+ * Either ids or values array must be set. The ids array can be empty,
+ * the values array must not be empty if not None.
+ *
+ * A None ids array will be replaced during analysis with all resolved outputs 
of child except
+ * the values. This expansion allows to easily select all non-value columns as 
id columns.
+ *
+ * A None values array will be replaced during analysis with all resolved 
outputs of child except
  * the ids. This expansion allows to easily unpivot all non-id columns.
  *
  * @see `org.apache.spark.sql.catalyst.analysis.Analyzer.ResolveUnpivot`
  *
- * The type of the value column is derived from all value columns during 
analysis once all values
- * are resolved. All values' types have to be compatible, otherwise the result 
value column cannot
- * be assigned the individual values and an AnalysisException is thrown.
+ * Multiple columns can be unpivoted in one row by providing multiple value 
column names
+ * and the same number of unpivot value expressions:
+ * {{{
+ *   // one-dimensional value columns
+ *   Unpivot(
+ *     Some(Seq("id")),
+ *     Some(Seq(
+ *       (Seq("val1"), None),
+ *       (Seq("val2"), None)
+ *     )),
+ *     "var",
+ *     Seq("val")
+ *   )
+ *
+ *   // two-dimensional value columns
+ *   Unpivot(
+ *     Some(Seq("id")),
+ *     Some(Seq(
+ *       (Seq("val1.1", "val1.2"), None),
+ *       (Seq("val2.1", "val2.2"), None)
+ *     )),
+ *     "var",
+ *     Seq("val1", "val2")
+ *   )
+ * }}}
+ *
+ * The variable column will contain the name of the unpivot value while the 
value columns contain
+ * the unpivot values. Multi-dimensional unpivot values can be given `aliases`:
+ * {{{
+ *   // two-dimensional value columns with aliases
+ *   Unpivot(
+ *     Some(Seq("id")),
+ *     Some(Seq(
+ *       (Seq("val1.1", "val1.2"), Some("val1")),
+ *       (Seq("val2.1", "val2.2"), Some("val2"))
+ *     )),
+ *     "var",
+ *     Seq("val1", "val2")
+ *   )
+ * }}}
+ *
+ * All "value" columns must share a least common data type. Unless they are 
the same data type,
+ * all "value" columns are cast to the nearest common data type. For instance,
+ * types `IntegerType` and `LongType` are cast to `LongType`, while 
`IntegerType` and `StringType`
+ * do not have a common data type and `unpivot` fails with an 
`AnalysisException`.
  *
  * @see 
`org.apache.spark.sql.catalyst.analysis.TypeCoercionBase.UnpivotCoercion`
  *
  * @param ids                Id columns
- * @param values             Value columns to unpivot
+ * @param values             Value column sets to unpivot with optional aliases
  * @param variableColumnName Name of the variable column
- * @param valueColumnName    Name of the value column
+ * @param valueColumnNames   Names of the value columns
  * @param child              Child operator
  */
 case class Unpivot(
-    ids: Seq[NamedExpression],
-    values: Seq[NamedExpression],
+    ids: Option[Seq[NamedExpression]],
+    values: Option[Seq[UnpivotExpr]],
     variableColumnName: String,
-    valueColumnName: String,
+    valueColumnNames: Seq[String],
     child: LogicalPlan) extends UnaryNode {
   override lazy val resolved = false  // Unpivot will be replaced after being 
resolved.

Review Comment:
   done



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

[GitHub] [spark] EnricoMi commented on a diff in pull request #37407: [SPARK-39876][SQL] Add UNPIVOT to SQL syntax

Reply via email to