Github user maropu commented on a diff in the pull request:
https://github.com/apache/spark/pull/21944#discussion_r207160479
--- Diff: sql/core/src/main/scala/org/apache/spark/sql/Dataset.scala ---
@@ -1367,6 +1367,22 @@ class Dataset[T] private[sql](
}: _*)
}
+ /**
+ * Casts all the values of the current Dataset following the types of a specific StructType.
+ * This method works also with nested structTypes.
+ *
+ * @group typedrel
+ * @since 2.4.0
+ */
+ def castBySchema(schema: StructType): DataFrame = {
+ assert(schema.fields.map(_.name).toList.sameElements(this.schema.fields.map(_.name).toList),
+ "schema should have the same fields as the original schema")
+
+ selectExpr(schema.map(
--- End diff --
-1 (I think adding a new API to `Dataset` is a pretty sensitive issue...)
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]