HyukjinKwon commented on code in PR #42418:
URL: https://github.com/apache/spark/pull/42418#discussion_r1289369842
##########
connector/connect/client/jvm/src/main/scala/org/apache/spark/sql/Dataset.scala:
##########
@@ -2724,6 +2725,38 @@ class Dataset[T] private[sql] (
flatMap(UdfUtils.flatMapFuncToScalaFunc(f))(encoder)
}
+ /**
+ * (Scala-specific) Returns a new Dataset where each row has been expanded
to zero or more rows
+ * by the provided function. This is similar to a `LATERAL VIEW` in HiveQL.
The columns of the
+ * input row are implicitly joined with each row that is output by the
function.
+ *
+ * Since this method is deprecated, you can instead explode columns using
+ * `functions.explode()` or `flatMap()`. The following example uses these
+ * alternatives to count
+ * the number of books that contain a given word:
+ *
+ * {{{
+ * case class Book(title: String, words: String)
+ * val ds: Dataset[Book]
+ *
+ * val allWords = ds.select($"title", explode(split($"words", "
")).as("word"))
+ *
+ * val bookCountPerWord =
allWords.groupBy("word").agg(count_distinct("title"))
+ * }}}
+ *
+ * Using `flatMap()`, the same expansion can be expressed as:
+ *
+ * {{{
+ * ds.flatMap(_.words.split(" "))
+ * }}}
+ *
+ * @group untypedrel
+ * @since 3.5.0
+ */
+ @deprecated("use flatMap() or select() with functions.explode() instead",
"2.0.0")
+ def explode[A <: Product: TypeTag](input: Column*)(f: Row =>
TraversableOnce[A]): DataFrame = {
Review Comment:
Do we need `String` signature too?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]