HyukjinKwon commented on code in PR #42462:
URL: https://github.com/apache/spark/pull/42462#discussion_r1300853023


##########
sql/core/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -7314,6 +7314,103 @@ object functions {
    */
   def to_csv(e: Column): Column = to_csv(e, Map.empty[String, String].asJava)
 
+  // scalastyle:off line.size.limit
+
+  /**
+   * Parses a column containing a XML string into the data type corresponding 
to the specified schema.
+   * Returns `null`, in the case of an unparseable string.
+   *
+   * @param e       a string column containing XML data.
+   * @param schema  the schema to use when parsing the XML string
+   * @param options options to control how the XML is parsed. accepts the same options as the
+   *                XML data source.
+   *                See
+   *                <a href=
+   *                "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option">
+   *                Data Source Option</a> in the version you use.
+   * @group collection_funcs
+   * @since 4.0.0
+   */
+  // scalastyle:on line.size.limit
+  def from_xml(e: Column, schema: StructType, options: Map[String, String]): 
Column = withExpr {
+    XmlToStructs(CharVarcharUtils.failIfHasCharVarchar(schema), options, 
e.expr)
+  }
+
+  // scalastyle:off line.size.limit
+
+  /**
+   * (Java-specific) Parses a column containing a XML string into a 
`StructType`
+   * with the specified schema. Returns `null`, in the case of an unparseable 
string.
+   *
+   * @param e       a string column containing XML data.
+   * @param schema  the schema to use when parsing the XML string
+   * @param options options to control how the XML is parsed. accepts the same options as the
+   *                XML data source.
+   *                See
+   *                <a href=
+   *                "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option">
+   *                Data Source Option</a> in the version you use.
+   * @group collection_funcs
+   * @since 4.0.0
+   */
+  // scalastyle:on line.size.limit
+  def from_xml(e: Column, schema: Column, options: java.util.Map[String, 
String]): Column = {
+    withExpr(new XmlToStructs(e.expr, schema.expr, options.asScala.toMap))
+  }
+
+  /**
+   * Parses a column containing a XML string into the data type
+   * corresponding to the specified schema.
+   * Returns `null`, in the case of an unparseable string.
+   *
+   * @param e       a string column containing XML data.
+   * @param schema  the schema to use when parsing the XML string
+
+   * @group collection_funcs
+   * @since 4.0.0
+   */
+  def from_xml(e: Column, schema: StructType): Column =
+    from_xml(e, schema, Map.empty[String, String])
+
+  /**
+   * Parses a XML string and infers its schema in DDL format.
+   *
+   * @param xml a XML string.
+   * @group collection_funcs
+   * @since 4.0.0
+   */
+  def schema_of_xml(xml: String): Column = schema_of_xml(lit(xml))
+
+  /**
+   * Parses a XML string and infers its schema in DDL format.
+   *
+   * @param xml a foldable string column containing a XML string.
+   * @group collection_funcs
+   * @since 4.0.0
+   */
+  def schema_of_xml(xml: Column): Column = withExpr(new SchemaOfXml(xml.expr))
+
+  // scalastyle:off line.size.limit
+
+  /**
+   * Parses a XML string and infers its schema in DDL format using options.
+   *
+   * @param xml    a foldable string column containing XML data.
+   * @param options options to control how the XML is parsed. accepts the same options as the
+   *                XML data source.
+   *                See
+   *                <a href=
+   *                "https://spark.apache.org/docs/latest/sql-data-sources-xml.html#data-source-option">
+   *                Data Source Option</a> in the version you use.
+   * @return a column with string literal containing schema in DDL format.
+   * @group collection_funcs
+   * @since 4.0.0
+   */
+  // scalastyle:on line.size.limit
+  def schema_of_xml(xml: Column, options: java.util.Map[String, String]): 
Column = {

Review Comment:
   Actually, this is the same as `schema_of_json`. I suggested keeping only the Java map 
variant for now, to avoid having too many overloaded versions.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to