This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
     new cda84dd096b9 [SPARK-52695][SQL] User Defined Type write support for xml file format
cda84dd096b9 is described below

commit cda84dd096b936111d55d4f2077f4954f0797351
Author: Kent Yao <y...@apache.org>
AuthorDate: Tue Jul 8 10:03:38 2025 +0800

    [SPARK-52695][SQL] User Defined Type write support for xml file format

    ### What changes were proposed in this pull request?

    This PR adds UDT write support for the XML file format.

    ### Why are the changes needed?

    Previously, an IllegalArgumentException was thrown when writing UDT values.

    ### Does this PR introduce _any_ user-facing change?

    Yes. If a UDT's sqlType is compatible with the XML file format, the UDT becomes writable.

    ### How was this patch tested?

    A new test.

    ### Was this patch authored or co-authored using generative AI tooling?

    No.

    Closes #51388 from yaooqinn/SPARK-52695.

    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../spark/sql/catalyst/xml/StaxXmlGenerator.scala |  2 ++
 .../sql/execution/datasources/xml/XmlSuite.scala  | 28 ++++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
index 9e4e25ba1746..c29df303333a 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
@@ -228,6 +228,8 @@ class StaxXmlGenerator(
           writeChild(field.name, field.dataType, value)
         }
 
+      case (u: UserDefinedType[_], v) => writeElement(u.sqlType, v, options)
+
       case (_, _) =>
         throw new SparkIllegalArgumentException(
           errorClass = "_LEGACY_ERROR_TEMP_3238",
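For context, the YearUDT referenced by the new test below is defined in Spark's test
sources and is not part of this diff. A minimal sketch of what such a UDT could look
like, assuming a Year is stored as its numeric year value with a LongType storage type
(an assumption consistent with the schema the test asserts), is:

    import java.time.Year

    import org.apache.spark.sql.types.{DataType, LongType, UserDefinedType}

    // Hypothetical sketch, not the actual Spark definition: store a Year
    // as its numeric year value, so the UDT's sqlType is LongType.
    class YearUDT extends UserDefinedType[Year] {
      // The catalyst type backing the UDT; the new generator case above
      // delegates XML writing to this type via writeElement(u.sqlType, v, options).
      override def sqlType: DataType = LongType

      // User-space object -> catalyst value.
      override def serialize(obj: Year): Long = obj.getValue.toLong

      // Catalyst value -> user-space object.
      override def deserialize(datum: Any): Year = datum match {
        case value: Long => Year.of(value.toInt)
      }

      override def userClass: Class[Year] = classOf[Year]
    }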
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
index be08ce5bd7db..9ab1b2c157e1 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
@@ -20,7 +20,7 @@ import java.io.{EOFException, File, FileOutputStream, StringWriter}
 import java.nio.charset.{StandardCharsets, UnsupportedCharsetException}
 import java.nio.file.{Files, Path, Paths}
 import java.sql.{Date, Timestamp}
-import java.time.{Instant, LocalDateTime}
+import java.time.{Instant, LocalDateTime, Year}
 import java.util.TimeZone
 import java.util.concurrent.ConcurrentHashMap
 import javax.xml.stream.{XMLOutputFactory, XMLStreamException}
@@ -38,7 +38,8 @@ import org.apache.hadoop.io.compress.{CompressionCodecFactory, GzipCodec}
 
 import org.apache.spark.{DebugFilesystem, SparkException}
 import org.apache.spark.io.ZStdCompressionCodec
-import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Encoders, QueryTest, Row, SaveMode}
+import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Encoders, QueryTest, Row, SaveMode, YearUDT}
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.UDTEncoder
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.catalyst.util.TypeUtils.ordinalNumber
 import org.apache.spark.sql.catalyst.xml.{IndentingXMLStreamWriter, XmlOptions}
@@ -3490,6 +3491,29 @@ class XmlSuite
       }
     }
   }
+
+  test("SPARK-52695: UDT write support for xml file format") {
+    val udt = new YearUDT()
+    val encoder = UDTEncoder(udt, classOf[YearUDT])
+    withTempDir { dir =>
+      val path = dir.getCanonicalPath
+      // Write a dataset of Year objects
+      val df1 = spark.range(2018, 2025).map(y => Year.of(y.toInt))(encoder)
+
+      df1
+        .write
+        .mode(SaveMode.Overwrite)
+        .option("rowTag", "ROW")
+        .xml(path)
+
+      val df = spark.read
+        .option("rowTag", "ROW")
+        .xml(path)
+
+      assert(df.schema === StructType(Seq(StructField("value", LongType))))
+      checkAnswer(df, spark.range(2018, 2025).toDF("value"))
+    }
+  }
 }
 
 // Mock file system that checks the number of open files
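With the new generator case, a UDT value is written exactly as its underlying sqlType,
so each Year in the test lands on disk as a plain long element. A row in the output
files should therefore look roughly like the following (illustrative; the XML
declaration, surrounding root element, and indentation depend on the writer's options):

    <ROW>
        <value>2018</value>
    </ROW>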