This is an automated email from the ASF dual-hosted git repository.

yao pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new cda84dd096b9 [SPARK-52695][SQL] User Defined Type write support for 
xml file format
cda84dd096b9 is described below

commit cda84dd096b936111d55d4f2077f4954f0797351
Author: Kent Yao <y...@apache.org>
AuthorDate: Tue Jul 8 10:03:38 2025 +0800

    [SPARK-52695][SQL] User Defined Type write support for xml file format
    
    ### What changes were proposed in this pull request?
    
    This PR adds UDT write support for the XML file format
    
    ### Why are the changes needed?
    
    Currently, an IllegalArgumentException is thrown while writing UDT values.
    
    ### Does this PR introduce _any_ user-facing change?
    
    Yes, if the UDT's sqlType is compatible with the XML file format, it becomes
    writable.
    
    ### How was this patch tested?
    A new test was added to XmlSuite.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    no
    
    Closes #51388 from yaooqinn/SPARK-52695.
    
    Authored-by: Kent Yao <y...@apache.org>
    Signed-off-by: Kent Yao <y...@apache.org>
---
 .../spark/sql/catalyst/xml/StaxXmlGenerator.scala  |  2 ++
 .../sql/execution/datasources/xml/XmlSuite.scala   | 28 ++++++++++++++++++++--
 2 files changed, 28 insertions(+), 2 deletions(-)

diff --git 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
index 9e4e25ba1746..c29df303333a 100644
--- 
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
+++ 
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/StaxXmlGenerator.scala
@@ -228,6 +228,8 @@ class StaxXmlGenerator(
         writeChild(field.name, field.dataType, value)
       }
 
+    case (u: UserDefinedType[_], v) => writeElement(u.sqlType, v, options)
+
     case (_, _) =>
       throw new SparkIllegalArgumentException(
         errorClass = "_LEGACY_ERROR_TEMP_3238",
diff --git 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
index be08ce5bd7db..9ab1b2c157e1 100644
--- 
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
+++ 
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
@@ -20,7 +20,7 @@ import java.io.{EOFException, File, FileOutputStream, 
StringWriter}
 import java.nio.charset.{StandardCharsets, UnsupportedCharsetException}
 import java.nio.file.{Files, Path, Paths}
 import java.sql.{Date, Timestamp}
-import java.time.{Instant, LocalDateTime}
+import java.time.{Instant, LocalDateTime, Year}
 import java.util.TimeZone
 import java.util.concurrent.ConcurrentHashMap
 import javax.xml.stream.{XMLOutputFactory, XMLStreamException}
@@ -38,7 +38,8 @@ import 
org.apache.hadoop.io.compress.{CompressionCodecFactory, GzipCodec}
 
 import org.apache.spark.{DebugFilesystem, SparkException}
 import org.apache.spark.io.ZStdCompressionCodec
-import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Encoders, 
QueryTest, Row, SaveMode}
+import org.apache.spark.sql.{AnalysisException, DataFrame, Dataset, Encoders, 
QueryTest, Row, SaveMode, YearUDT}
+import org.apache.spark.sql.catalyst.encoders.AgnosticEncoders.UDTEncoder
 import org.apache.spark.sql.catalyst.util._
 import org.apache.spark.sql.catalyst.util.TypeUtils.ordinalNumber
 import org.apache.spark.sql.catalyst.xml.{IndentingXMLStreamWriter, XmlOptions}
@@ -3490,6 +3491,29 @@ class XmlSuite
       }
     }
   }
+
+  test("SPARK-52695: UDT write support for xml file format") {
+    val udt = new YearUDT()
+    val encoder = UDTEncoder(udt, classOf[YearUDT])
+    withTempDir { dir =>
+      val path = dir.getCanonicalPath
+      // Write a dataset of Year objects
+      val df1 = spark.range(2018, 2025).map(y => Year.of(y.toInt))(encoder)
+
+      df1
+      .write
+      .mode(SaveMode.Overwrite)
+      .option("rowTag", "ROW")
+      .xml(path)
+
+      val df = spark.read
+        .option("rowTag", "ROW")
+        .xml(path)
+
+      assert(df.schema === StructType(Seq(StructField("value", LongType))))
+      checkAnswer(df, spark.range(2018, 2025).toDF("value"))
+    }
+  }
 }
 
 // Mock file system that checks the number of open files


---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org
For additional commands, e-mail: commits-h...@spark.apache.org

Reply via email to