uros-b commented on code in PR #56681:
URL: https://github.com/apache/spark/pull/56681#discussion_r3461511442


##########
sql/core/src/test/scala/org/apache/spark/sql/XmlFunctionsSuite.scala:
##########
@@ -79,6 +79,38 @@ class XmlFunctionsSuite extends SharedSparkSession {
     }
   }
 
+  test("from_xml variant output honors the parse mode") {
+    // A raw control char (code 5, ENQ) in XML text is illegal in XML 1.0 and 
the parser rejects
+    // it. Before the fix the variant path bypassed FailureSafeParser, so the 
parse `mode` had no
+    // effect and a malformed record aborted the whole query.
+    val badRec = "<Event>ab" + 5.toChar + "cd</Event>"
+    val goodRec = "<Event><a>1</a></Event>"
+    val df = Seq(badRec, goodRec).toDF("value")
+    df.createOrReplaceTempView("from_xml_variant_mode")
+
+    // Exercise both the interpreted path and whole-stage codegen.
+    Seq("true", "false").foreach { wholeStage =>
+      withSQLConf(SQLConf.WHOLESTAGE_CODEGEN_ENABLED.key -> wholeStage) {
+        // PERMISSIVE: the good record parses, the malformed record is rescued 
to null. Compare
+        // to_json of the variant so the expected value is concrete; key off 
`value` since row
+        // order is not guaranteed.
+        checkAnswer(
+          spark.sql("SELECT value, to_json(from_xml(value, 'variant', " +
+            "map('rowTag','Event','mode','PERMISSIVE'))) AS json FROM 
from_xml_variant_mode"),
+          Seq(Row(goodRec, "{\"a\":1}"), Row(badRec, null)))
+
+        // FAILFAST: the malformed record aborts the query.
+        val e = intercept[SparkException] {
+          spark.sql("SELECT from_xml(value, 'variant', " +
+            "map('rowTag','Event','mode','FAILFAST')) AS d FROM 
from_xml_variant_mode").collect()
+        }
+        assert(e.getMessage.toLowerCase(Locale.ROOT).contains("malformed"),

Review Comment:
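   Nit: the FAILFAST assertion in XmlFunctionsSuite checks 
e.getMessage.toLowerCase(Locale.ROOT).contains("malformed") rather than using 
the project's checkError(..., MALFORMED_RECORD_IN_PARSING, ...) idiom. 
Switching to checkError would guard against error-message wording drift, 
because the test would then match on the error condition rather than on a 
substring of the rendered message.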



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to