This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f5b1b8306cf [SPARK-45562][SQL] XML: Add SQL error class for missing rowTag option
f5b1b8306cf is described below
commit f5b1b8306cf13218f5ff79944aaa9c0b4e74fda4
Author: Sandip Agarwala <[email protected]>
AuthorDate: Fri Nov 10 17:44:39 2023 +0900
[SPARK-45562][SQL] XML: Add SQL error class for missing rowTag option
### What changes were proposed in this pull request?
rowTag option is required for reading XML files. This PR adds a SQL error
class for missing rowTag option.
### Why are the changes needed?
rowTag option is required for reading XML files. This PR adds a SQL error
class for missing rowTag option.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Updated the unit test to check for error message.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #43710 from sandip-db/xml-rowTagRequiredError.
Authored-by: Sandip Agarwala <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
common/utils/src/main/resources/error/error-classes.json | 6 ++++++
docs/sql-error-conditions.md | 6 ++++++
.../scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala | 8 ++++++--
.../org/apache/spark/sql/errors/QueryCompilationErrors.scala | 7 +++++++
.../apache/spark/sql/execution/datasources/xml/XmlSuite.scala | 11 ++++++++---
5 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-classes.json
b/common/utils/src/main/resources/error/error-classes.json
index 26f6c0240af..3b7a3a6006e 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -3911,6 +3911,12 @@
},
"sqlState" : "42605"
},
+ "XML_ROW_TAG_MISSING" : {
+ "message" : [
+ "<rowTag> option is required for reading files in XML format."
+ ],
+ "sqlState" : "42000"
+ },
"_LEGACY_ERROR_TEMP_0001" : {
"message" : [
"Invalid InsertIntoContext."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 2cb433b19fa..a811019e0a5 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -2369,3 +2369,9 @@ The operation `<operation>` requires a `<requiredType>`.
But `<objectName>` is a
The `<functionName>` requires `<expectedNum>` parameters but the actual number
is `<actualNum>`.
For more details see [WRONG_NUM_ARGS](sql-error-conditions-wrong-num-args-error-class.html)
+
+### XML_ROW_TAG_MISSING
+
+[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
+
+`<rowTag>` option is required for reading files in XML format.
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
index aac6eec21c6..8f6cdbf360e 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
@@ -24,7 +24,7 @@ import javax.xml.stream.XMLInputFactory
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.{DataSourceOptions, FileSourceOptions}
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap,
CompressionCodecs, DateFormatter, DateTimeUtils, ParseMode, PermissiveMode}
-import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf}
/**
@@ -66,7 +66,11 @@ private[sql] class XmlOptions(
val compressionCodec =
parameters.get(COMPRESSION).map(CompressionCodecs.getCodecClassName)
val rowTagOpt = parameters.get(XmlOptions.ROW_TAG).map(_.trim)
-    require(!rowTagRequired || rowTagOpt.isDefined, s"'${XmlOptions.ROW_TAG}' option is required.")
+
+ if (rowTagRequired && rowTagOpt.isEmpty) {
+ throw QueryCompilationErrors.xmlRowTagRequiredError(XmlOptions.ROW_TAG)
+ }
+
val rowTag = rowTagOpt.getOrElse(XmlOptions.DEFAULT_ROW_TAG)
require(rowTag.nonEmpty, s"'$ROW_TAG' option should not be an empty string.")
require(!rowTag.startsWith("<") && !rowTag.endsWith(">"),
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 0c5dcb1ead0..e772b3497ac 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -3817,4 +3817,11 @@ private[sql] object QueryCompilationErrors extends
QueryErrorsBase with Compilat
errorClass = "FOUND_MULTIPLE_DATA_SOURCES",
messageParameters = Map("provider" -> provider))
}
+
+ def xmlRowTagRequiredError(optionName: String): Throwable = {
+ new AnalysisException(
+ errorClass = "XML_ROW_TAG_MISSING",
+ messageParameters = Map("rowTag" -> toSQLId(optionName))
+ )
+ }
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
index 2d4cd2f403c..21122676c46 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
@@ -36,6 +36,7 @@ import org.apache.spark.sql.{AnalysisException, Dataset,
Encoders, QueryTest, Ro
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.xml.XmlOptions
import org.apache.spark.sql.catalyst.xml.XmlOptions._
+import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.datasources.xml.TestUtils._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.test.SharedSparkSession
@@ -1782,17 +1783,21 @@ class XmlSuite extends QueryTest with
SharedSparkSession {
test("Test XML Options Error Messages") {
def checkXmlOptionErrorMessage(
parameters: Map[String, String] = Map.empty,
- msg: String): Unit = {
- val e = intercept[IllegalArgumentException] {
+ msg: String,
+ exception: Throwable = new IllegalArgumentException().getCause): Unit = {
+ val e = intercept[Exception] {
spark.read
.options(parameters)
.xml(getTestResourcePath(resDir + "ages.xml"))
.collect()
}
+ assert(e.getCause === exception)
assert(e.getMessage.contains(msg))
}
- checkXmlOptionErrorMessage(Map.empty, "'rowTag' option is required.")
+    checkXmlOptionErrorMessage(Map.empty,
+      "[XML_ROW_TAG_MISSING] `rowTag` option is required for reading files in XML format.",
+      QueryCompilationErrors.xmlRowTagRequiredError(XmlOptions.ROW_TAG).getCause)
checkXmlOptionErrorMessage(Map("rowTag" -> ""),
"'rowTag' option should not be an empty string.")
checkXmlOptionErrorMessage(Map("rowTag" -> " "),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]