This is an automated email from the ASF dual-hosted git repository.
gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new f5b1b8306cf [SPARK-45562][SQL] XML: Add SQL error class for missing rowTag option
f5b1b8306cf is described below
commit f5b1b8306cf13218f5ff79944aaa9c0b4e74fda4
Author: Sandip Agarwala <[email protected]>
AuthorDate: Fri Nov 10 17:44:39 2023 +0900
[SPARK-45562][SQL] XML: Add SQL error class for missing rowTag option
### What changes were proposed in this pull request?
rowTag option is required for reading XML files. This PR adds a SQL error
class for missing rowTag option.
### Why are the changes needed?
rowTag option is required for reading XML files. This PR adds a SQL error
class for missing rowTag option.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Updated the unit test to check for error message.
### Was this patch authored or co-authored using generative AI tooling?
No
Closes #43710 from sandip-db/xml-rowTagRequiredError.
Authored-by: Sandip Agarwala <[email protected]>
Signed-off-by: Hyukjin Kwon <[email protected]>
---
common/utils/src/main/resources/error/error-classes.json | 6 ++++++
docs/sql-error-conditions.md | 6 ++++++
.../scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala | 8 ++++++--
.../org/apache/spark/sql/errors/QueryCompilationErrors.scala | 7 +++++++
.../apache/spark/sql/execution/datasources/xml/XmlSuite.scala | 11 ++++++++---
5 files changed, 33 insertions(+), 5 deletions(-)
diff --git a/common/utils/src/main/resources/error/error-classes.json
b/common/utils/src/main/resources/error/error-classes.json
index 26f6c0240af..3b7a3a6006e 100644
--- a/common/utils/src/main/resources/error/error-classes.json
+++ b/common/utils/src/main/resources/error/error-classes.json
@@ -3911,6 +3911,12 @@
},
"sqlState" : "42605"
},
+ "XML_ROW_TAG_MISSING" : {
+ "message" : [
+ "<rowTag> option is required for reading files in XML format."
+ ],
+ "sqlState" : "42000"
+ },
"_LEGACY_ERROR_TEMP_0001" : {
"message" : [
"Invalid InsertIntoContext."
diff --git a/docs/sql-error-conditions.md b/docs/sql-error-conditions.md
index 2cb433b19fa..a811019e0a5 100644
--- a/docs/sql-error-conditions.md
+++ b/docs/sql-error-conditions.md
@@ -2369,3 +2369,9 @@ The operation `<operation>` requires a `<requiredType>`.
But `<objectName>` is a
The `<functionName>` requires `<expectedNum>` parameters but the actual number
is `<actualNum>`.
For more details see [WRONG_NUM_ARGS](sql-error-conditions-wrong-num-args-error-class.html)
+
+### XML_ROW_TAG_MISSING
+
+[SQLSTATE: 42000](sql-error-conditions-sqlstates.html#class-42-syntax-error-or-access-rule-violation)
+
+`<rowTag>` option is required for reading files in XML format.
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
index aac6eec21c6..8f6cdbf360e 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
@@ -24,7 +24,7 @@ import javax.xml.stream.XMLInputFactory
import org.apache.spark.internal.Logging
import org.apache.spark.sql.catalyst.{DataSourceOptions, FileSourceOptions}
import org.apache.spark.sql.catalyst.util.{CaseInsensitiveMap,
CompressionCodecs, DateFormatter, DateTimeUtils, ParseMode, PermissiveMode}
-import org.apache.spark.sql.errors.QueryExecutionErrors
+import org.apache.spark.sql.errors.{QueryCompilationErrors, QueryExecutionErrors}
import org.apache.spark.sql.internal.{LegacyBehaviorPolicy, SQLConf}
/**
@@ -66,7 +66,11 @@ private[sql] class XmlOptions(
val compressionCodec =
parameters.get(COMPRESSION).map(CompressionCodecs.getCodecClassName)
val rowTagOpt = parameters.get(XmlOptions.ROW_TAG).map(_.trim)
-    require(!rowTagRequired || rowTagOpt.isDefined, s"'${XmlOptions.ROW_TAG}' option is required.")
+
+ if (rowTagRequired && rowTagOpt.isEmpty) {
+ throw QueryCompilationErrors.xmlRowTagRequiredError(XmlOptions.ROW_TAG)
+ }
+
val rowTag = rowTagOpt.getOrElse(XmlOptions.DEFAULT_ROW_TAG)
require(rowTag.nonEmpty, s"'$ROW_TAG' option should not be an empty string.")
require(!rowTag.startsWith("<") && !rowTag.endsWith(">"),
diff --git
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
index 0c5dcb1ead0..e772b3497ac 100644
---
a/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
+++
b/sql/catalyst/src/main/scala/org/apache/spark/sql/errors/QueryCompilationErrors.scala
@@ -3817,4 +3817,11 @@ private[sql] object QueryCompilationErrors extends
QueryErrorsBase with Compilat
errorClass = "FOUND_MULTIPLE_DATA_SOURCES",
messageParameters = Map("provider" -> provider))
}
+
+ def xmlRowTagRequiredError(optionName: String): Throwable = {
+ new AnalysisException(
+ errorClass = "XML_ROW_TAG_MISSING",
+ messageParameters = Map("rowTag" -> toSQLId(optionName))
+ )
+ }
}
diff --git
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
index 2d4cd2f403c..21122676c46 100644
---
a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
+++
b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala
@@ -36,6 +36,7 @@ import org.apache.spark.sql.{AnalysisException, Dataset,
Encoders, QueryTest, Ro
import org.apache.spark.sql.catalyst.util._
import org.apache.spark.sql.catalyst.xml.XmlOptions
import org.apache.spark.sql.catalyst.xml.XmlOptions._
+import org.apache.spark.sql.errors.QueryCompilationErrors
import org.apache.spark.sql.execution.datasources.xml.TestUtils._
import org.apache.spark.sql.functions._
import org.apache.spark.sql.test.SharedSparkSession
@@ -1782,17 +1783,21 @@ class XmlSuite extends QueryTest with
SharedSparkSession {
test("Test XML Options Error Messages") {
def checkXmlOptionErrorMessage(
parameters: Map[String, String] = Map.empty,
- msg: String): Unit = {
- val e = intercept[IllegalArgumentException] {
+ msg: String,
+ exception: Throwable = new IllegalArgumentException().getCause): Unit = {
+ val e = intercept[Exception] {
spark.read
.options(parameters)
.xml(getTestResourcePath(resDir + "ages.xml"))
.collect()
}
+ assert(e.getCause === exception)
assert(e.getMessage.contains(msg))
}
- checkXmlOptionErrorMessage(Map.empty, "'rowTag' option is required.")
+    checkXmlOptionErrorMessage(Map.empty,
+      "[XML_ROW_TAG_MISSING] `rowTag` option is required for reading files in XML format.",
+      QueryCompilationErrors.xmlRowTagRequiredError(XmlOptions.ROW_TAG).getCause)
checkXmlOptionErrorMessage(Map("rowTag" -> ""),
"'rowTag' option should not be an empty string.")
checkXmlOptionErrorMessage(Map("rowTag" -> " "),
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]