This is an automated email from the ASF dual-hosted git repository.

gurwls223 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git

The following commit(s) were added to refs/heads/master by this push:
     new 7f056d89164c [SPARK-46382][SQL] XML: Default ignoreSurroundingSpaces to true
7f056d89164c is described below

commit 7f056d89164c584ec57e252eb37bdc17a0a2e20c
Author: Shujing Yang <shujing.y...@databricks.com>
AuthorDate: Tue Jan 9 10:23:48 2024 +0900

    [SPARK-46382][SQL] XML: Default ignoreSurroundingSpaces to true

    ### What changes were proposed in this pull request?

    Default ignoreSurroundingSpaces to true.

    ### Why are the changes needed?

    To handle values interspersed between elements better

    ### Does this PR introduce _any_ user-facing change?

    Yes

    ### How was this patch tested?

    Unit tests

    ### Was this patch authored or co-authored using generative AI tooling?

    No

    Closes #44629 from shujingyang-db/IGNORE_SURROUNDING_SPACES.

    Authored-by: Shujing Yang <shujing.y...@databricks.com>
    Signed-off-by: Hyukjin Kwon <gurwls...@apache.org>
---
 .../org/apache/spark/sql/catalyst/xml/XmlOptions.scala     |  2 +-
 .../spark/sql/execution/datasources/xml/XmlSuite.scala     | 14 +++++++-------
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
index 92b156fb8f23..218d56c0f203 100644
--- a/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
+++ b/sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlOptions.scala
@@ -95,7 +95,7 @@ class XmlOptions(
   val nullValue = parameters.getOrElse(NULL_VALUE, XmlOptions.DEFAULT_NULL_VALUE)
   val columnNameOfCorruptRecord =
     parameters.getOrElse(COLUMN_NAME_OF_CORRUPT_RECORD, defaultColumnNameOfCorruptRecord)
-  val ignoreSurroundingSpaces = getBool(IGNORE_SURROUNDING_SPACES, false)
+  val ignoreSurroundingSpaces = getBool(IGNORE_SURROUNDING_SPACES, true)
   val parseMode = ParseMode.fromString(parameters.getOrElse(MODE, PermissiveMode.name))
   val inferSchema = getBool(INFER_SCHEMA, true)
   val 
rowValidationXSDPath = parameters.get(ROW_VALIDATION_XSD_PATH).orNull diff --git a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala index 38734e001367..398706dba3d9 100644 --- a/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala +++ b/sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlSuite.scala @@ -761,7 +761,7 @@ class XmlSuite .collect() assert(results(0) === Row("alice", "35")) - assert(results(1) === Row("bob", " ")) + assert(results(1) === Row("bob", "")) assert(results(2) === Row("coc", "24")) } @@ -847,7 +847,7 @@ class XmlSuite assert(result(0) === Row(Row(null))) assert(result(1) === Row(Row(Row(null, null)))) assert(result(2) === Row(Row(Row("E", null)))) - assert(result(3) === Row(Row(Row("E", " ")))) + assert(result(3) === Row(Row(Row("E", "")))) assert(result(4) === Row(Row(Row("E", "")))) } @@ -1177,8 +1177,8 @@ class XmlSuite .option("inferSchema", true) .xml(getTestResourcePath(resDir + "mixed_children.xml")) val mixedRow = mixedDF.head() - assert(mixedRow.getAs[Row](0) === Row(List(" issue ", " text ignored "), " lorem ")) - assert(mixedRow.getString(1) === " ipsum ") + assert(mixedRow.getAs[Row](0) === Row(List("issue", "text ignored"), "lorem")) + assert(mixedRow.getString(1) === "ipsum") } test("test mixed text and complex element children") { @@ -1186,9 +1186,9 @@ class XmlSuite .option("rowTag", "root") .option("inferSchema", true) .xml(getTestResourcePath(resDir + "mixed_children_2.xml")) - assert(mixedDF.select("foo.bar").head().getString(0) === " lorem ") + assert(mixedDF.select("foo.bar").head().getString(0) === "lorem") assert(mixedDF.select("foo.baz.bing").head().getLong(0) === 2) - assert(mixedDF.select("missing").head().getString(0) === " ipsum ") + assert(mixedDF.select("missing").head().getString(0) === "ipsum") } test("test XSD validation") { @@ 
-1752,7 +1752,7 @@ class XmlSuite assert(result(1).getAs[String]("_attr") == "attr1" && result(1).getAs[String]("_VALUE") == "value2") // comments aren't included in valueTag - assert(result(2).getAs[String]("_VALUE") == "\n value3\n ") + assert(result(2).getAs[String]("_VALUE") == "value3") } } --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org