beliefer commented on code in PR #43722:
URL: https://github.com/apache/spark/pull/43722#discussion_r1390572193
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala:
##########
@@ -50,7 +52,8 @@ private[sql] class XmlInferSchema(options: XmlOptions)
extends Serializable with
options.zoneId,
options.locale,
legacyFormat = FAST_DATE_FORMAT,
- isParsing = true)
+ isParsing = true
+ )
Review Comment:
Please revert the change.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala:
##########
@@ -97,20 +100,21 @@ private[sql] class XmlInferSchema(options: XmlOptions)
extends Serializable with
* 2. Merge types by choosing the lowest type necessary to cover equal keys
* 3. Replace any remaining null fields with string, the top type
*/
- def infer(xml: RDD[String], caseSensitive: Boolean): StructType = {
+ def infer(xml: RDD[String]): StructType = {
val schemaData = if (options.samplingRatio < 1.0) {
xml.sample(withReplacement = false, options.samplingRatio, 1)
} else {
xml
}
// perform schema inference on each row and merge afterwards
- val rootType = schemaData.mapPartitions { iter =>
- val xsdSchema =
Option(options.rowValidationXSDPath).map(ValidatorUtil.getSchema)
+ val rootType = schemaData
+ .mapPartitions { iter =>
+ val xsdSchema =
Option(options.rowValidationXSDPath).map(ValidatorUtil.getSchema)
- iter.flatMap { xml =>
- infer(xml, caseSensitive, xsdSchema)
+ iter.flatMap { xml =>
Review Comment:
Please revert this line.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala:
##########
@@ -97,20 +100,21 @@ private[sql] class XmlInferSchema(options: XmlOptions)
extends Serializable with
* 2. Merge types by choosing the lowest type necessary to cover equal keys
* 3. Replace any remaining null fields with string, the top type
*/
- def infer(xml: RDD[String], caseSensitive: Boolean): StructType = {
+ def infer(xml: RDD[String]): StructType = {
val schemaData = if (options.samplingRatio < 1.0) {
xml.sample(withReplacement = false, options.samplingRatio, 1)
} else {
xml
}
// perform schema inference on each row and merge afterwards
- val rootType = schemaData.mapPartitions { iter =>
- val xsdSchema =
Option(options.rowValidationXSDPath).map(ValidatorUtil.getSchema)
+ val rootType = schemaData
+ .mapPartitions { iter =>
+ val xsdSchema =
Option(options.rowValidationXSDPath).map(ValidatorUtil.getSchema)
Review Comment:
Please revert the three lines.
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/xml/XmlInferSchema.scala:
##########
@@ -97,20 +100,21 @@ private[sql] class XmlInferSchema(options: XmlOptions)
extends Serializable with
* 2. Merge types by choosing the lowest type necessary to cover equal keys
* 3. Replace any remaining null fields with string, the top type
*/
- def infer(xml: RDD[String], caseSensitive: Boolean): StructType = {
+ def infer(xml: RDD[String]): StructType = {
val schemaData = if (options.samplingRatio < 1.0) {
xml.sample(withReplacement = false, options.samplingRatio, 1)
} else {
xml
}
// perform schema inference on each row and merge afterwards
- val rootType = schemaData.mapPartitions { iter =>
- val xsdSchema =
Option(options.rowValidationXSDPath).map(ValidatorUtil.getSchema)
+ val rootType = schemaData
+ .mapPartitions { iter =>
+ val xsdSchema =
Option(options.rowValidationXSDPath).map(ValidatorUtil.getSchema)
- iter.flatMap { xml =>
- infer(xml, caseSensitive, xsdSchema)
+ iter.flatMap { xml =>
+ infer(xml, xsdSchema)
Review Comment:
Please use two-space indentation here.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]