uros-b commented on code in PR #56697:
URL: https://github.com/apache/spark/pull/56697#discussion_r3460550254


##########
sql/core/src/test/scala/org/apache/spark/sql/execution/datasources/xml/XmlInferSchemaSuite.scala:
##########
@@ -646,6 +648,50 @@ class XmlInferSchemaSuite
     assert(xmlDF.schema === expectedSchema)
     checkAnswer(xmlDF, expectedAns)
   }
+
+  test("TIME type inference") {
+    val xmlString = Seq("""<ROW><t>13:31:24.123456</t></ROW>""")
+    val df = readData(xmlString)
+    assert(df.schema === new StructType().add("t", TimeType(TimeType.DEFAULT_PRECISION)))
+    checkAnswer(df, Row(java.time.LocalTime.of(13, 31, 24, 123456000)))
+  }
+
+  test("TIME type inference - disabled when timeType.enabled is false") {
+    withSQLConf(SQLConf.TIME_TYPE_ENABLED.key -> "false") {
+      val xmlString = Seq("""<ROW><t>13:31:24</t></ROW>""")
+      val df = readData(xmlString)
+      // Falls through to date/timestamp or string
+      assert(df.schema.fields.head.dataType != TimeType(TimeType.DEFAULT_PRECISION))
+    }
+  }
+
+  test("TIME type inference - negative cases") {
+    // Date strings should not infer as TIME
+    val xmlDate = Seq("""<ROW><t>2024-01-15</t></ROW>""")
+    val dfDate = readData(xmlDate)
+    assert(dfDate.schema.fields.head.dataType === DateType)
+
+    // Timestamp strings should not infer as TIME
+    val xmlTs = Seq("""<ROW><t>2024-01-15T13:31:24</t></ROW>""")
+    val dfTs = readData(xmlTs)
+    assert(dfTs.schema.fields.head.dataType != TimeType(TimeType.DEFAULT_PRECISION))
+  }
+
+  test("TIME type inference - cross-row merge") {
+    // TIME + TIME -> TIME
+    val xmlTime = Seq(
+      """<ROW><t>13:31:24</t></ROW>""",
+      """<ROW><t>09:15:00.123</t></ROW>""")
+    val dfTime = readData(xmlTime)
+    assert(dfTime.schema === new StructType().add("t", TimeType(TimeType.DEFAULT_PRECISION)))
+
+    // TIME + non-time string -> StringType
+    val xmlMixed = Seq(
+      """<ROW><t>13:31:24</t></ROW>""",
+      """<ROW><t>not-a-time</t></ROW>""")
+    val dfMixed = readData(xmlMixed)
+    assert(dfMixed.schema.fields.head.dataType === StringType)
+  }
 }

Review Comment:
   SPARK-57572 added a compatibleType unit test for TIME + DateType → StringType
   to both CSVInferSchemaSuite and JsonInferSchemaSuite. For consistency, please
   consider adding the equivalent unit test for XmlInferSchema.compatibleType.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to