uros-b commented on code in PR #56634:
URL: https://github.com/apache/spark/pull/56634#discussion_r3447604019
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/json/JsonInferSchemaSuite.scala:
##########
@@ -120,4 +120,43 @@ class JsonInferSchemaSuite extends SparkFunSuite with
SQLHelper {
"""{"a": "2884-06-24T02:45:51.138"}""",
StringType)
}
+
+ test("SPARK-57572: infer TimeType when timeType.enabled is true") {
+ withSQLConf(SQLConf.TIME_TYPE_ENABLED.key -> "true") {
+ checkType(Map("inferTimestamp" -> "true"), """{"a": "12:13:14"}""",
+ TimeType(TimeType.MICROS_PRECISION))
+ checkType(Map("inferTimestamp" -> "true"), """{"a":
"23:59:59.123456"}""",
+ TimeType(TimeType.MICROS_PRECISION))
+ // Negative: date and timestamp strings should NOT infer as TimeType
+ checkType(Map("inferTimestamp" -> "true"), """{"a": "2023-01-01"}""",
TimestampType)
+ checkType(Map("inferTimestamp" -> "true"),
+ """{"a": "2024-06-24T02:45:51.138"}""", TimestampType)
+ }
+ }
+
+ test("SPARK-57572: TimeType not inferred when timeType.enabled is false") {
+ withSQLConf(SQLConf.TIME_TYPE_ENABLED.key -> "false") {
+ // With inferTimestamp, time-only strings fall through to timestamp
(lenient parser)
+ checkType(Map("inferTimestamp" -> "true"), """{"a": "12:13:14"}""",
TimestampType)
+ // Without inferTimestamp, time-only strings become StringType
+ checkType(Map.empty[String, String], """{"a": "12:13:14"}""", StringType)
+ }
+ }
+
+ test("SPARK-57572: TimeType not inferred without inferTimestamp") {
+ withSQLConf(SQLConf.TIME_TYPE_ENABLED.key -> "true") {
+ // inferTimestamp defaults to false; time inference requires it
+ checkType(Map.empty[String, String], """{"a": "12:13:14"}""", StringType)
+ }
+ }
+
+ test("SPARK-57572: TimeType cross-row merge via compatibleType") {
+ withSQLConf(SQLConf.TIME_TYPE_ENABLED.key -> "true") {
+ // Single time value infers as TimeType
+ checkType(Map("inferTimestamp" -> "true"), """{"a": "12:13:14"}""",
+ TimeType(TimeType.MICROS_PRECISION))
+ // Non-time value stays StringType even with timeType enabled
+ checkType(Map("inferTimestamp" -> "true"), """{"a": "not-a-time"}""",
StringType)
+ }
+ }
}
Review Comment:
On another note, we don't have any end-to-end read tests. All tests operate
directly on CSVInferSchema/JsonInferSchema. There's no round-trip test through
spark.read.csv(...) / spark.read.json(...) verifying that the inferred schema
is TimeType and the values parse correctly. Since this is the first time
inference can produce TimeType, a single round-trip test in CSVSuite/JsonSuite
would guard the full inference+parse path.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]