uros-b commented on code in PR #56633:
URL: https://github.com/apache/spark/pull/56633#discussion_r3447523886
##########
connector/avro/src/test/scala/org/apache/spark/sql/avro/AvroSuite.scala:
##########
@@ -3523,6 +3523,55 @@ class AvroV2Suite extends AvroSuite with
ExplainSuiteHelper {
}
}
+ test("SPARK-57581: TIME is written as unit-correct time-micros for external
readers") {
+ // Expected microseconds-since-midnight for TIME'12:34:56.123456'
truncated to each precision.
+ val baseSeconds = (12 * 3600 + 34 * 60 + 56).toLong
+ val expectedMicros = Map(
+ 0 -> (baseSeconds * 1000000L + 0L),
+ 1 -> (baseSeconds * 1000000L + 100000L),
+ 2 -> (baseSeconds * 1000000L + 120000L),
+ 3 -> (baseSeconds * 1000000L + 123000L),
+ 4 -> (baseSeconds * 1000000L + 123400L),
+ 5 -> (baseSeconds * 1000000L + 123450L),
+ 6 -> (baseSeconds * 1000000L + 123456L))
+ // Valid micros-of-day range; values mislabeled as micros but holding
nanos would exceed this.
+ val microsPerDay = 24L * 3600L * 1000000L
+
+ (0 to 6).foreach { p =>
+ withTempPath { dir =>
+ spark.sql(s"SELECT CAST(TIME'12:34:56.123456' AS TIME($p)) as t")
+ .write.format("avro").save(dir.toString)
+
+ val avroFile = dir.listFiles()
+ .filter(f => f.isFile && f.getName.endsWith("avro"))
+ .head
+ val reader = new DataFileReader[GenericRecord](
+ avroFile, new GenericDatumReader[GenericRecord]())
+ try {
+ // The Avro field must be annotated with the time-micros logical
type.
+ val fieldSchema = reader.getSchema.getField("t").schema()
+ val timeSchema = if (fieldSchema.getType == Type.UNION) {
+ fieldSchema.getTypes.asScala.find(_.getType == Type.LONG).get
+ } else {
+ fieldSchema
+ }
+ assert(timeSchema.getLogicalType.getName == "time-micros",
+ s"precision $p should be written as time-micros")
+
+ assert(reader.hasNext)
+ val record = reader.next()
+ val stored = record.get("t").asInstanceOf[Long]
+ assert(stored == expectedMicros(p),
+ s"precision $p should store micros-of-day ${expectedMicros(p)},
but was $stored")
+ assert(stored >= 0 && stored < microsPerDay,
+ s"precision $p stored value $stored is outside the valid
micros-of-day range")
+ } finally {
+ reader.close()
+ }
+ }
+ }
+ }
Review Comment:
Currently, the test only covers the write path. The new test decodes the
file as raw Avro and asserts the stored micros — great for the serializer. But
the deserializer change (micros -> nanos) is the more user-visible improvement
for external files (Hive/Trino/fastavro-written time-micros with no
spark.sql.catalyst.type prop). Spark-to-Spark round-trips would still pass even
if read and write were both wrong in a compensating way. Consider adding a test
that writes a plain time-micros value via a raw Avro writer (no catalyst prop)
and reads it back through Spark, asserting the resulting TIME equals the
expected micros-of-day. That directly pins the read-path semantics.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]