MaxGekk commented on code in PR #56778:
URL: https://github.com/apache/spark/pull/56778#discussion_r3476079760
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/util/ArrowUtilsSuite.scala:
##########
@@ -154,6 +154,47 @@ class ArrowUtilsSuite extends SparkFunSuite {
ArrowUtils.toArrowSchema(schemaWithMeta, null, true, false)) ===
schemaWithMeta)
}
+ test("time") {
+ // Arrow's Time type has no precision field, so TIME(p) precision is
preserved via field
+ // metadata; the Arrow type itself stays Time(NANOSECOND, 64).
+ Seq(0, 3, 6, 9).foreach { p =>
+ val schema = new StructType().add("value", TimeType(p))
+ val arrowSchema = ArrowUtils.toArrowSchema(schema, null, true, false)
+ val fieldType =
arrowSchema.findField("value").getType.asInstanceOf[ArrowType.Time]
+ assert(fieldType.getUnit === TimeUnit.NANOSECOND)
+ assert(fieldType.getBitWidth === 8 * 8)
+ assert(ArrowUtils.fromArrowSchema(arrowSchema) === schema)
+ }
+
+ // Fallback: a nanosecond Arrow time without precision metadata maps to
canonical TIME(6).
+ def timeField: Field = new Field(
+ "value",
+ new FieldType(true, new ArrowType.Time(TimeUnit.NANOSECOND, 8 * 8),
null, null),
+ java.util.Collections.emptyList[Field]())
+ assert(ArrowUtils.fromArrowField(timeField) ===
TimeType(TimeType.MICROS_PRECISION))
+
+ // Fallback also covers a present-but-invalid precision key (out of [0, 9]
or non-numeric):
+ // the value is unusable, so the type maps to the canonical TIME(6) just
like the no-metadata
+ // case.
+ def timeFieldWithPrecision(precision: String): Field = new Field(
+ "value",
+ new FieldType(
+ true,
+ new ArrowType.Time(TimeUnit.NANOSECOND, 8 * 8),
+ null,
+ java.util.Collections.singletonMap("SPARK::time::precision",
precision)),
+ java.util.Collections.emptyList[Field]())
+ val micros = TimeType(TimeType.MICROS_PRECISION)
+ assert(ArrowUtils.fromArrowField(timeFieldWithPrecision("10")) === micros)
+ assert(ArrowUtils.fromArrowField(timeFieldWithPrecision("x")) === micros)
+
+ // The precision metadata key does not leak into the reconstructed column
Metadata.
+ val md = new MetadataBuilder().putString("city", "beijing").build()
+ val schemaWithMeta = new StructType().add("value", TimeType(3), nullable =
true, md)
+ assert(ArrowUtils.fromArrowSchema(
+ ArrowUtils.toArrowSchema(schemaWithMeta, null, true, false)) ===
schemaWithMeta)
+ }
+
Review Comment:
Good call — added both in `d17f3d7`: `p=7` is now in the round-trip loop
(`Seq(0, 3, 6, 7, 9)`), covering the microsecond-to-nanosecond precision
boundary, and a below-minimum `"-1"` case now sits alongside the existing
`"10"`/`"x"` invalid-key checks. Thanks for the review!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]