This is an automated email from the ASF dual-hosted git repository.
MaxGekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 4148152e7b85 [SPARK-57034][SQL][TEST] Add TimestampNanosTestUtils and
RandomDataGenerator support for nanosecond timestamps
4148152e7b85 is described below
commit 4148152e7b85ea9288cb10e9ac2d1b571c307b1b
Author: Stevo Mitric <[email protected]>
AuthorDate: Fri May 29 16:19:48 2026 +0200
[SPARK-57034][SQL][TEST] Add TimestampNanosTestUtils and
RandomDataGenerator support for nanosecond timestamps
### What changes were proposed in this pull request?
Adds test infrastructure for nanosecond-capable timestamp types:
- New TimestampNanosTestUtils (parallel to DateTimeTestUtils) with
fixed-value builders, java.time / TimestampNanosVal conversions, a
specialNanosTs edge-case corpus, and a precision-loop helper.
- RandomDataGenerator.forType extended with cases for TimestampNTZNanosType
(→ LocalDateTime) and TimestampLTZNanosType (→ Instant). The existing micros
cases are unchanged.
- TimestampNanosTestUtilsSuite covering the helpers.
### Why are the changes needed?
Existing nanos tests hand-write TimestampNanosVal.fromParts(...) literals.
Downstream work (cast, coercion, Parquet, expression parity) needs a shared
edge-case corpus and a random generator. This centralizes both.
### Does this PR introduce _any_ user-facing change?
No
### How was this patch tested?
Tests in this PR.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Opus 4.7
Closes #56149 from stevomitric/stevomitric/test-utils.
Authored-by: Stevo Mitric <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../org/apache/spark/sql/RandomDataGenerator.scala | 34 +++
.../catalyst/util/TimestampNanosTestUtils.scala | 197 ++++++++++++++++
.../util/TimestampNanosTestUtilsSuite.scala | 250 +++++++++++++++++++++
.../org/apache/spark/sql/UnsafeRowSuite.scala | 7 +-
4 files changed, 487 insertions(+), 1 deletion(-)
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
index ad4b701c19cf..289b2ff6851f 100644
--- a/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
+++ b/sql/catalyst/src/test/scala/org/apache/spark/sql/RandomDataGenerator.scala
@@ -28,6 +28,7 @@ import scala.util.{Random, Try}
import org.apache.spark.sql.catalyst.CatalystTypeConverters
import org.apache.spark.sql.catalyst.util.DateTimeConstants._
import org.apache.spark.sql.catalyst.util.DateTimeUtils
+import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils
import org.apache.spark.sql.internal.SQLConf
import org.apache.spark.sql.types._
import org.apache.spark.sql.types.DayTimeIntervalType._
@@ -284,6 +285,39 @@ object RandomDataGenerator {
},
specialTs.map { s => LocalDateTime.parse(s.replace(" ", "T")) }
)
+ case t: TimestampNTZNanosType =>
+ // Honor the declared precision: e.g. TimestampNTZNanosType(7) values
must have at most
+ // 7 fractional-second digits, so the low (9-p) digits of
nano-of-second are zeroed for
+ // both the uniform random and the specialNanosTs corpus.
+ val truncate = TimestampNanosTestUtils.nanoOfSecTruncator(t.precision)
+ randomNumeric[LocalDateTime](
+ rand,
+ (rand: Random) => {
+ // Uniform micros for the high-order Long + an independent [0,
999] for the
+ // sub-microsecond nanos. plusNanos is safe here because
microsToLocalDateTime
+ // returns a value whose nano-of-second is a multiple of 1000, so
adding [0, 999]
+ // never crosses a microsecond boundary.
+ val ldt =
DateTimeUtils.microsToLocalDateTime(uniformMicrosRand(rand))
+ .plusNanos(rand.nextInt(NANOS_PER_MICROS.toInt).toLong)
+ ldt.withNano(truncate(ldt.getNano))
+ },
+ TimestampNanosTestUtils.specialNanosTs
+ .map(TimestampNanosTestUtils.parseSpecialNanosNTZ)
+ .map(ldt => ldt.withNano(truncate(ldt.getNano)))
+ )
+ case t: TimestampLTZNanosType =>
+ val truncate = TimestampNanosTestUtils.nanoOfSecTruncator(t.precision)
+ randomNumeric[Instant](
+ rand,
+ (rand: Random) => {
+ val instant =
DateTimeUtils.microsToInstant(uniformMicrosRand(rand))
+ .plusNanos(rand.nextInt(NANOS_PER_MICROS.toInt).toLong)
+ Instant.ofEpochSecond(instant.getEpochSecond,
truncate(instant.getNano).toLong)
+ },
+ TimestampNanosTestUtils.specialNanosTs
+ .map(s => TimestampNanosTestUtils.parseSpecialNanosLTZ(s,
ZoneId.systemDefault()))
+ .map(i => Instant.ofEpochSecond(i.getEpochSecond,
truncate(i.getNano).toLong))
+ )
case _: TimeType =>
val specialTimes = Seq(
"00:00:00",
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosTestUtils.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosTestUtils.scala
new file mode 100644
index 000000000000..e6138c60d7d8
--- /dev/null
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosTestUtils.scala
@@ -0,0 +1,197 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset}
+
+import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MICROS
+import org.apache.spark.sql.types.TimestampNTZNanosType
+import org.apache.spark.unsafe.types.TimestampNanosVal
+
+/**
+ * Helpers for tests involving nanosecond-capable timestamp types
+ * ([[org.apache.spark.sql.types.TimestampNTZNanosType]] /
+ * [[org.apache.spark.sql.types.TimestampLTZNanosType]], precision in [7, 9]).
+ *
+ * Provides three things:
+ * - Fixed-value builders that take readable `(year, month, day, ...,
nanoOfSec)` arguments and
+ * return external-representation `java.time` values ([[LocalDateTime]]
for NTZ,
+ * [[Instant]] for LTZ) -- same convention as
+ * [[org.apache.spark.sql.RandomDataGenerator]] for microsecond timestamps.
+ * - Conversion between `java.time` external types and the physical composite
+ * [[TimestampNanosVal]] (`epochMicros` + `nanosWithinMicro in [0, 999]`).
+ * - A shared edge-case corpus [[specialNanosTs]] extending the microsecond
`specialTs` set
+ * with 7-to-9 fractional digits.
+ */
+object TimestampNanosTestUtils {
+
+ /**
+ * Builds a [[LocalDateTime]] suitable for `TIMESTAMP_NTZ(p)` tests.
+ *
+ * @param nanoOfSec nanoseconds within the second, in [0, 999_999_999];
matches
+ * [[LocalDateTime.of]]'s `nanoOfSecond` argument and
+ * [[LocalDateTime.getNano]].
+ */
+ def timestampNTZ(
+ year: Int,
+ month: Int,
+ day: Int,
+ hour: Int = 0,
+ minute: Int = 0,
+ sec: Int = 0,
+ nanoOfSec: Int = 0): LocalDateTime = {
+ LocalDateTime.of(year, month, day, hour, minute, sec, nanoOfSec)
+ }
+
+ /**
+ * Builds an [[Instant]] suitable for `TIMESTAMP_LTZ(p)` tests.
+ *
+ * @param nanoOfSec nanoseconds within the second, in [0, 999_999_999].
+ * @param zoneId zone used to interpret the local time when computing the
instant; defaults
+ * to UTC so the wall-clock fields above match the
resulting epoch instant.
+ */
+ def timestampLTZ(
+ year: Int,
+ month: Int,
+ day: Int,
+ hour: Int = 0,
+ minute: Int = 0,
+ sec: Int = 0,
+ nanoOfSec: Int = 0,
+ zoneId: ZoneId = ZoneOffset.UTC): Instant = {
+ LocalDateTime.of(year, month, day, hour, minute, sec,
nanoOfSec).atZone(zoneId).toInstant
+ }
+
+ /**
+ * Builds a [[TimestampNanosVal]] from raw components. Range-checks the
`Int` argument before
+ * narrowing to `Short` so out-of-range callers see their actual input value
in the error
+ * message (rather than the silently-wrapped narrowed result).
+ */
+ def nanosVal(epochMicros: Long, nanosWithinMicro: Int): TimestampNanosVal = {
+ require(
+ nanosWithinMicro >= 0 && nanosWithinMicro <=
TimestampNanosVal.MAX_NANOS_WITHIN_MICRO,
+ s"nanosWithinMicro must be in [0,
${TimestampNanosVal.MAX_NANOS_WITHIN_MICRO}], " +
+ s"got: $nanosWithinMicro")
+ TimestampNanosVal.fromParts(epochMicros, nanosWithinMicro.toShort)
+ }
+
+ /**
+ * Converts a [[LocalDateTime]] (interpreted under `zoneId`, default UTC) to
its composite
+ * physical representation `(epochMicros, nanosWithinMicro)`.
+ */
+ def localDateTimeToNanosVal(
+ ldt: LocalDateTime,
+ zoneId: ZoneId = ZoneOffset.UTC): TimestampNanosVal = {
+ instantToNanosVal(ldt.atZone(zoneId).toInstant)
+ }
+
+ /**
+ * Converts an [[Instant]] to its composite physical representation
+ * `(epochMicros, nanosWithinMicro)`.
+ */
+ def instantToNanosVal(instant: Instant): TimestampNanosVal = {
+ val epochMicros = DateTimeUtils.instantToMicros(instant)
+ val nanosWithinMicro = (instant.getNano % NANOS_PER_MICROS).toShort
+ TimestampNanosVal.fromParts(epochMicros, nanosWithinMicro)
+ }
+
+ /**
+ * Converts a composite [[TimestampNanosVal]] back to a [[LocalDateTime]] at UTC. The result
+ * preserves nanosecond precision: round-tripping through
+ * [[localDateTimeToNanosVal]] / [[nanosValToLocalDateTime]] is the identity when
+ * [[localDateTimeToNanosVal]] is called with its default UTC zone.
+ */
+ def nanosValToLocalDateTime(v: TimestampNanosVal): LocalDateTime = {
+
DateTimeUtils.microsToLocalDateTime(v.epochMicros).plusNanos(v.nanosWithinMicro.toLong)
+ }
+
+ /**
+ * Converts a composite [[TimestampNanosVal]] back to an [[Instant]]. The
result preserves
+ * nanosecond precision: round-tripping through [[instantToNanosVal]] /
[[nanosValToInstant]]
+ * is the identity.
+ */
+ def nanosValToInstant(v: TimestampNanosVal): Instant = {
+
DateTimeUtils.microsToInstant(v.epochMicros).plusNanos(v.nanosWithinMicro.toLong)
+ }
+
+ /**
+ * Edge-case corpus for nanosecond timestamps, extending the microsecond
`specialTs` set used
+ * by [[org.apache.spark.sql.RandomDataGenerator]] with sub-microsecond
fractional digits.
+ * Each entry is an ISO-like `"yyyy-MM-dd HH:mm:ss.nnnnnnnnn"` string.
+ *
+ * Covers the four canonical micro dates (0001/1582/1970/9999): 0001, 1970 and 9999 each at
+ * `nanosWithinMicro in {0, 1, 999}`, and 1582 (the Julian/Gregorian cutover date) once, with
+ * a fraction spanning all 9 fractional digits.
+ */
+ val specialNanosTs: Seq[String] = Seq(
+ "0001-01-01 00:00:00.000000000",
+ "0001-01-01 00:00:00.000000001",
+ "0001-01-01 00:00:00.000000999",
+ "1582-10-15 23:59:59.123456789",
+ "1970-01-01 00:00:00.000000000",
+ "1970-01-01 00:00:00.000000001",
+ "1970-01-01 00:00:00.000000999",
+ "9999-12-31 23:59:59.999999000",
+ "9999-12-31 23:59:59.999999001",
+ "9999-12-31 23:59:59.999999999")
+
+ /**
+ * Parses an entry from [[specialNanosTs]] into an [[Instant]] (LTZ external
rep). The zone
+ * is required; callers should pass [[ZoneId.systemDefault]] when mirroring
the LTZ
+ * special-value corpus in [[org.apache.spark.sql.RandomDataGenerator]]'s
`TimestampType`
+ * case.
+ */
+ def parseSpecialNanosLTZ(s: String, zoneId: ZoneId): Instant = {
+ parseSpecialNanosNTZ(s).atZone(zoneId).toInstant
+ }
+
+ /** Parses an entry from [[specialNanosTs]] into a [[LocalDateTime]] (NTZ
external rep). */
+ def parseSpecialNanosNTZ(s: String): LocalDateTime =
LocalDateTime.parse(s.replace(' ', 'T'))
+
+ /**
+ * Runs `body` once for each valid nanosecond timestamp precision (currently
7, 8, 9).
+ * Both [[org.apache.spark.sql.types.TimestampNTZNanosType]] and
+ * [[org.apache.spark.sql.types.TimestampLTZNanosType]] share the same
precision band, so a
+ * single iterator is enough.
+ */
+ def foreachNanosPrecision(body: Int => Unit): Unit = {
+ TimestampNTZNanosType.MIN_PRECISION to TimestampNTZNanosType.MAX_PRECISION
foreach body
+ }
+
+ // Index `i` holds 10^i; sized to cover excessDigits in [0, NANOS_PRECISION].
+ private val POWERS_OF_10 =
+ Array(1, 10, 100, 1000, 10000, 100000, 1000000, 10000000, 100000000,
1000000000)
+
+ /**
+ * Returns a function that truncates a nano-of-second value
(`0..999_999_999`, as produced by
+ * [[LocalDateTime.getNano]] / [[Instant.getNano]]) to the given
fractional-second precision.
+ *
+ * For `precision = 9` the result is the identity. Each precision below the max zeroes one
+ * more low-order decimal digit so the surviving value has at most `precision` fractional
+ * digits and is valid for `TIMESTAMP(precision)`:
+ * - `precision = 8` zeroes the last digit (e.g. `123_456_789` ->
`123_456_780`).
+ * - `precision = 7` zeroes the last two digits (e.g. `123_456_789` ->
`123_456_700`).
+ */
+ def nanoOfSecTruncator(precision: Int): Int => Int = {
+ val excessDigits = TimestampNTZNanosType.NANOS_PRECISION - precision
+ if (excessDigits <= 0) identity
+ else {
+ val factor = POWERS_OF_10(excessDigits)
+ n => (n / factor) * factor
+ }
+ }
+}
diff --git
a/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosTestUtilsSuite.scala
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosTestUtilsSuite.scala
new file mode 100644
index 000000000000..6e03721d36cd
--- /dev/null
+++
b/sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/util/TimestampNanosTestUtilsSuite.scala
@@ -0,0 +1,250 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.sql.catalyst.util
+
+import java.time.{Instant, LocalDateTime, ZoneId, ZoneOffset}
+
+import scala.util.Random
+
+import org.apache.spark.SparkFunSuite
+import org.apache.spark.sql.RandomDataGenerator
+import org.apache.spark.sql.catalyst.util.DateTimeConstants.NANOS_PER_MICROS
+import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._
+import org.apache.spark.sql.types.{DataType, TimestampLTZNanosType,
TimestampNTZNanosType}
+import org.apache.spark.unsafe.types.TimestampNanosVal
+
+class TimestampNanosTestUtilsSuite extends SparkFunSuite {
+
+ test("timestampNTZ / timestampLTZ builders return java.time values with the
requested nanos") {
+ val ldt = timestampNTZ(2024, 1, 15, 10, 30, 45, 123_456_789)
+ assert(ldt === LocalDateTime.of(2024, 1, 15, 10, 30, 45, 123_456_789))
+ assert(ldt.getNano === 123_456_789)
+
+ val instant = timestampLTZ(2024, 1, 15, 10, 30, 45, 123_456_789)
+ assert(instant === LocalDateTime.of(2024, 1, 15, 10, 30, 45, 123_456_789)
+ .atZone(ZoneOffset.UTC).toInstant)
+ assert(instant.getNano === 123_456_789)
+ }
+
+ test("nanosVal enforces the [0, 999] invariant on nanosWithinMicro") {
+ // Boundary values accepted.
+ Seq(0, 1, 499, TimestampNanosVal.MAX_NANOS_WITHIN_MICRO).foreach { n =>
+ val v = nanosVal(1234L, n)
+ assert(v.epochMicros === 1234L)
+ assert(v.nanosWithinMicro === n.toShort)
+ }
+ // Out-of-range values rejected.
+ Seq(-1, 1000, 1001, Int.MaxValue).foreach { n =>
+ val e = intercept[Exception](nanosVal(0L, n))
+ assert(e.getMessage.contains("nanosWithinMicro"))
+ }
+ }
+
+ test("LocalDateTime <-> TimestampNanosVal round-trip preserves nanosecond
precision") {
+ Seq(
+ timestampNTZ(1970, 1, 1, 0, 0, 0, 0),
+ timestampNTZ(1970, 1, 1, 0, 0, 0, 1),
+ timestampNTZ(1970, 1, 1, 0, 0, 0,
TimestampNanosVal.MAX_NANOS_WITHIN_MICRO),
+ // 1582 Julian/Gregorian cutover -- the date most sensitive to any
change in
+ // microsToInstant / instantToMicros calendar handling.
+ timestampNTZ(1582, 10, 15, 23, 59, 59, 123_456_789),
+ timestampNTZ(2024, 1, 15, 10, 30, 45, 123_456_789),
+ timestampNTZ(9999, 12, 31, 23, 59, 59, 999_999_999)).foreach { ldt =>
+ val v = localDateTimeToNanosVal(ldt)
+ assert(v.nanosWithinMicro >= 0 &&
+ v.nanosWithinMicro <= TimestampNanosVal.MAX_NANOS_WITHIN_MICRO,
+ s"nanosWithinMicro out of range for $ldt: ${v.nanosWithinMicro}")
+ assert(nanosValToLocalDateTime(v) === ldt)
+ }
+ }
+
+ test("Instant <-> TimestampNanosVal round-trip preserves nanosecond
precision") {
+ Seq(
+ Instant.EPOCH,
+ Instant.EPOCH.plusNanos(1),
+ Instant.EPOCH.plusNanos(TimestampNanosVal.MAX_NANOS_WITHIN_MICRO.toLong),
+ timestampLTZ(1582, 10, 15, 23, 59, 59, 123_456_789),
+ timestampLTZ(2024, 1, 15, 10, 30, 45, 123_456_789),
+ timestampLTZ(9999, 12, 31, 23, 59, 59, 999_999_999)).foreach { instant =>
+ val v = instantToNanosVal(instant)
+ assert(v.nanosWithinMicro >= 0 &&
+ v.nanosWithinMicro <= TimestampNanosVal.MAX_NANOS_WITHIN_MICRO,
+ s"nanosWithinMicro out of range for $instant: ${v.nanosWithinMicro}")
+ assert(nanosValToInstant(v) === instant)
+ }
+ }
+
+ test("LDT split: epochMicros truncates, nanosWithinMicro keeps remainder") {
+ // ".000000789" -> epochMicros at .000000 boundary, nanosWithinMicro = 789.
+ val ldt = LocalDateTime.of(1970, 1, 1, 0, 0, 0, 789)
+ val v = localDateTimeToNanosVal(ldt)
+ assert(v.epochMicros === 0L)
+ assert(v.nanosWithinMicro === 789.toShort)
+
+ // ".000001789" -> epochMicros = 1, nanosWithinMicro = 789.
+ val ldt2 = LocalDateTime.of(1970, 1, 1, 0, 0, 0, 1789)
+ val v2 = localDateTimeToNanosVal(ldt2)
+ assert(v2.epochMicros === 1L)
+ assert(v2.nanosWithinMicro === 789.toShort)
+ }
+
+ test("specialNanosTs entries parse without exception via both NTZ and LTZ
helpers") {
+ val zone = ZoneId.of("America/Los_Angeles")
+ specialNanosTs.foreach { s =>
+ val ldt = parseSpecialNanosNTZ(s)
+ // Sanity-check nano-of-second range across the corpus.
+ assert(ldt.getNano >= 0 && ldt.getNano <= 999_999_999)
+
+ val instant = parseSpecialNanosLTZ(s, zone)
+ assert(instant.getNano === ldt.getNano)
+ }
+ // Corpus invariant: at least one entry must carry sub-microsecond digits,
otherwise the
+ // suite degenerates to exercising the micros path only.
+ assert(specialNanosTs.exists(s => parseSpecialNanosNTZ(s).getNano %
NANOS_PER_MICROS != 0),
+ "specialNanosTs corpus has no sub-micro entries")
+ }
+
+ test("nanoOfSecTruncator zeros (9 - precision) low decimal digits") {
+ // precision 9: identity on every nano-of-second.
+ val truncP9 = nanoOfSecTruncator(9)
+ Seq(0, 1, 999, 123_456_789, 999_999_999).foreach { n =>
+ assert(truncP9(n) === n, s"precision 9 truncator should be identity, but
$n -> ${truncP9(n)}")
+ }
+
+ // precision 8: zeroes the last digit (factor 10).
+ val truncP8 = nanoOfSecTruncator(8)
+ assert(truncP8(123_456_789) === 123_456_780)
+ assert(truncP8(123_456_780) === 123_456_780)
+ assert(truncP8(999_999_999) === 999_999_990)
+ assert(truncP8(9) === 0)
+ assert(truncP8(0) === 0)
+
+ // precision 7: zeroes the last two digits (factor 100).
+ val truncP7 = nanoOfSecTruncator(7)
+ assert(truncP7(123_456_789) === 123_456_700)
+ assert(truncP7(123_456_700) === 123_456_700)
+ assert(truncP7(999_999_999) === 999_999_900)
+ assert(truncP7(99) === 0)
+ assert(truncP7(100) === 100)
+ }
+
+ test("RandomDataGenerator honors precision for TimestampNTZNanosType") {
+ foreachNanosPrecision { p =>
+ val factor = math.pow(10.0, (9 - p).toDouble).toInt
+ val gen = RandomDataGenerator.forType(TimestampNTZNanosType(p), nullable
= false,
+ rand = new Random(42L))
+ .getOrElse(fail(s"No generator for TimestampNTZNanosType($p)"))
+ Iterator.fill(200)(gen()).foreach {
+ case ldt: LocalDateTime =>
+ assert(ldt.getNano % factor === 0,
+ s"p=$p generated $ldt with nanoOfSec ${ldt.getNano}; not divisible
by $factor")
+ case other => fail(s"Expected LocalDateTime, got $other:
${other.getClass}")
+ }
+ }
+ }
+
+ test("RandomDataGenerator honors precision for TimestampLTZNanosType") {
+ foreachNanosPrecision { p =>
+ val factor = math.pow(10.0, (9 - p).toDouble).toInt
+ val gen = RandomDataGenerator.forType(TimestampLTZNanosType(p), nullable
= false,
+ rand = new Random(42L))
+ .getOrElse(fail(s"No generator for TimestampLTZNanosType($p)"))
+ Iterator.fill(200)(gen()).foreach {
+ case ins: Instant =>
+ assert(ins.getNano % factor === 0,
+ s"p=$p generated $ins with nanoOfSec ${ins.getNano}; not divisible
by $factor")
+ case other => fail(s"Expected Instant, got $other: ${other.getClass}")
+ }
+ }
+ }
+
+ test("foreachNanosPrecision iterates [MIN_PRECISION, MAX_PRECISION]
inclusive") {
+ val seen = scala.collection.mutable.ArrayBuffer.empty[Int]
+ foreachNanosPrecision(seen.append(_))
+ assert(seen.toSeq === (TimestampNTZNanosType.MIN_PRECISION
+ to TimestampNTZNanosType.MAX_PRECISION))
+ assert(seen.toSeq === (TimestampLTZNanosType.MIN_PRECISION
+ to TimestampLTZNanosType.MAX_PRECISION),
+ "NTZ and LTZ should share the same precision band")
+ }
+
+ test("RandomDataGenerator produces non-null LocalDateTime for
TimestampNTZNanosType") {
+ foreachNanosPrecision { p =>
+ val gen = RandomDataGenerator.forType(TimestampNTZNanosType(p), nullable
= false,
+ rand = new Random(42L))
+ .getOrElse(fail(s"No generator for TimestampNTZNanosType($p)"))
+ val values = Iterator.fill(200)(gen()).toList
+ assert(!values.contains(null), "nullable = false should never produce
null")
+ values.foreach(v => assert(v.isInstanceOf[LocalDateTime], s"got $v:
${v.getClass}"))
+ // Sub-microsecond variation is the whole point: at least one sample
must carry
+ // nanosWithinMicro > 0.
+ assert(values.exists { case ldt: LocalDateTime => ldt.getNano %
NANOS_PER_MICROS != 0 },
+ "Random generator never produced sub-microsecond nanos in 200 samples")
+ }
+ }
+
+ test("RandomDataGenerator produces non-null Instant for
TimestampLTZNanosType") {
+ foreachNanosPrecision { p =>
+ val gen = RandomDataGenerator.forType(TimestampLTZNanosType(p), nullable
= false,
+ rand = new Random(42L))
+ .getOrElse(fail(s"No generator for TimestampLTZNanosType($p)"))
+ val values = Iterator.fill(200)(gen()).toList
+ assert(!values.contains(null), "nullable = false should never produce
null")
+ values.foreach(v => assert(v.isInstanceOf[Instant], s"got $v:
${v.getClass}"))
+ assert(values.exists { case ins: Instant => ins.getNano %
NANOS_PER_MICROS != 0 },
+ "Random generator never produced sub-microsecond nanos in 200 samples")
+ }
+ }
+
+ test("RandomDataGenerator nullable = true mixes in nulls for nanos timestamp
types") {
+ val gen = RandomDataGenerator.forType(TimestampNTZNanosType(9), nullable =
true,
+ rand = new Random(42L))
+ .getOrElse(fail("No generator for TimestampNTZNanosType(9)"))
+ // Couples to RandomDataGenerator.PROBABILITY_OF_NULL (~10%). 500 samples
gives a vanishing
+ // false-fail probability at the current rate; if the constant ever drops
below ~1% this
+ // could flake.
+ assert(Iterator.fill(500)(gen()).contains(null), "Expected at least one
null in 500 samples")
+ }
+
+ test("Seeded RandomDataGenerator -> composite -> java.time round-trip is the
identity") {
+ // The seeded smoke test required by the JIRA: generate java.time values
from a seeded
+ // RandomDataGenerator, push them through the composite physical
representation and back,
+ // and assert equality.
+ Seq[(DataType, Any => TimestampNanosVal, TimestampNanosVal => Any)](
+ (TimestampNTZNanosType(9),
+ v => localDateTimeToNanosVal(v.asInstanceOf[LocalDateTime]),
+ nanosValToLocalDateTime),
+ (TimestampLTZNanosType(9),
+ v => instantToNanosVal(v.asInstanceOf[Instant]),
+ nanosValToInstant)).foreach { case (dt, toComposite, fromComposite) =>
+ val gen = RandomDataGenerator.forType(dt, nullable = false, rand = new
Random(42L))
+ .getOrElse(fail(s"No generator for $dt"))
+ (1 to 1000).foreach { i =>
+ val original = gen()
+ val composite = toComposite(original)
+ assert(composite.nanosWithinMicro >= 0 && composite.nanosWithinMicro
<= 999,
+ s"iter=$i value=$original produced out-of-range nanosWithinMicro " +
+ s"${composite.nanosWithinMicro}")
+ val roundTripped = fromComposite(composite)
+ assert(roundTripped === original,
+ s"iter=$i round-trip failed: original=$original,
composite=$composite, " +
+ s"roundTripped=$roundTripped")
+ }
+ }
+ }
+}
diff --git a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
index 18a6c538e0a8..da862a5f9c65 100644
--- a/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
+++ b/sql/core/src/test/scala/org/apache/spark/sql/UnsafeRowSuite.scala
@@ -129,7 +129,12 @@ class UnsafeRowSuite extends SparkFunSuite {
test("calling get(ordinal, datatype) on null columns") {
val row = InternalRow.apply(null)
val unsafeRow =
UnsafeProjection.create(Array[DataType](NullType)).apply(row)
- for (dataType <- DataTypeTestUtils.atomicTypes) {
+ // Iterate `atomicTypes` plus the parameterized nanosecond timestamp
types, which aren't yet
+ // included in `DataTypeTestUtils.atomicTypes` but should still return
null for null columns
+ // (their physical row support was added in SPARK-56981).
+ val typesToCheck: Set[DataType] =
DataTypeTestUtils.atomicTypes.toSet[DataType] ++
+ Set(TimestampNTZNanosType(9), TimestampLTZNanosType(9))
+ for (dataType <- typesToCheck) {
assert(unsafeRow.get(0, dataType) === null)
}
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]