MaxGekk commented on code in PR #56616:
URL: https://github.com/apache/spark/pull/56616#discussion_r3445639742
##########
sql/core/src/test/scala/org/apache/spark/sql/TimestampNanosFunctionsSuiteBase.scala:
##########
@@ -481,6 +481,41 @@ abstract class TimestampNanosFunctionsSuiteBase extends
SharedSparkSession {
checkAnswer(ltz.select(unix_nanos(col("c"))), Row(null))
}
}
+
+ test("SPARK-57526: timestamp_nanos builds nanosecond-precision TIMESTAMP_LTZ
values") {
+ // 1230219000123456789 ns since the epoch -> 2008-12-25 15:30:00.123456789
UTC. The result is a
+ // TIMESTAMP_LTZ(9); collecting it yields the absolute Instant regardless
of the session zone.
+ val nanos = 1230219000123456789L
+ val instant = Instant.parse("2008-12-25T15:30:00.123456789Z")
+ val sqlRes = spark.sql(s"SELECT timestamp_nanos($nanos)")
+ val colRes = spark.range(1).select(timestamp_nanos(lit(nanos)))
+ // The SQL and Scala Column API agree, return the expected instant, and
keep the LTZ(9) type.
+ checkAnswer(sqlRes, colRes)
+ checkAnswer(sqlRes, Row(instant))
+ assert(sqlRes.schema.head.dataType === TimestampLTZNanosType(9))
+
+ // A BIGINT argument is implicitly cast to DECIMAL, so the integral
literal works directly.
Review Comment:
Good catch, fixed in e81da36. The comment was left over from the original
`ImplicitCastInputTypes` + `Seq(DecimalType)` design; updated it to describe
the dedicated `IntegralType` path (widened to `BigInteger`, no DECIMAL cast).
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -759,6 +759,92 @@ case class MicrosToTimestamp(child: Expression)
copy(child = newChild)
}
+// scalastyle:off line.size.limit line.contains.tab
+@ExpressionDescription(
+ usage = "_FUNC_(nanoseconds) - Creates timestamp with the local time zone
and nanosecond precision (TIMESTAMP_LTZ(9)) from the number of nanoseconds
since UTC epoch.",
+ examples = """
+ Examples:
+ > SET spark.sql.timestampNanosTypes.enabled=true;
+ spark.sql.timestampNanosTypes.enabled true
+ > SELECT _FUNC_(1230219000123456789);
+ 2008-12-25 07:30:00.123456789
+ """,
+ group = "datetime_funcs",
+ since = "4.3.0")
+// scalastyle:on line.size.limit line.contains.tab
+case class NanosToTimestamp(child: Expression)
+ extends UnaryExpression with ExpectsInputTypes {
+ override def nullIntolerant: Boolean = true
+
+ // Accepts an integral or DECIMAL nanosecond count only. DECIMAL is required
to span the full
+ // [0001, 9999] calendar range: nanos for year 9999 (~2.5e20) overflow a
64-bit BIGINT, the same
+ // reason the inverse `unix_nanos` returns DECIMAL(21, 0); an integral
argument is widened to
+ // BigInteger directly. FLOAT/DOUBLE/STRING are intentionally rejected at
analysis rather than
+ // implicitly coerced: a fractional or string nanosecond count is not
meaningful, and the implicit
+ // DECIMAL coercion (FLOAT -> DECIMAL(14, 7), DOUBLE -> DECIMAL(30, 15))
would silently overflow
+ // for realistic magnitudes.
+ override def inputTypes: Seq[AbstractDataType] =
Seq(TypeCollection(IntegralType, DecimalType))
+
+ override def dataType: DataType = TimestampLTZNanosType(9)
+
+ // Maps the integer nanosecond count to the (epochMicros, nanosWithinMicro)
pair with floor
+ // semantics, so the sub-microsecond remainder is always in [0, 999]
(matching the negative-input
+ // behavior of `floorDiv`/`floorMod`). When `epochMicros` overflows 64 bits
-- i.e. the input is
+ // outside the representable timestamp range -- `longValueExact` throws,
which is surfaced as a
+ // DATETIME_OVERFLOW error.
+ override def nullSafeEval(input: Any): Any = {
+ val n = child.dataType match {
+ case _: DecimalType =>
+ input.asInstanceOf[Decimal].toJavaBigDecimal
+ .setScale(0, java.math.RoundingMode.FLOOR).toBigInteger
+ case _: IntegralType =>
+ BigInteger.valueOf(input.asInstanceOf[Number].longValue())
+ }
+ val thousand = BigInteger.valueOf(NANOS_PER_MICROS)
+ val rem = n.mod(thousand)
+ val micros = try {
+ n.subtract(rem).divide(thousand).longValueExact()
+ } catch {
+ case _: ArithmeticException => throw
QueryExecutionErrors.timestampNanosOverflowError(n)
Review Comment:
Intentional. It matches the sibling
`timestamp_micros`/`timestamp_millis`/`timestamp_seconds`, which likewise guard
only the 64-bit boundary (`Math.multiplyExact`) and do not validate the [0001,
9999] calendar range, so an `epochMicros` that fits in a long but lands past
year 9999 (up to the long-micros maximum, ~year 294247) yields an out-of-range
value rather than an error. I added an inline comment in e81da36 documenting
this so the behavior is explicit. I kept it consistent with the micro
constructors rather than introducing calendar-range validation here; happy to
add that in a follow-up if we'd prefer the stricter behavior across all of them.
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala:
##########
@@ -1743,6 +1743,62 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
}
+ test("SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from
nanoseconds") {
+ import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._
+
+ // DECIMAL input is accepted as-is; a wide DECIMAL(38, 0) holds every
input below.
+ def tsNanos(n: BigInt): NanosToTimestamp =
+ NanosToTimestamp(Literal.create(Decimal(BigDecimal(n), 38, 0),
DecimalType(38, 0)))
+
+ assert(tsNanos(0).dataType === TimestampLTZNanosType(9))
+
+ // The JIRA example: 1230219000123456789 ns -> 1230219000123456 micros +
789 ns.
+ checkEvaluation(tsNanos(BigInt("1230219000123456789")),
nanosVal(1230219000123456L, 789))
+
+ // An integral argument is accepted directly (widened to BigInteger),
exercising the
+ // IntegralType eval/codegen path rather than the DECIMAL one.
+ checkEvaluation(
+ NanosToTimestamp(Literal(1230219000123456789L)),
nanosVal(1230219000123456L, 789))
+ checkEvaluation(NanosToTimestamp(Literal(-1L)), nanosVal(-1L, 999))
+ checkEvaluation(NanosToTimestamp(Literal(1000)), nanosVal(1L, 0))
Review Comment:
Added TINYINT (`Literal(2.toByte)`) and SMALLINT (`Literal(1000.toShort)`)
cases in e81da36 so every integral width exercises the `(long)` codegen cast.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]