stevomitric commented on code in PR #56616:
URL: https://github.com/apache/spark/pull/56616#discussion_r3449140378
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -759,6 +759,98 @@ case class MicrosToTimestamp(child: Expression)
copy(child = newChild)
}
+// scalastyle:off line.size.limit line.contains.tab
+@ExpressionDescription(
+ usage = "_FUNC_(nanoseconds) - Creates timestamp with the local time zone
and nanosecond precision (TIMESTAMP_LTZ(9)) from the number of nanoseconds
since UTC epoch.",
+ examples = """
+ Examples:
+ > SET spark.sql.timestampNanosTypes.enabled=true;
+ spark.sql.timestampNanosTypes.enabled true
+ > SELECT _FUNC_(1230219000123456789);
+ 2008-12-25 07:30:00.123456789
+ """,
+ group = "datetime_funcs",
+ since = "4.3.0")
+// scalastyle:on line.size.limit line.contains.tab
+case class NanosToTimestamp(child: Expression)
+ extends UnaryExpression with ExpectsInputTypes {
+ override def nullIntolerant: Boolean = true
+
+ // Accepts an integral or DECIMAL nanosecond count only. DECIMAL is required
to span the full
+ // [0001, 9999] calendar range: nanos for year 9999 (~2.5e20) overflow a
64-bit BIGINT, the same
+ // reason the inverse `unix_nanos` returns DECIMAL(21, 0); an integral
argument is widened to
+ // BigInteger directly. FLOAT/DOUBLE/STRING are intentionally rejected at
analysis rather than
+ // implicitly coerced: a fractional or string nanosecond count is not
meaningful, and the implicit
+ // DECIMAL coercion (FLOAT -> DECIMAL(14, 7), DOUBLE -> DECIMAL(30, 15))
would silently overflow
+ // for realistic magnitudes.
+ override def inputTypes: Seq[AbstractDataType] =
Seq(TypeCollection(IntegralType, DecimalType))
Review Comment:
`TypeCollection(IntegralType, DecimalType)` accepts a DECIMAL of any scale
(no implicit coercion, since this mixes in `ExpectsInputTypes`), so
`timestamp_nanos(CAST(x AS DECIMAL(p, s>0)))` passes analysis and the fraction
is silently floored by `setScale(0, FLOOR)` below. The rationale comment
justifies rejecting fractional FLOAT/DOUBLE as "not meaningful" — could we
extend it to say a fractional DECIMAL is accepted and floored to whole
nanoseconds, so the asymmetry is deliberate? (Note `timestamp_seconds`'s
DECIMAL branch errors on sub-resolution input instead — a sentence on why nanos
floors would help.)
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala:
##########
@@ -1743,6 +1743,65 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
}
}
+ test("SPARK-57526: timestamp_nanos builds a TIMESTAMP_LTZ(9) from
nanoseconds") {
+ import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._
+
+ // DECIMAL input is accepted as-is; a wide DECIMAL(38, 0) holds every
input below.
+ def tsNanos(n: BigInt): NanosToTimestamp =
+ NanosToTimestamp(Literal.create(Decimal(BigDecimal(n), 38, 0),
DecimalType(38, 0)))
+
+ assert(tsNanos(0).dataType === TimestampLTZNanosType(9))
+
+ // The JIRA example: 1230219000123456789 ns -> 1230219000123456 micros +
789 ns.
+ checkEvaluation(tsNanos(BigInt("1230219000123456789")),
nanosVal(1230219000123456L, 789))
+
+ // An integral argument is accepted directly (widened to BigInteger),
exercising the
+ // IntegralType eval/codegen path rather than the DECIMAL one. Cover every
integral width
+ // (TINYINT/SMALLINT/INT/BIGINT) so the `(long)` codegen cast is checked
for each.
+ checkEvaluation(NanosToTimestamp(Literal(2.toByte)), nanosVal(0L, 2))
+ checkEvaluation(NanosToTimestamp(Literal(1000.toShort)), nanosVal(1L, 0))
+ checkEvaluation(NanosToTimestamp(Literal(1000)), nanosVal(1L, 0))
+ checkEvaluation(
+ NanosToTimestamp(Literal(1230219000123456789L)),
nanosVal(1230219000123456L, 789))
+ checkEvaluation(NanosToTimestamp(Literal(-1L)), nanosVal(-1L, 999))
+
+ // FLOAT/DOUBLE/STRING are rejected at analysis: a fractional or string
nanosecond count is not
+ // meaningful, and the implicit DECIMAL coercion would silently overflow
for realistic values.
+ Seq(Literal(1.0f), Literal(1.0d), Literal("1")).foreach { lit =>
+ val mismatch =
NanosToTimestamp(lit).checkInputDataTypes().asInstanceOf[DataTypeMismatch]
+ assert(mismatch.errorSubClass == "UNEXPECTED_INPUT_TYPE")
+ }
+
+ // Pre-epoch / negative inputs use floor semantics, so nanosWithinMicro
stays in [0, 999]:
+ // -1 ns floors to epochMicros = -1 with a 999 ns remainder.
+ checkEvaluation(tsNanos(BigInt(-1)), nanosVal(-1L, 999))
+ checkEvaluation(tsNanos(BigInt(-1000)), nanosVal(-1L, 0))
+ checkEvaluation(tsNanos(BigInt(-1500)), nanosVal(-2L, 500))
Review Comment:
The `setScale(0, FLOOR)` direction is never actually exercised — every
DECIMAL input in the suite is scale 0, so the scale-reduction is a no-op under
test. Could we add one positive and one negative scale>0 case here, e.g.
`tsNanos`-style `Decimal(BigDecimal("1500.9"), ...) -> nanosVal(1L, 500)` and
especially `Decimal(BigDecimal("-0.5"), ...) -> nanosVal(-1L, 999)`? The
negative one pins FLOOR-toward-−∞ vs HALF_UP/DOWN.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]