MaxGekk commented on code in PR #56602:
URL: https://github.com/apache/spark/pull/56602#discussion_r3441732921
##########
sql/api/src/main/scala/org/apache/spark/sql/functions.scala:
##########
@@ -8199,6 +8199,16 @@ object functions {
*/
def unix_micros(e: Column): Column = Column.fn("unix_micros", e)
+ /**
+ * Returns the number of nanoseconds since 1970-01-01 00:00:00 UTC for a nanosecond-precision
+ * timestamp (`TIMESTAMP_LTZ(p)` / `TIMESTAMP_NTZ(p)`, `p` in `[7, 9]`). The result is a
+ * lossless `DECIMAL(21, 0)`.
+ *
+ * @group datetime_funcs
+ * @since 4.3.0
+ */
+ def unix_nanos(e: Column): Column = Column.fn("unix_nanos", e)
Review Comment:
Good catch, you're right. `functions.scala` is in the shared `sql/api`
module, so the Scala Spark Connect client picks up `unix_nanos` automatically.
I've corrected the PR description: only the PySpark API (classic and Spark
Connect Python) and the R API are deferred, along with a Connect plan golden
test; all of these will be handled in follow-ups. Thanks!
##########
sql/catalyst/src/main/scala/org/apache/spark/sql/catalyst/expressions/datetimeExpressions.scala:
##########
@@ -854,6 +855,57 @@ case class UnixMicros(child: Expression) extends
TimestampToLongBase {
copy(child = newChild)
}
+// scalastyle:off line.contains.tab
+@ExpressionDescription(
+ usage = "_FUNC_(timestamp) - Returns the number of nanoseconds since 1970-01-01 00:00:00 UTC.",
+ examples = """
+ Examples:
+ > SET spark.sql.timestampNanosTypes.enabled=true;
+ spark.sql.timestampNanosTypes.enabled true
+ > SELECT _FUNC_(TIMESTAMP_NTZ '2008-12-25 15:30:00.123456789');
+ 1230219000123456789
+ """,
+ group = "datetime_funcs",
+ since = "4.3.0")
+// scalastyle:on line.contains.tab
+case class UnixNanos(child: Expression)
+ extends UnaryExpression with ExpectsInputTypes {
+ override def nullIntolerant: Boolean = true
+
+ // Accepts only the nanosecond-precision timestamp types TIMESTAMP_LTZ(p) /
TIMESTAMP_NTZ(p)
+ // (p in [7, 9]); the microsecond timestamp types are intentionally not
supported here.
Review Comment:
Done — reworded the comment to say support for the microsecond timestamp
types is deferred to a follow-up rather than a permanent exclusion.
##########
sql/catalyst/src/test/scala/org/apache/spark/sql/catalyst/expressions/DateExpressionsSuite.scala:
##########
@@ -1696,6 +1696,43 @@ class DateExpressionsSuite extends SparkFunSuite with
ExpressionEvalHelper {
checkEvaluation(UnixMicros(Literal(timestampWithNanos)), 1000001L)
}
+ test("SPARK-57527: unix_nanos over nanosecond-precision timestamps") {
+ import org.apache.spark.sql.catalyst.util.TimestampNanosTestUtils._
+
+ def expectedNanos(v: TimestampNanosVal): Decimal = {
+ val nanos = BigInt(v.epochMicros) * NANOS_PER_MICROS +
v.nanosWithinMicro.toInt
+ Decimal(BigDecimal(nanos), 21, 0)
+ }
+
+ // 2008-12-25 15:30:00.123456789 -> 1230219000123456789 nanos since the
epoch. unix_nanos
+ // applies no zone shift, so the NTZ wall-clock value and the LTZ instant
at the same UTC
+ // reading produce the same result; the declared precision does not
re-truncate the value.
+ val ntz = localDateTimeToNanosVal(timestampNTZ(2008, 12, 25, 15, 30, 0,
123456789))
+ val ltz =
instantToNanosVal(Instant.parse("2008-12-25T15:30:00.123456789Z"))
+ val post = Decimal(BigDecimal("1230219000123456789"), 21, 0)
+ foreachNanosPrecision { p =>
+ checkEvaluation(UnixNanos(Literal.create(ntz,
TimestampNTZNanosType(p))), post)
+ checkEvaluation(UnixNanos(Literal.create(ltz,
TimestampLTZNanosType(p))), post)
+ }
+
+ // Pre-epoch value exercises the negative-epoch path.
+ val preEpoch = localDateTimeToNanosVal(timestampNTZ(1960, 1, 1, 0, 0, 0,
1))
+ checkEvaluation(
+ UnixNanos(Literal.create(preEpoch, TimestampNTZNanosType(9))),
expectedNanos(preEpoch))
+
+ // Far-future value: epochMicros * 1000 overflows a 64-bit BIGINT, so the
DECIMAL result must
+ // exceed Long.MaxValue and the computation must not be done in long
arithmetic.
+ val far = localDateTimeToNanosVal(timestampNTZ(9999, 12, 31, 23, 59, 59,
999999999))
+ checkEvaluation(UnixNanos(Literal.create(far, TimestampNTZNanosType(9))),
expectedNanos(far))
+ val farResult =
+ UnixNanos(Literal.create(far,
TimestampNTZNanosType(9))).eval().asInstanceOf[Decimal]
+
assert(farResult.toJavaBigDecimal.compareTo(java.math.BigDecimal.valueOf(Long.MaxValue))
> 0)
+
+ // NULL input.
+ checkEvaluation(UnixNanos(Literal.create(null, TimestampNTZNanosType(9))),
null)
+ checkEvaluation(UnixNanos(Literal.create(null, TimestampLTZNanosType(9))),
null)
+ }
+
Review Comment:
Added in the latest commit: the test `SPARK-57527: unix_nanos rejects
non-nanosecond input types` asserts that micro `TimestampType` /
`TimestampNTZType` (and `DateType` / `LongType`) fail analysis with an
`UNEXPECTED_INPUT_TYPE` data type mismatch. Thanks!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]