This is an automated email from the ASF dual-hosted git repository.
MaxGekk pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push:
new 0b0ffb711b2b [SPARK-57103][SQL] Add Comparable to TimestampNanosVal
0b0ffb711b2b is described below
commit 0b0ffb711b2bf5b37eb02ef668b9896d9af0b8b0
Author: Stevo Mitric <[email protected]>
AuthorDate: Fri May 29 13:10:57 2026 +0200
[SPARK-57103][SQL] Add Comparable to TimestampNanosVal
### What changes were proposed in this pull request?
Make `TimestampNanosVal` implement `Comparable<TimestampNanosVal>`, ordered
lexicographically by `epochMicros` then `nanosWithinMicro`.
This is the first part of SPARK-57103 (ordering + compare + hash for nanosecond
timestamp types). Follow-ups will:
- wire `PhysicalDataType.ordering` for the two nanos physical types;
- extend the hash expressions (`hash`, `xxhash64`, `murmur3`).
### Why are the changes needed?
`TimestampNanosVal` (added in SPARK-56981) is the physical value class for
`TimestampNTZNanosType(p)` / `TimestampLTZNanosType(p)`. Without a `compareTo`,
Catalyst cannot give it an `Ordering`, which blocks `ORDER BY`, sort-merge
join, sort-based `GROUP BY`, `DISTINCT`, and any other operator that needs
a total order on the type.
### Does this PR introduce _any_ user-facing change?
No.
### How was this patch tested?
New unit tests added to `TimestampNanosSuite` in this PR.
### Was this patch authored or co-authored using generative AI tooling?
Generated-by: Claude Code (Claude Opus 4.7)
Closes #56187 from stevomitric/stevomitric/SPARK-57103-compare.
Authored-by: Stevo Mitric <[email protected]>
Signed-off-by: Max Gekk <[email protected]>
---
.../spark/unsafe/types/TimestampNanosVal.java | 17 +++-
.../spark/unsafe/types/TimestampNanosSuite.java | 94 ++++++++++++++++++++++
2 files changed, 110 insertions(+), 1 deletion(-)
diff --git
a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/TimestampNanosVal.java
b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/TimestampNanosVal.java
index 973547f1d9f3..fb501ef179c8 100644
---
a/common/unsafe/src/main/java/org/apache/spark/unsafe/types/TimestampNanosVal.java
+++
b/common/unsafe/src/main/java/org/apache/spark/unsafe/types/TimestampNanosVal.java
@@ -43,7 +43,7 @@ import java.util.Map;
* @since 4.3.0
*/
@Unstable
-public final class TimestampNanosVal implements Serializable {
+public final class TimestampNanosVal implements Comparable<TimestampNanosVal>,
Serializable {
/** Size of the {@code UnsafeRow} variable-length payload for this type (two
8-byte words). */
public static final int SIZE_IN_BYTES = 16;
@@ -115,6 +115,21 @@ public final class TimestampNanosVal implements
Serializable {
return 31 * Long.hashCode(epochMicros) + nanosWithinMicro;
}
+ /**
+ * Lexicographic order on the pair ({@link #epochMicros}, {@link
#nanosWithinMicro}), which
+ * matches calendar order: instants with a smaller {@code epochMicros} come
first, and within
+ * the same microsecond the value with fewer extra nanoseconds comes first.
Consistent with
+ * {@link #equals}: {@code a.compareTo(b) == 0} iff {@code a.equals(b)}.
+ */
+ @Override
+ public int compareTo(TimestampNanosVal that) {
+ // Long.compare avoids the overflow that plain subtraction has near
Long.MinValue/MaxValue.
+ int cmp = Long.compare(epochMicros, that.epochMicros);
+ if (cmp != 0) return cmp;
+ // short - short widens to int, so any pair of shorts fits without
overflow.
+ return nanosWithinMicro - that.nanosWithinMicro;
+ }
+
@Override
public String toString() {
return "TimestampNanosVal(" + epochMicros + ", " + nanosWithinMicro + ")";
diff --git
a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/TimestampNanosSuite.java
b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/TimestampNanosSuite.java
index 8a37767add9e..d748eb977a0b 100644
---
a/common/unsafe/src/test/java/org/apache/spark/unsafe/types/TimestampNanosSuite.java
+++
b/common/unsafe/src/test/java/org/apache/spark/unsafe/types/TimestampNanosSuite.java
@@ -20,6 +20,8 @@ package org.apache.spark.unsafe.types;
import org.apache.spark.SparkIllegalArgumentException;
import org.junit.jupiter.api.Test;
+import java.util.Arrays;
+
import static org.junit.jupiter.api.Assertions.*;
public class TimestampNanosSuite {
@@ -75,4 +77,96 @@ public class TimestampNanosSuite {
assertEquals(0L, TimestampNanosVal.ZERO.epochMicros);
assertEquals((short) 0, TimestampNanosVal.ZERO.nanosWithinMicro);
}
+
+ @Test
+ public void compareToOrdersByEpochMicrosThenNanos() {
+ TimestampNanosVal a = TimestampNanosVal.fromParts(1000L, (short) 100);
+ TimestampNanosVal b = TimestampNanosVal.fromParts(1001L, (short) 0);
+ TimestampNanosVal c = TimestampNanosVal.fromParts(1000L, (short) 101);
+ TimestampNanosVal d = TimestampNanosVal.fromParts(1000L, (short) 100);
+
+ assertTrue(a.compareTo(b) < 0);
+ assertTrue(b.compareTo(a) > 0);
+ assertTrue(a.compareTo(c) < 0);
+ assertTrue(c.compareTo(a) > 0);
+ assertEquals(0, a.compareTo(d));
+ }
+
+ @Test
+ public void compareToIsConsistentWithEquals() {
+ // The Comparable contract requires a.compareTo(b) == 0 iff a.equals(b).
Without this,
+ // TreeSet/TreeMap-backed Catalyst ops would silently dedup or lose values.
+ TimestampNanosVal a = TimestampNanosVal.fromParts(-42L, (short) 7);
+ TimestampNanosVal b = TimestampNanosVal.fromParts(-42L, (short) 7);
+ assertEquals(a, b);
+ assertEquals(0, a.compareTo(b));
+ assertEquals(0, b.compareTo(a));
+ }
+
+ @Test
+ public void compareToHandlesLongBoundaries() {
+ // Plain (a.epochMicros - b.epochMicros) would overflow here; Long.compare
must protect us.
+ TimestampNanosVal min = TimestampNanosVal.fromParts(Long.MIN_VALUE,
(short) 0);
+ TimestampNanosVal minPlusNanos =
TimestampNanosVal.fromParts(Long.MIN_VALUE, (short) 999);
+ TimestampNanosVal max = TimestampNanosVal.fromParts(Long.MAX_VALUE,
(short) 0);
+ TimestampNanosVal maxMinusNanos =
TimestampNanosVal.fromParts(Long.MAX_VALUE - 1, (short) 999);
+
+ assertTrue(min.compareTo(max) < 0);
+ assertTrue(max.compareTo(min) > 0);
+ // Within the same epochMicros, the nanos tie-breaker decides.
+ assertTrue(min.compareTo(minPlusNanos) < 0);
+ // epochMicros wins even when the smaller epochMicros has larger nanos.
+ assertTrue(maxMinusNanos.compareTo(max) < 0);
+ }
+
+ @Test
+ public void compareToHandlesNegativeEpoch() {
+ // Pre-epoch instants are valid (the SPIP keeps the 0001-9999 calendar
range). Verify
+ // a negative epochMicros sorts before a positive one regardless of the
nanos field.
+ TimestampNanosVal preEpoch = TimestampNanosVal.fromParts(-1L, (short) 999);
+ TimestampNanosVal postEpoch = TimestampNanosVal.fromParts(0L, (short) 0);
+ assertTrue(preEpoch.compareTo(postEpoch) < 0);
+ assertTrue(postEpoch.compareTo(preEpoch) > 0);
+ }
+
+ @Test
+ public void compareToIsAntisymmetricAndTransitive() {
+ TimestampNanosVal a = TimestampNanosVal.fromParts(10L, (short) 1);
+ TimestampNanosVal b = TimestampNanosVal.fromParts(10L, (short) 2);
+ TimestampNanosVal c = TimestampNanosVal.fromParts(11L, (short) 0);
+
+ // antisymmetry: sign(a.compareTo(b)) == -sign(b.compareTo(a))
+ assertEquals(Integer.signum(a.compareTo(b)),
-Integer.signum(b.compareTo(a)));
+ assertEquals(Integer.signum(b.compareTo(c)),
-Integer.signum(c.compareTo(b)));
+ // transitivity: a < b and b < c implies a < c
+ assertTrue(a.compareTo(b) < 0);
+ assertTrue(b.compareTo(c) < 0);
+ assertTrue(a.compareTo(c) < 0);
+ }
+
+ @Test
+ public void compareToThrowsOnNull() {
+ // The Comparable javadoc requires NullPointerException when the argument
is null.
+ TimestampNanosVal v = TimestampNanosVal.fromParts(0L, (short) 0);
+ assertThrows(NullPointerException.class, () -> v.compareTo(null));
+ }
+
+ @Test
+ public void arraysSortUsesComparable() {
+ TimestampNanosVal[] xs = new TimestampNanosVal[] {
+ TimestampNanosVal.fromParts(5L, (short) 0),
+ TimestampNanosVal.fromParts(-1L, (short) 999),
+ TimestampNanosVal.fromParts(0L, (short) 0),
+ TimestampNanosVal.fromParts(5L, (short) 999),
+ TimestampNanosVal.fromParts(5L, (short) 1)
+ };
+ Arrays.sort(xs);
+ assertArrayEquals(new TimestampNanosVal[] {
+ TimestampNanosVal.fromParts(-1L, (short) 999),
+ TimestampNanosVal.fromParts(0L, (short) 0),
+ TimestampNanosVal.fromParts(5L, (short) 0),
+ TimestampNanosVal.fromParts(5L, (short) 1),
+ TimestampNanosVal.fromParts(5L, (short) 999)
+ }, xs);
+ }
}
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]