bodduv commented on code in PR #14500:
URL: https://github.com/apache/iceberg/pull/14500#discussion_r2815893783
##########
api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java:
##########
@@ -684,4 +733,111 @@ SCHEMA, lessThanOrEqual("struct.nested_col_with_stats",
INT_MAX_VALUE))
new StrictMetricsEvaluator(SCHEMA,
notNull("struct.nested_col_with_stats")).eval(FILE);
assertThat(shouldRead).as("notNull nested column should not
match").isFalse();
}
+
+ // Tests for UUID with StrictMetricsEvaluator using RFC-compliant comparison
only
+
+ @Test
+ public void testStrictUuidGt() {
+ // Query: uuid > 0x00... (all UUIDs in file should be > this)
+ UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+ boolean allMatch =
+ new StrictMetricsEvaluator(SCHEMA, greaterThan("uuid",
belowMin)).eval(UUID_FILE);
+ // With bounds [UUID_MIN, UUID_MAX], all values should be > belowMin
+ assertThat(allMatch).as("All UUIDs should be greater than
belowMin").isTrue();
+ }
+
+ @Test
+ public void testStrictUuidLt() {
+ // Query: uuid < 0xFF... (all UUIDs in file should be < this)
+ UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+ boolean allMatch =
+ new StrictMetricsEvaluator(SCHEMA, lessThan("uuid",
aboveMax)).eval(UUID_FILE);
+ // With bounds [UUID_MIN, UUID_MAX], all values should be < aboveMax
+ assertThat(allMatch).as("All UUIDs should be less than aboveMax").isTrue();
+ }
+
+ @Test
+ public void testStrictUuidEqNeverMatchesRange() {
+ // Strict eq should never match when there's a range of values
+ UUID middle = UUID.fromString("40000000-0000-0000-0000-000000000001");
+ boolean allMatch = new StrictMetricsEvaluator(SCHEMA, equal("uuid",
middle)).eval(UUID_FILE);
+ assertThat(allMatch).as("Strict eq should not match range").isFalse();
+ }
+
+ @Test
+ public void testStrictUuidInNeverMatchesRange() {
+ // Strict IN should never match when there's a range of values (lower !=
upper)
+ UUID middle1 = UUID.fromString("40000000-0000-0000-0000-000000000001");
+ UUID middle2 = UUID.fromString("50000000-0000-0000-0000-000000000001");
+ boolean allMatch =
+ new StrictMetricsEvaluator(SCHEMA, in("uuid", middle1,
middle2)).eval(UUID_FILE);
+ assertThat(allMatch).as("Strict IN should not match range").isFalse();
+ }
+
+ @Test
+ public void testStrictUuidInMatchesSingleValue() {
+ // Strict IN should match when lower == upper and the value is in the set
+ boolean allMatch =
+ new StrictMetricsEvaluator(SCHEMA, in("uuid",
SINGLE_UUID)).eval(SINGLE_UUID_FILE);
+ assertThat(allMatch).as("Strict IN should match single value in
set").isTrue();
+ }
+
+ @Test
+ public void testStrictUuidInDoesNotMatchWhenValueNotInSet() {
+ // Strict IN should not match when lower == upper but the value is not in
the set
+ UUID otherUuid = UUID.fromString("50000000-0000-0000-0000-000000000001");
+ boolean allMatch =
+ new StrictMetricsEvaluator(SCHEMA, in("uuid",
otherUuid)).eval(SINGLE_UUID_FILE);
+ assertThat(allMatch).as("Strict IN should not match when value not in
set").isFalse();
+ }
+
+ @Test
+ public void testStrictUuidNotInMatchesWhenAllValuesOutsideBounds() {
+ // Strict NOT IN should match when all values in the set are outside the
file's bounds
+ UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+ UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+ boolean allMatch =
+ new StrictMetricsEvaluator(SCHEMA, notIn("uuid", belowMin,
aboveMax)).eval(UUID_FILE);
+ // All values in the set are outside [UUID_MIN, UUID_MAX], so all rows
match NOT IN
+ assertThat(allMatch).as("Strict NOT IN should match when all values
outside bounds").isTrue();
+ }
+
+ @Test
+ public void testStrictUuidNotInDoesNotMatchWhenValueInBounds() {
+ // Strict NOT IN should not match when a value in the set is within bounds
+ UUID middle = UUID.fromString("40000000-0000-0000-0000-000000000001");
+ boolean allMatch = new StrictMetricsEvaluator(SCHEMA, notIn("uuid",
middle)).eval(UUID_FILE);
+ // middle is within [UUID_MIN, UUID_MAX], so some rows might match the
value
+ assertThat(allMatch).as("Strict NOT IN should not match when value in
bounds").isFalse();
+ }
+
+ // Tests for file with inverted UUID bounds (as would be written by legacy
signed comparator)
Review Comment:
I think inverted bounds is probably more accurately. I am clarifying what
inverted bounds mean at this place.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]