bodduv commented on code in PR #14500:
URL: https://github.com/apache/iceberg/pull/14500#discussion_r2815893783


##########
api/src/test/java/org/apache/iceberg/expressions/TestStrictMetricsEvaluator.java:
##########
@@ -684,4 +733,111 @@ SCHEMA, lessThanOrEqual("struct.nested_col_with_stats", 
INT_MAX_VALUE))
         new StrictMetricsEvaluator(SCHEMA, 
notNull("struct.nested_col_with_stats")).eval(FILE);
     assertThat(shouldRead).as("notNull nested column should not 
match").isFalse();
   }
+
+  // Tests for UUID with StrictMetricsEvaluator using RFC-compliant comparison 
only
+
+  @Test
+  public void testStrictUuidGt() {
+    // Query: uuid > 0x00... (all UUIDs in file should be > this)
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, greaterThan("uuid", 
belowMin)).eval(UUID_FILE);
+    // With bounds [UUID_MIN, UUID_MAX], all values should be > belowMin
+    assertThat(allMatch).as("All UUIDs should be greater than 
belowMin").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidLt() {
+    // Query: uuid < 0xFF... (all UUIDs in file should be < this)
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, lessThan("uuid", 
aboveMax)).eval(UUID_FILE);
+    // With bounds [UUID_MIN, UUID_MAX], all values should be < aboveMax
+    assertThat(allMatch).as("All UUIDs should be less than aboveMax").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidEqNeverMatchesRange() {
+    // Strict eq should never match when there's a range of values
+    UUID middle = UUID.fromString("40000000-0000-0000-0000-000000000001");
+    boolean allMatch = new StrictMetricsEvaluator(SCHEMA, equal("uuid", 
middle)).eval(UUID_FILE);
+    assertThat(allMatch).as("Strict eq should not match range").isFalse();
+  }
+
+  @Test
+  public void testStrictUuidInNeverMatchesRange() {
+    // Strict IN should never match when there's a range of values (lower != 
upper)
+    UUID middle1 = UUID.fromString("40000000-0000-0000-0000-000000000001");
+    UUID middle2 = UUID.fromString("50000000-0000-0000-0000-000000000001");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, in("uuid", middle1, 
middle2)).eval(UUID_FILE);
+    assertThat(allMatch).as("Strict IN should not match range").isFalse();
+  }
+
+  @Test
+  public void testStrictUuidInMatchesSingleValue() {
+    // Strict IN should match when lower == upper and the value is in the set
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, in("uuid", 
SINGLE_UUID)).eval(SINGLE_UUID_FILE);
+    assertThat(allMatch).as("Strict IN should match single value in 
set").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidInDoesNotMatchWhenValueNotInSet() {
+    // Strict IN should not match when lower == upper but the value is not in 
the set
+    UUID otherUuid = UUID.fromString("50000000-0000-0000-0000-000000000001");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, in("uuid", 
otherUuid)).eval(SINGLE_UUID_FILE);
+    assertThat(allMatch).as("Strict IN should not match when value not in 
set").isFalse();
+  }
+
+  @Test
+  public void testStrictUuidNotInMatchesWhenAllValuesOutsideBounds() {
+    // Strict NOT IN should match when all values in the set are outside the 
file's bounds
+    UUID belowMin = UUID.fromString("00000000-0000-0000-0000-000000000000");
+    UUID aboveMax = UUID.fromString("ffffffff-ffff-ffff-ffff-ffffffffffff");
+    boolean allMatch =
+        new StrictMetricsEvaluator(SCHEMA, notIn("uuid", belowMin, 
aboveMax)).eval(UUID_FILE);
+    // All values in the set are outside [UUID_MIN, UUID_MAX], so all rows 
match NOT IN
+    assertThat(allMatch).as("Strict NOT IN should match when all values 
outside bounds").isTrue();
+  }
+
+  @Test
+  public void testStrictUuidNotInDoesNotMatchWhenValueInBounds() {
+    // Strict NOT IN should not match when a value in the set is within bounds
+    UUID middle = UUID.fromString("40000000-0000-0000-0000-000000000001");
+    boolean allMatch = new StrictMetricsEvaluator(SCHEMA, notIn("uuid", 
middle)).eval(UUID_FILE);
+    // middle is within [UUID_MIN, UUID_MAX], so some rows might match the 
value
+    assertThat(allMatch).as("Strict NOT IN should not match when value in 
bounds").isFalse();
+  }
+
+  // Tests for file with inverted UUID bounds (as would be written by legacy 
signed comparator)

Review Comment:
   I think inverted bounds is probably more accurately. I am clarifying what 
inverted bounds mean at this place.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to