This is an automated email from the ASF dual-hosted git repository.
findepi pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new 122c4408b1 API: Reduce 'Scanning table' log verbosity for long list of strings (#14757)
122c4408b1 is described below
commit 122c4408b101bcf846440348040caef016e16626
Author: Raunaq Morarka <[email protected]>
AuthorDate: Wed Dec 10 17:56:27 2025 +0530
API: Reduce 'Scanning table' log verbosity for long list of strings (#14757)
---
.../org/apache/iceberg/expressions/ExpressionUtil.java | 17 ++++++++---------
.../apache/iceberg/expressions/TestExpressionUtil.java | 10 ++++++++++
2 files changed, 18 insertions(+), 9 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
index d3dc00d914..9bb2b71343 100644
--- a/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
+++ b/api/src/main/java/org/apache/iceberg/expressions/ExpressionUtil.java
@@ -24,7 +24,6 @@ import java.time.ZoneOffset;
import java.time.temporal.ChronoUnit;
import java.util.List;
import java.util.Locale;
-import java.util.Set;
import java.util.concurrent.TimeUnit;
import java.util.function.Function;
import java.util.regex.Pattern;
@@ -69,7 +68,6 @@ public class ExpressionUtil {
"\\d{4}-\\d{2}-\\d{2}T\\d{2}:\\d{2}(:\\d{2}(.\\d{7,9})?)?([-+]\\d{2}:\\d{2}|Z)");
static final int LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD = 10;
- private static final int LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN = 5;
private ExpressionUtil() {}
@@ -502,19 +500,20 @@ public class ExpressionUtil {
private static List<String> abbreviateValues(List<String> sanitizedValues) {
if (sanitizedValues.size() >= LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD) {
- Set<String> distinctValues = ImmutableSet.copyOf(sanitizedValues);
- if (distinctValues.size()
- <= sanitizedValues.size() - LONG_IN_PREDICATE_ABBREVIATION_MIN_GAIN)
{
- List<String> abbreviatedList =
Lists.newArrayListWithCapacity(distinctValues.size() + 1);
- abbreviatedList.addAll(distinctValues);
+ List<String> distinctValues =
ImmutableSet.copyOf(sanitizedValues).asList();
+ int abbreviatedSize =
+ Math.min(distinctValues.size(),
LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD);
+ List<String> abbreviatedList =
Lists.newArrayListWithCapacity(abbreviatedSize + 1);
+ abbreviatedList.addAll(distinctValues.subList(0, abbreviatedSize));
+ if (abbreviatedSize < sanitizedValues.size()) {
abbreviatedList.add(
String.format(
Locale.ROOT,
"... (%d values hidden, %d in total)",
- sanitizedValues.size() - distinctValues.size(),
+ sanitizedValues.size() - abbreviatedSize,
sanitizedValues.size()));
- return abbreviatedList;
}
+ return abbreviatedList;
}
return sanitizedValues;
}
diff --git a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
index ca08951b1f..d9fe26eacc 100644
--- a/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
+++ b/api/src/test/java/org/apache/iceberg/expressions/TestExpressionUtil.java
@@ -115,6 +115,16 @@ public class TestExpressionUtil {
.as("Sanitized string should be abbreviated")
.isEqualTo("test IN ((2-digit-int), (3-digit-int), ... (8 values
hidden, 10 in total))");
+ Object[] tooLongStringsList =
+ IntStream.range(0,
ExpressionUtil.LONG_IN_PREDICATE_ABBREVIATION_THRESHOLD + 5)
+ .mapToObj(i -> "string_" + i)
+ .toArray();
+
+ assertThat(ExpressionUtil.toSanitizedString(Expressions.in("test",
tooLongStringsList)))
+ .as("Sanitized string should be abbreviated")
+ .isEqualTo(
+ "test IN ((hash-14128790), (hash-1056a62b), (hash-22fd6340),
(hash-3f9d20e4), (hash-136200f0), (hash-25fc9033), (hash-681d31e2),
(hash-6c1796d4), (hash-382d143e), (hash-272f4e5b), ... (5 values hidden, 15 in
total))");
+
// The sanitization resulting in an expression tree does not abbreviate
List<String> expectedValues = Lists.newArrayList();
expectedValues.addAll(Collections.nCopies(5, "(2-digit-int)"));