This is an automated email from the ASF dual-hosted git repository.
yufei pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/iceberg.git
The following commit(s) were added to refs/heads/main by this push:
new b531e97f66 Core: Extract filePath comparator into its own class
(#10664)
b531e97f66 is described below
commit b531e97f66ef2bf80f3167152e268be0ce25f459
Author: Denys Kuzmenko <[email protected]>
AuthorDate: Mon Aug 5 22:43:34 2024 +0200
Core: Extract filePath comparator into its own class (#10664)
---
.../java/org/apache/iceberg/types/Comparators.java | 41 ++++++++++++++++++++++
.../java/org/apache/iceberg/deletes/Deletes.java | 31 +++-------------
2 files changed, 45 insertions(+), 27 deletions(-)
diff --git a/api/src/main/java/org/apache/iceberg/types/Comparators.java
b/api/src/main/java/org/apache/iceberg/types/Comparators.java
index d09d9f5395..a803afac10 100644
--- a/api/src/main/java/org/apache/iceberg/types/Comparators.java
+++ b/api/src/main/java/org/apache/iceberg/types/Comparators.java
@@ -173,6 +173,10 @@ public class Comparators {
return CharSeqComparator.INSTANCE;
}
+ public static Comparator<CharSequence> filePath() {
+ return FilePathComparator.INSTANCE;
+ }
+
private static class NullsFirst<T> implements Comparator<T> {
private static final NullsFirst<?> INSTANCE = new NullsFirst<>();
@@ -351,4 +355,41 @@ public class Comparators {
return Integer.compare(s1.length(), s2.length());
}
}
+
+ private static class FilePathComparator implements Comparator<CharSequence> {
+ private static final FilePathComparator INSTANCE = new
FilePathComparator();
+
+ private FilePathComparator() {}
+
+ @Override
+ public int compare(CharSequence s1, CharSequence s2) {
+ if (s1 == s2) {
+ return 0;
+ }
+ int count = s1.length();
+
+ int cmp = Integer.compare(count, s2.length());
+ if (cmp != 0) {
+ return cmp;
+ }
+
+ if (s1 instanceof String && s2 instanceof String) {
+ cmp = Integer.compare(s1.hashCode(), s2.hashCode());
+ if (cmp != 0) {
+ return cmp;
+ }
+ }
+ // File paths inside a delete file normally have more identical chars at
the beginning. For
+ // example, a typical
+ // path is like
"s3:/bucket/db/table/data/partition/00000-0-[uuid]-00001.parquet".
+ // The uuid is where the difference starts. So it's faster to find the
first diff backward.
+ for (int i = count - 1; i >= 0; i--) {
+ cmp = Character.compare(s1.charAt(i), s2.charAt(i));
+ if (cmp != 0) {
+ return cmp;
+ }
+ }
+ return 0;
+ }
+ }
}
diff --git a/core/src/main/java/org/apache/iceberg/deletes/Deletes.java
b/core/src/main/java/org/apache/iceberg/deletes/Deletes.java
index ff20ba53ff..cef57cd167 100644
--- a/core/src/main/java/org/apache/iceberg/deletes/Deletes.java
+++ b/core/src/main/java/org/apache/iceberg/deletes/Deletes.java
@@ -36,6 +36,7 @@ import org.apache.iceberg.io.FilterIterator;
import org.apache.iceberg.relocated.com.google.common.collect.ImmutableList;
import org.apache.iceberg.relocated.com.google.common.collect.Iterables;
import org.apache.iceberg.relocated.com.google.common.collect.Lists;
+import org.apache.iceberg.types.Comparators;
import org.apache.iceberg.types.Types;
import org.apache.iceberg.util.CharSequenceMap;
import org.apache.iceberg.util.Filter;
@@ -398,33 +399,9 @@ public class Deletes {
@Override
protected boolean shouldKeep(T posDelete) {
- return charSeqEquals(dataLocation, (CharSequence)
FILENAME_ACCESSOR.get(posDelete));
- }
-
- private boolean charSeqEquals(CharSequence s1, CharSequence s2) {
- if (s1 == s2) {
- return true;
- }
-
- int count = s1.length();
- if (count != s2.length()) {
- return false;
- }
-
- if (s1 instanceof String && s2 instanceof String && s1.hashCode() !=
s2.hashCode()) {
- return false;
- }
-
- // File paths inside a delete file normally have more identical chars at
the beginning. For
- // example, a typical
- // path is like
"s3:/bucket/db/table/data/partition/00000-0-[uuid]-00001.parquet".
- // The uuid is where the difference starts. So it's faster to find the
first diff backward.
- for (int i = count - 1; i >= 0; i--) {
- if (s1.charAt(i) != s2.charAt(i)) {
- return false;
- }
- }
- return true;
+ return Comparators.filePath()
+ .compare(dataLocation, (CharSequence)
FILENAME_ACCESSOR.get(posDelete))
+ == 0;
}
}
}