This is an automated email from the ASF dual-hosted git repository. boroknagyz pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 26e3529c95dd63e50e9f59a9871084dccb28d868 Author: Csaba Ringhofer <[email protected]> AuthorDate: Tue Feb 10 20:58:25 2026 +0100 IMPALA-14734: Optimize sorting file descriptors during planning IcebergScanNode sorts the file descriptors by path (IMPALA-12765). This can dominate planning time if there are many files. This change makes this faster by avoiding extracting Java Strings from flatbuffer, which involves utf8 decoding. Also changes a few similar functions to avoid duplicate decoding. For a table with ~1 million files: explain select * from bigice limit 1; before: ~12s after: ~6.5s Change-Id: Icb914eb4de7bdadeb876f7dd101e8737b9527b6f Reviewed-on: http://gerrit.cloudera.org:8080/23958 Reviewed-by: Csaba Ringhofer <[email protected]> Tested-by: Csaba Ringhofer <[email protected]> --- .../org/apache/impala/catalog/FileDescriptor.java | 28 ++++++++++++++++------ .../org/apache/impala/planner/IcebergScanNode.java | 3 ++- 2 files changed, 23 insertions(+), 8 deletions(-) diff --git a/fe/src/main/java/org/apache/impala/catalog/FileDescriptor.java b/fe/src/main/java/org/apache/impala/catalog/FileDescriptor.java index 16b3009e7..3bba32a82 100644 --- a/fe/src/main/java/org/apache/impala/catalog/FileDescriptor.java +++ b/fe/src/main/java/org/apache/impala/catalog/FileDescriptor.java @@ -215,9 +215,8 @@ public class FileDescriptor implements Comparable<FileDescriptor> { } public String getAbsolutePath() { - return StringUtils.isEmpty(fbFileDescriptor_.absolutePath()) ? - StringUtils.EMPTY : - fbFileDescriptor_.absolutePath(); + String path = fbFileDescriptor_.absolutePath(); + return StringUtils.isEmpty(path) ? StringUtils.EMPTY : path; } public String getAbsolutePath(String rootPath) { @@ -230,14 +229,21 @@ public class FileDescriptor implements Comparable<FileDescriptor> { } public String getPath() { - if (StringUtils.isEmpty(fbFileDescriptor_.relativePath()) - && StringUtils.isNotEmpty(fbFileDescriptor_.absolutePath())) { - return fbFileDescriptor_.absolutePath(); + String relativePath = fbFileDescriptor_.relativePath(); + if (StringUtils.isEmpty(relativePath)) { + String absolutePath = fbFileDescriptor_.absolutePath(); + return StringUtils.isEmpty(absolutePath) ? StringUtils.EMPTY : absolutePath; } else { - return fbFileDescriptor_.relativePath(); + return relativePath; } } + public ByteBuffer getPathAsByteBuffer() { + ByteBuffer relativePath = fbFileDescriptor_.relativePathAsByteBuffer(); + if (relativePath != null) return relativePath; + return fbFileDescriptor_.absolutePathAsByteBuffer(); + } + public long getFileLength() { return fbFileDescriptor_.length(); } @@ -305,6 +311,14 @@ public class FileDescriptor implements Comparable<FileDescriptor> { return getPath().compareTo(otherFd.getPath()); } + /** + * Does lexical comparison without UTF8 decoding. Faster alternative to compareTo where + * the exact order is not critical. + */ + public int byteBufferCompareTo(FileDescriptor otherFd) { + return getPathAsByteBuffer().compareTo(otherFd.getPathAsByteBuffer()); + } + /** * Compares the modification time and file size between current FileDescriptor and the * latest FileStatus to determine if the file has changed. Returns true if the file diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java index 58b1a3dbb..ed8e5808c 100644 --- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java +++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java @@ -113,7 +113,8 @@ public class IcebergScanNode extends HdfsScanNode { // Create a clone of the original file descriptor list to avoid getting // ConcurrentModificationException when sorting. fileDescs_ = new ArrayList<>(fileDescs_); - Collections.sort(fileDescs_); + Collections.sort(fileDescs_, + (IcebergFileDescriptor a, IcebergFileDescriptor b) -> a.byteBufferCompareTo(b)); filesAreSorted_ = true; } nonIdentityConjuncts_ = nonIdentityConjuncts;
