This is an automated email from the ASF dual-hosted git repository.

boroknagyz pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 26e3529c95dd63e50e9f59a9871084dccb28d868
Author: Csaba Ringhofer <[email protected]>
AuthorDate: Tue Feb 10 20:58:25 2026 +0100

    IMPALA-14734: Optimize sorting file descriptors during planning
    
    IcebergScanNode sorts the file descriptors by path (IMPALA-12765).
    This can dominate planning time if there are many files.
    
    This change makes this faster by avoiding extracting Java
    Strings from flatbuffer, which involves utf8 decoding. Also
    changes a few similar functions to avoid duplicate decoding.
    
    For a table with ~1 million files:
    explain select * from bigice limit 1;
    before: ~12s
    after: ~6.5s
    
    Change-Id: Icb914eb4de7bdadeb876f7dd101e8737b9527b6f
    Reviewed-on: http://gerrit.cloudera.org:8080/23958
    Reviewed-by: Csaba Ringhofer <[email protected]>
    Tested-by: Csaba Ringhofer <[email protected]>
---
 .../org/apache/impala/catalog/FileDescriptor.java  | 28 ++++++++++++++++------
 .../org/apache/impala/planner/IcebergScanNode.java |  3 ++-
 2 files changed, 23 insertions(+), 8 deletions(-)

diff --git a/fe/src/main/java/org/apache/impala/catalog/FileDescriptor.java b/fe/src/main/java/org/apache/impala/catalog/FileDescriptor.java
index 16b3009e7..3bba32a82 100644
--- a/fe/src/main/java/org/apache/impala/catalog/FileDescriptor.java
+++ b/fe/src/main/java/org/apache/impala/catalog/FileDescriptor.java
@@ -215,9 +215,8 @@ public class FileDescriptor implements Comparable<FileDescriptor> {
   }
 
   public String getAbsolutePath() {
-    return StringUtils.isEmpty(fbFileDescriptor_.absolutePath()) ?
-        StringUtils.EMPTY :
-        fbFileDescriptor_.absolutePath();
+    String path = fbFileDescriptor_.absolutePath();
+    return StringUtils.isEmpty(path) ? StringUtils.EMPTY : path;
   }
 
   public String getAbsolutePath(String rootPath) {
@@ -230,14 +229,21 @@ public class FileDescriptor implements Comparable<FileDescriptor> {
   }
 
   public String getPath() {
-    if (StringUtils.isEmpty(fbFileDescriptor_.relativePath())
-        && StringUtils.isNotEmpty(fbFileDescriptor_.absolutePath())) {
-      return fbFileDescriptor_.absolutePath();
+    String relativePath = fbFileDescriptor_.relativePath();
+    if (StringUtils.isEmpty(relativePath)) {
+      String absolutePath = fbFileDescriptor_.absolutePath();
+      return StringUtils.isEmpty(absolutePath) ? StringUtils.EMPTY : absolutePath;
     } else {
-      return fbFileDescriptor_.relativePath();
+      return relativePath;
     }
   }
 
+  public ByteBuffer getPathAsByteBuffer() {
+    ByteBuffer relativePath = fbFileDescriptor_.relativePathAsByteBuffer();
+    if (relativePath != null) return relativePath;
+    return fbFileDescriptor_.absolutePathAsByteBuffer();
+  }
+
   public long getFileLength() {
     return fbFileDescriptor_.length();
   }
@@ -305,6 +311,14 @@ public class FileDescriptor implements Comparable<FileDescriptor> {
     return getPath().compareTo(otherFd.getPath());
   }
 
+  /**
+   * Does lexical comparison without UTF8 decoding. Faster alternative to compareTo where
+   * the exact order is not critical.
+   */
+  public int byteBufferCompareTo(FileDescriptor otherFd) {
+    return getPathAsByteBuffer().compareTo(otherFd.getPathAsByteBuffer());
+  }
+
   /**
    * Compares the modification time and file size between current FileDescriptor and the
    * latest FileStatus to determine if the file has changed. Returns true if the file
diff --git a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
index 58b1a3dbb..ed8e5808c 100644
--- a/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
+++ b/fe/src/main/java/org/apache/impala/planner/IcebergScanNode.java
@@ -113,7 +113,8 @@ public class IcebergScanNode extends HdfsScanNode {
       // Create a clone of the original file descriptor list to avoid getting
       // ConcurrentModificationException when sorting.
       fileDescs_ = new ArrayList<>(fileDescs_);
-      Collections.sort(fileDescs_);
+      Collections.sort(fileDescs_,
+          (IcebergFileDescriptor a, IcebergFileDescriptor b) -> a.byteBufferCompareTo(b));
       filesAreSorted_ = true;
     }
     nonIdentityConjuncts_ = nonIdentityConjuncts;

Reply via email to