This is an automated email from the ASF dual-hosted git repository.

joemcdonnell pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 762fe0a4f IMPALA-14473: Fix absolute path logic for sorting scan 
ranges oldest to newest
762fe0a4f is described below

commit 762fe0a4f5c9089e8a75fd992ab39c85943db562
Author: Joe McDonnell <[email protected]>
AuthorDate: Mon Oct 6 12:57:49 2025 -0700

    IMPALA-14473: Fix absolute path logic for sorting scan ranges oldest to 
newest
    
    When IMPALA-14462 added tie-breaking logic to
    ScanRangeOldestToNewestComparator, it relied on absolute path
    being unset if the relative path is set. However, the code
    always sets absolute path and uses an empty string to indicate
    whether it is set. This caused the tie-breaking logic to see
    two unrelated scan ranges as equal, triggering a DCHECK when
    running query_test/test_tuple_cache_tpc_queries.py.
    
    The fix is to rearrange the logic to check whether the relative
    path is not empty rather than checking whether the absolute
    path is set.
    
    Testing:
     - Ran query_test/test_tuple_cache_tpc_queries.py
     - Ran custom_cluster/test_tuple_cache.py
    
    Change-Id: I449308f4a0efdca7fc238e3dda24985a2931dd37
    Reviewed-on: http://gerrit.cloudera.org:8080/23495
    Reviewed-by: Michael Smith <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Yida Wu <[email protected]>
    Reviewed-by: Joe McDonnell <[email protected]>
---
 be/src/scheduling/scheduler.cc | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/be/src/scheduling/scheduler.cc b/be/src/scheduling/scheduler.cc
index 5033b692f..71d9cd796 100644
--- a/be/src/scheduling/scheduler.cc
+++ b/be/src/scheduling/scheduler.cc
@@ -194,9 +194,9 @@ static bool ScanRangeOldestToNewestComparator(
   // Multiple files (or multiple splits from the same file) can have the same
   // modification time, so we need tie-breaking when they are equal.
   if (split1.mtime != split2.mtime) return split1.mtime < split2.mtime;
-  if (!split1.__isset.absolute_path && !split2.__isset.absolute_path) {
-    // If neither has an absolute path set (the common case), compare the
-    // partition hash and relative path
+  if (!split1.relative_path.empty() && !split2.relative_path.empty()) {
+    // If both have a relative path set (the common case), compare the 
partition hash
+    // and relative path
     if (split1.partition_path_hash != split2.partition_path_hash) {
       return split1.partition_path_hash < split2.partition_path_hash;
     }
@@ -204,10 +204,19 @@ static bool ScanRangeOldestToNewestComparator(
       return split1.relative_path < split2.relative_path;
     }
   } else {
-    // If only one has an absolute path, sort absolute paths ahead of relative 
paths.
-    if (split1.__isset.absolute_path && !split2.__isset.absolute_path) return 
true;
-    if (!split1.__isset.absolute_path && split2.__isset.absolute_path) return 
false;
-    // Both are absolute, so compare them
+    // If only one has a relative path, sort absolute paths ahead of relative 
paths.
+    if (split1.relative_path.empty()) {
+      DCHECK(split1.__isset.absolute_path && !split1.absolute_path.empty());
+      return true;
+    }
+    if (split2.relative_path.empty()) {
+      DCHECK(split2.__isset.absolute_path && !split2.absolute_path.empty());
+      return false;
+    }
+    // Both have empty relative paths, so compare the absolute paths (which 
must be
+    // non-empty)
+    DCHECK(split1.__isset.absolute_path && !split1.absolute_path.empty());
+    DCHECK(split2.__isset.absolute_path && !split2.absolute_path.empty());
     if (split1.absolute_path != split2.absolute_path) {
       return split1.absolute_path < split2.absolute_path;
     }

Reply via email to