This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 514ecef3cb8677c4cb8d8a0976d0ca0a26ec5643
Author: Riza Suminto <[email protected]>
AuthorDate: Thu Mar 13 09:58:37 2025 -0700

    IMPALA-13860: Fix DCHECK hit in cluster-membership-mgr.cc
    
    Enabling graceful shutdown in test_coord_only_pool_exec_groups reveals a
    DCHECK hit caused by two back-to-back calls to RemoveExecutorAndGroup
    during graceful shutdown. This patch fixes it by turning the DCHECK into
    an if check that logs at VLOG(1) and returns early.
    
    Testing:
    - test_coord_only_pool_exec_groups passes with graceful shutdown enabled
      after the change, without any FATAL log.
    
    Change-Id: If678ae472bade50c18842df9e98c536fb9f1fe9c
    Reviewed-on: http://gerrit.cloudera.org:8080/22620
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 be/src/scheduling/cluster-membership-mgr.cc       |  5 ++++-
 tests/custom_cluster/test_admission_controller.py | 10 ++++++++--
 2 files changed, 12 insertions(+), 3 deletions(-)

diff --git a/be/src/scheduling/cluster-membership-mgr.cc 
b/be/src/scheduling/cluster-membership-mgr.cc
index ce3c211db..4815ec08e 100644
--- a/be/src/scheduling/cluster-membership-mgr.cc
+++ b/be/src/scheduling/cluster-membership-mgr.cc
@@ -58,7 +58,10 @@ void RemoveExecutorAndGroup(const BackendDescriptorPB& 
be_desc,
     const ExecutorGroupDescPB& group,
     ClusterMembershipMgr::ExecutorGroups* executor_groups) {
   auto it = executor_groups->find(group.name());
-  DCHECK(it != executor_groups->end());
+  if (it == executor_groups->end()) {
+    VLOG(1) << "Group \"" << group.name() << "\" is not found";
+    return;
+  }
   DCHECK_EQ(group.name(), it->second.name());
   it->second.RemoveExecutor(be_desc);
   if (it->second.NumExecutors() == 0) {
diff --git a/tests/custom_cluster/test_admission_controller.py 
b/tests/custom_cluster/test_admission_controller.py
index 51386151e..c17e75676 100644
--- a/tests/custom_cluster/test_admission_controller.py
+++ b/tests/custom_cluster/test_admission_controller.py
@@ -2035,11 +2035,13 @@ class 
TestAdmissionController(TestAdmissionControllerBase):
         
additional_args="--expected_executor_group_sets=root.group-set-small:1,"
                         "root.group-set-large:2 "
                         "--num_expected_executors=2 
--executor_groups=coordinator"),
+        impalad_graceful_shutdown=True,
         statestored_args=_STATESTORED_ARGS)
   def test_coord_only_pool_exec_groups(self, vector):
     """Asserts queries using only coordinators request pools can run 
successfully when
        executor groups are configured."""
     self.wait_for_wm_init_complete()
+    executor_flags = '--shutdown_grace_period_s=0 --shutdown_deadline_s=60 '
 
     # Assert queries can be run when no executors are started.
     self.__run_assert_systables_query(vector)
@@ -2053,7 +2055,8 @@ class 
TestAdmissionController(TestAdmissionControllerBase):
     expected_num_impalads += 1
     self._start_impala_cluster(
         options=[
-            
"--impalad_args=--executor_groups=root.group-set-small-group-000:1"],
+            "--impalad_args=--executor_groups=root.group-set-small-group-000:1 
"
+            + executor_flags],
         add_executors=True,
         cluster_size=1,
         expected_subscribers=expected_subscribers,
@@ -2065,12 +2068,15 @@ class 
TestAdmissionController(TestAdmissionControllerBase):
     expected_num_impalads += 2
     self._start_impala_cluster(
         options=[
-            
"--impalad_args=--executor_groups=root.group-set-small-group-000:2"],
+            "--impalad_args=--executor_groups=root.group-set-small-group-000:2 
"
+            + executor_flags],
         add_executors=True,
         cluster_size=2,
         expected_subscribers=expected_subscribers,
         expected_num_impalads=expected_num_impalads)
     self.__run_assert_systables_query(vector)
+    # Refresh cluster to include those two new impalad for graceful shutdown.
+    self.cluster.refresh()
 
 
 class TestAdmissionControllerWithACService(TestAdmissionController):

Reply via email to