This is an automated email from the ASF dual-hosted git repository. stigahuang pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/impala.git
commit 514ecef3cb8677c4cb8d8a0976d0ca0a26ec5643 Author: Riza Suminto <[email protected]> AuthorDate: Thu Mar 13 09:58:37 2025 -0700 IMPALA-13860: Fix DCHECK hit in cluster-membership-mgr.cc Enabling graceful shutdown in test_coord_only_pool_exec_groups reveals a DCHECK hit caused by two back-to-back calls to RemoveExecutorAndGroup during graceful shutdown. This patch fixes it by turning the DCHECK into an if check with a VLOG(1) message. Testing: - Pass test_coord_only_pool_exec_groups with graceful shutdown after the change without any FATAL log. Change-Id: If678ae472bade50c18842df9e98c536fb9f1fe9c Reviewed-on: http://gerrit.cloudera.org:8080/22620 Reviewed-by: Impala Public Jenkins <[email protected]> Tested-by: Impala Public Jenkins <[email protected]> --- be/src/scheduling/cluster-membership-mgr.cc | 5 ++++- tests/custom_cluster/test_admission_controller.py | 10 ++++++++-- 2 files changed, 12 insertions(+), 3 deletions(-) diff --git a/be/src/scheduling/cluster-membership-mgr.cc b/be/src/scheduling/cluster-membership-mgr.cc index ce3c211db..4815ec08e 100644 --- a/be/src/scheduling/cluster-membership-mgr.cc +++ b/be/src/scheduling/cluster-membership-mgr.cc @@ -58,7 +58,10 @@ void RemoveExecutorAndGroup(const BackendDescriptorPB& be_desc, const ExecutorGroupDescPB& group, ClusterMembershipMgr::ExecutorGroups* executor_groups) { auto it = executor_groups->find(group.name()); - DCHECK(it != executor_groups->end()); + if (it == executor_groups->end()) { + VLOG(1) << "Group \"" << group.name() << "\" is not found"; + return; + } DCHECK_EQ(group.name(), it->second.name()); it->second.RemoveExecutor(be_desc); if (it->second.NumExecutors() == 0) { diff --git a/tests/custom_cluster/test_admission_controller.py b/tests/custom_cluster/test_admission_controller.py index 51386151e..c17e75676 100644 --- a/tests/custom_cluster/test_admission_controller.py +++ b/tests/custom_cluster/test_admission_controller.py @@ -2035,11 +2035,13 @@ class TestAdmissionController(TestAdmissionControllerBase): 
additional_args="--expected_executor_group_sets=root.group-set-small:1," "root.group-set-large:2 " "--num_expected_executors=2 --executor_groups=coordinator"), + impalad_graceful_shutdown=True, statestored_args=_STATESTORED_ARGS) def test_coord_only_pool_exec_groups(self, vector): """Asserts queries using only coordinators request pools can run successfully when executor groups are configured.""" self.wait_for_wm_init_complete() + executor_flags = '--shutdown_grace_period_s=0 --shutdown_deadline_s=60 ' # Assert queries can be run when no executors are started. self.__run_assert_systables_query(vector) @@ -2053,7 +2055,8 @@ class TestAdmissionController(TestAdmissionControllerBase): expected_num_impalads += 1 self._start_impala_cluster( options=[ - "--impalad_args=--executor_groups=root.group-set-small-group-000:1"], + "--impalad_args=--executor_groups=root.group-set-small-group-000:1 " + + executor_flags], add_executors=True, cluster_size=1, expected_subscribers=expected_subscribers, @@ -2065,12 +2068,15 @@ class TestAdmissionController(TestAdmissionControllerBase): expected_num_impalads += 2 self._start_impala_cluster( options=[ - "--impalad_args=--executor_groups=root.group-set-small-group-000:2"], + "--impalad_args=--executor_groups=root.group-set-small-group-000:2 " + + executor_flags], add_executors=True, cluster_size=2, expected_subscribers=expected_subscribers, expected_num_impalads=expected_num_impalads) self.__run_assert_systables_query(vector) + # Refresh cluster to include those two new impalad for graceful shutdown. + self.cluster.refresh() class TestAdmissionControllerWithACService(TestAdmissionController):
