This is an automated email from the ASF dual-hosted git repository.
wzhou pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git
The following commit(s) were added to refs/heads/master by this push:
new 7c53e87aa IMPALA-12833: Enabled
'catalogd_ha_reset_metadata_on_failover' by default
7c53e87aa is described below
commit 7c53e87aa166bb77cd2e31646ff913302912d3fd
Author: wzhou-code <[email protected]>
AuthorDate: Wed Feb 21 20:08:17 2024 -0800
IMPALA-12833: Enabled 'catalogd_ha_reset_metadata_on_failover' by default
The standby catalogd may have stale metadata for various reasons; for
example, the event processor could have hung or could simply be behind in
processing events. Also, the standby catalogd doesn't receive invalidate
requests from coordinators, so we should probably reset its metadata when
it becomes active to avoid stale metadata.
This patch sets the default value of the catalog server startup flag
'catalogd_ha_reset_metadata_on_failover' to true so that catalogd
will reset its metadata when it becomes active. It also makes the flag
a hidden option.
Testing:
- Ran the unit tests for catalog HA and statestore HA in a loop without
failure.
- Passed core tests
Change-Id: Ibc7c529f34b70734a700ac0d9d58b7e5b0215f8d
Reviewed-on: http://gerrit.cloudera.org:8080/21051
Tested-by: Impala Public Jenkins <[email protected]>
Reviewed-by: Michael Smith <[email protected]>
Reviewed-by: Abhishek Rawat <[email protected]>
---
be/src/catalog/catalog-server.cc | 8 ++++++--
tests/custom_cluster/test_catalogd_ha.py | 4 ++++
2 files changed, 10 insertions(+), 2 deletions(-)
diff --git a/be/src/catalog/catalog-server.cc b/be/src/catalog/catalog-server.cc
index 3402959b0..529d9523c 100644
--- a/be/src/catalog/catalog-server.cc
+++ b/be/src/catalog/catalog-server.cc
@@ -152,8 +152,12 @@ DEFINE_bool(enable_skipping_older_events, false, "This
configuration is used to
DEFINE_int32(catalog_operation_log_size, 100, "Number of catalog operation log
records "
"to retain in catalogd. If -1, the operation log has unbounded size.");
-DEFINE_bool(catalogd_ha_reset_metadata_on_failover, false, "If true, reset all
metadata "
- "when the catalogd becomes active.");
+// The standby catalogd may have stale metadata for some reason, like event
processor
+// could have hung or could be just behind in processing events. Also the
standby
+// catalogd doesn't get invalidate requests from coordinators so we should
probably
+// reset its metadata when it becomes active to avoid stale metadata.
+DEFINE_bool_hidden(catalogd_ha_reset_metadata_on_failover, true, "If true,
reset all "
+ "metadata when the catalogd becomes active.");
DEFINE_int32(topic_update_log_gc_frequency, 1000, "Frequency at which the
entries "
"of the catalog topic update log are garbage collected. An entry may
survive "
diff --git a/tests/custom_cluster/test_catalogd_ha.py
b/tests/custom_cluster/test_catalogd_ha.py
index 62575ef66..bf812db5f 100644
--- a/tests/custom_cluster/test_catalogd_ha.py
+++ b/tests/custom_cluster/test_catalogd_ha.py
@@ -184,6 +184,7 @@ class TestCatalogdHA(CustomClusterTestSuite):
@CustomClusterTestSuite.with_args(
statestored_args="--use_subscriber_id_as_catalogd_priority=true "
"--statestore_heartbeat_frequency_ms=1000",
+ catalogd_args="--catalogd_ha_reset_metadata_on_failover=false",
start_args="--enable_catalogd_ha")
def test_catalogd_auto_failover(self):
"""Tests for Catalog Service auto fail over without failed RPCs."""
@@ -201,6 +202,7 @@ class TestCatalogdHA(CustomClusterTestSuite):
statestored_args="--use_subscriber_id_as_catalogd_priority=true "
"--statestore_heartbeat_frequency_ms=1000 "
"--debug_actions=SEND_UPDATE_CATALOGD_RPC_FIRST_ATTEMPT:FAIL@1.0",
+ catalogd_args="--catalogd_ha_reset_metadata_on_failover=false",
start_args="--enable_catalogd_ha")
def test_catalogd_auto_failover_with_failed_rpc(self):
"""Tests for Catalog Service auto fail over with failed RPCs."""
@@ -283,6 +285,7 @@ class TestCatalogdHA(CustomClusterTestSuite):
@CustomClusterTestSuite.with_args(
statestored_args="--use_subscriber_id_as_catalogd_priority=true "
"--statestore_heartbeat_frequency_ms=1000",
+ catalogd_args="--catalogd_ha_reset_metadata_on_failover=false",
start_args="--enable_catalogd_ha")
def test_catalogd_manual_failover(self):
"""Tests for Catalog Service manual fail over without failed RPCs."""
@@ -300,6 +303,7 @@ class TestCatalogdHA(CustomClusterTestSuite):
statestored_args="--use_subscriber_id_as_catalogd_priority=true "
"--statestore_heartbeat_frequency_ms=1000 "
"--debug_actions=SEND_UPDATE_CATALOGD_RPC_FIRST_ATTEMPT:FAIL@1.0",
+ catalogd_args="--catalogd_ha_reset_metadata_on_failover=false",
start_args="--enable_catalogd_ha")
def test_catalogd_manual_failover_with_failed_rpc(self):
"""Tests for Catalog Service manual fail over with failed RPCs."""