This is an automated email from the ASF dual-hosted git repository.
Apache9 pushed a commit to branch branch-3
in repository https://gitbox.apache.org/repos/asf/hbase.git
The following commit(s) were added to refs/heads/branch-3 by this push:
new df9843f229f HBASE-28419 Allow Action and Policies of
ServerKillingMonkey to be configurable. (#5743)
df9843f229f is described below
commit df9843f229f3842d967ad71b49367564bc2651d8
Author: Wei-Chiu Chuang <[email protected]>
AuthorDate: Wed Mar 13 10:36:54 2024 -0700
HBASE-28419 Allow Action and Policies of ServerKillingMonkey to be
configurable. (#5743)
Signed-off-by: Nick Dimiduk <[email protected]>
(cherry picked from commit beafd332618ec81febaf1fcfb8bb8c216de61164)
---
.../hbase/chaos/factories/MonkeyConstants.java | 13 ++++++
.../ServerAndDependenciesKillingMonkeyFactory.java | 52 +++++++++++++++++-----
.../factories/ServerKillingMonkeyFactory.java | 28 +++++++++---
3 files changed, 77 insertions(+), 16 deletions(-)
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
index f906eb863da..3fda0844729 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/MonkeyConstants.java
@@ -20,6 +20,7 @@ package org.apache.hadoop.hbase.chaos.factories;
import java.util.Arrays;
import java.util.HashSet;
import java.util.Set;
+import java.util.concurrent.TimeUnit;
public interface MonkeyConstants {
@@ -45,6 +46,11 @@ public interface MonkeyConstants {
String UNBALANCE_WAIT_AFTER_BALANCE_MS =
"unbalance.action.wait.after.period";
String UNBALANCE_KILL_META_RS = "unbalance.action.kill.meta.rs";
String DECREASE_HFILE_SIZE_SLEEP_TIME = "decrease.hfile.size.sleep.time";
+ String RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = "restart.random.rs.exception.sleep.time";
+ String RESTART_ACTIVE_NAMENODE_SLEEP_TIME = "restart.active.namenode.sleep.time";
+ String RESTART_RANDOM_DATANODE_SLEEP_TIME = "restart.random.datanode.sleep.time";
+ String RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = "restart.random.journalnode.sleep.time";
+ String RESTART_RANDOM_ZKNODE_SLEEP_TIME = "restart.random.zknode.sleep.time";
String GRACEFUL_RESTART_RS_SLEEP_TIME = "graceful.restart.rs.sleep.time";
String ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME =
"rolling.batch.suspend.rs.sleep.time";
String ROLLING_BATCH_SUSPEND_RS_RATIO = "rolling.batch.suspend.rs.ratio";
@@ -93,6 +99,13 @@ public interface MonkeyConstants {
long DEFAULT_UNBALANCE_WAIT_AFTER_BALANCE_MS = 5 * 1000;
boolean DEFAULT_UNBALANCE_KILL_META_RS = true;
long DEFAULT_DECREASE_HFILE_SIZE_SLEEP_TIME = 30 * 1000;
+
+ long DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+ long DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+ long DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+ long DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+ long DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME = TimeUnit.MILLISECONDS.toMillis(60000);
+
long DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME = 5000;
long DEFAULT_ROLLING_BATCH_SUSPEND_RS_SLEEP_TIME = 30 * 1000;
float DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO = 1.0f;
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java
index 8b3d10c4647..28dce481314 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerAndDependenciesKillingMonkeyFactory.java
@@ -42,9 +42,17 @@ import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
*/
public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {
+ private long restartRandomRsExceptMetaSleepTime;
+ private long restartActiveMasterSleepTime;
+ private long rollingBatchRestartRSSleepTime;
+ private long restartActiveNameNodeSleepTime;
+ private long restartRandomDataNodeSleepTime;
+ private long restartRandomJournalNodeSleepTime;
+ private long restartRandomZKNodeSleepTime;
private long gracefulRollingRestartTSSLeepTime;
private long rollingBatchSuspendRSSleepTime;
private float rollingBatchSuspendtRSRatio;
+ private long action1Period;
@Override
public ChaosMonkey build() {
@@ -53,15 +61,15 @@ public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {
// Destructive actions to mess things around. Cannot run batch restart.
// @formatter:off
Action[] actions1 = new Action[] {
- new RestartRandomRsExceptMetaAction(60000),
- new RestartActiveMasterAction(5000),
+ new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
+ new RestartActiveMasterAction(restartActiveMasterSleepTime),
// only allow 2 servers to be dead.
- new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
+ new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
new ForceBalancerAction(),
- new RestartActiveNameNodeAction(60000),
- new RestartRandomDataNodeAction(60000),
- new RestartRandomJournalNodeAction(60000),
- new RestartRandomZKNodeAction(60000),
+ new RestartActiveNameNodeAction(restartActiveNameNodeSleepTime),
+ new RestartRandomDataNodeAction(restartRandomDataNodeSleepTime),
+ new RestartRandomJournalNodeAction(restartRandomJournalNodeSleepTime),
+ new RestartRandomZKNodeAction(restartRandomZKNodeSleepTime),
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
rollingBatchSuspendtRSRatio)
@@ -73,12 +81,33 @@ public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {
new Action[] { new DumpClusterStatusAction(), new
DumpHdfsClusterStatusAction() };
return new PolicyBasedChaosMonkey(properties, util,
- new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
- new PeriodicRandomActionPolicy(60 * 1000, actions1)),
- new PeriodicRandomActionPolicy(60 * 1000, actions2));
+ new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
+ new PeriodicRandomActionPolicy(action1Period, actions1)),
+ new PeriodicRandomActionPolicy(action1Period, actions2));
}
private void loadProperties() {
+ restartRandomRsExceptMetaSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
+ restartActiveMasterSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
+ rollingBatchRestartRSSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
+ restartActiveNameNodeSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_NAMENODE_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_ACTIVE_NAMENODE_SLEEP_TIME + ""));
+ restartRandomDataNodeSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_DATANODE_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_DATANODE_SLEEP_TIME + ""));
+ restartRandomJournalNodeSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_JOURNALNODE_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_JOURNALNODE_SLEEP_TIME + ""));
+ restartRandomZKNodeSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_ZKNODE_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_ZKNODE_SLEEP_TIME + ""));
gracefulRollingRestartTSSLeepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
@@ -88,5 +117,8 @@ public class ServerAndDependenciesKillingMonkeyFactory extends MonkeyFactory {
rollingBatchSuspendtRSRatio =
Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
+ action1Period =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
+ MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
}
}
diff --git a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
index 9d49a1f9293..7b58d217040 100644
--- a/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
+++ b/hbase-it/src/test/java/org/apache/hadoop/hbase/chaos/factories/ServerKillingMonkeyFactory.java
@@ -37,9 +37,13 @@ import org.apache.hadoop.hbase.chaos.policies.PeriodicRandomActionPolicy;
*/
public class ServerKillingMonkeyFactory extends MonkeyFactory {
+ private long restartRandomRsExceptMetaSleepTime;
+ private long restartActiveMasterSleepTime;
+ private long rollingBatchRestartRSSleepTime;
private long gracefulRollingRestartTSSLeepTime;
private long rollingBatchSuspendRSSleepTime;
private float rollingBatchSuspendtRSRatio;
+ private long action1Period;
@Override
public ChaosMonkey build() {
@@ -48,10 +52,10 @@ public class ServerKillingMonkeyFactory extends MonkeyFactory {
// Destructive actions to mess things around. Cannot run batch restart
// @formatter:off
Action[] actions1 = new Action[] {
- new RestartRandomRsExceptMetaAction(60000),
- new RestartActiveMasterAction(5000),
+ new RestartRandomRsExceptMetaAction(restartRandomRsExceptMetaSleepTime),
+ new RestartActiveMasterAction(restartActiveMasterSleepTime),
// only allow 2 servers to be dead
- new RollingBatchRestartRsAction(5000, 1.0f, 2, true),
+ new RollingBatchRestartRsAction(rollingBatchRestartRSSleepTime, 1.0f, 2, true),
new ForceBalancerAction(),
new GracefulRollingRestartRsAction(gracefulRollingRestartTSSLeepTime),
new RollingBatchSuspendResumeRsAction(rollingBatchSuspendRSSleepTime,
@@ -63,12 +67,21 @@ public class ServerKillingMonkeyFactory extends MonkeyFactory {
Action[] actions2 = new Action[] { new DumpClusterStatusAction() };
return new PolicyBasedChaosMonkey(properties, util,
- new CompositeSequentialPolicy(new DoActionsOncePolicy(60 * 1000, actions1),
- new PeriodicRandomActionPolicy(60 * 1000, actions1)),
- new PeriodicRandomActionPolicy(60 * 1000, actions2));
+ new CompositeSequentialPolicy(new DoActionsOncePolicy(action1Period, actions1),
+ new PeriodicRandomActionPolicy(action1Period, actions1)),
+ new PeriodicRandomActionPolicy(action1Period, actions2));
}
private void loadProperties() {
+ restartRandomRsExceptMetaSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_RANDOM_RS_EXCEPTION_SLEEP_TIME + ""));
+ restartActiveMasterSleepTime =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.RESTART_ACTIVE_MASTER_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_RESTART_ACTIVE_MASTER_SLEEP_TIME + ""));
+ rollingBatchRestartRSSleepTime = Long
+ .parseLong(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_RESTART_RS_SLEEP_TIME,
+ MonkeyConstants.DEFAULT_ROLLING_BATCH_RESTART_RS_SLEEP_TIME + ""));
gracefulRollingRestartTSSLeepTime =
Long.parseLong(this.properties.getProperty(MonkeyConstants.GRACEFUL_RESTART_RS_SLEEP_TIME,
MonkeyConstants.DEFAULT_GRACEFUL_RESTART_RS_SLEEP_TIME + ""));
@@ -78,5 +91,8 @@ public class ServerKillingMonkeyFactory extends MonkeyFactory {
rollingBatchSuspendtRSRatio =
Float.parseFloat(this.properties.getProperty(MonkeyConstants.ROLLING_BATCH_SUSPEND_RS_RATIO,
MonkeyConstants.DEFAULT_ROLLING_BATCH_SUSPEND_RS_RATIO + ""));
+ action1Period =
+ Long.parseLong(this.properties.getProperty(MonkeyConstants.PERIODIC_ACTION1_PERIOD,
+ MonkeyConstants.DEFAULT_PERIODIC_ACTION1_PERIOD + ""));
}
}