Michael Blow has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/2426
Change subject: [NO ISSUE] Set MaxGCPauseMillis to not exceed 1/2 of dead node
detection threshold
......................................................................
[NO ISSUE] Set MaxGCPauseMillis to not exceed 1/2 of dead node detection
threshold
Help prevent nodes under heavy GC from missing too many heartbeats.
Change-Id: I7e51db5ccfbb4771ba1f6e0264abfd69f833e7e7
---
M
hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
1 file changed, 30 insertions(+), 7 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/26/2426/1
diff --git
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
index aa7a4fe..38175f0 100644
---
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
+++
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
@@ -25,8 +25,10 @@
import java.io.StringWriter;
import java.net.Socket;
+import org.apache.hyracks.api.config.IApplicationConfig;
import org.apache.hyracks.api.config.Section;
import org.apache.hyracks.control.cc.ClusterControllerService;
+import org.apache.hyracks.control.common.config.ConfigManager;
import org.apache.hyracks.control.common.controllers.NCConfig;
import
org.apache.hyracks.control.common.controllers.ServiceConstants.ServiceCommand;
import org.apache.hyracks.control.common.work.AbstractWork;
@@ -34,6 +36,7 @@
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.ini4j.Ini;
+import org.ini4j.Profile;
/**
* A work which is run at CC startup for each NC specified in the configuration file.
@@ -64,7 +67,7 @@
ObjectOutputStream oos = new
ObjectOutputStream(s.getOutputStream());
oos.writeUTF(NC_SERVICE_MAGIC_COOKIE);
oos.writeUTF(ServiceCommand.START_NC.name());
-
oos.writeUTF(TriggerNCWork.this.serializeIni(ccs.getCCConfig().getIni()));
+ oos.writeUTF(TriggerNCWork.this.serializeIni());
oos.close();
return;
// QQQ Should probably have an ACK here
@@ -83,14 +86,30 @@
/**
* Given an Ini object, serialize it to String with some enhancements.
- * @param ccini the ini file to decorate and forward to NC
*/
- private String serializeIni(Ini ccini) throws IOException {
+ private String serializeIni() throws IOException {
StringWriter iniString = new StringWriter();
-
ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_ADDRESS.ini(),
- ccs.getCCConfig().getClusterPublicAddress());
-
ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_PORT.ini(),
- String.valueOf(ccs.getCCConfig().getClusterPublicPort()));
+ ConfigManager configManager = ccs.getCCConfig().getConfigManager();
+ Ini ccini = configManager.toIni(false);
+ IApplicationConfig ncConfig =
configManager.getNodeEffectiveConfig(ncId);
+ String sectionName = Section.NC.sectionName() + "/" + ncId;
+ Profile.Section ncSection = ccini.get(sectionName);
+ if (ncSection == null) {
+ ncSection = ccini.add(sectionName);
+ }
+ if (ncConfig.getString(NCConfig.Option.CLUSTER_ADDRESS) == null) {
+ ncSection.put(NCConfig.Option.CLUSTER_ADDRESS.ini(),
ccs.getCCConfig().getClusterPublicAddress());
+ ncSection.put(NCConfig.Option.CLUSTER_PORT.ini(),
String.valueOf(ccs.getCCConfig().getClusterPublicPort()));
+ }
+
+ // if not already configured, set GC max pause time millis to not exceed 1/2 the total max heartbeat miss period...
+ String ncJvmArgs = ncConfig.getString(NCConfig.Option.JVM_ARGS);
+ if (ncJvmArgs == null || !ncJvmArgs.contains("-XX:MaxGCPauseMillis")) {
+ String gcMaxPauseArg = "-XX:MaxGCPauseMillis=" +
getGcMaxPauseMillis();
+ ncSection.put(NCConfig.Option.JVM_ARGS.ini(),
+ ncJvmArgs == null ? gcMaxPauseArg : ncJvmArgs + " " +
gcMaxPauseArg);
+ }
+
// Finally insert *this* NC's name into localnc section - this is a fixed
// entry point so that NCs can determine where all their config is.
ccini.put(Section.LOCALNC.sectionName(),
NCConfig.Option.NODE_ID.ini(), ncId);
@@ -100,4 +119,8 @@
}
return iniString.toString();
}
+
+ private long getGcMaxPauseMillis() {
+ return ccs.getCCConfig().getHeartbeatPeriodMillis() *
ccs.getCCConfig().getHeartbeatMaxMisses() / 2;
+ }
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/2426
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I7e51db5ccfbb4771ba1f6e0264abfd69f833e7e7
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Michael Blow <[email protected]>