Michael Blow has submitted this change and it was merged. Change subject: [NO ISSUE] Set MaxGCPauseMillis to not exceed 1/2 of dead node detection threshold ......................................................................
[NO ISSUE] Set MaxGCPauseMillis to not exceed 1/2 of dead node detection threshold Help prevent nodes under heavy gc from missing too many heartbeats Change-Id: I7e51db5ccfbb4771ba1f6e0264abfd69f833e7e7 Reviewed-on: https://asterix-gerrit.ics.uci.edu/2426 Sonar-Qube: Jenkins <[email protected]> Tested-by: Jenkins <[email protected]> Contrib: Jenkins <[email protected]> Integration-Tests: Jenkins <[email protected]> Reviewed-by: Murtadha Hubail <[email protected]> --- M hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java 1 file changed, 45 insertions(+), 9 deletions(-) Approvals: Anon. E. Moose #1000171: Jenkins: Verified; No violations found; ; Verified Murtadha Hubail: Looks good to me, approved diff --git a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java index aa7a4fe..daed5e4 100644 --- a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java +++ b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java @@ -18,6 +18,8 @@ */ package org.apache.hyracks.control.cc.work; +import static org.apache.hyracks.api.config.Section.LOCALNC; +import static org.apache.hyracks.api.config.Section.NC; import static org.apache.hyracks.control.common.controllers.ServiceConstants.NC_SERVICE_MAGIC_COOKIE; import java.io.IOException; @@ -25,8 +27,9 @@ import java.io.StringWriter; import java.net.Socket; -import org.apache.hyracks.api.config.Section; +import org.apache.hyracks.api.config.IApplicationConfig; import org.apache.hyracks.control.cc.ClusterControllerService; +import org.apache.hyracks.control.common.config.ConfigManager; import org.apache.hyracks.control.common.controllers.NCConfig; import org.apache.hyracks.control.common.controllers.ServiceConstants.ServiceCommand; import org.apache.hyracks.control.common.work.AbstractWork; @@ -34,6 +37,7 @@ import org.apache.logging.log4j.LogManager; import org.apache.logging.log4j.Logger; import org.ini4j.Ini; +import org.ini4j.Profile.Section; /** * A work which is run at CC startup for each NC specified in the configuration file. @@ -42,6 +46,7 @@ public class TriggerNCWork extends AbstractWork { private static final Logger LOGGER = LogManager.getLogger(); + private static final String JVM_ARG_MAX_GCPAUSE_MILLIS = "-XX:MaxGCPauseMillis="; private final ClusterControllerService ccs; private final String ncHost; @@ -64,7 +69,7 @@ ObjectOutputStream oos = new ObjectOutputStream(s.getOutputStream()); oos.writeUTF(NC_SERVICE_MAGIC_COOKIE); oos.writeUTF(ServiceCommand.START_NC.name()); - oos.writeUTF(TriggerNCWork.this.serializeIni(ccs.getCCConfig().getIni())); + oos.writeUTF(TriggerNCWork.this.serializeIni()); oos.close(); return; // QQQ Should probably have an ACK here @@ -83,21 +88,52 @@ /** * Given an Ini object, serialize it to String with some enhancements. - * @param ccini the ini file to decorate and forward to NC */ - private String serializeIni(Ini ccini) throws IOException { + private String serializeIni() throws IOException { StringWriter iniString = new StringWriter(); - ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_ADDRESS.ini(), - ccs.getCCConfig().getClusterPublicAddress()); - ccini.get(Section.NC.sectionName()).putIfAbsent(NCConfig.Option.CLUSTER_PORT.ini(), - String.valueOf(ccs.getCCConfig().getClusterPublicPort())); + ConfigManager configManager = ccs.getCCConfig().getConfigManager(); + Ini ccini = configManager.toIni(false); + IApplicationConfig ncConfig = configManager.getNodeEffectiveConfig(ncId); + Section ncSection = getNcSection(ccini); + configClusterAddress(ncConfig, ncSection); + configMaxGcPause(ncConfig, ncSection); // Finally insert *this* NC's name into localnc section - this is a fixed // entry point so that NCs can determine where all their config is. - ccini.put(Section.LOCALNC.sectionName(), NCConfig.Option.NODE_ID.ini(), ncId); + ccini.put(LOCALNC.sectionName(), NCConfig.Option.NODE_ID.ini(), ncId); ccini.store(iniString); if (LOGGER.isDebugEnabled()) { LOGGER.debug("Returning Ini file:\n" + iniString.toString()); } return iniString.toString(); } + + private Section getNcSection(Ini ccini) { + String sectionName = NC.sectionName() + "/" + ncId; + Section ncSection = ccini.get(sectionName); + if (ncSection == null) { + ncSection = ccini.add(sectionName); + } + return ncSection; + } + + private void configClusterAddress(IApplicationConfig ncConfig, Section ncSection) { + if (ncConfig.getString(NCConfig.Option.CLUSTER_ADDRESS) == null) { + ncSection.put(NCConfig.Option.CLUSTER_ADDRESS.ini(), ccs.getCCConfig().getClusterPublicAddress()); + ncSection.put(NCConfig.Option.CLUSTER_PORT.ini(), String.valueOf(ccs.getCCConfig().getClusterPublicPort())); + } + } + + private void configMaxGcPause(IApplicationConfig ncConfig, Section ncSection) { + // if not already configured, set GC max pause time millis to not exceed 1/2 the total max heartbeat miss period + String ncJvmArgs = ncConfig.getString(NCConfig.Option.JVM_ARGS); + if (ncJvmArgs == null || !ncJvmArgs.contains(JVM_ARG_MAX_GCPAUSE_MILLIS)) { + String gcMaxPauseArg = JVM_ARG_MAX_GCPAUSE_MILLIS + getGcMaxPauseMillis(); + ncSection.put(NCConfig.Option.JVM_ARGS.ini(), + ncJvmArgs == null ? gcMaxPauseArg : ncJvmArgs + " " + gcMaxPauseArg); + } + } + + private long getGcMaxPauseMillis() { + return ccs.getCCConfig().getHeartbeatPeriodMillis() * ccs.getCCConfig().getHeartbeatMaxMisses() / 2; + } } -- To view, visit https://asterix-gerrit.ics.uci.edu/2426 To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings Gerrit-MessageType: merged Gerrit-Change-Id: I7e51db5ccfbb4771ba1f6e0264abfd69f833e7e7 Gerrit-PatchSet: 3 Gerrit-Project: asterixdb Gerrit-Branch: master Gerrit-Owner: Michael Blow <[email protected]> Gerrit-Reviewer: Anon. E. Moose #1000171 Gerrit-Reviewer: Jenkins <[email protected]> Gerrit-Reviewer: Michael Blow <[email protected]> Gerrit-Reviewer: Murtadha Hubail <[email protected]>
