Michael Blow has uploaded a new change for review.
https://asterix-gerrit.ics.uci.edu/1193
Change subject: HTTP API++, NCService Termination After Dead NCService
......................................................................
HTTP API++, NCService Termination After Dead NCService
- include PID of NC service in NC config API
- shutdown NCService even when NC is unresponsive (e.g. dead) when
shutdown?all=true
- include PIDs of NC service & NC in shutdown response document
Change-Id: Ia48eb16696e48444f610fd5e8d7d4666d0257a38
---
M
asterixdb/asterix-app/src/main/java/org/apache/asterix/api/http/servlet/ShutdownAPIServlet.java
M
asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.adm
M
asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_3/cluster_state_3.1.adm
M
asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_4/cluster_state_4.1.adm
M
asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/util/ClusterStateManager.java
M
hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/ClusterControllerService.java
M
hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/ClusterShutdownWork.java
M
hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
M
hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
M
hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/shutdown/ShutdownRun.java
M
hyracks-fullstack/hyracks/hyracks-control/hyracks-nc-service/src/main/java/org/apache/hyracks/control/nc/service/NCService.java
11 files changed, 205 insertions(+), 114 deletions(-)
git pull ssh://asterix-gerrit.ics.uci.edu:29418/asterixdb
refs/changes/93/1193/1
diff --git
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/api/http/servlet/ShutdownAPIServlet.java
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/api/http/servlet/ShutdownAPIServlet.java
index d8cb553..d99b379 100644
---
a/asterixdb/asterix-app/src/main/java/org/apache/asterix/api/http/servlet/ShutdownAPIServlet.java
+++
b/asterixdb/asterix-app/src/main/java/org/apache/asterix/api/http/servlet/ShutdownAPIServlet.java
@@ -34,10 +34,15 @@
import org.apache.asterix.common.config.GlobalConfig;
import org.apache.asterix.runtime.util.ClusterStateManager;
import org.apache.hyracks.api.client.IHyracksClientConnection;
+import org.json.JSONArray;
import org.json.JSONObject;
public class ShutdownAPIServlet extends HttpServlet {
private static final long serialVersionUID = 1L;
+ public static final String NODE_ID_KEY = "node_id";
+ public static final String NCSERVICE_PID = "ncservice_pid";
+ public static final String INI = "ini";
+ public static final String PID = "pid";
@Override
protected void doPost(HttpServletRequest request, HttpServletResponse
response)
@@ -61,7 +66,18 @@
try {
jsonObject.put("status", "SHUTTING_DOWN");
jsonObject.put("date", new Date());
- jsonObject.put("cluster",
ClusterStateManager.INSTANCE.getClusterStateDescription());
+ JSONObject clusterState =
ClusterStateManager.INSTANCE.getClusterStateDescription();
+ JSONArray ncs = clusterState.getJSONArray("ncs");
+ for (int i = 0; i < ncs.length(); i++) {
+ JSONObject nc = ncs.getJSONObject(i);
+ String node = nc.getString(NODE_ID_KEY);
+ JSONObject details = new
JSONObject(hcc.getNodeDetailsJSON(node, false, true));
+ nc.put(PID, details.get(PID));
+ if (details.has(INI) &&
details.getJSONObject(INI).has(NCSERVICE_PID)) {
+ nc.put(NCSERVICE_PID,
details.getJSONObject(INI).getInt(NCSERVICE_PID));
+ }
+ }
+ jsonObject.put("cluster", clusterState);
final PrintWriter writer = response.getWriter();
writer.print(jsonObject.toString(4));
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.adm
index 8b66885..30e6886 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_1/cluster_state_1.1.adm
@@ -60,9 +60,16 @@
"configUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc1/config",
"node_id": "asterix_nc1",
"partitions": [
- "partition_0",
- "partition_1"
+ {
+ "active": true,
+ "partition_id": "partition_0"
+ },
+ {
+ "active": true,
+ "partition_id": "partition_1"
+ }
],
+ "state": "ACTIVE",
"statsUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc1/stats",
"threadDumpUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc1/threaddump"
},
@@ -70,9 +77,16 @@
"configUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc2/config",
"node_id": "asterix_nc2",
"partitions": [
- "partition_2",
- "partition_3"
+ {
+ "active": true,
+ "partition_id": "partition_2"
+ },
+ {
+ "active": true,
+ "partition_id": "partition_3"
+ }
],
+ "state": "ACTIVE",
"statsUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc2/stats",
"threadDumpUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc2/threaddump"
}
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_3/cluster_state_3.1.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_3/cluster_state_3.1.adm
index 4357d25..b0815ee 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_3/cluster_state_3.1.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_3/cluster_state_3.1.adm
@@ -3,9 +3,16 @@
"configUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc1/config",
"node_id": "asterix_nc1",
"partitions": [
- "partition_0",
- "partition_1"
+ {
+ "active": true,
+ "partition_id": "partition_0"
+ },
+ {
+ "active": true,
+ "partition_id": "partition_1"
+ }
],
+ "state": "ACTIVE",
"statsUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc1/stats",
"threadDumpUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc1/threaddump"
},
@@ -13,9 +20,16 @@
"configUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc2/config",
"node_id": "asterix_nc2",
"partitions": [
- "partition_2",
- "partition_3"
+ {
+ "active": true,
+ "partition_id": "partition_2"
+ },
+ {
+ "active": true,
+ "partition_id": "partition_3"
+ }
],
+ "state": "ACTIVE",
"statsUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc2/stats",
"threadDumpUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc2/threaddump"
}
diff --git
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_4/cluster_state_4.1.adm
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_4/cluster_state_4.1.adm
index c893534..4635690 100644
---
a/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_4/cluster_state_4.1.adm
+++
b/asterixdb/asterix-app/src/test/resources/runtimets/results/api/cluster_state_4/cluster_state_4.1.adm
@@ -2,9 +2,16 @@
"configUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc1/config",
"node_id": "asterix_nc1",
"partitions": [
- "partition_0",
- "partition_1"
+ {
+ "active": true,
+ "partition_id": "partition_0"
+ },
+ {
+ "active": true,
+ "partition_id": "partition_1"
+ }
],
+ "state": "ACTIVE",
"statsUri": "http://127.0.0.1:19002/admin/cluster/node/asterix_nc1/stats",
"threadDumpUri":
"http://127.0.0.1:19002/admin/cluster/node/asterix_nc1/threaddump"
}
diff --git
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/util/ClusterStateManager.java
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/util/ClusterStateManager.java
index 487b8fa..bc15788 100644
---
a/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/util/ClusterStateManager.java
+++
b/asterixdb/asterix-runtime/src/main/java/org/apache/asterix/runtime/util/ClusterStateManager.java
@@ -629,10 +629,23 @@
for (Map.Entry<String, ClusterPartition[]> entry :
node2PartitionsMap.entrySet()) {
JSONObject nodeJSON = new JSONObject();
nodeJSON.put("node_id", entry.getKey());
- List<String> partitions = new ArrayList<>();
+ boolean allActive = true;
+ boolean anyActive = false;
+ Set<Map<String, Object>> partitions = new HashSet<>();
for (ClusterPartition part : entry.getValue()) {
- partitions.add("partition_" + part.getPartitionId());
+ HashMap<String, Object> partition = new HashMap<>();
+ partition.put("partition_id", "partition_" +
part.getPartitionId());
+ partition.put("active", part.isActive());
+ partitions.add(partition);
+ allActive = allActive && part.isActive();
+ if (allActive) {
+ anyActive = true;
+ }
}
+ nodeJSON.put("state", failedNodes.contains(entry.getKey()) ?
"FAILED"
+ : allActive ? "ACTIVE"
+ : anyActive ? "PARTIALLY_ACTIVE"
+ : "INACTIVE");
nodeJSON.put("partitions", partitions);
stateDescription.accumulate("ncs", nodeJSON);
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/ClusterControllerService.java
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/ClusterControllerService.java
index 685a36b..4fde1f6 100644
---
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/ClusterControllerService.java
+++
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/ClusterControllerService.java
@@ -22,6 +22,7 @@
import java.io.FileReader;
import java.net.InetAddress;
import java.net.InetSocketAddress;
+import java.util.ArrayList;
import java.util.Collections;
import java.util.HashMap;
import java.util.LinkedHashMap;
@@ -87,6 +88,7 @@
import org.apache.hyracks.control.cc.work.ReportProfilesWork;
import org.apache.hyracks.control.cc.work.ReportResultPartitionFailureWork;
import
org.apache.hyracks.control.cc.work.ReportResultPartitionWriteCompletionWork;
+import org.apache.hyracks.control.cc.work.ShutdownNCServiceWork;
import org.apache.hyracks.control.cc.work.TaskCompleteWork;
import org.apache.hyracks.control.cc.work.TaskFailureWork;
import org.apache.hyracks.control.cc.work.TriggerNCWork;
@@ -114,7 +116,7 @@
import org.xml.sax.InputSource;
public class ClusterControllerService implements IControllerService {
- private static Logger LOGGER =
Logger.getLogger(ClusterControllerService.class.getName());
+ private static final Logger LOGGER =
Logger.getLogger(ClusterControllerService.class.getName());
private final CCConfig ccConfig;
@@ -279,12 +281,43 @@
}
}
+ private void terminateNCServices() throws Exception {
+ Ini ini = ccConfig.getIni();
+ if (ini == null) {
+ return;
+ }
+ List<ShutdownNCServiceWork> shutdownNCServiceWorks = new ArrayList<>();
+ for (String section : ini.keySet()) {
+ if (!section.startsWith("nc/")) {
+ continue;
+ }
+ String ncid = section.substring(3);
+ String address = IniUtils.getString(ini, section, "address", null);
+ int port = IniUtils.getInt(ini, section, "port", 9090);
+ if (address == null) {
+ address = InetAddress.getLoopbackAddress().getHostAddress();
+ }
+ ShutdownNCServiceWork shutdownWork = new
ShutdownNCServiceWork(address, port, ncid);
+ workQueue.schedule(shutdownWork);
+ shutdownNCServiceWorks.add(shutdownWork);
+ }
+ for (ShutdownNCServiceWork shutdownWork : shutdownNCServiceWorks) {
+ shutdownWork.sync();
+ }
+ }
+
private void notifyApplication() throws Exception {
if (aep != null) {
// Sometimes, there is no application entry point. Check
hyracks-client project
aep.startupCompleted();
}
}
+ public void stop(boolean terminateNCService) throws Exception {
+ if (terminateNCService) {
+ terminateNCServices();
+ }
+ stop();
+ }
@Override
public void stop() throws Exception {
diff --git
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/ClusterShutdownWork.java
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/ClusterShutdownWork.java
index 63ca4ae..29e1f83 100644
---
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/ClusterShutdownWork.java
+++
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/ClusterShutdownWork.java
@@ -53,7 +53,7 @@
throw new IPCException("Shutdown in Progress");
}
Map<String, NodeControllerState> nodeControllerStateMap =
ccs.getNodeMap();
- Set<String> nodeIds = new TreeSet<String>();
+ Set<String> nodeIds = new TreeSet<>();
nodeIds.addAll(nodeControllerStateMap.keySet());
/**
* set up our listener for the node ACKs
@@ -70,33 +70,22 @@
@Override
public void run() {
try {
- /**
+ /*
* wait for all our acks
*/
boolean cleanShutdown =
shutdownStatus.waitForCompletion();
- if (cleanShutdown) {
- callback.setValue(true);
- ccs.stop();
- LOGGER.info("JVM Exiting.. Bye!");
- Runtime rt = Runtime.getRuntime();
- rt.exit(0);
+ if (!cleanShutdown) {
+ /*
+ * best effort - just exit, user will have to kill
misbehaving NCs
+ */
+ LOGGER.severe("Clean shutdown of NCs timed out-
giving up! Unresponsive nodes: " +
+ shutdownStatus.getRemainingNodes());
}
- /**
- * best effort - just exit, user will have to kill
misbehaving NCs
- */
- else {
- LOGGER.severe("Clean shutdown of NCs timed out- CC
bailing out!");
- StringBuilder unresponsive = new StringBuilder();
- for (String s :
shutdownStatus.getRemainingNodes()) {
- unresponsive.append(s).append(' ');
- }
- LOGGER.severe("Unresponsive Nodes: " +
unresponsive);
- callback.setValue(false);
- ccs.stop();
- LOGGER.info("JVM Exiting.. Bye!");
- Runtime rt = Runtime.getRuntime();
- rt.exit(1);
- }
+ callback.setValue(cleanShutdown);
+ ccs.stop(terminateNCService);
+ LOGGER.info("JVM Exiting.. Bye!");
+ Runtime rt = Runtime.getRuntime();
+ rt.exit(cleanShutdown ? 0 : 1);
} catch (Exception e) {
callback.setException(e);
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
index 9a52a06..a7bca25 100644
---
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
+++
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-cc/src/main/java/org/apache/hyracks/control/cc/work/TriggerNCWork.java
@@ -18,6 +18,8 @@
*/
package org.apache.hyracks.control.cc.work;
+import static
org.apache.hyracks.control.common.controllers.ServiceConstants.NC_SERVICE_MAGIC_COOKIE;
+
import java.io.IOException;
import java.io.ObjectOutputStream;
import java.io.StringWriter;
@@ -26,6 +28,7 @@
import java.util.logging.Logger;
import org.apache.hyracks.control.cc.ClusterControllerService;
+import
org.apache.hyracks.control.common.controllers.ServiceConstants.ServiceCommand;
import org.apache.hyracks.control.common.work.AbstractWork;
import org.ini4j.Ini;
@@ -35,11 +38,6 @@
*/
public class TriggerNCWork extends AbstractWork {
- // This constant must match the corresponding constant in
- //
hyracks-control/hyracks-nc-service/src/main/java/org/apache/hyracks/control/nc/service/NCService.java
- // I didn't want to introduce a Maven-level dependency on the
- // hyracks-nc-service package (or vice-versa).
- public static final String NC_MAGIC_COOKIE = "hyncmagic";
private static final Logger LOGGER =
Logger.getLogger(TriggerNCWork.class.getName());
private final ClusterControllerService ccs;
@@ -55,32 +53,25 @@
}
@Override
public final void run() {
- ccs.getExecutor().execute(new Runnable() {
- @Override
- @SuppressWarnings("squid:S2142") // suppress interrupted exception
warning
- public void run() {
- while (true) {
- if (LOGGER.isLoggable(Level.INFO)) {
- LOGGER.info("Connecting NC service '" + ncId + "' at "
+ ncHost + ":" + ncPort);
- }
- try (Socket s = new Socket(ncHost, ncPort)) {
- ObjectOutputStream oos = new
ObjectOutputStream(s.getOutputStream());
- oos.writeUTF(NC_MAGIC_COOKIE);
- oos.writeUTF(serializeIni(ccs.getCCConfig().getIni()));
- oos.close();
- break;
- // QQQ Should probably have an ACK here
- } catch (IOException e) {
- if (LOGGER.isLoggable(Level.WARNING)) {
- LOGGER.log(Level.WARNING, "Failed to contact NC
service at " + ncHost +
- ":" + ncPort + "; will retry", e);
- }
- }
- try {
- Thread.sleep(5000);
- } catch (InterruptedException e) {
- break;
- }
+ ccs.getExecutor().execute(() -> {
+ while (true) {
+ LOGGER.info("Connecting NC service '" + ncId + "' at " +
ncHost + ":" + ncPort);
+ try (Socket s = new Socket(ncHost, ncPort)) {
+ ObjectOutputStream oos = new
ObjectOutputStream(s.getOutputStream());
+ oos.writeUTF(NC_SERVICE_MAGIC_COOKIE);
+ oos.writeUTF(ServiceCommand.START_NC.name());
+
oos.writeUTF(TriggerNCWork.this.serializeIni(ccs.getCCConfig().getIni()));
+ oos.close();
+ return;
+ // QQQ Should probably have an ACK here
+ } catch (IOException e) {
+ LOGGER.log(Level.WARNING, "Failed to contact NC service at
" + ncHost + ":" + ncPort
+ + "; will retry", e);
+ }
+ try {
+ Thread.sleep(5000);
+ } catch (InterruptedException e) {
+ return;
}
}
});
@@ -95,7 +86,7 @@
ccini.store(iniString);
// Finally insert *this* NC's name into localnc section - this is a
fixed
// entry point so that NCs can determine where all their config is.
- iniString.append("\n[localnc]\nid=" + ncId + "\n");
+ iniString.append("\n[localnc]\nid=").append(ncId).append("\n");
if (LOGGER.isLoggable(Level.FINE)) {
LOGGER.fine("Returning Ini file:\n" + iniString.toString());
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
index 64b9737..de336ed 100644
---
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
+++
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/controllers/NCConfig.java
@@ -129,6 +129,9 @@
+ " (default: same as -messaging-port; must set
-messaging-public-port also)", required = false)
public int messagingPublicPort = 0;
+ @Option(name = "-ncservice-pid", usage = "PID of the NCService which
launched this NCDriver", required = false)
+ public int ncservicePid = 0;
+
@Argument
@Option(name = "--", handler = StopOptionHandler.class)
public List<String> appArgs;
@@ -258,5 +261,8 @@
if (appNCMainClass != null) {
configuration.put("app-nc-main-class", appNCMainClass);
}
+ if (ncservicePid != 0) {
+ configuration.put("ncservice-pid", String.valueOf(ncservicePid));
+ }
}
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/shutdown/ShutdownRun.java
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/shutdown/ShutdownRun.java
index 6912892..4e5c98f 100644
---
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/shutdown/ShutdownRun.java
+++
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-control-common/src/main/java/org/apache/hyracks/control/common/shutdown/ShutdownRun.java
@@ -24,7 +24,7 @@
public class ShutdownRun implements IShutdownStatusConditionVariable{
- private final Set<String> shutdownNodeIds = new TreeSet<String>();
+ private final Set<String> shutdownNodeIds = new TreeSet<>();
private boolean shutdownSuccess = false;
private static final int SHUTDOWN_TIMER_MS = 10000; //10 seconds
@@ -35,12 +35,11 @@
/**
* Notify that a node is shutting down.
*
- * @param nodeId
- * @param status
+ * @param nodeId the node acknowledging the shutdown
*/
public synchronized void notifyShutdown(String nodeId) {
shutdownNodeIds.remove(nodeId);
- if (shutdownNodeIds.size() == 0) {
+ if (shutdownNodeIds.isEmpty()) {
shutdownSuccess = true;
notifyAll();
}
@@ -48,10 +47,14 @@
@Override
public synchronized boolean waitForCompletion() throws Exception {
- /*
- * Either be woken up when we're done, or default to fail.
- */
- wait(SHUTDOWN_TIMER_MS);
+ if (shutdownNodeIds.isEmpty()) {
+ shutdownSuccess = true;
+ } else {
+ /*
+ * Either be woken up when we're done, or default to fail.
+ */
+ wait(SHUTDOWN_TIMER_MS);
+ }
return shutdownSuccess;
}
diff --git
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-nc-service/src/main/java/org/apache/hyracks/control/nc/service/NCService.java
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-nc-service/src/main/java/org/apache/hyracks/control/nc/service/NCService.java
index 5b032d9..bdc3423 100644
---
a/hyracks-fullstack/hyracks/hyracks-control/hyracks-nc-service/src/main/java/org/apache/hyracks/control/nc/service/NCService.java
+++
b/hyracks-fullstack/hyracks/hyracks-control/hyracks-nc-service/src/main/java/org/apache/hyracks/control/nc/service/NCService.java
@@ -34,6 +34,8 @@
import org.apache.commons.lang3.SystemUtils;
import org.apache.hyracks.control.common.controllers.IniUtils;
+import org.apache.hyracks.control.common.controllers.ServiceConstants;
+import
org.apache.hyracks.control.common.controllers.ServiceConstants.ServiceCommand;
import org.ini4j.Ini;
import org.kohsuke.args4j.CmdLineParser;
@@ -70,10 +72,8 @@
*/
private static Process proc = null;
- private static final String MAGIC_COOKIE = "hyncmagic";
-
private static List<String> buildCommand() throws IOException {
- List<String> cList = new ArrayList<String>();
+ List<String> cList = new ArrayList();
// Find the command to run. For now, we allow overriding the name, but
// still assume it's located in the bin/ directory of the deployment.
@@ -92,10 +92,15 @@
cList.add("-config-file");
// Store the Ini file from the CC locally so NCConfig can read it.
- // QQQ should arrange to delete this when done
File tempIni = File.createTempFile("ncconf", ".conf");
+ tempIni.deleteOnExit();
+
ini.store(tempIni);
cList.add(tempIni.getCanonicalPath());
+
+ // pass in the PID of the NCService
+ cList.add("-ncservice-pid");
+ cList.add(System.getProperty("app.pid", "0"));
return cList;
}
@@ -161,14 +166,10 @@
} catch (InterruptedException ignored) {
}
}
- if (LOGGER.isLoggable(Level.INFO)) {
- LOGGER.info("NCDriver exited with return value " + retval);
- }
+ LOGGER.info("NCDriver exited with return value " + retval);
if (retval == 99) {
- if (LOGGER.isLoggable(Level.INFO)) {
- LOGGER.info("Terminating NCService based on return value
from NCDriver");
- }
- System.exit(0);
+ LOGGER.info("Terminating NCService based on return value from
NCDriver");
+ exit(0);
}
return retval == 0;
} catch (Exception e) {
@@ -180,30 +181,42 @@
}
private static boolean acceptConnection(InputStream is) {
- // Simple on-wire protocol: magic cookie (string), CC address (string),
- // port (string), as encoded on CC by ObjectOutputStream. If we see
- // anything else or have any error, crap out and await a different
- // connection.
- // QQQ This should probably be changed to directly accept the full
- // config file from the CC, rather than calling back to the CC's
- // "config" webservice to retrieve it. Revisit when the CC is fully
- // parsing and validating the master config file.
+ // Simple on-wire protocol:
+ // magic cookie (string)
+ // either:
+ // START_NC, ini file
+ // or:
+ // TERMINATE
+ // If we see anything else or have any error, crap out and await a
different connection.
try {
ObjectInputStream ois = new ObjectInputStream(is);
String magic = ois.readUTF();
- if (! MAGIC_COOKIE.equals(magic)) {
+ if (! ServiceConstants.NC_SERVICE_MAGIC_COOKIE.equals(magic)) {
LOGGER.severe("Connection used incorrect magic cookie");
return false;
}
- String iniString = ois.readUTF();
- ini = new Ini(new StringReader(iniString));
- ncId = IniUtils.getString(ini, "localnc", "id", "");
- nodeSection = "nc/" + ncId;
- return launchNCProcess();
+ switch (ServiceCommand.valueOf(ois.readUTF())) {
+ case START_NC:
+ String iniString = ois.readUTF();
+ ini = new Ini(new StringReader(iniString));
+ ncId = IniUtils.getString(ini, "localnc", "id", "");
+ nodeSection = "nc/" + ncId;
+ return launchNCProcess();
+ case TERMINATE:
+ LOGGER.info("Terminating NCService based on command from
CC");
+ exit(0);
+ break;
+ }
} catch (Exception e) {
LOGGER.log(Level.SEVERE, "Error decoding connection from server",
e);
}
return false;
+ }
+
+ @SuppressWarnings("squid:S1147") // call to System.exit()
+ private static void exit(int exitCode) {
+ LOGGER.info("JVM Exiting.. Bye!");
+ System.exit(exitCode);
}
public static void main(String[] args) throws Exception {
@@ -239,25 +252,17 @@
// Loop forever - the NCService will always return to "waiting for CC"
state
// when the child NC terminates for any reason.
while (true) {
- ServerSocket listener = new ServerSocket(port, 5, addr);
- try {
+ try (ServerSocket listener = new ServerSocket(port, 5, addr)) {
boolean launched = false;
while (!launched) {
- if (LOGGER.isLoggable(Level.INFO)) {
- LOGGER.info("Waiting for connection from CC on " +
addr + ":" + port);
- }
- Socket socket = listener.accept();
- try {
+ LOGGER.info("Waiting for connection from CC on " + addr +
":" + port);
+ try (Socket socket = listener.accept()) {
// QQQ Because acceptConnection() doesn't return if the
// service is started appropriately, the socket remains
// open but non-responsive.
launched = acceptConnection(socket.getInputStream());
- } finally {
- socket.close();
}
}
- } finally {
- listener.close();
}
}
}
--
To view, visit https://asterix-gerrit.ics.uci.edu/1193
To unsubscribe, visit https://asterix-gerrit.ics.uci.edu/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: Ia48eb16696e48444f610fd5e8d7d4666d0257a38
Gerrit-PatchSet: 1
Gerrit-Project: asterixdb
Gerrit-Branch: master
Gerrit-Owner: Michael Blow <[email protected]>