nlu90 commented on a change in pull request #2839: fix healthmgr metrics
URL: https://github.com/apache/incubator-heron/pull/2839#discussion_r179889330
##########
File path: heron/healthmgr/src/java/com/twitter/heron/healthmgr/HealthManagerMetrics.java
##########
@@ -136,44 +168,92 @@ public void run() {
   public void close() throws Exception {
     looper.exitLoop();
     metricsMgrClient.stop();
-    outMetricsQueues.clear();
   }
-  class MetricsMgrClient extends HeronClient {
+  class SimpleMetricsManagerClient extends HeronClient {
+    private SystemConfig systemConfig;
+    private String hostname;
-    MetricsMgrClient(NIOLooper s, String host, int port, HeronSocketOptions options) {
+    SimpleMetricsManagerClient(NIOLooper s, String host, int port, HeronSocketOptions options) {
       super(s, host, port, options);
-      // TODO Auto-generated constructor stub
+      systemConfig =
+          (SystemConfig) SingletonRegistry.INSTANCE.getSingleton(SystemConfig.HERON_SYSTEM_CONFIG);
+      try {
+        this.hostname = InetAddress.getLocalHost().getHostName();
+      } catch (UnknownHostException e) {
+        throw new RuntimeException("GetHostName failed");
+      }
     }
     @Override
     public void onError() {
-      // TODO Auto-generated method stub
+      LOG.severe("Disconnected from Metrics Manager.");
+      // Dispatch to onConnect(...)
+      onConnect(StatusCode.CONNECT_ERROR);
     }
     @Override
     public void onConnect(StatusCode status) {
-      // TODO Auto-generated method stub
+      if (status != StatusCode.OK) {
+        LOG.log(Level.WARNING,
+            "Cannot connect to the local metrics mgr with status: {0}, Will Retry..", status);
+        Runnable r = new Runnable() {
+          public void run() {
+            start();
+          }
+        };
+
+        getNIOLooper().registerTimerEvent(systemConfig.getInstanceReconnectMetricsmgrInterval(), r);
+        return;
+      }
+
+      LOG.info("Connected to Metrics Manager. Ready to send register request");
+      sendRegisterRequest();
+    }
+
+    private void sendRegisterRequest() {
+      Metrics.MetricPublisher publisher = Metrics.MetricPublisher.newBuilder().setHostname(hostname)
+          .setPort(getSocketChannel().socket().getPort()).setComponentName("__healthmgr__")
+          .setInstanceId("healthmgr-0").setInstanceIndex(-1).build();
+      Metrics.MetricPublisherRegisterRequest request =
+          Metrics.MetricPublisherRegisterRequest.newBuilder().setPublisher(publisher).build();
+      // The timeout would be the reconnect-interval-seconds
+      sendRequest(request, null, Metrics.MetricPublisherRegisterResponse.newBuilder(),
+          systemConfig.getInstanceReconnectMetricsmgrInterval());
     }
     @Override
     public void onResponse(StatusCode status, Object ctx, Message response) {
-      // TODO Auto-generated method stub
+      if (status != StatusCode.OK) {
+        // TODO:- is this a good thing?

Review comment:
   Is this TODO necessary?

----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org

With regards,
Apache Git Services