[GitHub] huijunwu commented on a change in pull request #2839: fix healthmgr metrics
huijunwu commented on a change in pull request #2839: fix healthmgr metrics URL: https://github.com/apache/incubator-heron/pull/2839#discussion_r181235186 ## File path: heron/healthmgr/src/java/org/apache/heron/healthmgr/HealthManager.java ## @@ -329,6 +337,8 @@ protected void configure() { bind(String.class) .annotatedWith(Names.named(CONF_METRICS_SOURCE_TYPE)) .toInstance(type); +bind(HealthManagerMetrics.class) +.toInstance(publishingMetricsRunnable); Review comment: done This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] huijunwu commented on a change in pull request #2839: fix healthmgr metrics
huijunwu commented on a change in pull request #2839: fix healthmgr metrics URL: https://github.com/apache/incubator-heron/pull/2839#discussion_r181042810 ## File path: heron/healthmgr/src/java/com/twitter/heron/healthmgr/HealthManager.java ## @@ -195,6 +195,14 @@ public static void main(String[] args) throws Exception { setupLogging(cmd, config); +LOG.fine(Arrays.toString(cmd.getOptions())); + +// Add the SystemConfig into SingletonRegistry +SystemConfig systemConfig = SystemConfig.newBuilder(true) +.putAll(getOptionValue(cmd, CliArgs.SYSTEM_CONFIG_FILEPATH), true) +.putAll(getOptionValue(cmd, CliArgs.OVERRIDE_CONFIG_FILEPATH), true).build(); + SingletonRegistry.INSTANCE.registerSingleton(SystemConfig.HERON_SYSTEM_CONFIG, systemConfig); Review comment: updated to guice This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] huijunwu commented on a change in pull request #2839: fix healthmgr metrics
huijunwu commented on a change in pull request #2839: fix healthmgr metrics URL: https://github.com/apache/incubator-heron/pull/2839#discussion_r180561186 ## File path: heron/executor/src/python/heron_executor.py ## @@ -498,7 +498,11 @@ def _get_healthmgr_cmd(self): "--cluster", self.cluster, "--role", self.role, "--environment", self.environment, - "--topology_name", self.topology_name, "--verbose"] + "--topology_name", self.topology_name, + "--metricsmgr_port", self.metrics_manager_port, + "--system_config_file", self.heron_internals_config_file, Review comment: updated This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] huijunwu commented on a change in pull request #2839: fix healthmgr metrics
huijunwu commented on a change in pull request #2839: fix healthmgr metrics URL: https://github.com/apache/incubator-heron/pull/2839#discussion_r180531105 ## File path: heron/healthmgr/src/java/com/twitter/heron/healthmgr/HealthManager.java ## @@ -276,10 +283,8 @@ public void initialize() throws ReflectiveOperationException, FileNotFoundExcept stateMgrAdaptor = createStateMgrAdaptor(); -this.runtime = Config.newBuilder() -.put(Key.SCHEDULER_STATE_MANAGER_ADAPTOR, stateMgrAdaptor) -.put(Key.TOPOLOGY_NAME, Context.topologyName(config)) -.build(); +this.runtime = Config.newBuilder().put(Key.SCHEDULER_STATE_MANAGER_ADAPTOR, stateMgrAdaptor) +.put(Key.TOPOLOGY_NAME, Context.topologyName(config)).build(); Review comment: updated to keep the original format This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] huijunwu commented on a change in pull request #2839: fix healthmgr metrics
huijunwu commented on a change in pull request #2839: fix healthmgr metrics URL: https://github.com/apache/incubator-heron/pull/2839#discussion_r180531015 ## File path: heron/healthmgr/src/java/com/twitter/heron/healthmgr/HealthManager.java ## @@ -211,11 +219,13 @@ public static void main(String[] args) throws Exception { LOG.info("Initializing health manager"); healthManager.initialize(); -LOG.info("Starting Health Manager metirc posting thread"); HealthManagerMetrics publishingMetricsRunnable = null; if (hasOption(cmd, CliArgs.METRICSMGR_PORT)) { - publishingMetricsRunnable = new HealthManagerMetrics( - Integer.valueOf(getOptionValue(cmd, CliArgs.METRICSMGR_PORT))); + LOG.info("Starting Health Manager metirc posting thread"); Review comment: updated This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] huijunwu commented on a change in pull request #2839: fix healthmgr metrics
huijunwu commented on a change in pull request #2839: fix healthmgr metrics URL: https://github.com/apache/incubator-heron/pull/2839#discussion_r180530960 ## File path: heron/executor/src/python/heron_executor.py ## @@ -498,7 +498,11 @@ def _get_healthmgr_cmd(self): "--cluster", self.cluster, "--role", self.role, "--environment", self.environment, - "--topology_name", self.topology_name, "--verbose"] + "--topology_name", self.topology_name, + "--metricsmgr_port", self.metrics_manager_port, + "--system_config_file", self.heron_internals_config_file, + "--override_config_file", self.override_config_file, + "--verbose"] Review comment: updated This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] huijunwu commented on a change in pull request #2839: fix healthmgr metrics
huijunwu commented on a change in pull request #2839: fix healthmgr metrics URL: https://github.com/apache/incubator-heron/pull/2839#discussion_r179892658 ## File path: heron/healthmgr/src/java/com/twitter/heron/healthmgr/HealthManagerMetrics.java ## @@ -136,44 +168,92 @@ public void run() { public void close() throws Exception { looper.exitLoop(); metricsMgrClient.stop(); -outMetricsQueues.clear(); } - class MetricsMgrClient extends HeronClient { + class SimpleMetricsManagerClient extends HeronClient { +private SystemConfig systemConfig; +private String hostname; -MetricsMgrClient(NIOLooper s, String host, int port, HeronSocketOptions options) { +SimpleMetricsManagerClient(NIOLooper s, String host, int port, HeronSocketOptions options) { super(s, host, port, options); - // TODO Auto-generated constructor stub + systemConfig = + (SystemConfig) SingletonRegistry.INSTANCE.getSingleton(SystemConfig.HERON_SYSTEM_CONFIG); + try { +this.hostname = InetAddress.getLocalHost().getHostName(); + } catch (UnknownHostException e) { +throw new RuntimeException("GetHostName failed"); + } } @Override public void onError() { - // TODO Auto-generated method stub + LOG.severe("Disconnected from Metrics Manager."); + // Dispatch to onConnect(...) + onConnect(StatusCode.CONNECT_ERROR); } @Override public void onConnect(StatusCode status) { - // TODO Auto-generated method stub + if (status != StatusCode.OK) { +LOG.log(Level.WARNING, +"Cannot connect to the local metrics mgr with status: {0}, Will Retry..", status); +Runnable r = new Runnable() { + public void run() { +start(); + } +}; + + getNIOLooper().registerTimerEvent(systemConfig.getInstanceReconnectMetricsmgrInterval(), r); +return; + } + + LOG.info("Connected to Metrics Manager. 
Ready to send register request"); + sendRegisterRequest(); +} + +private void sendRegisterRequest() { + Metrics.MetricPublisher publisher = Metrics.MetricPublisher.newBuilder().setHostname(hostname) + .setPort(getSocketChannel().socket().getPort()).setComponentName("__healthmgr__") + .setInstanceId("healthmgr-0").setInstanceIndex(-1).build(); + Metrics.MetricPublisherRegisterRequest request = + Metrics.MetricPublisherRegisterRequest.newBuilder().setPublisher(publisher).build(); + // The timeout would be the reconnect-interval-seconds + sendRequest(request, null, Metrics.MetricPublisherRegisterResponse.newBuilder(), + systemConfig.getInstanceReconnectMetricsmgrInterval()); } @Override public void onResponse(StatusCode status, Object ctx, Message response) { - // TODO Auto-generated method stub + if (status != StatusCode.OK) { +// TODO:- is this a good thing? Review comment: Not necessary; I will remove it. Actually, it came from existing code; see: `grep -r 'TODO:- is this a good thing?' heron` This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services
[GitHub] huijunwu commented on a change in pull request #2839: fix healthmgr metrics
huijunwu commented on a change in pull request #2839: fix healthmgr metrics URL: https://github.com/apache/incubator-heron/pull/2839#discussion_r179892658 ## File path: heron/healthmgr/src/java/com/twitter/heron/healthmgr/HealthManagerMetrics.java ## @@ -136,44 +168,92 @@ public void run() { public void close() throws Exception { looper.exitLoop(); metricsMgrClient.stop(); -outMetricsQueues.clear(); } - class MetricsMgrClient extends HeronClient { + class SimpleMetricsManagerClient extends HeronClient { +private SystemConfig systemConfig; +private String hostname; -MetricsMgrClient(NIOLooper s, String host, int port, HeronSocketOptions options) { +SimpleMetricsManagerClient(NIOLooper s, String host, int port, HeronSocketOptions options) { super(s, host, port, options); - // TODO Auto-generated constructor stub + systemConfig = + (SystemConfig) SingletonRegistry.INSTANCE.getSingleton(SystemConfig.HERON_SYSTEM_CONFIG); + try { +this.hostname = InetAddress.getLocalHost().getHostName(); + } catch (UnknownHostException e) { +throw new RuntimeException("GetHostName failed"); + } } @Override public void onError() { - // TODO Auto-generated method stub + LOG.severe("Disconnected from Metrics Manager."); + // Dispatch to onConnect(...) + onConnect(StatusCode.CONNECT_ERROR); } @Override public void onConnect(StatusCode status) { - // TODO Auto-generated method stub + if (status != StatusCode.OK) { +LOG.log(Level.WARNING, +"Cannot connect to the local metrics mgr with status: {0}, Will Retry..", status); +Runnable r = new Runnable() { + public void run() { +start(); + } +}; + + getNIOLooper().registerTimerEvent(systemConfig.getInstanceReconnectMetricsmgrInterval(), r); +return; + } + + LOG.info("Connected to Metrics Manager. 
Ready to send register request"); + sendRegisterRequest(); +} + +private void sendRegisterRequest() { + Metrics.MetricPublisher publisher = Metrics.MetricPublisher.newBuilder().setHostname(hostname) + .setPort(getSocketChannel().socket().getPort()).setComponentName("__healthmgr__") + .setInstanceId("healthmgr-0").setInstanceIndex(-1).build(); + Metrics.MetricPublisherRegisterRequest request = + Metrics.MetricPublisherRegisterRequest.newBuilder().setPublisher(publisher).build(); + // The timeout would be the reconnect-interval-seconds + sendRequest(request, null, Metrics.MetricPublisherRegisterResponse.newBuilder(), + systemConfig.getInstanceReconnectMetricsmgrInterval()); } @Override public void onResponse(StatusCode status, Object ctx, Message response) { - // TODO Auto-generated method stub + if (status != StatusCode.OK) { +// TODO:- is this a good thing? Review comment: Not necessary; I will remove it. This is an automated message from the Apache Git Service. To respond to the message, please log on GitHub and use the URL above to go to the specific comment. For queries about this service, please contact Infrastructure at: us...@infra.apache.org With regards, Apache Git Services