AMBARI-18289 : Invalid negative values for some AMS metrics. (avijayan)
Project: http://git-wip-us.apache.org/repos/asf/ambari/repo Commit: http://git-wip-us.apache.org/repos/asf/ambari/commit/7adb5cff Tree: http://git-wip-us.apache.org/repos/asf/ambari/tree/7adb5cff Diff: http://git-wip-us.apache.org/repos/asf/ambari/diff/7adb5cff Branch: refs/heads/branch-2.5 Commit: 7adb5cfff607bf7914988fc3f99096d850cc0a33 Parents: 9b08b70 Author: Aravindan Vijayan <[email protected]> Authored: Wed Sep 14 15:08:40 2016 -0700 Committer: Aravindan Vijayan <[email protected]> Committed: Wed Sep 14 15:08:40 2016 -0700 ---------------------------------------------------------------------- .../conf/unix/metric_monitor.ini | 1 + .../src/main/python/core/config_reader.py | 5 ++- .../src/main/python/core/host_info.py | 39 ++++++++++++++++---- .../0.1.0/configuration/ams-env.xml | 20 ++++++++++ .../0.1.0/package/scripts/params.py | 1 + .../package/templates/metric_monitor.ini.j2 | 1 + 6 files changed, 58 insertions(+), 9 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/ambari/blob/7adb5cff/ambari-metrics/ambari-metrics-host-monitoring/conf/unix/metric_monitor.ini ---------------------------------------------------------------------- diff --git a/ambari-metrics/ambari-metrics-host-monitoring/conf/unix/metric_monitor.ini b/ambari-metrics/ambari-metrics-host-monitoring/conf/unix/metric_monitor.ini index 3e5d861..fc896b8 100644 --- a/ambari-metrics/ambari-metrics-host-monitoring/conf/unix/metric_monitor.ini +++ b/ambari-metrics/ambari-metrics-host-monitoring/conf/unix/metric_monitor.ini @@ -21,6 +21,7 @@ debug_level = INFO hostname = localhost enable_time_threshold = false enable_value_threshold = false +skip_disk_patterns = [emitter] send_interval = 60 http://git-wip-us.apache.org/repos/asf/ambari/blob/7adb5cff/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/config_reader.py ---------------------------------------------------------------------- diff --git a/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/config_reader.py b/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/config_reader.py index 02f0ce3..e2ed83f 100644 --- a/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/config_reader.py +++ b/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/config_reader.py @@ -232,4 +232,7 @@ class Configuration: return 6188 def get_ca_certs(self): - return self._ca_cert_file_path \ No newline at end of file + return self._ca_cert_file_path + + def get_disk_metrics_skip_pattern(self): + return self.get("default", "skip_disk_patterns") http://git-wip-us.apache.org/repos/asf/ambari/blob/7adb5cff/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/host_info.py ---------------------------------------------------------------------- diff --git a/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/host_info.py b/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/host_info.py index 845b270..f79cacd 100644 --- a/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/host_info.py +++ b/ambari-metrics/ambari-metrics-host-monitoring/src/main/python/core/host_info.py @@ -26,6 +26,8 @@ import time import threading import socket import operator +import re +from collections import namedtuple logger = logging.getLogger() cached_hostname = None @@ -244,16 +246,37 @@ class HostInfo(): if delta <= 0: delta = float("inf") - io_counters = psutil.disk_io_counters() + skip_disk_patterns = self.__config.get_disk_metrics_skip_pattern() + logger.debug('skip_disk_patterns: %s' % skip_disk_patterns) + if not skip_disk_patterns or skip_disk_patterns == 'None': + io_counters = psutil.disk_io_counters() + else: + sdiskio = namedtuple('sdiskio', ['read_count', 'write_count', + 'read_bytes', 'write_bytes', + 'read_time', 'write_time']) + skip_disk_pattern_list = skip_disk_patterns.split(',') + rawdict = psutil.disk_io_counters(True) + if not rawdict: + raise RuntimeError("Couldn't find any physical disk") + trimmed_dict = {} + for disk, fields in rawdict.items(): + ignore_disk = False + for p in skip_disk_pattern_list: + if re.match(p, disk): + ignore_disk = True + if not ignore_disk: + trimmed_dict[disk] = sdiskio(*fields) + io_counters = sdiskio(*[sum(x) for x in zip(*trimmed_dict.values())]) new_disk_stats = { - 'read_count' : io_counters.read_count if hasattr(io_counters, 'read_count') else 0, - 'write_count' : io_counters.write_count if hasattr(io_counters, 'write_count') else 0, - 'read_bytes' : io_counters.read_bytes if hasattr(io_counters, 'read_bytes') else 0, - 'write_bytes' : io_counters.write_bytes if hasattr(io_counters, 'write_bytes') else 0, - 'read_time' : io_counters.read_time if hasattr(io_counters, 'read_time') else 0, - 'write_time' : io_counters.write_time if hasattr(io_counters, 'write_time') else 0 - } + 'read_count' : io_counters.read_count if hasattr(io_counters, 'read_count') else 0, + 'write_count' : io_counters.write_count if hasattr(io_counters, 'write_count') else 0, + 'read_bytes' : io_counters.read_bytes if hasattr(io_counters, 'read_bytes') else 0, + 'write_bytes' : io_counters.write_bytes if hasattr(io_counters, 'write_bytes') else 0, + 'read_time' : io_counters.read_time if hasattr(io_counters, 'read_time') else 0, + 'write_time' : io_counters.write_time if hasattr(io_counters, 'write_time') else 0 + } + if not self.__last_disk_data: self.__last_disk_data = new_disk_stats read_bps = (new_disk_stats['read_bytes'] - self.__last_disk_data['read_bytes']) / delta http://git-wip-us.apache.org/repos/asf/ambari/blob/7adb5cff/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-env.xml ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-env.xml b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-env.xml index 4135d32..4059510 100644 --- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-env.xml +++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/configuration/ams-env.xml @@ -81,6 +81,26 @@ <on-ambari-upgrade add="true"/> </property> <property> + <name>failover_strategy_blacklisted_interval</name> + <value>600</value> + <description> + Metrics collector host will be blacklisted for specified number of seconds if metric monitor failed to connect to it. + </description> + <value-attributes> + <type>int</type> + </value-attributes> + <on-ambari-upgrade add="true"/> + </property> + <property> + <name>timeline.metrics.skip.disk.metrics.patterns</name> + <value>true</value> + <description> + Comma separated list of disk patterns to be ignored while collecting aggregate disk usage and counter metrics. + For example, volume groups managed by docker can be ignored by using the pattern "dm-[0-9]+" + </description> + <on-ambari-upgrade add="true"/> + </property> + <property> <name>content</name> <display-name>ams-env template</display-name> <value> http://git-wip-us.apache.org/repos/asf/ambari/blob/7adb5cff/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params.py ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params.py b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params.py index 2503c43..7cdb4a3 100644 --- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params.py +++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/scripts/params.py @@ -184,6 +184,7 @@ metrics_collector_heapsize = default('/configurations/ams-env/metrics_collector_ host_sys_prepped = default("/hostLevelParams/host_sys_prepped", False) metrics_report_interval = default("/configurations/ams-site/timeline.metrics.sink.report.interval", 60) metrics_collection_period = default("/configurations/ams-site/timeline.metrics.sink.collection.period", 10) +skip_disk_metrics_patterns = default("/configurations/ams-env/timeline.metrics.skip.disk.metrics.patterns", None) hbase_log_dir = config['configurations']['ams-hbase-env']['hbase_log_dir'] hbase_classpath_additional = default("/configurations/ams-hbase-env/hbase_classpath_additional", None) http://git-wip-us.apache.org/repos/asf/ambari/blob/7adb5cff/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/metric_monitor.ini.j2 ---------------------------------------------------------------------- diff --git a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/metric_monitor.ini.j2 b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/metric_monitor.ini.j2 index 383a0de..06109f4 100644 --- a/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/metric_monitor.ini.j2 +++ b/ambari-server/src/main/resources/common-services/AMBARI_METRICS/0.1.0/package/templates/metric_monitor.ini.j2 @@ -21,6 +21,7 @@ debug_level = INFO hostname = {{hostname}} enable_time_threshold = false enable_value_threshold = false +skip_disk_patterns = {{skip_disk_metrics_patterns}} [emitter] send_interval = {{metrics_report_interval}}
