Xav Paice has proposed merging ~xavpaice/hw-health-charm:add_smartctl into 
hw-health-charm:master.

Requested reviews:
  Nagios Charm developers (nagios-charmers)

For more details, see:
https://code.launchpad.net/~xavpaice/hw-health-charm/+git/hw-health-charm/+merge/363833
-- 
Your team Nagios Charm developers is requested to review the proposed merge of 
~xavpaice/hw-health-charm:add_smartctl into hw-health-charm:master.
diff --git a/Makefile b/Makefile
index 20a6f9b..8f9d4fb 100644
--- a/Makefile
+++ b/Makefile
@@ -21,8 +21,12 @@ help:
 
 lint:
 	@echo "Running flake8"
+<<<<<<< Makefile
 	cd src && tox -e pep8
 
+=======
+	@flake8 --max-line-length=120 ./src
+>>>>>>> Makefile
 
 test: unittest lint
 
diff --git a/src/actions/actions.py b/src/actions/actions.py
index 9e27f0c..a851b2a 100755
--- a/src/actions/actions.py
+++ b/src/actions/actions.py
@@ -18,7 +18,13 @@ import os
 import subprocess
 import sys
 
+<<<<<<< src/actions/actions.py
 from charmhelpers.core.hookenv import action_set, action_fail, log
+=======
+from charms.layer import basic  # noqa E402
+basic.bootstrap_charm_deps()
+from charmhelpers.core.hookenv import action_set, action_fail, log  # noqa E402
+>>>>>>> src/actions/actions.py
 
 
 def clear_sel(args):
diff --git a/src/config.yaml b/src/config.yaml
index 1a5bcfd..9dbf3e8 100644
--- a/src/config.yaml
+++ b/src/config.yaml
@@ -16,4 +16,8 @@ options:
   enable_ipmi:
     type: boolean
     default: True
-    description: Enable the use of freeipmi tools to monitor hardware status.
\ No newline at end of file
+    description: Enable the use of freeipmi tools to monitor hardware status.
+  enable_smart:
+    type: boolean
+    default: True
+    description: Enable SMART monitoring of HDDs.
\ No newline at end of file
diff --git a/src/files/smart/check_smart.py b/src/files/smart/check_smart.py
new file mode 100755
index 0000000..a931b57
--- /dev/null
+++ b/src/files/smart/check_smart.py
@@ -0,0 +1,51 @@
+#!/usr/bin/env python3
+# -*- coding: us-ascii -*-
+
+import subprocess
+from nagios_plugin3 import CriticalError, UnknownError, try_check
+
+
+def check_smartctl():
+    """Check the status of disk devices using SMART
+
+    Use smartctl to collect a list of supported devices, and check the status of each.
+    See the smartctl(8) manpage for a description of the return codes and what they mean.
+    :return: either a string to indicate the devices are OK, or raise an exception.
+    """
+    devices = smartctl_scan()
+    errors = []
+    for device in devices:
+        command = ['sudo', 'smartctl', '-H', device, '--quietmode=silent']
+        try:
+            status_out = subprocess.run(command)
+        except subprocess.CalledProcessError:
+            # smartctl returned a non-OK state, but it's silent so won't output any stdout nor stderr for that
+            errors.append("{} RC: {}".format(device, status_out.returncode))
+    if len(errors) > 0:
+        raise CriticalError("Smartctl found drives not OK, {}".format(",".join(errors)))
+    msg = "OK: All SMART devices report OK"
+    print(msg)
+    return "check_smartctl returned OK"
+
+
+def smartctl_scan():
+    """Run smartctl --scan to get a list of devices
+
+    :return: a list of devices that are capable of smartctl checks
+    """
+    command = ['smartctl', '--scan']
+    try:
+        raw_devices = subprocess.check_output(command).decode()
+    except subprocess.CalledProcessError as e:
+        raise UnknownError("smartctl failed to scan with error {}".format(e))
+    raw_devices = raw_devices.strip()
+    devices = [dev.split(' ')[0] for dev in raw_devices.split('\n')]
+    return devices
+
+
+def main():
+    try_check(check_smartctl)
+
+
+if __name__ == '__main__':
+    main()
diff --git a/src/files/smart/check_smart_sudoer b/src/files/smart/check_smart_sudoer
new file mode 100644
index 0000000..989729a
--- /dev/null
+++ b/src/files/smart/check_smart_sudoer
@@ -0,0 +1,2 @@
+nagios ALL=(root) NOPASSWD: /usr/sbin/smartctl
+
diff --git a/src/layer.yaml b/src/layer.yaml
index 054b43e..a3c8c45 100644
--- a/src/layer.yaml
+++ b/src/layer.yaml
@@ -12,5 +12,7 @@ options:
     packages:
       - "freeipmi"
       - "libipc-run-perl"
+      - "smartmontools"
+      - "lm-sensors"
 repo: "lp:hw-health-charm"
 is: "hw-health"
diff --git a/src/lib/utils/hwdiscovery.py b/src/lib/utils/hwdiscovery.py
index c144fa4..9da532f 100644
--- a/src/lib/utils/hwdiscovery.py
+++ b/src/lib/utils/hwdiscovery.py
@@ -33,14 +33,16 @@ def get_tools():
                     # LSI3008-IR
                     tools.add('sas3ircu')
 
-    # HPE (Gen10 and newer) or HP (older)
     hw_vendor = dmidecode('system-manufacturer')
-    if hw_vendor == 'HPE':
+    if hw_vendor == 'Supermicro':
+        tools.add('sas3ircu')
+    """ TODO implement HP/HPE monitoring
+    # HPE (Gen10 and newer) or HP (older)
+    elif hw_vendor == 'HPE':
         tools.add('hpe')  # ['hponcfg', 'ilorest']
     elif hw_vendor == 'HP':
         tools.add('hp')  # ['hponcfg', 'hp-health', 'hpssacli']
-    elif hw_vendor == 'Supermicro':
-        tools.add('sas3ircu')
+    """
 
     # SW RAID?
     if _supports_mdadm():
@@ -49,6 +51,9 @@ def get_tools():
     if hookenv.config('enable_ipmi'):
         tools.add('ipmi')
 
+    if hookenv.config('enable_smart'):
+        tools.add('smart')
+
     return tools
 
 
diff --git a/src/lib/utils/tooling.py b/src/lib/utils/tooling.py
index bef4b4f..7d93792 100644
--- a/src/lib/utils/tooling.py
+++ b/src/lib/utils/tooling.py
@@ -13,17 +13,20 @@ TOOLING = {
     'megacli': {
         'snap': 'megacli',
         'filename': 'megaraid/check_megacli.py',
-        'cronjob': 'megaraid/check_megacli.sh'
+        'cronjob': 'megaraid/check_megacli.sh',
+        'install': 'resource'
     },
     'sas2ircu': {
         'snap': 'sas2ircu',
         'filename': 'mpt/check_sas2ircu.py',
-        'cronjob': 'mpt/check_sas2ircu.sh'
+        'cronjob': 'mpt/check_sas2ircu.sh',
+        'install': 'resource'
     },
     'sas3ircu': {
         'snap': 'sas3ircu',
         'filename': 'mpt/check_sas3ircu.py',
-        'cronjob': 'mpt/check_sas3ircu.sh'
+        'cronjob': 'mpt/check_sas3ircu.sh',
+        'install': 'resource'
     },
     'mdadm': {
         'filename': 'check_mdadm.py',
@@ -33,6 +36,10 @@ TOOLING = {
         'filename': 'ipmi/check_ipmi_sensor',
         'sudoers': 'ipmi/check_ipmi_sensor_sudoer'
     },
+    'smart': {
+        'filename': 'smart/check_smart.py',
+        'sudoers': 'smart/check_smart_sudoer'
+    }
 }
 
 PLUGINS_DIR = '/usr/local/lib/nagios/plugins/'
@@ -44,10 +51,17 @@ SUDOERS_DIR = '/etc/sudoers.d'
 def install_resource(tools, resource):
     """Install hardware diagnostic tools from a charm resource
 
+    Install hardware diagnostic tools from a charm resource.  We currently exclude a set of tools which are
+    installed by packages elsewhere.
     :param tools: set of tools to include
     :param resource: resource object
     :return: boolean, status of installation
     """
+    exclude_tools = set()
+    for tool in TOOLING.keys():
+        if not TOOLING[tool].get('install') == 'resource':
+            # any tool that isn't marked to be installed by resource needs excluding further down
+            exclude_tools.add(tool)
     if not isinstance(tools, set):
         try:
             tools = set(tools)
@@ -56,8 +70,8 @@ def install_resource(tools, resource):
             return False
     if not tools:
         return False
-    elif tools & {'mdadm', 'ipmi'}:
-        tools = tools - {'mdadm', 'ipmi'}
+    elif tools & exclude_tools:  # if the two sets intersect
+        tools = tools - exclude_tools
         if not tools:
             return True
 
@@ -196,7 +210,10 @@ def configure_nrpe_checks(tools):
                 hookenv.log(
                     'Cronjob file [{}] removed'.format(cronjob_file),
                     hookenv.DEBUG)
-
+    if os.path.exists('/usr/bin/sensors'):
+        nrpe_setup.add_check('check_sensors', "lmsensors status", '/usr/lib/nagios/plugins/check_sensors')
+        nrpe_setup.write()
+        count += 1
     return count > 0
 
 
@@ -219,6 +236,7 @@ def remove_nrpe_checks(tools):
         hookenv.log('Check removed from nrpe: {}'.format(tool))
         count += 1
 
+    # TODO remove nrpe check for check_sensors
     if count > 0:
         nrpe_setup.write()
         return True
diff --git a/src/reactive/hw_health.py b/src/reactive/hw_health.py
index 29cea0f..1890eb5 100644
--- a/src/reactive/hw_health.py
+++ b/src/reactive/hw_health.py
@@ -14,6 +14,7 @@ from utils.tooling import (
     configure_nrpe_checks,
     install_resource,
     remove_nrpe_checks,
+    TOOLING
 )
 
 
@@ -36,16 +37,21 @@ def install():
         status.blocked('Hardware not supported')
     else:
         resource = hookenv.resource_get('tools')
+        resource_needed_tools = [tool for tool in tools if TOOLING[tool].get('install') == 'resource']
         if resource:
             hookenv.log('Installing from resource')
             status.waiting('Installing from attached resource')
             installed = install_resource(tools, resource)
         else:
-            hookenv.log(
-                'Missing Juju resource: tools - alternative method is not'
-                ' available yet', hookenv.ERROR)
-            status.blocked('Missing Juju resource: tools')
-            return
+            if resource_needed_tools:
+                # we know that this tool needs to be provided by a resource but there's no resource available
+                hookenv.log(
+                    'Missing Juju resource: tools - alternative method is not'
+                    ' available yet', hookenv.ERROR)
+                status.blocked('Missing Juju resource: tools')
+                return
+            else:
+                installed = True
             # TODO(aluria): install vendor utilities via snaps
             # https://forum.snapcraft.io/t/request-for-classic-confinement-sas2ircu/9023
             # Snap interface for hw tooling is not supported
@@ -62,8 +68,8 @@ def install():
             set_flag('hw-health.installed')
             status.waiting('Preparing scripts installation')
         else:
-            missing_tools = tools - {'mdadm', 'ipmi'}
-            status.blocked('Tools not found: {}'.format(', '.join(missing_tools)))
+            status.blocked('Not all required tools found in tools resource: {}'.format(
+                ', '.join(resource_needed_tools)))
 
 
 @when('hw-health.installed')
diff --git a/src/tests/charms/__init__.py b/src/tests/charms/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/tests/charms/__init__.py
diff --git a/src/tests/charms/layer/__init__.py b/src/tests/charms/layer/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/src/tests/charms/layer/__init__.py
diff --git a/src/tests/charms/layer/basic.py b/src/tests/charms/layer/basic.py
new file mode 100644
index 0000000..5434905
--- /dev/null
+++ b/src/tests/charms/layer/basic.py
@@ -0,0 +1,2 @@
+def bootstrap_charm_deps():
+    pass
diff --git a/src/tests/unit/test_actions.py b/src/tests/unit/test_actions.py
index 3df07a3..bb5a492 100644
--- a/src/tests/unit/test_actions.py
+++ b/src/tests/unit/test_actions.py
@@ -12,10 +12,13 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 
+import os
+import sys
 import unittest
 import unittest.mock as mock
 
-from actions.actions import clear_sel
+sys.path.insert(0, '{}/{}'.format(os.getcwd(), 'tests'))
+from actions.actions import clear_sel  # noqa: E402
 
 
 class ClearSelTestCase(unittest.TestCase):
@@ -27,5 +30,9 @@ class ClearSelTestCase(unittest.TestCase):
         sel_output = "Unittest system event log output"
         mock_subprocess.return_value = sel_output
         mock_check_call.return_value = None
+<<<<<<< src/tests/unit/test_actions.py
         clear_sel('dummy_arg')
+=======
+        clear_sel('unittest')
+>>>>>>> src/tests/unit/test_actions.py
         mock_check_call.assert_called_once_with(['action-set', "cleared_log={}".format(sel_output)])
diff --git a/src/tests/unit/test_check_smart.py b/src/tests/unit/test_check_smart.py
new file mode 100644
index 0000000..f3556d1
--- /dev/null
+++ b/src/tests/unit/test_check_smart.py
@@ -0,0 +1,31 @@
+import sys
+import unittest
+import unittest.mock as mock
+
+sys.path.append('files/smart')
+import check_smart  # noqa E402
+
+
+class TestCheckSmart(unittest.TestCase):
+
+    @mock.patch('subprocess.check_output')
+    def test_smartctl_scan(self, mock_smartctl):
+        mock_smartctl.return_value = b"""/dev/sda -d scsi # /dev/sda, SCSI device
+/dev/sdb -d scsi # /dev/sdb, SCSI device
+/dev/sdc -d scsi # /dev/sdc, SCSI device
+/dev/sdd -d scsi # /dev/sdd, SCSI device
+/dev/sde -d scsi # /dev/sde, SCSI device
+
+        """
+        actual = check_smart.smartctl_scan()
+        expected = ['/dev/sda', '/dev/sdb', '/dev/sdc', '/dev/sdd', '/dev/sde']
+        self.assertEqual(actual, expected)
+
+    @mock.patch('subprocess.run')
+    @mock.patch('check_smart.smartctl_scan')
+    def test_check_smartctl(self, mock_devices, mock_smartctl):
+        mock_smartctl.return_code = 0
+        mock_devices.return_value = ['/dev/sda']
+        actual = check_smart.check_smartctl()
+        expected = "check_smartctl returned OK"
+        self.assertEqual(actual, expected)
diff --git a/src/tests/unit/test_hwdiscovery.py b/src/tests/unit/test_hwdiscovery.py
index 7cb516e..5fc3342 100644
--- a/src/tests/unit/test_hwdiscovery.py
+++ b/src/tests/unit/test_hwdiscovery.py
@@ -1,6 +1,4 @@
 import io
-import os  # noqa: F401
-import subprocess  # noqa: F401
 import sys
 import unittest
 import unittest.mock as mock
@@ -26,7 +24,7 @@ class TestHWDiscovery(unittest.TestCase):
         driver.side_effect = side_effect_glob
         mock_hookenv.return_value = True
         real = hwdiscovery.get_tools()
-        expected = set(['megacli', 'ipmi'])
+        expected = set(['megacli', 'ipmi', 'smart'])
         self.assertEqual(real, expected)
 
     @mock.patch('hwdiscovery._supports_mdadm')
@@ -42,10 +40,10 @@ class TestHWDiscovery(unittest.TestCase):
 
         supports_mdadm.return_value = False
         dmidecode.return_value = 'unittest'
-        mock_hookenv.return_value = True
+        mock_hookenv.return_value = False
         driver.side_effect = side_effect_glob
         real = hwdiscovery.get_tools()
-        expected = set(['sas2ircu', 'ipmi'])
+        expected = set(['sas2ircu'])
         self.assertEqual(real, expected)
 
     @mock.patch('hwdiscovery._supports_mdadm')
@@ -68,6 +66,7 @@ class TestHWDiscovery(unittest.TestCase):
         expected = set(['sas3ircu'])
         self.assertEqual(real, expected)
 
+    """ TODO implement HP
     @mock.patch('hwdiscovery._supports_mdadm')
     @mock.patch('hwdiscovery.dmidecode')
     @mock.patch('glob.glob')
@@ -93,6 +92,7 @@ class TestHWDiscovery(unittest.TestCase):
         real = hwdiscovery.get_tools()
         expected = set(['hp'])
         self.assertEqual(real, expected)
+    """
 
     @mock.patch('hwdiscovery._supports_mdadm')
     @mock.patch('hwdiscovery.dmidecode')
diff --git a/src/tests/unit/test_tooling.py b/src/tests/unit/test_tooling.py
index 5236f6e..9d55b83 100644
--- a/src/tests/unit/test_tooling.py
+++ b/src/tests/unit/test_tooling.py
@@ -131,3 +131,7 @@ class TestTooling(unittest.TestCase):
     def test_configure_tools_unsupported_tools(self):
         self.assertEqual(
             tooling.configure_tools(['unittest1', 'unittest2']), False)
+
+    def test_configure_nrpe_checks(self):
+        # TODO write this test
+        pass
-- 
Mailing list: https://launchpad.net/~nagios-charmers
Post to     : nagios-charmers@lists.launchpad.net
Unsubscribe : https://launchpad.net/~nagios-charmers
More help   : https://help.launchpad.net/ListHelp

Reply via email to