Ryan Harper has proposed merging ~raharper/cloud-init:feature/cloudinit-clean-from-write-log into cloud-init:master.
Commit message: clean: add a write_file audit log and use it to clean artifacts Cloud-init uses util.write_file() in many places to write files during cloud-init execution. This branch will create an audit log in /var/lib/cloud/instance/write_file.log which contains JSON entries which captures information about each write. As a user of this newly created log, cloud-init clean subcommand reads this data (if present) and augments the list of artifacts to clean. Requested reviews: cloud-init Commiters (cloud-init-dev) For more details, see: https://code.launchpad.net/~raharper/cloud-init/+git/cloud-init/+merge/372946 -- Your team cloud-init Commiters is requested to review the proposed merge of ~raharper/cloud-init:feature/cloudinit-clean-from-write-log into cloud-init:master.
diff --git a/cloudinit/cmd/clean.py b/cloudinit/cmd/clean.py index 30e49de..06f7daa 100644 --- a/cloudinit/cmd/clean.py +++ b/cloudinit/cmd/clean.py @@ -12,7 +12,7 @@ import sys from cloudinit.stages import Init from cloudinit.util import ( ProcessExecutionError, del_dir, del_file, get_config_logfiles, - is_link, subp) + is_link, subp, write_file_log_read) def error(msg): @@ -45,7 +45,20 @@ def get_parser(parser=None): return parser -def remove_artifacts(remove_logs, remove_seed=False): +def get_written_files(filterfn=None): + if filterfn is None: + def _exists(fn): + return os.path.exists(fn) + filterfn = _exists + + for wf in write_file_log_read(): + fn = wf.get('filename') + if fn: + if filterfn(fn): + yield fn + + +def remove_artifacts(remove_logs=False, remove_seed=False): """Helper which removes artifacts dir and optionally log files. @param: remove_logs: Boolean. Set True to delete the cloud_dir path. False @@ -56,14 +69,24 @@ def remove_artifacts(remove_logs, remove_seed=False): """ init = Init(ds_deps=[]) init.read_cfg() + + logfiles = get_config_logfiles(init.cfg) if remove_logs: - for log_file in get_config_logfiles(init.cfg): + for log_file in logfiles: del_file(log_file) if not os.path.isdir(init.paths.cloud_dir): return 0 # Artifacts dir already cleaned seed_path = os.path.join(init.paths.cloud_dir, 'seed') - for path in glob.glob('%s/*' % init.paths.cloud_dir): + + def _written_filter(fn): + if fn not in logfiles: + if not fn.startswith(init.paths.cloud_dir): + if os.path.exists(fn): + return fn + + files_written = list(get_written_files(filterfn=_written_filter)) + for path in glob.glob('%s/*' % init.paths.cloud_dir) + files_written: if path == seed_path and not remove_seed: continue try: diff --git a/cloudinit/cmd/tests/test_clean.py b/cloudinit/cmd/tests/test_clean.py index f092ab3..143d955 100644 --- a/cloudinit/cmd/tests/test_clean.py +++ b/cloudinit/cmd/tests/test_clean.py @@ -2,6 +2,7 @@ from cloudinit.cmd import clean from cloudinit.util import ensure_dir, sym_link, write_file +from cloudinit import util from cloudinit.tests.helpers import CiTestCase, wrap_and_call, mock from collections import namedtuple import os @@ -18,6 +19,10 @@ class TestClean(CiTestCase): self.artifact_dir = self.tmp_path('artifacts', self.new_root) self.log1 = self.tmp_path('cloud-init.log', self.new_root) self.log2 = self.tmp_path('cloud-init-output.log', self.new_root) + self.wflog = self.tmp_path('write_file.log', self.artifact_dir) + os.makedirs(os.path.dirname(self.wflog)) + util.WRITE_FILE_LOG = self.wflog + util._ENABLE_WRITE_FILE_LOG = True class FakeInit(object): cfg = {'def_log_file': self.log1, @@ -37,21 +42,31 @@ class TestClean(CiTestCase): """remove_artifacts removes logs when remove_logs is True.""" write_file(self.log1, 'cloud-init-log') write_file(self.log2, 'cloud-init-output-log') + netplan = self.tmp_path('50-cloud-init.yaml', self.artifact_dir) + write_file(netplan, 'netplan content') + # read the write log before it gets cleaned + with open(self.wflog) as fh: + contents = fh.readlines() + self.assertEqual(3, len(contents)) - self.assertFalse( - os.path.exists(self.artifact_dir), 'Unexpected artifacts dir') retcode = wrap_and_call( 'cloudinit.cmd.clean', {'Init': {'side_effect': self.init_class}}, clean.remove_artifacts, remove_logs=True) self.assertFalse(os.path.exists(self.log1), 'Unexpected file') self.assertFalse(os.path.exists(self.log2), 'Unexpected file') + self.assertFalse(os.path.exists(netplan), 'Unexpected file') self.assertEqual(0, retcode) + self.assertFalse(os.path.exists(self.wflog)) def test_remove_artifacts_preserves_logs(self): """remove_artifacts leaves logs when remove_logs is False.""" write_file(self.log1, 'cloud-init-log') write_file(self.log2, 'cloud-init-output-log') + # read the write log before it gets cleaned + with open(self.wflog) as fh: + contents = fh.readlines() + self.assertEqual(2, len(contents)) retcode = wrap_and_call( 'cloudinit.cmd.clean', @@ -60,6 +75,7 @@ class TestClean(CiTestCase): self.assertTrue(os.path.exists(self.log1), 'Missing expected file') self.assertTrue(os.path.exists(self.log2), 'Missing expected file') self.assertEqual(0, retcode) + self.assertFalse(os.path.exists(self.wflog)) def test_remove_artifacts_removes_unlinks_symlinks(self): """remove_artifacts cleans artifacts dir unlinking any symlinks.""" @@ -77,6 +93,7 @@ class TestClean(CiTestCase): self.assertFalse( os.path.exists(path), 'Unexpected {0} dir'.format(path)) + self.assertFalse(os.path.exists(self.wflog)) def test_remove_artifacts_removes_artifacts_skipping_seed(self): """remove_artifacts cleans artifacts dir with exception of seed dir.""" @@ -101,6 +118,7 @@ class TestClean(CiTestCase): self.assertFalse( os.path.exists(deleted_dir), 'Unexpected {0} dir'.format(deleted_dir)) + self.assertFalse(os.path.exists(self.wflog)) def test_remove_artifacts_removes_artifacts_removes_seed(self): """remove_artifacts removes seed dir when remove_seed is True.""" @@ -124,6 +142,8 @@ class TestClean(CiTestCase): os.path.exists(deleted_dir), 'Unexpected {0} dir'.format(deleted_dir)) + self.assertFalse(os.path.exists(self.wflog)) + def test_remove_artifacts_returns_one_on_errors(self): """remove_artifacts returns non-zero on failure and prints an error.""" ensure_dir(self.artifact_dir) @@ -139,6 +159,7 @@ class TestClean(CiTestCase): self.assertEqual( 'ERROR: Could not remove %s/dir1: oops\n' % self.artifact_dir, m_stderr.getvalue()) + self.assertFalse(os.path.exists(self.wflog)) def test_handle_clean_args_reboots(self): """handle_clean_args_reboots when reboot arg is provided.""" @@ -159,10 +180,15 @@ class TestClean(CiTestCase): self.assertEqual(0, retcode) self.assertEqual( [(['shutdown', '-r', 'now'], False)], called_cmds) + self.assertFalse(os.path.exists(self.wflog)) def test_status_main(self): '''clean.main can be run as a standalone script.''' write_file(self.log1, 'cloud-init-log') + # read the write log before it gets cleaned + with open(self.wflog) as fh: + contents = fh.readlines() + self.assertEqual(1, len(contents)) with self.assertRaises(SystemExit) as context_manager: wrap_and_call( 'cloudinit.cmd.clean', @@ -175,5 +201,4 @@ class TestClean(CiTestCase): self.assertFalse( os.path.exists(self.log1), 'Unexpected log {0}'.format(self.log1)) - # vi: ts=4 expandtab syntax=python diff --git a/cloudinit/tests/helpers.py b/cloudinit/tests/helpers.py index 4dad2af..c994a3b 100644 --- a/cloudinit/tests/helpers.py +++ b/cloudinit/tests/helpers.py @@ -96,6 +96,8 @@ class TestCase(unittest2.TestCase): util.PROC_CMDLINE = None util._DNS_REDIRECT_IP = None util._LSB_RELEASE = {} + util._ENABLE_WRITE_FILE_LOG = False + util.WRITE_LOG_FILE = '/var/lib/cloud/instance/write_file.log' def setUp(self): super(TestCase, self).setUp() diff --git a/cloudinit/util.py b/cloudinit/util.py index aa23b3f..99ec940 100644 --- a/cloudinit/util.py +++ b/cloudinit/util.py @@ -70,6 +70,8 @@ CONTAINER_TESTS = (['systemd-detect-virt', '--quiet', '--container'], ['lxc-is-container']) PROC_CMDLINE = None +WRITE_FILE_LOG = '/var/lib/cloud/instance/write_file.log' +_ENABLE_WRITE_FILE_LOG = True _LSB_RELEASE = {} @@ -1841,6 +1843,52 @@ def chmod(path, mode): os.chmod(path, real_mode) +def write_file_log_append(filename, omode, target=None): + """ Create an audit log of files that cloud-init has written. + + The log is located at: /var/lib/cloud/instance/write_file.log + + The format is JSON dict, one per line + {'timestamp': time.time(), 'path': filename, 'mode': omode} + + Example entries: + {'filename': '/etc/apt/sources.list', 'mode': 'wb', 'timestamp': ts} + + """ + global WRITE_FILE_LOG + global _ENABLE_WRITE_FILE_LOG + + if not _ENABLE_WRITE_FILE_LOG: + return + + log_file = target_path(target, path=WRITE_FILE_LOG) + if not os.path.exists(os.path.dirname(log_file)): + return + + record = {'timestamp': time.time(), 'filename': filename, 'mode': omode} + content = json.dumps(record, default=json_serialize_default) + with open(log_file, "ab") as wfl: + wfl.write((content + '\n').encode('utf-8')) + wfl.flush() + + +def write_file_log_read(target=None): + """ Read the WRITE_FILE_LOG and yield the contents. + + :returns a list of record dicts + """ + global WRITE_FILE_LOG + + log_file = target_path(target, path=WRITE_FILE_LOG) + if os.path.exists(log_file): + with open(log_file, "rb") as wfl: + contents = wfl.read() + for line in contents.splitlines(): + record = load_json(line) + if record: + yield record + + def write_file(filename, content, mode=0o644, omode="wb", copy_mode=False): """ Writes a file with the given content and sets the file mode as specified. @@ -1872,6 +1920,7 @@ def write_file(filename, content, mode=0o644, omode="wb", copy_mode=False): mode_r = "%r" % mode LOG.debug("Writing to %s - %s: [%s] %s %s", filename, omode, mode_r, len(content), write_type) + write_file_log_append(filename, omode) with SeLinuxGuard(path=filename): with open(filename, omode) as fh: fh.write(content) diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py index 0e71db8..82909a7 100644 --- a/tests/unittests/test_util.py +++ b/tests/unittests/test_util.py @@ -186,6 +186,51 @@ class TestWriteFile(helpers.TestCase): mockobj.assert_called_once_with('selinux') +class TestWriteFileLogAppend(helpers.CiTestCase): + + idir = 'var/lib/cloud/instance' + + def test_write_file_log_append(self): + root_d = self.tmp_dir() + log_path = os.path.join(root_d, self.idir, 'write_file.log') + util._ENABLE_WRITE_FILE_LOG = True + util.WRITE_FILE_LOG = log_path + helpers.populate_dir(root_d, + {os.path.join(self.idir, 'dummy'): 'i-foobar'}) + + content = self.random_string() + fname = os.path.join(root_d, self.random_string()) + util.write_file(fname, content, omode='wb') + + self.assertTrue(os.path.exists(log_path)) + contents = util.load_file(log_path) + self.assertEqual(1, len(contents.splitlines())) + for line in contents.splitlines(): + record = util.load_json(line) + self.assertEqual(fname, record['filename']) + self.assertEqual('wb', record['mode']) + self.assertIn('timestamp', record) + + def test_write_file_log_read(self): + root_d = self.tmp_dir() + log_path = os.path.join(root_d, self.idir, 'write_file.log') + util.WRITE_FILE_LOG = log_path + helpers.populate_dir(root_d, + {os.path.join(self.idir, 'dummy'): 'i-foobar'}) + + fname = os.path.join(root_d, self.random_string()) + + expected_record = {'timestamp': 0, 'filename': fname, 'mode': 'wb'} + with open(log_path, 'ab') as wfl: + record = (json.dumps(expected_record) + '\n').encode('utf-8') + wfl.write(record) + wfl.flush() + + records = list(util.write_file_log_read()) + self.assertEqual(1, len(records)) + self.assertEqual(expected_record, records[0]) + + class TestDeleteDirContents(helpers.TestCase): def setUp(self): super(TestDeleteDirContents, self).setUp()
_______________________________________________ Mailing list: https://launchpad.net/~cloud-init-dev Post to : cloud-init-dev@lists.launchpad.net Unsubscribe : https://launchpad.net/~cloud-init-dev More help : https://help.launchpad.net/ListHelp