Dan Bungert has proposed merging ~dbungert/curtin:flock-ex into curtin:master.
Commit message: do not squash Requested reviews: curtin developers (curtin-dev) Related bugs: Bug #2016860 in Ubuntu on IBM z Systems: "Error/crash while trying to wipe disk during 23.04+ install" https://bugs.launchpad.net/ubuntu-z-systems/+bug/2016860 Bug #2057661 in subiquity: "Daily install w/ ZFS + encryption: partitioning crashed with CurtinInstallError" https://bugs.launchpad.net/subiquity/+bug/2057661 For more details, see: https://code.launchpad.net/~dbungert/curtin/+git/curtin/+merge/462753 Add exclusive flocks to known problem areas: * ZFS encryption LP: #2057661 * wipefs LP: #2016860 -- Your team curtin developers is requested to review the proposed merge of ~dbungert/curtin:flock-ex into curtin:master.
diff --git a/curtin/block/zfs.py b/curtin/block/zfs.py index 767ad30..ccdddbb 100644 --- a/curtin/block/zfs.py +++ b/curtin/block/zfs.py @@ -8,6 +8,7 @@ import os import tempfile import secrets import shutil +from contextlib import ExitStack from pathlib import Path from curtin.config import merge_config @@ -31,11 +32,12 @@ ZFS_UNSUPPORTED_RELEASES = ['precise', 'trusty'] class ZPoolEncryption: - def __init__(self, poolname, style, keyfile): + def __init__(self, vdevs, poolname, style, keyfile): self.poolname = poolname self.style = style self.keyfile = keyfile self.system_key = None + self.vdevs = vdevs def get_system_key(self): if self.system_key is None: @@ -88,24 +90,31 @@ class ZPoolEncryption: self.poolname, "keystore", {"encryption": "off"}, keystore_size, ) - # cryptsetup format and open this keystore - keystore_volume = f"/dev/zvol/{self.poolname}/keystore" - cmd = ["cryptsetup", "luksFormat", keystore_volume, self.keyfile] - util.subp(cmd) - dm_name = f"keystore-{self.poolname}" - cmd = [ - "cryptsetup", "open", "--type", "luks", keystore_volume, dm_name, - "--key-file", self.keyfile, - ] - util.subp(cmd) - - # format as ext4, mount it, move the previously-generated system key - dmpath = f"/dev/mapper/{dm_name}" - cmd = ["mke2fs", "-t", "ext4", dmpath, "-L", dm_name] - util.subp(cmd, capture=True) - - keystore_root = f"/run/keystore/{self.poolname}" - with util.mount(dmpath, keystore_root): + with ExitStack() as es: + for vdev in self.vdevs: + es.enter_context(util.FlockEx(vdev)) + + # cryptsetup format and open this keystore + keystore_volume = f"/dev/zvol/{self.poolname}/keystore" + cmd = ["cryptsetup", "luksFormat", keystore_volume, self.keyfile] + util.subp(cmd) + dm_name = f"keystore-{self.poolname}" + cmd = [ + "cryptsetup", "open", "--type", "luks", keystore_volume, + dm_name, "--key-file", self.keyfile, + ] + util.subp(cmd) + + with ExitStack() as es: + # format as ext4, mount it, move the previously-generated systemkey + dmpath = f"/dev/mapper/{dm_name}" + es.enter_context(util.FlockEx(dmpath)) + cmd = ["mke2fs", "-t", "ext4", dmpath, "-L", dm_name] + util.subp(cmd, capture=True) + + keystore_root = f"/run/keystore/{self.poolname}" + + es.enter_context(util.mount(dmpath, keystore_root)) ks_system_key = f"{keystore_root}/system.key" shutil.move(self.system_key, ks_system_key) Path(ks_system_key).chmod(0o400) @@ -256,7 +265,7 @@ def zpool_create(poolname, vdevs, storage_config=None, context=None, if zfs_properties: merge_config(zfs_cfg, zfs_properties) - encryption = ZPoolEncryption(poolname, encryption_style, keyfile) + encryption = ZPoolEncryption(vdevs, poolname, encryption_style, keyfile) encryption.validate() if encryption.in_use(): merge_config(zfs_cfg, encryption.dataset_properties()) diff --git a/curtin/commands/block_meta.py b/curtin/commands/block_meta.py index 58889b9..dc26a8c 100644 --- a/curtin/commands/block_meta.py +++ b/curtin/commands/block_meta.py @@ -795,26 +795,29 @@ def disk_handler(info, storage_config, context): "table" % disk) else: # wipe the disk and create the partition table if instructed to do so - if config.value_as_boolean(info.get('wipe')): - block.wipe_volume(disk, mode=info.get('wipe')) - if config.value_as_boolean(ptable): - LOG.info("labeling device: '%s' with '%s' partition table", disk, - ptable) - if ptable == "gpt": - # Wipe both MBR and GPT that may be present on the disk. - # N.B.: wipe_volume wipes 1M at front and end of the disk. - # This could destroy disk data in filesystems that lived - # there. - block.wipe_volume(disk, mode='superblock') - elif ptable in _dos_names: - util.subp(["parted", disk, "--script", "mklabel", "msdos"]) - elif ptable == "vtoc": - util.subp(["fdasd", "-c", "/dev/null", disk]) - holders = clear_holders.get_holders(disk) - if len(holders) > 0: - LOG.info('Detected block holders on disk %s: %s', disk, holders) - clear_holders.clear_holders(disk) - clear_holders.assert_clear(disk) + with util.FlockEx(disk): + if config.value_as_boolean(info.get('wipe')): + block.wipe_volume(disk, mode=info.get('wipe')) + if config.value_as_boolean(ptable): + LOG.info("labeling device: '%s' with '%s' partition table", + disk, ptable) + if ptable == "gpt": + # Wipe both MBR and GPT that may be present on the disk. + # N.B.: wipe_volume wipes 1M at front and end of the disk. + # This could destroy disk data in filesystems that lived + # there. + block.wipe_volume(disk, mode='superblock') + elif ptable in _dos_names: + util.subp(["parted", disk, "--script", "mklabel", "msdos"]) + elif ptable == "vtoc": + util.subp(["fdasd", "-c", "/dev/null", disk]) + holders = clear_holders.get_holders(disk) + if len(holders) > 0: + LOG.info( + 'Detected block holders on disk %s: %s', disk, holders + ) + clear_holders.clear_holders(disk) + clear_holders.assert_clear(disk) # Make the name if needed if info.get('name'): diff --git a/curtin/util.py b/curtin/util.py index dcf9422..66735e8 100644 --- a/curtin/util.py +++ b/curtin/util.py @@ -4,6 +4,7 @@ import argparse import collections from contextlib import contextmanager, suppress import errno +import fcntl import json import os import platform @@ -1420,4 +1421,40 @@ def not_exclusive_retry(fun, *args, **kwargs): time.sleep(1) return fun(*args, **kwargs) + +class FlockEx: + """Acquire an exclusive lock on device. + See https://systemd.io/BLOCK_DEVICE_LOCKING/ for details and + motivation, which has the summary: + Use BSD file locks (flock(2)) on block device nodes to synchronize + access for partitioning and file system formatting tools. + + params: device: block device node to exclusively lock + """ + def __init__(self, device, timeout=60): + self.device = device + self.timeout = timeout + self.retries = 60 + + def __enter__(self): + self.lock_fd = os.open(self.device, os.O_RDONLY) + + LOG.debug(f"Acquiring fcntl LOCK_EX on {self.device}") + + for i in range(self.retries): + try: + fcntl.flock(self.lock_fd, fcntl.LOCK_EX | fcntl.LOCK_NB) + return + except OSError as ex: + LOG.debug(f"Try {i}: lock acquisition failed with {ex}") + time.sleep(self.timeout / self.retries) + else: + raise TimeoutError("Failed to acquire LOCK_EX on {self.device}") + + def __exit__(self, *args): + with suppress(Exception): + fcntl.flock(self.lock_fd, fcntl.LOCK_UN) + with suppress(Exception): + os.close(self.lock_fd) + # vi: ts=4 expandtab syntax=python diff --git a/tests/unittests/test_block_zfs.py b/tests/unittests/test_block_zfs.py index dd80caf..6d5c5db 100644 --- a/tests/unittests/test_block_zfs.py +++ b/tests/unittests/test_block_zfs.py @@ -614,7 +614,7 @@ class TestZfsKeystore(CiTestCase): def test_create_system_key(self, m_token_bytes): m_token_bytes.return_value = key = self.random_string() m_open = mock.mock_open() - zpe = zfs.ZPoolEncryption(None, None, None) + zpe = zfs.ZPoolEncryption(None, None, None, None) with mock.patch("builtins.open", m_open): system_key = zpe.get_system_key() self.assertIsNotNone(system_key) @@ -622,7 +622,7 @@ class TestZfsKeystore(CiTestCase): m_open.return_value.write.assert_called_with(key) def test_validate_good(self): - zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", self.keyfile) + zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", self.keyfile) try: zpe.validate() except Exception: @@ -630,22 +630,22 @@ class TestZfsKeystore(CiTestCase): self.assertTrue(zpe.in_use()) def test_validate_missing_pool(self): - zpe = zfs.ZPoolEncryption(None, "luks_keystore", self.keyfile) + zpe = zfs.ZPoolEncryption(None, None, "luks_keystore", self.keyfile) with self.assertRaises(ValueError): zpe.validate() def test_validate_missing_key(self): - zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", None) + zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", None) with self.assertRaises(ValueError): zpe.validate() def test_validate_missing_key_file(self): - zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", "not-exist") + zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", "not-exist") with self.assertRaises(ValueError): zpe.validate() def test_validate_unencrypted_ok(self): - zpe = zfs.ZPoolEncryption("pool1", None, None) + zpe = zfs.ZPoolEncryption(None, "pool1", None, None) try: zpe.validate() except Exception: @@ -653,7 +653,7 @@ class TestZfsKeystore(CiTestCase): self.assertFalse(zpe.in_use()) def test_dataset_properties(self): - zpe = zfs.ZPoolEncryption("pool1", "luks_keystore", self.keyfile) + zpe = zfs.ZPoolEncryption(None, "pool1", "luks_keystore", self.keyfile) keyloc = self.random_string() with mock.patch.object(zpe, "get_system_key", return_value=keyloc): props = zpe.dataset_properties() diff --git a/tests/unittests/test_commands_block_meta.py b/tests/unittests/test_commands_block_meta.py index ed58cc1..7237cfc 100644 --- a/tests/unittests/test_commands_block_meta.py +++ b/tests/unittests/test_commands_block_meta.py @@ -5,6 +5,7 @@ from collections import OrderedDict import copy from unittest.mock import ( call, + MagicMock, Mock, patch, ) @@ -685,6 +686,7 @@ class TestBlockMeta(CiTestCase): self.m_mp.is_mpath_device.return_value = False self.m_mp.is_mpath_member.return_value = False + @patch('curtin.commands.block_meta.util.FlockEx', new=MagicMock()) def test_disk_handler_calls_clear_holder(self): info = self.storage_config.get('sda') disk = info.get('path') @@ -1789,6 +1791,7 @@ class TestDiskHandler(CiTestCase): m_getpath.assert_called_with(info['id'], storage_config) m_block.get_part_table_type.assert_called_with(disk_path) + @patch('curtin.commands.block_meta.util.FlockEx', new=MagicMock()) @patch('curtin.commands.block_meta.util.subp') @patch('curtin.commands.block_meta.clear_holders.get_holders') @patch('curtin.commands.block_meta.get_path_to_storage_volume') diff --git a/tests/unittests/test_util.py b/tests/unittests/test_util.py index 96f260f..45d4b79 100644 --- a/tests/unittests/test_util.py +++ b/tests/unittests/test_util.py @@ -1,7 +1,9 @@ # This file is part of curtin. See LICENSE file for copyright and license info. +from pathlib import Path from unittest import mock import errno +import fcntl import os import stat from textwrap import dedent @@ -1406,4 +1408,30 @@ class TestEFIVarFSBug(CiTestCase): util.EFIVarFSBug.apply_workaround_if_affected() apply_wa.assert_not_called() + +class TestFlockEx(CiTestCase): + def setUp(self): + self.tmpf = self.tmp_path('testfile') + Path(self.tmpf).touch() + + def test_basic(self): + with open(self.tmpf) as fp: + with util.FlockEx(self.tmpf): + with self.assertRaises(BlockingIOError): + fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB) + + fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB) + fcntl.flock(fp, fcntl.LOCK_UN) + + def test_timeout(self): + with open(self.tmpf) as fp: + fcntl.flock(fp, fcntl.LOCK_EX | fcntl.LOCK_NB) + + with self.assertRaises(TimeoutError): + with util.FlockEx(self.tmpf, timeout=.01): + pass + + fcntl.flock(fp, fcntl.LOCK_UN) + + # vi: ts=4 expandtab syntax=python
-- Mailing list: https://launchpad.net/~curtin-dev Post to : curtin-dev@lists.launchpad.net Unsubscribe : https://launchpad.net/~curtin-dev More help : https://help.launchpad.net/ListHelp