Would it be worth having a flag to keep long standing sync behaviour for situations where either diskspace, or verification results in problems (like bug #657320)?
Fabian On 07-07-2018 18:55:52 -0700, Zac Medico wrote: > Sync into a quarantine subdirectory, using the rsync --link-dest option > to create hardlinks to identical files in the previous snapshot of the > repository. If hardlinks are not supported, then show a warning message > and sync directly to the normal repository location. > > If verification succeeds, then the quarantine subdirectory is synced > to the normal repository location, and the quarantine subdirectory > is deleted. If verification fails, then the quarantine directory is > preserved for purposes of analysis. > > Even if verification happens to be disabled, the quarantine directory > is still useful for making the repository update more atomic, so that > it is less likely that normal repository location will be observed in > a partially synced state. > > The new behavior may conflict with configurations that restrict the > use of hardlinks, such as overlay filesystems. Therefore, users will > have to set "sync-allow-hardlinks = no" in repos.conf if they have > a configuration that prevents the use of hardlinks, but this should > not be very common. > > Bug: https://bugs.gentoo.org/660410 > --- > [PATCH v2] makes it possible to disable the new behavior by setting > "sync-allow-hardlinks = no" in repos.conf > > cnf/repos.conf | 1 + > man/portage.5 | 8 +++ > pym/portage/repository/config.py | 8 ++- > pym/portage/sync/modules/rsync/rsync.py | 89 > ++++++++++++++++++++++++++++++--- > 4 files changed, 97 insertions(+), 9 deletions(-) > > diff --git a/cnf/repos.conf b/cnf/repos.conf > index 352073cfd5..419f6d1182 100644 > --- a/cnf/repos.conf > +++ b/cnf/repos.conf > @@ -6,6 +6,7 @@ location = /usr/portage > sync-type = rsync > sync-uri = rsync://rsync.gentoo.org/gentoo-portage > auto-sync = yes > +sync-allow-hardlinks = yes > sync-rsync-verify-jobs = 1 > sync-rsync-verify-metamanifest = yes > sync-rsync-verify-max-age = 24 > diff --git a/man/portage.5 b/man/portage.5 > index 5adb07d821..acc80791be 100644 > --- a/man/portage.5 > +++ b/man/portage.5 > @@ -973,6 +973,14 @@ files). Defaults to true. > .br > Valid values: true, false. > .TP > +.B sync\-allow\-hardlinks = yes|no > +Allow sync plugins to use hardlinks in order to ensure that a repository > +remains in a valid state if something goes wrong during the sync operation. > +For example, if signature verification fails during a sync operation, > +the previous state of the repository will be preserved. This option may > +conflict with configurations that restrict the use of hardlinks, such as > +overlay filesystems. > +.TP > .B sync\-cvs\-repo > Specifies CVS repository. > .TP > diff --git a/pym/portage/repository/config.py > b/pym/portage/repository/config.py > index 1d897bb903..c7440369c2 100644 > --- a/pym/portage/repository/config.py > +++ b/pym/portage/repository/config.py > @@ -86,6 +86,7 @@ class RepoConfig(object): > 'sync_type', 'sync_umask', 'sync_uri', 'sync_user', > 'thin_manifest', > 'update_changelog', '_eapis_banned', '_eapis_deprecated', > '_masters_orig', 'module_specific_options', > 'manifest_required_hashes', > + 'sync_allow_hardlinks', > 'sync_openpgp_key_path', > 'sync_openpgp_key_refresh_retry_count', > 'sync_openpgp_key_refresh_retry_delay_max', > @@ -188,6 +189,9 @@ class RepoConfig(object): > self.strict_misc_digests = repo_opts.get( > 'strict-misc-digests', 'true').lower() == 'true' > > + self.sync_allow_hardlinks = repo_opts.get( > + 'sync-allow-hardlinks', 'true').lower() in ('true', > 'yes') > + > self.sync_openpgp_key_path = repo_opts.get( > 'sync-openpgp-key-path', None) > > @@ -534,6 +538,7 @@ class RepoConfigLoader(object): > 'clone_depth', > 'eclass_overrides', > 'force', 'masters', > 'priority', 'strict_misc_digests', > 'sync_depth', > 'sync_hooks_only_on_change', > + 'sync_allow_hardlinks', > 'sync_openpgp_key_path', > > 'sync_openpgp_key_refresh_retry_count', > > 'sync_openpgp_key_refresh_retry_delay_max', > @@ -962,7 +967,8 @@ class RepoConfigLoader(object): > def config_string(self): > bool_keys = ("strict_misc_digests",) > str_or_int_keys = ("auto_sync", "clone_depth", "format", > "location", > - "main_repo", "priority", "sync_depth", > "sync_openpgp_key_path", > + "main_repo", "priority", "sync_depth", > + "sync_allow_hardlinks", "sync_openpgp_key_path", > "sync_openpgp_key_refresh_retry_count", > "sync_openpgp_key_refresh_retry_delay_max", > "sync_openpgp_key_refresh_retry_delay_exp_base", > diff --git a/pym/portage/sync/modules/rsync/rsync.py > b/pym/portage/sync/modules/rsync/rsync.py > index 382a1eaaef..59211d2bb8 100644 > --- a/pym/portage/sync/modules/rsync/rsync.py > +++ b/pym/portage/sync/modules/rsync/rsync.py > @@ -11,6 +11,7 @@ import functools > import io > import re > import random > +import subprocess > import tempfile > > import portage > @@ -61,6 +62,56 @@ class RsyncSync(NewBase): > def __init__(self): > NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM) > > + def _select_download_dir(self): > + ''' > + Select and return the download directory. It's desirable to be > able > + to create shared hardlinks between the download directory to the > + normal repository, and this is facilitated by making the > download > + directory be a subdirectory of the normal repository location > + (ensuring that no mountpoints are crossed). Shared hardlinks are > + created by using the rsync --link-dest option. > + > + Since the download is initially unverified, it is safest to save > + it in a quarantine directory. The quarantine directory is also > + useful for making the repository update more atomic, so that it > + less likely that normal repository location will be observed in > + a partially synced state. > + > + This method returns a quarantine directory if > sync-allow-hardlinks > + is enabled in repos.conf, and otherwise it returne the normal > + repository location. > + ''' > + if self.repo.sync_allow_hardlinks: > + return os.path.join(self.repo.location, > '.tmp-unverified-download-quarantine') > + else: > + return self.repo.location > + > + def _commit_download(self, download_dir): > + ''' > + Commit changes from download_dir if it does not refer to the > + normal repository location. > + ''' > + exitcode = 0 > + if self.repo.location != download_dir: > + rsynccommand = [self.bin_command] + self.rsync_opts + > self.extra_rsync_opts > + rsynccommand.append('--exclude=/%s' % > os.path.basename(download_dir)) > + rsynccommand.append('%s/' % download_dir.rstrip('/')) > + rsynccommand.append('%s/' % self.repo.location) > + exitcode = subprocess.call(rsynccommand) > + if exitcode == 0: > + exitcode = self._remove_download(download_dir) > + > + return exitcode > + > + def _remove_download(self, download_dir): > + """ > + Remove download_dir if it does not refer to the normal > repository > + location. > + """ > + exitcode = 0 > + if self.repo.location != download_dir: > + exitcode = subprocess.call(['rm', '-rf', download_dir]) > + return exitcode > > def update(self): > '''Internal update function which performs the transfer''' > @@ -97,6 +148,9 @@ class RsyncSync(NewBase): > self.extra_rsync_opts.extend(portage.util.shlex_split( > > self.repo.module_specific_options['sync-rsync-extra-opts'])) > > + download_dir = self._select_download_dir() > + exitcode = 0 > + > # Process GLEP74 verification options. > # Default verification to 'no'; it's enabled for ::gentoo > # via default repos.conf though. > @@ -219,8 +273,10 @@ class RsyncSync(NewBase): > self.proto = "file" > dosyncuri = syncuri[7:] > unchanged, is_synced, exitcode, updatecache_flg > = self._do_rsync( > - dosyncuri, timestamp, opts) > + dosyncuri, timestamp, opts, > download_dir) > self._process_exitcode(exitcode, dosyncuri, > out, 1) > + if exitcode == 0 and not unchanged: > + self._commit_download(download_dir) > return (exitcode, updatecache_flg) > > retries=0 > @@ -352,7 +408,7 @@ class RsyncSync(NewBase): > dosyncuri = dosyncuri[6:].replace('/', > ':/', 1) > > unchanged, is_synced, exitcode, updatecache_flg > = self._do_rsync( > - dosyncuri, timestamp, opts) > + dosyncuri, timestamp, opts, > download_dir) > if not unchanged: > local_state_unchanged = False > if is_synced: > @@ -369,6 +425,12 @@ class RsyncSync(NewBase): > break > self._process_exitcode(exitcode, dosyncuri, out, > maxretries) > > + if local_state_unchanged: > + # The quarantine download_dir is not intended > to exist > + # in this case, so refer gemato to the normal > repository > + # location. > + download_dir = self.repo.location > + > # if synced successfully, verify now > if exitcode == 0 and self.verify_metamanifest: > if gemato is None: > @@ -380,7 +442,7 @@ class RsyncSync(NewBase): > # we always verify the Manifest > signature, in case > # we had to deal with key > revocation case > m = > gemato.recursiveloader.ManifestRecursiveLoader( > - > os.path.join(self.repo.location, 'Manifest'), > + > os.path.join(download_dir, 'Manifest'), > > verify_openpgp=True, > > openpgp_env=openpgp_env, > > max_jobs=self.verify_jobs) > @@ -411,7 +473,7 @@ class RsyncSync(NewBase): > # if nothing has changed, skip > the actual Manifest > # verification > if not local_state_unchanged: > - out.ebegin('Verifying > %s' % (self.repo.location,)) > + out.ebegin('Verifying > %s' % (download_dir,)) > > m.assert_directory_verifies() > out.eend(0) > except GematoException as e: > @@ -420,12 +482,16 @@ class RsyncSync(NewBase): > > level=logging.ERROR, noiselevel=-1) > exitcode = 1 > > + if exitcode == 0 and not local_state_unchanged: > + exitcode = self._commit_download(download_dir) > + > return (exitcode, updatecache_flg) > finally: > + if exitcode == 0: > + self._remove_download(download_dir) > if openpgp_env is not None: > openpgp_env.close() > > - > def _process_exitcode(self, exitcode, syncuri, out, maxretries): > if (exitcode==0): > pass > @@ -561,7 +627,7 @@ class RsyncSync(NewBase): > return rsync_opts > > > - def _do_rsync(self, syncuri, timestamp, opts): > + def _do_rsync(self, syncuri, timestamp, opts, download_dir): > updatecache_flg = False > is_synced = False > if timestamp != 0 and "--quiet" not in opts: > @@ -686,6 +752,12 @@ class RsyncSync(NewBase): > elif (servertimestamp == 0) or (servertimestamp > > timestamp): > # actual sync > command = rsynccommand[:] > + > + if self.repo.location != download_dir: > + # Use shared hardlinks for files that > are identical > + # in the previous snapshot of the > repository. > + command.append('--link-dest=%s' % > self.repo.location) > + > submodule_paths = self._get_submodule_paths() > if submodule_paths: > # The only way to select multiple > directories to > @@ -696,9 +768,10 @@ class RsyncSync(NewBase): > # /./ is special syntax > supported with the > # rsync --relative option. > command.append(syncuri + "/./" > + path) > - command.append(self.repo.location) > else: > - command.extend([syncuri + "/", > self.repo.location]) > + command.append(syncuri + "/") > + > + command.append(download_dir) > > exitcode = None > try: > -- > 2.13.6 > > -- Fabian Groffen Gentoo on a different level
signature.asc
Description: PGP signature