Would it be worth having a flag to keep long standing sync behaviour for
situations where either diskspace, or verification results in problems
(like bug #657320)?

Fabian

On 07-07-2018 18:55:52 -0700, Zac Medico wrote:
> Sync into a quarantine subdirectory, using the rsync --link-dest option
> to create hardlinks to identical files in the previous snapshot of the
> repository. If hardlinks are not supported, then show a warning message
> and sync directly to the normal repository location.
> 
> If verification succeeds, then the quarantine subdirectory is synced
> to the normal repository location, and the quarantine subdirectory
> is deleted. If verification fails, then the quarantine directory is
> preserved for purposes of analysis.
> 
> Even if verification happens to be disabled, the quarantine directory
> is still useful for making the repository update more atomic, so that
> it is less likely that normal repository location will be observed in
> a partially synced state.
> 
> The new behavior may conflict with configurations that restrict the
> use of hardlinks, such as overlay filesystems. Therefore, users will
> have to set "sync-allow-hardlinks = no" in repos.conf if they have
> a configuration that prevents the use of hardlinks, but this should
> not be very common.
> 
> Bug: https://bugs.gentoo.org/660410
> ---
> [PATCH v2] makes it possible to disable the new behavior by setting
> "sync-allow-hardlinks = no" in repos.conf
> 
>  cnf/repos.conf                          |  1 +
>  man/portage.5                           |  8 +++
>  pym/portage/repository/config.py        |  8 ++-
>  pym/portage/sync/modules/rsync/rsync.py | 89 
> ++++++++++++++++++++++++++++++---
>  4 files changed, 97 insertions(+), 9 deletions(-)
> 
> diff --git a/cnf/repos.conf b/cnf/repos.conf
> index 352073cfd5..419f6d1182 100644
> --- a/cnf/repos.conf
> +++ b/cnf/repos.conf
> @@ -6,6 +6,7 @@ location = /usr/portage
>  sync-type = rsync
>  sync-uri = rsync://rsync.gentoo.org/gentoo-portage
>  auto-sync = yes
> +sync-allow-hardlinks = yes
>  sync-rsync-verify-jobs = 1
>  sync-rsync-verify-metamanifest = yes
>  sync-rsync-verify-max-age = 24
> diff --git a/man/portage.5 b/man/portage.5
> index 5adb07d821..acc80791be 100644
> --- a/man/portage.5
> +++ b/man/portage.5
> @@ -973,6 +973,14 @@ files). Defaults to true.
>  .br
>  Valid values: true, false.
>  .TP
> +.B sync\-allow\-hardlinks = yes|no
> +Allow sync plugins to use hardlinks in order to ensure that a repository
> +remains in a valid state if something goes wrong during the sync operation.
> +For example, if signature verification fails during a sync operation,
> +the previous state of the repository will be preserved. This option may
> +conflict with configurations that restrict the use of hardlinks, such as
> +overlay filesystems.
> +.TP
>  .B sync\-cvs\-repo
>  Specifies CVS repository.
>  .TP
> diff --git a/pym/portage/repository/config.py 
> b/pym/portage/repository/config.py
> index 1d897bb903..c7440369c2 100644
> --- a/pym/portage/repository/config.py
> +++ b/pym/portage/repository/config.py
> @@ -86,6 +86,7 @@ class RepoConfig(object):
>               'sync_type', 'sync_umask', 'sync_uri', 'sync_user', 
> 'thin_manifest',
>               'update_changelog', '_eapis_banned', '_eapis_deprecated',
>               '_masters_orig', 'module_specific_options', 
> 'manifest_required_hashes',
> +             'sync_allow_hardlinks',
>               'sync_openpgp_key_path',
>               'sync_openpgp_key_refresh_retry_count',
>               'sync_openpgp_key_refresh_retry_delay_max',
> @@ -188,6 +189,9 @@ class RepoConfig(object):
>               self.strict_misc_digests = repo_opts.get(
>                       'strict-misc-digests', 'true').lower() == 'true'
>  
> +             self.sync_allow_hardlinks = repo_opts.get(
> +                     'sync-allow-hardlinks', 'true').lower() in ('true', 
> 'yes')
> +
>               self.sync_openpgp_key_path = repo_opts.get(
>                       'sync-openpgp-key-path', None)
>  
> @@ -534,6 +538,7 @@ class RepoConfigLoader(object):
>                                                       'clone_depth', 
> 'eclass_overrides',
>                                                       'force', 'masters', 
> 'priority', 'strict_misc_digests',
>                                                       'sync_depth', 
> 'sync_hooks_only_on_change',
> +                                                     'sync_allow_hardlinks',
>                                                       'sync_openpgp_key_path',
>                                                       
> 'sync_openpgp_key_refresh_retry_count',
>                                                       
> 'sync_openpgp_key_refresh_retry_delay_max',
> @@ -962,7 +967,8 @@ class RepoConfigLoader(object):
>       def config_string(self):
>               bool_keys = ("strict_misc_digests",)
>               str_or_int_keys = ("auto_sync", "clone_depth", "format", 
> "location",
> -                     "main_repo", "priority", "sync_depth", 
> "sync_openpgp_key_path",
> +                     "main_repo", "priority", "sync_depth",
> +                     "sync_allow_hardlinks", "sync_openpgp_key_path",
>                       "sync_openpgp_key_refresh_retry_count",
>                       "sync_openpgp_key_refresh_retry_delay_max",
>                       "sync_openpgp_key_refresh_retry_delay_exp_base",
> diff --git a/pym/portage/sync/modules/rsync/rsync.py 
> b/pym/portage/sync/modules/rsync/rsync.py
> index 382a1eaaef..59211d2bb8 100644
> --- a/pym/portage/sync/modules/rsync/rsync.py
> +++ b/pym/portage/sync/modules/rsync/rsync.py
> @@ -11,6 +11,7 @@ import functools
>  import io
>  import re
>  import random
> +import subprocess
>  import tempfile
>  
>  import portage
> @@ -61,6 +62,56 @@ class RsyncSync(NewBase):
>       def __init__(self):
>               NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM)
>  
> +     def _select_download_dir(self):
> +             '''
> +             Select and return the download directory. It's desirable to be 
> able
> +             to create shared hardlinks between the download directory to the
> +             normal repository, and this is facilitated by making the 
> download
> +             directory be a subdirectory of the normal repository location
> +             (ensuring that no mountpoints are crossed). Shared hardlinks are
> +             created by using the rsync --link-dest option.
> +
> +             Since the download is initially unverified, it is safest to save
> +             it in a quarantine directory. The quarantine directory is also
> +             useful for making the repository update more atomic, so that it
> +             less likely that normal repository location will be observed in
> +             a partially synced state.
> +
> +             This method returns a quarantine directory if 
> sync-allow-hardlinks
> +             is enabled in repos.conf, and otherwise it returne the normal
> +             repository location.
> +             '''
> +             if self.repo.sync_allow_hardlinks:
> +                     return os.path.join(self.repo.location, 
> '.tmp-unverified-download-quarantine')
> +             else:
> +                     return self.repo.location
> +
> +     def _commit_download(self, download_dir):
> +             '''
> +             Commit changes from download_dir if it does not refer to the
> +             normal repository location.
> +             '''
> +             exitcode = 0
> +             if self.repo.location != download_dir:
> +                     rsynccommand = [self.bin_command] + self.rsync_opts + 
> self.extra_rsync_opts
> +                     rsynccommand.append('--exclude=/%s' % 
> os.path.basename(download_dir))
> +                     rsynccommand.append('%s/' % download_dir.rstrip('/'))
> +                     rsynccommand.append('%s/' % self.repo.location)
> +                     exitcode = subprocess.call(rsynccommand)
> +                     if exitcode == 0:
> +                             exitcode = self._remove_download(download_dir)
> +
> +             return exitcode
> +
> +     def _remove_download(self, download_dir):
> +             """
> +             Remove download_dir if it does not refer to the normal 
> repository
> +             location.
> +             """
> +             exitcode = 0
> +             if self.repo.location != download_dir:
> +                     exitcode = subprocess.call(['rm', '-rf', download_dir])
> +             return exitcode
>  
>       def update(self):
>               '''Internal update function which performs the transfer'''
> @@ -97,6 +148,9 @@ class RsyncSync(NewBase):
>                       self.extra_rsync_opts.extend(portage.util.shlex_split(
>                               
> self.repo.module_specific_options['sync-rsync-extra-opts']))
>  
> +             download_dir = self._select_download_dir()
> +             exitcode = 0
> +
>               # Process GLEP74 verification options.
>               # Default verification to 'no'; it's enabled for ::gentoo
>               # via default repos.conf though.
> @@ -219,8 +273,10 @@ class RsyncSync(NewBase):
>                               self.proto = "file"
>                               dosyncuri = syncuri[7:]
>                               unchanged, is_synced, exitcode, updatecache_flg 
> = self._do_rsync(
> -                                     dosyncuri, timestamp, opts)
> +                                     dosyncuri, timestamp, opts, 
> download_dir)
>                               self._process_exitcode(exitcode, dosyncuri, 
> out, 1)
> +                             if exitcode == 0 and not unchanged:
> +                                     self._commit_download(download_dir)
>                               return (exitcode, updatecache_flg)
>  
>                       retries=0
> @@ -352,7 +408,7 @@ class RsyncSync(NewBase):
>                                       dosyncuri = dosyncuri[6:].replace('/', 
> ':/', 1)
>  
>                               unchanged, is_synced, exitcode, updatecache_flg 
> = self._do_rsync(
> -                                     dosyncuri, timestamp, opts)
> +                                     dosyncuri, timestamp, opts, 
> download_dir)
>                               if not unchanged:
>                                       local_state_unchanged = False
>                               if is_synced:
> @@ -369,6 +425,12 @@ class RsyncSync(NewBase):
>                                       break
>                       self._process_exitcode(exitcode, dosyncuri, out, 
> maxretries)
>  
> +                     if local_state_unchanged:
> +                             # The quarantine download_dir is not intended 
> to exist
> +                             # in this case, so refer gemato to the normal 
> repository
> +                             # location.
> +                             download_dir = self.repo.location
> +
>                       # if synced successfully, verify now
>                       if exitcode == 0 and self.verify_metamanifest:
>                               if gemato is None:
> @@ -380,7 +442,7 @@ class RsyncSync(NewBase):
>                                               # we always verify the Manifest 
> signature, in case
>                                               # we had to deal with key 
> revocation case
>                                               m = 
> gemato.recursiveloader.ManifestRecursiveLoader(
> -                                                             
> os.path.join(self.repo.location, 'Manifest'),
> +                                                             
> os.path.join(download_dir, 'Manifest'),
>                                                               
> verify_openpgp=True,
>                                                               
> openpgp_env=openpgp_env,
>                                                               
> max_jobs=self.verify_jobs)
> @@ -411,7 +473,7 @@ class RsyncSync(NewBase):
>                                               # if nothing has changed, skip 
> the actual Manifest
>                                               # verification
>                                               if not local_state_unchanged:
> -                                                     out.ebegin('Verifying 
> %s' % (self.repo.location,))
> +                                                     out.ebegin('Verifying 
> %s' % (download_dir,))
>                                                       
> m.assert_directory_verifies()
>                                                       out.eend(0)
>                                       except GematoException as e:
> @@ -420,12 +482,16 @@ class RsyncSync(NewBase):
>                                                               
> level=logging.ERROR, noiselevel=-1)
>                                               exitcode = 1
>  
> +                     if exitcode == 0 and not local_state_unchanged:
> +                             exitcode = self._commit_download(download_dir)
> +
>                       return (exitcode, updatecache_flg)
>               finally:
> +                     if exitcode == 0:
> +                             self._remove_download(download_dir)
>                       if openpgp_env is not None:
>                               openpgp_env.close()
>  
> -
>       def _process_exitcode(self, exitcode, syncuri, out, maxretries):
>               if (exitcode==0):
>                       pass
> @@ -561,7 +627,7 @@ class RsyncSync(NewBase):
>               return rsync_opts
>  
>  
> -     def _do_rsync(self, syncuri, timestamp, opts):
> +     def _do_rsync(self, syncuri, timestamp, opts, download_dir):
>               updatecache_flg = False
>               is_synced = False
>               if timestamp != 0 and "--quiet" not in opts:
> @@ -686,6 +752,12 @@ class RsyncSync(NewBase):
>                       elif (servertimestamp == 0) or (servertimestamp > 
> timestamp):
>                               # actual sync
>                               command = rsynccommand[:]
> +
> +                             if self.repo.location != download_dir:
> +                                     # Use shared hardlinks for files that 
> are identical
> +                                     # in the previous snapshot of the 
> repository.
> +                                     command.append('--link-dest=%s' % 
> self.repo.location)
> +
>                               submodule_paths = self._get_submodule_paths()
>                               if submodule_paths:
>                                       # The only way to select multiple 
> directories to
> @@ -696,9 +768,10 @@ class RsyncSync(NewBase):
>                                               # /./ is special syntax 
> supported with the
>                                               # rsync --relative option.
>                                               command.append(syncuri + "/./" 
> + path)
> -                                     command.append(self.repo.location)
>                               else:
> -                                     command.extend([syncuri + "/", 
> self.repo.location])
> +                                     command.append(syncuri + "/")
> +
> +                             command.append(download_dir)
>  
>                               exitcode = None
>                               try:
> -- 
> 2.13.6
> 
> 

-- 
Fabian Groffen
Gentoo on a different level

Attachment: signature.asc
Description: PGP signature

Reply via email to