Sync into a quarantine subdirectory, using the rsync --link-dest option
to create hardlinks to identical files in the previous snapshot of the
repository. If hardlinks are not supported, then show a warning message
and sync directly to the normal repository location.

If verification succeeds, then the quarantine subdirectory is synced
to the normal repository location, and the quarantine subdirectory
is deleted. If verification fails, then the quarantine directory is
preserved for purposes of analysis.

Even if verification happens to be disabled, the quarantine directory
is still useful for making the repository update more atomic, so that
it is less likely that the normal repository location will be observed
in a partially synced state.

Bug: https://bugs.gentoo.org/660410
---
 pym/portage/sync/modules/rsync/rsync.py | 101 +++++++++++++++++++++++++++++---
 1 file changed, 93 insertions(+), 8 deletions(-)

diff --git a/pym/portage/sync/modules/rsync/rsync.py 
b/pym/portage/sync/modules/rsync/rsync.py
index 382a1eaaef..9334c8a791 100644
--- a/pym/portage/sync/modules/rsync/rsync.py
+++ b/pym/portage/sync/modules/rsync/rsync.py
@@ -11,6 +11,7 @@ import functools
 import io
 import re
 import random
+import subprocess
 import tempfile
 
 import portage
@@ -61,6 +62,68 @@ class RsyncSync(NewBase):
        def __init__(self):
                NewBase.__init__(self, "rsync", RSYNC_PACKAGE_ATOM)
 
+       def _select_download_dir(self):
+               '''
+               Select and return the download directory. It's desirable to be able
+               to create shared hardlinks between the download directory and the
+               normal repository, and this is facilitated by making the download
+               directory be a subdirectory of the normal repository location
+               (ensuring that no mountpoints are crossed). Shared hardlinks are
+               created by using the rsync --link-dest option.
+
+               Since the download is initially unverified, it is safest to save
+               it in a quarantine directory. The quarantine directory is also
+               useful for making the repository update more atomic, so that it
+               is less likely that the normal repository location will be observed in
+               a partially synced state.
+
+               This method tests if it is possible to create hardlinks in the
+               repository directory, and if that fails then it issues a warning
+               message and returns the normal repository location.
+               '''
+               with tempfile.NamedTemporaryFile(dir=self.repo.location,
+                       prefix='.tmp', suffix='-portage-hardlink-test') as f:
+                       hardlink = f.name + '-hardlink'
+                       try:
+                               os.link(f.name, hardlink)
+                       except OSError as e:
+                               writemsg_level("!!! Syncing directly to '%s' 
because hardlink creation failed: %s\n" %
+                                       (self.repo.location, e), 
level=logging.WARNING, noiselevel=-1)
+                               return self.repo.location
+                       finally:
+                               try:
+                                       os.unlink(hardlink)
+                               except OSError:
+                                       pass
+
+               return os.path.join(self.repo.location, 
'.tmp-unverified-download-quarantine')
+
+       def _commit_download(self, download_dir):
+               '''
+               Commit changes from download_dir if it does not refer to the
+               normal repository location.
+               '''
+               exitcode = 0
+               if self.repo.location != download_dir:
+                       rsynccommand = [self.bin_command] + self.rsync_opts + 
self.extra_rsync_opts
+                       rsynccommand.append('--exclude=/%s' % 
os.path.basename(download_dir))
+                       rsynccommand.append('%s/' % download_dir.rstrip('/'))
+                       rsynccommand.append('%s/' % self.repo.location)
+                       exitcode = subprocess.call(rsynccommand)
+                       if exitcode == 0:
+                               exitcode = self._remove_download(download_dir)
+
+               return exitcode
+
+       def _remove_download(self, download_dir):
+               """
+               Remove download_dir if it does not refer to the normal 
repository
+               location.
+               """
+               exitcode = 0
+               if self.repo.location != download_dir:
+                       exitcode = subprocess.call(['rm', '-rf', download_dir])
+               return exitcode
 
        def update(self):
                '''Internal update function which performs the transfer'''
@@ -97,6 +160,9 @@ class RsyncSync(NewBase):
                        self.extra_rsync_opts.extend(portage.util.shlex_split(
                                
self.repo.module_specific_options['sync-rsync-extra-opts']))
 
+               download_dir = self._select_download_dir()
+               exitcode = 0
+
                # Process GLEP74 verification options.
                # Default verification to 'no'; it's enabled for ::gentoo
                # via default repos.conf though.
@@ -219,8 +285,10 @@ class RsyncSync(NewBase):
                                self.proto = "file"
                                dosyncuri = syncuri[7:]
                                unchanged, is_synced, exitcode, updatecache_flg 
= self._do_rsync(
-                                       dosyncuri, timestamp, opts)
+                                       dosyncuri, timestamp, opts, 
download_dir)
                                self._process_exitcode(exitcode, dosyncuri, 
out, 1)
+                               if exitcode == 0 and not unchanged:
+                                       self._commit_download(download_dir)
                                return (exitcode, updatecache_flg)
 
                        retries=0
@@ -352,7 +420,7 @@ class RsyncSync(NewBase):
                                        dosyncuri = dosyncuri[6:].replace('/', 
':/', 1)
 
                                unchanged, is_synced, exitcode, updatecache_flg 
= self._do_rsync(
-                                       dosyncuri, timestamp, opts)
+                                       dosyncuri, timestamp, opts, 
download_dir)
                                if not unchanged:
                                        local_state_unchanged = False
                                if is_synced:
@@ -369,6 +437,12 @@ class RsyncSync(NewBase):
                                        break
                        self._process_exitcode(exitcode, dosyncuri, out, 
maxretries)
 
+                       if local_state_unchanged:
+                               # The quarantine download_dir is not intended 
to exist
+                               # in this case, so refer gemato to the normal 
repository
+                               # location.
+                               download_dir = self.repo.location
+
                        # if synced successfully, verify now
                        if exitcode == 0 and self.verify_metamanifest:
                                if gemato is None:
@@ -380,7 +454,7 @@ class RsyncSync(NewBase):
                                                # we always verify the Manifest 
signature, in case
                                                # we had to deal with key 
revocation case
                                                m = 
gemato.recursiveloader.ManifestRecursiveLoader(
-                                                               
os.path.join(self.repo.location, 'Manifest'),
+                                                               
os.path.join(download_dir, 'Manifest'),
                                                                
verify_openpgp=True,
                                                                
openpgp_env=openpgp_env,
                                                                
max_jobs=self.verify_jobs)
@@ -411,7 +485,7 @@ class RsyncSync(NewBase):
                                                # if nothing has changed, skip 
the actual Manifest
                                                # verification
                                                if not local_state_unchanged:
-                                                       out.ebegin('Verifying 
%s' % (self.repo.location,))
+                                                       out.ebegin('Verifying 
%s' % (download_dir,))
                                                        
m.assert_directory_verifies()
                                                        out.eend(0)
                                        except GematoException as e:
@@ -420,12 +494,16 @@ class RsyncSync(NewBase):
                                                                
level=logging.ERROR, noiselevel=-1)
                                                exitcode = 1
 
+                       if exitcode == 0 and not local_state_unchanged:
+                               exitcode = self._commit_download(download_dir)
+
                        return (exitcode, updatecache_flg)
                finally:
+                       if exitcode == 0:
+                               self._remove_download(download_dir)
                        if openpgp_env is not None:
                                openpgp_env.close()
 
-
        def _process_exitcode(self, exitcode, syncuri, out, maxretries):
                if (exitcode==0):
                        pass
@@ -561,7 +639,7 @@ class RsyncSync(NewBase):
                return rsync_opts
 
 
-       def _do_rsync(self, syncuri, timestamp, opts):
+       def _do_rsync(self, syncuri, timestamp, opts, download_dir):
                updatecache_flg = False
                is_synced = False
                if timestamp != 0 and "--quiet" not in opts:
@@ -686,6 +764,12 @@ class RsyncSync(NewBase):
                        elif (servertimestamp == 0) or (servertimestamp > 
timestamp):
                                # actual sync
                                command = rsynccommand[:]
+
+                               if self.repo.location != download_dir:
+                                       # Use shared hardlinks for files that 
are identical
+                                       # in the previous snapshot of the 
repository.
+                                       command.append('--link-dest=%s' % 
self.repo.location)
+
                                submodule_paths = self._get_submodule_paths()
                                if submodule_paths:
                                        # The only way to select multiple 
directories to
@@ -696,9 +780,10 @@ class RsyncSync(NewBase):
                                                # /./ is special syntax 
supported with the
                                                # rsync --relative option.
                                                command.append(syncuri + "/./" 
+ path)
-                                       command.append(self.repo.location)
                                else:
-                                       command.extend([syncuri + "/", 
self.repo.location])
+                                       command.append(syncuri + "/")
+
+                               command.append(download_dir)
 
                                exitcode = None
                                try:
-- 
2.13.6


Reply via email to