Re: [gentoo-portage-dev] [PATCH 3/4] rsync: split out repo storage framework

2018-08-09 Thread Brian Dolbec
On Mon,  6 Aug 2018 00:40:32 -0700
Zac Medico  wrote:

> Since there aremany ways to manage repository storage, split out a
> repo storage framework. The HardlinkQuarantineRepoStorage class
> implements the existing default behavior, and the InplaceRepoStorage
> class implements the legacy behavior (when sync-allow-hardlinks is
> disabled in repos.conf).
> 
> Each class implements RepoStorageInterface, which uses coroutine
> methods since coroutines are well-suited to the I/O bound tasks that
> these methods perform. The _sync_decorator is used to convert
> coroutine methods to synchronous methods, for smooth integration into
> the surrounding synchronous code.
> 
> Bug: https://bugs.gentoo.org/662070
>

missing space in first line of commit message
s/aremany/are many

 ---



[gentoo-portage-dev] [PATCH 3/4] rsync: split out repo storage framework

2018-08-06 Thread Zac Medico
Since there aremany ways to manage repository storage, split out a repo
storage framework. The HardlinkQuarantineRepoStorage class implements
the existing default behavior, and the InplaceRepoStorage class
implements the legacy behavior (when sync-allow-hardlinks is disabled in
repos.conf).

Each class implements RepoStorageInterface, which uses coroutine methods
since coroutines are well-suited to the I/O bound tasks that these
methods perform. The _sync_decorator is used to convert coroutine
methods to synchronous methods, for smooth integration into the
surrounding synchronous code.

Bug: https://bugs.gentoo.org/662070
---
 lib/portage/repository/storage/__init__.py |  0
 .../repository/storage/hardlink_quarantine.py  | 95 ++
 lib/portage/repository/storage/inplace.py  | 49 +++
 lib/portage/repository/storage/interface.py| 87 
 lib/portage/sync/controller.py |  1 +
 lib/portage/sync/modules/rsync/rsync.py| 85 +--
 lib/portage/sync/syncbase.py   | 31 +++
 7 files changed, 284 insertions(+), 64 deletions(-)
 create mode 100644 lib/portage/repository/storage/__init__.py
 create mode 100644 lib/portage/repository/storage/hardlink_quarantine.py
 create mode 100644 lib/portage/repository/storage/inplace.py
 create mode 100644 lib/portage/repository/storage/interface.py

diff --git a/lib/portage/repository/storage/__init__.py 
b/lib/portage/repository/storage/__init__.py
new file mode 100644
index 0..e69de29bb
diff --git a/lib/portage/repository/storage/hardlink_quarantine.py 
b/lib/portage/repository/storage/hardlink_quarantine.py
new file mode 100644
index 0..7e9cf4493
--- /dev/null
+++ b/lib/portage/repository/storage/hardlink_quarantine.py
@@ -0,0 +1,95 @@
+# Copyright 2018 Gentoo Foundation
+# Distributed under the terms of the GNU General Public License v2
+
+from portage import os
+from portage.repository.storage.interface import (
+   RepoStorageException,
+   RepoStorageInterface,
+)
+from portage.util.futures import asyncio
+from portage.util.futures.compat_coroutine import (
+   coroutine,
+   coroutine_return,
+)
+
+from _emerge.SpawnProcess import SpawnProcess
+
+
+class HardlinkQuarantineRepoStorage(RepoStorageInterface):
+   """
+   This is the default storage module, since it's quite compatible with
+   most configurations.
+
+   It's desirable to be able to create shared hardlinks between the
+   download directory and the normal repository, and this is facilitated
+   by making the download directory be a subdirectory of the normal
+   repository location (ensuring that no mountpoints are crossed).
+   Shared hardlinks are created by using the rsync --link-dest option.
+
+   Since the download is initially unverified, it is safest to save
+   it in a quarantine directory. The quarantine directory is also
+   useful for making the repository update more atomic, so that it is
+   less likely that the normal repository location will be observed in
+   a partially synced state.
+   """
+   def __init__(self, repo, spawn_kwargs):
+   self._user_location = repo.location
+   self._update_location = None
+   self._spawn_kwargs = spawn_kwargs
+   self._current_update = None
+
+   @coroutine
+   def _check_call(self, cmd):
+   """
+   Run cmd and raise RepoStorageException on failure.
+
+   @param cmd: command to execute
+   @type cmd: list
+   """
+   p = SpawnProcess(args=cmd, scheduler=asyncio._wrap_loop(), 
**self._spawn_kwargs)
+   p.start()
+   if (yield p.async_wait()) != os.EX_OK:
+   raise RepoStorageException('command exited with status 
{}: {}'.\
+   format(p.returncode, ' '.join(cmd)))
+
+   @coroutine
+   def init_update(self):
+   update_location = os.path.join(self._user_location, 
'.tmp-unverified-download-quarantine')
+   yield self._check_call(['rm', '-rf', update_location])
+
+   # Use rsync --link-dest to hardlink files into 
self._update_location,
+   # since cp -l is not portable.
+   yield self._check_call(['rsync', '-a', '--link-dest', 
self._user_location,
+   '--exclude', 
'/{}'.format(os.path.basename(update_location)),
+   self._user_location + '/', update_location + '/'])
+
+   self._update_location = update_location
+
+   coroutine_return(self._update_location)
+
+   @property
+   def current_update(self):
+   if self._update_location is None:
+   raise RepoStorageException('current update does not 
exist')
+   return self._update_location
+
+   @coroutine
+