[gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/, lib/portage/sync/modules/

2024-06-04 Thread Zac Medico
commit: 8bb7aecf7e5c922911192d0df63853c5c75d9f8a
Author: Alexey Gladkov  kernel  org>
AuthorDate: Tue Jun  4 15:31:06 2024 +
Commit: Zac Medico  gentoo  org>
CommitDate: Tue Jun  4 15:40:25 2024 +
URL:https://gitweb.gentoo.org/proj/portage.git/commit/?id=8bb7aecf

sync/zipfile: Install zipfile sync method

Add the meson build files that were accidentally omitted when the
zipfile sync method was added.

Fixes: 80445d9b0 ("sync: Add method to download zip archives")
Signed-off-by: Alexey Gladkov  kernel.org>
Closes: https://github.com/gentoo/portage/pull/1340
Signed-off-by: Zac Medico  gentoo.org>

 lib/portage/sync/modules/meson.build | 1 +
 lib/portage/sync/modules/zipfile/meson.build | 8 
 2 files changed, 9 insertions(+)

diff --git a/lib/portage/sync/modules/meson.build 
b/lib/portage/sync/modules/meson.build
index fab2878e92..ba0b6f278b 100644
--- a/lib/portage/sync/modules/meson.build
+++ b/lib/portage/sync/modules/meson.build
@@ -12,3 +12,4 @@ subdir('mercurial')
 subdir('rsync')
 subdir('svn')
 subdir('webrsync')
+subdir('zipfile')

diff --git a/lib/portage/sync/modules/zipfile/meson.build 
b/lib/portage/sync/modules/zipfile/meson.build
new file mode 100644
index 00..46006aea7e
--- /dev/null
+++ b/lib/portage/sync/modules/zipfile/meson.build
@@ -0,0 +1,8 @@
+py.install_sources(
+[
+'zipfile.py',
+'__init__.py',
+],
+subdir : 'portage/sync/modules/zipfile',
+pure : not native_extensions
+)



[gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/

2024-04-27 Thread Sam James
commit: 8c6e5d06afbf6fca1893cff5ed777e44f93b7a5d
Author: Alexey Gladkov  kernel  org>
AuthorDate: Sun Mar  3 16:41:08 2024 +
Commit: Sam James  gentoo  org>
CommitDate: Sun Apr 28 00:04:08 2024 +
URL:https://gitweb.gentoo.org/proj/portage.git/commit/?id=8c6e5d06

sync/zipfile: Handle ETag header

Most services send an ETag header, which can be used to determine
whether the locally cached copy of the URL is stale. Add ETag handling
to avoid unnecessary downloading and unpacking of the zip archive.

Signed-off-by: Alexey Gladkov  kernel.org>
Signed-off-by: Sam James  gentoo.org>

 lib/portage/sync/modules/zipfile/zipfile.py | 36 +++--
 1 file changed, 29 insertions(+), 7 deletions(-)

diff --git a/lib/portage/sync/modules/zipfile/zipfile.py 
b/lib/portage/sync/modules/zipfile/zipfile.py
index 1762d2c8f1..bb78b39243 100644
--- a/lib/portage/sync/modules/zipfile/zipfile.py
+++ b/lib/portage/sync/modules/zipfile/zipfile.py
@@ -10,7 +10,7 @@ import tempfile
 import urllib.request
 
 import portage
-from portage.util import writemsg_level
+from portage.util import writemsg_level, writemsg_stdout
 from portage.sync.syncbase import SyncBase
 
 
@@ -31,13 +31,31 @@ class ZipFile(SyncBase):
 if kwargs:
 self._kwargs(kwargs)
 
-# initial checkout
-zip_uri = self.repo.sync_uri
+req = urllib.request.Request(url=self.repo.sync_uri)
 
-with urllib.request.urlopen(zip_uri) as response:
-with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
-shutil.copyfileobj(response, tmp_file)
-zip_file = tmp_file.name
+info = portage.grabdict(os.path.join(self.repo.location, ".info"))
+if "etag" in info:
+req.add_header("If-None-Match", info["etag"][0])
+
+try:
+with urllib.request.urlopen(req) as response:
+with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+shutil.copyfileobj(response, tmp_file)
+
+zip_file = tmp_file.name
+etag = response.headers.get("etag")
+
+except urllib.error.HTTPError as resp:
+if resp.code == 304:
+writemsg_stdout(">>> The repository has not changed.\n", 
noiselevel=-1)
+return (os.EX_OK, False)
+
+writemsg_level(
+f"!!! Unable to obtain zip archive: {resp}\n",
+noiselevel=-1,
+level=logging.ERROR,
+)
+return (1, False)
 
 if not zipfile.is_zipfile(zip_file):
 msg = "!!! file is not a zip archive."
@@ -77,6 +95,10 @@ class ZipFile(SyncBase):
 with open(dstpath, "wb") as dstfile:
 shutil.copyfileobj(srcfile, dstfile)
 
+with open(os.path.join(self.repo.location, ".info"), "w") as infofile:
+if etag:
+infofile.write(f"etag {etag}\n")
+
 os.unlink(zip_file)
 
 return (os.EX_OK, True)



[gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/

2024-04-27 Thread Sam James
commit: 7e93192fda22594b9e9d223c54a39b4bad0554f9
Author: Alexey Gladkov  kernel  org>
AuthorDate: Mon Mar 11 00:25:07 2024 +
Commit: Sam James  gentoo  org>
CommitDate: Sun Apr 28 00:04:08 2024 +
URL:https://gitweb.gentoo.org/proj/portage.git/commit/?id=7e93192f

sync/zipfile: Add retrieve_head to return archive checksum

Since we have an ETag, we can return the checksum of the archive. It
will be a replacement for the head commit of the repository.

Suggested-by: Zac Medico  gentoo.org>
Signed-off-by: Alexey Gladkov  kernel.org>
Signed-off-by: Sam James  gentoo.org>

 lib/portage/sync/modules/zipfile/__init__.py | 3 ++-
 lib/portage/sync/modules/zipfile/zipfile.py  | 9 +
 2 files changed, 11 insertions(+), 1 deletion(-)

diff --git a/lib/portage/sync/modules/zipfile/__init__.py 
b/lib/portage/sync/modules/zipfile/__init__.py
index 19fe3af412..e44833088c 100644
--- a/lib/portage/sync/modules/zipfile/__init__.py
+++ b/lib/portage/sync/modules/zipfile/__init__.py
@@ -21,10 +21,11 @@ module_spec = {
 "sourcefile": "zipfile",
 "class": "ZipFile",
 "description": doc,
-"functions": ["sync"],
+"functions": ["sync", "retrieve_head"],
 "func_desc": {
 "sync": "Performs an archived http download of the "
 + "repository, then unpacks it.",
+"retrieve_head": "Returns the checksum of the unpacked 
archive.",
 },
 "validate_config": CheckSyncConfig,
 "module_specific_options": (),

diff --git a/lib/portage/sync/modules/zipfile/zipfile.py 
b/lib/portage/sync/modules/zipfile/zipfile.py
index bb78b39243..3cd210a64b 100644
--- a/lib/portage/sync/modules/zipfile/zipfile.py
+++ b/lib/portage/sync/modules/zipfile/zipfile.py
@@ -26,6 +26,15 @@ class ZipFile(SyncBase):
 def __init__(self):
 SyncBase.__init__(self, "emerge", ">=sys-apps/portage-2.3")
 
+def retrieve_head(self, **kwargs):
+"""Get information about the checksum of the unpacked archive"""
+if kwargs:
+self._kwargs(kwargs)
+info = portage.grabdict(os.path.join(self.repo.location, ".info"))
+if "etag" in info:
+return (os.EX_OK, info["etag"][0])
+return (1, False)
+
 def sync(self, **kwargs):
 """Sync the repository"""
 if kwargs:



[gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/

2024-04-27 Thread Sam James
commit: ced2e6d4f4ac95b8e17cf7dae964a64037a85bf0
Author: Alexey Gladkov  kernel  org>
AuthorDate: Mon Mar 11 17:09:05 2024 +
Commit: Sam James  gentoo  org>
CommitDate: Sun Apr 28 00:04:09 2024 +
URL:https://gitweb.gentoo.org/proj/portage.git/commit/?id=ced2e6d4

sync/zipfile: Recycle files that have not changed

We can check whether the content of files from the archive differs from
the current revision. This will give us several advantages:

* This makes the mtime of files meaningful, since the timestamps of
unmodified files are no longer changed.

* This also avoids recreating self.repo.location, which allows sync
with FEATURES=usersync because self.repo.location is reused.

Suggested-by: Zac Medico  gentoo.org>
Signed-off-by: Alexey Gladkov  kernel.org>
Signed-off-by: Sam James  gentoo.org>

 lib/portage/sync/modules/zipfile/zipfile.py | 32 -
 1 file changed, 31 insertions(+), 1 deletion(-)

diff --git a/lib/portage/sync/modules/zipfile/zipfile.py 
b/lib/portage/sync/modules/zipfile/zipfile.py
index 3cd210a64b..edfb5aa681 100644
--- a/lib/portage/sync/modules/zipfile/zipfile.py
+++ b/lib/portage/sync/modules/zipfile/zipfile.py
@@ -35,6 +35,16 @@ class ZipFile(SyncBase):
 return (os.EX_OK, info["etag"][0])
 return (1, False)
 
+def _do_cmp(self, f1, f2):
+bufsize = 8 * 1024
+while True:
+b1 = f1.read(bufsize)
+b2 = f2.read(bufsize)
+if b1 != b2:
+return False
+if not b1:
+return True
+
 def sync(self, **kwargs):
 """Sync the repository"""
 if kwargs:
@@ -76,7 +86,15 @@ class ZipFile(SyncBase):
 return (1, False)
 
 # Drop previous tree
-shutil.rmtree(self.repo.location)
+tempdir = tempfile.mkdtemp(prefix=".temp", dir=self.repo.location)
+tmpname = os.path.basename(tempdir)
+
+for name in os.listdir(self.repo.location):
+if name != tmpname:
+os.rename(
+os.path.join(self.repo.location, name),
+os.path.join(tempdir, name),
+)
 
 with zipfile.ZipFile(zip_file) as archive:
 strip_comp = 0
@@ -101,9 +119,21 @@ class ZipFile(SyncBase):
 continue
 
 with archive.open(n) as srcfile:
+prvpath = os.path.join(tempdir, *parts[strip_comp:])
+
+if os.path.exists(prvpath):
+with open(prvpath, "rb") as prvfile:
+if self._do_cmp(prvfile, srcfile):
+os.rename(prvpath, dstpath)
+continue
+srcfile.seek(0)
+
 with open(dstpath, "wb") as dstfile:
 shutil.copyfileobj(srcfile, dstfile)
 
+# Drop previous tree
+shutil.rmtree(tempdir)
+
 with open(os.path.join(self.repo.location, ".info"), "w") as infofile:
 if etag:
 infofile.write(f"etag {etag}\n")



[gentoo-commits] proj/portage:master commit in: lib/portage/sync/modules/zipfile/

2024-04-27 Thread Sam James
commit: 80445d9b00bfcd1eb4955cf3ecb397b4c02663ba
Author: Alexey Gladkov  kernel  org>
AuthorDate: Mon Feb 12 13:59:40 2024 +
Commit: Sam James  gentoo  org>
CommitDate: Sun Apr 28 00:04:07 2024 +
URL:https://gitweb.gentoo.org/proj/portage.git/commit/?id=80445d9b

sync: Add method to download zip archives

Add a simple method for synchronizing a repository from a snapshot in a
zip archive. The implementation does not require external utilities to
download and unpack the archive. This makes the method very cheap.

The main use case being considered is obtaining snapshots of GitHub
repositories, but many other web interfaces for git also support
serving snapshots in zip format.

For example, to get a snapshot of the master branch:

  https://github.com/gentoo/portage/archive/refs/heads/master.zip
  https://gitweb.gentoo.org/proj/portage.git/snapshot/portage-master.zip

or a link to a snapshot of the tag:

  https://github.com/gentoo/portage/archive/refs/tags/portage-3.0.61.zip

Signed-off-by: Alexey Gladkov  kernel.org>
Signed-off-by: Sam James  gentoo.org>

 lib/portage/sync/modules/zipfile/__init__.py | 33 +++
 lib/portage/sync/modules/zipfile/zipfile.py  | 82 
 2 files changed, 115 insertions(+)

diff --git a/lib/portage/sync/modules/zipfile/__init__.py 
b/lib/portage/sync/modules/zipfile/__init__.py
new file mode 100644
index 00..19fe3af412
--- /dev/null
+++ b/lib/portage/sync/modules/zipfile/__init__.py
@@ -0,0 +1,33 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2024  Alexey Gladkov 
+
+doc = """Zipfile plug-in module for portage.
+Performs a http download of a portage snapshot and unpacks it to the repo
+location."""
+__doc__ = doc[:]
+
+
+import os
+
+from portage.sync.config_checks import CheckSyncConfig
+
+
+module_spec = {
+"name": "zipfile",
+"description": doc,
+"provides": {
+"zipfile-module": {
+"name": "zipfile",
+"sourcefile": "zipfile",
+"class": "ZipFile",
+"description": doc,
+"functions": ["sync"],
+"func_desc": {
+"sync": "Performs an archived http download of the "
++ "repository, then unpacks it.",
+},
+"validate_config": CheckSyncConfig,
+"module_specific_options": (),
+},
+},
+}

diff --git a/lib/portage/sync/modules/zipfile/zipfile.py 
b/lib/portage/sync/modules/zipfile/zipfile.py
new file mode 100644
index 00..1762d2c8f1
--- /dev/null
+++ b/lib/portage/sync/modules/zipfile/zipfile.py
@@ -0,0 +1,82 @@
+# SPDX-License-Identifier: GPL-2.0-or-later
+# Copyright (C) 2024  Alexey Gladkov 
+
+import os
+import os.path
+import logging
+import zipfile
+import shutil
+import tempfile
+import urllib.request
+
+import portage
+from portage.util import writemsg_level
+from portage.sync.syncbase import SyncBase
+
+
+class ZipFile(SyncBase):
+"""ZipFile sync module"""
+
+short_desc = "Perform sync operations on GitHub repositories"
+
+@staticmethod
+def name():
+return "ZipFile"
+
+def __init__(self):
+SyncBase.__init__(self, "emerge", ">=sys-apps/portage-2.3")
+
+def sync(self, **kwargs):
+"""Sync the repository"""
+if kwargs:
+self._kwargs(kwargs)
+
+# initial checkout
+zip_uri = self.repo.sync_uri
+
+with urllib.request.urlopen(zip_uri) as response:
+with tempfile.NamedTemporaryFile(delete=False) as tmp_file:
+shutil.copyfileobj(response, tmp_file)
+zip_file = tmp_file.name
+
+if not zipfile.is_zipfile(zip_file):
+msg = "!!! file is not a zip archive."
+self.logger(self.xterm_titles, msg)
+writemsg_level(msg + "\n", noiselevel=-1, level=logging.ERROR)
+
+os.unlink(zip_file)
+
+return (1, False)
+
+# Drop previous tree
+shutil.rmtree(self.repo.location)
+
+with zipfile.ZipFile(zip_file) as archive:
+strip_comp = 0
+
+for f in archive.namelist():
+f = os.path.normpath(f)
+if os.path.basename(f) == "profiles":
+strip_comp = f.count("/")
+break
+
+for n in archive.infolist():
+p = os.path.normpath(n.filename)
+
+if os.path.isabs(p):
+continue
+
+parts = p.split("/")
+dstpath = os.path.join(self.repo.location, *parts[strip_comp:])
+
+if n.is_dir():
+os.makedirs(dstpath, mode=0o755, exist_ok=True)
+continue
+
+with archive.open(n) as srcfile:
+with open(dstpath, "wb") as dstfile:
+shutil.copyfileobj(srcfile, dstfile)
+
+os.unlink(zip_file)
+
+return (os.EX_OK, True)