commit: cea075b477a201098cfa5296146c209faa2b8d0b Author: Matt Turner <mattst88 <AT> gentoo <DOT> org> AuthorDate: Fri Apr 17 18:03:02 2020 +0000 Commit: Matt Turner <mattst88 <AT> gentoo <DOT> org> CommitDate: Sun May 17 03:47:02 2020 +0000 URL: https://gitweb.gentoo.org/proj/catalyst.git/commit/?id=cea075b4
catalyst: gzip the .CONTENTS file Other algorithms give better compression ratios, but the difference is not meaningful for a 2MiB text file. In my testing bzip2 gave a better compression ratio of 15:1 vs gzip's 11:1, but that ends up being a size difference of only about 50KiB (136KiB vs 187KiB), which is just an additional 2.5% saving relative to the size of the uncompressed input. Choose gzip because transparent decompression is widely supported by web servers and clients. Closes: https://bugs.gentoo.org/630284 Signed-off-by: Matt Turner <mattst88 <AT> gentoo.org> (cherry picked from commit bb21b8615e64cb31fa9aa9d533ef328dc1374e45) catalyst/base/genbase.py | 7 ++++--- catalyst/support.py | 4 +++- doc/HOWTO.txt | 6 +++--- 3 files changed, 10 insertions(+), 7 deletions(-) diff --git a/catalyst/base/genbase.py b/catalyst/base/genbase.py index 8af3b97a..0fc1f57e 100644 --- a/catalyst/base/genbase.py +++ b/catalyst/base/genbase.py @@ -1,6 +1,7 @@ import io import os +import gzip class GenBase(object): @@ -12,13 +13,13 @@ class GenBase(object): def gen_contents_file(self, path): - contents = path + ".CONTENTS" + contents = path + ".CONTENTS.gz" if os.path.exists(contents): os.remove(contents) if "contents" in self.settings: contents_map = self.settings["contents_map"] if os.path.exists(path): - with io.open(contents, "w", encoding='utf-8') as myf: + with gzip.open(contents, "w", encoding='utf-8') as myf: keys={} for i in self.settings["contents"].split(): keys[i]=1 @@ -41,7 +42,7 @@ class GenBase(object): for i in self.settings["digests"].split(): keys[i]=1 array = sorted(keys.keys()) - for f in [path, path + '.CONTENTS']: + for f in [path, path + '.CONTENTS.gz']: if os.path.exists(f): if "all" in array: for k in list(hash_map.hash_map): diff --git a/catalyst/support.py b/catalyst/support.py index 9cc5d751..7743b142 100644 --- a/catalyst/support.py +++ b/catalyst/support.py @@ -64,7 +64,9 @@ def file_check(filepath, extensions=None, strict=True): # so check if there are files of that name with an 
extension files = glob.glob("%s.*" % filepath) # remove any false positive files - files = [x for x in files if not x.endswith(".CONTENTS") and not x.endswith(".DIGESTS")] + files = [x for x in files if not x.endswith(".CONTENTS") and not + x.endswith(".CONTENTS.gz") and not x.endswith(".DIGESTS")] + if len(files) is 1: return files[0] elif len(files) > 1 and strict: diff --git a/doc/HOWTO.txt b/doc/HOWTO.txt index b1d315e6..b9ae1303 100644 --- a/doc/HOWTO.txt +++ b/doc/HOWTO.txt @@ -22,7 +22,7 @@ Create a snapshot of your current Portage tree (you may want to # catalyst --snapshot 20130131 # ls /var/tmp/catalyst/snapshots/ portage-20130131.tar.bz2 - portage-20130131.tar.bz2.CONTENTS + portage-20130131.tar.bz2.CONTENTS.gz portage-20130131.tar.bz2.DIGESTS where the storage location is relative to the default @@ -44,7 +44,7 @@ For example, Grab the tarball and put it where catalyst will find it: # wget http://…/stage3-amd64-20121213.tar.bz2 - # wget http://…/stage3-amd64-20121213.tar.bz2.CONTENTS + # wget http://…/stage3-amd64-20121213.tar.bz2.CONTENTS.gz # wget http://…/stage3-amd64-20121213.tar.bz2.DIGESTS.asc # sha512sum -c stage3-amd64-20121213.tar.bz2.DIGESTS.asc # gpg --verify stage3-amd64-20121213.tar.bz2.DIGESTS.asc @@ -89,7 +89,7 @@ which will build the target and install something like: # ls /var/tmp/catalyst/builds/default/stage1-amd64-2013.1.* /var/tmp/catalyst/builds/default/stage1-amd64-2013.1.tar.bz2 - /var/tmp/catalyst/builds/default/stage1-amd64-2013.1.tar.bz2.CONTENTS + /var/tmp/catalyst/builds/default/stage1-amd64-2013.1.tar.bz2.CONTENTS.gz /var/tmp/catalyst/builds/default/stage1-amd64-2013.1.tar.bz2.DIGESTS The name is an expansion of