Change cache modules to write md5 in cache entries, instead of mtime.
Since portage-2.2.27, the relevant cache modules have had the ability
to read cache entries containing either md5 or mtime, therefore this
change is backward-compatible with portage-2.2.27 and later.
Also fix the reconstruct_eclasses function to raise CacheCorruption
when the specified chf_type is md5 and the cache entry contains mtime
data, and optimize __getitem__ to skip reconstruct_eclasses calls when
the entry appears to have a different chf_type.
X-Gentoo-Bug: 568934
X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934
---
[PATCH v3] fixes the __getitem__ optimization to ensure that
CacheCorruption is raised if a cache entry does not contain a
recognized chf_type.
pym/portage/cache/anydbm.py    |  4 ++--
pym/portage/cache/flat_hash.py |  4 ++--
pym/portage/cache/sqlite.py    |  4 ++--
pym/portage/cache/template.py  | 26 ++++++++++++++++++++++----
4 files changed, 28 insertions(+), 10 deletions(-)
diff --git a/pym/portage/cache/anydbm.py b/pym/portage/cache/anydbm.py
index 80d24e5..88d85b0 100644
--- a/pym/portage/cache/anydbm.py
+++ b/pym/portage/cache/anydbm.py
@@ -36,8 +36,8 @@ from portage.cache import cache_errors
class database(fs_template.FsBased):
- validation_chf = 'mtime'
- chf_types = ('mtime', 'md5')
+ validation_chf = 'md5'
+ chf_types = ('md5', 'mtime')
autocommits = True
cleanse_keys = True
diff --git a/pym/portage/cache/flat_hash.py b/pym/portage/cache/flat_hash.py
index cca0f10..3a899c0 100644
--- a/pym/portage/cache/flat_hash.py
+++ b/pym/portage/cache/flat_hash.py
@@ -163,5 +163,5 @@ class md5_database(database):
class mtime_md5_database(database):
- validation_chf = 'mtime'
- chf_types = ('mtime', 'md5')
+ validation_chf = 'md5'
+ chf_types = ('md5', 'mtime')
diff --git a/pym/portage/cache/sqlite.py b/pym/portage/cache/sqlite.py
index 32e4076..69150f6 100644
--- a/pym/portage/cache/sqlite.py
+++ b/pym/portage/cache/sqlite.py
@@ -18,8 +18,8 @@ if sys.hexversion >= 0x300:
class database(fs_template.FsBased):
- validation_chf = 'mtime'
- chf_types = ('mtime', 'md5')
+ validation_chf = 'md5'
+ chf_types = ('md5', 'mtime')
autocommits = False
synchronous = False
diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py
index a7c6de0..d292eed 100644
--- a/pym/portage/cache/template.py
+++ b/pym/portage/cache/template.py
@@ -54,6 +54,10 @@ class database(object):
if self.serialize_eclasses and "_eclasses_" in d:
for chf_type in chf_types:
+ if '_%s_' % chf_type not in d:
+ # Skip the reconstruct_eclasses call, since this
+ # entry appears to have a different chf_type.
+ continue
try:
d["_eclasses_"] =
reconstruct_eclasses(cpv, d["_eclasses_"],
chf_type,
paths=self.store_eclass_paths)
@@ -62,6 +66,9 @@ class database(object):
raise
else:
break
+ else:
+ raise cache_errors.CacheCorruption(cpv,
+ 'entry does not contain a recognized
chf_type')
elif "_eclasses_" not in d:
d["_eclasses_"] = {}
@@ -310,6 +317,18 @@ def serialize_eclasses(eclass_dict, chf_type='mtime', paths=True):
for k, v in sorted(eclass_dict.items(), key=_keysorter))
+def _md5_deserializer(md5):
+ if len(md5) != 32:
+ raise ValueError('expected 32 hex digits')
+ return md5
+
+
+_chf_deserializers = {
+ 'md5': _md5_deserializer,
+ 'mtime': long,
+}
+
+
def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
"""returns a dict when handed a string generated by
serialize_eclasses"""
eclasses = eclass_string.rstrip().lstrip().split("\t")
@@ -317,9 +336,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
# occasionally this occurs in the fs backends. they suck.
return {}
- converter = _unicode
- if chf_type == 'mtime':
- converter = long
+ converter = _chf_deserializers.get(chf_type, lambda x: x)
if paths:
if len(eclasses) % 3 != 0:
@@ -340,6 +357,7 @@ def reconstruct_eclasses(cpv, eclass_string, chf_type='mtime', paths=True):
raise cache_errors.CacheCorruption(cpv,
"_eclasses_ was of invalid len %i" % len(eclasses))
except ValueError:
- raise cache_errors.CacheCorruption