Re: [gentoo-portage-dev] [PATCH v2] flat_hash: enable md5 validation for /var/cache/edb/dep (bug 568934)

2015-12-22 Thread Alexander Berntsen
-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA512

Great! Thanks. Go ahead & push.
- -- 
Alexander
berna...@gentoo.org
https://secure.plaimi.net/~alexander
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v2

iQIcBAEBCgAGBQJWeYZDAAoJENQqWdRUGk8BWy4P/jM8su6UwETWM3WeYYLQ6SwW
+hlX8BBpeEGGykktJbeczOQTLP2b3fo6R5tyZx4EWgLPXE5XrZwhvQfmw1Xt0/1T
0/mqkIjsE71clZO8BNN98MdGXye5ryVmX5eVd6lJBWKCRwHkZI9cFTbk4UUCRC4x
cxfh3giqZCiYIKUfvsKRL0R4Mja1QYKm22Fgrn1u/5/XY3TdU08OJ6xY7nVdBsBz
WTRbCZ8VggoVNkLt1WbDGk7VxI3e7t64qjz6rSTh1KK035eDPfL4iaVMf0EZHWTW
9dZ81aZ2PZvw6OgKTs1xWc7KOct0I81fa7iuPueDgVrxVjhEJEAEczAVBcU/VbW4
sFc0NB0rz6KsDXET9A++QXuOtaWdKMoV6XXhEpAFWRTyG0cZOD2mzzu2NuznhrrJ
LZJZ1fn370eNhx6+8zCj1rZOfCGAZy4ImSkwZnV9iLZmajPAYyci4d6crZw+yx41
g/RzW+Luk0cjvmoWV28dXpfJSBunqU7EATee5IZXU3QpTbTKEuky4K4yVQnzr0p1
pMud9KnNQLzYChHRhXOGECkcD7KgX8ugMcXF+waR2iDYLbusDCsa886qETBHaM9p
3FzTd4kHg+4zUfN8+ccsKNL25IRUIm8pkqZKZc7F0CtL0pjaor28nsCPajSPCsHP
i8TiHbBaOywtdTAwUOGg
=dfFa
-----END PGP SIGNATURE-----



[gentoo-portage-dev] [PATCH v2] flat_hash: enable md5 validation for /var/cache/edb/dep (bug 568934)

2015-12-22 Thread Zac Medico
Since operations like `git reset --hard` (useful to implement shallow
pull) will reset timestamps of all files in the tree, the status quo
of using timestamps for validation of cache in /var/cache/edb/dep
is sub-optimal.

For forward-compatibility, add a flat_hash.mtime_md5_database cache
module which is capable of validating cache entries containing either
mtimes or md5 digests. Update the config class to use this cache
module by default for /var/cache/edb/dep.

X-Gentoo-Bug: 568934
X-Gentoo-Bug-url: https://bugs.gentoo.org/show_bug.cgi?id=568934
---
[PATCH v2] only updates the commit message to mention the motivation for using
md5 instead of mtime.

 pym/portage/cache/flat_hash.py       |  5 +++++
 pym/portage/cache/template.py        | 35 ++++++++++++++++++++++++++++++-----
 pym/portage/package/ebuild/config.py |  6 +++---
 3 files changed, 38 insertions(+), 8 deletions(-)

diff --git a/pym/portage/cache/flat_hash.py b/pym/portage/cache/flat_hash.py
index 5304296..cca0f10 100644
--- a/pym/portage/cache/flat_hash.py
+++ b/pym/portage/cache/flat_hash.py
@@ -160,3 +160,8 @@ class md5_database(database):
 
validation_chf = 'md5'
store_eclass_paths = False
+
+
+class mtime_md5_database(database):
+   validation_chf = 'mtime'
+   chf_types = ('mtime', 'md5')
diff --git a/pym/portage/cache/template.py b/pym/portage/cache/template.py
index bc81b86..a942b36 100644
--- a/pym/portage/cache/template.py
+++ b/pym/portage/cache/template.py
@@ -47,8 +47,21 @@ class database(object):
self.updates = 0
d=self._getitem(cpv)
if self.serialize_eclasses and "_eclasses_" in d:
-   d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"],
-   self.validation_chf, paths=self.store_eclass_paths)
+   try:
+   chf_types = self.chf_types
+   except AttributeError:
+   chf_types = (self.validation_chf,)
+
+   for chf_type in chf_types:
+   try:
+   d["_eclasses_"] = reconstruct_eclasses(cpv, d["_eclasses_"],
+   chf_type, paths=self.store_eclass_paths)
+   except cache_errors.CacheCorruption:
+   if chf_type is chf_types[-1]:
+   raise
+   else:
+   break
+
elif "_eclasses_" not in d:
d["_eclasses_"] = {}
# Never return INHERITED, since portdbapi.aux_get() will
@@ -204,15 +217,27 @@ class database(object):
return x
 
def validate_entry(self, entry, ebuild_hash, eclass_db):
-   hash_key = '_%s_' % self.validation_chf
+   try:
+   chf_types = self.chf_types
+   except AttributeError:
+   chf_types = (self.validation_chf,)
+
+   for chf_type in chf_types:
+   if self._validate_entry(chf_type, entry, ebuild_hash, eclass_db):
+   return True
+
+   return False
+
+   def _validate_entry(self, chf_type, entry, ebuild_hash, eclass_db):
+   hash_key = '_%s_' % chf_type
try:
entry_hash = entry[hash_key]
except KeyError:
return False
else:
-   if entry_hash != getattr(ebuild_hash, self.validation_chf):
+   if entry_hash != getattr(ebuild_hash, chf_type):
return False
-   update = eclass_db.validate_and_rewrite_cache(entry['_eclasses_'], self.validation_chf,
+   update = eclass_db.validate_and_rewrite_cache(entry['_eclasses_'], chf_type,
self.store_eclass_paths)
if update is None:
return False
diff --git a/pym/portage/package/ebuild/config.py b/pym/portage/package/ebuild/config.py
index d45c2a0..0bae55b 100644
--- a/pym/portage/package/ebuild/config.py
+++ b/pym/portage/package/ebuild/config.py
@@ -160,8 +160,8 @@ class config(object):
'repository', 'RESTRICT', 'LICENSE',)
 
_module_aliases = {
-   "cache.metadata_overlay.database" : "portage.cache.flat_hash.database",
-   "portage.cache.metadata_overlay.database" : "portage.cache.flat_hash.database",
+   "cache.metadata_overlay.database" : "portage.cache.flat_hash.mtime_md5_database",
+   "portage.cache.metadata_overlay.database" : "portage.cache.flat_hash.mtime_md5_database",
}
 
_case_insensitive_vars = special_env_vars.case_insensitive_vars
@@ -444,7 +444,7 @@ class config(obj