On 07/29/2014 05:43 PM, Mike McLean wrote:
> These changes cause the hub to use generators for some potentially large
> queries internally. The point is to reduce memory usage.
> 
> In the case that a large query is returned via rpc, the marshaller has
> been extended to handle generators. The marshaller will still create a
> giant pile of xml, but we at least still save the memory for the
> original data.
> 
> I also have some work on an iterating marshaller that will not keep the
> entire xml return in memory, but that's a bit more complicated, so I'm
> going to leave that for another day.
> 
> There are other cases (e.g. repo_init) where we have very large queries
> internally. In these cases, this patch set will result in significant
> memory savings.

A few more related changes: additional logging and a bugfix.

Also, I will probably drop the chunksize patch; it doesn't seem to matter
unless the db connection has a lot of lag.

These changes are working well for me and are likely to land in git soon.

>From 91c725a40dbbc99748a813f4134b5ff01908d8e9 Mon Sep 17 00:00:00 2001
From: Mike McLean <[email protected]>
Date: Tue, 29 Jul 2014 22:29:14 -0400
Subject: [PATCH 12/14] log details on unknown maven file errors

---
 hub/kojihub.py | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/hub/kojihub.py b/hub/kojihub.py
index 467a48d..65ed305 100644
--- a/hub/kojihub.py
+++ b/hub/kojihub.py
@@ -10470,6 +10470,13 @@ class HostExports(object):
                     pass
                 else:
                     if not ignore_unknown:
+                        logger.error("Unknown file for %(group_id)s:%(artifact_id)s:%(version)s", maven_info)
+                        if build_id:
+                            build = get_build(build_id)
+                            logger.error("g:a:v supplied by build %(nvr)s", build)
+                            logger.error("Build supplies %i archives: %r", len(build_archives), build_archives.keys())
+                        if tag_archive:
+                            logger.error("Size mismatch, br: %i, db: %i", fileinfo['size'], tag_archive['size'])
                         raise koji.BuildrootError, 'Unknown file in build environment: %s, size: %s' % \
                               ('%s/%s' % (fileinfo['path'], fileinfo['filename']), fileinfo['size'])
 
-- 
1.8.3.1

>From caebfdb814b5656b24a2a8c7b51df3ab89045841 Mon Sep 17 00:00:00 2001
From: Mike McLean <[email protected]>
Date: Tue, 29 Jul 2014 23:53:08 -0400
Subject: [PATCH 13/14] log error when multiple builds found for a g:a:v

---
 hub/kojihub.py | 23 ++++++++++++++++-------
 1 file changed, 16 insertions(+), 7 deletions(-)

diff --git a/hub/kojihub.py b/hub/kojihub.py
index 65ed305..3439d28 100644
--- a/hub/kojihub.py
+++ b/hub/kojihub.py
@@ -10382,9 +10382,14 @@ class HostExports(object):
         for archive in maven_tag_archives(tag['id'], event_id=repo['create_event']):
             # unfortunately pgdb does not appear to intern strings, but still
             # better not to create any new ones
-            maven_build_index.setdefault(
-                archive['group_id'], {}).setdefault(
-                    archive['artifact_id'], {})[archive['version']] = archive['build_id']
+            idx_build = \
+                maven_build_index.setdefault(
+                    archive['group_id'], {}).setdefault(
+                        archive['artifact_id'], {}).setdefault(
+                            archive['version'], archive['build_id'])
+            if idx_build != archive['build_id']:
+                logger.error("Found multiple builds for %(group_id)s:%(artifact_id)s:%(version)s. Current build: %(build_id)i", archive)
+                logger.error("Indexed build id was %i", idx_build)
 
         if not ignore:
             ignore = []
@@ -10417,10 +10422,14 @@ class HostExports(object):
             else:
                 build = get_build(dep, strict=True)
                 for archive in list_archives(buildID=build['id'], type='maven'):
-                    maven_build_index.setdefault(
-                        archive['group_id'], {}).setdefault(
-                            archive['artifact_id'], {}).setdefault(
-                                archive['version'], archive['build_id'])
+                    idx_build = \
+                        maven_build_index.setdefault(
+                            archive['group_id'], {}).setdefault(
+                                archive['artifact_id'], {}).setdefault(
+                                    archive['version'], archive['build_id'])
+                    if idx_build != archive['build_id']:
+                        logger.error("Found multiple builds for %(group_id)s:%(artifact_id)s:%(version)s. Current build: %(build_id)i", archive)
+                        logger.error("Indexed build id was %i", idx_build)
 
         ignore.extend(task_deps.values())
 
-- 
1.8.3.1

>From 1db64656f73c14064c96e5ff5e33e918a8ca602b Mon Sep 17 00:00:00 2001
From: Mike McLean <[email protected]>
Date: Wed, 30 Jul 2014 12:57:48 -0400
Subject: [PATCH 14/14] fix typo in updateMavenBuildRootList

---
 hub/kojihub.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/hub/kojihub.py b/hub/kojihub.py
index 3439d28..62220cf 100644
--- a/hub/kojihub.py
+++ b/hub/kojihub.py
@@ -10466,7 +10466,7 @@ class HostExports(object):
                 build_archives = {}
             else:
                 tinfo = dslice(maven_info, ['group_id', 'artifact_id', 'version'])
-                build_archives = list_archives(type='maven', typeInfo=tinfo)
+                build_archives = list_archives(buildID=build_id, type='maven', typeInfo=tinfo)
                 # index by filename
                 build_archives = dict([(a['filename'], a) for a in build_archives])
 
-- 
1.8.3.1

--
buildsys mailing list
[email protected]
https://admin.fedoraproject.org/mailman/listinfo/buildsys

Reply via email to