Colin Watson has proposed merging ~cjwatson/launchpad:update-pkgcache-binary-race into launchpad:master.
Commit message: Fix update-pkgcache crash due to binary deletion Requested reviews: Launchpad code reviewers (launchpad-reviewers) Related bugs: Bug #2023698 in Launchpad itself: "update-pkgcache crashing with OOPS" https://bugs.launchpad.net/launchpad/+bug/2023698 For more details, see: https://code.launchpad.net/~cjwatson/launchpad/+git/launchpad/+merge/444768 The `update-pkgcache` script started crashing on production about a week ago, and mysteriously started working again today without us changing anything. Based on code inspection, the only way I can see for this particular crash to happen is if a binary was deleted between `DistroSeriesPackageCache.findCurrentBinaryPackageNames` and `DistroSeriesPackageCache._update` (`update-pkgcache` is slow enough that it makes many commits along the way, so this is possible), and simulating that in a test reproduces the same crash. It seems safe to skip the affected binary in such cases, since `DistroSeriesPackageCache.removeOld` will eventually clean up its cache entries. -- Your team Launchpad code reviewers is requested to review the proposed merge of ~cjwatson/launchpad:update-pkgcache-binary-race into launchpad:master.
diff --git a/lib/lp/soyuz/model/distroseriespackagecache.py b/lib/lp/soyuz/model/distroseriespackagecache.py index bedc2fd..93d3b3e 100644 --- a/lib/lp/soyuz/model/distroseriespackagecache.py +++ b/lib/lp/soyuz/model/distroseriespackagecache.py @@ -77,6 +77,10 @@ class DistroSeriesPackageCache(StormBase): ), ) .config(distinct=True) + # Not necessary for correctness, but useful for testability; and + # at the time of writing the sort only adds perhaps 10-20 ms to + # the query time on staging. + .order_by(BinaryPackagePublishingHistory.binarypackagenameID) ) return bulk.load(BinaryPackageName, bpn_ids) @@ -204,6 +208,14 @@ class DistroSeriesPackageCache(StormBase): ) for bpn in binarypackagenames: + if bpn not in details_map: + log.debug( + "No active publishing details found for %s; perhaps " + "removed in parallel with update-pkgcache? Skipping.", + bpn.name, + ) + continue + cache = cache_map[bpn] details = details_map[bpn] # make sure the cached name, summary and description are correct diff --git a/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py b/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py index 875e956..5560b57 100644 --- a/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py +++ b/lib/lp/soyuz/scripts/tests/test_update_pkgcache.py @@ -6,8 +6,11 @@ import transaction from lp.services.log.logger import BufferLogger +from lp.soyuz.enums import PackagePublishingStatus +from lp.soyuz.model.distroseriespackagecache import DistroSeriesPackageCache from lp.soyuz.scripts.update_pkgcache import PackageCacheUpdater from lp.testing import TestCaseWithFactory +from lp.testing.dbuser import dbuser from lp.testing.layers import ZopelessDatabaseLayer @@ -34,10 +37,61 @@ class TestPackageCacheUpdater(TestCaseWithFactory): self.assertEqual(0, archive.sources_cached) self.factory.makeSourcePackagePublishingHistory(archive=archive) script = self.makeScript() - script.updateDistributionCache(distribution, archives[0]) - archives[0].updateArchiveCache() + with dbuser(self.dbuser): + script.updateDistributionCache(distribution, archives[0]) + archives[0].updateArchiveCache() self.assertEqual(1, archives[0].sources_cached) archives[1].disable() - script.updateDistributionCache(distribution, archives[1]) - archives[1].updateArchiveCache() + with dbuser(self.dbuser): + script.updateDistributionCache(distribution, archives[1]) + archives[1].updateArchiveCache() self.assertEqual(0, archives[1].sources_cached) + + def test_binary_deleted_during_run(self): + distroseries = self.factory.makeDistroSeries() + das = self.factory.makeDistroArchSeries(distroseries=distroseries) + archive = self.factory.makeArchive( + distribution=distroseries.distribution + ) + bpns = [self.factory.makeBinaryPackageName() for _ in range(4)] + bpphs = [ + self.factory.makeBinaryPackagePublishingHistory( + binarypackagename=bpn, + distroarchseries=das, + status=PackagePublishingStatus.PUBLISHED, + archive=archive, + ) + for bpn in bpns + ] + + class InstrumentedTransaction: + def commit(self): + transaction.commit() + # Change this binary package publishing history's status + # after the first commit, to simulate the situation where a + # BPPH ceases to be active part-way through an + # update-pkgcache run. + if bpphs[2].status == PackagePublishingStatus.PUBLISHED: + with dbuser("launchpad"): + bpphs[2].requestDeletion(archive.owner) + + logger = BufferLogger() + with dbuser(self.dbuser): + DistroSeriesPackageCache.updateAll( + distroseries, + archive=archive, + ztm=InstrumentedTransaction(), + log=logger, + commit_chunk=2, + ) + archive.updateArchiveCache() + self.assertEqual(4, archive.binaries_cached) + self.assertEqual( + "DEBUG Considering binaries {bpns[0].name}, {bpns[1].name}\n" + "DEBUG Committing\n" + "DEBUG Considering binaries {bpns[2].name}, {bpns[3].name}\n" + "DEBUG No active publishing details found for {bpns[2].name};" + " perhaps removed in parallel with update-pkgcache? Skipping.\n" + "DEBUG Committing\n".format(bpns=bpns), + logger.getLogBuffer(), + )
_______________________________________________ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp