Jeroen T. Vermeulen has proposed merging
lp:~jtv/launchpad/bug-994650-scrub-in-batches into lp:launchpad with
lp:~jtv/launchpad/bug-994650-scrub-faster as a prerequisite.
Requested reviews:
Launchpad code reviewers (launchpad-reviewers)
Related bugs:
Bug #994650 in Launchpad itself: "POFileTranslator falls out of date"
https://bugs.launchpad.net/launchpad/+bug/994650
For more details, see:
https://code.launchpad.net/~jtv/launchpad/bug-994650-scrub-in-batches/+merge/105189
This is one of two further optimizations for POFileTranslator scrubbing as
mentioned in the preceding MP:
https://code.launchpad.net/~jtv/launchpad/bug-994650-scrub-faster
Here you see two parts of the scrubbing process separated further: finding
which POFiles in a batch need their POFileTranslator entries fixed, and
actually doing so. The benefit is in a new, intervening step: bulk-load all
objects needed for doing this work. Disappointingly, about half of the
relevant object graph is needed for log output. But we probably should have it
anyway, because tweaking such processes without helpful logs can be highly
demotivating.
As an added bonus, it turns out we don't really need to load full
POFileTranslator records, let alone cache them across these steps. That's one
big memory load off my mind. I deliberately didn't make the scrubber check and
correct dates on existing POFileTranslator records; a bit of imprecision is
fine.
As a next step, which should further reduce memory load as well as DB querying,
we can cache sets of POTMsgSet ids per template across items in a batch. (But
not outside batches, since they may change between transactions).
The tests don't go into the new components. They already cover the aggregate
behaviour of fix_pofile() and the main loop; there'd be a lot of duplication
and I'd hate to lose integration test coverage as a side effect of moving
things into more fine-grained unit tests. Also I'm trying to correct for a
past of focusing too much on fine-grained tests. But, dear reviewer, if you
see anything that you do want tested then please say so.
Jeroen
--
https://code.launchpad.net/~jtv/launchpad/bug-994650-scrub-in-batches/+merge/105189
Your team Launchpad code reviewers is requested to review the proposed merge of
lp:~jtv/launchpad/bug-994650-scrub-in-batches into lp:launchpad.
=== modified file 'lib/lp/translations/scripts/scrub_pofiletranslator.py'
--- lib/lp/translations/scripts/scrub_pofiletranslator.py 2012-05-09 11:13:31 +0000
+++ lib/lp/translations/scripts/scrub_pofiletranslator.py 2012-05-09 11:13:34 +0000
@@ -8,14 +8,25 @@
'ScrubPOFileTranslator',
]
+from collections import namedtuple
+
from storm.expr import (
Coalesce,
Desc,
)
import transaction
+from lp.registry.model.distribution import Distribution
+from lp.registry.model.distroseries import DistroSeries
+from lp.registry.model.product import Product
+from lp.registry.model.productseries import ProductSeries
+from lp.services.database.bulk import (
+ load,
+ load_related,
+ )
from lp.services.database.lpstorm import IStore
from lp.services.looptuner import TunableLoop
+from lp.services.worlddata.model.language import Language
from lp.translations.model.pofile import POFile
from lp.translations.model.pofiletranslator import POFileTranslator
from lp.translations.model.potemplate import POTemplate
@@ -40,9 +51,12 @@
return query.order_by(POTemplate.name, POFile.languageID)
-def get_pofile_details(pofile_ids):
+def summarize_pofiles(pofile_ids):
"""Retrieve relevant parts of `POFile`s with given ids.
+ This gets just enough information to determine whether any of the
+ `POFile`s need their `POFileTranslator` records fixed.
+
:param pofile_ids: Iterable of `POFile` ids.
:return: Dict mapping each id in `pofile_ids` to a duple of
`POTemplate` id and `Language` id for the associated `POFile`.
@@ -113,15 +127,14 @@
def get_pofiletranslators(pofile_id):
- """Get `POFileTranslator` entries for a `POFile`.
+ """Get `Person` ids from `POFileTranslator` entries for a `POFile`.
- Returns a dict mapping each contributor's person id to their
- `POFileTranslator` record.
+ Returns a `set` of `Person` ids.
"""
store = IStore(POFileTranslator)
- pofts = store.find(
- POFileTranslator, POFileTranslator.pofileID == pofile_id)
- return dict((poft.personID, poft) for poft in pofts)
+ return set(store.find(
+ POFileTranslator.personID,
+ POFileTranslator.pofileID == pofile_id))
def remove_pofiletranslators(logger, pofile, person_ids):
@@ -139,14 +152,14 @@
def remove_unwarranted_pofiletranslators(logger, pofile, pofts, contribs):
"""Delete `POFileTranslator` records that shouldn't be there."""
- excess = set(pofts) - set(contribs)
+ excess = pofts - set(contribs)
if len(excess) > 0:
remove_pofiletranslators(logger, pofile, excess)
def create_missing_pofiletranslators(logger, pofile, pofts, contribs):
"""Create `POFileTranslator` records that were missing."""
- shortage = set(contribs) - set(pofts)
+ shortage = set(contribs) - pofts
if len(shortage) == 0:
return
logger.debug(
@@ -159,9 +172,8 @@
date_last_touched=contribs[missing_contributor]))
-def fix_pofile(logger, pofile_id, potmsgset_ids, pofiletranslators):
+def fix_pofile(logger, pofile, potmsgset_ids, pofiletranslators):
"""This `POFile` needs fixing. Load its data & fix it."""
- pofile = IStore(POFile).get(POFile, pofile_id)
contribs = get_contributions(pofile, potmsgset_ids)
remove_unwarranted_pofiletranslators(
logger, pofile, pofiletranslators, contribs)
@@ -169,17 +181,73 @@
logger, pofile, pofiletranslators, contribs)
-def scrub_pofile(logger, pofile_id, template_id, language_id):
- """Scrub `POFileTranslator` entries for one `POFile`.
+def needs_fixing(template_id, language_id, potmsgset_ids, pofiletranslators):
+ """Does the `POFile` with given details need `POFileTranslator` changes?
- Removes inappropriate entries and adds missing ones.
+ :param template_id: id of the `POTemplate` for the `POFile`.
+ :param language_id: id of the `Language` the `POFile` translates to.
+ :param potmsgset_ids: ids of the `POTMsgSet` items participating in the
+ template.
+ :param pofiletranslators: `POFileTranslator` objects for the `POFile`.
+ :return: Bool: does the existing set of `POFileTranslator` need fixing?
"""
- pofiletranslators = get_pofiletranslators(pofile_id)
- potmsgset_ids = get_potmsgset_ids(template_id)
contributors = summarize_contributors(
template_id, language_id, potmsgset_ids)
- if set(pofiletranslators) != set(contributors):
- fix_pofile(logger, pofile_id, potmsgset_ids, pofiletranslators)
+ return pofiletranslators != set(contributors)
+
+
+# A tuple describing a POFile that needs its POFileTranslators fixed.
+WorkItem = namedtuple("WorkItem", [
+ 'pofile_id',
+ 'potmsgset_ids',
+ 'pofiletranslators',
+ ])
+
+
+def gather_work_items(pofile_ids):
+ """Produce `WorkItem`s for those `POFile`s that need fixing.
+
+ :param pofile_ids: An iterable of `POFile` ids to check.
+ :param pofile_summaries: Dict as returned by `summarize_pofiles`.
+ :return: A sequence of `WorkItem`s for those `POFile`s that need fixing.
+ """
+ pofile_summaries = summarize_pofiles(pofile_ids)
+ work_items = []
+ for pofile_id in pofile_ids:
+ template_id, language_id = pofile_summaries[pofile_id]
+ potmsgset_ids = get_potmsgset_ids(template_id)
+ pofts = get_pofiletranslators(pofile_id)
+ if needs_fixing(template_id, language_id, potmsgset_ids, pofts):
+ work_items.append(WorkItem(pofile_id, potmsgset_ids, pofts))
+
+ return work_items
+
+
+def preload_work_items(work_items):
+ """Bulk load data that will be needed to process `work_items`.
+
+ :param work_items: A sequence of `WorkItem` records.
+ :return: A dict mapping `POFile` ids from `work_items` to their
+ respective `POFile` objects.
+ """
+ pofiles = load(POFile, [work_item.pofile_id for work_item in work_items])
+ load_related(Language, pofiles, ['languageID'])
+ templates = load_related(POTemplate, pofiles, ['potemplateID'])
+ distroseries = load_related(DistroSeries, templates, ['distroseriesID'])
+ load_related(Distribution, distroseries, ['distributionID'])
+ productseries = load_related(
+ ProductSeries, templates, ['productseriesID'])
+ load_related(Product, productseries, ['productID'])
+ return dict((pofile.id, pofile) for pofile in pofiles)
+
+
+def process_work_items(logger, work_items, pofiles):
+ """Fix the `POFileTranslator` records covered by `work_items`."""
+ for work_item in work_items:
+ pofile = pofiles[work_item.pofile_id]
+ fix_pofile(
+ logger, pofile, work_item.potmsgset_ids,
+ work_item.pofiletranslators)
class ScrubPOFileTranslator(TunableLoop):
@@ -199,13 +267,11 @@
batch = self.pofile_ids[start_offset:self.next_offset]
if len(batch) == 0:
self.next_offset = None
- return
-
- pofile_details = get_pofile_details(batch)
- for pofile_id in batch:
- template_id, language_id = pofile_details[pofile_id]
- scrub_pofile(self.log, pofile_id, template_id, language_id)
- transaction.commit()
+ else:
+ work_items = gather_work_items(batch)
+ pofiles = preload_work_items(work_items)
+ process_work_items(self.log, work_items, pofiles)
+ transaction.commit()
def isDone(self):
"""See `ITunableLoop`."""
=== modified file 'lib/lp/translations/scripts/tests/test_scrub_pofiletranslator.py'
--- lib/lp/translations/scripts/tests/test_scrub_pofiletranslator.py 2012-05-09 11:13:31 +0000
+++ lib/lp/translations/scripts/tests/test_scrub_pofiletranslator.py 2012-05-09 11:13:34 +0000
@@ -20,14 +20,14 @@
from lp.testing.layers import ZopelessDatabaseLayer
from lp.translations.model.pofiletranslator import POFileTranslator
from lp.translations.scripts.scrub_pofiletranslator import (
+ fix_pofile,
get_contributions,
- get_pofile_details,
get_pofile_ids,
get_pofiletranslators,
get_potmsgset_ids,
- scrub_pofile,
ScrubPOFileTranslator,
summarize_contributors,
+ summarize_pofiles,
)
@@ -128,11 +128,11 @@
self.assertEqual(
1, size_distance(ordering, pofiles[0].id, pofiles[1].id))
- def test_get_pofile_details_maps_id_to_template_and_language_ids(self):
+ def test_summarize_pofiles_maps_id_to_template_and_language_ids(self):
pofile = self.factory.makePOFile()
self.assertEqual(
{pofile.id: (pofile.potemplate.id, pofile.language.id)},
- get_pofile_details([pofile.id]))
+ summarize_pofiles([pofile.id]))
def test_get_potmsgset_ids_returns_potmsgset_ids(self):
pofile = self.factory.makePOFile()
@@ -232,41 +232,37 @@
potmsgset_ids = get_potmsgset_ids(pofile.potemplate.id)
self.assertEqual({}, get_contributions(pofile, potmsgset_ids))
- def test_get_pofiletranslators_gets_pofiletranslators_for_pofile(self):
+ def test_get_pofiletranslators_gets_translators_for_pofile(self):
pofile = self.factory.makePOFile()
tm = self.make_message_with_pofiletranslator(pofile)
- pofts = get_pofiletranslators(pofile.id)
- self.assertContentEqual([tm.submitter.id], pofts.keys())
- poft = pofts[tm.submitter.id]
- self.assertEqual(pofile, poft.pofile)
+ self.assertContentEqual(
+ [tm.submitter.id], get_pofiletranslators(pofile.id))
- def test_scrub_pofile_leaves_good_pofiletranslator_in_place(self):
+ def test_fix_pofile_leaves_good_pofiletranslator_in_place(self):
pofile = self.factory.makePOFile()
tm = self.make_message_with_pofiletranslator(pofile)
old_poft = self.query_pofiletranslator(pofile, tm.submitter).one()
- scrub_pofile(
- fake_logger, pofile.id, pofile.potemplate.id, pofile.language.id)
+ fix_pofile(
+ fake_logger, pofile, [tm.potmsgset.id], set([tm.submitter.id]))
new_poft = self.query_pofiletranslator(pofile, tm.submitter).one()
self.assertEqual(old_poft, new_poft)
- def test_scrub_pofile_deletes_unwarranted_entries(self):
+ def test_fix_pofile_deletes_unwarranted_entries(self):
# Deleting POFileTranslator records is not something the app
# server ever does, so it requires special privileges.
self.becomeDbUser('postgres')
poft = self.make_pofiletranslator_without_message()
(pofile, person) = (poft.pofile, poft.person)
- scrub_pofile(
- fake_logger, pofile.id, pofile.potemplate.id, pofile.language.id)
+ fix_pofile(fake_logger, pofile, [], set([person.id]))
self.assertIsNone(self.query_pofiletranslator(pofile, person).one())
- def test_scrub_pofile_adds_missing_entries(self):
+ def test_fix_pofile_adds_missing_entries(self):
pofile = self.factory.makePOFile()
tm = self.make_message_without_pofiletranslator(pofile)
- scrub_pofile(
- fake_logger, pofile.id, pofile.potemplate.id, pofile.language.id)
+ fix_pofile(fake_logger, pofile, [tm.potmsgset.id], set())
new_poft = self.query_pofiletranslator(pofile, tm.submitter).one()
self.assertEqual(tm.submitter, new_poft.person)
@@ -284,9 +280,9 @@
# Try to break the loop if it failed to commit its changes.
transaction.abort()
- # The unwarranted POFileTranslator record has been deleted.
+ # An unwarranted POFileTranslator record has been deleted.
self.assertIsNotNone(
self.query_pofiletranslator(pofile, tm.submitter).one())
- # The missing POFileTranslator has been created.
+ # A missing POFileTranslator has been created.
self.assertIsNone(
self.query_pofiletranslator(pofile, noncontributor).one())
_______________________________________________
Mailing list: https://launchpad.net/~launchpad-reviewers
Post to : [email protected]
Unsubscribe : https://launchpad.net/~launchpad-reviewers
More help : https://help.launchpad.net/ListHelp