I only see patch 1 of apparently 3 patches? I'm not super keen on the whole retry logic, and I think I have a way to do this that doesn't require that - give me a few days to try and polish it up enough to send out.
Regards, Daniel Stephen Finucane <step...@that.guru> writes: > Currently, the 'SeriesReference' object has a unique constraint on the > two fields it has, 'series', which is a foreign key to 'Series', and > 'msgid'. This is the wrong constraint. What we actually want to enforce > is that a patch, cover letter or comment is referenced by a single > series, or rather a single series per project the submission appears on. > As such, we should be enforcing uniqueness on the msgid and the project > that the patch, cover letter or comment belongs to. > > This requires adding a new field to the object, 'project', since it's > not possible to do something like the following: > > unique_together = [('msgid', 'series__project')] > > This is detailed here [1]. In addition, the migration needs a precursor > migration step to merge any broken series. > > [1] https://stackoverflow.com/a/4440189/613428 > > Signed-off-by: Stephen Finucane <step...@that.guru> > Closes: #241 > Cc: Daniel Axtens <d...@axtens.net> > Cc: Petr Vorel <petr.vo...@gmail.com> > --- > v4: > - Further tweaks to logging > v3: > - Improve logging > --- > .../0038_unique_series_references.py | 121 +++++++++++++++++ > patchwork/models.py | 6 +- > patchwork/parser.py | 123 +++++++++--------- > patchwork/tests/test_parser.py | 9 +- > patchwork/tests/utils.py | 6 +- > 5 files changed, 199 insertions(+), 66 deletions(-) > create mode 100644 patchwork/migrations/0038_unique_series_references.py > > diff --git a/patchwork/migrations/0038_unique_series_references.py > b/patchwork/migrations/0038_unique_series_references.py > new file mode 100644 > index 00000000..91799020 > --- /dev/null > +++ b/patchwork/migrations/0038_unique_series_references.py > @@ -0,0 +1,121 @@ > +from django.db import connection, migrations, models > +from django.db.models import Count > +import django.db.models.deletion > + > + > +def merge_duplicate_series(apps, schema_editor): > + SeriesReference = apps.get_model('patchwork', 'SeriesReference') > + Patch = apps.get_model('patchwork', 'Patch') > + > + msgid_seriesrefs = {} > + > + # find all SeriesReference that share a msgid but point to different > series > + # and decide which of the series is going to be the authoritative one > + msgid_counts = ( > + SeriesReference.objects.values('msgid') > + .annotate(count=Count('msgid')) > + .filter(count__gt=1) > + ) > + for msgid_count in msgid_counts: > + msgid = msgid_count['msgid'] > + chosen_ref = None > + for series_ref in SeriesReference.objects.filter(msgid=msgid): > + if series_ref.series.cover_letter: > + if chosen_ref: > + # I don't think this can happen, but explode if it does > + raise Exception( > + "Looks like you've got two or more series that share > " > + "some patches but do not share a cover letter. > Unable " > + "to auto-resolve." > + ) > + > + # if a series has a cover letter, that's the one we'll group > + # everything under > + chosen_ref = series_ref > + > + if not chosen_ref: > + # if none of the series have cover letters, simply use the last > + # one (hint: this relies on Python's weird scoping for for loops > + # where 'series_ref' is still accessible outside the loop) > + chosen_ref = series_ref > + > + msgid_seriesrefs[msgid] = chosen_ref > + > + # reassign any patches referring to non-authoritative series to point to > + # the authoritative one, and delete the other series; we do this > separately > + # to allow us a chance to raise the exception above if necessary > + for msgid, chosen_ref in msgid_seriesrefs.items(): > + for series_ref in SeriesReference.objects.filter(msgid=msgid): > + if series_ref == chosen_ref: > + continue > + > + # update the patches to point to our chosen series instead, on > the > + # assumption that all other metadata is correct > + for patch in Patch.objects.filter(series=series_ref.series): > + patch.series = chosen_ref.series > + patch.save() > + > + # delete the other series (which will delete the series ref) > + series_ref.series.delete() > + > + > +def copy_project_field(apps, schema_editor): > + if connection.vendor == 'postgresql': > + schema_editor.execute( > + """ > + UPDATE patchwork_seriesreference > + SET project_id = patchwork_series.project_id > + FROM patchwork_series > + WHERE patchwork_seriesreference.series_id = patchwork_series.id > + """ > + ) > + elif connection.vendor == 'mysql': > + schema_editor.execute( > + """ > + UPDATE patchwork_seriesreference, patchwork_series > + SET patchwork_seriesreference.project_id = > patchwork_series.project_id > + WHERE patchwork_seriesreference.series_id = patchwork_series.id > + """ > + ) # noqa > + else: > + SeriesReference = apps.get_model('patchwork', 'SeriesReference') > + > + for series_ref in SeriesReference.objects.all().select_related( > + 'series' > + ): > + series_ref.project = series_ref.series.project > + series_ref.save() > + > + > +class Migration(migrations.Migration): > + > + dependencies = [('patchwork', '0037_event_actor')] > + > + operations = [ > + migrations.RunPython( > + merge_duplicate_series, migrations.RunPython.noop, atomic=False > + ), > + migrations.AddField( > + model_name='seriesreference', > + name='project', > + field=models.ForeignKey( > + null=True, > + on_delete=django.db.models.deletion.CASCADE, > + to='patchwork.Project', > + ), > + ), > + migrations.AlterUniqueTogether( > + name='seriesreference', unique_together={('project', 'msgid')} > + ), > + migrations.RunPython( > + copy_project_field, migrations.RunPython.noop, atomic=False > + ), > + migrations.AlterField( > + model_name='seriesreference', > + name='project', > + field=models.ForeignKey( > + on_delete=django.db.models.deletion.CASCADE, > + to='patchwork.Project', > + ), > + ), > + ] > diff --git a/patchwork/models.py b/patchwork/models.py > index 7f0efd48..dbaff024 100644 > --- a/patchwork/models.py > +++ b/patchwork/models.py > @@ -79,7 +79,8 @@ class Project(models.Model): > webscm_url = models.CharField(max_length=2000, blank=True) > list_archive_url = models.CharField(max_length=2000, blank=True) > list_archive_url_format = models.CharField( > - max_length=2000, blank=True, > + max_length=2000, > + blank=True, > help_text="URL format for the list archive's Message-ID redirector. " > "{} will be replaced by the Message-ID.") > commit_url_format = models.CharField( > @@ -787,6 +788,7 @@ class SeriesReference(models.Model): > required to handle the case whereby one or more patches are > received before the cover letter. > """ > + project = models.ForeignKey(Project, on_delete=models.CASCADE) > series = models.ForeignKey(Series, related_name='references', > related_query_name='reference', > on_delete=models.CASCADE) > @@ -796,7 +798,7 @@ class SeriesReference(models.Model): > return self.msgid > > class Meta: > - unique_together = [('series', 'msgid')] > + unique_together = [('project', 'msgid')] > > > class Bundle(models.Model): > diff --git a/patchwork/parser.py b/patchwork/parser.py > index c424729b..c0084cc0 100644 > --- a/patchwork/parser.py > +++ b/patchwork/parser.py > @@ -16,6 +16,7 @@ import re > > from django.contrib.auth.models import User > from django.db.utils import IntegrityError > +from django.db import transaction > from django.utils import six > > from patchwork.models import Comment > @@ -256,16 +257,9 @@ def _find_series_by_references(project, mail): > for ref in refs: > try: > return SeriesReference.objects.get( > - msgid=ref[:255], series__project=project).series > + msgid=ref[:255], project=project).series > except SeriesReference.DoesNotExist: > continue > - except SeriesReference.MultipleObjectsReturned: > - # FIXME: Open bug: this can happen when we're processing > - # messages in parallel. Pick the first and log. > - logger.error("Multiple SeriesReferences for %s in project %s!" % > - (ref[:255], project.name)) > - return SeriesReference.objects.filter( > - msgid=ref[:255], series__project=project).first().series > > > def _find_series_by_markers(project, mail, author): > @@ -1092,47 +1086,65 @@ def parse_mail(mail, list_id=None): > except IntegrityError: > raise DuplicateMailError(msgid=msgid) > > - # if we don't have a series marker, we will never have an existing > - # series to match against. > - series = None > - if n: > - series = find_series(project, mail, author) > + for attempt in range(1, 11): # arbitrary retry count > + try: > + with transaction.atomic(): > + # if we don't have a series marker, we will never have an > + # existing series to match against. > + series = None > + if n: > + series = find_series(project, mail, author) > + else: > + x = n = 1 > + > + # We will create a new series if: > + # - there is no existing series to assign this patch to, > or > + # - there is an existing series, but it already has a > patch > + # with this number in it > + if not series or Patch.objects.filter( > + series=series, number=x).count(): > + series = Series.objects.create( > + project=project, > + date=date, > + submitter=author, > + version=version, > + total=n) > + > + # NOTE(stephenfin) We must save references for > series. > + # We do this to handle the case where a later patch > is > + # received first. Without storing references, it > would > + # not be possible to identify the relationship > between > + # patches as the earlier patch does not reference the > + # later one. > + for ref in refs + [msgid]: > + ref = ref[:255] > + # we don't want duplicates > + try: > + # we could have a ref to a previous series. > + # (For example, a series sent in reply to > + # another series.) That should not create a > + # series ref for this series, so check for > the > + # msg-id only, not the msg-id/series pair. > + SeriesReference.objects.get( > + msgid=ref, project=project) > + except SeriesReference.DoesNotExist: > + SeriesReference.objects.create( > + msgid=ref, project=project, > series=series) > + break > + except IntegrityError: > + # we lost the race so go again > + logger.warning('Conflict while saving series. This is ' > + 'probably because multiple patches belonging ' > + 'to the same series have been received at ' > + 'once. Trying again (attempt %02d/10)', > + attempt) > else: > - x = n = 1 > - > - # We will create a new series if: > - # - there is no existing series to assign this patch to, or > - # - there is an existing series, but it already has a patch with this > - # number in it > - if not series or Patch.objects.filter(series=series, > number=x).count(): > - series = Series.objects.create( > - project=project, > - date=date, > - submitter=author, > - version=version, > - total=n) > - > - # NOTE(stephenfin) We must save references for series. We > - # do this to handle the case where a later patch is > - # received first. Without storing references, it would not > - # be possible to identify the relationship between patches > - # as the earlier patch does not reference the later one. > - for ref in refs + [msgid]: > - ref = ref[:255] > - # we don't want duplicates > - try: > - # we could have a ref to a previous series. (For > - # example, a series sent in reply to another > - # series.) That should not create a series ref > - # for this series, so check for the msg-id only, > - # not the msg-id/series pair. > - SeriesReference.objects.get(msgid=ref, > - series__project=project) > - except SeriesReference.DoesNotExist: > - SeriesReference.objects.create(series=series, msgid=ref) > - except SeriesReference.MultipleObjectsReturned: > - logger.error("Multiple SeriesReferences for %s" > - " in project %s!" % (ref, project.name)) > + # we failed to save the series so return the series-less patch > + logger.warning('Failed to save series. Your patch with message > ID ' > + '%s has been saved but this should not happen. ' > + 'Please report this as a bug and include logs', > + msgid) > + return patch > > # add to a series if we have found one, and we have a numbered > # patch. Don't add unnumbered patches (for example diffs sent > @@ -1170,14 +1182,9 @@ def parse_mail(mail, list_id=None): > # message > try: > series = SeriesReference.objects.get( > - msgid=msgid, series__project=project).series > + msgid=msgid, project=project).series > except SeriesReference.DoesNotExist: > series = None > - except SeriesReference.MultipleObjectsReturned: > - logger.error("Multiple SeriesReferences for %s" > - " in project %s!" % (msgid, project.name)) > - series = SeriesReference.objects.filter( > - msgid=msgid, series__project=project).first().series > > if not series: > series = Series.objects.create( > @@ -1190,12 +1197,8 @@ def parse_mail(mail, list_id=None): > # we don't save the in-reply-to or references fields > # for a cover letter, as they can't refer to the same > # series > - try: > - SeriesReference.objects.get_or_create(series=series, > - msgid=msgid) > - except SeriesReference.MultipleObjectsReturned: > - logger.error("Multiple SeriesReferences for %s" > - " in project %s!" % (msgid, project.name)) > + SeriesReference.objects.create( > + msgid=msgid, project=project, series=series) > > try: > cover_letter = CoverLetter.objects.create( > diff --git a/patchwork/tests/test_parser.py b/patchwork/tests/test_parser.py > index 0bf71585..0edbd87a 100644 > --- a/patchwork/tests/test_parser.py > +++ b/patchwork/tests/test_parser.py > @@ -421,17 +421,20 @@ class SeriesCorrelationTest(TestCase): > msgids = [make_msgid()] > project = create_project() > series_v1 = create_series(project=project) > - create_series_reference(msgid=msgids[0], series=series_v1) > + create_series_reference(msgid=msgids[0], series=series_v1, > + project=project) > > # ...and three patches > for i in range(3): > msgids.append(make_msgid()) > - create_series_reference(msgid=msgids[-1], series=series_v1) > + create_series_reference(msgid=msgids[-1], series=series_v1, > + project=project) > > # now create a new series with "cover letter" > msgids.append(make_msgid()) > series_v2 = create_series(project=project) > - ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2) > + ref_v2 = create_series_reference(msgid=msgids[-1], series=series_v2, > + project=project) > > # ...and the "first patch" of this new series > msgid = make_msgid() > diff --git a/patchwork/tests/utils.py b/patchwork/tests/utils.py > index 577183d0..4ead1781 100644 > --- a/patchwork/tests/utils.py > +++ b/patchwork/tests/utils.py > @@ -282,8 +282,12 @@ def create_series(**kwargs): > > def create_series_reference(**kwargs): > """Create 'SeriesReference' object.""" > + project = kwargs.pop('project', create_project()) > + series = kwargs.pop('series', create_series(project=project)) > + > values = { > - 'series': create_series() if 'series' not in kwargs else None, > + 'series': series, > + 'project': project, > 'msgid': make_msgid(), > } > values.update(**kwargs) > -- > 2.23.0 _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork