On Fri, 2019-10-18 at 09:48 +1100, Daniel Axtens wrote: > Hi Stephen, > > Were you aiming this for 2.2 or for 3? > > I'm thinking 3 but happy to be convinced otherwise.
I'd personally like this to be a blocker for v2.2, though the Python 3 part of the migration can possibly be dropped (I can respin if necessary). This is a pretty dumb issue and the fix is pretty trivial, all in all, but it's not backportable (model changes). I don't really want to have broken series keep popping up for the next 12 months (or however long it takes for us to wrap up a v3.0). Let me know if there's anything I can do to help move it along. > I'll also open a topic branch for PW3 and start posting some of my > Python 2 removal patches. I've a question/comment on this but I'll reply on the other thread. Stephen > Regards, > Daniel > > > Currently, the 'SeriesReference' object has a unique constraint on the > > two fields it has, 'series', which is a foreign key to 'Series', and > > 'msgid'. This is the wrong constraint. What we actually want to enforce > > is that a patch, cover letter or comment is referenced by a single > > series, or rather a single series per project the submission appears on. > > As such, we should be enforcing uniqueness on the msgid and the project > > that the patch, cover letter or comment belongs to. > > > > This requires adding a new field to the object, 'project', since it's > > not possible to do something like the following: > > > > unique_together = [('msgid', 'series__project')] > > > > This is detailed here [1]. In addition, the migration needs a precursor > > migration step to merge any broken series. > > > > [1] https://stackoverflow.com/a/4440189/613428 > > > > Signed-off-by: Stephen Finucane <step...@that.guru> > > Closes: #241 > > Cc: Daniel Axtens <d...@axtens.net> > > Cc: Petr Vorel <petr.vo...@gmail.com> > > --- > > patchwork/migrations/0037_python_3.py | 298 ++++++++++++++++++ > > .../0038_unique_series_references.py | 121 +++++++ > > patchwork/models.py | 6 +- > > patchwork/parser.py | 122 +++---- > > patchwork/tests/test_parser.py | 9 +- > > patchwork/tests/utils.py | 6 +- > > 6 files changed, 496 insertions(+), 66 deletions(-) > > create mode 100644 patchwork/migrations/0037_python_3.py > > create mode 100644 patchwork/migrations/0038_unique_series_references.py > > > > diff --git patchwork/migrations/0037_python_3.py > > patchwork/migrations/0037_python_3.py > > new file mode 100644 > > index 00000000..416a7045 > > --- /dev/null > > +++ patchwork/migrations/0037_python_3.py > > @@ -0,0 +1,298 @@ > > +import datetime > > + > > +from django.conf import settings > > +from django.db import migrations, models > > +import django.db.models.deletion > > + > > +import patchwork.models > > + > > + > > +class Migration(migrations.Migration): > > + > > + dependencies = [('patchwork', '0036_project_commit_url_format')] > > + > > + operations = [ > > + migrations.AlterField( > > + model_name='check', > > + name='context', > > + field=models.SlugField( > > + default='default', > > + help_text='A label to discern check from checks of other > > testing systems.', # noqa > > + max_length=255, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='check', > > + name='description', > > + field=models.TextField( > > + blank=True, > > + help_text='A brief description of the check.', > > + null=True, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='check', > > + name='state', > > + field=models.SmallIntegerField( > > + choices=[ > > + (0, 'pending'), > > + (1, 'success'), > > + (2, 'warning'), > > + (3, 'fail'), > > + ], > > + default=0, > > + help_text='The state of the check.', > > + ), > > + ), > > + migrations.AlterField( > > + model_name='check', > > + name='target_url', > > + field=models.URLField( > > + blank=True, > > + help_text='The target URL to associate with this check. > > This should be specific to the patch.', # noqa > > + null=True, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='comment', > > + name='submission', > > + field=models.ForeignKey( > > + on_delete=django.db.models.deletion.CASCADE, > > + related_name='comments', > > + related_query_name='comment', > > + to='patchwork.Submission', > > + ), > > + ), > > + migrations.AlterField( > > + model_name='delegationrule', > > + name='path', > > + field=models.CharField( > > + help_text='An fnmatch-style pattern to match filenames > > against.', # noqa > > + max_length=255, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='delegationrule', > > + name='priority', > > + field=models.IntegerField( > > + default=0, > > + help_text='The priority of the rule. Rules with a higher > > priority will override rules with lower priorities', # noqa > > + ), > > + ), > > + migrations.AlterField( > > + model_name='delegationrule', > > + name='user', > > + field=models.ForeignKey( > > + help_text='A user to delegate the patch to.', > > + on_delete=django.db.models.deletion.CASCADE, > > + to=settings.AUTH_USER_MODEL, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='emailconfirmation', > > + name='type', > > + field=models.CharField( > > + choices=[ > > + ('userperson', 'User-Person association'), > > + ('registration', 'Registration'), > > + ('optout', 'Email opt-out'), > > + ], > > + max_length=20, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='event', > > + name='category', > > + field=models.CharField( > > + choices=[ > > + ('cover-created', 'Cover Letter Created'), > > + ('patch-created', 'Patch Created'), > > + ('patch-completed', 'Patch Completed'), > > + ('patch-state-changed', 'Patch State Changed'), > > + ('patch-delegated', 'Patch Delegate Changed'), > > + ('check-created', 'Check Created'), > > + ('series-created', 'Series Created'), > > + ('series-completed', 'Series Completed'), > > + ], > > + db_index=True, > > + help_text='The category of the event.', > > + max_length=20, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='event', > > + name='cover', > > + field=models.ForeignKey( > > + blank=True, > > + help_text='The cover letter that this event was created > > for.', > > + null=True, > > + on_delete=django.db.models.deletion.CASCADE, > > + related_name='+', > > + to='patchwork.CoverLetter', > > + ), > > + ), > > + migrations.AlterField( > > + model_name='event', > > + name='date', > > + field=models.DateTimeField( > > + default=datetime.datetime.utcnow, > > + help_text='The time this event was created.', > > + ), > > + ), > > + migrations.AlterField( > > + model_name='event', > > + name='patch', > > + field=models.ForeignKey( > > + blank=True, > > + help_text='The patch that this event was created for.', > > + null=True, > > + on_delete=django.db.models.deletion.CASCADE, > > + related_name='+', > > + to='patchwork.Patch', > > + ), > > + ), > > + migrations.AlterField( > > + model_name='event', > > + name='project', > > + field=models.ForeignKey( > > + help_text='The project that the events belongs to.', > > + on_delete=django.db.models.deletion.CASCADE, > > + related_name='+', > > + to='patchwork.Project', > > + ), > > + ), > > + migrations.AlterField( > > + model_name='event', > > + name='series', > > + field=models.ForeignKey( > > + blank=True, > > + help_text='The series that this event was created for.', > > + null=True, > > + on_delete=django.db.models.deletion.CASCADE, > > + related_name='+', > > + to='patchwork.Series', > > + ), > > + ), > > + migrations.AlterField( > > + model_name='patch', > > + name='number', > > + field=models.PositiveSmallIntegerField( > > + default=None, > > + help_text='The number assigned to this patch in the > > series', > > + null=True, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='project', > > + name='commit_url_format', > > + field=models.CharField( > > + blank=True, > > + help_text='URL format for a particular commit. {} will be > > replaced by the commit SHA.', # noqa > > + max_length=2000, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='project', > > + name='list_archive_url_format', > > + field=models.CharField( > > + blank=True, > > + help_text="URL format for the list archive's Message-ID > > redirector. {} will be replaced by the Message-ID.", # noqa > > + max_length=2000, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='project', > > + name='subject_match', > > + field=models.CharField( > > + blank=True, > > + default='', > > + help_text='Regex to match the subject against if only part > > of emails sent to the list belongs to this project. Will be used with > > IGNORECASE and MULTILINE flags. If rules for more projects match the first > > one returned from DB is chosen; empty field serves as a default for every > > email which has no other match.', # noqa > > + max_length=64, > > + validators=[patchwork.models.validate_regex_compiles], > > + ), > > + ), > > + migrations.AlterField( > > + model_name='series', > > + name='name', > > + field=models.CharField( > > + blank=True, > > + help_text='An optional name to associate with the series, > > e.g. "John\'s PCI series".', # noqa > > + max_length=255, > > + null=True, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='series', > > + name='total', > > + field=models.IntegerField( > > + help_text='Number of patches in series as indicated by the > > subject prefix(es)' # noqa > > + ), > > + ), > > + migrations.AlterField( > > + model_name='series', > > + name='version', > > + field=models.IntegerField( > > + default=1, > > + help_text='Version of series as indicated by the subject > > prefix(es)', # noqa > > + ), > > + ), > > + migrations.AlterField( > > + model_name='seriesreference', > > + name='series', > > + field=models.ForeignKey( > > + on_delete=django.db.models.deletion.CASCADE, > > + related_name='references', > > + related_query_name='reference', > > + to='patchwork.Series', > > + ), > > + ), > > + migrations.AlterField( > > + model_name='tag', > > + name='abbrev', > > + field=models.CharField( > > + help_text='Short (one-or-two letter) abbreviation for the > > tag, used in table column headers', # noqa > > + max_length=2, > > + unique=True, > > + ), > > + ), > > + migrations.AlterField( > > + model_name='tag', > > + name='pattern', > > + field=models.CharField( > > + help_text='A simple regex to match the tag in the content > > of a message. Will be used with MULTILINE and IGNORECASE flags. eg. > > ^Acked-by:', # noqa > > + max_length=50, > > + validators=[patchwork.models.validate_regex_compiles], > > + ), > > + ), > > + migrations.AlterField( > > + model_name='tag', > > + name='show_column', > > + field=models.BooleanField( > > + default=True, > > + help_text="Show a column displaying this tag's count in > > the patch list view", # noqa > > + ), > > + ), > > + migrations.AlterField( > > + model_name='userprofile', > > + name='items_per_page', > > + field=models.PositiveIntegerField( > > + default=100, help_text='Number of items to display per > > page' > > + ), > > + ), > > + migrations.AlterField( > > + model_name='userprofile', > > + name='send_email', > > + field=models.BooleanField( > > + default=False, > > + help_text='Selecting this option allows patchwork to send > > email on your behalf', # noqa > > + ), > > + ), > > + migrations.AlterField( > > + model_name='userprofile', > > + name='show_ids', > > + field=models.BooleanField( > > + default=False, > > + help_text='Show click-to-copy patch IDs in the list view', > > + ), > > + ), > > + ] > > diff --git patchwork/migrations/0038_unique_series_references.py > > patchwork/migrations/0038_unique_series_references.py > > new file mode 100644 > > index 00000000..c5517ada > > --- /dev/null > > +++ patchwork/migrations/0038_unique_series_references.py > > @@ -0,0 +1,121 @@ > > +from django.db import connection, migrations, models > > +from django.db.models import Count > > +import django.db.models.deletion > > + > > + > > +def merge_duplicate_series(apps, schema_editor): > > + SeriesReference = apps.get_model('patchwork', 'SeriesReference') > > + Patch = apps.get_model('patchwork', 'Patch') > > + > > + msgid_seriesrefs = {} > > + > > + # find all SeriesReference that share a msgid but point to different > > series > > + # and decide which of the series is going to be the authoritative one > > + msgid_counts = ( > > + SeriesReference.objects.values('msgid') > > + .annotate(count=Count('msgid')) > > + .filter(count__gt=1) > > + ) > > + for msgid_count in msgid_counts: > > + msgid = msgid_count['msgid'] > > + chosen_ref = None > > + for series_ref in SeriesReference.objects.filter(msgid=msgid): > > + if series_ref.series.cover_letter: > > + if chosen_ref: > > + # I don't think this can happen, but explode if it does > > + raise Exception( > > + "Looks like you've got two or more series that > > share " > > + "some patches but do not share a cover letter. > > Unable " > > + "to auto-resolve." > > + ) > > + > > + # if a series has a cover letter, that's the one we'll > > group > > + # everything under > > + chosen_ref = series_ref > > + > > + if not chosen_ref: > > + # if none of the series have cover letters, simply use the last > > + # one (hint: this relies on Python's weird scoping for for > > loops > > + # where 'series_ref' is still accessible outside the loop) > > + chosen_ref = series_ref > > + > > + msgid_seriesrefs[msgid] = chosen_ref > > + > > + # reassign any patches referring to non-authoritative series to point > > to > > + # the authoritative one, and delete the other series; we do this > > separately > > + # to allow us a chance to raise the exception above if necessary > > + for msgid, chosen_ref in msgid_seriesrefs.items(): > > + for series_ref in SeriesReference.objects.filter(msgid=msgid): > > + if series_ref == chosen_ref: > > + continue > > + > > + # update the patches to point to our chosen series instead, on > > the > > + # assumption that all other metadata is correct > > + for patch in Patch.objects.filter(series=series_ref.series): > > + patch.series = chosen_ref.series > > + patch.save() > > + > > + # delete the other series (which will delete the series ref) > > + series_ref.series.delete() > > + > > + > > +def copy_project_field(apps, schema_editor): > > + if connection.vendor == 'postgresql': > > + schema_editor.execute( > > + """ > > + UPDATE patchwork_seriesreference > > + SET project_id = patchwork_series.project_id > > + FROM patchwork_series > > + WHERE patchwork_seriesreference.series_id = > > patchwork_series.id > > + """ > > + ) > > + elif connection.vendor == 'mysql': > > + schema_editor.execute( > > + """ > > + UPDATE patchwork_seriesreference, patchwork_series > > + SET patchwork_seriesreference.project_id = > > patchwork_series.project_id > > + WHERE patchwork_seriesreference.series_id = patchwork_series.id > > + """ > > + ) # noqa > > + else: > > + SeriesReference = apps.get_model('patchwork', 'SeriesReference') > > + > > + for series_ref in SeriesReference.objects.all().select_related( > > + 'series' > > + ): > > + series_ref.project = series_ref.series.project > > + series_ref.save() > > + > > + > > +class Migration(migrations.Migration): > > + > > + dependencies = [('patchwork', '0037_python_3')] > > + > > + operations = [ > > + migrations.RunPython( > > + merge_duplicate_series, migrations.RunPython.noop, atomic=False > > + ), > > + migrations.AddField( > > + model_name='seriesreference', > > + name='project', > > + field=models.ForeignKey( > > + null=True, > > + on_delete=django.db.models.deletion.CASCADE, > > + to='patchwork.Project', > > + ), > > + ), > > + migrations.AlterUniqueTogether( > > + name='seriesreference', unique_together={('project', 'msgid')} > > + ), > > + migrations.RunPython( > > + copy_project_field, migrations.RunPython.noop, atomic=False > > + ), > > + migrations.AlterField( > > + model_name='seriesreference', > > + name='project', > > + field=models.ForeignKey( > > + on_delete=django.db.models.deletion.CASCADE, > > + to='patchwork.Project', > > + ), > > + ), > > + ] > > diff --git patchwork/models.py patchwork/models.py > > index c198bc2c..e5bfecf8 100644 > > --- patchwork/models.py > > +++ patchwork/models.py > > @@ -79,7 +79,8 @@ class Project(models.Model): > > webscm_url = models.CharField(max_length=2000, blank=True) > > list_archive_url = models.CharField(max_length=2000, blank=True) > > list_archive_url_format = models.CharField( > > - max_length=2000, blank=True, > > + max_length=2000, > > + blank=True, > > help_text="URL format for the list archive's Message-ID > > redirector. " > > "{} will be replaced by the Message-ID.") > > commit_url_format = models.CharField( > > @@ -783,6 +784,7 @@ class SeriesReference(models.Model): > > required to handle the case whereby one or more patches are > > received before the cover letter. > > """ > > + project = models.ForeignKey(Project, on_delete=models.CASCADE) > > series = models.ForeignKey(Series, related_name='references', > > related_query_name='reference', > > on_delete=models.CASCADE) > > @@ -792,7 +794,7 @@ class SeriesReference(models.Model): > > return self.msgid > > > > class Meta: > > - unique_together = [('series', 'msgid')] > > + unique_together = [('project', 'msgid')] > > > > > > class Bundle(models.Model): > > diff --git patchwork/parser.py patchwork/parser.py > > index 7dc66bc0..4b837ae0 100644 > > --- patchwork/parser.py > > +++ patchwork/parser.py > > @@ -16,6 +16,7 @@ import re > > > > from django.contrib.auth.models import User > > from django.db.utils import IntegrityError > > +from django.db import transaction > > from django.utils import six > > > > from patchwork.models import Comment > > @@ -251,16 +252,9 @@ def _find_series_by_references(project, mail): > > for ref in refs: > > try: > > return SeriesReference.objects.get( > > - msgid=ref[:255], series__project=project).series > > + msgid=ref[:255], project=project).series > > except SeriesReference.DoesNotExist: > > continue > > - except SeriesReference.MultipleObjectsReturned: > > - # FIXME: Open bug: this can happen when we're processing > > - # messages in parallel. Pick the first and log. > > - logger.error("Multiple SeriesReferences for %s in project %s!" > > % > > - (ref[:255], project.name)) > > - return SeriesReference.objects.filter( > > - msgid=ref[:255], series__project=project).first().series > > > > > > def _find_series_by_markers(project, mail, author): > > @@ -1029,47 +1023,64 @@ def parse_mail(mail, list_id=None): > > except IntegrityError: > > raise DuplicateMailError(msgid=msgid) > > > > - # if we don't have a series marker, we will never have an existing > > - # series to match against. > > - series = None > > - if n: > > - series = find_series(project, mail, author) > > + for attempt in range(10): # arbitrary retry count > > + try: > > + with transaction.atomic(): > > + # if we don't have a series marker, we will never have > > an > > + # existing series to match against. > > + series = None > > + if n: > > + series = find_series(project, mail, author) > > + else: > > + x = n = 1 > > + > > + # We will create a new series if: > > + # - there is no existing series to assign this patch > > to, or > > + # - there is an existing series, but it already has a > > patch > > + # with this number in it > > + if not series or Patch.objects.filter( > > + series=series, number=x).count(): > > + series = Series.objects.create( > > + project=project, > > + date=date, > > + submitter=author, > > + version=version, > > + total=n) > > + > > + # NOTE(stephenfin) We must save references for > > series. > > + # We do this to handle the case where a later > > patch is > > + # received first. Without storing references, it > > would > > + # not be possible to identify the relationship > > between > > + # patches as the earlier patch does not reference > > the > > + # later one. > > + for ref in refs + [msgid]: > > + ref = ref[:255] > > + # we don't want duplicates > > + try: > > + # we could have a ref to a previous series. > > + # (For example, a series sent in reply to > > + # another series.) That should not create a > > + # series ref for this series, so check for > > the > > + # msg-id only, not the msg-id/series pair. > > + SeriesReference.objects.get( > > + msgid=ref, project=project) > > + except SeriesReference.DoesNotExist: > > + SeriesReference.objects.create( > > + msgid=ref, project=project, > > series=series) > > + break > > + except IntegrityError: > > + # we lost the race so go again > > + logger.info('Conflict while saving series. This is ' > > + 'probably because multiple patches belonging ' > > + 'to the same series have been received at ' > > + 'once. Trying again (attempt %02d/10)', > > + attempt) > > else: > > - x = n = 1 > > - > > - # We will create a new series if: > > - # - there is no existing series to assign this patch to, or > > - # - there is an existing series, but it already has a patch with > > this > > - # number in it > > - if not series or Patch.objects.filter(series=series, > > number=x).count(): > > - series = Series.objects.create( > > - project=project, > > - date=date, > > - submitter=author, > > - version=version, > > - total=n) > > - > > - # NOTE(stephenfin) We must save references for series. We > > - # do this to handle the case where a later patch is > > - # received first. Without storing references, it would not > > - # be possible to identify the relationship between patches > > - # as the earlier patch does not reference the later one. > > - for ref in refs + [msgid]: > > - ref = ref[:255] > > - # we don't want duplicates > > - try: > > - # we could have a ref to a previous series. (For > > - # example, a series sent in reply to another > > - # series.) That should not create a series ref > > - # for this series, so check for the msg-id only, > > - # not the msg-id/series pair. > > - SeriesReference.objects.get(msgid=ref, > > - series__project=project) > > - except SeriesReference.DoesNotExist: > > - SeriesReference.objects.create(series=series, > > msgid=ref) > > - except SeriesReference.MultipleObjectsReturned: > > - logger.error("Multiple SeriesReferences for %s" > > - " in project %s!" % (ref, project.name)) > > + # we failed to save the series so return the series-less patch > > + logger.warning('Failed to save series. Your patch has been ' > > + 'saved but this should not happen. Please ' > > + 'report this as a bug and include logs') > > + return patch > > > > # add to a series if we have found one, and we have a numbered > > # patch. Don't add unnumbered patches (for example diffs sent > > @@ -1107,14 +1118,9 @@ def parse_mail(mail, list_id=None): > > # message > > try: > > series = SeriesReference.objects.get( > > - msgid=msgid, series__project=project).series > > + msgid=msgid, project=project).series > > except SeriesReference.DoesNotExist: > > series = None > > - except SeriesReference.MultipleObjectsReturned: > > - logger.error("Multiple SeriesReferences for %s" > > - " in project %s!" % (msgid, project.name)) > > - series = SeriesReference.objects.filter( > > - msgid=msgid, series__project=project).first().series > > > > if not series: > > series = Series.objects.create( > > @@ -1127,12 +1133,8 @@ def parse_mail(mail, list_id=None): > > # we don't save the in-reply-to or references fields > > # for a cover letter, as they can't refer to the same > > # series > > - try: > > - SeriesReference.objects.get_or_create(series=series, > > - msgid=msgid) > > - except SeriesReference.MultipleObjectsReturned: > > - logger.error("Multiple SeriesReferences for %s" > > - " in project %s!" % (msgid, project.name)) > > + SeriesReference.objects.create( > > + msgid=msgid, project=project, series=series) > > > > try: > > cover_letter = CoverLetter.objects.create( > > diff --git patchwork/tests/test_parser.py patchwork/tests/test_parser.py > > index e5a2fca3..a69c64ba 100644 > > --- patchwork/tests/test_parser.py > > +++ patchwork/tests/test_parser.py > > @@ -361,17 +361,20 @@ class SeriesCorrelationTest(TestCase): > > msgids = [make_msgid()] > > project = create_project() > > series_v1 = create_series(project=project) > > - create_series_reference(msgid=msgids[0], series=series_v1) > > + create_series_reference(msgid=msgids[0], series=series_v1, > > + project=project) > > > > # ...and three patches > > for i in range(3): > > msgids.append(make_msgid()) > > - create_series_reference(msgid=msgids[-1], series=series_v1) > > + create_series_reference(msgid=msgids[-1], series=series_v1, > > + project=project) > > > > # now create a new series with "cover letter" > > msgids.append(make_msgid()) > > series_v2 = create_series(project=project) > > - ref_v2 = create_series_reference(msgid=msgids[-1], > > series=series_v2) > > + ref_v2 = create_series_reference(msgid=msgids[-1], > > series=series_v2, > > + project=project) > > > > # ...and the "first patch" of this new series > > msgid = make_msgid() > > diff --git patchwork/tests/utils.py patchwork/tests/utils.py > > index 577183d0..4ead1781 100644 > > --- patchwork/tests/utils.py > > +++ patchwork/tests/utils.py > > @@ -282,8 +282,12 @@ def create_series(**kwargs): > > > > def create_series_reference(**kwargs): > > """Create 'SeriesReference' object.""" > > + project = kwargs.pop('project', create_project()) > > + series = kwargs.pop('series', create_series(project=project)) > > + > > values = { > > - 'series': create_series() if 'series' not in kwargs else None, > > + 'series': series, > > + 'project': project, > > 'msgid': make_msgid(), > > } > > values.update(**kwargs) > > -- > > 2.21.0 _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork