On Mon, 2018-07-09 at 18:10 +0200, vkaba...@redhat.com wrote: > From: Veronika Kabatova <vkaba...@redhat.com> > > Solve #113 and #57 GitHub issues, fix up returning tags in the API, > keep track of tag origin to be able to add tags to comments in the API. > > Use relations Tag-Patch and Tag-CoverLetter to avoid duplication of > tags for each patch in series, and use `series` attribute of > SubmissionTag as a notion of a tag which is related to each patch in the > series (because it comes from cover letter or it's comments) > > Signed-off-by: Veronika Kabatova <vkaba...@redhat.com>
To start, apologies if I'm repeating myself on any of the comments below: it's been a long time since I first reviewed this (sorry!). Also, for the next revision of this, this can definitely be broken up on purpose :) Given that tags aren't currently shown in the API, I'd move the API changes into a separate patch. Similarly, docs (though not release notes) aren't needed and can be broken up and moved into a separate patch. Now to the review... > --- > patchwork/api/comment.py | 18 ++++- > patchwork/api/cover.py | 19 ++++- > patchwork/api/filters.py | 68 +++++++++++++++- > patchwork/api/patch.py | 18 ++++- > patchwork/models.py | 173 > ++++++++++++++++------------------------ > patchwork/templatetags/patch.py | 3 +- > patchwork/views/__init__.py | 3 - > patchwork/views/utils.py | 10 ++- > 8 files changed, 191 insertions(+), 121 deletions(-) > > diff --git a/patchwork/api/comment.py b/patchwork/api/comment.py > index 5a5adb1..e03207e 100644 > --- a/patchwork/api/comment.py > +++ b/patchwork/api/comment.py > @@ -26,6 +26,7 @@ from patchwork.api.base import > BaseHyperlinkedModelSerializer > from patchwork.api.base import PatchworkPermission > from patchwork.api.embedded import PersonSerializer > from patchwork.models import Comment > +from patchwork.models import SubmissionTag > > > class CommentListSerializer(BaseHyperlinkedModelSerializer): > @@ -34,6 +35,7 @@ class CommentListSerializer(BaseHyperlinkedModelSerializer): > subject = SerializerMethodField() > headers = SerializerMethodField() > submitter = PersonSerializer(read_only=True) > + tags = SerializerMethodField() > > def get_web_url(self, instance): > request = self.context.get('request') > @@ -43,6 +45,20 @@ class > CommentListSerializer(BaseHyperlinkedModelSerializer): > return email.parser.Parser().parsestr(comment.headers, > True).get('Subject', '') > > + def get_tags(self, instance): > + if not instance.submission.project.use_tags: > + return {} > + > + tags = SubmissionTag.objects.filter( > + comment=instance > + ).values_list('tag__name', 'value') > + > + result = {} > + for name, value in tags: > + result.setdefault(name, []).append(value) > + > + return result This smells of something that should be placed in models.py. Is there any reason we _can't_ do it there, e.g. via a property? > + > def get_headers(self, comment): > headers = {} > > @@ -60,7 +76,7 @@ class CommentListSerializer(BaseHyperlinkedModelSerializer): > class Meta: > model = Comment > fields = ('id', 'web_url', 'msgid', 'date', 'subject', 'submitter', > - 'content', 'headers') > + 'content', 'headers', 'tags') > read_only_fields = fields > versioned_fields = { > '1.1': ('web_url', ), You definitely need changes to 'get_queryset' here. I can help you work on these but tl;dr: look at what we do for 'check_set' in 'patchwork/api/patch.py'. > diff --git a/patchwork/api/cover.py b/patchwork/api/cover.py > index b497fd8..b17b9f7 100644 > --- a/patchwork/api/cover.py > +++ b/patchwork/api/cover.py > @@ -30,6 +30,7 @@ from patchwork.api.embedded import PersonSerializer > from patchwork.api.embedded import ProjectSerializer > from patchwork.api.embedded import SeriesSerializer > from patchwork.models import CoverLetter > +from patchwork.models import SubmissionTag > > > class CoverLetterListSerializer(BaseHyperlinkedModelSerializer): > @@ -40,6 +41,7 @@ class > CoverLetterListSerializer(BaseHyperlinkedModelSerializer): > mbox = SerializerMethodField() > series = SeriesSerializer(many=True, read_only=True) > comments = SerializerMethodField() > + tags = SerializerMethodField() > > def get_web_url(self, instance): > request = self.context.get('request') > @@ -53,13 +55,26 @@ class > CoverLetterListSerializer(BaseHyperlinkedModelSerializer): > return self.context.get('request').build_absolute_uri( > reverse('api-cover-comment-list', kwargs={'pk': cover.id})) > > + def get_tags(self, instance): > + if not instance.project.use_tags: > + return {} > + > + tags = SubmissionTag.objects.filter( > + submission=instance).values_list('tag__name', 'value').distinct() > + > + result = {} > + for name, value in tags: > + result.setdefault(name, []).append(value) > + > + return result > + As above, this should be a property on the model, I imagine. > class Meta: > model = CoverLetter > fields = ('id', 'url', 'web_url', 'project', 'msgid', 'date', 'name', > - 'submitter', 'mbox', 'series', 'comments') > + 'submitter', 'mbox', 'series', 'comments', 'tags') > read_only_fields = fields > versioned_fields = { > - '1.1': ('web_url', 'mbox', 'comments'), > + '1.1': ('web_url', 'mbox', 'comments', 'tags'), This should be '1.2' now. > } > extra_kwargs = { > 'url': {'view_name': 'api-cover-detail'}, Same comments RE: 'get_queryset'. > diff --git a/patchwork/api/filters.py b/patchwork/api/filters.py > index 73353d9..e213181 100644 > --- a/patchwork/api/filters.py > +++ b/patchwork/api/filters.py > @@ -21,9 +21,11 @@ from django.contrib.auth.models import User > from django.core.exceptions import ValidationError > from django.db.models import Q > from django_filters.rest_framework import FilterSet > +from django_filters import Filter > from django_filters import IsoDateTimeFilter > from django_filters import ModelMultipleChoiceFilter > from django.forms import ModelMultipleChoiceField as BaseMultipleChoiceField > +from django.forms.widgets import HiddenInput > from django.forms.widgets import MultipleHiddenInput > > from patchwork.models import Bundle > @@ -35,6 +37,7 @@ from patchwork.models import Person > from patchwork.models import Project > from patchwork.models import Series > from patchwork.models import State > +from patchwork.models import SubmissionTag > > > # custom fields, filters > @@ -136,6 +139,60 @@ class StateFilter(ModelMultipleChoiceFilter): > field_class = StateChoiceField > > > +class TagNameFilter(Filter): > + > + def filter(self, qs, tag_name): > + submissions_series = SubmissionTag.objects.filter( > + tag__name__iexact=tag_name > + ).values_list('submission__id', 'series') > + > + submission_list = [] > + series_list = [] > + for submission, series in submissions_series: > + submission_list.append(submission) > + series_list.append(series) > + > + return qs.filter(Q(id__in=submission_list) | > Q(series__in=series_list)) > + > + > +class TagValueFilter(Filter): > + > + def filter(self, qs, tag_value): > + submissions_series = SubmissionTag.objects.filter( > + value__icontains=tag_value > + ).values_list('submission__id', 'series') > + > + submission_list = [] > + series_list = [] > + for submission, series in submissions_series: > + submission_list.append(submission) > + series_list.append(series) > + > + return qs.filter(Q(id__in=submission_list) | > Q(series__in=series_list)) > + > + > +class TagFilter(Filter): > + > + def filter(self, qs, query): > + try: > + tag_name, tag_value = query.split(',', 1) > + except ValueError: > + raise ValidationError('Query in format `tag=name,value` > expected!') > + > + submissions_series = SubmissionTag.objects.filter( > + tag__name__iexact=tag_name, > + value__icontains=tag_value > + ).values_list('submission__id', 'series') > + > + submission_list = [] > + series_list = [] > + for submission, series in submissions_series: > + submission_list.append(submission) > + series_list.append(series) > + > + return qs.filter(Q(id__in=submission_list) | > Q(series__in=series_list)) > + > + The code for this _looks_ fine, but I do wonder if it's necessary? Would it be possible to support basic wildcarding in the filter instead? Something like this: ?tag=*: step...@that.guru (but with HTTP encoding). We could also make this wildcarded by default, e.g. ?tag=Reviewed-by: Would return all results for this tag. > class UserChoiceField(ModelMultipleChoiceField): > > alternate_lookup = 'username__iexact' > @@ -173,10 +230,14 @@ class CoverLetterFilterSet(TimestampMixin, FilterSet): > series = BaseFilter(queryset=Project.objects.all(), > widget=MultipleHiddenInput) > submitter = PersonFilter(queryset=Person.objects.all()) > + tagname = TagNameFilter(label='Tag name') > + tagvalue = TagValueFilter(widget=HiddenInput) > + tag = TagFilter(widget=HiddenInput) > > class Meta: > model = CoverLetter > - fields = ('project', 'series', 'submitter') > + fields = ('project', 'series', 'submitter', 'tagname', 'tagvalue', > + 'tag') > > > class PatchFilterSet(TimestampMixin, FilterSet): > @@ -189,11 +250,14 @@ class PatchFilterSet(TimestampMixin, FilterSet): > submitter = PersonFilter(queryset=Person.objects.all()) > delegate = UserFilter(queryset=User.objects.all()) > state = StateFilter(queryset=State.objects.all()) > + tagname = TagNameFilter(label='Tag name') > + tagvalue = TagValueFilter(widget=HiddenInput) > + tag = TagFilter(widget=HiddenInput) > > class Meta: > model = Patch > fields = ('project', 'series', 'submitter', 'delegate', > - 'state', 'archived') > + 'state', 'archived', 'tagname', 'tagvalue', 'tag') > > > class CheckFilterSet(TimestampMixin, FilterSet): > diff --git a/patchwork/api/patch.py b/patchwork/api/patch.py > index 9d890eb..cf97c40 100644 > --- a/patchwork/api/patch.py > +++ b/patchwork/api/patch.py > @@ -19,6 +19,7 @@ > > import email.parser > > +from django.db.models import Q > from django.utils.translation import ugettext_lazy as _ > from rest_framework.generics import ListAPIView > from rest_framework.generics import RetrieveUpdateAPIView > @@ -35,6 +36,7 @@ from patchwork.api.embedded import SeriesSerializer > from patchwork.api.embedded import UserSerializer > from patchwork.models import Patch > from patchwork.models import State > +from patchwork.models import SubmissionTag > from patchwork.parser import clean_subject > > > @@ -109,9 +111,18 @@ class > PatchListSerializer(BaseHyperlinkedModelSerializer): > reverse('api-check-list', kwargs={'patch_id': instance.id})) > > def get_tags(self, instance): > - # TODO(stephenfin): Make tags performant, possibly by reworking the > - # model > - return {} > + if not instance.project.use_tags: > + return {} > + > + tags = SubmissionTag.objects.filter( > + Q(submission=instance) | Q(series__in=instance.series.all()) > + ).values_list('tag__name', 'value').distinct() > + > + result = {} > + for name, value in tags: > + result.setdefault(name, []).append(value) > + > + return result > > class Meta: > model = Patch > @@ -160,6 +171,7 @@ class PatchDetailSerializer(PatchListSerializer): > 'headers', 'content', 'diff', 'prefixes') > versioned_fields = PatchListSerializer.Meta.versioned_fields > extra_kwargs = PatchListSerializer.Meta.extra_kwargs > + versioned_fields = PatchListSerializer.Meta.versioned_fields > > > class PatchList(ListAPIView): This is missing the entry in the versioned_fields setting. Also, same comment around 'get_tags'. > diff --git a/patchwork/models.py b/patchwork/models.py > index 6268f5b..ba53278 100644 > --- a/patchwork/models.py > +++ b/patchwork/models.py > @@ -20,8 +20,6 @@ > > from __future__ import absolute_import > > -from collections import Counter > -from collections import OrderedDict > import datetime > import random > import re > @@ -31,6 +29,7 @@ from django.conf import settings > from django.contrib.auth.models import User > from django.core.exceptions import ValidationError > from django.db import models > +from django.db.models import Q > from django.utils.encoding import python_2_unicode_compatible > from django.utils.functional import cached_property > > @@ -252,10 +251,6 @@ class Tag(models.Model): > ' tag\'s count in the patch list view', > default=True) > > - @property > - def attr_name(self): > - return 'tag_%d_count' % self.id > - > def __str__(self): > return self.name > > @@ -263,64 +258,21 @@ class Tag(models.Model): > ordering = ['abbrev'] > > > -class PatchTag(models.Model): > - patch = models.ForeignKey('Patch', on_delete=models.CASCADE) > +class SubmissionTag(models.Model): > + submission = models.ForeignKey('Submission', on_delete=models.CASCADE) > tag = models.ForeignKey('Tag', on_delete=models.CASCADE) > - count = models.IntegerField(default=1) > + value = models.CharField(max_length=255) > + comment = models.ForeignKey('Comment', null=True, > on_delete=models.CASCADE) > + series = models.ForeignKey('Series', null=True, on_delete=models.CASCADE) > > class Meta: > - unique_together = [('patch', 'tag')] > + unique_together = [('submission', 'tag', 'value', 'comment')] > > > def get_default_initial_patch_state(): > return State.objects.get(ordering=0) > > > -class PatchQuerySet(models.query.QuerySet): > - > - def with_tag_counts(self, project=None): > - if project and not project.use_tags: > - return self > - > - # We need the project's use_tags field loaded for Project.tags(). > - # Using prefetch_related means we'll share the one instance of > - # Project, and share the project.tags cache between all patch.project > - # references. > - qs = self.prefetch_related('project') > - select = OrderedDict() > - select_params = [] > - > - # All projects have the same tags, so we're good to go here > - if project: > - tags = project.tags > - else: > - tags = Tag.objects.all() > - > - for tag in tags: > - select[tag.attr_name] = ( > - "coalesce(" > - "(SELECT count FROM patchwork_patchtag" > - " WHERE patchwork_patchtag.patch_id=" > - "patchwork_patch.submission_ptr_id" > - " AND patchwork_patchtag.tag_id=%s), 0)") > - select_params.append(tag.id) > - > - return qs.extra(select=select, select_params=select_params) > - > - > -class PatchManager(models.Manager): > - use_for_related_fields = True > - # NOTE(stephenfin): This is necessary to silence a warning with Django >= > - # 1.10. Remove when 1.10 is the minimum supported version. > - silence_use_for_related_fields_deprecation = True > - > - def get_queryset(self): > - return PatchQuerySet(self.model, using=self.db) > - > - def with_tag_counts(self, project): > - return self.get_queryset().with_tag_counts(project) > - > - > class EmailMixin(models.Model): > """Mixin for models with an email-origin.""" > # email metadata > @@ -334,17 +286,16 @@ class EmailMixin(models.Model): > submitter = models.ForeignKey(Person, on_delete=models.CASCADE) > content = models.TextField(null=True, blank=True) > > - response_re = re.compile( > - r'^(Tested|Reviewed|Acked|Signed-off|Nacked|Reported)-by:.*$', > - re.M | re.I) > + def _extract_tags(self, tags): > + found_tags = {} > > - @property > - def patch_responses(self): > if not self.content: > - return '' > + return found_tags > > - return ''.join([match.group(0) + '\n' for match in > - self.response_re.finditer(self.content)]) > + for tag in tags: > + regex = re.compile(tag.pattern + r'\s*(.*)', re.M | re.I | re.U) > + found_tags[tag] = regex.findall(self.content) > + return found_tags I wonder if we can move this to 'parser.py' now? It doesn't sound like something we'll be doing at runtime so we could move it out of here now. > > def save(self, *args, **kwargs): > # Modifying a submission via admin interface changes '\n' newlines in > @@ -377,6 +328,40 @@ class Submission(FilenameMixin, EmailMixin, > models.Model): > # submission metadata > > name = models.CharField(max_length=255) > + related_tags = models.ManyToManyField(Tag, through=SubmissionTag) Just 'tags'? > + > + def add_tags(self, tag, values, comment=None): > + if hasattr(self, 'patch'): > + series = None > + else: > + series = self.coverletter.series.first() > + > + current_objs = SubmissionTag.objects.filter(submission=self, > + comment=comment, > + tag=tag, > + series=series) > + > + if not values: > + current_objs.delete() > + return > + > + # In case the origin is modified, delete tags that were removed > + current_objs.exclude(value__in=values).delete() > + > + values_to_add = set(values) - set(current_objs.values_list('value', > + > flat=True)) > + SubmissionTag.objects.bulk_create([SubmissionTag( > + submission=self, > + tag=tag, > + value=value, > + comment=comment, > + series=series > + ) for value in values_to_add]) > + > + def refresh_tags(self): > + submission_tags = self._extract_tags(Tag.objects.all()) > + for tag in submission_tags: > + self.add_tags(tag, submission_tags[tag]) Similar comments around moving these functions out of here and into 'parser.py'. I'd _really_ like to have all this stuff in 'parser.py' or 'retag.py'. > > # patchwork metadata > > @@ -426,7 +411,6 @@ class Patch(SeriesMixin, Submission): > diff = models.TextField(null=True, blank=True) > commit_ref = models.CharField(max_length=255, null=True, blank=True) > pull_url = models.CharField(max_length=255, null=True, blank=True) > - tags = models.ManyToManyField(Tag, through=PatchTag) > > # patchwork metadata > > @@ -440,40 +424,6 @@ class Patch(SeriesMixin, Submission): > # patches in a project without needing to do a JOIN. > patch_project = models.ForeignKey(Project, on_delete=models.CASCADE) > > - objects = PatchManager() > - > - @staticmethod > - def extract_tags(content, tags): > - counts = Counter() > - > - for tag in tags: > - regex = re.compile(tag.pattern, re.MULTILINE | re.IGNORECASE) > - counts[tag] = len(regex.findall(content)) > - > - return counts > - > - def _set_tag(self, tag, count): > - if count == 0: > - self.patchtag_set.filter(tag=tag).delete() > - return > - patchtag, _ = PatchTag.objects.get_or_create(patch=self, tag=tag) > - if patchtag.count != count: > - patchtag.count = count > - patchtag.save() > - > - def refresh_tag_counts(self): > - tags = self.project.tags > - counter = Counter() > - > - if self.content: > - counter += self.extract_tags(self.content, tags) > - > - for comment in self.comments.all(): > - counter = counter + self.extract_tags(comment.content, tags) > - > - for tag in tags: > - self._set_tag(tag, counter[tag]) > - > def save(self, *args, **kwargs): > if not hasattr(self, 'state') or not self.state: > self.state = get_default_initial_patch_state() > @@ -482,8 +432,7 @@ class Patch(SeriesMixin, Submission): > self.hash = hash_diff(self.diff) > > super(Patch, self).save(**kwargs) > - > - self.refresh_tag_counts() > + self.refresh_tags() > > def is_editable(self, user): > if not is_authenticated(user): > @@ -495,6 +444,18 @@ class Patch(SeriesMixin, Submission): > return self.project.is_editable(user) > > @property > + def all_tags(self): > + related_tags = SubmissionTag.objects.filter( > + Q(submission=self) | Q(series__in=self.series.all()) > + ).values_list('tag__name', 'value').distinct() > + > + patch_tags = {} > + for tag in self.project.tags: > + patch_tags[tag] = [related_tag[1] for related_tag in related_tags > + if related_tag[0] == tag.name] Hmm, I don't think you want to be doing a list comprehension here as Python is way slower than the DB. You could do: related_tags = related_tags.filter(name__in=self.project.tags) ...or similar. > + return patch_tags Yeah, we should probably be using this in the API. Not sure about performance though. This could need some work. > + > + @property > def combined_check_state(self): > """Return the combined state for all checks. > > @@ -611,13 +572,12 @@ class Comment(EmailMixin, models.Model): > > def save(self, *args, **kwargs): > super(Comment, self).save(*args, **kwargs) > - if hasattr(self.submission, 'patch'): > - self.submission.patch.refresh_tag_counts() > + self.refresh_tags() I'd _personally_ rather we did this in 'parser.py'. There's no reason to calculate this stuff everytime we change anything about the patch (e.g. updating the state). > - def delete(self, *args, **kwargs): > - super(Comment, self).delete(*args, **kwargs) > - if hasattr(self.submission, 'patch'): > - self.submission.patch.refresh_tag_counts() > + def refresh_tags(self): > + comment_tags = self._extract_tags(Tag.objects.all()) > + for tag in comment_tags: > + self.submission.add_tags(tag, comment_tags[tag], comment=self) > > def is_editable(self, user): > return False > @@ -716,6 +676,7 @@ class Series(FilenameMixin, models.Model): > self.name = self._format_name(cover) > > self.save() > + cover.refresh_tags() > > def add_patch(self, patch, number): > """Add a patch to the series.""" > diff --git a/patchwork/templatetags/patch.py b/patchwork/templatetags/patch.py > index 4350e09..f226dc0 100644 > --- a/patchwork/templatetags/patch.py > +++ b/patchwork/templatetags/patch.py > @@ -34,8 +34,9 @@ register = template.Library() > def patch_tags(patch): > counts = [] > titles = [] > + all_tags = patch.all_tags > for tag in [t for t in patch.project.tags if t.show_column]: > - count = getattr(patch, tag.attr_name) > + count = len(all_tags[tag]) > titles.append('%d %s' % (count, tag.name)) > if count == 0: > counts.append("-") > diff --git a/patchwork/views/__init__.py b/patchwork/views/__init__.py > index f8d23a3..8c41df6 100644 > --- a/patchwork/views/__init__.py > +++ b/patchwork/views/__init__.py > @@ -272,9 +272,6 @@ def generic_list(request, project, view, view_args=None, > filter_settings=None, > if patches is None: > patches = Patch.objects.filter(patch_project=project) > > - # annotate with tag counts > - patches = patches.with_tag_counts(project) > - > patches = context['filters'].apply(patches) > if not editable_order: > patches = order.apply(patches) > diff --git a/patchwork/views/utils.py b/patchwork/views/utils.py > index 2357ab8..f1f78c8 100644 > --- a/patchwork/views/utils.py > +++ b/patchwork/views/utils.py > @@ -27,11 +27,13 @@ import email.utils > import re > > from django.conf import settings > +from django.db.models import Q > from django.http import Http404 > from django.utils import six > > from patchwork.models import Comment > from patchwork.models import Patch > +from patchwork.models import SubmissionTag > from patchwork.models import Series > > if settings.ENABLE_REST_API: > @@ -75,9 +77,11 @@ def _submission_to_mbox(submission): > else: > postscript = '' > > - # TODO(stephenfin): Make this use the tags infrastructure > - for comment in Comment.objects.filter(submission=submission): > - body += comment.patch_responses > + for (tagname, value) in SubmissionTag.objects.filter( > + Q(submission=submission) > + | Q(series__in=submission.series.all())).values_list( > + 'tag__name', 'value').distinct(): > + body += '%s: %s\n' % (tagname, value) > > if postscript: > body += '---\n' + postscript + '\n' _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork