On Tue, 2024-07-30 at 17:05 -0400, Adam Hassick wrote: > Add a new function to parse "Depends-on" tags to the parser. The value > may either be the message ID of a patch or cover letter email already > received by Patchwork, or the web URL of a patch or series. When this > tag is found, the parser will add the series (or the series the patch > belongs to) as a dependency to the series it is creating. > > Signed-off-by: Adam Hassick <ahass...@iol.unh.edu>
Could of comments below but hopefully nothing major. Stephen > --- > patchwork/parser.py | 90 ++++++++++++++++++++++++++++++++++++++++++++- > 1 file changed, 89 insertions(+), 1 deletion(-) > > diff --git a/patchwork/parser.py b/patchwork/parser.py > index 09a53a0..6408c5c 100644 > --- a/patchwork/parser.py > +++ b/patchwork/parser.py > @@ -14,11 +14,13 @@ from email.errors import HeaderParseError > from fnmatch import fnmatch > import logging > import re > +from urllib.parse import urlparse > > from django.contrib.auth.models import User > from django.db.utils import IntegrityError > from django.db import transaction > from django.utils import timezone as tz_utils > +from django.urls import resolve, Resolver404 > > from patchwork.models import Cover > from patchwork.models import CoverComment > @@ -32,7 +34,6 @@ from patchwork.models import Series > from patchwork.models import SeriesReference > from patchwork.models import State > > - > _msgid_re = re.compile(r'<[^>]+>') > _hunk_re = re.compile(r'^\@\@ -\d+(?:,(\d+))? \+\d+(?:,(\d+))? \@\@') > _filename_re = re.compile(r'^(---|\+\+\+) (\S+)') > @@ -1054,6 +1055,90 @@ def parse_pull_request(content): > return None > > > +def find_series_from_url(url): > + """ > + Get a series from either a series or patch URL. > + """ > + parse_result = urlparse(url) > + > + # Resolve the URL path to see if this is a patch or series detail URL. > + try: > + result = resolve(parse_result.path) > + except Resolver404: > + return None nit: We're not great at logging, but this feels like something we could/should log? > + > + # TODO: Use the series detail view here. > + if result.view_name == 'patch-list' and parse_result.query: > + # Parse the query string. > + # This can be replaced with something much friendlier once the > + # series detail view is implemented. > + series_query_param = next( > + filter( > + lambda x: len(x) == 2 and x[0] == 'series', > + map(lambda x: x.split('='), parse_result.query.split('&')), > + ) > + ) Can we use 'urllib.parse.parse_qs' here? [1] [1] https://docs.python.org/3/library/urllib.parse.html#urllib.parse.parse_qs > + > + if not series_query_param: > + return None > + > + series_id = series_query_param[1] > + > + try: > + series_id_num = int(series_id) > + except ValueError: > + return None This is probably loggable also? > + > + return Series.objects.filter(id=series_id_num).first() For other reviewers (and a reminder to myself), this will return None if there are no matches. > + elif result.view_name == 'patch-detail': > + msgid = Patch.decode_msgid(result.kwargs['msgid']) > + patch = Patch.objects.filter(msgid=msgid).first() > + > + if patch: > + return patch.series nit: you use the walrus operator below so... if patch := Patch.objects.filter(msgid=msgid).first(): return patch.series > + > + > +def find_series_from_msgid(msgid): > + """ > + Get a series from > + """ > + if patch := Patch.objects.filter(msgid=msgid).first(): > + return patch.series > + > + if cover := Cover.objects.filter(msgid=msgid).first(): > + return cover.series > + > + > +def parse_depends_on(content): > + """Parses any dependency hints in the patch or series content.""" > + dependencies = [] > + > + # Discover dependencies given as URLs. > + dependencies.extend( > + series > + for url in re.findall( > + r'^Depends-on: (http[s]?:\/\/[\w\d\-.\/=&@:%?_\+()]+)\s*$', > + content, > + flags=re.MULTILINE | re.IGNORECASE, > + ) > + if (series := find_series_from_url(url)) is not None > + ) This is clever, but a for loop would likely be a little easier to read. Any chance you could rework this slightly? Ditto for the below. > + > + # Discover dependencies given as message IDs. > + dependencies.extend( > + series > + for msgid in re.findall( > + r'^Depends-on: (<[^>]+>)\s*$', > + content, > + flags=re.MULTILINE | re.IGNORECASE, > + ) > + if (series := find_series_from_msgid(msgid)) is not None > + ) > + > + # Return list of series objects to depend on. > + return dependencies > + > + > def find_state(mail): > """Return the state with the given name or the default.""" > state_name = clean_header(mail.get('X-Patchwork-State', '')) > @@ -1308,6 +1393,8 @@ def parse_mail(mail, list_id=None): > # always have a series > series.add_patch(patch, x) > > + series.add_dependencies(parse_depends_on(message)) > + > return patch > elif x == 0: # (potential) cover letters > # if refs are empty, it's implicitly a cover letter. If not, > @@ -1374,6 +1461,7 @@ def parse_mail(mail, list_id=None): > logger.debug('Cover letter saved') > > series.add_cover_letter(cover_letter) > + series.add_dependencies(parse_depends_on(message)) > > return cover_letter > _______________________________________________ Patchwork mailing list Patchwork@lists.ozlabs.org https://lists.ozlabs.org/listinfo/patchwork