[ https://issues.apache.org/jira/browse/ARROW-8456?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel ]
Krisztian Szucs updated ARROW-8456: ----------------------------------- Issue Type: Improvement (was: Task) > [Release] Add python script to help curating JIRA > ------------------------------------------------- > > Key: ARROW-8456 > URL: https://issues.apache.org/jira/browse/ARROW-8456 > Project: Apache Arrow > Issue Type: Improvement > Components: Developer Tools > Reporter: Krisztian Szucs > Priority: Major > Fix For: 1.0.0 > > > The following script produces reports like > https://gist.github.com/kszucs/9857ef69c92a230ce5a5068551b83ed8 > {code:python} > from jira import JIRA > import warnings > import pygit2 > import pandas as pd > from io import StringIO > class Patch: > > def __init__(self, commit): > self.commit = commit > self.issue_key, self.msg = self._parse(commit.message) > > def _parse(self, message): > first_line = message.splitlines()[0] > > m = re.match("(?P<ticket>((ARROW|PARQUET)\-\d+)):?(?P<msg>.*)", > first_line) > if m is None: > return None, '' > values = m.groupdict() > return values['ticket'], values['msg'] > > @property > def shortmessage(self): > if not self.msg: > return self.commit.message.splitlines()[0] > else: > return self.msg > @property > def sha(self): > return self.commit.id > > @property > def issue_url(self): > return > 'https://issues.apache.org/jira/browse/{}'.format(self.issue_key) > > @property > def commit_url(self): > return 'https://github.com/apache/arrow/commit/{}'.format(self.sha) > > def to_markdown(self): > if self.issue_key is None: > return "[{}]({})\n".format( > self.shortmessage, > self.commit_url > ) > else: > return "[{}]({}): [{}]({})\n".format( > self.issue_key, > self.issue_url, > self.shortmessage, > self.commit_url > ) > > > JIRA_SEARCH_LIMIT = 10000 > # JIRA_SEARCH_LIMIT = 50 > class Release: > """Release object for querying issues and commits > > Usage: > jira = JIRA( > {'server': 'https://issues.apache.org/jira'}, > basic_auth=(user, password) > ) > repo = pygit2.Repository('path/to/arrow/repo') > > release = Release(jira, repo, '0.15.1', '0.15.0') > # show the commits in application order > for commit in release.commits(): > print(commit.oid) > # cherry-pick the patches to a branch > release.apply_patches_to('a-branch') > """ > > def __init__(self, jira, repo, version, previous_version): > self.jira = jira > self.repo = repo > self.version = version > self.previous_version = previous_version > self._issues = None > self._patches = None > > def _tag(self, version): > return self.repo.revparse_single(f'refs/tags/apache-arrow-{version}') > > def issues(self): > # FIXME(kszucs): paginate instead of maxresults > if self._issues is None: > query = f'project=ARROW AND fixVersion={self.version}' > self._issues = self.jira.search_issues(query, > maxResults=JIRA_SEARCH_LIMIT) > return self._issues > > def patches(self): > """Commits belonging to release applied on master branch > > The returned commits' order corresponds to the output of > git log. > """ > if self._patches is None: > previous_tag = self._tag(self.previous_version) > master = self.repo.branches['master'] > ordering = pygit2.GIT_SORT_TOPOLOGICAL | pygit2.GIT_SORT_REVERSE > walker = self.repo.walk(master.target, ordering) > walker.hide(previous_tag.oid) > self._patches = list(map(Patch, walker)) > return self._patches > > def curate(self): > issues = self.issues() > patches = self.patches() > issue_keys = {issue.key for issue in self.issues()} > > within, outside, nojira = [], [], [] > for p in patches: > if p.issue_key is None: > nojira.append(p) > elif p.issue_key in issue_keys: > within.append(p) > issue_keys.remove(p.issue_key) > else: > outside.append(p) > > # remaining jira tickets > nopatch = list(issue_keys) > > return within, outside, nojira, nopatch > def curation_report(self): > out = StringIO() > > out.write('Total number of JIRA tickets assigned to version {}: {}\n' > .format(self.version, len(self.issues()))) > out.write('\n') > out.write('Total number of applied patches since {}: {}\n' > .format(self.previous_version, len(self.patches()))) > > out.write('\n\n') > within, outside, nojira, nopatch = self.curate() > > out.write('Patches with assigned issue in {}:\n'.format(self.version)) > for p in within: > out.write("- {}".format(p.to_markdown())) > > out.write('\n\n') > > out.write('Patches with assigned issue outside of > {}:\n'.format(self.version)) > for p in outside: > out.write("- {}".format(p.to_markdown())) > > out.write('\n\n') > > out.write('Patches without assigned issue:\n') > for p in nojira: > out.write("- {}".format(p.to_markdown())) > > out.write('\n\n') > > out.write('JIRAs in {} without assigned > patch:\n'.format(self.version)) > for issue_key in nopatch: > url = 'https://issues.apache.org/jira/browse/{}'.format(issue_key) > out.write("- [{}]({})\n".format(issue_key, url)) > > return out.getvalue() > > def apply_patches_to(self, branch_name): > previous_tag = self._tag(self.previous_version) > branch = repo.create_branch(branch_name, previous_tag.get_object()) > try: > head = branch.target > for commit in self.patches(): > base = repo.merge_base(commit.oid, head) > parent_tree = commit.parents[0].tree > index = repo.merge_trees(parent_tree, head, commit.oid) > tree_id = index.write_tree(repo) > head = repo.create_commit( > branch.name, > commit.author, > commit.committer, > commit.message, > tree_id, > [head] > ) > except pygit2.GitError: > repo.branches[branch_name].delete() > raise > import os > import pygit2 > from jira import JIRA > jira = JIRA( > {'server': 'https://issues.apache.org/jira'}, > basic_auth=( > os.environ.get('APACHE_JIRA_USER'), > os.environ.get('APACHE_JIRA_PASSWORD') > ) > ) > repo = pygit2.Repository('.') > release = Release(jira, repo, version='0.17.0', previous_version='0.16.0') > report = release.curation_report() > {code} -- This message was sent by Atlassian Jira (v8.3.4#803005)