Repository: incubator-hivemall Updated Branches: refs/heads/master 7ecc8df71 -> b814d9661
Close #119: [HIVEMALL-148] Add a script for merging GitHub PR Project: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/commit/b814d966 Tree: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/tree/b814d966 Diff: http://git-wip-us.apache.org/repos/asf/incubator-hivemall/diff/b814d966 Branch: refs/heads/master Commit: b814d9661ebb7145e32357caa7f04ea9131c85f2 Parents: 7ecc8df Author: Makoto Yui <[email protected]> Authored: Wed Dec 27 16:43:23 2017 +0900 Committer: Makoto Yui <[email protected]> Committed: Wed Dec 27 16:43:23 2017 +0900 ---------------------------------------------------------------------- bin/merge_pr.py | 488 +++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 488 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hivemall/blob/b814d966/bin/merge_pr.py ---------------------------------------------------------------------- diff --git a/bin/merge_pr.py b/bin/merge_pr.py new file mode 100755 index 0000000..ad5916c --- /dev/null +++ b/bin/merge_pr.py @@ -0,0 +1,488 @@ +#!/usr/bin/env python +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
# See the License for the
# specific language governing permissions and limitations
# under the License.
#

# Utility for creating well-formed pull request merges and pushing them to Apache
# Hivemall. This script is a modified version of the ones created by the Spark project
# (https://github.com/apache/spark/blob/master/dev/merge_spark_pr.py) and Kafka project
# (https://github.com/apache/kafka/blob/trunk/kafka-merge-pr.py).
# usage: ./merge_pr.py (see config env vars below)
#
# This utility assumes you already have a local git folder and that you
# have added remotes corresponding to both (i) the github apache mirror
# and (ii) the apache git repo.

import json
import os
import re
import subprocess
import sys
from urllib.request import Request, urlopen
from urllib.error import HTTPError

try:
    import jira.client
    JIRA_IMPORTED = True
except ImportError:
    JIRA_IMPORTED = False

PROJECT_NAME = 'hivemall'
PROJECT_NAME_CAPITALIZED = PROJECT_NAME.upper()
INCUBATOR = True

# git remote add apache https://git-wip-us.apache.org/repos/asf/incubator-hivemall.git
# git remote add apache-github https://github.com/apache/incubator-hivemall.git

# Location of your project's git development area
REPO_HOME = os.environ.get('%s_HOME' % PROJECT_NAME_CAPITALIZED, os.getcwd())
# Remote name which points to the GitHub site
PR_REMOTE_NAME = os.environ.get('PR_REMOTE_NAME', 'apache-github')
# Remote name which points to Apache git
PUSH_REMOTE_NAME = os.environ.get('PUSH_REMOTE_NAME', 'apache')
# ASF JIRA username
JIRA_USERNAME = os.environ.get('JIRA_USERNAME', '')
# ASF JIRA password
JIRA_PASSWORD = os.environ.get('JIRA_PASSWORD', '')
# OAuth key used for issuing requests against the GitHub API. If this is not defined, then requests
# will be unauthenticated. You should only need to configure this if you find yourself regularly
# exceeding your IP's unauthenticated request rate limit. You can create an OAuth key at
# https://github.com/settings/tokens. This script only requires the "public_repo" scope.
GITHUB_OAUTH_KEY = os.environ.get('GITHUB_OAUTH_KEY')

REPO_NAME = ('incubator-%s' % PROJECT_NAME) if INCUBATOR else PROJECT_NAME
GITHUB_BASE = 'https://github.com/apache/%s/pull' % REPO_NAME
GITHUB_API_BASE = 'https://api.github.com/repos/apache/%s' % REPO_NAME
JIRA_BASE = 'https://issues.apache.org/jira/browse'
JIRA_API_BASE = 'https://issues.apache.org/jira'
# Prefix added to temporary branches
BRANCH_PREFIX = "PR_TOOL"

RELEASE_BRANCH_PREFIX = 'dev/'  # tmp

# Ref that was checked out when the script started; set by main() and restored by clean_up().
# Stays None until main() has recorded it, so clean_up() knows there is nothing to restore yet.
original_head = None


def get_json(url):
    """Fetch ``url`` and return its body decoded as JSON.

    The request carries an ``Authorization`` header when GITHUB_OAUTH_KEY is set.
    On an HTTP error a diagnostic is printed and the process exits with status -1.
    """
    request = Request(url)
    if GITHUB_OAUTH_KEY:
        request.add_header('Authorization', 'token %s' % GITHUB_OAUTH_KEY)
    try:
        # The context manager closes the HTTP response once the body has been read.
        with urlopen(request) as response:
            return json.loads(response.read().decode('utf-8'))
    except HTTPError as e:
        if e.headers is not None and e.headers.get('X-RateLimit-Remaining') == '0':
            print('Exceeded the GitHub API rate limit; see the instructions in ' +
                  'merge_pr.py to configure an OAuth token for making authenticated ' +
                  'GitHub requests.')
        else:
            print('Unable to fetch URL, exiting: %s' % url)
        sys.exit(-1)


def fail(msg):
    """Print ``msg``, restore the working copy via clean_up() and exit with status -1."""
    print(msg)
    clean_up()
    sys.exit(-1)


def run_cmd(cmd):
    """Echo and run ``cmd``, returning its standard output decoded as UTF-8.

    ``cmd`` is either an argument list or a string that is split on single spaces
    (so string commands must not contain arguments with embedded spaces).
    Raises subprocess.CalledProcessError when the command exits non-zero.
    """
    print(cmd)
    args = cmd if isinstance(cmd, list) else cmd.split(' ')
    return subprocess.check_output(args).decode('utf-8')


def continue_maybe(prompt):
    """Ask a yes/no question; anything other than 'y' aborts the script through fail()."""
    result = input('\n%s (y/n): ' % prompt)
    if result.lower() != 'y':
        fail('Okay, exiting')


def clean_up():
    """Check out the original ref again and delete the temporary ``BRANCH_PREFIX`` branches.

    Does nothing when main() has not recorded ``original_head`` yet.
    """
    if original_head is None:
        return

    print('Restoring head pointer to %s' % original_head)
    run_cmd('git checkout %s' % original_head)

    branches = run_cmd('git branch').replace(' ', '').split('\n')

    for branch in branches:
        if branch.startswith(BRANCH_PREFIX):
            print('Deleting local branch %s' % branch)
            run_cmd('git branch -D %s' % branch)


# merge the requested PR and return the merge hash
def merge_pr(pr_num, target_ref, title, body, pr_repo_desc):
    """Squash-merge pull request ``pr_num`` into ``target_ref`` and push the result.

    ``title`` and ``body`` become the first paragraphs of the commit message and
    ``pr_repo_desc`` is quoted in the trailing "Closes #N from ..." line.
    Returns the first 8 characters of the resulting commit hash.
    """
    pr_branch_name = '%s_MERGE_PR_%s' % (BRANCH_PREFIX, pr_num)
    target_branch_name = '%s_MERGE_PR_%s_%s' % (BRANCH_PREFIX, pr_num, target_ref.upper())
    run_cmd('git fetch %s pull/%s/head:%s' % (PR_REMOTE_NAME, pr_num, pr_branch_name))
    run_cmd('git fetch %s %s:%s' % (PUSH_REMOTE_NAME, target_ref, target_branch_name))
    run_cmd('git checkout %s' % target_branch_name)

    had_conflicts = False
    try:
        run_cmd(['git', 'merge', pr_branch_name, '--squash'])
    except Exception as e:
        msg = 'Error merging: %s\nWould you like to manually fix-up this merge?' % e
        continue_maybe(msg)
        msg = "Okay, please fix any conflicts and 'git add' conflicting files... Finished?"
        continue_maybe(msg)
        had_conflicts = True

    commit_authors = run_cmd(['git', 'log', 'HEAD..%s' % pr_branch_name,
                              '--pretty=format:%an <%ae>']).split('\n')
    # Most frequent author first; it is offered as the default primary author.
    distinct_authors = sorted(set(commit_authors),
                              key=lambda x: commit_authors.count(x), reverse=True)
    primary_author = input(
        'Enter primary author in the format of "name <email>" [%s]: ' %
        distinct_authors[0])
    if primary_author == '':
        primary_author = distinct_authors[0]

    merge_message_flags = ['-m', title]
    if body is not None:
        # We remove @ symbols from the body to avoid triggering e-mails
        # to people every time someone creates a public fork of the project.
        merge_message_flags += ['-m', body.replace('@', '')]

    authors = '\n'.join(['Author: %s' % a for a in distinct_authors])

    merge_message_flags += ['-m', authors]

    if had_conflicts:
        committer_name = run_cmd('git config --get user.name').strip()
        committer_email = run_cmd('git config --get user.email').strip()
        message = 'This patch had conflicts when merged, resolved by\nCommitter: %s <%s>' % (
            committer_name, committer_email)
        merge_message_flags += ['-m', message]

    # The "Closes #%s" string is required for GitHub to correctly close the PR
    merge_message_flags += ['-m', 'Closes #%s from %s.' % (pr_num, pr_repo_desc)]

    run_cmd(['git', 'commit', '--author="%s"' % primary_author] + merge_message_flags)

    continue_maybe('Merge complete (local ref %s). Push to %s?' % (
        target_branch_name, PUSH_REMOTE_NAME))

    try:
        run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, target_branch_name, target_ref))
    except Exception as e:
        # fail() performs the clean-up itself, so it is not repeated here.
        fail('Exception while pushing: %s' % e)

    merge_hash = run_cmd('git rev-parse %s' % target_branch_name)[:8]
    clean_up()
    print('Pull request #%s merged!' % pr_num)
    print('Merge hash: %s' % merge_hash)
    return merge_hash


def cherry_pick(pr_num, merge_hash, default_branch):
    """Cherry-pick ``merge_hash`` into a branch chosen by the user and push it.

    ``default_branch`` is used when the user just presses enter.
    Returns the name of the branch that was picked into.
    """
    pick_ref = input('Enter a branch name [%s]: ' % default_branch)
    if pick_ref == '':
        pick_ref = default_branch
    if not pick_ref:
        # Happens when there is no default (no release branch found) and nothing was typed.
        fail('No branch name given, cannot cherry-pick.')

    pick_branch_name = '%s_PICK_PR_%s_%s' % (BRANCH_PREFIX, pr_num, pick_ref.upper())

    run_cmd('git fetch %s %s:%s' % (PUSH_REMOTE_NAME, pick_ref, pick_branch_name))
    run_cmd('git checkout %s' % pick_branch_name)

    try:
        run_cmd('git cherry-pick -sx %s' % merge_hash)
    except Exception as e:
        msg = 'Error cherry-picking: %s\nWould you like to manually fix-up this merge?' % e
        continue_maybe(msg)
        msg = 'Okay, please fix any conflicts and finish the cherry-pick. Finished?'
        continue_maybe(msg)

    continue_maybe('Pick complete (local ref %s). Push to %s?' % (
        pick_branch_name, PUSH_REMOTE_NAME))

    try:
        run_cmd('git push %s %s:%s' % (PUSH_REMOTE_NAME, pick_branch_name, pick_ref))
    except Exception as e:
        # fail() performs the clean-up itself, so it is not repeated here.
        fail('Exception while pushing: %s' % e)

    pick_hash = run_cmd('git rev-parse %s' % pick_branch_name)[:8]
    clean_up()

    print('Pull request #%s picked into %s!' % (pr_num, pick_ref))
    print('Pick hash: %s' % pick_hash)
    return pick_ref


def fix_version_from_branch(branch, versions):
    """Return the version object from ``versions`` that corresponds to ``branch``.

    For 'master' this is the first entry; otherwise it is the last entry whose
    ``name`` starts with the branch name stripped of its 'branch-' prefix.
    Raises IndexError when no entry qualifies.
    """
    # Note: Assumes this is a sorted (newest->oldest) list of un-released versions
    # Materialize first: filter() returns a one-shot iterator on Python 3 and cannot be indexed.
    versions = list(versions)
    if branch == 'master':
        return versions[0]
    # NOTE(review): only a 'branch-' prefix is stripped here, while RELEASE_BRANCH_PREFIX
    # is 'dev/' -- confirm which naming scheme the release branches really follow.
    branch_ver = branch.replace('branch-', '')
    return [v for v in versions if v.name.startswith(branch_ver)][-1]


def resolve_jira_issue(merge_branches, comment, default_jira_id=''):
    """Interactively resolve one ASF JIRA issue as 'Fixed'.

    ``merge_branches`` are the branches the change landed in (used to propose fix
    versions), ``comment`` is attached to the transition and ``default_jira_id`` is
    offered as the default issue id. Aborts through fail() on any lookup problem.
    """
    asf_jira = jira.client.JIRA({'server': JIRA_API_BASE},
                                basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))

    jira_id = input('Enter a JIRA id [%s]: ' % default_jira_id)
    if jira_id == '':
        jira_id = default_jira_id

    try:
        issue = asf_jira.issue(jira_id)
    except Exception as e:
        fail('ASF JIRA could not find %s\n%s' % (jira_id, e))

    cur_status = issue.fields.status.name
    cur_summary = issue.fields.summary
    cur_assignee = issue.fields.assignee
    if cur_assignee is None:
        cur_assignee = 'NOT ASSIGNED!!!'
    else:
        cur_assignee = cur_assignee.displayName

    if cur_status == 'Resolved' or cur_status == 'Closed':
        fail("JIRA issue %s already has status '%s'" % (jira_id, cur_status))
    print('=== JIRA %s ===' % jira_id)
    print('summary\t\t%s\nassignee\t%s\nstatus\t\t%s\nurl\t\t%s/%s\n' %
          (cur_summary, cur_assignee, cur_status, JIRA_BASE, jira_id))

    versions = asf_jira.project_versions(PROJECT_NAME_CAPITALIZED)
    versions = sorted(versions, key=lambda x: x.name, reverse=True)
    # Keep real lists: the result is indexed and scanned several times below.
    versions = [v for v in versions if v.raw['released'] is False]
    # Consider only x.y.z versions
    versions = [v for v in versions if re.match(r'\d+\.\d+\.\d+', v.name)]

    default_fix_versions = [fix_version_from_branch(b, versions).name for b in merge_branches]
    for v in list(default_fix_versions):
        # Handles the case where we have forked a release branch but not yet made the release.
        # In this case, if the PR is committed to the master branch and the release branch, we
        # only consider the release branch to be the fix version. E.g. it is not valid to have
        # both 1.1.0 and 1.0.0 as fix versions.
        (major, minor, patch) = v.split('.')
        if patch == '0':
            previous = '%s.%s.%s' % (major, int(minor) - 1, 0)
            if previous in default_fix_versions:
                default_fix_versions = [x for x in default_fix_versions if x != v]
    default_fix_versions = ','.join(default_fix_versions)

    fix_versions = input('Enter comma-separated fix version(s) [%s]: ' % default_fix_versions)
    if fix_versions == '':
        fix_versions = default_fix_versions
    fix_versions = fix_versions.replace(' ', '').split(',')

    def get_version_json(version_str):
        # Look up the raw JSON of the un-released version with exactly this name.
        matches = [v for v in versions if v.name == version_str]
        if not matches:
            fail('Unknown fix version: %s' % version_str)
        return matches[0].raw

    # Must be a list (not a lazy map object) so that it can be serialized into the request.
    jira_fix_versions = [get_version_json(v) for v in fix_versions]

    resolve = next(
        (t for t in asf_jira.transitions(jira_id) if t['name'] == 'Resolve Issue'), None)
    if resolve is None:
        fail("JIRA issue %s has no 'Resolve Issue' transition available" % jira_id)
    resolution = next((r for r in asf_jira.resolutions() if r.raw['name'] == 'Fixed'), None)
    if resolution is None:
        fail("ASF JIRA does not offer a 'Fixed' resolution")
    asf_jira.transition_issue(
        jira_id, resolve['id'], fixVersions=jira_fix_versions,
        comment=comment, resolution={'id': resolution.raw['id']})

    print('Successfully resolved %s with fixVersions=%s!' % (jira_id, fix_versions))


def extract_jira_ids(title):
    """Return every '<PROJECT>-<number>' JIRA id found in ``title``, in order of appearance.

    Ids with 3 to 6 digits are recognised, the same width standardize_jira_ref accepts.
    >>> extract_jira_ids('[%s-148] Add a script' % PROJECT_NAME_CAPITALIZED) == ['%s-148' % PROJECT_NAME_CAPITALIZED]
    True
    >>> extract_jira_ids('Add a script')
    []
    """
    return re.findall(r'%s-[0-9]{3,6}' % PROJECT_NAME_CAPITALIZED, title)


def resolve_jira_issues(title, merge_branches, comment):
    """Resolve every JIRA issue referenced in ``title``; prompt for an id when there is none."""
    jira_ids = extract_jira_ids(title)

    if not jira_ids:
        resolve_jira_issue(merge_branches, comment)
    for jira_id in jira_ids:
        resolve_jira_issue(merge_branches, comment, jira_id)


def standardize_jira_ref(text):
    """
    Standardize the [PROJECT_NAME_CAPITALIZED-XXXXX] [MODULE] prefix
    Converts "[PROJECT_NAME_CAPITALIZED_XXX][module] Issue", "[Module] PROJECT_NAME_CAPITALIZED-XXX. Issue" or "PROJECT_NAME_CAPITALIZED XXX [MODULE]: Issue" to
    "[PROJECT_NAME_CAPITALIZED-XXX][MODULE] Issue"
    >>> standardize_jira_ref('[%s-0000] [MODULE] Issue' % PROJECT_NAME_CAPITALIZED) == '[%s-0000][MODULE] Issue' % PROJECT_NAME_CAPITALIZED
    True
    >>> standardize_jira_ref('[%s-0000][Module][WIP]: Issue' % PROJECT_NAME_CAPITALIZED) == '[%s-0000][MODULE][WIP] Issue' % PROJECT_NAME_CAPITALIZED
    True
    >>> standardize_jira_ref('[Module] %s 0000: Issue' % PROJECT_NAME_CAPITALIZED) == '[%s-0000][MODULE] Issue' % PROJECT_NAME_CAPITALIZED
    True
    >>> standardize_jira_ref('[%s-0000] Issue' % PROJECT_NAME_CAPITALIZED) == '[%s-0000] Issue' % PROJECT_NAME_CAPITALIZED
    True
    >>> standardize_jira_ref('%s-0000 Issue' % PROJECT_NAME_CAPITALIZED) == '[%s-0000] Issue' % PROJECT_NAME_CAPITALIZED
    True
    >>> standardize_jira_ref('[WIP] [%s-0000] Issue' % PROJECT_NAME_CAPITALIZED) == '[%s-0000][WIP] Issue' % PROJECT_NAME_CAPITALIZED
    True
    >>> standardize_jira_ref('%s-0000. Issue' % PROJECT_NAME_CAPITALIZED) == '[%s-0000] Issue' % PROJECT_NAME_CAPITALIZED
    True
    >>> standardize_jira_ref(
    ...     '[%s-0000][%s-1111][%s-2222][MODULE] Issue' % ((PROJECT_NAME_CAPITALIZED,) * 3)) == '[%s-0000][%s-1111][%s-2222][MODULE] Issue' % ((PROJECT_NAME_CAPITALIZED,) * 3)
    True
    >>> standardize_jira_ref('Issue') == 'Issue'
    True
    """
    jira_refs = []
    components = []

    # If the string is compliant, no need to process any further
    if re.search(r'^\[%s-[0-9]{3,6}\](\[[A-Z0-9_\s,]+\] )+\S+' % PROJECT_NAME_CAPITALIZED, text):
        return text

    # Extract JIRA ref(s):
    pattern = re.compile(r'(%s[-\s]*[0-9]{3,6})+' % PROJECT_NAME_CAPITALIZED, re.IGNORECASE)
    for ref in pattern.findall(text):
        # Add brackets, replace spaces with a dash, & convert to uppercase
        jira_refs.append('[' + re.sub(r'\s+', '-', ref.upper()) + ']')
        text = text.replace(ref, '')

    # Extract component(s):
    # Look for alphanumeric chars, spaces, dashes, periods, and/or commas
    pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE)
    for component in pattern.findall(text):
        components.append(component.upper())
        text = text.replace(component, '')

    # Cleanup any remaining symbols:
    leading_symbols = re.search(r'^\W+(.*)', text)
    if leading_symbols is not None:
        text = leading_symbols.group(1)

    # Assemble full text (JIRA ref(s), module(s), remaining text)
    clean_text = ''.join(jira_refs).strip() + ''.join(components).strip() + ' ' + text.strip()

    # Replace multiple spaces with a single space, e.g. if no jira refs and/or components were
    # included
    clean_text = re.sub(r'\s+', ' ', clean_text.strip())

    return clean_text


def get_current_ref():
    """Return the checked-out branch name, or the commit SHA when HEAD is detached."""
    ref = run_cmd('git rev-parse --abbrev-ref HEAD').strip()
    if ref == 'HEAD':
        # The current ref is a detached HEAD, so grab its SHA.
        return run_cmd('git rev-parse HEAD').strip()
    return ref


def main():
    """Interactive entry point: merge (or back-port) one pull request, then update JIRA."""
    global original_head

    os.chdir(REPO_HOME)
    original_head = get_current_ref()

    branches = get_json('%s/branches' % GITHUB_API_BASE)
    branch_names = [b['name'] for b in branches if b['name'].startswith(RELEASE_BRANCH_PREFIX)]
    # Assumes branch names can be sorted lexicographically.
    # Falls back to '' (no default offered) when there is no release branch at all.
    latest_branch = max(branch_names, default='')

    pr_num = input('Which pull request would you like to merge? (e.g. 34): ')
    pr = get_json('%s/pulls/%s' % (GITHUB_API_BASE, pr_num))
    pr_events = get_json('%s/issues/%s/events' % (GITHUB_API_BASE, pr_num))

    url = pr['url']

    # Decide whether to use the modified title or not
    modified_title = standardize_jira_ref(pr['title'])
    if modified_title != pr['title']:
        print("I've re-written the title as follows to match the standard format:")
        print('Original: %s' % pr['title'])
        print("Modified: %s" % modified_title)
        result = input('Would you like to use the modified title? (y/n): ')
        if result.lower() == 'y':
            title = modified_title
            print('Using modified title:')
        else:
            title = pr['title']
            print('Using original title:')
        print(title)
    else:
        title = pr['title']

    body = pr['body']
    target_ref = pr['base']['ref']
    user_login = pr['user']['login']
    base_ref = pr['head']['ref']
    pr_repo_desc = '%s/%s' % (user_login, base_ref)

    # Merged pull requests don't appear as merged in the GitHub API;
    # Instead, they're closed by asfgit.
    # An event's actor can be null, hence the defensive lookup.
    merge_commits = [e for e in pr_events
                     if e['event'] == 'closed' and
                     (e.get('actor') or {}).get('login') == 'asfgit']

    if merge_commits:
        merge_hash = merge_commits[0]['commit_id']
        message = get_json('%s/commits/%s' % (GITHUB_API_BASE, merge_hash))['commit']['message']

        print('Pull request %s has already been merged, assuming you want to backport' % pr_num)
        try:
            commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', '--verify',
                                            '%s^{commit}' % merge_hash]).strip() != ''
        except subprocess.CalledProcessError:
            # `git rev-parse --verify` exits non-zero for an unknown commit, which makes
            # run_cmd raise instead of returning an empty string.
            commit_is_downloaded = False
        if not commit_is_downloaded:
            fail("Couldn't find any merge commit for #%s, you may need to update HEAD." % pr_num)

        print('Found commit %s:\n%s' % (merge_hash, message))
        cherry_pick(pr_num, merge_hash, latest_branch)
        sys.exit(0)

    if not bool(pr['mergeable']):
        msg = 'Pull request %s is not mergeable in its current form.\n' % pr_num + \
            'Continue? (experts only!)'
        continue_maybe(msg)

    print('\n=== Pull Request #%s ===' % pr_num)
    print('title\t%s\nsource\t%s\ntarget\t%s\nurl\t%s' %
          (title, pr_repo_desc, target_ref, url))
    continue_maybe('Proceed with merging pull request #%s?' % pr_num)

    merged_refs = [target_ref]

    merge_hash = merge_pr(pr_num, target_ref, title, body, pr_repo_desc)

    pick_prompt = 'Would you like to pick %s into another branch?' % merge_hash
    while input('\n%s (y/n): ' % pick_prompt).lower() == 'y':
        merged_refs = merged_refs + [cherry_pick(pr_num, merge_hash, latest_branch)]

    if JIRA_IMPORTED:
        if JIRA_USERNAME and JIRA_PASSWORD:
            continue_maybe('Would you like to update an associated JIRA?')
            jira_comment = 'Issue resolved by pull request %s\n[%s/%s]' % \
                (pr_num, GITHUB_BASE, pr_num)
            resolve_jira_issues(title, merged_refs, jira_comment)
        else:
            print('JIRA_USERNAME and JIRA_PASSWORD not set')
            print('Exiting without trying to close the associated JIRA.')
    else:
        print("Could not find jira-python library. Run 'sudo pip install jira' to install.")
        print('Exiting without trying to close the associated JIRA.')


if __name__ == '__main__':
    # Check the interpreter first so that nothing else runs on an unsupported version.
    if sys.version_info < (3,):
        print('This script requires you to use Python 3')
        sys.exit(-1)
    import doctest
    (failure_count, test_count) = doctest.testmod()
    if failure_count:
        sys.exit(-1)
    try:
        main()
    except Exception:
        clean_up()
        raise
