Github user pwendell commented on a diff in the pull request:
https://github.com/apache/spark/pull/5149#discussion_r28713910
--- Diff: dev/merge_spark_pr.py ---
@@ -286,68 +280,149 @@ def resolve_jira_issues(title, merge_branches,
comment):
resolve_jira_issue(merge_branches, comment, jira_id)
-branches = get_json("%s/branches" % GITHUB_API_BASE)
-branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x
in branches])
-# Assumes branch names can be sorted lexicographically
-latest_branch = sorted(branch_names, reverse=True)[0]
-
-pr_num = raw_input("Which pull request would you like to merge? (e.g. 34):
")
-pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
-pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
-
-url = pr["url"]
-title = pr["title"]
-body = pr["body"]
-target_ref = pr["base"]["ref"]
-user_login = pr["user"]["login"]
-base_ref = pr["head"]["ref"]
-pr_repo_desc = "%s/%s" % (user_login, base_ref)
-
-# Merged pull requests don't appear as merged in the GitHub API;
-# Instead, they're closed by asfgit.
-merge_commits = \
- [e for e in pr_events if e["actor"]["login"] == "asfgit" and
e["event"] == "closed"]
-
-if merge_commits:
- merge_hash = merge_commits[0]["commit_id"]
- message = get_json("%s/commits/%s" % (GITHUB_API_BASE,
merge_hash))["commit"]["message"]
-
- print "Pull request %s has already been merged, assuming you want to
backport" % pr_num
- commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet',
'--verify',
+def standardize_jira_ref(text):
+ """
+ Standardize the [SPARK-XXXXX] [MODULE] prefix
+ Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or
"SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX] [MLLIB] Issue"
+
+ >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS
should check if delete is successful")
+ '[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is
successful'
+ >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new
dependencies added in pull requests")
+ '[SPARK-4123] [PROJECT INFRA] [WIP] Show new dependencies added in
pull requests'
+ >>> standardize_jira_ref("[MLlib] Spark 5954: Top by key")
+ '[SPARK-5954] [MLLIB] Top by key'
+ >>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load
balancing in TaskSchedulerImpl")
+ '[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
+ >>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary
compatibility accross versions.")
+ '[SPARK-1094] Support MiMa for reporting binary compatibility accross
versions.'
+ >>> standardize_jira_ref("[WIP] [SPARK-1146] Vagrant support for
Spark")
+ '[SPARK-1146] [WIP] Vagrant support for Spark'
+ >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before
registering, app master stays aroun...")
+ '[SPARK-1032] If Yarn app fails before registering, app master stays
aroun...'
+ >>> standardize_jira_ref("[SPARK-6250][SPARK-6146][SPARK-5911][SQL]
Types are now reserved words in DDL parser.")
+ '[SPARK-6250] [SPARK-6146] [SPARK-5911] [SQL] Types are now reserved
words in DDL parser.'
+ >>> standardize_jira_ref("Additional information for users building
from source code")
+ 'Additional information for users building from source code'
+ """
+ jira_refs = []
+ components = []
+
+ # If the string is compliant, no need to process any further
+ if (re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+',
text)):
+ return text
+
+ # Extract JIRA ref(s):
+ pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})+', re.IGNORECASE)
+ for ref in pattern.findall(text):
+ # Add brackets, replace spaces with a dash, & convert to uppercase
+ jira_refs.append('[' + re.sub(r'\s+', '-', ref.upper()) + ']')
+ text = text.replace(ref, '')
+
+ # Extract spark component(s):
+ # Look for alphanumeric chars, spaces, dashes, periods, and/or commas
+ pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE)
+ for component in pattern.findall(text):
+ components.append(component.upper())
+ text = text.replace(component, '')
+
+ # Cleanup any remaining symbols:
+ pattern = re.compile(r'^\W+(.*)', re.IGNORECASE)
+ if (pattern.search(text) is not None):
+ text = pattern.search(text).groups()[0]
+
+ # Assemble full text (JIRA ref(s), module(s), remaining text)
+ clean_text = ' '.join(jira_refs).strip() + " " + '
'.join(components).strip() + " " + text.strip()
+
+ # Replace multiple spaces with a single space, e.g. if no jira refs
and/or components were included
+ clean_text = re.sub(r'\s+', ' ', clean_text.strip())
+
+ return clean_text
+
+def main():
+ os.chdir(SPARK_HOME)
+ original_head = run_cmd("git rev-parse HEAD")[:8]
+
+ branches = get_json("%s/branches" % GITHUB_API_BASE)
+ branch_names = filter(lambda x: x.startswith("branch-"), [x['name']
for x in branches])
+ # Assumes branch names can be sorted lexicographically
+ latest_branch = sorted(branch_names, reverse=True)[0]
+
+ pr_num = raw_input("Which pull request would you like to merge? (e.g.
34): ")
+ pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
+ pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
+
+ url = pr["url"]
+
+ # Decide whether to use the modified title or not
+ print "I've re-written the title as follows to match the standard
format:"
--- End diff --
Can we show the prompt only if we've actually modified the title? In many
cases the title is already properly formatted, so the prompt isn't needed.
I'd compute `standardize_jira_ref(pr["title"])` earlier on, then check whether
it matches `pr["title"]`, and only ask the user if the two differ.
---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it to be, or if the feature is enabled but not working,
please contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]