Github user pwendell commented on a diff in the pull request:

    https://github.com/apache/spark/pull/5149#discussion_r28713910
  
    --- Diff: dev/merge_spark_pr.py ---
    @@ -286,68 +280,149 @@ def resolve_jira_issues(title, merge_branches, 
comment):
             resolve_jira_issue(merge_branches, comment, jira_id)
     
     
    -branches = get_json("%s/branches" % GITHUB_API_BASE)
    -branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x 
in branches])
    -# Assumes branch names can be sorted lexicographically
    -latest_branch = sorted(branch_names, reverse=True)[0]
    -
    -pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): 
")
    -pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
    -pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
    -
    -url = pr["url"]
    -title = pr["title"]
    -body = pr["body"]
    -target_ref = pr["base"]["ref"]
    -user_login = pr["user"]["login"]
    -base_ref = pr["head"]["ref"]
    -pr_repo_desc = "%s/%s" % (user_login, base_ref)
    -
    -# Merged pull requests don't appear as merged in the GitHub API;
    -# Instead, they're closed by asfgit.
    -merge_commits = \
    -    [e for e in pr_events if e["actor"]["login"] == "asfgit" and 
e["event"] == "closed"]
    -
    -if merge_commits:
    -    merge_hash = merge_commits[0]["commit_id"]
    -    message = get_json("%s/commits/%s" % (GITHUB_API_BASE, 
merge_hash))["commit"]["message"]
    -
    -    print "Pull request %s has already been merged, assuming you want to 
backport" % pr_num
    -    commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', 
'--verify',
    +def standardize_jira_ref(text):
    +    """
    +    Standardize the [SPARK-XXXXX] [MODULE] prefix
    +    Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or 
"SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX] [MLLIB] Issue"
    +    
    +    >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS 
should check if delete is successful")
    +    '[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is 
successful'
    +    >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new 
dependencies added in pull requests")
    +    '[SPARK-4123] [PROJECT INFRA] [WIP] Show new dependencies added in 
pull requests'
    +    >>> standardize_jira_ref("[MLlib] Spark  5954: Top by key")
    +    '[SPARK-5954] [MLLIB] Top by key'
    +    >>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load 
balancing in TaskSchedulerImpl")
    +    '[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
    +    >>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary 
compatibility accross versions.")
    +    '[SPARK-1094] Support MiMa for reporting binary compatibility accross 
versions.'
    +    >>> standardize_jira_ref("[WIP]  [SPARK-1146] Vagrant support for 
Spark")
    +    '[SPARK-1146] [WIP] Vagrant support for Spark'
    +    >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before 
registering, app master stays aroun...")
    +    '[SPARK-1032] If Yarn app fails before registering, app master stays 
aroun...'
    +    >>> standardize_jira_ref("[SPARK-6250][SPARK-6146][SPARK-5911][SQL] 
Types are now reserved words in DDL parser.")
    +    '[SPARK-6250] [SPARK-6146] [SPARK-5911] [SQL] Types are now reserved 
words in DDL parser.'
    +    >>> standardize_jira_ref("Additional information for users building 
from source code")
    +    'Additional information for users building from source code'
    +    """
    +    jira_refs = []
    +    components = []
    +    
    +    # If the string is compliant, no need to process any further
    +    if (re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+', 
text)):
    +        return text
    +    
    +    # Extract JIRA ref(s):
    +    pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})+', re.IGNORECASE)
    +    for ref in pattern.findall(text):
    +        # Add brackets, replace spaces with a dash, & convert to uppercase
    +        jira_refs.append('[' + re.sub(r'\s+', '-', ref.upper()) + ']')
    +        text = text.replace(ref, '')
    +
    +    # Extract spark component(s):
    +    # Look for alphanumeric chars, spaces, dashes, periods, and/or commas
    +    pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE)
    +    for component in pattern.findall(text):
    +        components.append(component.upper())
    +        text = text.replace(component, '')
    +
    +    # Cleanup any remaining symbols:
    +    pattern = re.compile(r'^\W+(.*)', re.IGNORECASE)
    +    if (pattern.search(text) is not None):
    +        text = pattern.search(text).groups()[0]
    +
    +    # Assemble full text (JIRA ref(s), module(s), remaining text)
    +    clean_text = ' '.join(jira_refs).strip() + " " + ' 
'.join(components).strip() + " " + text.strip()
    +    
    +    # Replace multiple spaces with a single space, e.g. if no jira refs 
and/or components were included
    +    clean_text = re.sub(r'\s+', ' ', clean_text.strip())
    +    
    +    return clean_text
    +
    +def main():
    +    os.chdir(SPARK_HOME)
    +    original_head = run_cmd("git rev-parse HEAD")[:8]
    +    
    +    branches = get_json("%s/branches" % GITHUB_API_BASE)
    +    branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] 
for x in branches])
    +    # Assumes branch names can be sorted lexicographically
    +    latest_branch = sorted(branch_names, reverse=True)[0]
    +
    +    pr_num = raw_input("Which pull request would you like to merge? (e.g. 
34): ")
    +    pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
    +    pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
    +
    +    url = pr["url"]
    +
    +    # Decide whether to use the modified title or not
    +    print "I've re-written the title as follows to match the standard 
format:"
    --- End diff --
    
    Can we show the prompt only if we've actually modified the title? In many 
cases the title is already properly formatted, so the prompt is unnecessary. 
I'd compute `standardize_jira_ref(pr["title"])` earlier on, then check whether 
it matches `pr["title"]`, and ask the user only if it differs.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to