Github user pwendell commented on a diff in the pull request:

    https://github.com/apache/spark/pull/5149#discussion_r28663536
  
    --- Diff: dev/merge_spark_pr.py ---
    @@ -286,68 +281,145 @@ def resolve_jira_issues(title, merge_branches, 
comment):
             resolve_jira_issue(merge_branches, comment, jira_id)
     
     
    -branches = get_json("%s/branches" % GITHUB_API_BASE)
    -branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] for x 
in branches])
    -# Assumes branch names can be sorted lexicographically
    -latest_branch = sorted(branch_names, reverse=True)[0]
    -
    -pr_num = raw_input("Which pull request would you like to merge? (e.g. 34): 
")
    -pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
    -pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
    -
    -url = pr["url"]
    -title = pr["title"]
    -body = pr["body"]
    -target_ref = pr["base"]["ref"]
    -user_login = pr["user"]["login"]
    -base_ref = pr["head"]["ref"]
    -pr_repo_desc = "%s/%s" % (user_login, base_ref)
    -
    -# Merged pull requests don't appear as merged in the GitHub API;
    -# Instead, they're closed by asfgit.
    -merge_commits = \
    -    [e for e in pr_events if e["actor"]["login"] == "asfgit" and 
e["event"] == "closed"]
    -
    -if merge_commits:
    -    merge_hash = merge_commits[0]["commit_id"]
    -    message = get_json("%s/commits/%s" % (GITHUB_API_BASE, 
merge_hash))["commit"]["message"]
    -
    -    print "Pull request %s has already been merged, assuming you want to 
backport" % pr_num
    -    commit_is_downloaded = run_cmd(['git', 'rev-parse', '--quiet', 
'--verify',
    +def standardize_jira_ref(text):
    +    """
    +    Standardize the [SPARK-XXXXX] [MODULE] prefix
    +    Converts "[SPARK-XXX][mllib] Issue", "[MLLib] SPARK-XXX. Issue" or 
"SPARK XXX [MLLIB]: Issue" to "[SPARK-XXX] [MLLIB] Issue"
    +    
    +    >>> standardize_jira_ref("[SPARK-5821] [SQL] ParquetRelation2 CTAS 
should check if delete is successful")
    +    '[SPARK-5821] [SQL] ParquetRelation2 CTAS should check if delete is 
successful'
    +    >>> standardize_jira_ref("[SPARK-4123][Project Infra][WIP]: Show new 
dependencies added in pull requests")
    +    '[SPARK-4123] [PROJECT INFRA] [WIP] Show new dependencies added in 
pull requests'
    +    >>> standardize_jira_ref("[MLlib] Spark  5954: Top by key")
    +    '[SPARK-5954] [MLLIB] Top by key'
    +    >>> standardize_jira_ref("[SPARK-979] a LRU scheduler for load 
balancing in TaskSchedulerImpl")
    +    '[SPARK-979] a LRU scheduler for load balancing in TaskSchedulerImpl'
    +    >>> standardize_jira_ref("SPARK-1094 Support MiMa for reporting binary 
compatibility accross versions.")
    +    '[SPARK-1094] Support MiMa for reporting binary compatibility accross 
versions.'
    +    >>> standardize_jira_ref("[WIP]  [SPARK-1146] Vagrant support for 
Spark")
    +    '[SPARK-1146] [WIP] Vagrant support for Spark'
    +    >>> standardize_jira_ref("SPARK-1032. If Yarn app fails before 
registering, app master stays aroun...")
    +    '[SPARK-1032] If Yarn app fails before registering, app master stays 
aroun...'
    +    """
    +    # If the string is compliant, no need to process any further
    +    if (re.search(r'^\[SPARK-[0-9]{3,6}\] (\[[A-Z0-9_\s,]+\] )+\S+', 
text)):
    +        return text
    +    
    +    # Extract JIRA ref(s):
    +    jira_refs = deque()
    +    pattern = re.compile(r'(SPARK[-\s]*[0-9]{3,6})', re.IGNORECASE)
    +    while (pattern.search(text) is not None):
    +        ref = pattern.search(text).groups()[0]
    +        # Replace any whitespace with a dash & convert to uppercase
    +        jira_refs.append('[' + re.sub(r'\s+', '-', ref.upper()) + ']')
    +        text = text.replace(ref, '')
    +
    +    # Extract spark component(s):
    +    components = deque()
    +    # Look for alphanumeric chars, spaces, dashes, periods, and/or commas
    +    pattern = re.compile(r'(\[[\w\s,-\.]+\])', re.IGNORECASE)
    +    while (pattern.search(text) is not None):
    +        component = pattern.search(text).groups()[0]
    +        # Convert to uppercase
    +        components.append(component.upper())
    +        text = text.replace(component, '')
    +
    +    # Cleanup remaining symbols:
    +    pattern = re.compile(r'^\W+(.*)', re.IGNORECASE)
    +    if (pattern.search(text) is not None):
    +        text = pattern.search(text).groups()[0]
    +
    +    # Assemble full text (JIRA ref(s), module(s), remaining text)
    +    if (len(jira_refs) < 1):
    +        jira_ref_text = ""
    +    jira_ref_text = ' '.join(jira_refs).strip()
    +    if (len(components) < 1):
    +        components = ""
    +    component_text = ' '.join(components).strip()
    +    
    +    if (len(jira_ref_text) < 1 and len(component_text) < 1):
    +        clean_text = text.strip()
    +    elif (len(jira_ref_text) < 1):
    +        clean_text = component_text + ' ' + text.strip()
    +    elif (len(component_text) < 1):
    +        clean_text = jira_ref_text + ' ' + text.strip()
    +    else:
    +        clean_text = jira_ref_text + ' ' + component_text + ' ' + 
text.strip()
    +    
    +    return clean_text
    +
    +def main():
    +    os.chdir(SPARK_HOME)
    +    original_head = run_cmd("git rev-parse HEAD")[:8]
    +    
    +    branches = get_json("%s/branches" % GITHUB_API_BASE)
    +    branch_names = filter(lambda x: x.startswith("branch-"), [x['name'] 
for x in branches])
    +    # Assumes branch names can be sorted lexicographically
    +    latest_branch = sorted(branch_names, reverse=True)[0]
    +
    +    pr_num = raw_input("Which pull request would you like to merge? (e.g. 
34): ")
    +    pr = get_json("%s/pulls/%s" % (GITHUB_API_BASE, pr_num))
    +    pr_events = get_json("%s/issues/%s/events" % (GITHUB_API_BASE, pr_num))
    +
    +    url = pr["url"]
    +    title = standardize_jira_ref(pr["title"])
    --- End diff --
    
    If the title has been modified, I'd prompt the user here for a yes/no as to 
whether to use the modified title. For example, just say "I've re-written the 
title as follows to match the format: <old> <new>. Would you like to use the new 
title?", with <old> and <new> each on a new line. That way, if our heuristic 
is broken in some way, the committer can just ignore the re-writing.


---
If your project is set up for it, you can reply to this email and have your
reply appear on GitHub as well. If your project does not have this feature
enabled and would like it, or if the feature is enabled but not working, please
contact infrastructure at [email protected] or file a JIRA ticket
with INFRA.
---

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to