This is an automated email from the ASF dual-hosted git repository. dongjoon pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/spark.git
The following commit(s) were added to refs/heads/master by this push: new d7845da6ddf [SPARK-45125][INFRA] Remove dev/github_jira_sync.py in favor of ASF jira_options d7845da6ddf is described below commit d7845da6ddf2f838b1d91606b8730d078fea11b4 Author: Kent Yao <y...@apache.org> AuthorDate: Tue Sep 12 00:27:17 2023 -0700 [SPARK-45125][INFRA] Remove dev/github_jira_sync.py in favor of ASF jira_options ### What changes were proposed in this pull request? Since SPARK-44942 and https://issues.apache.org/jira/browse/INFRA-24962, we've enabled jira_options for GitHub and JIRA syncing, and it's been working properly. Thus, this PR removes dev/github_jira_sync.py in favor of ASF jira_options. ### Why are the changes needed? code cleanup ### Does this PR introduce _any_ user-facing change? no ### How was this patch tested? confirmed with INFRA and watched the JIRA for several days ### Was this patch authored or co-authored using generative AI tooling? no Closes #42882 from yaooqinn/SPARK-45125. Authored-by: Kent Yao <y...@apache.org> Signed-off-by: Dongjoon Hyun <dh...@apple.com> --- .github/labeler.yml | 4 +- dev/github_jira_sync.py | 202 ------------------------------------------------ 2 files changed, 1 insertion(+), 205 deletions(-) diff --git a/.github/labeler.yml b/.github/labeler.yml index 4ae831f2131..b252edd8873 100644 --- a/.github/labeler.yml +++ b/.github/labeler.yml @@ -42,12 +42,11 @@ INFRA: - ".asf.yaml" - ".gitattributes" - ".gitignore" - - "dev/github_jira_sync.py" - "dev/merge_spark_pr.py" - "dev/run-tests-jenkins*" BUILD: # Can be supported when a stable release with correct all/any is released - #- any: ['dev/**/*', '!dev/github_jira_sync.py', '!dev/merge_spark_pr.py', '!dev/.rat-excludes'] + #- any: ['dev/**/*', '!dev/merge_spark_pr.py', '!dev/.rat-excludes'] - "dev/**/*" - "build/**/*" - "project/**/*" @@ -58,7 +57,6 @@ BUILD: - "scalastyle-config.xml" # These can be added in the above `any` clause (and the /dev/**/* glob removed) when # `any`/`all` 
support is released - # - "!dev/github_jira_sync.py" # - "!dev/merge_spark_pr.py" # - "!dev/run-tests-jenkins*" # - "!dev/.rat-excludes" diff --git a/dev/github_jira_sync.py b/dev/github_jira_sync.py deleted file mode 100755 index 45908518d82..00000000000 --- a/dev/github_jira_sync.py +++ /dev/null @@ -1,202 +0,0 @@ -#!/usr/bin/env python3 - -# -# Licensed to the Apache Software Foundation (ASF) under one or more -# contributor license agreements. See the NOTICE file distributed with -# this work for additional information regarding copyright ownership. -# The ASF licenses this file to You under the Apache License, Version 2.0 -# (the "License"); you may not use this file except in compliance with -# the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, software -# distributed under the License is distributed on an "AS IS" BASIS, -# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. -# See the License for the specific language governing permissions and -# limitations under the License. 
-# -# Utility for updating JIRA's with information about GitHub pull requests - -import json -import os -import re -import sys -from urllib.request import urlopen -from urllib.request import Request -from urllib.error import HTTPError - -try: - import jira.client -except ImportError: - print("This tool requires the jira-python library") - print("Install using 'pip3 install jira'") - sys.exit(-1) - -# User facing configs -GITHUB_API_BASE = os.environ.get("GITHUB_API_BASE", "https://api.github.com/repos/apache/spark") -GITHUB_OAUTH_KEY = os.environ.get("GITHUB_OAUTH_KEY") -JIRA_PROJECT_NAME = os.environ.get("JIRA_PROJECT_NAME", "SPARK") -JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira") -JIRA_USERNAME = os.environ.get("JIRA_USERNAME", "apachespark") -JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", "XXX") -# Maximum number of updates to perform in one run -MAX_UPDATES = int(os.environ.get("MAX_UPDATES", "100000")) -# Cut-off for oldest PR on which to comment. Useful for avoiding -# "notification overload" when running for the first time. -MIN_COMMENT_PR = int(os.environ.get("MIN_COMMENT_PR", "1496")) - -# File used as an optimization to store maximum previously seen PR -# Used mostly because accessing ASF JIRA is slow, so we want to avoid checking -# the state of JIRA's that are tied to PR's we've already looked at. -MAX_FILE = ".github-jira-max" - - -def get_url(url): - try: - request = Request(url) - request.add_header("Authorization", "token %s" % GITHUB_OAUTH_KEY) - return urlopen(request) - except HTTPError: - print("Unable to fetch URL, exiting: %s" % url) - sys.exit(-1) - - -def get_json(urllib_response): - return json.loads(urllib_response.read().decode("utf-8")) - - -# Return a list of (JIRA id, JSON dict) tuples: -# e.g. [('SPARK-1234', {.. json ..}), ('SPARK-5687', {.. 
json ..})} -def get_jira_prs(): - result = [] - has_next_page = True - page_num = 0 - while has_next_page: - page = get_url(GITHUB_API_BASE + "/pulls?page=%s&per_page=100" % page_num) - page_json = get_json(page) - - for pull in page_json: - jira_issues = re.findall(JIRA_PROJECT_NAME + "-[0-9]{4,5}", pull["title"]) - for jira_issue in jira_issues: - result = result + [(jira_issue, pull)] - - # Check if there is another page - link_headers = list(filter(lambda k: k.startswith("Link"), page.headers)) - if not link_headers or "next" not in link_headers[0]: - has_next_page = False - else: - page_num += 1 - return result - - -def set_max_pr(max_val): - f = open(MAX_FILE, "w") - f.write("%s" % max_val) - f.close() - print("Writing largest PR number seen: %s" % max_val) - - -def get_max_pr(): - if os.path.exists(MAX_FILE): - result = int(open(MAX_FILE, "r").read()) - print("Read largest PR number previously seen: %s" % result) - return result - else: - return 0 - - -def build_pr_component_dic(jira_prs): - print("Build PR dictionary") - dic = {} - for issue, pr in jira_prs: - print(issue) - page = get_json(get_url(JIRA_API_BASE + "/rest/api/2/issue/" + issue)) - jira_components = [c["name"].upper() for c in page["fields"]["components"]] - if pr["number"] in dic: - dic[pr["number"]][1].update(jira_components) - else: - pr_components = set(label["name"].upper() for label in pr["labels"]) - dic[pr["number"]] = (pr_components, set(jira_components)) - return dic - - -def reset_pr_labels(pr_num, jira_components): - url = "%s/issues/%s/labels" % (GITHUB_API_BASE, pr_num) - labels = ", ".join(('"%s"' % c) for c in jira_components) - try: - request = Request(url, data=('{"labels":[%s]}' % labels).encode("utf-8")) - request.add_header("Content-Type", "application/json") - request.add_header("Authorization", "token %s" % GITHUB_OAUTH_KEY) - request.get_method = lambda: "PUT" - urlopen(request) - print("Set %s with labels %s" % (pr_num, labels)) - except HTTPError: - print("Unable to 
update PR labels, exiting: %s" % url) - sys.exit(-1) - - -jira_client = jira.client.JIRA({"server": JIRA_API_BASE}, basic_auth=(JIRA_USERNAME, JIRA_PASSWORD)) - -jira_prs = get_jira_prs() - -previous_max = get_max_pr() -print("Retrieved %s JIRA PR's from GitHub" % len(jira_prs)) -jira_prs = [(k, v) for k, v in jira_prs if int(v["number"]) > previous_max] -print("%s PR's remain after excluding visited ones" % len(jira_prs)) - -num_updates = 0 -considered = [] -for issue, pr in sorted(jira_prs, key=lambda kv: int(kv[1]["number"])): - if num_updates >= MAX_UPDATES: - break - pr_num = int(pr["number"]) - - print("Checking issue %s" % issue) - considered = considered + [pr_num] - - url = pr["html_url"] - title = "[GitHub] Pull Request #%s (%s)" % (pr["number"], pr["user"]["login"]) - try: - page = get_json(get_url(JIRA_API_BASE + "/rest/api/2/issue/" + issue + "/remotelink")) - existing_links = map(lambda l: l["object"]["url"], page) - except BaseException: - print("Failure reading JIRA %s (does it exist?)" % issue) - print(sys.exc_info()[0]) - continue - - if url in existing_links: - continue - - icon = { - "title": "Pull request #%s" % pr["number"], - "url16x16": "https://assets-cdn.github.com/favicon.ico", - } - destination = {"title": title, "url": url, "icon": icon} - # For all possible fields see: - # https://developer.atlassian.com/display/JIRADEV/Fields+in+Remote+Issue+Links - # application = {"name": "GitHub pull requests", "type": "org.apache.spark.jira.github"} - jira_client.add_remote_link(issue, destination) - - comment = "User '%s' has created a pull request for this issue:" % pr["user"]["login"] - comment += "\n%s" % pr["html_url"] - if pr_num >= MIN_COMMENT_PR: - jira_client.add_comment(issue, comment) - - print("Added link %s <-> PR #%s" % (issue, pr["number"])) - num_updates += 1 - -if len(considered) > 0: - set_max_pr(max(considered)) - - -# Additionally, expose the JIRA labels to the PR -num_updates = 0 -for pr_num, (pr_components, jira_components) 
in build_pr_component_dic(jira_prs).items(): - print(pr_num) - if pr_components == jira_components: - continue - if num_updates >= MAX_UPDATES: - break - reset_pr_labels(pr_num, jira_components) - num_updates += 1 --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@spark.apache.org For additional commands, e-mail: commits-h...@spark.apache.org