This is an automated email from the ASF dual-hosted git repository.

wenchen pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/spark.git


The following commit(s) were added to refs/heads/master by this push:
     new de3430932757 [SPARK-54960][INFRA] Simplify generating contributors of 
the release process
de3430932757 is described below

commit de3430932757da54dbcf953a8cb0e6246ca2f065
Author: Wenchen Fan <[email protected]>
AuthorDate: Mon Jan 12 12:40:29 2026 +0800

    [SPARK-54960][INFRA] Simplify generating contributors of the release process
    
    ### What changes were proposed in this pull request?
    
    One step of the release process is to generate the contributor names and 
put them in the release notes. The script does a lot of work to find the full 
names of contributors, but I think it is more useful to credit contributors by 
their GitHub user ID than by their full name.
    
    This PR simplifies this step: we now list each contributor in the form 
`github user id (Full Name)`, or as just the GitHub user ID if the user has 
not specified a full name in their GitHub profile.
    
    ### Why are the changes needed?
    
    Simplify release process.
    
    ### Does this PR introduce _any_ user-facing change?
    
    no
    
    ### How was this patch tested?
    
    Manually, by running the script to generate the 4.1.0 contributor list.
    
    ### Was this patch authored or co-authored using generative AI tooling?
    
    cursor 2.3.29
    
    Closes #53728 from cloud-fan/script.
    
    Lead-authored-by: Wenchen Fan <[email protected]>
    Co-authored-by: Wenchen Fan <[email protected]>
    Signed-off-by: Wenchen Fan <[email protected]>
---
 dev/create-release/generate-contributors.py  | 181 ++-------
 dev/create-release/known_translations        | 547 ---------------------------
 dev/create-release/releaseutils.py           | 177 +--------
 dev/create-release/translate-contributors.py | 308 ---------------
 4 files changed, 44 insertions(+), 1169 deletions(-)

diff --git a/dev/create-release/generate-contributors.py 
b/dev/create-release/generate-contributors.py
index bb635159e465..e32d81838e83 100755
--- a/dev/create-release/generate-contributors.py
+++ b/dev/create-release/generate-contributors.py
@@ -16,32 +16,25 @@
 # See the License for the specific language governing permissions and
 # limitations under the License.
 #
-# This script automates the process of creating release notes.
+# This script generates a list of contributors between two release tags.
 
 import os
-import re
 import sys
 
+from github import Github
+
 from releaseutils import (
     tag_exists,
     get_commits,
     yesOrNoPrompt,
-    get_date,
-    is_valid_author,
-    capitalize_author,
-    JIRA,
-    find_components,
-    translate_issue_type,
-    translate_component,
-    CORE_COMPONENT,
     contributors_file_name,
-    nice_join,
+    get_github_name,
 )
 
 # You must set the following before use!
-JIRA_API_BASE = os.environ.get("JIRA_API_BASE", 
"https://issues.apache.org/jira")
 RELEASE_TAG = os.environ.get("RELEASE_TAG", "v1.2.0-rc2")
 PREVIOUS_RELEASE_TAG = os.environ.get("PREVIOUS_RELEASE_TAG", "v1.1.0")
+GITHUB_OAUTH_KEY = os.environ.get("GITHUB_OAUTH_KEY")
 
 # If the release tags are not provided, prompt the user to provide them
 while not tag_exists(RELEASE_TAG):
@@ -78,7 +71,6 @@ if not new_commits:
 
 # Prompt the user for confirmation that the commit range is correct
 
print("\n==================================================================================")
-print("JIRA server: %s" % JIRA_API_BASE)
 print("Release tag: %s" % RELEASE_TAG)
 print("Previous release tag: %s" % PREVIOUS_RELEASE_TAG)
 print("Number of commits in this range: %s" % len(new_commits))
@@ -100,7 +92,6 @@ if not yesOrNoPrompt("Does this look correct?"):
 releases = []
 maintenance = []
 reverts = []
-nojiras = []
 filtered_commits = []
 
 
@@ -117,18 +108,10 @@ def is_maintenance(commit_title):
     return "maintenance" in commit_title.lower() or "manually close" in 
commit_title.lower()
 
 
-def has_no_jira(commit_title):
-    return not re.findall("SPARK-[0-9]+", commit_title.upper())
-
-
 def is_revert(commit_title):
     return "revert" in commit_title.lower()
 
 
-def is_docs(commit_title):
-    return re.findall("docs*", commit_title.lower()) or "programming guide" in 
commit_title.lower()
-
-
 for c in new_commits:
     t = c.get_title()
     if not t:
@@ -139,15 +122,11 @@ for c in new_commits:
         maintenance.append(c)
     elif is_revert(t):
         reverts.append(c)
-    elif is_docs(t):
-        filtered_commits.append(c)  # docs may not have JIRA numbers
-    elif has_no_jira(t):
-        nojiras.append(c)
     else:
         filtered_commits.append(c)
 
 # Warn against ignored commits
-if releases or maintenance or reverts or nojiras:
+if releases or maintenance or reverts:
     
print("\n==================================================================================")
     if releases:
         print("Found %d release commits" % len(releases))
@@ -155,8 +134,6 @@ if releases or maintenance or reverts or nojiras:
         print("Found %d maintenance commits" % len(maintenance))
     if reverts:
         print("Found %d revert commits" % len(reverts))
-    if nojiras:
-        print("Found %d commits with no JIRA" % len(nojiras))
     print("* Warning: these commits will be ignored.\n")
     if yesOrNoPrompt("Show ignored commits?"):
         if releases:
@@ -168,137 +145,45 @@ if releases or maintenance or reverts or nojiras:
         if reverts:
             print("Revert (%d)" % len(reverts))
             print_indented(reverts)
-        if nojiras:
-            print("No JIRA (%d)" % len(nojiras))
-            print_indented(nojiras)
     print("==================== Warning: the above commits will be ignored 
==================\n")
 prompt_msg = "%d commits left to process after filtering. Ok to proceed?" % 
len(filtered_commits)
 if not yesOrNoPrompt(prompt_msg):
     sys.exit("Ok, exiting.")
 
-# Keep track of warnings to tell the user at the end
-warnings = []
-
-# Mapping from the invalid author name to its associated JIRA issues
-# E.g. andrewor14 -> set("SPARK-2413", "SPARK-3551", "SPARK-3471")
-invalid_authors = {}
+# Initialize GitHub client
+github_client = Github(GITHUB_OAUTH_KEY) if GITHUB_OAUTH_KEY else Github()
 
-# Populate a map that groups issues and components by author
-# It takes the form: Author name -> { Contribution type -> Spark components }
-# For instance,
-# {
-#   'Andrew Or': {
-#     'bug fixes': ['windows', 'core', 'web ui'],
-#     'improvements': ['core']
-#   },
-#   'Tathagata Das' : {
-#     'bug fixes': ['streaming']
-#     'new feature': ['streaming']
-#   }
-# }
-#
-author_info = {}
-jira_options = {"server": JIRA_API_BASE}
-jira_client = JIRA(options=jira_options)
+# Extract unique GitHub usernames from commits
 print("\n=========================== Compiling contributor list 
===========================")
+github_usernames = set()
 for commit in filtered_commits:
-    _hash = commit.get_hash()
-    title = commit.get_title()
-    issues = re.findall("SPARK-[0-9]+", title.upper())
-    author = commit.get_author()
-    date = get_date(_hash)
-    # If the author name is invalid, keep track of it along
-    # with all associated issues so we can translate it later
-    if is_valid_author(author):
-        author = capitalize_author(author)
+    # Get GitHub username from commit body (parsed by get_commits)
+    github_username = commit.get_github_username()
+    if github_username:
+        github_usernames.add(github_username)
+        print("  Processed commit %s by @%s" % (commit.get_hash(), 
github_username))
     else:
-        if author not in invalid_authors:
-            invalid_authors[author] = set()
-        for issue in issues:
-            invalid_authors[author].add(issue)
-    # Parse components from the commit title, if any
-    commit_components = find_components(title, _hash)
-    # Populate or merge an issue into author_info[author]
+        print("  Skipping commit %s (no GitHub username found)" % 
commit.get_hash())
 
-    def populate(issue_type, components):
-        components = components or [CORE_COMPONENT]  # assume core if no 
components provided
-        if author not in author_info:
-            author_info[author] = {}
-        if issue_type not in author_info[author]:
-            author_info[author][issue_type] = set()
-        for component in components:
-            author_info[author][issue_type].add(component)
-
-    # Find issues and components associated with this commit
-    for issue in issues:
-        try:
-            jira_issue = jira_client.issue(issue)
-            jira_type = jira_issue.fields.issuetype.name
-            jira_type = translate_issue_type(jira_type, issue, warnings)
-            jira_components = [
-                translate_component(c.name, _hash, warnings) for c in 
jira_issue.fields.components
-            ]
-            all_components = set(jira_components + commit_components)
-            populate(jira_type, all_components)
-        except Exception as e:
-            print("Unexpected error:", e)
-    # For docs without an associated JIRA, manually add it ourselves
-    if is_docs(title) and not issues:
-        populate("documentation", commit_components)
-    print("  Processed commit %s authored by %s on %s" % (_hash, author, date))
+print("==================================================================================\n")
+print("Found %d unique contributors" % len(github_usernames))
+
+# For each GitHub username, get the full name from GitHub profile
+contributors = []
+print("\n=========================== Fetching GitHub profiles 
===========================")
+for username in sorted(github_usernames):
+    full_name = get_github_name(username, github_client)
+    if full_name:
+        contributor = "%s (%s)" % (username, full_name)
+    else:
+        contributor = username
+    contributors.append(contributor)
+    print("  %s" % contributor)
 
print("==================================================================================\n")
 
-# Write to contributors file ordered by author names
-# Each line takes the format " * Author name -- semi-colon delimited 
contributions"
-# e.g. * Andrew Or -- Bug fixes in Windows, Core, and Web UI; improvements in 
Core
-# e.g. * Tathagata Das -- Bug fixes and new features in Streaming
+# Write to contributors file
 contributors_file = open(contributors_file_name, "w")
-authors = list(author_info.keys())
-authors.sort()
-for author in authors:
-    contribution = ""
-    components = set()
-    issue_types = set()
-    for issue_type, comps in author_info[author].items():
-        components.update(comps)
-        issue_types.add(issue_type)
-    # If there is only one component, mention it only once
-    # e.g. Bug fixes, improvements in MLlib
-    if len(components) == 1:
-        contribution = "%s in %s" % (nice_join(issue_types), 
next(iter(components)))
-    # Otherwise, group contributions by issue types instead of modules
-    # e.g. Bug fixes in MLlib, Core, and Streaming; documentation in YARN
-    else:
-        contributions = [
-            "%s in %s" % (issue_type, nice_join(comps))
-            for issue_type, comps in author_info[author].items()
-        ]
-        contribution = "; ".join(contributions)
-    # Do not use python's capitalize() on the whole string to preserve case
-    assert contribution
-    contribution = contribution[0].capitalize() + contribution[1:]
-    # If the author name is invalid, use an intermediate format that
-    # can be translated through translate-contributors.py later
-    # E.g. andrewor14/SPARK-3425/SPARK-1157/SPARK-6672
-    if author in invalid_authors and invalid_authors[author]:
-        author = author + "/" + "/".join(invalid_authors[author])
-    # line = " * %s -- %s" % (author, contribution)
-    line = author
-    contributors_file.write(line + "\n")
+for contributor in sorted(contributors, key=str.lower):
+    contributors_file.write(contributor + "\n")
 contributors_file.close()
 print("Contributors list is successfully written to %s!" % 
contributors_file_name)
-
-# Prompt the user to translate author names if necessary
-if invalid_authors:
-    warnings.append("Found the following invalid authors:")
-    for a in invalid_authors:
-        warnings.append("\t%s" % a)
-    warnings.append("Please run './translate-contributors.py' to translate 
them.")
-
-# Log any warnings encountered in the process
-if warnings:
-    print("\n============ Warnings encountered while creating the contributor 
list ============")
-    for w in warnings:
-        print(w)
-    print("Please correct these in the final contributors list at %s." % 
contributors_file_name)
-    
print("==================================================================================\n")
diff --git a/dev/create-release/known_translations 
b/dev/create-release/known_translations
deleted file mode 100644
index 0f07cb081416..000000000000
--- a/dev/create-release/known_translations
+++ /dev/null
@@ -1,547 +0,0 @@
-# This is a mapping of names to be translated through translate-contributors.py
-# The format expected on each line should be: <GitHub ID> - <full name>
-012huang - Weiyi Huang
-07ARB - Ankit Raj Boudh
-10110346 - Xian Liu
-979969786 - Yuming Wang
-Achuth17 - Achuth Narayan Rajagopal
-Adamyuanyuan - Adam Wang
-AiHe - Ai He
-Andrew-Crosby - Andrew Crosby
-AngersZhuuuu - Yi Zhu
-BenFradet - Ben Fradet
-Bilna - Bilna P
-ClassNotFoundExp - Fu Xing
-CodingCat - Nan Zhu
-CrazyJvm - Chao Chen
-Deegue - Yizhong Zhang
-DoingDone9 - Doing Done
-DylanGuedes - Dylan Guedes
-Earne - Ernest
-EugenCepoi - Eugen Cepoi
-FavioVazquez - Favio Vazquez
-FlytxtRnD - Meethu Mathew
-GenTang - Gen TANG
-GraceH - Jie Huang
-Gschiavon - German Schiavon Matteo
-GuoPhilipse - Philipse Guo
-HeartSaVioR - Jungtaek Lim
-Hellsen83 - Erik Christiansen
-HyukjinKwon - Hyukjin Kwon
-Icysandwich - Icysandwich
-JDrit - Joseph Batchik
-JasonWayne - Wenjie Wu
-JaysonSunshine - Jayson Sunshine
-JerryLead - Lijie Xu
-JiahuiJiang - Jiahui Jiang
-JkSelf - Ke Jia
-JoanFM - Joan Fontanals
-JoshRosen - Josh Rosen
-JuhongPark - Juhong Park
-JulienPeloton - Julien Peloton
-KaiXinXiaoLei - KaiXinXIaoLei
-KevinGrealish - Kevin Grealish
-KevinZwx - Kevin Zhang
-Koraseg - Artem Kupchinskiy
-KyleLi1985 - Liang Li
-LantaoJin - Lantao Jin
-Lemonjing - Rann Tao
-Leolh - Liu Hao
-Lewuathe - Kai Sasaki
-LiShuMing - Shuming Li
-LinhongLiu - Linhong Liu
-Liuchang0812 - Liu Chang
-LucaCanali - Luca Canali
-LuciferYang - Yang Jie
-MasterDDT - Mitesh Patel
-MaxGekk - Maxim Gekk
-MechCoder - Manoj Kumar
-NamelessAnalyst - NamelessAnalyst
-Ngone51 - Yi Wu
-OopsOutOfMemory - Sheng Li
-PavithraRamachandran - Pavithra Ramachandran
-Peishen-Jia - Peishen Jia
-RongGu - Rong Gu
-Rosstin - Rosstin Murphy
-SaintBacchus - Huang Zhaowei
-Sephiroth-Lin - Sephiroth Lin
-Shiti - Shiti Saxena
-SongYadong - Yadong Song
-TJX2014 - Jinxin Tang
-TigerYang414 - David Yang
-TomokoKomiyama - Tomoko Komiyama
-TopGunViper - TopGunViper
-Udbhav30 - Udbhav Agrawal
-Victsm - Min Shen
-VinceShieh - Vincent Xie
-WangGuangxin - Guangxin Wang
-WangTaoTheTonic - Wang Tao
-WeichenXu123 - Weichen Xu
-William1104 - William Wong
-XD-DENG - Xiaodong Deng
-XuTingjun - Tingjun Xu
-YanTangZhai - Yantang Zhai
-Yikf - Kaifei Yi
-YongjinZhou - Yongjin Zhou
-Yunni - Yun Ni
-aai95 - Aleksei Izmalkin
-aaruna - Aaruna Godthi
-actuaryzhang - Wayne Zhang
-adrian555 - Weiqiang Zhuang
-ajithme - Ajith S
-akonopko - Alexander Konopko
-alexdebrie - Alex DeBrie
-alicegugu - Gu Huiqin Alice
-allisonwang-db - Allison Wang
-alokito - Alok Saldanha
-alyaxey - Alex Slusarenko
-amanomer - Aman Omer
-ameyc - Amey Chaugule
-anabranch - Bill Chambers
-anantasty - Anant Asthana
-ancasarb - Anca Sarb
-anchovYu - Xinyi Yu
-andrewor14 - Andrew Or
-aniketbhatnagar - Aniket Bhatnagar
-animeshbaranawal - Animesh Baranawal
-ankuriitg - Ankur Gupta
-aokolnychyi - Anton Okolnychyi
-arahuja - Arun Ahuja
-arucard21 - Riaas Mokiem
-ashangit - Nicolas Fraison
-attilapiros - Attila Zsolt Piros
-avkgh - Aleksandr Kashkirov
-avulanov - Alexander Ulanov
-ayudovin - Artsiom Yudovin
-azagrebin - Andrey Zagrebin
-baishuo - Shuo Bai
-bartosz25 - Bartosz Konieczny
-beliefer - Jiaan Geng
-bettermouse - Chen Hao
-biglobster - Liang Ke
-bravo-zhang - Bravo Zhang
-brkyvz - Burak Yavuz
-bscan - Brian Scannell
-bzz - Alexander Bezzubov
-cafreeman - Chris Freeman
-caneGuy - Kang Zhou
-cchung100m - Neo Chien
-cclauss - Christian Clauss
-cenyuhai - Yuhai Cen
-chakravarthiT - Chakravarthi
-chandulal - Chandu Kavar
-chaoslawful - Xiaozhe Wang
-chesterxgchen - Chester Chen
-chiragaggarwal - Chirag Aggarwal
-chitralverma - Chitral Verma
-chouqin - Qiping Li
-cjn082030 - Juanni Chen
-cloud-fan - Wenchen Fan
-cluo512 - Chuan Luo
-cocoatomo - Tomohiko K.
-codeatri - Neha Patil
-codeborui - codeborui
-coderfi - Fairiz Azizi
-coderxiang - Shuo Xiang
-codlife - Jianfei Wang
-colinmjj - Colin Ma
-crafty-coder - Carlos Pena
-cxzl25 - Shaoyun Chen
-cyq89051127 - Yongqiang Chai
-darrentirto - Darren Tirto
-david-weiluo-ren - Weiluo (David) Ren
-daviddingly - Xiaoyuan Ding
-davidvrba - David Vrba
-davies - Davies Liu
-dding3 - Ding Ding
-debugger87 - Chaozhong Yang
-deepyaman - Deepyaman Datta
-denglingang - Lingang Deng
-dengziming - dengziming
-deshanxiao - deshanxiao
-dima-asana - Dima Kamalov
-dlindelof - David Lindelof
-dobashim - Masaru Dobashi
-dongjoon-hyun - Dongjoon Hyun
-e-dorigatti - Emilio Dorigatti
-eatoncys - Yanshan Chen
-ehnalis - Zoltan Zvara
-emres - Emre Sevinc
-epahomov - Egor Pahomov
-eric-maynard - Eric Maynard
-falaki - Hossein Falaki
-fan31415 - Yijie Fan
-fe2s - Oleksiy Dyagilev
-felixalbani - Felix Albani
-felixcheung - Felix Cheung
-feynmanliang - Feynman Liang
-fidato13 - Tarun Kumar
-fitermay - Yuli Fiterman
-fjh100456 - Jinhua Fu
-fjiang6 - Fan Jiang
-francis0407 - Mingcong Han
-freeman-lab - Jeremy Freeman
-frreiss - Fred Reiss
-fuwhu - Fuwang Hu
-gasparms - Gaspar Munoz
-gatorsmile - Xiao Li
-gchen - Guancheng Chen
-gss2002 - Greg Senia
-guowei2 - Guo Wei
-guoxiaolongzte - Xiaolong Guo
-haiyangsea - Haiyang Sea
-hayashidac - Chie Hayashida
-hddong - Dongdong Hong
-heary-cao - Xuewen Cao
-hehuiyuan - hehuiyuan
-helenyugithub - Helen Yu
-hhbyyh - Yuhao Yang
-highmoutain - highmoutain
-hlin09 - Hao Lin
-hqzizania - Qian Huang
-hseagle - Peng Xu
-httfighter - Tiantian Han
-huangtianhua - huangtianhua
-huangweizhe123 - Weizhe Huang
-hvanhovell - Herman Van Hovell
-iRakson - Rakesh Raushan
-igorcalabria - Igor Calabria
-imback82 - Terry Kim
-industrial-sloth - Jascha Swisher
-invkrh - Hao Ren
-itholic - Haejoon Lee
-ivoson - Tengfei Huang
-jackylee-ch - Junqing Li
-jackylk - Jacky Li
-jagadeesanas2 - Jagadeesan A S
-javadba - Stephen Boesch
-javierivanov - Javier Fuentes
-jayunit100 - Jay Vyas
-jbencook - Ben Cook
-jeanlyn - Jean Lyn
-jerluc - Jeremy A. Lucas
-jerryshao - Saisai Shao
-jiangxb1987 - Jiang Xingbo
-jinxing64 - Jin Xing
-jisookim0513 - Jisoo Kim
-jkbradley - Joseph Bradley
-joelgenter - Joel Genter
-josepablocam - Jose Cambronero
-jrabary - Jaonary Rabarisoa
-judynash - Judy Nash
-junyangq - Junyang Qian
-kai-zeng - Kai Zeng
-kaka1992 - Chen Song
-ketank-new - Ketan Kunde
-krishnakalyan3 - Krishna Kalyan
-ksonj - Kalle Jepsen
-kul - Kuldeep
-kuromatsu-nobuyuki - Nobuyuki Kuromatsu
-laskfla - Keith Sun
-lazyman500 - Dong Xu
-lcqzte10192193 - Chaoqun Li
-leahmcguire - Leah McGuire
-leanken - Leanken Lin
-lee19 - Lee
-leoluan2009 - Xuedong Luan
-liangxs - Xuesen Liang
-lianhuiwang - Lianhui Wang
-lidinghao - Li Hao
-ligangty - Gang Li
-linbojin - Linbo Jin
-linehrr - Ryne Yang
-linzebing - Zebing Lin
-lipzhu - Lipeng Zhu
-lirui-intel - Rui Li
-liu-zhaokun - Zhaokun Liu
-liucht-inspur - liucht-inspur
-liujiayi771 - Jiayi Liu
-liupc - Pengcheng Liu
-liutang123 - Lijia Liu
-liwensun - Liwen Sun
-lockwobr - Brian Lockwood
-luluorta - Lu Lu
-luogankun - Gankun Luo
-lw-lin - Liwei Lin
-maheshk114 - Mahesh Kumar Behera
-maji2014 - Derek Ma
-manuzhang - Manu Zhang
-mareksimunek - Marek Simunek
-maropu - Takeshi Yamamuro
-marsishandsome - Liangliang Gu
-maryannxue - Maryann Xue
-masa3141 - Masahiro Kazama
-mbittmann - Mark Bittmann
-mbonaci - Marko Bonaci
-mccheah - Matthew Cheah
-mcteo - Thomas Dunne
-mdianjun - Dianjun Ma
-meawoppl - Matthew Goodman
-medale - Markus Dale
-mengxr - Xiangrui Meng
-merrily01 - Ruilei Ma
-mingyukim - Mingyu Kim
-mn-mikke - Marek Novotny
-mob-ai - mob-ai
-mosessky - mosessky
-mpjlu - Peng Meng
-msannell - Michael Sannella
-mu5358271 - Shuheng Dai
-mwlon - Martin Loncaric
-myroslavlisniak - Myroslav Lisniak
-nandorKollar - Nandor Kollar
-nartz - Nathan Artz
-navis - Navis Ryu
-neggert - Nic Eggert
-nemccarthy - Nathan McCarthy
-nishkamravi2 - Nishkam Ravi
-noel-smith - Noel Smith
-nooberfsh - nooberfsh
-npoggi - Nicolas Poggi
-nxwhite-str - Nate Crosswhite
-nyaapa - Arsenii Krasikov
-odedz - Oded Zimerman
-oleg-smith - Oleg Kuznetsov
-ozancicek - Ozan Cicekci
-panbingkun - BingKun Pan
-pengbo - Peng Bo
-petermaxlee - Peter Lee
-petz2000 - Patrick Baier
-pgandhi999 - Parth Gandhi
-phalodi - Sandeep Purohit
-phatak-dev - Madhukara Phatak
-pkch - pkch
-planga82 - Pablo Langa Blanco
-pparkkin - Paavo Parkkinen
-prabeesh - Prabeesh K
-praneetsharma - Praneet Sharma
-priyankagargnitk - Priyanka Garg
-ptkool - Michael Styles
-qb-tarushg - Tarush Grover
-qiansl127 - Shilei Qian
-rahulpalamuttam - Rahul Palamuttam
-rakeshchalasani - Rakesh Chalasani
-ravipesala - Ravindra Pesala
-redsanket - Sanket Reddy
-redsk - Nicola Bova
-rekhajoshm - Rekha Joshi
-rimolive - Ricardo Martinelli De Oliveira
-roland1982 - Roland Pogonyi
-rongma1997 - Rong Ma
-rowan000 - Rowan Chattaway
-roxchkplusony - Victor Tso
-rrusso2007 - Rob Russo
-sadhen - Darcy Shen
-samsetegne - Samuel L. Setegne
-sandeep-katta - Sandeep Katta
-sangramga - Sangram Gaikwad
-sarthfrey - Sarth Frey
-sarutak - Kousuke Saruta
-scwf - Wang Fei
-seancxmao - Chenxiao Mao
-seayi - Xiaohua Yi
-seayoun - Haiyang Yu
-sel - Steve Larkin
-sethah - Seth Hendrickson
-sev7e0 - Jiaqi Li
-shahidki31 - Shahid K I
-sharangk - Sharanabasappa G Keriwaddi
-sharkdtu - Xiaogang Tu
-sheepstop - Ting Yang
-shenh062326 - Shen Hong
-shimamoto - Takako Shimamoto
-shimingfei - Shiming Fei
-shivsood - Shiv Prashant Sood
-shivusondur - Shivakumar Sondur
-sigmoidanalytics - Mayur Rustagi
-sisihj - June He
-sitegui - Guilherme Souza
-skonto - Stavros Kontopoulos
-slamke - Sun Ke
-small-wang - Wang Wei
-southernriver - Liang Chen
-squito - Imran Rashid
-stanzhai - Stan Zhai
-stczwd - Jackey Lee
-sujith71955 - Sujith Chacko
-surq - Surong Quan
-suxingfate - Xinglong Wang
-suyanNone - Su Yan
-szheng79 - Shuai Zheng
-tanelk - Tanel Kiis
-tedyu - Ted Yu
-teeyog - Yong Tian
-texasmichelle - Michelle Casbon
-tianyi - Yi Tian
-tien-dungle - Tien-Dung Le
-tigerquoll - Dale Richardson
-tinhto-000 - Tin Hang To
-tools4origins - tools4origins
-triplesheep - triplesheep
-trystanleftwich - Trystan Leftwich
-turboFei - Fei Wang
-ueshin - Takuya Ueshin
-ulysses-you - Xiduo You
-uncleGen - Uncle Gen
-uzadude - Ohad Raviv
-uzmijnlm - Weizhe Huang
-vinodkc - Vinod KC
-viper-kun - Xu Kun
-wForget - Zhen Wang
-wackxu - Shiwei Xu
-wangjiaochun - Jiaochun Wang
-wangshisan - wangshisan
-wangxiaojing - Xiaojing Wang
-watermen - Yadong Qi
-weixiuli - XiuLi Wei
-wenfang6 - wenfang6
-wenxuanguan - wenxuanguan
-williamhyun - William Hyun
-windpiger - Song Jun
-witgo - Guoqiang Li
-woudygao - Woudy Gao
-x1- - Yuri Saito
-xianyinxin - Xianyin Xin
-xinyunh - Xinyun Huang
-xuanyuanking - Yuanjian Li
-xubo245 - Bo Xu
-xuchenCN - Xu Chen
-xueyumusic - Xue Yu
-yanlin-Lynn - Yanlin Wang
-yaooqinn - Kent Yao
-yongtang - Yong Tang
-ypcat - Pei-Lun Lee
-yucai - Yucai Yu
-yunzoud - Yun Zou
-zapletal-martin - Martin Zapletal
-zero323 - Maciej Szymkiewicz
-zhangjiajin - Zhang JiaJin
-zhengruifeng - Ruifeng Zheng
-zhichao-li - Zhichao Li
-zhipengmao-db - Zhipeng Mao
-zjf2012 - Jiafu Zhang
-zsxwing - Shixiong Zhu
-zuotingbing - Tingbing Zuo
-zuxqoj - Shekhar Bansal
-zzcclp - Zhichao Zhang
-Aiden-Dong - Aiden Dong
-Alexvsalexvsalex - Alexey Shishkin
-BOOTMGR - Harsh Panchal
-CarterFendley - Carter Fendley
-CavemanIV - Zhang Liang
-CuiYanxiang - Yanxiang Cui
-Fokko - Fokko Driesprong
-George314159 - Yifeng Hua
-GideonPotok - Gideon Potok
-JacobZheng0927 - Jacob Zheng
-JinHelin404 - Helin Jin
-Justontheway - Justontheway
-KKtheGhost - Kivinsae Fang
-KazMiddelhoek - Kaz
-KnightChess - Knight Chess
-MaxNevermind - Maksim Konstantinov
-Mrhs121 - huangsheng
-PaysonXu - Ping Xu
-RaleSapic - Rastko Sapic
-SteNicholas - Nicholas Jiang
-SubhamSinghal - Subham Singhal
-TakawaAkirayo - Takawa Akirayo
-TongWei1105 - Tong Wei
-WweiL - Wei Liu
-YuanHanzhong - Hanzhong Yuan
-Zouxxyy - Xinyu Zou
-advancedxy - advancedxy
-alex35736 - alex35736
-amoylan2 - Mengran Lan
-andrej-db - Andrej Gobeljić
-antban - antban
-beryllw - Junbo Wang
-bogao007 - Bo Gao
-bowenliang123 - Bowen Liang
-camilesing - camilesing
-cashmand - David Cashman
-changgyoopark-db - Changgyoo Park
-chenyu-opensource - chenyu
-cosmind-db - Cosmin Dumitru
-dev-lpq - dev-lpq
-dzhigimont - Dmitry Zhyhimont
-erenavsarogullari - Eren Avsarogullari
-fanyue-xia - Fanyue Xia
-fred-db - Fredrik Klauss
-guixiaowen - Guihua Wen
-hannahkamundson - Hannah Amundson
-himadripal - Himadri Pal
-hongshaoyang - hongshaoyang
-huangxiaopingRD - Xiaoping Huang
-huangzhir - huangzhir
-huanliwang-db - Huanli Wang
-huaxingao - Huaxin Gao
-huciaa - Hubert Kaczmarczyk
-ilicmarkodb - Marko Ilić
-ivanjevtic-db - Ivan Jevtic
-j7nhai - Jinhai Chen
-jackierwzhang - Jackie Zhang
-jdesjean - Jean-Francois Desjeans Gauthier
-jiangzho - Zhou JIANG
-jiaoqingbo - Qingbo Jiao
-jingz-db - Jing Zhan
-jjayadeep06 - jjayadeep06
-jlfsdtc - Longfei Jiang
-jpcorreia99 - João Correia
-junyuc25 - junyuc25
-jwang0306 - Jared Wang
-kaashif - Kaashif Hymabaccus
-khalidmammadov - Khalid Mammadov
-laglangyue - laglangyue
-leesf - Shaofeng Li
-leletan - Jiale Tan
-liuzqt - Ziqi Liu
-llphxd - Xiaodong Huan
-logze - Zongze Li
-lyy-pineapple - Yongyuan Liang
-mayurdb - Mayur Bhosale
-micheal-o - Babatunde Micheal Okutubo
-mihailoale-db - Mihailo Aleksic
-milastdbx - Milan Stefanovic
-morvenhuang - morvenhuang
-mox692 - Motoyuki Kimura
-ostronaut - Dmytro Tsyliuryk
-philwalk - Phil Walker
-prathit06 - Prathit Malik
-ragnarok56 - Kevin Nacios
-rorueda - rorueda
-sebastianhillig-db - Sebastian Hillig
-sinaiamonkar-sai - Saidatt Sinai Amonkar
-srielau - Serge Rielau
-steveloughran - Steve Loughran
-sunan135 - Sunan Xiang
-the-sakthi - Sakthi
-thomhart31 - Thomas Hart
-tianhanhu - Tianhan Hu
-tomscut - Tao Li
-tomsisso - Tom Sisso
-utkarsh39 - Utkarsh Agarwal
-valentinp17 - Valentin
-vicennial - Venkata Sai Akhil Gudesa
-viktorluc-db - Viktor Lučić
-wankunde - Wan Kun
-wayneguow - Wei Guo
-wzx140 - Frank Wong
-xieshuaihu - Shuiahu Xie
-xin-aurora - xin-aurora
-xleoken - xleoken
-xunxunmimi5577 - xunxunmimi5577
-xuzifu666 - Yu Xu
-y-wei - Yifei Wei
-yabola - yabola
-yorksity - Qiang Yang
-zekai-li - zekai-li
-zeruibao - Zerui Bao
-zhangshuyan0 - Shuyan Zhang
-zhaomin1423 - Min Zhao
-zhengchenyu - Chenyu Zheng
-zhouyifan279 - Zhou Yifan
-zhuqi-lucas - Qi Zhu
-zml1206 - Mingliang Zhu
-zwangsheng - Binjie Yang
-zzzzming95 - Zhiming She
diff --git a/dev/create-release/releaseutils.py 
b/dev/create-release/releaseutils.py
index d7946fd7e3d9..728e58426b3d 100755
--- a/dev/create-release/releaseutils.py
+++ b/dev/create-release/releaseutils.py
@@ -22,19 +22,6 @@ import re
 import sys
 from subprocess import Popen, PIPE
 
-try:
-    from jira.client import JIRA  # noqa: F401
-
-    # Old versions have JIRAError in exceptions package, new (0.5+) in utils.
-    try:
-        from jira.exceptions import JIRAError
-    except ImportError:
-        from jira.utils import JIRAError
-except ImportError:
-    print("This tool requires the jira-python library")
-    print("Install using 'pip3 install jira'")
-    sys.exit(-1)
-
 try:
     from github import Github  # noqa: F401
     from github import GithubException
@@ -65,10 +52,6 @@ def run_cmd_error(cmd):
     return Popen(cmd, stdout=PIPE, stderr=PIPE).communicate()[1].decode("utf8")
 
 
-def get_date(commit_hash):
-    return run_cmd(["git", "show", "--quiet", "--pretty=format:%cd", 
commit_hash])
-
-
 def tag_exists(tag):
     stderr = run_cmd_error(["git", "show", tag])
     return "error" not in stderr
@@ -76,17 +59,17 @@ def tag_exists(tag):
 
 # A type-safe representation of a commit
 class Commit:
-    def __init__(self, _hash, author, title, pr_number=None):
+    def __init__(self, _hash, github_username, title, pr_number=None):
         self._hash = _hash
-        self.author = author
+        self.github_username = github_username
         self.title = title
         self.pr_number = pr_number
 
     def get_hash(self):
         return self._hash
 
-    def get_author(self):
-        return self.author
+    def get_github_username(self):
+        return self.github_username
 
     def get_title(self):
         return self.title
@@ -96,7 +79,7 @@ class Commit:
 
     def __str__(self):
         closes_pr = "(Closes #%s)" % self.pr_number if self.pr_number else ""
-        return "%s %s %s %s" % (self._hash, self.author, self.title, closes_pr)
+        return "%s @%s %s %s" % (self._hash, self.github_username, self.title, 
closes_pr)
 
 
 # Return all commits that belong to the specified tag.
@@ -112,16 +95,7 @@ def get_commits(tag):
     commit_start_marker = "|=== COMMIT START MARKER ===|"
     commit_end_marker = "|=== COMMIT END MARKER ===|"
     field_end_marker = "|=== COMMIT FIELD END MARKER ===|"
-    log_format = (
-        commit_start_marker
-        + "%h"
-        + field_end_marker
-        + "%an"
-        + field_end_marker
-        + "%s"
-        + commit_end_marker
-        + "%b"
-    )
+    log_format = commit_start_marker + "%h" + field_end_marker + "%s" + 
commit_end_marker + "%b"
     output = run_cmd(["git", "log", "--quiet", "--pretty=format:" + 
log_format, tag])
     commits = []
     raw_commits = [c for c in output.split(commit_start_marker) if c]
@@ -132,124 +106,24 @@ def get_commits(tag):
                 print(line)
             sys.exit(1)
         # Separate commit digest from the body
-        # From the digest we extract the hash, author and the title
+        # From the digest we extract the hash and the title
         # From the body, we extract the PR number and the github username
         [commit_digest, commit_body] = commit.split(commit_end_marker)
-        if commit_digest.count(field_end_marker) != 2:
+        if commit_digest.count(field_end_marker) != 1:
             sys.exit("Unexpected format in commit: %s" % commit_digest)
-        [_hash, author, title] = commit_digest.split(field_end_marker)
+        [_hash, title] = commit_digest.split(field_end_marker)
         # The PR number and github username is in the commit message
         # itself and cannot be accessed through any GitHub API
         pr_number = None
+        github_username = None
         match = re.search("Closes #([0-9]+) from ([^/\\s]+)/", commit_body)
         if match:
             [pr_number, github_username] = match.groups()
-            # If the author name is not valid, use the github
-            # username so we can translate it properly later
-            if not is_valid_author(author):
-                author = github_username
-        author = author.strip()
-        commit = Commit(_hash, author, title, pr_number)
+        commit = Commit(_hash, github_username, title, pr_number)
         commits.append(commit)
     return commits
 
 
-# Maintain a mapping for translating issue types to contributions in the release notes
-# This serves an additional function of warning the user against unknown issue types
-# Note: This list is partially derived from this link:
-# https://issues.apache.org/jira/plugins/servlet/project-config/SPARK/issuetypes
-# Keep these in lower case
-known_issue_types = {
-    "bug": "bug fixes",
-    "build": "build fixes",
-    "dependency upgrade": "build fixes",
-    "improvement": "improvements",
-    "new feature": "new features",
-    "documentation": "documentation",
-    "test": "test",
-    "task": "improvement",
-    "sub-task": "improvement",
-}
-
-# Maintain a mapping for translating component names when creating the release 
notes
-# This serves an additional function of warning the user against unknown 
components
-# Note: This list is largely derived from this link:
-# 
https://issues.apache.org/jira/plugins/servlet/project-config/SPARK/components
-CORE_COMPONENT = "Core"
-known_components = {
-    "block manager": CORE_COMPONENT,
-    "build": CORE_COMPONENT,
-    "deploy": CORE_COMPONENT,
-    "documentation": CORE_COMPONENT,
-    "examples": CORE_COMPONENT,
-    "graphx": "GraphX",
-    "input/output": CORE_COMPONENT,
-    "java api": "Java API",
-    "k8s": "Kubernetes",
-    "kubernetes": "Kubernetes",
-    "ml": "MLlib",
-    "mllib": "MLlib",
-    "project infra": "Project Infra",
-    "pyspark": "PySpark",
-    "shuffle": "Shuffle",
-    "spark core": CORE_COMPONENT,
-    "spark shell": CORE_COMPONENT,
-    "sql": "SQL",
-    "streaming": "Streaming",
-    "web ui": "Web UI",
-    "windows": "Windows",
-    "yarn": "YARN",
-}
-
-
-# Translate issue types using a format appropriate for writing contributions
-# If an unknown issue type is encountered, warn the user
-def translate_issue_type(issue_type, issue_id, warnings):
-    issue_type = issue_type.lower()
-    if issue_type in known_issue_types:
-        return known_issue_types[issue_type]
-    else:
-        warnings.append('Unknown issue type "%s" (see %s)' % (issue_type, 
issue_id))
-        return issue_type
-
-
-# Translate component names using a format appropriate for writing 
contributions
-# If an unknown component is encountered, warn the user
-def translate_component(component, commit_hash, warnings):
-    component = component.lower()
-    if component in known_components:
-        return known_components[component]
-    else:
-        warnings.append('Unknown component "%s" (see %s)' % (component, 
commit_hash))
-        return component
-
-
-# Parse components in the commit message
-# The returned components are already filtered and translated
-def find_components(commit, commit_hash):
-    components = re.findall(r"\[\w*\]", commit.lower())
-    components = [
-        translate_component(c, commit_hash, []) for c in components if c in 
known_components
-    ]
-    return components
-
-
-# Join a list of strings in a human-readable manner
-# e.g. ["Juice"] -> "Juice"
-# e.g. ["Juice", "baby"] -> "Juice and baby"
-# e.g. ["Juice", "baby", "moon"] -> "Juice, baby, and moon"
-def nice_join(str_list):
-    str_list = list(str_list)  # sometimes it's a set
-    if not str_list:
-        return ""
-    elif len(str_list) == 1:
-        return next(iter(str_list))
-    elif len(str_list) == 2:
-        return " and ".join(str_list)
-    else:
-        return ", ".join(str_list[:-1]) + ", and " + str_list[-1]
-
-
 # Return the full name of the specified user on GitHub
 # If the user doesn't exist, return None
 def get_github_name(author, github_client):
@@ -261,32 +135,3 @@ def get_github_name(author, github_client):
             if e.status != 404:
                 raise e
     return None
-
-
-# Return the full name of the specified user on JIRA
-# If the user doesn't exist, return None
-def get_jira_name(author, jira_client):
-    if jira_client:
-        try:
-            return jira_client.user(author).displayName
-        except JIRAError as e:
-            # If this is not a "not found" exception
-            if e.status_code != 404:
-                raise e
-    return None
-
-
-# Return whether the given name is in the form <First Name><space><Last Name>
-def is_valid_author(author):
-    if not author:
-        return False
-    return " " in author and not re.findall("[0-9]", author)
-
-
-# Capitalize the first letter of each word in the given author name
-def capitalize_author(author):
-    if not author:
-        return None
-    words = author.split(" ")
-    words = [w[0].capitalize() + w[1:] for w in words if w]
-    return " ".join(words)
diff --git a/dev/create-release/translate-contributors.py b/dev/create-release/translate-contributors.py
deleted file mode 100755
index 3505e0a7a33c..000000000000
--- a/dev/create-release/translate-contributors.py
+++ /dev/null
@@ -1,308 +0,0 @@
-#!/usr/bin/env python3
-#
-# Licensed to the Apache Software Foundation (ASF) under one or more
-# contributor license agreements.  See the NOTICE file distributed with
-# this work for additional information regarding copyright ownership.
-# The ASF licenses this file to You under the Apache License, Version 2.0
-# (the "License"); you may not use this file except in compliance with
-# the License.  You may obtain a copy of the License at
-#
-#    http://www.apache.org/licenses/LICENSE-2.0
-#
-# Unless required by applicable law or agreed to in writing, software
-# distributed under the License is distributed on an "AS IS" BASIS,
-# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
-# See the License for the specific language governing permissions and
-# limitations under the License.
-
-# This script translates invalid authors in the contributors list generated
-# by generate-contributors.py. When the script encounters an author name that
-# is considered invalid, it searches GitHub and JIRA in an attempt to search
-# for replacements. This tool runs in two modes:
-#
-# (1) Interactive mode: For each invalid author name, this script presents
-# all candidate replacements to the user and awaits user response. In this
-# mode, the user may also input a custom name. This is the default.
-#
-# (2) Non-interactive mode: For each invalid author name, this script replaces
-# the name with the first valid candidate it can find. If there is none, it
-# uses the original name. This can be enabled through the --non-interactive 
flag.
-
-import os
-import sys
-
-from releaseutils import (
-    JIRA,
-    JIRAError,
-    get_jira_name,
-    Github,
-    get_github_name,
-    contributors_file_name,
-    is_valid_author,
-    capitalize_author,
-    yesOrNoPrompt,
-)
-
-# You must set the following before use!
-JIRA_API_BASE = os.environ.get("JIRA_API_BASE", "https://issues.apache.org/jira")
-JIRA_USERNAME = os.environ.get("JIRA_USERNAME", None)
-JIRA_PASSWORD = os.environ.get("JIRA_PASSWORD", None)
-# ASF JIRA access token
-# If it is configured, username and password are dismissed
-# Go to https://issues.apache.org/jira/secure/ViewProfile.jspa -> Personal 
Access Tokens for
-# your own token management.
-JIRA_ACCESS_TOKEN = os.environ.get("JIRA_ACCESS_TOKEN")
-
-GITHUB_OAUTH_KEY = os.environ.get("GITHUB_OAUTH_KEY", 
os.environ.get("GITHUB_API_TOKEN", None))
-
-if not GITHUB_OAUTH_KEY:
-    sys.exit("GITHUB_OAUTH_KEY must be set")
-
-# Setup JIRA client
-jira_options = {"server": JIRA_API_BASE}
-if JIRA_ACCESS_TOKEN:
-    client = JIRA(jira_options, token_auth=JIRA_ACCESS_TOKEN)
-    try:
-        # Eagerly check if the token is valid to align with the behavior of 
username/password
-        # authn
-        client.current_user()
-        jira_client = client
-    except Exception as e:
-        if e.__class__.__name__ == "JIRAError" and getattr(e, "status_code", 
None) == 401:
-            msg = (
-                "ASF JIRA could not authenticate with the invalid or expired 
token '%s'"
-                % JIRA_ACCESS_TOKEN
-            )
-            sys.exit(msg)
-        else:
-            raise e
-elif JIRA_USERNAME and JIRA_PASSWORD:
-    print("You can use JIRA_ACCESS_TOKEN instead of 
JIRA_USERNAME/JIRA_PASSWORD.")
-    print("Visit https://issues.apache.org/jira/secure/ViewProfile.jspa ")
-    print("and click 'Personal Access Tokens' menu to manage your own tokens.")
-    jira_client = JIRA(jira_options, basic_auth=(JIRA_USERNAME, JIRA_PASSWORD))
-else:
-    sys.exit("JIRA_ACCESS_TOKEN must be set.")
-
-
-# Write new contributors list to <old_file_name>.final
-if not os.path.isfile(contributors_file_name):
-    print("Contributors file %s does not exist!" % contributors_file_name)
-    print("Have you run ./generate-contributors.py yet?")
-    sys.exit(1)
-contributors_file = open(contributors_file_name, "r")
-warnings = []
-
-# In non-interactive mode, this script will choose the first replacement that 
is valid
-INTERACTIVE_MODE = True
-if len(sys.argv) > 1:
-    options = set(sys.argv[1:])
-    if "--non-interactive" in options:
-        INTERACTIVE_MODE = False
-if INTERACTIVE_MODE:
-    print("Running in interactive mode. To disable this, provide the 
--non-interactive flag.")
-
-# Setup GitHub client
-github_client = Github(GITHUB_OAUTH_KEY)
-
-# Load known author translations that are cached locally
-known_translations = {}
-known_translations_file_name = "known_translations"
-known_translations_file = open(known_translations_file_name, "r")
-for line in known_translations_file:
-    if line.startswith("#"):
-        continue
-    [old_name, new_name] = line.strip("\n").split(" - ")
-    known_translations[old_name] = new_name
-known_translations_file.close()
-
-# Open again in case the user adds new mappings
-known_translations_file = open(known_translations_file_name, "a")
-
-# Generate candidates for the given author. This should only be called if the 
given author
-# name does not represent a full name as this operation is somewhat expensive. 
Under the
-# hood, it makes several calls to the GitHub and JIRA API servers to find the 
candidates.
-#
-# This returns a list of (candidate name, source) 2-tuples. E.g.
-# [
-#   (NOT_FOUND, "No full name found for GitHub user andrewor14"),
-#   ("Andrew Or", "Full name of JIRA user andrewor14"),
-#   ("Andrew Orso", "Full name of SPARK-1444 assignee andrewor14"),
-#   ("Andrew Ordall", "Full name of SPARK-1663 assignee andrewor14"),
-#   (NOT_FOUND, "No assignee found for SPARK-1763")
-# ]
-NOT_FOUND = "Not found"
-
-
-def generate_candidates(author, issues):
-    candidates = []
-    # First check for full name of GitHub user
-    github_name = get_github_name(author, github_client)
-    if github_name:
-        candidates.append((github_name, "Full name of GitHub user %s" % 
author))
-    else:
-        candidates.append((NOT_FOUND, "No full name found for GitHub user %s" 
% author))
-    # Then do the same for JIRA user
-    jira_name = get_jira_name(author, jira_client)
-    if jira_name:
-        candidates.append((jira_name, "Full name of JIRA user %s" % author))
-    else:
-        candidates.append((NOT_FOUND, "No full name found for JIRA user %s" % 
author))
-    # Then do the same for the assignee of each of the associated JIRAs
-    # Note that a given issue may not have an assignee, or the assignee may 
not have a full name
-    for issue in issues:
-        try:
-            jira_issue = jira_client.issue(issue)
-        except JIRAError as e:
-            # Do not exit just because an issue is not found!
-            if e.status_code == 404:
-                warnings.append("Issue %s not found!" % issue)
-                continue
-            raise e
-        jira_assignee = jira_issue.fields.assignee
-        if jira_assignee:
-            user_name = jira_assignee.name
-            display_name = jira_assignee.displayName
-            if display_name:
-                candidates.append(
-                    (display_name, "Full name of %s assignee %s" % (issue, 
user_name))
-                )
-            else:
-                candidates.append(
-                    (NOT_FOUND, "No full name found for %s assignee %s" % 
(issue, user_name))
-                )
-        else:
-            candidates.append((NOT_FOUND, "No assignee found for %s" % issue))
-    for i, (candidate, source) in enumerate(candidates):
-        candidate = candidate.strip()
-        candidates[i] = (candidate, source)
-    return candidates
-
-
-# Translate each invalid author by searching for possible candidates from 
GitHub and JIRA
-# In interactive mode, this script presents the user with a list of choices 
and have the user
-# select from this list. Additionally, the user may also choose to enter a 
custom name.
-# In non-interactive mode, this script picks the first valid author name from 
the candidates
-# If no such name exists, the original name is used (without the JIRA numbers).
-print("\n========================== Translating contributor list 
==========================")
-lines = contributors_file.readlines()
-contributions = []
-for i, line in enumerate(lines):
-    # It is possible that a line in the contributor file only has the github 
name, e.g. yhuai.
-    # So, we need a strip() to remove the newline.
-    temp_author = line.strip(" * ").split(" -- ")[0].strip()
-    print("Processing author %s (%d/%d)" % (temp_author, i + 1, len(lines)))
-    if not temp_author:
-        error_msg = '    ERROR: Expected the following format " * <author> -- 
<contributions>"\n'
-        error_msg += "    ERROR: Actual = %s" % line
-        print(error_msg)
-        warnings.append(error_msg)
-        contributions.append(line)
-        continue
-    author = temp_author.split("/")[0]
-    # Use the local copy of known translations where possible
-    if author in known_translations:
-        line = line.replace(temp_author, known_translations[author])
-    elif not is_valid_author(author):
-        new_author = author
-        issues = temp_author.split("/")[1:]
-        candidates = generate_candidates(author, issues)
-        # Print out potential replacement candidates along with the sources, 
e.g.
-        #   [X] No full name found for GitHub user andrewor14
-        #   [X] No assignee found for SPARK-1763
-        #   [0] Andrew Or - Full name of JIRA user andrewor14
-        #   [1] Andrew Orso - Full name of SPARK-1444 assignee andrewor14
-        #   [2] Andrew Ordall - Full name of SPARK-1663 assignee andrewor14
-        #   [3] andrewor14 - Raw GitHub username
-        #   [4] Custom
-        candidate_names = []
-        bad_prompts = []  # Prompts that can't actually be selected; print 
these first.
-        good_prompts = []  # Prompts that contain valid choices
-        for candidate, source in candidates:
-            if candidate == NOT_FOUND:
-                bad_prompts.append("    [X] %s" % source)
-            else:
-                index = len(candidate_names)
-                candidate_names.append(candidate)
-                good_prompts.append("    [%d] %s - %s" % (index, candidate, 
source))
-        raw_index = len(candidate_names)
-        custom_index = len(candidate_names) + 1
-        for p in bad_prompts:
-            print(p)
-        if bad_prompts:
-            print("    ---")
-        for p in good_prompts:
-            print(p)
-        # In interactive mode, additionally provide "custom" option and await 
user response
-        if INTERACTIVE_MODE:
-            print("    [%d] %s - Raw GitHub username" % (raw_index, author))
-            print("    [%d] Custom" % custom_index)
-            response = input("    Your choice: ")
-            last_index = custom_index
-            while not response.isdigit() or int(response) > last_index:
-                response = input("    Please enter an integer between 0 and 
%d: " % last_index)
-            response = int(response)
-            if response == custom_index:
-                new_author = input("    Please type a custom name for this 
author: ")
-            elif response != raw_index:
-                new_author = candidate_names[response]
-        # In non-interactive mode, just pick the first candidate
-        else:
-            valid_candidate_names = [
-                name for name, _ in candidates if is_valid_author(name) and 
name != NOT_FOUND
-            ]
-            if valid_candidate_names:
-                new_author = valid_candidate_names[0]
-        # Finally, capitalize the author and replace the original one with it
-        # If the final replacement is still invalid, log a warning
-        if is_valid_author(new_author):
-            new_author = capitalize_author(new_author)
-        else:
-            warnings.append("Unable to find a valid name %s for author %s" % 
(author, temp_author))
-        print("    * Replacing %s with %s" % (author, new_author))
-        # If we are in interactive mode, prompt the user whether we want to 
remember this new
-        # mapping
-        if (
-            INTERACTIVE_MODE
-            and author not in known_translations
-            and yesOrNoPrompt(
-                "    Add mapping %s -> %s to known translations file?" % 
(author, new_author)
-            )
-        ):
-            known_translations_file.write("%s - %s\n" % (author, new_author))
-            known_translations_file.flush()
-        line = line.replace(temp_author, author)
-    contributions.append(line)
-print("==================================================================================\n")
-contributors_file.close()
-known_translations_file.close()
-
-# Sort the contributions before writing them to the new file.
-# Additionally, check if there are any duplicate author rows.
-# This could happen if the same user has both a valid full
-# name (e.g. Andrew Or) and an invalid one (andrewor14).
-# If so, warn the user about this at the end.
-contributions.sort()
-all_authors = set()
-new_contributors_file_name = contributors_file_name + ".final"
-new_contributors_file = open(new_contributors_file_name, "w")
-for line in contributions:
-    author = line.strip(" * ").split(" -- ")[0]
-    if author in all_authors:
-        warnings.append("Detected duplicate author name %s. Please merge these 
manually." % author)
-    all_authors.add(author)
-    new_contributors_file.write(line)
-new_contributors_file.close()
-
-print("Translated contributors list successfully written to %s!" % 
new_contributors_file_name)
-
-# Log any warnings encountered in the process
-if warnings:
-    print("\n========== Warnings encountered while translating the contributor 
list ===========")
-    for w in warnings:
-        print(w)
-    print(
-        "Please manually correct these in the final contributors list at %s."
-        % new_contributors_file_name
-    )
-    print("==================================================================================\n")


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to