This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow-site.git
The following commit(s) were added to refs/heads/main by this push:
new dfc8ce5655 Parallelize user image downloads in CI (#1449)
dfc8ce5655 is described below
commit dfc8ce56553ccdbaadbfeff9836b6e9ff16b1d3a
Author: Jarek Potiuk <[email protected]>
AuthorDate: Wed Mar 11 13:24:02 2026 +0100
Parallelize user image downloads in CI (#1449)
Use ThreadPoolExecutor with 20 workers to download PMC/committer
profile images concurrently instead of sequentially.
Co-authored-by: Claude Opus 4.6 <[email protected]>
---
.github/scripts/refresh_pmc_committer_images.py | 40 ++++++++++++++++++-------
1 file changed, 29 insertions(+), 11 deletions(-)
diff --git a/.github/scripts/refresh_pmc_committer_images.py b/.github/scripts/refresh_pmc_committer_images.py
index d9c94c5846..1b99130a93 100644
--- a/.github/scripts/refresh_pmc_committer_images.py
+++ b/.github/scripts/refresh_pmc_committer_images.py
@@ -25,6 +25,7 @@
# ///
import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
from pathlib import Path
import requests
@@ -34,21 +35,38 @@ from rich.console import Console
console = Console(width=400, color_system="standard")
AIRFLOW_SOURCES_ROOT = Path(__file__).parents[2]
+PROFILES_DIR = AIRFLOW_SOURCES_ROOT / "landing-pages/site/static/external/profiles"
-def refresh_committer_pmc_images(file_path: Path):
- with open(file_path) as f:
- pmc_committer_data = json.load(f)
- for data in pmc_committer_data:
- github_url = data.get("github")
- username = github_url.split("/")[-1]
- image_url = github_url+'.png'
- console.print(f"Downloading image for: [magenta]{data.get("name")}[/] from {image_url}")
+MAX_WORKERS = 20
+
+
+def download_image(data: dict) -> str | None:
+ github_url = data.get("github")
+ username = github_url.split("/")[-1]
+ image_url = github_url + ".png"
+ name = data.get("name")
+ try:
response = requests.get(image_url)
if response.status_code == 200:
- with open(f'{AIRFLOW_SOURCES_ROOT}/landing-pages/site/static/external/profiles/{username}.png', 'wb') as f:
- f.write(response.content)
+ (PROFILES_DIR / f"{username}.png").write_bytes(response.content)
+ return None
else:
- print(f"Failed to download image for {data.get('name')}")
+ return f"Failed to download image for {name} (HTTP {response.status_code})"
+ except Exception as e:
+ return f"Failed to download image for {name}: {e}"
+
+
+def refresh_committer_pmc_images(file_path: Path):
+ with open(file_path) as f:
+ pmc_committer_data = json.load(f)
+ console.print(f"Downloading [cyan]{len(pmc_committer_data)}[/] images from [magenta]{file_path.name}[/] ({MAX_WORKERS} parallel workers)")
+ with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+ futures = {executor.submit(download_image, data): data for data in pmc_committer_data}
+ for future in as_completed(futures):
+ error = future.result()
+ if error:
+ console.print(f"[red]{error}[/]")
+ console.print(f"[green]Done with {file_path.name}[/]")
if __name__ == "__main__":