This is an automated email from the ASF dual-hosted git repository.

potiuk pushed a commit to branch parallelize-image-downloads
in repository https://gitbox.apache.org/repos/asf/airflow-site.git

commit 2defbba0bcd17bc414875d81548cd4c7ce18dfad
Author: Jarek Potiuk <[email protected]>
AuthorDate: Wed Mar 11 13:11:46 2026 +0100

    Parallelize user image downloads in CI
    
    Use ThreadPoolExecutor with 20 workers to download PMC/committer
    profile images concurrently instead of sequentially.
    
    Co-Authored-By: Claude Opus 4.6 <[email protected]>
---
 .github/scripts/refresh_pmc_committer_images.py | 40 ++++++++++++++++++-------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/.github/scripts/refresh_pmc_committer_images.py b/.github/scripts/refresh_pmc_committer_images.py
index d9c94c5846..1b99130a93 100644
--- a/.github/scripts/refresh_pmc_committer_images.py
+++ b/.github/scripts/refresh_pmc_committer_images.py
@@ -25,6 +25,7 @@
 # ///
 
 import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 
 import requests
@@ -34,21 +35,38 @@ from rich.console import Console
 console = Console(width=400, color_system="standard")
 
 AIRFLOW_SOURCES_ROOT = Path(__file__).parents[2]
+PROFILES_DIR = AIRFLOW_SOURCES_ROOT / "landing-pages/site/static/external/profiles"
 
-def refresh_committer_pmc_images(file_path: Path):
-    with open(file_path) as f:
-        pmc_committer_data = json.load(f)
-    for data in pmc_committer_data:
-        github_url = data.get("github")
-        username = github_url.split("/")[-1]
-        image_url = github_url+'.png'
-        console.print(f"Downloading image for: [magenta]{data.get("name")}[/] from {image_url}")
+MAX_WORKERS = 20
+
+
+def download_image(data: dict) -> str | None:
+    github_url = data.get("github")
+    username = github_url.split("/")[-1]
+    image_url = github_url + ".png"
+    name = data.get("name")
+    try:
         response = requests.get(image_url)
         if response.status_code == 200:
-            with open(f'{AIRFLOW_SOURCES_ROOT}/landing-pages/site/static/external/profiles/{username}.png', 'wb') as f:
-                f.write(response.content)
+            (PROFILES_DIR / f"{username}.png").write_bytes(response.content)
+            return None
         else:
-            print(f"Failed to download image for {data.get('name')}")
+            return f"Failed to download image for {name} (HTTP {response.status_code})"
+    except Exception as e:
+        return f"Failed to download image for {name}: {e}"
+
+
+def refresh_committer_pmc_images(file_path: Path):
+    with open(file_path) as f:
+        pmc_committer_data = json.load(f)
+    console.print(f"Downloading [cyan]{len(pmc_committer_data)}[/] images from [magenta]{file_path.name}[/] ({MAX_WORKERS} parallel workers)")
+    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+        futures = {executor.submit(download_image, data): data for data in pmc_committer_data}
+        for future in as_completed(futures):
+            error = future.result()
+            if error:
+                console.print(f"[red]{error}[/]")
+    console.print(f"[green]Done with {file_path.name}[/]")
 
 
 if __name__ == "__main__":

Reply via email to