This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch parallelize-image-downloads in repository https://gitbox.apache.org/repos/asf/airflow-site.git
commit 2defbba0bcd17bc414875d81548cd4c7ce18dfad
Author: Jarek Potiuk <[email protected]>
AuthorDate: Wed Mar 11 13:11:46 2026 +0100

    Parallelize user image downloads in CI

    Use ThreadPoolExecutor with 20 workers to download PMC/committer profile images concurrently instead of sequentially.

    Co-Authored-By: Claude Opus 4.6 <[email protected]>
---
 .github/scripts/refresh_pmc_committer_images.py | 40 ++++++++++++++++++-------
 1 file changed, 29 insertions(+), 11 deletions(-)

diff --git a/.github/scripts/refresh_pmc_committer_images.py b/.github/scripts/refresh_pmc_committer_images.py
index d9c94c5846..1b99130a93 100644
--- a/.github/scripts/refresh_pmc_committer_images.py
+++ b/.github/scripts/refresh_pmc_committer_images.py
@@ -25,6 +25,7 @@
 # ///
 
 import os
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from pathlib import Path
 
 import requests
@@ -34,21 +35,38 @@ from rich.console import Console
 console = Console(width=400, color_system="standard")
 
 AIRFLOW_SOURCES_ROOT = Path(__file__).parents[2]
+PROFILES_DIR = AIRFLOW_SOURCES_ROOT / "landing-pages/site/static/external/profiles"
 
-def refresh_committer_pmc_images(file_path: Path):
-    with open(file_path) as f:
-        pmc_committer_data = json.load(f)
-    for data in pmc_committer_data:
-        github_url = data.get("github")
-        username = github_url.split("/")[-1]
-        image_url = github_url+'.png'
-        console.print(f"Downloading image for: [magenta]{data.get("name")}[/] from {image_url}")
+MAX_WORKERS = 20
+
+
+def download_image(data: dict) -> str | None:
+    github_url = data.get("github")
+    username = github_url.split("/")[-1]
+    image_url = github_url + ".png"
+    name = data.get("name")
+    try:
         response = requests.get(image_url)
         if response.status_code == 200:
-            with open(f'{AIRFLOW_SOURCES_ROOT}/landing-pages/site/static/external/profiles/{username}.png', 'wb') as f:
-                f.write(response.content)
+            (PROFILES_DIR / f"{username}.png").write_bytes(response.content)
+            return None
         else:
-            print(f"Failed to download image for {data.get('name')}")
+            return f"Failed to download image for {name} (HTTP {response.status_code})"
+    except Exception as e:
+        return f"Failed to download image for {name}: {e}"
+
+
+def refresh_committer_pmc_images(file_path: Path):
+    with open(file_path) as f:
+        pmc_committer_data = json.load(f)
+    console.print(f"Downloading [cyan]{len(pmc_committer_data)}[/] images from [magenta]{file_path.name}[/] ({MAX_WORKERS} parallel workers)")
+    with ThreadPoolExecutor(max_workers=MAX_WORKERS) as executor:
+        futures = {executor.submit(download_image, data): data for data in pmc_committer_data}
+        for future in as_completed(futures):
+            error = future.result()
+            if error:
+                console.print(f"[red]{error}[/]")
+    console.print(f"[green]Done with {file_path.name}[/]")
 
 
 if __name__ == "__main__":
