amoghrajesh commented on code in PR #62261: URL: https://github.com/apache/airflow/pull/62261#discussion_r2839256359
########## .github/workflows/registry-build.yml: ########## @@ -0,0 +1,232 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +--- +name: Build & Publish Registry +on: # yamllint disable-line rule:truthy + workflow_dispatch: + inputs: + destination: + description: "Publish to live or staging S3 bucket" + required: true + type: choice + options: + - staging + - live + default: staging + provider: + description: "Provider ID(s) for incremental build (space-separated, empty = full build)" + required: false + type: string + default: "" + workflow_call: + inputs: + destination: + description: "Publish to live or staging S3 bucket" + required: false + type: string + default: staging + provider: + description: "Provider ID(s) for incremental build (space-separated, empty = full build)" + required: false + type: string + default: "" + secrets: + DOCS_AWS_ACCESS_KEY_ID: + required: true + DOCS_AWS_SECRET_ACCESS_KEY: + required: true + +permissions: + contents: read + +jobs: + build-and-publish-registry: + timeout-minutes: 30 + name: "Build & Publish Registry" + runs-on: ubuntu-latest + permissions: + contents: read + id-token: write + if: > + github.event_name == 'workflow_call' || + contains(fromJSON('[ + "ashb", + "bugraoz93", + 
"eladkal", + "ephraimbuddy", + "jedcunningham", + "jscheffl", + "kaxil", + "pierrejeambrun", + "shahar1", + "potiuk", + "utkarsharma2", + "vincbeck" + ]'), github.event.sender.login) + steps: + - name: "Checkout repository" + uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683 # v4.2.2 + with: + persist-credentials: false + + # --- Breeze setup --- + # All three extraction scripts run inside breeze so that + # extract_parameters.py and extract_connections.py can import provider + # classes at runtime. extract_metadata.py also runs in breeze for + # consistency — it writes to dev/registry/ (mounted) so the other two + # scripts can read providers.json / modules.json from there. + - name: "Install Breeze" + uses: ./.github/actions/breeze + with: + python-version: "3.12" + + - name: "Build CI image" + # Fallback to raw docker buildx when breeze cache is stale — same + # pattern as publish-docs-to-s3.yml. + run: > + breeze ci-image build --python 3.12 || + docker buildx build --load --builder default --progress=auto --pull + --build-arg AIRFLOW_EXTRAS=devel-ci --build-arg AIRFLOW_PRE_CACHED_PIP_PACKAGES=false + --build-arg AIRFLOW_USE_UV=true --build-arg BUILD_PROGRESS=auto + --build-arg INSTALL_MYSQL_CLIENT_TYPE=mariadb + --build-arg VERSION_SUFFIX_FOR_PYPI=dev0 + -t ghcr.io/apache/airflow/main/ci/python3.12:latest --target main . 
+ -f Dockerfile.ci --platform linux/amd64 + + - name: "Install AWS CLI v2" + run: | + curl "https://awscli.amazonaws.com/awscli-exe-linux-x86_64.zip" -o /tmp/awscliv2.zip + unzip -q /tmp/awscliv2.zip -d /tmp + rm /tmp/awscliv2.zip + sudo /tmp/aws/install --update + rm -rf /tmp/aws/ + + - name: "Configure AWS credentials" + uses: aws-actions/configure-aws-credentials@010d0da01d0b5a38af31e9c3470dbfdabdecca3a # v4.0.1 + with: + aws-access-key-id: ${{ secrets.DOCS_AWS_ACCESS_KEY_ID }} + aws-secret-access-key: ${{ secrets.DOCS_AWS_SECRET_ACCESS_KEY }} + aws-region: us-east-2 + + - name: "Determine S3 destination" + id: destination + env: + DESTINATION: ${{ inputs.destination || 'staging' }} + run: | + if [[ "${DESTINATION}" == "live" ]]; then + echo "bucket=s3://live-docs-airflow-apache-org/registry/" >> "${GITHUB_OUTPUT}" + else + echo "bucket=s3://staging-docs-airflow-apache-org/registry/" >> "${GITHUB_OUTPUT}" + fi + echo "Publishing registry to ${DESTINATION}" + + # --- Incremental: download existing data from S3 --- + - name: "Download existing registry data from S3" + id: download-existing + if: inputs.provider != '' + env: + S3_BUCKET: ${{ steps.destination.outputs.bucket }} + run: | + mkdir -p /tmp/existing-registry + PROVIDERS_URL="${S3_BUCKET}api/providers.json" + MODULES_URL="${S3_BUCKET}api/modules.json" + if aws s3 cp "${PROVIDERS_URL}" /tmp/existing-registry/providers.json 2>/dev/null; then + echo "found=true" >> "${GITHUB_OUTPUT}" + aws s3 cp "${MODULES_URL}" /tmp/existing-registry/modules.json || true + else + echo "found=false" >> "${GITHUB_OUTPUT}" + echo "No existing registry data on S3" + fi + + # --- Extract provider metadata --- + - name: "Extract registry data (breeze)" + env: + PROVIDER: ${{ inputs.provider }} + run: | + if [[ -n "${PROVIDER}" ]]; then + breeze registry extract-data --python 3.12 --provider "${PROVIDER}" + else + breeze registry extract-data --python 3.12 + fi + + # --- Incremental: merge new data with existing --- + - name: 
"Merge with existing registry data" + if: inputs.provider != '' && steps.download-existing.outputs.found == 'true' + run: | + uv run dev/registry/merge_registry_data.py \ + --existing-providers /tmp/existing-registry/providers.json \ + --existing-modules /tmp/existing-registry/modules.json \ + --new-providers dev/registry/providers.json \ + --new-modules dev/registry/modules.json \ + --output dev/registry/ Review Comment: Couple of magic strings in here, used across stages, can we extract them in `env`? ########## registry/README.md: ########## @@ -0,0 +1,470 @@ +<!-- Review Comment: Good catch, I think inline should be good since it's just one dependency ########## registry/src/api/modules-json.njk: ########## @@ -0,0 +1,5 @@ +--- +permalink: "/api/modules.json" +eleventyExcludeFromCollections: true +--- Review Comment: Interesting to use nunjucks here, is it because of better support for JS compared to jinja rendering or something else? ########## .github/boring-cyborg.yml: ########## @@ -557,6 +557,14 @@ labelPRBasedOnFilePath: area:kubernetes-tests: - kubernetes-tests/**/* + area:registry: Review Comment: ```suggestion area:providers-registry: ``` to be explicit? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
