This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch better-machine-for-arm-builds in repository https://gitbox.apache.org/repos/asf/airflow.git
commit fc3b62bb9e43778cb691cf0604110b592c23624b Author: Jarek Potiuk <[email protected]> AuthorDate: Sat Jul 2 22:08:49 2022 +0200 Run ARM building way faster Since we are now building ARM images in parallel, we need more powerful machines and we implemented in-memory docker, similarly as in our AMD instances. The m6g.2xlarge are quite a bit better than c6g.xlarge for our case: 1) They have 8 vCPUs 2) They have 32 GB memory (should be enough to build 4 ARM CI images) 3) They are memory-optimised, and since docker is built in memory the memory speed is the most important factor. This also allows us to switch to building all images (including cache) in parallel - so that we can have 1 job instead of 4 - similarly as we have in case of regular AMD builds. Another advantage of it is that we lose far less time of the AMD instance which "triggers" the docker build, because this instance will control 4 parallel builds at a time effectively, which will decrease a lot of overhead connected with running the instance mostly idle during the build (and since the builds will be generally faster, the overhead will be even smaller). 
--- .github/workflows/build-images.yml | 5 ++-- .github/workflows/ci.yml | 29 ++++++++++------------ .../ci_start_arm_instance_and_connect_to_docker.sh | 8 +++--- .../ci/images/{self_terminate.sh => initialize.sh} | 15 +++++++++-- setup.py | 2 ++ 5 files changed, 34 insertions(+), 25 deletions(-) diff --git a/.github/workflows/build-images.yml b/.github/workflows/build-images.yml index 03e156dacf..638118727b 100644 --- a/.github/workflows/build-images.yml +++ b/.github/workflows/build-images.yml @@ -370,7 +370,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" build-ci-images-arm: - timeout-minutes: 120 + timeout-minutes: 50 name: "Build ARM CI images ${{ needs.build-info.outputs.all-python-versions-list-as-string }}" runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: [build-info, build-prod-images] @@ -426,8 +426,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" Build ARM CI images ${{ env.IMAGE_TAG_FOR_THE_BUILD }} ${{ needs.build-info.outputs.all-python-versions-list-as-string }} run: > - breeze build-image --run-in-parallel --parallelism 1 - --builder airflow_cache --platform "linux/arm64" + breeze build-image --run-in-parallel --builder airflow_cache --platform "linux/arm64" env: UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 5cfe017ff7..ac1fb07688 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1677,7 +1677,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" push-buildx-cache-to-github-registry: permissions: packages: write - timeout-minutes: 120 + timeout-minutes: 50 name: "Push Image Cache" runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: @@ -1688,7 +1688,6 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" strategy: fail-fast: false matrix: - python-version: ${{ fromJson(needs.build-info.outputs.python-versions) }} platform: 
["linux/amd64", "linux/arm64"] env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on) }} @@ -1738,6 +1737,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" breeze build-image --builder airflow_cache --prepare-buildx-cache + --run-in-parallel --force-build --platform ${{ matrix.platform }} env: @@ -1763,9 +1763,9 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" if: always() build-ci-arm-images: - timeout-minutes: 120 + timeout-minutes: 50 name: > - ${{needs.build-info.outputs.build-job-description}} CI ARM images + Build CI ARM images ${{ needs.build-info.outputs.all-python-versions-list-as-string }} runs-on: ${{ fromJson(needs.build-info.outputs.runs-on) }} needs: @@ -1782,45 +1782,42 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" DEFAULT_CONSTRAINTS_BRANCH: ${{ needs.build-info.outputs.default-constraints-branch }} DEBIAN_VERSION: ${{ needs.build-info.outputs.debian-version }} RUNS_ON: ${{ fromJson(needs.build-info.outputs.runs-on)[0] }} - if: needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' + if: > + needs.build-info.outputs.upgrade-to-newer-dependencies != 'false' && + needs.build-info.outputs.in-workflow-build == 'true' steps: - name: Cleanup repo run: docker run -v "${GITHUB_WORKSPACE}:/workspace" -u 0:0 bash -c "rm -rf /workspace/*" - if: needs.build-info.outputs.in-workflow-build == 'true' - uses: actions/checkout@v2 with: ref: ${{ needs.build-info.outputs.targetCommitSha }} persist-credentials: false submodules: recursive - if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Setup python" uses: actions/setup-python@v2 with: python-version: ${{ needs.build-info.outputs.default-python-version }} - if: needs.build-info.outputs.in-workflow-build == 'true' - run: ./scripts/ci/install_breeze.sh - if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Free space" run: breeze free-space - if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Start ARM instance" run: 
./scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh - if: needs.build-info.outputs.in-workflow-build == 'true' - name: > Build CI ARM images ${{ env.IMAGE_TAG_FOR_THE_BUILD }} ${{ needs.build-info.outputs.all-python-versions-list-as-string }} run: > - breeze build-image --run-in-parallel --parallelism 1 - --builder airflow_cache --platform "linux/arm64" + breeze build-image + --run-in-parallel + --builder airflow_cache + --platform "linux/arm64" env: UPGRADE_TO_NEWER_DEPENDENCIES: ${{ needs.build-info.outputs.upgrade-to-newer-dependencies }} DOCKER_CACHE: ${{ needs.build-info.outputs.cache-directive }} IMAGE_TAG: ${{ env.IMAGE_TAG_FOR_THE_BUILD }} PYTHON_VERSIONS: ${{ needs.build-info.outputs.all-python-versions-list-as-string }} - if: needs.build-info.outputs.in-workflow-build == 'true' - name: "Stop ARM instance" run: ./scripts/ci/images/ci_stop_arm_instance.sh - if: always() && needs.build-info.outputs.in-workflow-build == 'true' + if: always() - name: "Fix ownership" run: breeze fix-ownership - if: always() && needs.build-info.outputs.in-workflow-build == 'true' + if: always() diff --git a/scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh b/scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh index a3fbf6b5ba..5cf486e611 100755 --- a/scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh +++ b/scripts/ci/images/ci_start_arm_instance_and_connect_to_docker.sh @@ -22,12 +22,12 @@ SCRIPTS_DIR="$( cd "$( dirname "${BASH_SOURCE[0]}" )" && pwd)" # This is an AMI that is based on Basic Amazon Linux AMI with installed and configured docker service WORKING_DIR="/tmp/armdocker" INSTANCE_INFO="${WORKING_DIR}/instance_info.json" -ARM_AMI="ami-06b8158ea372d3259" -INSTANCE_TYPE="c6g.xlarge" -MARKET_OPTIONS="MarketType=spot,SpotOptions={MaxPrice=0.1,SpotInstanceType=one-time}" +ARM_AMI="ami-0e43196369d299715" # AMI ID of latest arm-docker-ami-v* +INSTANCE_TYPE="m6g.2xlarge" # m6g.2xlarge -> 8 vCPUS 32 GB RAM 
+MARKET_OPTIONS="MarketType=spot,SpotOptions={MaxPrice=0.2,SpotInstanceType=one-time}" REGION="us-east-2" EC2_USER="ec2-user" -USER_DATA_FILE="${SCRIPTS_DIR}/self_terminate.sh" +USER_DATA_FILE="${SCRIPTS_DIR}/initialize.sh" METADATA_ADDRESS="http://169.254.169.254/latest/meta-data" MAC_ADDRESS=$(curl -s "${METADATA_ADDRESS}/network/interfaces/macs/" | head -n1 | tr -d '/') CIDR=$(curl -s "${METADATA_ADDRESS}/network/interfaces/macs/${MAC_ADDRESS}/vpc-ipv4-cidr-block/") diff --git a/scripts/ci/images/self_terminate.sh b/scripts/ci/images/initialize.sh similarity index 67% rename from scripts/ci/images/self_terminate.sh rename to scripts/ci/images/initialize.sh index 024f106d68..e71087bba8 100755 --- a/scripts/ci/images/self_terminate.sh +++ b/scripts/ci/images/initialize.sh @@ -16,7 +16,18 @@ # specific language governing permissions and limitations # under the License. -# This instance will run for maximum 100 minutes and +# We are mounting /var/lib/docker and /tmp as tmpfs in order +# to gain speed when building the images The docker storage +# is ephemeral anyway and will be removed when instance stops + +sudo service docker stop || true + +sudo mount -t tmpfs -o size=10% tmpfs /tmp +sudo mount -t tmpfs -o size=66% tmpfs /var/lib/docker + +sudo service docker start + +# This instance will run for maximum 40 minutes and # It will terminate itself after that (it can also # be terminated immediately when the job finishes) -echo "sudo shutdown -h now" | at now +100 min +echo "sudo shutdown -h now" | at now +40 min diff --git a/setup.py b/setup.py index 991b6b0617..02259c8e26 100644 --- a/setup.py +++ b/setup.py @@ -399,6 +399,8 @@ devel_only = [ 'yamllint', ] +# just to test + def get_provider_dependencies(provider_name: str) -> List[str]: return PROVIDER_DEPENDENCIES[provider_name][DEPS]
