This is an automated email from the ASF dual-hosted git repository. potiuk pushed a commit to branch v2-0-test in repository https://gitbox.apache.org/repos/asf/airflow.git
commit 144255fc683b6d862c8147c05e38f75bbe519035 Author: Jarek Potiuk <[email protected]> AuthorDate: Tue Apr 6 04:08:11 2021 +0200 Constraints are now parallelized and merged in single job (#15211) Originally, the constraints were generated in separate jobs and uploaded as artifacts and then joined by a separate push job. Thanks to parallel processing, we can now do that all in a single job, with both cost and time savings. (cherry picked from commit aebacd74058d01cfecaf913c04c0dbc50bb188ea) --- .github/workflows/ci.yml | 64 ++++++---------------- BREEZE.rst | 39 ++++++------- CONTRIBUTING.rst | 19 +++++-- scripts/ci/constraints/ci_commit_constraints.sh | 2 +- .../ci_generate_all_constraints.sh} | 13 ++++- scripts/ci/constraints/ci_generate_constraints.sh | 8 +++ .../images/ci_wait_for_and_verify_all_ci_images.sh | 5 +- .../ci_wait_for_and_verify_all_prod_images.sh | 4 +- scripts/ci/libraries/_parallel.sh | 13 +++++ 9 files changed, 89 insertions(+), 78 deletions(-) diff --git a/.github/workflows/ci.yml b/.github/workflows/ci.yml index 86bc960..21a5429 100644 --- a/.github/workflows/ci.yml +++ b/.github/workflows/ci.yml @@ -1114,17 +1114,21 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" timeout-minutes: 10 name: "Constraints" runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - strategy: - matrix: - python-version: ${{ fromJson(needs.build-info.outputs.pythonVersions) }} - fail-fast: false needs: - build-info - ci-images + - prod-images + - static-checks + - static-checks-pylint + - tests-sqlite + - tests-mysql + - tests-postgres + - tests-kubernetes env: RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} PYTHON_MAJOR_MINOR_VERSION: ${{ matrix.python-version }} GITHUB_REGISTRY: ${{ needs.ci-images.outputs.githubRegistry }} + CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING: ${{needs.build-info.outputs.pythonVersionsListAsString}} # Only run it for direct pushes if: > github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test' || @@ 
-1140,54 +1144,22 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" python-version: ${{ env.PYTHON_MAJOR_MINOR_VERSION }} - name: "Free space" run: ./scripts/ci/tools/ci_free_space_on_ci.sh - - name: "Prepare CI image ${{env.PYTHON_MAJOR_MINOR_VERSION}}:${{ github.sha }}" - run: ./scripts/ci/images/ci_prepare_ci_image_on_ci.sh + - name: > + Wait for CI images + ${{ needs.build-info.outputs.pythonVersions }}:${{ env.GITHUB_REGISTRY_PULL_IMAGE_TAG }} + run: ./scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh - name: "Generate constraints with PyPI providers" - run: ./scripts/ci/constraints/ci_generate_constraints.sh + run: ./scripts/ci/constraints/ci_generate_all_constraints.sh env: GENERATE_CONSTRAINTS_MODE: "pypi-providers" - name: "Generate constraints with source providers" - run: ./scripts/ci/constraints/ci_generate_constraints.sh + run: ./scripts/ci/constraints/ci_generate_all_constraints.sh env: GENERATE_CONSTRAINTS_MODE: "source-providers" - name: "Generate constraints without providers" - run: ./scripts/ci/constraints/ci_generate_constraints.sh + run: ./scripts/ci/constraints/ci_generate_all_constraints.sh env: GENERATE_CONSTRAINTS_MODE: "no-providers" - - name: "Upload constraint artifacts" - uses: actions/upload-artifact@v2 - with: - name: 'constraints-${{matrix.python-version}}' - path: './files/constraints-${{matrix.python-version}}/constraints-*${{matrix.python-version}}.txt' - retention-days: 7 - - constraints-push: - timeout-minutes: 10 - name: "Constraints push" - runs-on: ${{ fromJson(needs.build-info.outputs.runsOn) }} - needs: - - build-info - - constraints - - ci-images - - prod-images - - static-checks - - static-checks-pylint - - tests-sqlite - - tests-mysql - - tests-postgres - - tests-kubernetes - # Only run it for direct pushes - if: > - github.ref == 'refs/heads/master' || github.ref == 'refs/heads/v1-10-test' || - github.ref == 'refs/heads/v2-0-test' - env: - RUNS_ON: ${{ fromJson(needs.build-info.outputs.runsOn) }} - steps: - - 
name: "Checkout ${{ github.ref }} ( ${{ github.sha }} )" - uses: actions/checkout@v2 - with: - persist-credentials: false - submodules: recursive - name: "Set constraints branch name" id: constraints-branch run: ./scripts/ci/constraints/ci_branch_constraints.sh @@ -1197,10 +1169,6 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" path: "repo" ref: ${{ steps.constraints-branch.outputs.branch }} persist-credentials: false - - name: "Get all artifacts (constraints)" - uses: actions/download-artifact@v2 - with: - path: 'artifacts' - name: "Commit changed constraint files for ${{needs.build-info.outputs.pythonVersions}}" run: ./scripts/ci/constraints/ci_commit_constraints.sh - name: "Push changes" @@ -1223,7 +1191,7 @@ ${{ hashFiles('.pre-commit-config.yaml') }}" - tests-postgres - tests-mysql - tests-kubernetes - - constraints-push + - constraints - prepare-test-provider-packages-wheel - prepare-test-provider-packages-sdist if: github.event_name == 'schedule' && github.repository == 'apache/airflow' diff --git a/BREEZE.rst b/BREEZE.rst index 2a8a74a..72633e8 100644 --- a/BREEZE.rst +++ b/BREEZE.rst @@ -809,38 +809,39 @@ Generating constraints Whenever setup.py gets modified, the CI master job will re-generate constraint files. Those constraint files are stored in separated orphan branches: ``constraints-master``, ``constraints-2-0`` -and ``constraints-1-10``. They are stored separately for each python version and there are separate -constraints for: +and ``constraints-1-10``. + +Those are constraint files as described in detail in the +`<CONTRIBUTING.rst#pinned-constraint-files>`_ contributing documentation. + +You can use ``./breeze generate-constraints`` command to manually generate constraints for a single python +version and single constraint mode like this: + +.. 
code-block:: bash + + ./breeze generate-constraints --generate-constraints-mode pypi-providers + + +Constraints are generated separately for each python version and there are separate constraints modes: * 'constraints' - those are constraints generated by matching the current airflow version from sources and providers that are installed from PyPI. Those are constraints used by the users who want to - install airflow with pip + install airflow with pip. Use ``pypi-providers`` mode for that. * "constraints-source-providers" - those are constraints generated by using providers installed from current sources. While adding new providers their dependencies might change, so this set of providers is the current set of the constraints for airflow and providers from the current master sources. - Those providers are used by CI system to keep "stable" set of constraints. + Those providers are used by CI system to keep "stable" set of constraints. Use + ``source-providers`` mode for that. * "constraints-no-providers" - those are constraints generated from only Apache Airflow, without any providers. If you want to manage airflow separately and then add providers individually, you can - use those. - -Those are constraint files as described in detail in the -`<CONTRIBUTING.rst#pinned-constraint-files>`_ contributing documentation. + use those. Use ``no-providers`` mode for that. In case someone modifies setup.py, the ``CRON`` scheduled CI build automatically upgrades and pushes changed to the constraint files, however you can also perform test run of this locally using -``generate-constraints`` command of Breeze. - -.. 
code-block:: bash - - for python_version in 3.6 3.7 3.8 - do - ./breeze generate-constraints --generate-constraints-mode source-providers --python ${python_version} - ./breeze generate-constraints --generate-constraints-mode pypi-providers --python ${python_version} - ./breeze generate-constraints --generate-constraints-mode no-providers --python ${python_version} - done - +the procedure described in `<CONTRIBUTING.rst#manually-generating-constraint-files>`_ which utilises +multiple processors on your local machine to generate such constraints faster. This bumps the constraint files to latest versions and stores hash of setup.py. The generated constraint and setup.py hash files are stored in the ``files`` folder and while generating the constraints diff diff --git a/CONTRIBUTING.rst b/CONTRIBUTING.rst index 6673ddf..19c4077 100644 --- a/CONTRIBUTING.rst +++ b/CONTRIBUTING.rst @@ -871,19 +871,26 @@ Manually generating constraint files ------------------------------------ The constraint files are generated automatically by the CI job. Sometimes however it is needed to regenerate -them manually (committers only). For example when master build did not succeed for quite some time). This can be done by -running this: +them manually (committers only), for example when the master build did not succeed for quite some time. +This can be done by running this (it utilizes parallel preparation of the constraints): .. 
code-block:: bash - for python_version in 3.6 3.7 3.8 + export CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING="3.6 3.7 3.8" + for python_version in $(echo "${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}") do - ./breeze generate-constraints --generate-constraints-mode source-providers --python ${python_version} --build-cache-local - ./breeze generate-constraints --generate-constraints-mode pypi-providers --python ${python_version} --build-cache-local - ./breeze generate-constraints --generate-constraints-mode no-providers --python ${python_version} --build-cache-local + ./breeze build-image --upgrade-to-newer-dependencies --python ${python_version} --build-cache-local + ./breeze build-image --upgrade-to-newer-dependencies --python ${python_version} --build-cache-local + ./breeze build-image --upgrade-to-newer-dependencies --python ${python_version} --build-cache-local done + + GENERATE_CONSTRAINTS_MODE="pypi-providers" ./scripts/ci/constraints/ci_generate_all_constraints.sh + GENERATE_CONSTRAINTS_MODE="source-providers" ./scripts/ci/constraints/ci_generate_all_constraints.sh + GENERATE_CONSTRAINTS_MODE="no-providers" ./scripts/ci/constraints/ci_generate_all_constraints.sh + AIRFLOW_SOURCES=$(pwd) + The constraints will be generated in "files/constraints-PYTHON_VERSION/constraints-*.txt" files. You need to checkout the right 'constraints-' branch in a separate repository and then you can copy, commit and push the generated files: diff --git a/scripts/ci/constraints/ci_commit_constraints.sh b/scripts/ci/constraints/ci_commit_constraints.sh index 7c24dc5..c3a7521 100755 --- a/scripts/ci/constraints/ci_commit_constraints.sh +++ b/scripts/ci/constraints/ci_commit_constraints.sh @@ -18,7 +18,7 @@ # shellcheck source=scripts/ci/libraries/_script_init.sh . 
"$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh" -cp -v ./artifacts/constraints-*/constraints*.txt repo/ +cp -v ./files/constraints-*/constraints*.txt repo/ cd repo || exit 1 git config --local user.email "[email protected]" git config --local user.name "Automated GitHub Actions commit" diff --git a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh b/scripts/ci/constraints/ci_generate_all_constraints.sh similarity index 75% copy from scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh copy to scripts/ci/constraints/ci_generate_all_constraints.sh index 7e09b1c..9a7a77e 100755 --- a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh +++ b/scripts/ci/constraints/ci_generate_all_constraints.sh @@ -17,6 +17,7 @@ # under the License. set -euo pipefail + # We cannot perform full initialization because it will be done later in the "single run" scripts # And some readonly variables are set there, therefore we only selectively reuse parallel lib needed LIBRARIES_DIR=$(cd "$(dirname "${BASH_SOURCE[0]}")/../libraries/" && pwd) @@ -25,10 +26,18 @@ source "${LIBRARIES_DIR}/_all_libs.sh" initialization::set_output_color_variables +export CHECK_IMAGE_FOR_REBUILD="false" +echo +echo "${COLOR_YELLOW}Skip rebuilding CI images. 
Assume the one we have is good!${COLOR_RESET}" +echo "${COLOR_YELLOW}You must run './breeze build-image --upgrade-to-newer-dependencies before for all python versions before running this one!${COLOR_RESET}" +echo + parallel::make_sure_gnu_parallel_is_installed +parallel::make_sure_python_versions_are_specified + echo -echo "Waiting for all CI images to appear: ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}" +echo "${COLOR_BLUE}Generating all constraint files${COLOR_RESET}" echo parallel::initialize_monitoring @@ -37,5 +46,5 @@ parallel::monitor_progress # shellcheck disable=SC2086 parallel --results "${PARALLEL_MONITORED_DIR}" \ - "$( dirname "${BASH_SOURCE[0]}" )/ci_wait_for_and_verify_ci_image.sh" ::: \ + "$( dirname "${BASH_SOURCE[0]}" )/ci_generate_constraints.sh" ::: \ ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING} diff --git a/scripts/ci/constraints/ci_generate_constraints.sh b/scripts/ci/constraints/ci_generate_constraints.sh index 10a4107..7e1cefa 100755 --- a/scripts/ci/constraints/ci_generate_constraints.sh +++ b/scripts/ci/constraints/ci_generate_constraints.sh @@ -15,6 +15,14 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +if [[ $1 == "" ]]; then + >&2 echo "Requires python MAJOR/MINOR version as first parameter" + exit 1 +fi + +export PYTHON_MAJOR_MINOR_VERSION=$1 +shift + # shellcheck source=scripts/ci/libraries/_script_init.sh . 
"$( dirname "${BASH_SOURCE[0]}" )/../libraries/_script_init.sh" diff --git a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh b/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh index 7e09b1c..4255374 100755 --- a/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh +++ b/scripts/ci/images/ci_wait_for_and_verify_all_ci_images.sh @@ -27,10 +27,13 @@ initialization::set_output_color_variables parallel::make_sure_gnu_parallel_is_installed +parallel::make_sure_python_versions_are_specified + echo -echo "Waiting for all CI images to appear: ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}" +echo "${COLOR_BLUE}Waiting for all CI images to appear${COLOR_RESET}" echo + parallel::initialize_monitoring parallel::monitor_progress diff --git a/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh b/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh index 2d1da54..08ed54b 100755 --- a/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh +++ b/scripts/ci/images/ci_wait_for_and_verify_all_prod_images.sh @@ -27,8 +27,10 @@ initialization::set_output_color_variables parallel::make_sure_gnu_parallel_is_installed +parallel::make_sure_python_versions_are_specified + echo -echo "Waiting for all PROD images to appear: ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}" +echo "${COLOR_BLUE}Waiting for all PROD images to appear${COLOR_RESET}" echo parallel::initialize_monitoring diff --git a/scripts/ci/libraries/_parallel.sh b/scripts/ci/libraries/_parallel.sh index e2f8ad4..7239e82 100644 --- a/scripts/ci/libraries/_parallel.sh +++ b/scripts/ci/libraries/_parallel.sh @@ -193,3 +193,16 @@ function parallel::cleanup_runner() { parallel::kill_stale_semaphore_locks start_end::group_end } + + +function parallel::make_sure_python_versions_are_specified() { + if [[ -z "${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING=}" ]]; then + echo + echo "${COLOR_RED}The CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING variable must be set and list python versions to 
use!${COLOR_RESET}" + echo + exit 1 + fi + echo + echo "${COLOR_BLUE}Running parallel builds for those Python versions: ${CURRENT_PYTHON_MAJOR_MINOR_VERSIONS_AS_STRING}!${COLOR_RESET}" + echo +}
