amoghrajesh commented on code in PR #33144:
URL: https://github.com/apache/airflow/pull/33144#discussion_r1285705062
##########
dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md:
##########
@@ -73,6 +74,89 @@ git commit -m "Your commit message here" --no-verify
git push
```
+# Manually updating already tagged constraint files
+
+Sometimes - very rarely - we need to fix historical constraint files when
Airflow fails to install with the
+constraints that were used in the past. This happened already several times
and usually only happens when
+there is a backwards-incompatible change in the build environment in Python
installation toolchain
+(pip, setuptools, wheel, Cython etc.). The Python build environment is not
controllable by us - by default
+pip uses `build isolation` which means that it will install the latest version
of the build tools that
+are matching the expectations of the package being build and it might mean
that new version of such tools can
Review Comment:
nit: `expectations of the package being *built* and it might mean that new
version of such tools can`
##########
dev/breeze/src/airflow_breeze/commands/release_management_commands.py:
##########
@@ -1275,3 +1275,160 @@ def generate_providers_metadata(refresh_constraints:
bool, python: str | None):
import json
PROVIDER_METADATA_JSON_FILE_PATH.write_text(json.dumps(metadata_dict,
indent=4, sort_keys=True))
+
+
+def fetch_remote(constraints_repo: Path, remote_name: str) -> None:
+ run_command(["git", "fetch", remote_name], cwd=constraints_repo)
+
+
+def checkout_constraint_tag_and_reset_branch(constraints_repo: Path,
airflow_version: str) -> None:
+ run_command(
+ ["git", "reset", "--hard"],
+ cwd=constraints_repo,
+ )
+ # Switch to tag
+ run_command(
+ ["git", "checkout", f"constraints-{airflow_version}"],
+ cwd=constraints_repo,
+ )
+ # Create or reset branch to point
+ run_command(
+ ["git", "checkout", "-B", f"constraints-{airflow_version}-fix"],
+ cwd=constraints_repo,
+ )
+ get_console().print(
+ f"[info]Checked out constraints tag: constraints-{airflow_version} and
"
+ f"reset branch constraints-{airflow_version}-fix to it.[/]"
+ )
+ result = run_command(
+ ["git", "show", "-s", "--format=%H"],
+ cwd=constraints_repo,
+ text=True,
+ capture_output=True,
+ )
+ get_console().print(f"[info]The hash commit of the tag:[/]
{result.stdout}")
+
+
+def modify_single_file_constraints(constraints_file: Path,
updated_constraints: tuple[str]) -> bool:
+ constraint_content = constraints_file.read_text()
+ original_content = constraint_content
+ for constraint in updated_constraints:
+ package, version = constraint.split("==")
+ constraint_content = re.sub(
+ rf"^{package}==.*$", f"{package}=={version}", constraint_content,
flags=re.MULTILINE
+ )
+ if constraint_content != original_content:
+ if not get_dry_run():
+ constraints_file.write_text(constraint_content)
+ get_console().print("[success]Updated.[/]")
+ return True
+ else:
+ get_console().print("[warning]The file has not been modified.[/]")
+ return False
+
+
+def modify_all_constraint_files(constraints_repo: Path, updated_constraint:
tuple[str]) -> bool:
+ get_console().print("[info]Updating constraints files:[/]")
+ modified = False
+ for constraints_file in constraints_repo.glob("constraints-*.txt"):
+ get_console().print(f"[info]Updating {constraints_file.name}")
+ if modify_single_file_constraints(constraints_file,
updated_constraint):
+ modified = True
+ return modified
+
+
+def confirm_modifications(constraints_repo: Path) -> bool:
+ run_command(["git", "diff"], cwd=constraints_repo, env={"PAGER": ""})
+ confirm = user_confirm("Do you want to continue?")
+ if confirm == Answer.YES:
+ return True
+ elif confirm == Answer.NO:
+ return False
+ else:
+ sys.exit(1)
+
+
+def commit_constraints_and_tag(constraints_repo: Path, airflow_version: str,
message: str) -> None:
+ run_command(
+ ["git", "commit", "-a", "--no-verify", "-m", message],
+ cwd=constraints_repo,
+ )
+ run_command(
+ ["git", "tag", f"constraints-{airflow_version}", "--force", "-s",
"-m", message, "HEAD"],
+ cwd=constraints_repo,
+ )
+
+
+def push_constraints_and_tag(constraints_repo: Path, remote_name: str,
airflow_version: str) -> None:
+ run_command(
+ ["git", "push", remote_name, f"constraints-{airflow_version}-fix"],
+ cwd=constraints_repo,
+ )
+ run_command(
+ ["git", "push", remote_name, f"constraints-{airflow_version}",
"--force"],
+ cwd=constraints_repo,
+ )
+
+
+@release_management.command(
+ name="update-constraints", help="Update released constraints with manual
changes."
+)
[email protected](
+ "--constraints-repo",
+ type=click.Path(file_okay=False, dir_okay=True, path_type=Path,
exists=True),
+ required=True,
+ envvar="CONSTRAINTS_REPO",
+ help="Path where airflow repository is checked out, with
``constraints-main`` branch checked out.",
+)
[email protected](
+ "--remote-name",
+ type=str,
+ default="apache",
+ envvar="REMOTE_NAME",
+ help="Name of the remote to push the changes to.",
+)
[email protected](
+ "--airflow-versions",
+ type=str,
+ required=True,
+ envvar="AIRFLOW_VERSIONS",
+ help="Comma separated list of Airflow versions to update constraints for.",
+)
[email protected](
+ "--message",
Review Comment:
Let's call this `commit-message` instead?
##########
BREEZE.rst:
##########
@@ -2106,13 +2106,30 @@ Those are all available flags of
``generate-constraints`` command:
In case someone modifies setup.py, the scheduled CI Tests automatically
upgrades and
pushes changes to the constraint files, however you can also perform test run
of this locally using
-the procedure described in `Refreshing CI Cache
<dev/REFRESHING_CI_CACHE.md#manually-generating-constraint-files>`_
+the procedure described in the
+`Manually generating image cache and constraints
<dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md>`_
which utilises multiple processors on your local machine to generate such
constraints faster.
This bumps the constraint files to latest versions and stores hash of
setup.py. The generated constraint
and setup.py hash files are stored in the ``files`` folder and while
generating the constraints diff
of changes vs the previous constraint files is printed.
+Updating constraints
+""""""""""""""""""""
+
+Sometimes (very rarely) we might want to update individual packages in
constraints that we generated and
+tagged already in the past. This can be done using ``breeze release-management
update-constraints`` command.
+
+Those are all available flags of ``update-constraints`` command:
Review Comment:
nit: `These`
##########
dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md:
##########
@@ -73,6 +74,89 @@ git commit -m "Your commit message here" --no-verify
git push
```
+# Manually updating already tagged constraint files
+
+Sometimes - very rarely - we need to fix historical constraint files when
Airflow fails to install with the
+constraints that were used in the past. This happened already several times
and usually only happens when
+there is a backwards-incompatible change in the build environment in Python
installation toolchain
+(pip, setuptools, wheel, Cython etc.). The Python build environment is not
controllable by us - by default
+pip uses `build isolation` which means that it will install the latest version
of the build tools that
+are matching the expectations of the package being build and it might mean
that new version of such tools can
+break installation. This happened for example in July 2023 when major (3.0.0)
version of Cython has
+been released and it broke `pymssql` installation - we had to update the
constraint file to use `pymssql` 2.2.8
+instead of `2.2.7` because 2.2.7 did not limit the Cython version but also did not work with the
new version of Cython.
+
+Breeze has `update-constraints` command in `release-management` group that can
be used to update the
+constraints.
+
+How updating constraints works:
+
+1. You need to have "airflow" repository checked out separately from the
repository you are working on. For
+ example in `/home/myuser/airflow-constratints` folder.
Review Comment:
typo: `/home/myuser/airflow-constraints`
##########
dev/MANUALLY_GENERATING_IMAGE_CACHE_AND_CONSTRAINTS.md:
##########
@@ -73,6 +74,89 @@ git commit -m "Your commit message here" --no-verify
git push
```
+# Manually updating already tagged constraint files
+
+Sometimes - very rarely - we need to fix historical constraint files when
Airflow fails to install with the
+constraints that were used in the past. This happened already several times
and usually only happens when
+there is a backwards-incompatible change in the build environment in Python
installation toolchain
+(pip, setuptools, wheel, Cython etc.). The Python build environment is not
controllable by us - by default
+pip uses `build isolation` which means that it will install the latest version
of the build tools that
+are matching the expectations of the package being build and it might mean
that new version of such tools can
+break installation. This happened for example in July 2023 when major (3.0.0)
version of Cython has
+been released and it broke `pymssql` installation - we had to update the
constraint file to use `pymssql` 2.2.8
+instead of `2.2.7` because 2.2.7 did not limit the Cython version but also did not work with the
new version of Cython.
+
+Breeze has `update-constraints` command in `release-management` group that can
be used to update the
+constraints.
+
+How updating constraints works:
+
+1. You need to have "airflow" repository checked out separately from the
repository you are working on. For
+ example in `/home/myuser/airflow-constratints` folder.
+2. You need to checkout `constraints-main` branch in this repository. By
default the command expects that
+ there is a remote named "apache" pointing to the official Apache
repository. You can override this
+ by passing `--remote-name` option to the command.
+3. You need to run `breeze release-management update-constraints` command. You
can pass various options to
+ the command:
+ * path to the "constraints" repository
+ * remote name (optionally - default "apache")
+ * list of airflow versions to update constraints for
+ * list of constraints to update in the form of "package==version" (you
can specify more than one)
+ * message to be used in commit message
+
+ Make sure you use exactly the same case for the package to be updated as
the one already in the
+ constraints.
+
+ It's a good idea to add `--dry-run` option to the command to see what will
be updated before you actually
+ run the command. However, even if you do not use `--dry-run` option, the
command will ask you to
+ confirm the updates so you will have a chance to verify it before each
version change.
+
+ You can also add `--verbose` instead of `--dry-run` and you will see the
git commands being executed by
+ the command while it is doing its job.
+
+ Notes about "potentially breaking" constraints: the command is designed to
work in a very safe way
+ and provide ways of coming back to the previous state manually if needed.
+
+ Tags are moved with ``--force`` option - this needs to be done because we
are moving already existing tag,
+ however branches are pushed without force so there is no risk of losing
history in the repository - you
+ can always see the history and revert the changes and restore old tags
manually. Usually the "final" tags
+ are the same as the latest "rc*" tags for the same version so it is easy to
find where the tag was
+ pointing before - we also print hash of the commits before attempting to
make modifications so you can
+ always see what commit the tag has been pointing to before the command is
run.
+
+4. The command will do the following for every Airflow version specified:
+ * checkout "constraints-<version>" tag
+ * reset "constraints-<version>-fix" branch to the tag
+ * update constraints in-place
+ * commit the changes
+ * tag the commit with "constraints-<version>" tag
+ * push the "constraints-<version>-fix" branch with the commit to the
remote selected
+ * push the tag to the remote selected
+
+You should verify manually if the change is as expected by inspecting the
constraints at
+
+https://github.com/apache/airflow/tree/constraints-<airflow-version>
+
+Example of updating constraints for Airflow 2.5.0 - 2.6.3 and updating
`pymssql` constraint to 2.2.8:
+
+```bash
+breeze release-management update-constraints --constraints-repo
/home/user/airflow-constraints \
+ --airflow-versions 2.5.0,2.5.1,2.5.2,2.5.3,2.6.0,2.6.1,2.6.2,2.6.3 \
+ --updated-constraint pymssql==2.2.8 \
+ --message "Update pymssql constraint to 2.2.8"
+```
+
+Example of updating multiple constraints:
+
+```bash
+breeze release-management update-constraints --constraints-repo
/home/user/airflow-constraints \
+ --airflow-versions 2.5.0,2.5.1,2.5.2,2.5.3,2.6.0,2.6.1,2.6.2,2.6.3 \
+ --updated-constraint pymssql==2.2.8 \
+ --updated-constraint Authlib==1.3.0 \
+ --message "Update pymssql constraint to 2.2.8 and Authlib to 1.3.0"
+```
+
Review Comment:
Nice description with examples!
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]