jens-scheffler-bosch commented on code in PR #33355:
URL: https://github.com/apache/airflow/pull/33355#discussion_r1323592818
##########
airflow/operators/python.py:
##########
@@ -606,7 +614,60 @@ def _prepare_venv(self, venv_path: Path) -> None:
index_urls=self.index_urls,
)
+ def _calculate_cache_hash(self) -> str:
+ """Helper to generate the hash of the cache folder to use.
+
+ The following factors are used as input for the hash:
+ - (sorted) list of requirements
+ - pip install options
+ - flag of system site packages
+ - python version
+ - Variable to override the hash with a cache key
+ - Index URLs
+ """
+ requirements_list = ",".join(self._requirements_list())
+ pip_options = ",".join(self.pip_install_options) if self.pip_install_options else ""
+ index_urls = ",".join(self.index_urls) if self.index_urls else ""
+ cache_key = str(Variable.get("PythonVirtualenvOperator.cache_key", ""))
+ hash_text = (
+ f"{self.python_version};{requirements_list};{cache_key};{self.system_site_packages};{pip_options};"
+ f"{index_urls}"
+ )
+ hash_object = hashlib_wrapper.md5(hash_text.encode())
+ requirements_hash = hash_object.hexdigest()
+ return requirements_hash[0:8]
+
+ def _ensure_venv_cache_exists(self, venv_cache_path: Path) -> Path:
+ """Helper to ensure a valid venv is set up and will create inplace."""
+ venv_path = venv_cache_path / f"venv-{self._calculate_cache_hash()}"
+ self.log.info("Python Virtualenv will be cached in %s", venv_path)
+ venv_path.parent.mkdir(parents=True, exist_ok=True)
+ with open(f"{venv_path}.lock", "w") as f:
+ # Ensure that cache is not build by parallel workers
+ fcntl.flock(f, fcntl.LOCK_EX)
+
+ if venv_path.exists() and (venv_path / "install_complete_marker").exists():
+ self.log.info("Re-using cached Python Virtualenv in %s", venv_path)
Review Comment:
I tried to rename all occurrences in the Python file consistently - I hope it
is not too much now.
There might be (many) other usages of the same term across the docs and other
parts of the code as well. If you want a general "cleanup" of the term, I'd
propose doing that in a separate "refactoring"-type PR.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]