uranusjr commented on code in PR #33355:
URL: https://github.com/apache/airflow/pull/33355#discussion_r1322385103


##########
airflow/operators/python.py:
##########
@@ -606,7 +614,60 @@ def _prepare_venv(self, venv_path: Path) -> None:
             index_urls=self.index_urls,
         )
 
+    def _calculate_cache_hash(self) -> str:
+        """Helper to generate the hash of the cache folder to use.
+
+        The following factors are used as input for the hash:
+        - (sorted) list of requirements
+        - pip install options
+        - flag of system site packages
+        - python version
+        - Variable to override the hash with a cache key
+        - Index URLs
+        """
+        requirements_list = ",".join(self._requirements_list())
+        pip_options = ",".join(self.pip_install_options) if 
self.pip_install_options else ""
+        index_urls = ",".join(self.index_urls) if self.index_urls else ""
+        cache_key = str(Variable.get("PythonVirtualenvOperator.cache_key", ""))
+        hash_text = (
+            
f"{self.python_version};{requirements_list};{cache_key};{self.system_site_packages};{pip_options};"
+            f"{index_urls}"
+        )
+        hash_object = hashlib_wrapper.md5(hash_text.encode())
+        requirements_hash = hash_object.hexdigest()
+        return requirements_hash[0:8]
+
+    def _ensure_venv_cache_exists(self, venv_cache_path: Path) -> Path:
+        """Helper to ensure a valid venv is set up and will create inplace."""
+        venv_path = venv_cache_path / f"venv-{self._calculate_cache_hash()}"
+        self.log.info("Python Virtualenv will be cached in %s", venv_path)
+        venv_path.parent.mkdir(parents=True, exist_ok=True)
+        with open(f"{venv_path}.lock", "w") as f:
+            # Ensure that cache is not build by parallel workers
+            fcntl.flock(f, fcntl.LOCK_EX)
+
+            if venv_path.exists() and (venv_path / 
"install_complete_marker").exists():
+                self.log.info("Re-using cached Python Virtualenv in %s", 
venv_path)

Review Comment:
   Let’s always use _virtual environment_ (the official term in
   [PEP 405](https://peps.python.org/pep-0405/)) in log messages. The current
   wording is quite messy: I count _Virtualenv_, _virtualenv_, and _venv_ just
   in this function.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to