ephraimbuddy commented on code in PR #44189:
URL: https://github.com/apache/airflow/pull/44189#discussion_r1850177637
##########
airflow/models/dagcode.py:
##########
@@ -151,21 +138,52 @@ def _get_code_from_db(cls, fileloc, session: Session =
NEW_SESSION) -> str:
return code
@staticmethod
- def dag_fileloc_hash(full_filepath: str) -> int:
+ def dag_source_hash(source: str) -> str:
"""
- Hashing file location for indexing.
+ Hash the source code of the DAG.
- :param full_filepath: full filepath of DAG file
- :return: hashed full_filepath
+ This is needed so we can update the source on code changes
"""
- # Hashing is needed because the length of fileloc is 2000 as an
Airflow convention,
- # which is over the limit of indexing.
- import hashlib
+ return md5(source.encode("utf-8")).hexdigest()
- # Only 7 bytes because MySQL BigInteger can hold only 8 bytes (signed).
- return (
- struct.unpack(
- ">Q", hashlib.sha1(full_filepath.encode("utf-8"),
usedforsecurity=False).digest()[-8:]
- )[0]
- >> 8
- )
+ @classmethod
+ def _latest_dagcode_select(cls, dag_id: str) -> Select:
+ """
+ Get the select object to get the latest dagcode.
+
+ :param dag_id: The DAG ID.
+ :return: The select object.
+ """
+ return select(cls).where(cls.dag_id ==
dag_id).order_by(cls.last_updated.desc()).limit(1)
+
+ @classmethod
+ @provide_session
+ def get_latest_dagcode(cls, dag_id: str, session: Session = NEW_SESSION)
-> DagCode | None:
+ """
+ Get the latest dagcode.
+
+ :param dag_id: The DAG ID.
+ :param session: The database session.
+ :return: The latest dagcode or None if not found.
+ """
+ return session.scalar(cls._latest_dagcode_select(dag_id))
+
+ @classmethod
+ @provide_session
+ def update_source_code(cls, dag: DAG, session: Session = NEW_SESSION) ->
None:
+ """
+ Check if the source code of the DAG has changed and update it if
needed.
+
+ :param dag: The DAG object.
+ :param session: The database session.
+ :return: None
+ """
+ latest_dagcode = cls.get_latest_dagcode(dag.dag_id, session)
Review Comment:
If we query for only some fields, we won't be able to update the record, since
the result won't be the whole object.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]