johnhoran commented on code in PR #60778:
URL: https://github.com/apache/airflow/pull/60778#discussion_r2798952240
##########
providers/cncf/kubernetes/src/airflow/providers/cncf/kubernetes/utils/pod_manager.py:
##########
@@ -1080,31 +1085,57 @@ async def fetch_container_logs_before_current_sec(
since_seconds=(math.ceil((now - since_time).total_seconds()) if
since_time else None),
)
message_to_log = None
- try:
- now_seconds = now.replace(microsecond=0)
- for line in logs:
- line_timestamp, message = parse_log_line(line)
- # Skip log lines from the current second to prevent duplicate
entries on the next read.
- # The API only allows specifying 'since_seconds', not an exact
timestamp.
- if line_timestamp and line_timestamp.replace(microsecond=0) ==
now_seconds:
- break
- if line_timestamp: # detect new log line
- if message_to_log is None: # first line in the log
- message_to_log = message
- else: # previous log line is complete
- if message_to_log is not None:
- if is_log_group_marker(message_to_log):
- print(message_to_log)
- else:
- self.log.info("[%s] %s", container_name,
message_to_log)
- message_to_log = message
- elif message_to_log: # continuation of the previous log line
- message_to_log = f"{message_to_log}\n{message}"
- finally:
- # log the last line and update the last_captured_timestamp
- if message_to_log is not None:
- if is_log_group_marker(message_to_log):
- print(message_to_log)
- else:
- self.log.info("[%s] %s", container_name, message_to_log)
+ async with self._hook.get_conn() as connection:
+ v1_api = async_k8s.CoreV1Api(connection)
+ try:
+ now_seconds = now.replace(microsecond=0)
+ for line in logs:
+ line_timestamp, message = parse_log_line(line)
+ # Skip log lines from the current second to prevent
duplicate entries on the next read.
+ # The API only allows specifying 'since_seconds', not an
exact timestamp.
+ if line_timestamp and
line_timestamp.replace(microsecond=0) == now_seconds:
+ break
+ if line_timestamp: # detect new log line
+ if message_to_log is None: # first line in the log
+ message_to_log = message
+ else: # previous log line is complete
+ if message_to_log is not None:
+ if is_log_group_marker(message_to_log):
+ print(message_to_log)
+ else:
+ for callback in self._callbacks:
+ cb = callback.progress_callback(
+ line=message_to_log,
+ client=v1_api,
+ mode=ExecutionMode.ASYNC,
+ container_name=container_name,
+ timestamp=line_timestamp,
+ pod=pod,
+ )
+ if asyncio.iscoroutine(cb):
+ await cb
Review Comment:
There aren't any restrictions on what can be run in the triggerer either,
though. You can add a custom operator and run any arbitrary code you want
there.
The larger concern I have is that somebody might write a callback for
non-deferred mode and then switch to running in deferred mode, at which point
the triggerer would be calling synchronous code that was never designed to run
there. I don't really have an answer to that, beyond noting that the
progress_callback was broken from its first implementation until very recently,
so I guess nobody has really been using it.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]