andyjianzhou commented on code in PR #40447:
URL: https://github.com/apache/airflow/pull/40447#discussion_r1674429587
##########
airflow/api_connexion/endpoints/log_endpoint.py:
##########
@@ -112,3 +118,58 @@ def get_log(
logs = task_log_reader.read_log_stream(ti, task_try_number, metadata)
return Response(logs, headers={"Content-Type": return_type})
+
+
+@security.requires_access_dag("GET", DagAccessEntity.TASK_LOGS)
+@provide_session
+@unify_bucket_name_and_key
+@provide_bucket_name
+def get_log_pages(
+ *,
+ dag_id: str,
+ dag_run_id: str,
+ task_id: str,
+ bucket_name: str,
+ key: str,
+ session: Session = NEW_SESSION,
+) -> APIResponse:
+ """Get total number of log pages for a specific task instance."""
+ task_log_reader = TaskLogReader()
+
+ if not task_log_reader.supports_read:
+ raise BadRequest("Task log handler does not support read logs.")
+
+ query = (
+ select(TaskInstance)
+ .where(
+ TaskInstance.task_id == task_id,
+ TaskInstance.dag_id == dag_id,
+ TaskInstance.run_id == dag_run_id,
+ )
+ .join(TaskInstance.dag_run)
+        .options(joinedload(TaskInstance.trigger).joinedload(Trigger.triggerer_job))
+ )
+
+ ti = session.scalar(query)
+
+ # Check if the task instance state is terminal
+ if ti.state is None:
+ raise BadRequest("TaskInstance state is None")
+
+ # Maybe add more?
+    if ti.state not in {TaskInstanceState.SUCCESS, TaskInstanceState.FAILED, TaskInstanceState.UP_FOR_RETRY}:
+ return {"total_pages": 1}
+
+ # Fetch s3 log content length
+ s3_hook = S3Hook()
Review Comment:
Oh good catch!
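For context on the truncated tail of the diff: after `s3_hook = S3Hook()` the endpoint presumably turns the size of the remote S3 log object into a page count. A minimal sketch of that idea is below; the `LOG_PAGE_SIZE_BYTES` constant, the one-object-per-try key layout, and the `estimate_total_pages` helper are illustrative assumptions, not the PR's actual implementation (which is not visible in the snippet).

```python
# Sketch only: derive a page count from the S3 log object's size.
# S3Hook.get_key() is a real hook method returning a boto3 S3.Object;
# everything else here is a hypothetical stand-in for the elided PR code.
import math

from airflow.providers.amazon.aws.hooks.s3 import S3Hook

LOG_PAGE_SIZE_BYTES = 1024 * 1024  # hypothetical page size: 1 MiB per page


def estimate_total_pages(bucket_name: str, key: str) -> int:
    """Estimate how many pages a remote S3 task log would span."""
    s3_hook = S3Hook()
    # content_length is the object size in bytes.
    obj = s3_hook.get_key(key=key, bucket_name=bucket_name)
    # Always report at least one page, even for an empty log file.
    return max(1, math.ceil(obj.content_length / LOG_PAGE_SIZE_BYTES))
```

Dividing the object size by a fixed byte budget keeps the endpoint cheap (a single HEAD-style lookup) at the cost of page boundaries that do not align with log lines; a line-aware split would need to read the object.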