Taragolis commented on code in PR #28819:
URL: https://github.com/apache/airflow/pull/28819#discussion_r1069862030


##########
tests/system/providers/amazon/aws/utils/__init__.py:
##########
@@ -249,7 +251,43 @@ def set_env_id() -> str:
     return env_id
 
 
-def purge_logs(
+def all_tasks_passed(ti) -> bool:
+    task_runs = ti.get_dagrun().get_task_instances()
+    return all([_task.state != State.FAILED for _task in task_runs])
+
+
+@task(trigger_rule=TriggerRule.ALL_DONE)
+def prune_logs(
+    logs: list[tuple[str, str | None]],
+    force_delete: bool = False,
+    retry: bool = False,
+    retry_times: int = 3,
+    ti=None,
+):
+    """
+    If all tasks in this dagrun have succeeded, then delete the associated 
logs.
+    Otherwise, append the logs with a retention policy.  This allows the logs
+    to be used for troubleshooting but assures they won't build up 
indefinitely.
+
+    :param logs: A list of log_group/stream_prefix tuples to delete.
+    :param force_delete: Whether to check log streams within the log group 
before
+        removal. If True, removes the log group and all its log streams inside 
it.
+    :param retry: Whether to retry if the log group/stream was not found. In 
some
+        cases, the log group/stream is created seconds after the main resource 
has
+        been created. By default, it retries for 3 times with a 5s waiting 
period.
+    :param retry_times: Number of retries.
+    :param ti: Used to check the status of the tasks. This gets pulled from the
+        DAG's context and does not need to be passed manually.
+    """
+    if all_tasks_passed(ti):
+        _purge_logs(logs, force_delete, retry, retry_times)
+    else:
+        client: BaseClient = boto3.client("logs")
+        for group, _ in logs:
+            client.put_retention_policy(logGroupName=group, retentionInDays=30)

Review Comment:
   Storing data in CloudWatch Logs is much cheaper (by about 15x) than putting 
these logs into CloudWatch.
   So I don't think there would be a significant difference in cost between 
storing them for 7 days and for 30 days.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to