This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new bb234dc316f Remove sending of task logs in otel traces (#43943)
bb234dc316f is described below
commit bb234dc316f8421760255d85a37a10fd508de14f
Author: Ash Berlin-Taylor <[email protected]>
AuthorDate: Wed Nov 13 01:22:12 2024 +0000
Remove sending of task logs in otel traces (#43943)
Sending task logs from the scheduler has two problems:
1. It blocks the scheduling loop reading remote logs -- a huge performance
foot-gun for anyone that turns this feature on
2. Sending task logs in a span seems too large, and way too verbose to
provide any use in a span.
---
airflow/config_templates/config.yml | 8 --------
airflow/jobs/scheduler_job_runner.py | 25 -------------------------
newsfragments/43943.significant.rst | 5 +++++
3 files changed, 5 insertions(+), 33 deletions(-)
diff --git a/airflow/config_templates/config.yml
b/airflow/config_templates/config.yml
index 4d58de3c37e..dc703446af2 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -1283,14 +1283,6 @@ traces:
type: string
example: ~
default: "False"
- otel_task_log_event:
- description: |
- If True, after the task is complete, the full task log messages will
be added as the
- span events, chunked by 64k size. defaults to False.
- version_added: 2.10.0
- type: string
- example: ~
- default: "False"
secrets:
description: ~
options:
diff --git a/airflow/jobs/scheduler_job_runner.py
b/airflow/jobs/scheduler_job_runner.py
index ee101eaac44..52add3eaae2 100644
--- a/airflow/jobs/scheduler_job_runner.py
+++ b/airflow/jobs/scheduler_job_runner.py
@@ -799,31 +799,6 @@ class SchedulerJobRunner(BaseJobRunner, LoggingMixin):
with Trace.start_span_from_taskinstance(ti=ti) as span:
cls._set_span_attrs__process_executor_events(span, state, ti)
- if conf.has_option("traces", "otel_task_log_event") and
conf.getboolean(
- "traces", "otel_task_log_event"
- ):
- from airflow.utils.log.log_reader import TaskLogReader
-
- task_log_reader = TaskLogReader()
- if task_log_reader.supports_read:
- metadata: dict[str, Any] = {}
- logs, metadata = task_log_reader.read_log_chunks(ti,
ti.try_number, metadata)
- if ti.hostname in dict(logs[0]):
- message =
str(dict(logs[0])[ti.hostname]).replace("\\n", "\n")
- while metadata["end_of_log"] is False:
- logs, metadata =
task_log_reader.read_log_chunks(
- ti, ti.try_number - 1, metadata
- )
- if ti.hostname in dict(logs[0]):
- message = message +
str(dict(logs[0])[ti.hostname]).replace("\\n", "\n")
- if span.is_recording():
- span.add_event(
- name="task_log",
- attributes={
- "message": message,
- "metadata": str(metadata),
- },
- )
# There are two scenarios why the same TI with the same try_number
is queued
# after executor is finished with it:
diff --git a/newsfragments/43943.significant.rst
b/newsfragments/43943.significant.rst
new file mode 100644
index 00000000000..590caf792d2
--- /dev/null
+++ b/newsfragments/43943.significant.rst
@@ -0,0 +1,5 @@
+Remove the ``traces`` ``otel_task_log_event`` event config option and feature
+
+This was sending the task logs from the scheduler, and would be a huge
+scheduling performance hit (blocking all scheduling while it was
+fetching logs to attach to the trace).