This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch v2-11-test
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/v2-11-test by this push:
new b337a544b2d Disable use of LogTemplate table by default (#61880)
b337a544b2d is described below
commit b337a544b2d2e6f40a08ee85b70634d54e1ed805
Author: Jarek Potiuk <[email protected]>
AuthorDate: Sat Feb 14 13:26:50 2026 +0100
Disable use of LogTemplate table by default (#61880)
Accessing the database when rendering the log template might introduce
an unnecessary risk of executing code in the webserver, so we should
disable it by default.
Update airflow/config_templates/config.yml
Co-authored-by: GPK <[email protected]>
---
airflow/config_templates/config.yml | 14 ++++++++++
airflow/models/dagrun.py | 20 ++++++++++++--
airflow/models/tasklog.py | 15 +++++++++++
newsfragments/61880.significant.rst | 17 ++++++++++++
tests/utils/log/test_file_processor_handler.py | 2 ++
tests/utils/log/test_log_reader.py | 5 ++++
tests/utils/test_log_handlers.py | 36 ++++++++++++++++++++++++++
tests/www/views/test_views_log.py | 23 ++++++++++++++++
8 files changed, 130 insertions(+), 2 deletions(-)
diff --git a/airflow/config_templates/config.yml b/airflow/config_templates/config.yml
index 1e52a05bd43..536630de58a 100644
--- a/airflow/config_templates/config.yml
+++ b/airflow/config_templates/config.yml
@@ -927,6 +927,20 @@ logging:
default: "dag_id={{ ti.dag_id }}/run_id={{ ti.run_id }}/task_id={{
ti.task_id }}/\
{%% if ti.map_index >= 0 %%}map_index={{ ti.map_index }}/{%%
endif %%}\
attempt={{ try_number }}.log"
+    use_historical_filename_templates:
+      description: |
+        When this parameter is set to ``True``, Airflow will use the old filename templates for historical
+        task runs; the same applies to ``elasticsearch_id``, which is otherwise not resolved correctly for
+        historical tasks after you change it. Both require the webserver to access the database to render
+        the template filenames, which might allow Dag Authors to execute code on the webserver; that is why
+        this is disabled by default, even though old logs might then not be displayed in the webserver UI.
+        Enable it if you changed ``log_filename_template`` in the past and want to be able to see the logs
+        for historical tasks, but only do so if you trust your Dag Authors not to abuse the ability to
+        execute arbitrary code on the webserver through template rendering.
+      version_added: 2.11.1
+      type: boolean
+      example: ~
+      default: "False"
log_processor_filename_template:
description: |
Formatting for how airflow generates file names for log
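A minimal sketch of opting back in, assuming Airflow's standard AIRFLOW__{SECTION}__{KEY}
environment-variable override; the "core" section follows the getboolean() lookup added in
dagrun.py below, while the config.yml entry above sits under the logging section, so verify
which section your deployment actually resolves the option from:

    # Sketch only: re-enable historical filename templates via an
    # environment-variable override before the webserver starts.
    import os

    os.environ["AIRFLOW__CORE__USE_HISTORICAL_FILENAME_TEMPLATES"] = "True"

    from airflow.configuration import conf

    # With the override in place the new code path reads the flag as True.
    assert conf.getboolean("core", "use_historical_filename_templates", fallback=False)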
diff --git a/airflow/models/dagrun.py b/airflow/models/dagrun.py
index 30d1bda13d9..224f7b79ed7 100644
--- a/airflow/models/dagrun.py
+++ b/airflow/models/dagrun.py
@@ -58,7 +58,7 @@ from airflow.models.abstractoperator import NotMapped
from airflow.models.base import Base, StringID
from airflow.models.expandinput import NotFullyPopulated
from airflow.models.taskinstance import TaskInstance as TI
-from airflow.models.tasklog import LogTemplate
+from airflow.models.tasklog import LogTemplate, LogTemplateDataClass
from airflow.stats import Stats
from airflow.ti_deps.dep_context import DepContext
from airflow.ti_deps.dependencies_states import SCHEDULEABLE_STATES
@@ -1648,9 +1648,25 @@ class DagRun(Base, LoggingMixin):
return count
@provide_session
-    def get_log_template(self, *, session: Session = NEW_SESSION) -> LogTemplate | LogTemplatePydantic:
+    def get_db_log_template(self, *, session: Session = NEW_SESSION) -> LogTemplate | LogTemplatePydantic:
         return DagRun._get_log_template(log_template_id=self.log_template_id, session=session)
+    @provide_session
+    def get_log_template(
+        self, session: Session = NEW_SESSION
+    ) -> LogTemplate | LogTemplatePydantic | LogTemplateDataClass:
+        if airflow_conf.getboolean("core", "use_historical_filename_templates", fallback=False):
+            return self.get_db_log_template(session=session)
+        else:
+            return LogTemplateDataClass(
+                filename=airflow_conf.get_mandatory_value("core", "log_filename_template"),
+                elasticsearch_id=airflow_conf.get(
+                    "elasticsearch",
+                    "log_id_template",
+                    fallback="{dag_id}-{task_id}-{run_id}-{map_index}-{try_number}",
+                ),
+            )
+
@staticmethod
@internal_api_call
@provide_session
diff --git a/airflow/models/tasklog.py b/airflow/models/tasklog.py
index d55eb94a266..758c145c55a 100644
--- a/airflow/models/tasklog.py
+++ b/airflow/models/tasklog.py
@@ -17,6 +17,8 @@
# under the License.
from __future__ import annotations
+from dataclasses import dataclass
+
from sqlalchemy import Column, Integer, Text
from airflow.models.base import Base
@@ -42,3 +44,16 @@ class LogTemplate(Base):
     def __repr__(self) -> str:
         attrs = ", ".join(f"{k}={getattr(self, k)}" for k in ("filename", "elasticsearch_id"))
         return f"LogTemplate({attrs})"
+
+
+@dataclass
+class LogTemplateDataClass:
+ """
+ Dataclass for log template (used when log template is read from
configuration, not database).
+
+ :field filename: log filename template
+ :field elasticsearch_id: Elasticsearch document ID for log template
+ """
+
+ filename: str
+ elasticsearch_id: str
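For illustration, a minimal sketch of consuming the config-backed object defined above,
assuming this change is applied; the template string and task values are invented, and a
str.format-style template is assumed (the default log_filename_template is Jinja-based):

    # Sketch only: build the config-backed template object and render a
    # filename from a str.format-style template (values are made up).
    from airflow.models.tasklog import LogTemplateDataClass

    template = LogTemplateDataClass(
        filename="dag_id={dag_id}/run_id={run_id}/task_id={task_id}/attempt={try_number}.log",
        elasticsearch_id="{dag_id}-{task_id}-{run_id}-{map_index}-{try_number}",
    )
    print(template.filename.format(
        dag_id="example_dag", run_id="manual__2026-02-14", task_id="print_date", try_number=1
    ))
    # dag_id=example_dag/run_id=manual__2026-02-14/task_id=print_date/attempt=1.log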
diff --git a/newsfragments/61880.significant.rst b/newsfragments/61880.significant.rst
new file mode 100644
index 00000000000..d00105206c4
--- /dev/null
+++ b/newsfragments/61880.significant.rst
@@ -0,0 +1,17 @@
+Retrieval of historical log templates is disabled by default in Airflow 2.11.1
+
+When you change the log template in Airflow 2.11.1, the historical log templates are not retrieved.
+This means that if you have existing logs that were generated using a different log template,
+they will not be accessible using the new log template.
+
+This change is due to potential security issues that could arise from retrieving historical log
+templates: rendering templates stored in the database allows Dag Authors to execute arbitrary code
+in the webserver when logs are retrieved. By disabling the retrieval of historical log templates,
+Airflow 2.11.1 aims to enhance the security of the system and prevent such vulnerabilities in
+deployments where the possibility of executing arbitrary code in the webserver is a concern.
+
+Users who need to access historical logs generated with a different log template will need to
+manually rename their historical log files to match the latest ``log_filename_template`` configured
+in the Airflow configuration, or they can set the ``core.use_historical_filename_templates``
+configuration option to ``True`` to re-enable the retrieval of historical log templates, if they
+accept that Dag Authors are then able to execute arbitrary code in the webserver when retrieving logs.
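The tests that follow opt back in per test case with the conf_vars helper; a minimal sketch
of the same pattern, assuming the helper stays importable from tests.test_utils.config as in
the hunks below:

    # Sketch only: re-enable the option for a single test, mirroring the
    # decorator usage added in the test modules below.
    from tests.test_utils.config import conf_vars


    @conf_vars({("core", "use_historical_filename_templates"): "True"})
    def test_uses_historical_log_template():
        # With the flag on, the LogTemplate table is consulted again, so runs
        # created before a log_filename_template change keep their old paths.
        ...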
diff --git a/tests/utils/log/test_file_processor_handler.py b/tests/utils/log/test_file_processor_handler.py
index f50fc619ac5..9e7504e098d 100644
--- a/tests/utils/log/test_file_processor_handler.py
+++ b/tests/utils/log/test_file_processor_handler.py
@@ -25,6 +25,7 @@ import time_machine
from airflow.utils import timezone
from airflow.utils.log.file_processor_handler import FileProcessorHandler
+from tests.test_utils.config import conf_vars
class TestFileProcessorHandler:
@@ -60,6 +61,7 @@ class TestFileProcessorHandler:
handler.set_context(filename=os.path.join(self.dag_dir, "logfile"))
assert os.path.exists(os.path.join(path, "logfile.log"))
+ @conf_vars({("core", "use_historical_filename_templates"): "True"})
def test_symlink_latest_log_directory(self):
         handler = FileProcessorHandler(base_log_folder=self.base_log_folder, filename_template=self.filename)
handler.dag_dir = self.dag_dir
diff --git a/tests/utils/log/test_log_reader.py b/tests/utils/log/test_log_reader.py
index 2463db11a5b..6e9bd2d3ca8 100644
--- a/tests/utils/log/test_log_reader.py
+++ b/tests/utils/log/test_log_reader.py
@@ -120,6 +120,7 @@ class TestLogView:
session.delete(log_template)
session.commit()
+ @conf_vars({("core", "use_historical_filename_templates"): "True"})
def test_test_read_log_chunks_should_read_one_try(self):
task_log_reader = TaskLogReader()
ti = copy.copy(self.ti)
@@ -137,6 +138,7 @@ class TestLogView:
]
assert metadatas == {"end_of_log": True, "log_pos": 13}
+ @conf_vars({("core", "use_historical_filename_templates"): "True"})
def test_test_read_log_chunks_should_read_all_files(self):
task_log_reader = TaskLogReader()
ti = copy.copy(self.ti)
@@ -152,6 +154,7 @@ class TestLogView:
assert f"try_number={i + 1}." in logs[i][0][1]
assert metadatas == {"end_of_log": True, "log_pos": 13}
+ @conf_vars({("core", "use_historical_filename_templates"): "True"})
def test_test_test_read_log_stream_should_read_one_try(self):
task_log_reader = TaskLogReader()
ti = copy.copy(self.ti)
@@ -163,6 +166,7 @@ class TestLogView:
" INFO - ::endgroup::\ntry_number=1.\n"
]
+ @conf_vars({("core", "use_historical_filename_templates"): "True"})
def test_test_test_read_log_stream_should_read_all_logs(self):
task_log_reader = TaskLogReader()
         self.ti.state = TaskInstanceState.SUCCESS  # Ensure mocked instance is completed to return stream
@@ -262,6 +266,7 @@ class TestLogView:
mock_prop.return_value = True
assert task_log_reader.supports_external_link
+ @conf_vars({("core", "use_historical_filename_templates"): "True"})
def test_task_log_filename_unique(self, dag_maker):
"""Ensure the default log_filename_template produces a unique filename.
diff --git a/tests/utils/test_log_handlers.py b/tests/utils/test_log_handlers.py
index 95483f2285f..ab546332e3d 100644
--- a/tests/utils/test_log_handlers.py
+++ b/tests/utils/test_log_handlers.py
@@ -589,6 +589,7 @@ class TestFileTaskLogHandler:
class TestFilenameRendering:
+ @conf_vars({("core", "use_historical_filename_templates"): "True"})
     def test_python_formatting(self, create_log_template, create_task_instance):
create_log_template("{dag_id}/{task_id}/{execution_date}/{try_number}.log")
filename_rendering_ti = create_task_instance(
@@ -606,6 +607,24 @@ class TestFilenameRendering:
rendered_filename = fth._render_filename(filename_rendering_ti, 42)
assert expected_filename == rendered_filename
+    def test_python_formatting_historical_logs_not_enabled(self, create_log_template, create_task_instance):
+        create_log_template("{dag_id}/{task_id}/{execution_date}/{try_number}.log")
+ filename_rendering_ti = create_task_instance(
+ dag_id="dag_for_testing_filename_rendering",
+ task_id="task_for_testing_filename_rendering",
+ run_type=DagRunType.SCHEDULED,
+ execution_date=DEFAULT_DATE,
+ )
+
+ expected_filename = (
+ f"dag_id=dag_for_testing_filename_rendering/"
+            f"run_id=scheduled__{DEFAULT_DATE.isoformat()}/task_id=task_for_testing_filename_rendering/attempt=42.log"
+ )
+ fth = FileTaskHandler("")
+ rendered_filename = fth._render_filename(filename_rendering_ti, 42)
+ assert expected_filename == rendered_filename
+
+ @conf_vars({("core", "use_historical_filename_templates"): "True"})
def test_jinja_rendering(self, create_log_template, create_task_instance):
create_log_template("{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{
try_number }}.log")
filename_rendering_ti = create_task_instance(
@@ -623,6 +642,23 @@ class TestFilenameRendering:
rendered_filename = fth._render_filename(filename_rendering_ti, 42)
assert expected_filename == rendered_filename
+    def test_jinja_rendering_historical_logs_not_enabled(self, create_log_template, create_task_instance):
+        create_log_template("{{ ti.dag_id }}/{{ ti.task_id }}/{{ ts }}/{{ try_number }}.log")
+ filename_rendering_ti = create_task_instance(
+ dag_id="dag_for_testing_filename_rendering",
+ task_id="task_for_testing_filename_rendering",
+ run_type=DagRunType.SCHEDULED,
+ execution_date=DEFAULT_DATE,
+ )
+
+ expected_filename = (
+ f"dag_id=dag_for_testing_filename_rendering/"
+            f"run_id=scheduled__{DEFAULT_DATE.isoformat()}/task_id=task_for_testing_filename_rendering/attempt=42.log"
+ )
+ fth = FileTaskHandler("")
+ rendered_filename = fth._render_filename(filename_rendering_ti, 42)
+ assert expected_filename == rendered_filename
+
class TestLogUrl:
def test_log_retrieval_valid(self, create_task_instance):
diff --git a/tests/www/views/test_views_log.py b/tests/www/views/test_views_log.py
index 2607317c5fc..c2fa9bffd65 100644
--- a/tests/www/views/test_views_log.py
+++ b/tests/www/views/test_views_log.py
@@ -299,6 +299,7 @@ def dag_run_with_log_filename(tis):
         session.query(LogTemplate).filter(LogTemplate.id == log_template.id).delete()
+@conf_vars({("core", "use_historical_filename_templates"): "True"})
def test_get_logs_for_changed_filename_format_db(
log_admin_client, dag_run_with_log_filename, create_expected_log_file
):
@@ -323,6 +324,28 @@ def test_get_logs_for_changed_filename_format_db(
assert expected_filename in content_disposition
+def test_get_logs_for_changed_filename_format_db_historical_logs_not_enabled(
+ log_admin_client, dag_run_with_log_filename, create_expected_log_file
+):
+ try_number = 1
+ create_expected_log_file(try_number)
+ url = (
+ f"get_logs_with_metadata?dag_id={dag_run_with_log_filename.dag_id}&"
+ f"task_id={TASK_ID}&"
+        f"execution_date={urllib.parse.quote_plus(dag_run_with_log_filename.logical_date.isoformat())}&"
+ f"try_number={try_number}&metadata={{}}&format=file"
+ )
+ response = log_admin_client.get(url)
+
+    # Should find the log under the filename rendered from configuration, ignoring the db entry.
+ assert 200 == response.status_code
+ assert "Log for testing." in response.data.decode("utf-8")
+ content_disposition = response.headers["Content-Disposition"]
+    expected_filename = f"dag_id={dag_run_with_log_filename.dag_id}/run_id={dag_run_with_log_filename.run_id}/task_id={TASK_ID}/attempt={try_number}.log"
+ assert content_disposition.startswith("attachment")
+ assert expected_filename in content_disposition
+
+
@unittest.mock.patch(
"airflow.utils.log.file_task_handler.FileTaskHandler.read",
side_effect=[