This is an automated email from the ASF dual-hosted git repository.
potiuk pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git
The following commit(s) were added to refs/heads/main by this push:
new 53b5b42f8f0 Escape LIKE wildcards in non-search filter parameters (#67496)
53b5b42f8f0 is described below
commit 53b5b42f8f09f7f96d3c26a640ecd0a979ba3540
Author: Jarek Potiuk <[email protected]>
AuthorDate: Tue May 26 00:09:43 2026 +0200
Escape LIKE wildcards in non-search filter parameters (#67496)
`_OwnersFilter`, `_AssetDependencyFilter`, and `_ConsumingAssetFilter`
embedded user-supplied values directly into `ILIKE '%...%'` patterns
without escaping the SQL wildcard metacharacters `%` and `_`. These
filter classes are not documented as supporting wildcard semantics
(unlike `_SearchParam`), so a user supplying `%` or `_` would trigger
pattern matching rather than the literal substring matching the filter
promises — widening match results beyond the filter's intent.
This is not SQL injection (SQLAlchemy parameterizes the values) and RBAC
still bounds results, but the pattern semantics leak is a defense-in-depth
issue worth closing.
Add `_escape_like_pattern()` helper that escapes `\`, `%`, and `_`, and
apply it in the three affected filters along with an explicit
`escape="\\"` clause on the `.ilike()` call. `_SearchParam` is left
unchanged — its docstring explicitly documents wildcard support.
---
.../src/airflow/api_fastapi/common/parameters.py | 42 ++++++++++++--
.../unit/api_fastapi/common/test_parameters.py | 65 ++++++++++++++++++++++
2 files changed, 101 insertions(+), 6 deletions(-)
diff --git a/airflow-core/src/airflow/api_fastapi/common/parameters.py b/airflow-core/src/airflow/api_fastapi/common/parameters.py
index f478da5b898..56b4c20884c 100644
--- a/airflow-core/src/airflow/api_fastapi/common/parameters.py
+++ b/airflow-core/src/airflow/api_fastapi/common/parameters.py
@@ -284,6 +284,26 @@ class _PrefixPatternParam(BaseParam[str], ABC):
return value
+_LIKE_ESCAPE_CHAR = "\\"
+
+
+def _escape_like_pattern(value: str) -> str:
+ r"""
+ Escape SQL ``LIKE`` / ``ILIKE`` metacharacters in a user-supplied value.
+
+ Use together with ``column.ilike(f"%{_escape_like_pattern(value)}%", escape="\\")`` on filter
+ parameters that intend literal substring matching (so a user-supplied ``%`` or ``_`` does not
+ widen the match beyond what the filter semantics promise). Search parameters that explicitly
+ expose wildcard semantics (see :class:`_SearchParam`) must not call this — they want the
+ metacharacters to pass through.
+ """
+ return (
+ value.replace(_LIKE_ESCAPE_CHAR, _LIKE_ESCAPE_CHAR * 2)
+ .replace("%", _LIKE_ESCAPE_CHAR + "%")
+ .replace("_", _LIKE_ESCAPE_CHAR + "_")
+ )
+
+
class _SearchParam(BaseParam[str]):
"""
Substring search on a column using ``ILIKE '%term%'`` (case-insensitive).
@@ -822,7 +842,10 @@ class _OwnersFilter(BaseParam[list[str]]):
if not self.value:
return select
- conditions = [DagModel.owners.ilike(f"%{owner}%") for owner in self.value]
+ conditions = [
+ DagModel.owners.ilike(f"%{_escape_like_pattern(owner)}%", escape=_LIKE_ESCAPE_CHAR)
+ for owner in self.value
+ ]
return select.where(or_(*conditions))
@classmethod
@@ -1108,13 +1131,19 @@ class _AssetDependencyFilter(BaseParam[str]):
"""Filter Dags by specific asset dependencies."""
def to_orm(self, select: Select) -> Select:
- if self.value is None and self.skip_none:
+ if self.value is None:
return select
+ escaped = _escape_like_pattern(self.value)
asset_dag_subquery = (
sql_select(DagScheduleAssetReference.dag_id)
.join(AssetModel, DagScheduleAssetReference.asset_id == AssetModel.id)
- .where(or_(AssetModel.name.ilike(f"%{self.value}%"), AssetModel.uri.ilike(f"%{self.value}%")))
+ .where(
+ or_(
+ AssetModel.name.ilike(f"%{escaped}%", escape=_LIKE_ESCAPE_CHAR),
+ AssetModel.uri.ilike(f"%{escaped}%", escape=_LIKE_ESCAPE_CHAR),
+ )
+ )
.distinct()
)
@@ -1138,16 +1167,17 @@ class _ConsumingAssetFilter(BaseParam[str | None]):
"""Filter Dag runs by consuming asset (name or URI)."""
def to_orm(self, select: Select) -> Select:
- if not self.value and self.skip_none:
+ if not self.value:
return select
+ escaped = _escape_like_pattern(self.value)
event_subquery = (
sql_select(AssetEvent.id)
.join(AssetModel, AssetEvent.asset_id == AssetModel.id)
.where(
or_(
- AssetModel.name.ilike(f"%{self.value}%"),
- AssetModel.uri.ilike(f"%{self.value}%"),
+ AssetModel.name.ilike(f"%{escaped}%", escape=_LIKE_ESCAPE_CHAR),
+ AssetModel.uri.ilike(f"%{escaped}%", escape=_LIKE_ESCAPE_CHAR),
)
)
.distinct()
diff --git a/airflow-core/tests/unit/api_fastapi/common/test_parameters.py b/airflow-core/tests/unit/api_fastapi/common/test_parameters.py
index 6d5014b4cec..e2742d88a94 100644
--- a/airflow-core/tests/unit/api_fastapi/common/test_parameters.py
+++ b/airflow-core/tests/unit/api_fastapi/common/test_parameters.py
@@ -31,6 +31,10 @@ from airflow.api_fastapi.common.parameters import (
NullableDatetimeRangeFilter,
RangeFilter,
SortParam,
+ _AssetDependencyFilter,
+ _ConsumingAssetFilter,
+ _escape_like_pattern,
+ _OwnersFilter,
_PrefixPatternParam,
_PrefixSearchParam,
_SearchParam,
@@ -220,6 +224,67 @@ class TestSearchParam:
assert " or " not in sql
+class TestEscapeLikePattern:
+ """The escape helper turns user input into a literal substring pattern.
+
+ Filter parameters that do *not* document wildcard semantics must call this so a user-supplied
+ ``%`` or ``_`` does not widen the match beyond the filter's intent. Search parameters that
+ explicitly expose wildcard semantics (see ``_SearchParam``) deliberately do not call it.
+ """
+
+ @pytest.mark.parametrize(
+ ("raw", "expected"),
+ [
+ ("plain", "plain"),
+ ("a%b", r"a\%b"),
+ ("a_b", r"a\_b"),
+ (r"a\b", r"a\\b"),
+ (r"a\%b", r"a\\\%b"),
+ ("%_\\", r"\%\_\\"),
+ ("", ""),
+ ],
+ )
+ def test_escapes_metacharacters(self, raw, expected):
+ assert _escape_like_pattern(raw) == expected
+
+
+class TestNonSearchFilterEscaping:
+ """``_OwnersFilter`` / ``_AssetDependencyFilter`` / ``_ConsumingAssetFilter`` escape ``%`` and ``_``.
+
+ Compile-time check: the rendered SQL must wrap the *escaped* user value in ``%...%`` and
+ declare an ``ESCAPE`` clause so the database treats user-supplied wildcards literally.
+ """
+
+ def test_owners_filter_escapes_user_wildcards(self):
+ param = _OwnersFilter().set_value(["100%_alice"])
+ statement = param.to_orm(select(DagModel))
+ sql = _compile(statement)
+ assert r"'%100\%\_alice%'" in sql
+ assert "escape" in sql
+
+ def test_asset_dependency_filter_escapes_user_wildcards(self):
+ param = _AssetDependencyFilter().set_value("ledger_%")
+ statement = param.to_orm(select(DagModel))
+ sql = _compile(statement)
+ assert r"'%ledger\_\%%'" in sql
+ assert "escape" in sql
+
+ def test_consuming_asset_filter_escapes_user_wildcards(self):
+ param = _ConsumingAssetFilter().set_value("foo_%bar")
+ statement = param.to_orm(select(DagRun))
+ sql = _compile(statement)
+ assert r"'%foo\_\%bar%'" in sql
+ assert "escape" in sql
+
+ def test_search_param_does_not_escape_user_wildcards(self):
+ """Counter-test: ``_SearchParam`` deliberately passes wildcards through."""
+ param = _SearchParam(DagModel.dag_id).set_value("foo_%bar")
+ statement = param.to_orm(select(DagModel))
+ sql = _compile(statement)
+ # Raw user wildcards are present, not the escaped form.
+ assert "'%foo_%bar%'" in sql
+
+
class TestPrefixSearchParam:
"""Prefix search using range comparison (``attribute >= lower AND < upper``)."""