This is an automated email from the ASF dual-hosted git repository.

vatsrahul1001 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 80f1ab4d5a0 Tighten deserialization allowlist regex to require full-string match (#66499)
80f1ab4d5a0 is described below

commit 80f1ab4d5a0f8b83873ce31f32b89d341f667b6f
Author: Jarek Potiuk <[email protected]>
AuthorDate: Mon May 18 11:12:57 2026 +0200

    Tighten deserialization allowlist regex to require full-string match (#66499)
    
    * Tighten deserialization allowlist regex to use full-string match
    
    The ``allowed_deserialization_classes_regexp`` allowlist used ``re.match()``,
    which only anchors at the start of the string. A pattern like
    ``airflow\.models\.Variable`` therefore also admitted classnames such as
    ``airflow.models.Variable_Malicious``. Switch to ``re.fullmatch()`` so the
    admin's pattern matches the entire classname; document the semantics in
    the config description so operators know to use ``.*`` for prefix-style
    allowances.
    
    * Add newsfragment for #66499
    
    ---------
    
    Co-authored-by: Rahul Vats <[email protected]>
---
 airflow-core/newsfragments/66499.significant.rst     | 14 ++++++++++++++
 airflow-core/src/airflow/config_templates/config.yml |  5 ++++-
 task-sdk/src/airflow/sdk/serde/__init__.py           |  4 +++-
 task-sdk/tests/task_sdk/serde/test_serde.py          | 20 +++++++++++++++++++-
 4 files changed, 40 insertions(+), 3 deletions(-)

diff --git a/airflow-core/newsfragments/66499.significant.rst b/airflow-core/newsfragments/66499.significant.rst
new file mode 100644
index 00000000000..5b321588876
--- /dev/null
+++ b/airflow-core/newsfragments/66499.significant.rst
@@ -0,0 +1,14 @@
+Tighten ``[core] allowed_deserialization_classes_regexp`` to require full-string matches
+
+Patterns in ``[core] allowed_deserialization_classes_regexp`` are now matched
+against the entire classname using ``re.fullmatch()`` instead of ``re.match()``.
+Previously a pattern such as ``airflow\.models\.Variable`` admitted not only
+the intended class but also names that started with it
+(e.g. ``airflow.models.Variable_Malicious``), because ``re.match`` only anchors
+at the start of the string.
+
+The default value of this option is empty, so out-of-the-box deployments are
+unaffected. Deployments that configured this option with patterns relying on
+prefix-match semantics — for example ``airflow\.models\.`` to mean "any class
+under ``airflow.models``" — must add ``.*`` to the pattern
+(``airflow\.models\..*``) to retain the previous behaviour.
diff --git a/airflow-core/src/airflow/config_templates/config.yml b/airflow-core/src/airflow/config_templates/config.yml
index 03593ce4ba0..57ddd14a6cb 100644
--- a/airflow-core/src/airflow/config_templates/config.yml
+++ b/airflow-core/src/airflow/config_templates/config.yml
@@ -261,7 +261,10 @@ core:
     allowed_deserialization_classes_regexp:
       description: |
         Space-separated list of classes that may be imported during deserialization. Items are processed
-        as regex expressions. Python built-in classes (like dict) are always allowed.
+        as regex expressions and matched against the full classname (``re.fullmatch`` semantics), so a
+        pattern such as ``airflow\.models\.Variable`` does not also admit ``airflow.models.VariableXYZ``.
+        Use ``.*`` (e.g. ``airflow\.models\..*``) to allow a prefix and any suffix. Python built-in
+        classes (like dict) are always allowed.
         This is a secondary option to ``[core] allowed_deserialization_classes``.
       version_added: 2.8.2
       type: string
diff --git a/task-sdk/src/airflow/sdk/serde/__init__.py b/task-sdk/src/airflow/sdk/serde/__init__.py
index d25da4b79b6..7e96e73a604 100644
--- a/task-sdk/src/airflow/sdk/serde/__init__.py
+++ b/task-sdk/src/airflow/sdk/serde/__init__.py
@@ -332,8 +332,10 @@ def _match_glob(classname: str):
 @functools.cache
 def _match_regexp(classname: str):
     """Check if the given classname matches a pattern from allowed_deserialization_classes_regexp using regexp."""
+    # fullmatch (not match) so a pattern like ``airflow\.models\.Variable`` cannot also admit
+    # ``airflow.models.Variable_Malicious`` — re.match only anchors at the start of the string.
     patterns = _get_regexp_patterns()
-    return any(p.match(classname) is not None for p in patterns)
+    return any(p.fullmatch(classname) is not None for p in patterns)
 
 
 def _stringify(classname: str, version: int, value: T | None) -> str:
diff --git a/task-sdk/tests/task_sdk/serde/test_serde.py b/task-sdk/tests/task_sdk/serde/test_serde.py
index f264164ffca..17f71783cb6 100644
--- a/task-sdk/tests/task_sdk/serde/test_serde.py
+++ b/task-sdk/tests/task_sdk/serde/test_serde.py
@@ -367,7 +367,7 @@ class TestSerDe:
     @conf_vars(
         {
             ("core", "allowed_deserialization_classes"): "",
-            ("core", "allowed_deserialization_classes_regexp"): r"unit\.airflow\..",
+            ("core", "allowed_deserialization_classes_regexp"): r"unit\.airflow\..*",
         }
     )
     @pytest.mark.usefixtures("recalculate_patterns")
@@ -394,6 +394,24 @@ class TestSerDe:
         assert _match("unit.airflow.deep")
         assert _match("unit.airflow.FALSE") is False
 
+    @conf_vars(
+        {
+            ("core", "allowed_deserialization_classes"): "",
+            ("core", "allowed_deserialization_classes_regexp"): r"unit\.airflow\.Variable",
+        }
+    )
+    @pytest.mark.usefixtures("recalculate_patterns")
+    def test_allow_list_regexp_does_not_prefix_match(self):
+        """
+        A pattern without an explicit end anchor must not admit classes that share
+        the pattern as a prefix. ``re.match`` would let ``unit.airflow.Variable_Malicious``
+        through because it only anchors at the start of the string; ``re.fullmatch``
+        rejects it. Patterns with ``.*`` at the end retain prefix-style behaviour.
+        """
+        assert _match("unit.airflow.Variable")
+        assert _match("unit.airflow.Variable_Malicious") is False
+        assert _match("unit.airflow.VariableSubclass") is False
+
     def test_incompatible_version(self):
         data = dict(
             {

Reply via email to