This is an automated email from the ASF dual-hosted git repository.

pierrejeambrun pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/airflow.git


The following commit(s) were added to refs/heads/main by this push:
     new 116e607ddc Use re2 for matching untrusted regex (#32060)
116e607ddc is described below

commit 116e607ddcb32480e57c342f48226545ac6fc315
Author: Pierre Jeambrun <[email protected]>
AuthorDate: Fri Jun 23 09:55:19 2023 +0200

    Use re2 for matching untrusted regex (#32060)
---
 airflow/models/dag.py | 7 ++++---
 setup.cfg             | 1 +
 2 files changed, 5 insertions(+), 3 deletions(-)

diff --git a/airflow/models/dag.py b/airflow/models/dag.py
index 906ae989e2..53f69e4ff6 100644
--- a/airflow/models/dag.py
+++ b/airflow/models/dag.py
@@ -26,7 +26,6 @@ import logging
 import os
 import pathlib
 import pickle
-import re
 import sys
 import traceback
 import warnings
@@ -42,6 +41,7 @@ from typing import (
     Iterable,
     Iterator,
     List,
+    Pattern,
     Sequence,
     Union,
     cast,
@@ -51,6 +51,7 @@ from urllib.parse import urlsplit
 
 import jinja2
 import pendulum
+import re2 as re
 from dateutil.relativedelta import relativedelta
 from pendulum.tz.timezone import Timezone
 from sqlalchemy import Boolean, Column, ForeignKey, Index, Integer, String, Text, and_, case, func, not_, or_
@@ -2309,7 +2310,7 @@ class DAG(LoggingMixin):
 
     def partial_subset(
         self,
-        task_ids_or_regex: str | re.Pattern | Iterable[str],
+        task_ids_or_regex: str | Pattern | Iterable[str],
         include_downstream=False,
         include_upstream=True,
         include_direct_upstream=False,
@@ -2336,7 +2337,7 @@ class DAG(LoggingMixin):
         memo = {id(self.task_dict): None, id(self._task_group): None}
         dag = copy.deepcopy(self, memo)  # type: ignore
 
-        if isinstance(task_ids_or_regex, (str, re.Pattern)):
+        if isinstance(task_ids_or_regex, (str, Pattern)):
             matched_tasks = [t for t in self.tasks if re.findall(task_ids_or_regex, t.task_id)]
         else:
             matched_tasks = [t for t in self.tasks if t.task_id in task_ids_or_regex]
diff --git a/setup.cfg b/setup.cfg
index 59ee6d9130..d5d91d3d8f 100644
--- a/setup.cfg
+++ b/setup.cfg
@@ -98,6 +98,7 @@ install_requires =
     flask-login>=0.6.2
     flask-session>=0.4.0
     flask-wtf>=0.15
+    google-re2>=1.0
     graphviz>=0.12
     gunicorn>=20.1.0
     httpx

Reply via email to