This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/main by this push:
     new 66e7823  Use Hyperscan for ignore patterns to avoid backtracking attacks
66e7823 is described below

commit 66e7823b0cee7638e6cb13c1162acc88b75c96cb
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Jan 28 20:30:10 2026 +0000

    Use Hyperscan for ignore patterns to avoid backtracking attacks
---
 atr/models/__init__.py             |  4 +-
 atr/models/api.py                  | 16 +++++++-
 atr/models/validation.py           | 77 ++++++++++++++++++++++++++++++++++++++
 atr/shared/ignores.py              | 22 +++++++++++
 atr/storage/readers/checks.py      | 21 +----------
 atr/storage/writers/checks.py      | 22 +++++++++++
 atr/util.py                        | 22 +++++++++++
 scripts/check_models_imports.py    |  9 +++--
 tests/unit/test_ignore_patterns.py | 62 ++++++++++++++++++++++++++++++
 9 files changed, 229 insertions(+), 26 deletions(-)

diff --git a/atr/models/__init__.py b/atr/models/__init__.py
index a9cb122..52c3cad 100644
--- a/atr/models/__init__.py
+++ b/atr/models/__init__.py
@@ -15,7 +15,7 @@
 # specific language governing permissions and limitations
 # under the License.
 
-from . import api, distribution, helpers, results, schema, sql, tabulate
+from . import api, distribution, helpers, results, schema, sql, tabulate, validation
 
 # If we use .__name__, pyright gives a warning
-__all__ = ["api", "distribution", "helpers", "results", "schema", "sql", "tabulate"]
+__all__ = ["api", "distribution", "helpers", "results", "schema", "sql", "tabulate", "validation"]
diff --git a/atr/models/api.py b/atr/models/api.py
index ad9842e..436f002 100644
--- a/atr/models/api.py
+++ b/atr/models/api.py
@@ -21,7 +21,7 @@ from typing import Annotated, Any, Literal, TypeVar
 
 import pydantic
 
-from . import schema, sql, tabulate
+from . import schema, sql, tabulate, validation
 
 T = TypeVar("T")
 
@@ -173,6 +173,20 @@ class IgnoreAddArgs(schema.Strict):
     status: sql.CheckResultStatusIgnore | None = schema.default_example(None, sql.CheckResultStatusIgnore.FAILURE)
     message_glob: str | None = schema.default_example(None, "sha512 matches for apache-example-0.0.1/*.xml")
 
+    @pydantic.model_validator(mode="after")
+    def validate_patterns(self) -> "IgnoreAddArgs":
+        for pattern in [
+            self.release_glob,
+            self.checker_glob,
+            self.primary_rel_path_glob,
+            self.member_rel_path_glob,
+            self.message_glob,
+        ]:
+            if pattern is None:
+                continue
+            validation.validate_ignore_pattern(pattern)
+        return self
+
 
 class IgnoreAddResults(schema.Strict):
     endpoint: Literal["/ignore/add"] = schema.alias("endpoint")
diff --git a/atr/models/validation.py b/atr/models/validation.py
new file mode 100644
index 0000000..00ff352
--- /dev/null
+++ b/atr/models/validation.py
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import re
+from typing import Final
+
+import hyperscan
+
+MAX_IGNORE_PATTERN_LENGTH: Final[int] = 128
+
+
+class HyperscanPattern:
+    __slots__ = ("_db",)
+
+    def __init__(self, db: hyperscan.Database) -> None:
+        self._db = db
+
+    def search(self, value: str):
+        matched = False
+
+        def on_match(_id: int, _start: int, _end: int, _flags: int, _context: object) -> bool:
+            nonlocal matched
+            matched = True
+            return True
+
+        try:
+            self._db.scan(value.encode("utf-8"), on_match)
+        except hyperscan.ScanTerminated:
+            return True
+        except hyperscan.HyperscanError:
+            return None
+
+        return True if matched else None
+
+
+def compile_ignore_pattern(pattern: str):
+    # TODO: This requires importing Hyperscan in atr/models
+    # We want to avoid such dependencies
+    # But if we move this out, we can't do full validation in the models
+    if len(pattern) > MAX_IGNORE_PATTERN_LENGTH:
+        raise ValueError(f"Pattern exceeds {MAX_IGNORE_PATTERN_LENGTH} characters")
+    if pattern.startswith("^") or pattern.endswith("$"):
+        regex_pattern = pattern
+    else:
+        regex_pattern = re.escape(pattern).replace(r"\*", ".*")
+        # Should maybe add .replace(r"\?", ".?")
+    # We must turn off Chimera mode to avoid backtracking
+    db = hyperscan.Database(mode=hyperscan.HS_MODE_BLOCK, chimera=False)
+    try:
+        db.compile([regex_pattern])
+    except hyperscan.HyperscanError as exc:
+        raise ValueError(f"Invalid ignore pattern: {exc}") from exc
+    return HyperscanPattern(db)
+
+
+def validate_ignore_pattern(pattern: str) -> None:
+    """Raise an exception if the pattern is invalid."""
+    if pattern == "!":
+        return
+    raw_pattern = pattern
+    if raw_pattern.startswith("!"):
+        raw_pattern = raw_pattern[1:]
+    compile_ignore_pattern(raw_pattern)
diff --git a/atr/shared/ignores.py b/atr/shared/ignores.py
index 8583466..347a133 100644
--- a/atr/shared/ignores.py
+++ b/atr/shared/ignores.py
@@ -24,6 +24,7 @@ import pydantic
 
 import atr.form as form
 import atr.models.sql as sql
+import atr.models.validation as validation
 
 type ADD = Literal["add"]
 type DELETE = Literal["delete"]
@@ -93,6 +94,13 @@ class AddIgnoreForm(form.Form):
             ]
         ):
             raise ValueError("At least one field must be set")
+        _validate_ignore_form_patterns(
+            self.release_glob,
+            self.checker_glob,
+            self.primary_rel_path_glob,
+            self.member_rel_path_glob,
+            self.message_glob,
+        )
         return self
 
 
@@ -130,6 +138,13 @@ class UpdateIgnoreForm(form.Form):
             ]
         ):
             raise ValueError("At least one field must be set")
+        _validate_ignore_form_patterns(
+            self.release_glob,
+            self.checker_glob,
+            self.primary_rel_path_glob,
+            self.member_rel_path_glob,
+            self.message_glob,
+        )
         return self
 
 
@@ -137,3 +152,10 @@ type IgnoreForm = Annotated[
     AddIgnoreForm | DeleteIgnoreForm | UpdateIgnoreForm,
     form.DISCRIMINATOR,
 ]
+
+
+def _validate_ignore_form_patterns(*patterns: str) -> None:
+    for pattern in patterns:
+        if not pattern:
+            continue
+        validation.validate_ignore_pattern(pattern)
diff --git a/atr/storage/readers/checks.py b/atr/storage/readers/checks.py
index 3b8de1b..15131bb 100644
--- a/atr/storage/readers/checks.py
+++ b/atr/storage/readers/checks.py
@@ -18,13 +18,13 @@
 # Removing this will cause circular imports
 from __future__ import annotations
 
-import re
 from typing import TYPE_CHECKING
 
 import atr.db as db
 import atr.models.sql as sql
 import atr.storage as storage
 import atr.storage.types as types
+import atr.util as util
 
 if TYPE_CHECKING:
     import pathlib
@@ -142,21 +142,4 @@ class GeneralPublic:
         return True
 
     def __check_ignore_match_pattern(self, pattern: str | None, value: str | None) -> bool:
-        if pattern == "!":
-            # Special case, "!" matches None
-            return True if (value is None) else False
-        if (pattern is None) or (value is None):
-            return False
-        negate = False
-        if pattern.startswith("!"):
-            pattern = pattern[1:]
-            negate = True
-        if pattern.startswith("^") or pattern.endswith("$"):
-            regex = re.compile(pattern)
-        else:
-            regex = re.compile(re.escape(pattern).replace(r"\*", ".*"))
-            # Should maybe add .replace(r"\?", ".?")
-        matched = regex.search(value) is not None
-        if negate:
-            return not matched
-        return matched
+        return util.match_ignore_pattern(pattern, value)
diff --git a/atr/storage/writers/checks.py b/atr/storage/writers/checks.py
index 1665119..ca18f2c 100644
--- a/atr/storage/writers/checks.py
+++ b/atr/storage/writers/checks.py
@@ -24,9 +24,17 @@ import sqlmodel
 
 import atr.db as db
 import atr.models.sql as sql
+import atr.models.validation as validation
 import atr.storage as storage
 
 
+def _validate_ignore_patterns(*patterns: str | None) -> None:
+    for pattern in patterns:
+        if pattern is None:
+            continue
+        validation.validate_ignore_pattern(pattern)
+
+
 class GeneralPublic:
     def __init__(
         self,
@@ -99,6 +107,13 @@ class CommitteeMember(CommitteeParticipant):
         status: sql.CheckResultStatusIgnore | None = None,
         message_glob: str | None = None,
     ) -> None:
+        _validate_ignore_patterns(
+            release_glob,
+            checker_glob,
+            primary_rel_path_glob,
+            member_rel_path_glob,
+            message_glob,
+        )
         cri = sql.CheckResultIgnore(
             asf_uid=self.__asf_uid,
             created=datetime.datetime.now(datetime.UTC),
@@ -138,6 +153,13 @@ class CommitteeMember(CommitteeParticipant):
         status: sql.CheckResultStatusIgnore | None = None,
         message_glob: str | None = None,
     ) -> None:
+        _validate_ignore_patterns(
+            release_glob,
+            checker_glob,
+            primary_rel_path_glob,
+            member_rel_path_glob,
+            message_glob,
+        )
         cri = await self.__data.get(sql.CheckResultIgnore, id)
         if cri is None:
             raise storage.AccessError(f"Ignore {id} not found")
diff --git a/atr/util.py b/atr/util.py
index b736abb..64a2318 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -50,6 +50,7 @@ import atr.config as config
 import atr.ldap as ldap
 import atr.log as log
 import atr.models.sql as sql
+import atr.models.validation as validation
 import atr.registry as registry
 import atr.tarzip as tarzip
 import atr.user as user
@@ -628,6 +629,27 @@ def key_ssh_fingerprint_core(ssh_key_string: str) -> str:
     raise ValueError("Invalid SSH key format")
 
 
+def match_ignore_pattern(pattern: str | None, value: str | None) -> bool:
+    if pattern == "!":
+        # Special case, "!" matches None
+        return value is None
+    if (pattern is None) or (value is None):
+        return False
+    negate = False
+    raw_pattern = pattern
+    if raw_pattern.startswith("!"):
+        raw_pattern = raw_pattern[1:]
+        negate = True
+    try:
+        regex = validation.compile_ignore_pattern(raw_pattern)
+    except ValueError:
+        return False
+    matched = regex.search(value) is not None
+    if negate:
+        return not matched
+    return matched
+
+
 async def number_of_release_files(release: sql.Release) -> int:
     """Return the number of files in a release."""
     if (path := release_directory_revision(release)) is None:
diff --git a/scripts/check_models_imports.py b/scripts/check_models_imports.py
index 80ae8e3..1045be8 100755
--- a/scripts/check_models_imports.py
+++ b/scripts/check_models_imports.py
@@ -23,6 +23,7 @@ from typing import Final
 
 _ALLOWED_PACKAGES: Final = frozenset(
     {
+        "hyperscan",
         "pydantic",
         "pydantic_core",
         "sqlalchemy",
@@ -31,6 +32,10 @@ _ALLOWED_PACKAGES: Final = frozenset(
 )
 
 
+def main() -> None:
+    sys.exit(_run())
+
+
 def _check_file(path: pathlib.Path) -> list[str]:
     errors = []
     tree = ast.parse(path.read_text(), filename=str(path))
@@ -74,9 +79,5 @@ def _run() -> int:
     return 1 if errors else 0
 
 
-def main() -> None:
-    sys.exit(_run())
-
-
 if __name__ == "__main__":
     main()
diff --git a/tests/unit/test_ignore_patterns.py b/tests/unit/test_ignore_patterns.py
new file mode 100644
index 0000000..47bd213
--- /dev/null
+++ b/tests/unit/test_ignore_patterns.py
@@ -0,0 +1,62 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Final
+
+import pytest
+
+import atr.models.validation as validation
+
+REDOS_PATTERN: Final[str] = "^(a+)+$"
+
+
+def test_match_ignore_pattern_avoids_redos_regression() -> None:
+    value = ("a" * 4096) + "X"
+    regex = validation.compile_ignore_pattern(REDOS_PATTERN)
+    assert regex.search(value) is None
+
+
+def test_validate_ignore_pattern_allows_literal_lookaround_tokens() -> None:
+    validation.validate_ignore_pattern("(?=a)")
+
+
+def test_validate_ignore_pattern_hyperscan_supported_constructs() -> None:
+    pattern = r"^(?i)apple(?-i)banana[[:digit:]]{2}\b(?#fruit)|^cherry\s+date$"
+    regex = validation.compile_ignore_pattern(pattern)
+    assert regex.search("APPLEbanana12 ") is True
+    assert regex.search("applebanana99-") is True
+    assert regex.search("cherry   date") is True
+    assert regex.search("cherry\tdate") is True
+
+    assert regex.search("APPLEBANANA12 ") is None
+    assert regex.search("applebanana123 ") is None
+    assert regex.search("applebanana12x") is None
+    assert regex.search("applebanana12_") is None
+    assert regex.search("cherrydate") is None
+    assert regex.search("xcherry   date") is None
+    assert regex.search("cherry   datex") is None
+
+
+def test_validate_ignore_pattern_rejects_regex_lookaround() -> None:
+    with pytest.raises(ValueError, match="Invalid ignore pattern"):
+        validation.validate_ignore_pattern("^(?=a)$")
+
+
+def test_validate_ignore_pattern_rejects_too_long() -> None:
+    pattern = "a" * (validation.MAX_IGNORE_PATTERN_LENGTH + 1)
+    with pytest.raises(ValueError, match="Pattern exceeds"):
+        validation.validate_ignore_pattern(pattern)


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to