This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/main by this push:
new f154e76 Add a script to detect markup in strings, and fix false positives
f154e76 is described below
commit f154e76817a19409b17b17fed9b2ab6b4eb86fde
Author: Sean B. Palmer <[email protected]>
AuthorDate: Mon Oct 27 17:00:14 2025 +0000
Add a script to detect markup in strings, and fix false positives
---
atr/admin/__init__.py | 4 +-
atr/analysis.py | 6 +--
atr/config.py | 2 +-
atr/db/__init__.py | 2 +-
atr/htm.py | 1 +
atr/log.py | 8 +++-
scripts/markup_strings.py | 119 ++++++++++++++++++++++++++++++++++++++++++++++
7 files changed, 133 insertions(+), 9 deletions(-)
diff --git a/atr/admin/__init__.py b/atr/admin/__init__.py
index 1eae223..8edf6f2 100644
--- a/atr/admin/__init__.py
+++ b/atr/admin/__init__.py
@@ -170,9 +170,9 @@ async def configuration(session: web.Committer) -> quart.wrappers.response.Respo
try:
val = getattr(conf, name)
except Exception as exc:
- val = f"<error: {exc}>"
+ val = log.python_repr(f"error: {exc}")
if name.endswith("_PASSWORD"):
- val = "<redacted>"
+ val = log.python_repr("redacted")
if callable(val):
continue
values.append(f"{name}={val}")
diff --git a/atr/analysis.py b/atr/analysis.py
index 96c89c2..d0ef58a 100755
--- a/atr/analysis.py
+++ b/atr/analysis.py
@@ -179,16 +179,16 @@ def architecture_pattern() -> str:
return "(" + "|".join(architectures) + ")(?=[_.-])"
-def candidate_highlight(path: pathlib.Path, prefix: str = "<strong>", suffix: str = "</strong>") -> str:
+def candidate_highlight(path: pathlib.Path) -> str:
parts = []
for part in path.parts:
if ("<" in part) or (">" in part) or ("&" in part):
# TODO: Should perhaps check for ' and " too for attribute value safety
raise ValueError(f"Invalid path segment: {part}")
if _CANDIDATE_WHOLE.match(part):
- parts.append(f"{prefix}{part}{suffix}")
+ parts.append(f"<strong>{part}</strong>")
continue
- parts.append(_CANDIDATE_PARTIAL.sub(rf"{prefix}\g<0>{suffix}", part))
+ parts.append(_CANDIDATE_PARTIAL.sub(r"<strong>\g<0></strong>", part))
return str(pathlib.Path(*parts))
diff --git a/atr/config.py b/atr/config.py
index fcbcc21..650d645 100644
--- a/atr/config.py
+++ b/atr/config.py
@@ -136,7 +136,7 @@ def get() -> type[AppConfig]:
try:
config = _CONFIG_DICT[get_mode()]
except KeyError:
- exit("Error: Invalid <mode>. Expected values [Debug, Production, Profiling].")
+ exit("Error: Invalid mode. Expected values Debug, Production, or Profiling.")
if config.ALLOW_TESTS and (get_mode() != Mode.Debug):
raise RuntimeError("ALLOW_TESTS can only be enabled in Debug mode")
diff --git a/atr/db/__init__.py b/atr/db/__init__.py
index c5e9d9d..b4d3f01 100644
--- a/atr/db/__init__.py
+++ b/atr/db/__init__.py
@@ -68,7 +68,7 @@ class NotSet:
return cls._instance
def __repr__(self) -> str:
- return "<NotSet>"
+ return log.python_repr(self.__class__.__name__)
def __copy__(self) -> NotSet:
return NotSet()
diff --git a/atr/htm.py b/atr/htm.py
index b2c5c76..ac3587d 100644
--- a/atr/htm.py
+++ b/atr/htm.py
@@ -125,6 +125,7 @@ class Block:
self.element._attrs,
self.element._children,
)
+ # TODO: Check that there are no injection attacks possible here
if ' data-src="' not in new_element._attrs:
if new_element._attrs:
new_element._attrs = new_element._attrs + f' data-src="{src}"'
diff --git a/atr/log.py b/atr/log.py
index 2bd214a..c2ce1aa 100644
--- a/atr/log.py
+++ b/atr/log.py
@@ -36,10 +36,10 @@ def caller_name(depth: int = 1) -> str:
if frame is None:
return __name__
- module = frame.f_globals.get("__name__", "<unknown>")
+ module = frame.f_globals.get("__name__", python_repr("unknown"))
func = frame.f_code.co_name
- if func == "<module>":
+ if func == python_repr("module"):
# We're at the top level
return module
@@ -89,6 +89,10 @@ def log(level: int, msg: str, *args: Any, **kwargs: Any) -> None:
_event(level, msg, *args, **kwargs)
+def python_repr(object_name: str) -> str:
+ return f"<{object_name}>"
+
+
def performance(msg: str, *args: Any, **kwargs: Any) -> None:
if PERFORMANCE is not None:
PERFORMANCE.info(msg, *args, **kwargs)
diff --git a/scripts/markup_strings.py b/scripts/markup_strings.py
new file mode 100755
index 0000000..1b996ef
--- /dev/null
+++ b/scripts/markup_strings.py
@@ -0,0 +1,119 @@
+#!/usr/bin/env python3
+
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# find atr -name '*.py' -exec python3 scripts/markup_strings.py {} \; | grep -v '^ok '
+
+import ast
+import enum
+import pathlib
+import re
+import sys
+
+_EMAIL_PATTERN = re.compile(r"<[^>]*@[^>]*>")
+_MARKUP_PATTERN = re.compile(r'</?[A-Za-z]|[A-Za-z]="')
+
+
+class ExitCode(enum.IntEnum):
+ SUCCESS = 0
+ FAILURE = 1
+ USAGE_ERROR = 2
+
+
+class MarkupStringVisitor(ast.NodeVisitor):
+ def __init__(self, filename: str) -> None:
+ super().__init__()
+ self.filename: str = filename
+ self.matches: list[tuple[int, int, str]] = []
+
+ def visit_Constant(self, node: ast.Constant) -> None:
+ if isinstance(node.value, str):
+ if _MARKUP_PATTERN.search(node.value):
+ is_okay = "(?P<" in node.value
+ is_okay |= node.value.startswith("/") and ("/<" in node.value)
+ is_okay |= _EMAIL_PATTERN.search(node.value) is not None
+ if not is_okay:
+                    self.matches.append((node.lineno, node.col_offset, node.value))
+ self.generic_visit(node)
+
+
+def _parse_python_code(code: str, filename: str) -> ast.Module | None:
+ try:
+ return ast.parse(code, filename=filename)
+ except SyntaxError as e:
+ print(f"!! {filename} - invalid syntax: {e}", file=sys.stderr)
+ return None
+
+
+def _read_file_content(file_path: pathlib.Path) -> str | None:
+ try:
+ return file_path.read_text(encoding="utf-8")
+ except FileNotFoundError:
+ print(f"!! {file_path} - file not found", file=sys.stderr)
+ return None
+ except OSError:
+ print(f"!! {file_path} - could not read file", file=sys.stderr)
+ return None
+
+
+def main() -> None:
+ quiet = sys.argv[2:3] == ["--quiet"]
+ argc = len(sys.argv)
+ match (argc, quiet):
+ case (2, False):
+ ...
+ case (3, True):
+ ...
+ case _:
+            print(f"Usage: {sys.argv[0]} <filename.py> [ --quiet ]", file=sys.stderr)
+ sys.exit(ExitCode.USAGE_ERROR)
+
+ file_path = pathlib.Path(sys.argv[1])
+ filename = str(file_path)
+
+ # if filename == "atr/htm.py":
+ # print(f"!! {filename} - ignored", file=sys.stderr)
+ # sys.exit(ExitCode.SUCCESS)
+
+ if not file_path.is_file() or (not filename.endswith(".py")):
+ print(f"!! {filename} - invalid file", file=sys.stderr)
+ sys.exit(ExitCode.USAGE_ERROR)
+
+ content = _read_file_content(file_path)
+ if content is None:
+ sys.exit(ExitCode.FAILURE)
+
+ tree = _parse_python_code(content, filename)
+ if tree is None:
+ sys.exit(ExitCode.FAILURE)
+
+ visitor = MarkupStringVisitor(filename)
+ visitor.visit(tree)
+
+ if visitor.matches:
+ for lineno, _col, string_value in visitor.matches:
+ print(f"{filename}:{lineno}: {string_value!r}")
+ sys.exit(ExitCode.FAILURE)
+ else:
+ if not quiet:
+ print(f"ok {filename}")
+ sys.exit(ExitCode.SUCCESS)
+
+
+if __name__ == "__main__":
+ main()
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]