This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/sbp by this push:
     new e5c186ad Add a variant of the license header
e5c186ad is described below

commit e5c186ad2000bd2c4f6f2bb756772fa1cddf870e
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Mar 13 14:53:20 2026 +0000

    Add a variant of the license header
---
 atr/tasks/checks/license.py       |  32 ++++++++---
 tests/unit/test_checks_license.py | 117 ++++++++++++++++++++------------------
 2 files changed, 85 insertions(+), 64 deletions(-)

diff --git a/atr/tasks/checks/license.py b/atr/tasks/checks/license.py
index abef74e7..d3e00cec 100644
--- a/atr/tasks/checks/license.py
+++ b/atr/tasks/checks/license.py
@@ -47,6 +47,26 @@ HTTP_APACHE_LICENSE_HEADER: Final[bytes] = (
 
 HTTPS_APACHE_LICENSE_HEADER: Final[bytes] = HTTP_APACHE_LICENSE_HEADER.replace(b" http ", b" https ")
 
+HTTP_APACHE_LICENSE_HEADER_GENERIC: Final[bytes] = (
+    b"Licensed under the Apache License Version 2 0 the License you m"
+    b"ay not use this file except in compliance with the License You "
+    b"may obtain a copy of the License at http www apache org license"
+    b"s LICENSE 2 0 Unless required by applicable law or agreed to in"
+    b" writing software distributed under the License is distributed "
+    b"on an AS IS BASIS WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND "
+    b"either express or implied See the License for the specific lang"
+    b"uage governing permissions and limitations under the License"
+)
+
+HTTPS_APACHE_LICENSE_HEADER_GENERIC: Final[bytes] = HTTP_APACHE_LICENSE_HEADER_GENERIC.replace(b" http ", b" https ")
+
+ACCEPTED_APACHE_LICENSE_HEADERS: Final[tuple[bytes, ...]] = (
+    HTTP_APACHE_LICENSE_HEADER.lower(),
+    HTTPS_APACHE_LICENSE_HEADER.lower(),
+    HTTP_APACHE_LICENSE_HEADER_GENERIC.lower(),
+    HTTPS_APACHE_LICENSE_HEADER_GENERIC.lower(),
+)
+
 # Patterns for files to include in license header checks
 # Ordered by their popularity in the Stack Overflow Developer Survey 2024
 INCLUDED_PATTERNS: Final[list[str]] = [
@@ -227,20 +247,14 @@ def headers_validate(content: bytes, _filename: str) -> tuple[bool, str | None]:
         if pattern in content:
             return True, None
 
-    r_span = re.compile(rb"Licensed to the.*?under the License", re.MULTILINE)
     r_words = re.compile(rb"[A-Za-z0-9]+")
 
     # Normalise the content
     content = re.sub(rb"[ \t\r\n]+", b" ", content)
 
-    # For each matching heuristic span...
-    for span in r_span.finditer(content):
-        # Get only the words in the span
-        words = r_words.findall(span.group(0))
-        joined = b" ".join(words).lower()
-        if joined == HTTP_APACHE_LICENSE_HEADER.lower():
-            return True, None
-        elif joined == HTTPS_APACHE_LICENSE_HEADER.lower():
+    joined = b" ".join(r_words.findall(content)).lower()
+    for accepted_header in ACCEPTED_APACHE_LICENSE_HEADERS:
+        if accepted_header in joined:
             return True, None
     return False, "Could not find Apache License header"
 
diff --git a/tests/unit/test_checks_license.py b/tests/unit/test_checks_license.py
index 4353da73..abd7d4ad 100644
--- a/tests/unit/test_checks_license.py
+++ b/tests/unit/test_checks_license.py
@@ -40,6 +40,51 @@ NOTICE_VALID: str = (
 )
 
 
+class BinaryRecorder(checks.Recorder):
+    def __init__(self, path: pathlib.Path) -> None:
+        super().__init__(
+            checker="tests.unit.test_checks_license",
+            inputs_hash=None,
+            project_name="test",
+            version_name="0.2.0",
+            revision_number="00001",
+            primary_rel_path="apache-test-0.2-bin.zip",
+            member_rel_path=None,
+            afresh=False,
+        )
+        self._path = path
+        self.messages: list[tuple[str, str, dict | None]] = []
+
+    async def abs_path(self, rel_path: str | None = None) -> pathlib.Path | None:
+        return self._path if rel_path is None else self._path / rel_path
+
+    async def primary_path_is_binary(self) -> bool:
+        return True
+
+    async def _add(
+        self,
+        status: sql.CheckResultStatus,
+        message: str,
+        data: object,
+        primary_rel_path: str | None = None,
+        member_rel_path: str | None = None,
+    ) -> sql.CheckResult:
+        self.messages.append((status.value, message, data if isinstance(data, dict) else None))
+        return sql.CheckResult(
+            id=0,
+            release_name=self.release_name,
+            revision_number=self.revision_number,
+            checker=self.checker,
+            primary_rel_path=primary_rel_path,
+            member_rel_path=member_rel_path,
+            created=datetime.datetime.now(datetime.UTC),
+            status=status,
+            message=message,
+            data=data,
+            inputs_hash=None,
+        )
+
+
 def test_files_missing_cache_dir():
     results = list(license._files_check_core_logic(pathlib.Path("/nonexistent"), is_podling=False))
     assert len(results) == 1
@@ -89,61 +134,6 @@ def test_files_single_root_with_stray_top_level_file(tmp_path):
     assert sql.CheckResultStatus.SUCCESS in statuses
 
 
-def test_files_valid_license_and_notice(tmp_path):
-    cache_dir = _cache_with_root(tmp_path)
-    root = cache_dir / "apache-test-0.2"
-    (root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
-    (root / "NOTICE").write_text(NOTICE_VALID)
-    results = list(license._files_check_core_logic(cache_dir, is_podling=False))
-    artifact_results = [r for r in results if isinstance(r, license.ArtifactResult)]
-    assert all(r.status == sql.CheckResultStatus.SUCCESS for r in artifact_results)
-
-
-class BinaryRecorder(checks.Recorder):
-    def __init__(self, path: pathlib.Path) -> None:
-        super().__init__(
-            checker="tests.unit.test_checks_license",
-            inputs_hash=None,
-            project_name="test",
-            version_name="0.2.0",
-            revision_number="00001",
-            primary_rel_path="apache-test-0.2-bin.zip",
-            member_rel_path=None,
-            afresh=False,
-        )
-        self._path = path
-        self.messages: list[tuple[str, str, dict | None]] = []
-
-    async def abs_path(self, rel_path: str | None = None) -> pathlib.Path | None:
-        return self._path if rel_path is None else self._path / rel_path
-
-    async def primary_path_is_binary(self) -> bool:
-        return True
-
-    async def _add(
-        self,
-        status: sql.CheckResultStatus,
-        message: str,
-        data: object,
-        primary_rel_path: str | None = None,
-        member_rel_path: str | None = None,
-    ) -> sql.CheckResult:
-        self.messages.append((status.value, message, data if isinstance(data, dict) else None))
-        return sql.CheckResult(
-            id=0,
-            release_name=self.release_name,
-            revision_number=self.revision_number,
-            checker=self.checker,
-            primary_rel_path=primary_rel_path,
-            member_rel_path=member_rel_path,
-            created=datetime.datetime.now(datetime.UTC),
-            status=status,
-            message=message,
-            data=data,
-            inputs_hash=None,
-        )
-
-
 @pytest.mark.asyncio
 async def test_files_skip_binary_artifacts(tmp_path: pathlib.Path) -> None:
     recorder = BinaryRecorder(tmp_path / "apache-test-0.2-bin.zip")
@@ -164,6 +154,16 @@ async def test_files_skip_binary_artifacts(tmp_path: pathlib.Path) -> None:
     assert recorder.messages == []
 
 
+def test_files_valid_license_and_notice(tmp_path):
+    cache_dir = _cache_with_root(tmp_path)
+    root = cache_dir / "apache-test-0.2"
+    (root / "LICENSE").write_text(constants.APACHE_LICENSE_2_0)
+    (root / "NOTICE").write_text(NOTICE_VALID)
+    results = list(license._files_check_core_logic(cache_dir, is_podling=False))
+    artifact_results = [r for r in results if isinstance(r, license.ArtifactResult)]
+    assert all(r.status == sql.CheckResultStatus.SUCCESS for r in artifact_results)
+
+
 def test_headers_check_data_fields_match_model(tmp_path):
     cache_dir = _extract_test_archive(tmp_path)
     results = list(license._headers_check_core_logic(cache_dir, TEST_ARCHIVE_BASENAME, [], "none"))
@@ -209,6 +209,13 @@ def test_headers_check_includes_excludes_source_policy(tmp_path):
     assert final_result.data["excludes_source"] == "policy"
 
 
+def test_headers_validate_accepts_generic_apache_license_header():
+    content = pathlib.Path("license-normal.txt").read_bytes()
+    is_valid, error = license.headers_validate(content, "publish-on-homebrew.sh")
+    assert is_valid is True
+    assert error is None
+
+
 def _cache_with_root(tmp_path: pathlib.Path) -> pathlib.Path:
     cache_dir = tmp_path / "cache"
     cache_dir.mkdir()


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to