This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/main by this push:
new 99013e9 Use exclusions from release policies in lightweight license checks
99013e9 is described below
commit 99013e96d18dab83a5fc7cdcbf3432c505771678
Author: Sean B. Palmer <[email protected]>
AuthorDate: Fri Jan 9 19:06:58 2026 +0000
Use exclusions from release policies in lightweight license checks
---
atr/tasks/checks/license.py | 209 ++++++++++++++++++-------------------
atr/tasks/gha.py | 2 +-
pyproject.toml | 3 +
tests/unit/test_license_headers.py | 61 +++++++++++
4 files changed, 169 insertions(+), 106 deletions(-)
diff --git a/atr/tasks/checks/license.py b/atr/tasks/checks/license.py
index fbde365..0e1a0c9 100644
--- a/atr/tasks/checks/license.py
+++ b/atr/tasks/checks/license.py
@@ -25,7 +25,6 @@ from collections.abc import Iterator
from typing import Any, Final
import atr.constants as constants
-import atr.db as db
import atr.log as log
import atr.models.results as results
import atr.models.schema as schema
@@ -88,6 +87,7 @@ class ArtifactData(schema.Strict):
files_with_valid_headers: int = schema.default(0)
files_with_invalid_headers: int = schema.default(0)
files_skipped: int = schema.default(0)
+ excludes_source: str = schema.default("none")
class ArtifactResult(schema.Strict):
@@ -172,17 +172,18 @@ async def headers(args: checks.FunctionArguments) ->
results.Results | None:
log.info(f"Checking license headers for {artifact_abs_path} (rel:
{args.primary_rel_path})")
- async with db.session() as data:
- release = await data.release(project_name=args.project_name,
version=args.version_name).get()
- ignore_lines = []
- if release is not None:
- release_directory_base = util.release_directory_base(release)
- release_directory_revision = release_directory_base /
args.revision_number
- ignore_file = release_directory_revision / ".atr" /
"license-headers-ignore"
- if ignore_file.exists():
- ignore_lines = ignore_file.read_text().splitlines()
+ is_source = await recorder.primary_path_is_source()
+ project = await recorder.project()
- return await _headers_core(recorder, str(artifact_abs_path), ignore_lines)
+ ignore_lines: list[str] = []
+ excludes_source: str
+ if is_source:
+ ignore_lines = project.policy_source_excludes_lightweight
+ excludes_source = "policy" if ignore_lines else "none"
+ else:
+ excludes_source = "none"
+
+ return await _headers_core(recorder, str(artifact_abs_path), ignore_lines,
excludes_source)
def headers_validate(content: bytes, _filename: str) -> tuple[bool, str |
None]:
@@ -213,9 +214,6 @@ def headers_validate(content: bytes, _filename: str) ->
tuple[bool, str | None]:
return False, "Could not find Apache License header"
-# File helpers
-
-
def _files_check_core_logic(artifact_path: str, is_podling: bool) ->
Iterator[Result]:
"""Verify that LICENSE and NOTICE files exist and are placed and formatted
correctly."""
license_results: dict[str, str | None] = {}
@@ -314,92 +312,6 @@ def _files_check_core_logic_notice(archive:
tarzip.Archive, member: tarzip.Membe
return len(issues) == 0, issues, preamble
-def _license_results(
- license_results: dict[str, str | None],
-) -> Iterator[Result]:
- """Build status messages for license file verification."""
- license_files_size = len(license_results)
- if license_files_size == 0:
- yield ArtifactResult(
- status=sql.CheckResultStatus.FAILURE,
- message="No LICENSE file found",
- data=None,
- )
- return
-
- if license_files_size > 1:
- yield ArtifactResult(
- status=sql.CheckResultStatus.FAILURE,
- message="Multiple LICENSE files found",
- data=None,
- )
- return
-
- for filename, license_diff in license_results.items():
- # Unpack the single result by iterating
- if license_diff is None:
- yield ArtifactResult(
- status=sql.CheckResultStatus.SUCCESS,
- message=f"{filename} is valid",
- data=None,
- )
- else:
- yield ArtifactResult(
- status=sql.CheckResultStatus.FAILURE,
- message=f"{filename} is invalid",
- data={"diff": license_diff},
- )
-
-
-def _normal_whitespace(lines: list[str]) -> list[str]:
- result = []
- for line in lines:
- line = line.strip()
- if line:
- result.append(line)
- return result
-
-
-def _notice_results(
- notice_results: dict[str, tuple[bool, list[str], str]],
-) -> Iterator[Result]:
- """Build status messages for notice file verification."""
- notice_files_size = len(notice_results)
- if notice_files_size == 0:
- yield ArtifactResult(
- status=sql.CheckResultStatus.FAILURE,
- message="No NOTICE file found",
- data=None,
- )
- return
-
- if notice_files_size > 1:
- yield ArtifactResult(
- status=sql.CheckResultStatus.FAILURE,
- message="Multiple NOTICE files found",
- data=None,
- )
- return
-
- for filename, (notice_ok, notice_issues, notice_preamble) in
notice_results.items():
- # Unpack the single result by iterating
- if notice_ok:
- yield ArtifactResult(
- status=sql.CheckResultStatus.SUCCESS,
- message=f"{filename} is valid",
- data=None,
- )
- else:
- yield ArtifactResult(
- status=sql.CheckResultStatus.FAILURE,
- message=f"{filename} is invalid",
- data={"issues": notice_issues, "preamble": notice_preamble},
- )
-
-
-# Header helpers
-
-
def _get_file_extension(filename: str) -> str | None:
"""Get the file extension without the dot."""
_, ext = os.path.splitext(filename)
@@ -408,12 +320,12 @@ def _get_file_extension(filename: str) -> str | None:
return ext[1:].lower()
-def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str]) ->
Iterator[Result]:
+def _headers_check_core_logic(artifact_path: str, ignore_lines: list[str],
excludes_source: str) -> Iterator[Result]:
"""Verify Apache License headers in source files within an archive."""
# We could modify @Lucas-C/pre-commit-hooks instead for this
# But hopefully this will be robust enough, at least for testing
# First find and validate the root directory
- artifact_data = ArtifactData()
+ artifact_data = ArtifactData(excludes_source=excludes_source)
# try:
# targz.root_directory(artifact_path)
@@ -464,7 +376,7 @@ def _headers_check_core_logic(artifact_path: str,
ignore_lines: list[str]) -> It
f" found {artifact_data.files_with_valid_headers} with valid headers,"
f" {artifact_data.files_with_invalid_headers} with invalid headers,"
f" and {artifact_data.files_skipped} skipped",
- data=artifact_data.model_dump_json(),
+ data=artifact_data.model_dump(),
)
@@ -541,9 +453,13 @@ def _headers_check_core_logic_should_check(filepath: str)
-> bool:
return False
-async def _headers_core(recorder: checks.Recorder, artifact_abs_path: str,
ignore_lines: list[str]) -> None:
+async def _headers_core(
+ recorder: checks.Recorder, artifact_abs_path: str, ignore_lines:
list[str], excludes_source: str
+) -> None:
try:
- for result in await asyncio.to_thread(_headers_check_core_logic,
str(artifact_abs_path), ignore_lines):
+ for result in await asyncio.to_thread(
+ _headers_check_core_logic, str(artifact_abs_path), ignore_lines,
excludes_source
+ ):
match result:
case ArtifactResult():
await _record_artifact(recorder, result)
@@ -564,6 +480,89 @@ async def _headers_core(recorder: checks.Recorder,
artifact_abs_path: str, ignor
return None
+def _license_results(
+ license_results: dict[str, str | None],
+) -> Iterator[Result]:
+ """Build status messages for license file verification."""
+ license_files_size = len(license_results)
+ if license_files_size == 0:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message="No LICENSE file found",
+ data=None,
+ )
+ return
+
+ if license_files_size > 1:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message="Multiple LICENSE files found",
+ data=None,
+ )
+ return
+
+ for filename, license_diff in license_results.items():
+ # Unpack the single result by iterating
+ if license_diff is None:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.SUCCESS,
+ message=f"{filename} is valid",
+ data=None,
+ )
+ else:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message=f"{filename} is invalid",
+ data={"diff": license_diff},
+ )
+
+
+def _normal_whitespace(lines: list[str]) -> list[str]:
+ result = []
+ for line in lines:
+ line = line.strip()
+ if line:
+ result.append(line)
+ return result
+
+
+def _notice_results(
+ notice_results: dict[str, tuple[bool, list[str], str]],
+) -> Iterator[Result]:
+ """Build status messages for notice file verification."""
+ notice_files_size = len(notice_results)
+ if notice_files_size == 0:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message="No NOTICE file found",
+ data=None,
+ )
+ return
+
+ if notice_files_size > 1:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message="Multiple NOTICE files found",
+ data=None,
+ )
+ return
+
+ for filename, (notice_ok, notice_issues, notice_preamble) in
notice_results.items():
+ # Unpack the single result by iterating
+ if notice_ok:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.SUCCESS,
+ message=f"{filename} is valid",
+ data=None,
+ )
+ else:
+ yield ArtifactResult(
+ status=sql.CheckResultStatus.FAILURE,
+ message=f"{filename} is invalid",
+ data={"issues": notice_issues, "preamble": notice_preamble},
+ )
+
+
async def _record_artifact(recorder: checks.Recorder, result: ArtifactResult)
-> None:
match result.status:
case sql.CheckResultStatus.SUCCESS:
diff --git a/atr/tasks/gha.py b/atr/tasks/gha.py
index 3c811da..f64b2cc 100644
--- a/atr/tasks/gha.py
+++ b/atr/tasks/gha.py
@@ -97,7 +97,7 @@ async def _find_triggered_run(
def get_run(resp: dict[str, Any]) -> dict[str, Any] | None:
return next(
- (r for r in resp["workflow_runs"] if r["head_branch"] == args.ref
and r["name"] == unique_id),
+ (r for r in resp["workflow_runs"] if (r["head_branch"] ==
args.ref) and (r["name"] == unique_id)),
None,
)
diff --git a/pyproject.toml b/pyproject.toml
index cdd80d5..ddd5023 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -111,6 +111,9 @@ minversion = "8.0"
testpaths = ["tests"]
asyncio_mode = "auto"
addopts = "--ignore=tests/e2e"
+filterwarnings = [
+ "ignore:imghdr was removed in Python 3.13:DeprecationWarning:pgpy.constants",
+]
[tool.ruff]
line-length = 120
diff --git a/tests/unit/test_license_headers.py
b/tests/unit/test_license_headers.py
new file mode 100644
index 0000000..b1d8d6a
--- /dev/null
+++ b/tests/unit/test_license_headers.py
@@ -0,0 +1,61 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import pathlib
+
+import atr.tasks.checks.license as license
+
+TEST_ARCHIVE = pathlib.Path(__file__).parent.parent / "e2e" / "test_files" /
"apache-test-0.2.tar.gz"
+
+
+def test_headers_check_data_fields_match_model():
+ results = list(license._headers_check_core_logic(str(TEST_ARCHIVE), [],
"none"))
+ artifact_results = [r for r in results if isinstance(r,
license.ArtifactResult)]
+ final_result = artifact_results[-1]
+ expected_fields = set(license.ArtifactData.model_fields.keys())
+ actual_fields = set(final_result.data.keys())
+ assert actual_fields == expected_fields
+
+
+def test_headers_check_excludes_matching_files():
+ results_without_excludes =
list(license._headers_check_core_logic(str(TEST_ARCHIVE), [], "none"))
+ results_with_excludes =
list(license._headers_check_core_logic(str(TEST_ARCHIVE), ["*.py"], "policy"))
+
+ def get_files_checked(results: list) -> int:
+ for r in results:
+ if isinstance(r, license.ArtifactResult) and r.data and
("files_checked" in r.data):
+ return r.data["files_checked"]
+ return 0
+
+ without_excludes = get_files_checked(results_without_excludes)
+ with_excludes = get_files_checked(results_with_excludes)
+ assert with_excludes < without_excludes
+
+
+def test_headers_check_includes_excludes_source_none():
+ results = list(license._headers_check_core_logic(str(TEST_ARCHIVE), [],
"none"))
+ artifact_results = [r for r in results if isinstance(r,
license.ArtifactResult)]
+ assert len(artifact_results) > 0
+ final_result = artifact_results[-1]
+ assert final_result.data["excludes_source"] == "none"
+
+
+def test_headers_check_includes_excludes_source_policy():
+ results = list(license._headers_check_core_logic(str(TEST_ARCHIVE), [],
"policy"))
+ artifact_results = [r for r in results if isinstance(r,
license.ArtifactResult)]
+ final_result = artifact_results[-1]
+ assert final_result.data["excludes_source"] == "policy"
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]