This is an automated email from the ASF dual-hosted git repository. sbp pushed a commit to branch sbp in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
commit be6c4bfc82ce17db0aae1a327a0a0e8ba0f06513 Author: Sean B. Palmer <[email protected]> AuthorDate: Wed Mar 11 14:57:36 2026 +0000 Use extracted archives for source tree comparison checks --- atr/tasks/checks/compare.py | 61 +++-------------- tests/unit/test_checks_compare.py | 140 ++++++++++---------------------------- 2 files changed, 46 insertions(+), 155 deletions(-) diff --git a/atr/tasks/checks/compare.py b/atr/tasks/checks/compare.py index 54906530..e131d190 100644 --- a/atr/tasks/checks/compare.py +++ b/atr/tasks/checks/compare.py @@ -36,7 +36,6 @@ import dulwich.porcelain import dulwich.refs import pydantic -import atr.archives as archives import atr.attestable as attestable import atr.config as config import atr.log as log @@ -56,7 +55,7 @@ _PERMITTED_ADDED_PATHS: Final[dict[str, list[str]]] = { # Release policy fields which this check relies on - used for result caching INPUT_POLICY_KEYS: Final[list[str]] = [] INPUT_EXTRA_ARGS: Final[list[str]] = ["github_tp_sha"] -CHECK_VERSION: Final[str] = "1" +CHECK_VERSION: Final[str] = "2" @dataclasses.dataclass @@ -102,15 +101,18 @@ async def source_trees(args: checks.FunctionArguments) -> results.Results | None if payload is not None: if not (primary_abs_path := await recorder.abs_path()): return None - max_extract_size = args.extra_args.get("max_extract_size", _CONFIG.MAX_EXTRACT_SIZE) - chunk_size = args.extra_args.get("chunk_size", _CONFIG.EXTRACT_CHUNK_SIZE) + cache_dir = await checks.resolve_cache_dir(args) + if cache_dir is None: + await recorder.failure( + "Extracted archive tree is not available", + {"rel_path": args.primary_rel_path}, + ) + return None tmp_dir = paths.get_tmp_dir() await aiofiles.os.makedirs(tmp_dir, exist_ok=True) async with util.async_temporary_directory(prefix="trees-", dir=tmp_dir) as temp_dir: github_dir = temp_dir / "github" - archive_dir_path = temp_dir / "archive" await aiofiles.os.makedirs(github_dir, exist_ok=True) - await aiofiles.os.makedirs(archive_dir_path, exist_ok=True) checkout_dir = await _checkout_github_source(payload, github_dir) if checkout_dir is None: await recorder.failure( @@ -118,17 +120,11 @@ async def source_trees(args: checks.FunctionArguments) -> results.Results | None {"repo_url": f"https://github.com/{payload.repository}.git", "sha": payload.sha}, ) return None - if not await _decompress_archive(primary_abs_path, archive_dir_path, max_extract_size, chunk_size): - await recorder.failure( - "Failed to extract source archive for comparison", - {"archive_path": str(primary_abs_path), "extract_dir": str(archive_dir_path)}, - ) - return None - archive_root_result = await _find_archive_root(primary_abs_path, archive_dir_path) + archive_root_result = await _find_archive_root(primary_abs_path, cache_dir) if archive_root_result.root is None: await recorder.failure( "Could not determine archive root directory for comparison", - {"archive_path": str(primary_abs_path), "extract_dir": str(archive_dir_path)}, + {"archive_path": str(primary_abs_path), "extract_dir": str(cache_dir)}, ) return None if archive_root_result.extra_entries: @@ -141,7 +137,7 @@ async def source_trees(args: checks.FunctionArguments) -> results.Results | None }, ) return None - archive_content_dir = archive_dir_path / archive_root_result.root + archive_content_dir = cache_dir / archive_root_result.root archive_dir = str(archive_content_dir) try: comparison = await _compare_trees(github_dir, archive_content_dir) @@ -304,41 +300,6 @@ def _compare_trees_rsync(repo_dir: pathlib.Path, archive_dir: pathlib.Path) -> T return TreeComparisonResult(invalid, repo_only) -async def _decompress_archive( - archive_path: pathlib.Path, - extract_dir: pathlib.Path, - max_extract_size: int, - chunk_size: int, -) -> bool: - started_ns = time.perf_counter_ns() - try: - extracted_size, _extracted_paths = await asyncio.to_thread( - archives.extract, - str(archive_path), - str(extract_dir), - max_size=max_extract_size, - chunk_size=chunk_size, - ) - except (archives.ExtractionError, OSError): - elapsed_ms = (time.perf_counter_ns() - started_ns) / 1_000_000.0 - log.exception( - "Failed to extract source archive for compare.source_trees", - archive_path=str(archive_path), - extract_dir=str(extract_dir), - extract_ms=elapsed_ms, - ) - return False - elapsed_ms = (time.perf_counter_ns() - started_ns) / 1_000_000.0 - log.debug( - "Extracted source archive for compare.source_trees", - archive_path=str(archive_path), - extract_dir=str(extract_dir), - extracted_bytes=extracted_size, - extract_ms=elapsed_ms, - ) - return True - - def _ensure_clone_identity_env() -> None: os.environ["USER"] = _DEFAULT_USER os.environ["EMAIL"] = _DEFAULT_EMAIL diff --git a/tests/unit/test_checks_compare.py b/tests/unit/test_checks_compare.py index 7cb58981..b608241d 100644 --- a/tests/unit/test_checks_compare.py +++ b/tests/unit/test_checks_compare.py @@ -79,42 +79,12 @@ class CompareRecorder: return atr.tasks.checks.compare.TreeComparisonResult(set(self.invalid), set(self.repo_only)) -class DecompressRecorder: - def __init__(self, return_value: bool = True) -> None: - self.archive_path: pathlib.Path | None = None - self.extract_dir: pathlib.Path | None = None - self.max_extract_size: int | None = None - self.chunk_size: int | None = None - self.return_value = return_value - - async def __call__( - self, - archive_path: pathlib.Path, - extract_dir: pathlib.Path, - max_extract_size: int, - chunk_size: int, - ) -> bool: - self.archive_path = archive_path - self.extract_dir = extract_dir - self.max_extract_size = max_extract_size - self.chunk_size = chunk_size - assert await aiofiles.os.path.exists(extract_dir) - return self.return_value - +class CacheDirResolver: + def __init__(self, cache_dir: pathlib.Path | None) -> None: + self.cache_dir = cache_dir -class ExtractErrorRaiser: - def __call__(self, *args: object, **kwargs: object) -> tuple[int, list[str]]: - raise atr.tasks.checks.compare.archives.ExtractionError("Extraction error") - - -class ExtractRecorder: - def __init__(self, extracted_size: int = 123) -> None: - self.calls: list[tuple[str, str, int, int]] = [] - self.extracted_size = extracted_size - - def __call__(self, archive_path: str, extract_dir: str, max_size: int, chunk_size: int) -> tuple[int, list[str]]: - self.calls.append((archive_path, extract_dir, max_size, chunk_size)) - return self.extracted_size, [] + async def __call__(self, args: object) -> pathlib.Path | None: + return self.cache_dir class FindArchiveRootRecorder: @@ -528,36 +498,6 @@ def test_compare_trees_rsync_trees_match(monkeypatch: pytest.MonkeyPatch, tmp_pa assert result.repo_only == set() [email protected] -async def test_decompress_archive_calls_extract(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None: - archive_path = tmp_path / "artifact.tar.gz" - extract_dir = tmp_path / "extracted" - extract_dir.mkdir() - extract_recorder = ExtractRecorder() - - monkeypatch.setattr(atr.tasks.checks.compare.archives, "extract", extract_recorder) - - result = await atr.tasks.checks.compare._decompress_archive(archive_path, extract_dir, 10, 20) - - assert result is True - assert extract_recorder.calls == [(str(archive_path), str(extract_dir), 10, 20)] - - [email protected] -async def test_decompress_archive_handles_extraction_error( - monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path -) -> None: - archive_path = tmp_path / "artifact.tar.gz" - extract_dir = tmp_path / "extracted" - extract_dir.mkdir() - - monkeypatch.setattr(atr.tasks.checks.compare.archives, "extract", ExtractErrorRaiser()) - - result = await atr.tasks.checks.compare._decompress_archive(archive_path, extract_dir, 10, 20) - - assert result is False - - @pytest.mark.asyncio async def test_find_archive_root_accepts_any_single_directory(tmp_path: pathlib.Path) -> None: archive_path = tmp_path / "my-project-1.0.0.tar.gz" @@ -674,14 +614,15 @@ async def test_source_trees_creates_temp_workspace_and_cleans_up( args = _make_args(recorder) payload = _make_payload() checkout = CheckoutRecorder() - decompress = DecompressRecorder() + cache_dir = tmp_path / "cache" + cache_dir.mkdir() find_root = FindArchiveRootRecorder("artifact") compare = CompareRecorder(repo_only={"extra1.txt", "extra2.txt"}) tmp_root = tmp_path / "temporary-root" monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", PayloadLoader(payload)) monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) - monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) + monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir", CacheDirResolver(cache_dir)) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare) monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) @@ -691,8 +632,6 @@ async def test_source_trees_creates_temp_workspace_and_cleans_up( assert checkout.checkout_dir is not None checkout_dir = checkout.checkout_dir assert checkout_dir.name == "github" - assert decompress.extract_dir is not None - assert decompress.extract_dir.name == "archive" assert checkout_dir.parent.parent == tmp_root assert checkout_dir.parent.name.startswith("trees-") assert await aiofiles.os.path.exists(tmp_root) @@ -716,11 +655,7 @@ async def test_source_trees_payload_none_skips_temp_workspace(monkeypatch: pytes "_checkout_github_source", RaiseAsync("_checkout_github_source should not be called"), ) - monkeypatch.setattr( - atr.tasks.checks.compare, - "_decompress_archive", - RaiseAsync("_decompress_archive should not be called"), - ) + monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir", RaiseAsync("resolve_cache_dir should not be called")) monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", RaiseSync("get_tmp_dir should not be called")) await atr.tasks.checks.compare.source_trees(args) @@ -734,24 +669,16 @@ async def test_source_trees_permits_pkg_info_when_pyproject_toml_exists( args = _make_args(recorder) payload = _make_payload() checkout = CheckoutRecorder() + cache_dir = tmp_path / "cache" + (cache_dir / "artifact").mkdir(parents=True) + (cache_dir / "artifact" / "pyproject.toml").write_text("[project]\nname = 'test'\n") find_root = FindArchiveRootRecorder("artifact") compare = CompareRecorder(invalid={"PKG-INFO"}) tmp_root = tmp_path / "temporary-root" - async def decompress_with_pyproject( - archive_path: pathlib.Path, - extract_dir: pathlib.Path, - max_extract_size: int, - chunk_size: int, - ) -> bool: - archive_content = extract_dir / "artifact" - archive_content.mkdir(parents=True, exist_ok=True) - (archive_content / "pyproject.toml").write_text("[project]\nname = 'test'\n") - return True - monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", PayloadLoader(payload)) monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) - monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress_with_pyproject) + monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir", CacheDirResolver(cache_dir)) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare) monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) @@ -770,14 +697,15 @@ async def test_source_trees_records_failure_when_archive_has_invalid_files( args = _make_args(recorder) payload = _make_payload() checkout = CheckoutRecorder() - decompress = DecompressRecorder() + cache_dir = tmp_path / "cache" + cache_dir.mkdir() find_root = FindArchiveRootRecorder("artifact") compare = CompareRecorder(invalid={"bad1.txt", "bad2.txt"}, repo_only={"ok.txt"}) tmp_root = tmp_path / "temporary-root" monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", PayloadLoader(payload)) monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) - monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) + monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir", CacheDirResolver(cache_dir)) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare) monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) @@ -801,13 +729,14 @@ async def test_source_trees_records_failure_when_archive_root_not_found( args = _make_args(recorder) payload = _make_payload() checkout = CheckoutRecorder() - decompress = DecompressRecorder() + cache_dir = tmp_path / "cache" + cache_dir.mkdir() find_root = FindArchiveRootRecorder(root=None) tmp_root = tmp_path / "temporary-root" monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", PayloadLoader(payload)) monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) - monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) + monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir", CacheDirResolver(cache_dir)) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) @@ -820,29 +749,28 @@ async def test_source_trees_records_failure_when_archive_root_not_found( @pytest.mark.asyncio -async def test_source_trees_records_failure_when_decompress_fails( - monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path +async def test_source_trees_records_failure_when_cache_dir_unavailable( + monkeypatch: pytest.MonkeyPatch, ) -> None: recorder = RecorderStub(True) args = _make_args(recorder) payload = _make_payload() - checkout = CheckoutRecorder() - decompress = DecompressRecorder(return_value=False) - tmp_root = tmp_path / "temporary-root" monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", PayloadLoader(payload)) - monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) - monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) - monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) + monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir", CacheDirResolver(None)) + monkeypatch.setattr( + atr.tasks.checks.compare, + "_checkout_github_source", + RaiseAsync("_checkout_github_source should not be called"), + ) + monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", RaiseSync("get_tmp_dir should not be called")) await atr.tasks.checks.compare.source_trees(args) assert len(recorder.failure_calls) == 1 message, data = recorder.failure_calls[0] - assert message == "Failed to extract source archive for comparison" + assert message == "Extracted archive tree is not available" assert isinstance(data, dict) - assert data["archive_path"] == str(await recorder.abs_path()) - assert data["extract_dir"] == str(decompress.extract_dir) @pytest.mark.asyncio @@ -853,13 +781,14 @@ async def test_source_trees_records_failure_when_extra_entries_in_archive( args = _make_args(recorder) payload = _make_payload() checkout = CheckoutRecorder() - decompress = DecompressRecorder() + cache_dir = tmp_path / "cache" + cache_dir.mkdir() find_root = FindArchiveRootRecorder(root="artifact", extra_entries=["README.txt", "extra.txt"]) tmp_root = tmp_path / "temporary-root" monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", PayloadLoader(payload)) monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) - monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) + monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir", CacheDirResolver(cache_dir)) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) @@ -881,7 +810,8 @@ async def test_source_trees_reports_repo_only_sample_limited_to_five( args = _make_args(recorder) payload = _make_payload() checkout = CheckoutRecorder() - decompress = DecompressRecorder() + cache_dir = tmp_path / "cache" + cache_dir.mkdir() find_root = FindArchiveRootRecorder("artifact") repo_only_files = {f"file{i}.txt" for i in range(10)} compare = CompareRecorder(repo_only=repo_only_files) @@ -889,7 +819,7 @@ async def test_source_trees_reports_repo_only_sample_limited_to_five( monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", PayloadLoader(payload)) monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", checkout) - monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", decompress) + monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir", CacheDirResolver(cache_dir)) monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", find_root) monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare) monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir", ReturnValue(tmp_root)) --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
