This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new ca13018f Use extracted archives for source tree comparison checks
ca13018f is described below
commit ca13018fb2910e0d7689c80a7ed390a3f0af8c44
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Mar 11 14:57:36 2026 +0000
Use extracted archives for source tree comparison checks
---
atr/tasks/checks/compare.py | 61 +++--------------
tests/unit/test_checks_compare.py | 140 ++++++++++----------------------------
2 files changed, 46 insertions(+), 155 deletions(-)
diff --git a/atr/tasks/checks/compare.py b/atr/tasks/checks/compare.py
index 54906530..e131d190 100644
--- a/atr/tasks/checks/compare.py
+++ b/atr/tasks/checks/compare.py
@@ -36,7 +36,6 @@ import dulwich.porcelain
import dulwich.refs
import pydantic
-import atr.archives as archives
import atr.attestable as attestable
import atr.config as config
import atr.log as log
@@ -56,7 +55,7 @@ _PERMITTED_ADDED_PATHS: Final[dict[str, list[str]]] = {
# Release policy fields which this check relies on - used for result caching
INPUT_POLICY_KEYS: Final[list[str]] = []
INPUT_EXTRA_ARGS: Final[list[str]] = ["github_tp_sha"]
-CHECK_VERSION: Final[str] = "1"
+CHECK_VERSION: Final[str] = "2"
@dataclasses.dataclass
@@ -102,15 +101,18 @@ async def source_trees(args: checks.FunctionArguments) -> results.Results | None
if payload is not None:
if not (primary_abs_path := await recorder.abs_path()):
return None
- max_extract_size = args.extra_args.get("max_extract_size", _CONFIG.MAX_EXTRACT_SIZE)
- chunk_size = args.extra_args.get("chunk_size", _CONFIG.EXTRACT_CHUNK_SIZE)
+ cache_dir = await checks.resolve_cache_dir(args)
+ if cache_dir is None:
+ await recorder.failure(
+ "Extracted archive tree is not available",
+ {"rel_path": args.primary_rel_path},
+ )
+ return None
tmp_dir = paths.get_tmp_dir()
await aiofiles.os.makedirs(tmp_dir, exist_ok=True)
async with util.async_temporary_directory(prefix="trees-", dir=tmp_dir) as temp_dir:
github_dir = temp_dir / "github"
- archive_dir_path = temp_dir / "archive"
await aiofiles.os.makedirs(github_dir, exist_ok=True)
- await aiofiles.os.makedirs(archive_dir_path, exist_ok=True)
checkout_dir = await _checkout_github_source(payload, github_dir)
if checkout_dir is None:
await recorder.failure(
@@ -118,17 +120,11 @@ async def source_trees(args: checks.FunctionArguments) -> results.Results | None
{"repo_url": f"https://github.com/{payload.repository}.git", "sha": payload.sha},
)
return None
- if not await _decompress_archive(primary_abs_path, archive_dir_path, max_extract_size, chunk_size):
- await recorder.failure(
- "Failed to extract source archive for comparison",
- {"archive_path": str(primary_abs_path), "extract_dir": str(archive_dir_path)},
- )
- return None
- archive_root_result = await _find_archive_root(primary_abs_path, archive_dir_path)
+ archive_root_result = await _find_archive_root(primary_abs_path, cache_dir)
if archive_root_result.root is None:
await recorder.failure(
"Could not determine archive root directory for
comparison",
- {"archive_path": str(primary_abs_path), "extract_dir": str(archive_dir_path)},
+ {"archive_path": str(primary_abs_path), "extract_dir": str(cache_dir)},
)
return None
if archive_root_result.extra_entries:
@@ -141,7 +137,7 @@ async def source_trees(args: checks.FunctionArguments) ->
results.Results | None
},
)
return None
- archive_content_dir = archive_dir_path / archive_root_result.root
+ archive_content_dir = cache_dir / archive_root_result.root
archive_dir = str(archive_content_dir)
try:
comparison = await _compare_trees(github_dir,
archive_content_dir)
@@ -304,41 +300,6 @@ def _compare_trees_rsync(repo_dir: pathlib.Path, archive_dir: pathlib.Path) -> T
return TreeComparisonResult(invalid, repo_only)
-async def _decompress_archive(
- archive_path: pathlib.Path,
- extract_dir: pathlib.Path,
- max_extract_size: int,
- chunk_size: int,
-) -> bool:
- started_ns = time.perf_counter_ns()
- try:
- extracted_size, _extracted_paths = await asyncio.to_thread(
- archives.extract,
- str(archive_path),
- str(extract_dir),
- max_size=max_extract_size,
- chunk_size=chunk_size,
- )
- except (archives.ExtractionError, OSError):
- elapsed_ms = (time.perf_counter_ns() - started_ns) / 1_000_000.0
- log.exception(
- "Failed to extract source archive for compare.source_trees",
- archive_path=str(archive_path),
- extract_dir=str(extract_dir),
- extract_ms=elapsed_ms,
- )
- return False
- elapsed_ms = (time.perf_counter_ns() - started_ns) / 1_000_000.0
- log.debug(
- "Extracted source archive for compare.source_trees",
- archive_path=str(archive_path),
- extract_dir=str(extract_dir),
- extracted_bytes=extracted_size,
- extract_ms=elapsed_ms,
- )
- return True
-
-
def _ensure_clone_identity_env() -> None:
os.environ["USER"] = _DEFAULT_USER
os.environ["EMAIL"] = _DEFAULT_EMAIL
diff --git a/tests/unit/test_checks_compare.py b/tests/unit/test_checks_compare.py
index 7cb58981..b608241d 100644
--- a/tests/unit/test_checks_compare.py
+++ b/tests/unit/test_checks_compare.py
@@ -79,42 +79,12 @@ class CompareRecorder:
return
atr.tasks.checks.compare.TreeComparisonResult(set(self.invalid),
set(self.repo_only))
-class DecompressRecorder:
- def __init__(self, return_value: bool = True) -> None:
- self.archive_path: pathlib.Path | None = None
- self.extract_dir: pathlib.Path | None = None
- self.max_extract_size: int | None = None
- self.chunk_size: int | None = None
- self.return_value = return_value
-
- async def __call__(
- self,
- archive_path: pathlib.Path,
- extract_dir: pathlib.Path,
- max_extract_size: int,
- chunk_size: int,
- ) -> bool:
- self.archive_path = archive_path
- self.extract_dir = extract_dir
- self.max_extract_size = max_extract_size
- self.chunk_size = chunk_size
- assert await aiofiles.os.path.exists(extract_dir)
- return self.return_value
-
+class CacheDirResolver:
+ def __init__(self, cache_dir: pathlib.Path | None) -> None:
+ self.cache_dir = cache_dir
-class ExtractErrorRaiser:
- def __call__(self, *args: object, **kwargs: object) -> tuple[int,
list[str]]:
- raise atr.tasks.checks.compare.archives.ExtractionError("Extraction
error")
-
-
-class ExtractRecorder:
- def __init__(self, extracted_size: int = 123) -> None:
- self.calls: list[tuple[str, str, int, int]] = []
- self.extracted_size = extracted_size
-
- def __call__(self, archive_path: str, extract_dir: str, max_size: int,
chunk_size: int) -> tuple[int, list[str]]:
- self.calls.append((archive_path, extract_dir, max_size, chunk_size))
- return self.extracted_size, []
+ async def __call__(self, args: object) -> pathlib.Path | None:
+ return self.cache_dir
class FindArchiveRootRecorder:
@@ -528,36 +498,6 @@ def test_compare_trees_rsync_trees_match(monkeypatch:
pytest.MonkeyPatch, tmp_pa
assert result.repo_only == set()
[email protected]
-async def test_decompress_archive_calls_extract(monkeypatch:
pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
- archive_path = tmp_path / "artifact.tar.gz"
- extract_dir = tmp_path / "extracted"
- extract_dir.mkdir()
- extract_recorder = ExtractRecorder()
-
- monkeypatch.setattr(atr.tasks.checks.compare.archives, "extract",
extract_recorder)
-
- result = await atr.tasks.checks.compare._decompress_archive(archive_path,
extract_dir, 10, 20)
-
- assert result is True
- assert extract_recorder.calls == [(str(archive_path), str(extract_dir),
10, 20)]
-
-
[email protected]
-async def test_decompress_archive_handles_extraction_error(
- monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
-) -> None:
- archive_path = tmp_path / "artifact.tar.gz"
- extract_dir = tmp_path / "extracted"
- extract_dir.mkdir()
-
- monkeypatch.setattr(atr.tasks.checks.compare.archives, "extract",
ExtractErrorRaiser())
-
- result = await atr.tasks.checks.compare._decompress_archive(archive_path,
extract_dir, 10, 20)
-
- assert result is False
-
-
@pytest.mark.asyncio
async def test_find_archive_root_accepts_any_single_directory(tmp_path:
pathlib.Path) -> None:
archive_path = tmp_path / "my-project-1.0.0.tar.gz"
@@ -674,14 +614,15 @@ async def
test_source_trees_creates_temp_workspace_and_cleans_up(
args = _make_args(recorder)
payload = _make_payload()
checkout = CheckoutRecorder()
- decompress = DecompressRecorder()
+ cache_dir = tmp_path / "cache"
+ cache_dir.mkdir()
find_root = FindArchiveRootRecorder("artifact")
compare = CompareRecorder(repo_only={"extra1.txt", "extra2.txt"})
tmp_root = tmp_path / "temporary-root"
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
- monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir",
CacheDirResolver(cache_dir))
monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir",
ReturnValue(tmp_root))
@@ -691,8 +632,6 @@ async def
test_source_trees_creates_temp_workspace_and_cleans_up(
assert checkout.checkout_dir is not None
checkout_dir = checkout.checkout_dir
assert checkout_dir.name == "github"
- assert decompress.extract_dir is not None
- assert decompress.extract_dir.name == "archive"
assert checkout_dir.parent.parent == tmp_root
assert checkout_dir.parent.name.startswith("trees-")
assert await aiofiles.os.path.exists(tmp_root)
@@ -716,11 +655,7 @@ async def
test_source_trees_payload_none_skips_temp_workspace(monkeypatch: pytes
"_checkout_github_source",
RaiseAsync("_checkout_github_source should not be called"),
)
- monkeypatch.setattr(
- atr.tasks.checks.compare,
- "_decompress_archive",
- RaiseAsync("_decompress_archive should not be called"),
- )
+ monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir",
RaiseAsync("resolve_cache_dir should not be called"))
monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir",
RaiseSync("get_tmp_dir should not be called"))
await atr.tasks.checks.compare.source_trees(args)
@@ -734,24 +669,16 @@ async def
test_source_trees_permits_pkg_info_when_pyproject_toml_exists(
args = _make_args(recorder)
payload = _make_payload()
checkout = CheckoutRecorder()
+ cache_dir = tmp_path / "cache"
+ (cache_dir / "artifact").mkdir(parents=True)
+ (cache_dir / "artifact" / "pyproject.toml").write_text("[project]\nname =
'test'\n")
find_root = FindArchiveRootRecorder("artifact")
compare = CompareRecorder(invalid={"PKG-INFO"})
tmp_root = tmp_path / "temporary-root"
- async def decompress_with_pyproject(
- archive_path: pathlib.Path,
- extract_dir: pathlib.Path,
- max_extract_size: int,
- chunk_size: int,
- ) -> bool:
- archive_content = extract_dir / "artifact"
- archive_content.mkdir(parents=True, exist_ok=True)
- (archive_content / "pyproject.toml").write_text("[project]\nname =
'test'\n")
- return True
-
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
- monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress_with_pyproject)
+ monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir",
CacheDirResolver(cache_dir))
monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir",
ReturnValue(tmp_root))
@@ -770,14 +697,15 @@ async def
test_source_trees_records_failure_when_archive_has_invalid_files(
args = _make_args(recorder)
payload = _make_payload()
checkout = CheckoutRecorder()
- decompress = DecompressRecorder()
+ cache_dir = tmp_path / "cache"
+ cache_dir.mkdir()
find_root = FindArchiveRootRecorder("artifact")
compare = CompareRecorder(invalid={"bad1.txt", "bad2.txt"},
repo_only={"ok.txt"})
tmp_root = tmp_path / "temporary-root"
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
- monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir",
CacheDirResolver(cache_dir))
monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir",
ReturnValue(tmp_root))
@@ -801,13 +729,14 @@ async def
test_source_trees_records_failure_when_archive_root_not_found(
args = _make_args(recorder)
payload = _make_payload()
checkout = CheckoutRecorder()
- decompress = DecompressRecorder()
+ cache_dir = tmp_path / "cache"
+ cache_dir.mkdir()
find_root = FindArchiveRootRecorder(root=None)
tmp_root = tmp_path / "temporary-root"
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
- monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir",
CacheDirResolver(cache_dir))
monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir",
ReturnValue(tmp_root))
@@ -820,29 +749,28 @@ async def
test_source_trees_records_failure_when_archive_root_not_found(
@pytest.mark.asyncio
-async def test_source_trees_records_failure_when_decompress_fails(
- monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
+async def test_source_trees_records_failure_when_cache_dir_unavailable(
+ monkeypatch: pytest.MonkeyPatch,
) -> None:
recorder = RecorderStub(True)
args = _make_args(recorder)
payload = _make_payload()
- checkout = CheckoutRecorder()
- decompress = DecompressRecorder(return_value=False)
- tmp_root = tmp_path / "temporary-root"
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
- monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
- monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
- monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir",
ReturnValue(tmp_root))
+ monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir",
CacheDirResolver(None))
+ monkeypatch.setattr(
+ atr.tasks.checks.compare,
+ "_checkout_github_source",
+ RaiseAsync("_checkout_github_source should not be called"),
+ )
+ monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir",
RaiseSync("get_tmp_dir should not be called"))
await atr.tasks.checks.compare.source_trees(args)
assert len(recorder.failure_calls) == 1
message, data = recorder.failure_calls[0]
- assert message == "Failed to extract source archive for comparison"
+ assert message == "Extracted archive tree is not available"
assert isinstance(data, dict)
- assert data["archive_path"] == str(await recorder.abs_path())
- assert data["extract_dir"] == str(decompress.extract_dir)
@pytest.mark.asyncio
@@ -853,13 +781,14 @@ async def
test_source_trees_records_failure_when_extra_entries_in_archive(
args = _make_args(recorder)
payload = _make_payload()
checkout = CheckoutRecorder()
- decompress = DecompressRecorder()
+ cache_dir = tmp_path / "cache"
+ cache_dir.mkdir()
find_root = FindArchiveRootRecorder(root="artifact",
extra_entries=["README.txt", "extra.txt"])
tmp_root = tmp_path / "temporary-root"
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
- monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir",
CacheDirResolver(cache_dir))
monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir",
ReturnValue(tmp_root))
@@ -881,7 +810,8 @@ async def
test_source_trees_reports_repo_only_sample_limited_to_five(
args = _make_args(recorder)
payload = _make_payload()
checkout = CheckoutRecorder()
- decompress = DecompressRecorder()
+ cache_dir = tmp_path / "cache"
+ cache_dir.mkdir()
find_root = FindArchiveRootRecorder("artifact")
repo_only_files = {f"file{i}.txt" for i in range(10)}
compare = CompareRecorder(repo_only=repo_only_files)
@@ -889,7 +819,7 @@ async def
test_source_trees_reports_repo_only_sample_limited_to_five(
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
- monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks, "resolve_cache_dir",
CacheDirResolver(cache_dir))
monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
monkeypatch.setattr(atr.tasks.checks.compare.paths, "get_tmp_dir",
ReturnValue(tmp_root))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]