This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new 6440e1b Account for a root directory within extracted archives
6440e1b is described below
commit 6440e1b815e61ae17e12ac266acb9171dc720d46
Author: Sean B. Palmer <[email protected]>
AuthorDate: Thu Feb 5 16:51:39 2026 +0000
Account for a root directory within extracted archives
---
atr/tasks/checks/compare.py | 60 ++++++++++++-
tests/unit/test_checks_compare.py | 181 ++++++++++++++++++++++++++++++++++++++
2 files changed, 237 insertions(+), 4 deletions(-)
diff --git a/atr/tasks/checks/compare.py b/atr/tasks/checks/compare.py
index 3cf62c7..7cb41a6 100644
--- a/atr/tasks/checks/compare.py
+++ b/atr/tasks/checks/compare.py
@@ -50,6 +50,12 @@ _DEFAULT_EMAIL: Final[str] = "atr@localhost"
_DEFAULT_USER: Final[str] = "atr"
+@dataclasses.dataclass
+class ArchiveRootResult:
+ root: str | None
+ extra_entries: list[str]
+
+
class DetermineWantsForSha:
def __init__(self, sha: str) -> None:
self.sha = sha
@@ -103,16 +109,33 @@ async def source_trees(args: checks.FunctionArguments) ->
results.Results | None
{"repo_url":
f"https://github.com/{payload.repository}.git", "sha": payload.sha},
)
return None
- if await _decompress_archive(primary_abs_path, archive_dir_path,
max_extract_size, chunk_size):
- archive_dir = str(archive_dir_path)
- else:
+ if not await _decompress_archive(primary_abs_path,
archive_dir_path, max_extract_size, chunk_size):
await recorder.failure(
"Failed to extract source archive for comparison",
{"archive_path": str(primary_abs_path), "extract_dir":
str(archive_dir_path)},
)
return None
+ archive_root_result = await _find_archive_root(primary_abs_path,
archive_dir_path)
+ if archive_root_result.root is None:
+ await recorder.failure(
+ "Could not determine archive root directory for
comparison",
+ {"archive_path": str(primary_abs_path), "extract_dir":
str(archive_dir_path)},
+ )
+ return None
+ if archive_root_result.extra_entries:
+ await recorder.failure(
+ "Archive contains entries outside the root directory",
+ {
+ "archive_path": str(primary_abs_path),
+ "root": archive_root_result.root,
+ "extra_entries":
sorted(archive_root_result.extra_entries),
+ },
+ )
+ return None
+ archive_content_dir = archive_dir_path / archive_root_result.root
+ archive_dir = str(archive_content_dir)
try:
- comparison = await _compare_trees(github_dir, archive_dir_path)
+ comparison = await _compare_trees(github_dir,
archive_content_dir)
except RuntimeError as exc:
await recorder.failure(
"Failed to compare source tree against GitHub checkout",
@@ -301,6 +324,35 @@ def _ensure_clone_identity_env() -> None:
os.environ["EMAIL"] = _DEFAULT_EMAIL
+async def _find_archive_root(archive_path: pathlib.Path, extract_dir:
pathlib.Path) -> ArchiveRootResult:
+ entries = await aiofiles.os.listdir(extract_dir)
+ directories: list[str] = []
+ for entry in entries:
+ if entry.startswith("._"):
+ continue
+ entry_path = extract_dir / entry
+ if await aiofiles.os.path.isdir(entry_path):
+ directories.append(entry)
+ if len(directories) != 1:
+ log.warning(
+ "Expected exactly one root directory in archive for
compare.source_trees",
+ archive_path=str(archive_path),
+ extract_dir=str(extract_dir),
+ directories=directories[:10],
+ )
+ return ArchiveRootResult(root=None, extra_entries=[])
+ found_root = directories[0]
+ extra_entries = [e for e in entries if (e != found_root) and (not
e.startswith("._"))]
+ log.info(
+ "Found archive root directory for compare.source_trees",
+ archive_path=str(archive_path),
+ extract_dir=str(extract_dir),
+ root=found_root,
+ extra_entries=extra_entries,
+ )
+ return ArchiveRootResult(root=found_root, extra_entries=extra_entries)
+
+
async def _load_tp_payload(
project_name: str, version_name: str, revision_number: str
) -> github_models.TrustedPublisherPayload | None:
diff --git a/tests/unit/test_checks_compare.py
b/tests/unit/test_checks_compare.py
index dbac4fc..f87248c 100644
--- a/tests/unit/test_checks_compare.py
+++ b/tests/unit/test_checks_compare.py
@@ -117,6 +117,19 @@ class ExtractRecorder:
return self.extracted_size, []
+class FindArchiveRootRecorder:
+ def __init__(self, root: str | None = "artifact", extra_entries: list[str]
| None = None) -> None:
+ self.calls: list[tuple[pathlib.Path, pathlib.Path]] = []
+ self.root = root
+ self.extra_entries = extra_entries or []
+
+ async def __call__(
+ self, archive_path: pathlib.Path, extract_dir: pathlib.Path
+ ) -> atr.tasks.checks.compare.ArchiveRootResult:
+ self.calls.append((archive_path, extract_dir))
+ return atr.tasks.checks.compare.ArchiveRootResult(root=self.root,
extra_entries=self.extra_entries)
+
+
class GitClientStub:
def __init__(self) -> None:
self.closed = False
@@ -522,6 +535,114 @@ async def
test_decompress_archive_handles_extraction_error(
assert result is False
+@pytest.mark.asyncio
+async def test_find_archive_root_finds_expected_root(tmp_path: pathlib.Path)
-> None:
+ archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ root_dir = extract_dir / "my-project-1.0.0"
+ root_dir.mkdir(parents=True)
+
+ result = await atr.tasks.checks.compare._find_archive_root(archive_path,
extract_dir)
+
+ assert result.root == "my-project-1.0.0"
+ assert result.extra_entries == []
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_finds_root_with_source_suffix(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "my-project-1.0.0-source.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ root_dir = extract_dir / "my-project-1.0.0-source"
+ root_dir.mkdir(parents=True)
+
+ result = await atr.tasks.checks.compare._find_archive_root(archive_path,
extract_dir)
+
+ assert result.root == "my-project-1.0.0-source"
+ assert result.extra_entries == []
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_finds_root_without_source_suffix(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "my-project-1.0.0-source.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ root_dir = extract_dir / "my-project-1.0.0"
+ root_dir.mkdir(parents=True)
+
+ result = await atr.tasks.checks.compare._find_archive_root(archive_path,
extract_dir)
+
+ assert result.root == "my-project-1.0.0"
+ assert result.extra_entries == []
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_accepts_any_single_directory(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ extract_dir.mkdir(parents=True)
+ root_dir = extract_dir / "package"
+ root_dir.mkdir()
+
+ result = await atr.tasks.checks.compare._find_archive_root(archive_path,
extract_dir)
+
+ assert result.root == "package"
+ assert result.extra_entries == []
+
+
+@pytest.mark.asyncio
+async def
test_find_archive_root_returns_none_when_multiple_directories(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ extract_dir.mkdir(parents=True)
+ (extract_dir / "dir1").mkdir()
+ (extract_dir / "dir2").mkdir()
+
+ result = await atr.tasks.checks.compare._find_archive_root(archive_path,
extract_dir)
+
+ assert result.root is None
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_returns_none_when_no_directories(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ extract_dir.mkdir(parents=True)
+ (extract_dir / "file.txt").write_text("content")
+
+ result = await atr.tasks.checks.compare._find_archive_root(archive_path,
extract_dir)
+
+ assert result.root is None
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_detects_extra_file_entries(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ root_dir = extract_dir / "my-project-1.0.0"
+ root_dir.mkdir(parents=True)
+ (extract_dir / "extra.txt").write_text("extra")
+ (extract_dir / "README").write_text("readme")
+
+ result = await atr.tasks.checks.compare._find_archive_root(archive_path,
extract_dir)
+
+ assert result.root == "my-project-1.0.0"
+ assert sorted(result.extra_entries) == ["README", "extra.txt"]
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_ignores_macos_metadata(tmp_path:
pathlib.Path) -> None:
+ archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+ extract_dir = tmp_path / "extracted"
+ root_dir = extract_dir / "my-project-1.0.0"
+ root_dir.mkdir(parents=True)
+ metadata_file = extract_dir / "._my-project-1.0.0"
+ metadata_file.write_text("metadata")
+
+ result = await atr.tasks.checks.compare._find_archive_root(archive_path,
extract_dir)
+
+ assert result.root == "my-project-1.0.0"
+ assert result.extra_entries == []
+
+
@pytest.mark.asyncio
async def test_source_trees_creates_temp_workspace_and_cleans_up(
monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
@@ -531,12 +652,14 @@ async def
test_source_trees_creates_temp_workspace_and_cleans_up(
payload = _make_payload()
checkout = CheckoutRecorder()
decompress = DecompressRecorder()
+ find_root = FindArchiveRootRecorder("artifact")
compare = CompareRecorder(repo_only={"extra1.txt", "extra2.txt"})
tmp_root = tmp_path / "temporary-root"
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir",
ReturnValue(tmp_root))
@@ -589,12 +712,14 @@ async def
test_source_trees_records_failure_when_archive_has_invalid_files(
payload = _make_payload()
checkout = CheckoutRecorder()
decompress = DecompressRecorder()
+ find_root = FindArchiveRootRecorder("artifact")
compare = CompareRecorder(invalid={"bad1.txt", "bad2.txt"},
repo_only={"ok.txt"})
tmp_root = tmp_path / "temporary-root"
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir",
ReturnValue(tmp_root))
@@ -609,6 +734,60 @@ async def
test_source_trees_records_failure_when_archive_has_invalid_files(
assert data["invalid_paths"] == ["bad1.txt", "bad2.txt"]
+@pytest.mark.asyncio
+async def test_source_trees_records_failure_when_archive_root_not_found(
+ monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
+) -> None:
+ recorder = RecorderStub(True)
+ args = _make_args(recorder)
+ payload = _make_payload()
+ checkout = CheckoutRecorder()
+ decompress = DecompressRecorder()
+ find_root = FindArchiveRootRecorder(root=None)
+ tmp_root = tmp_path / "temporary-root"
+
+ monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
+ monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
+ monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
+ monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir",
ReturnValue(tmp_root))
+
+ await atr.tasks.checks.compare.source_trees(args)
+
+ assert len(recorder.failure_calls) == 1
+ message, data = recorder.failure_calls[0]
+ assert message == "Could not determine archive root directory for
comparison"
+ assert isinstance(data, dict)
+
+
+@pytest.mark.asyncio
+async def test_source_trees_records_failure_when_extra_entries_in_archive(
+ monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
+) -> None:
+ recorder = RecorderStub(True)
+ args = _make_args(recorder)
+ payload = _make_payload()
+ checkout = CheckoutRecorder()
+ decompress = DecompressRecorder()
+ find_root = FindArchiveRootRecorder(root="artifact",
extra_entries=["README.txt", "extra.txt"])
+ tmp_root = tmp_path / "temporary-root"
+
+ monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
+ monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
+ monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
+ monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir",
ReturnValue(tmp_root))
+
+ await atr.tasks.checks.compare.source_trees(args)
+
+ assert len(recorder.failure_calls) == 1
+ message, data = recorder.failure_calls[0]
+ assert message == "Archive contains entries outside the root directory"
+ assert isinstance(data, dict)
+ assert data["root"] == "artifact"
+ assert data["extra_entries"] == ["README.txt", "extra.txt"]
+
+
@pytest.mark.asyncio
async def test_source_trees_records_failure_when_decompress_fails(
monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
@@ -644,6 +823,7 @@ async def
test_source_trees_reports_repo_only_sample_limited_to_five(
payload = _make_payload()
checkout = CheckoutRecorder()
decompress = DecompressRecorder()
+ find_root = FindArchiveRootRecorder("artifact")
repo_only_files = {f"file{i}.txt" for i in range(10)}
compare = CompareRecorder(repo_only=repo_only_files)
tmp_root = tmp_path / "temporary-root"
@@ -651,6 +831,7 @@ async def
test_source_trees_reports_repo_only_sample_limited_to_five(
monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload",
PayloadLoader(payload))
monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source",
checkout)
monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive",
decompress)
+ monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root",
find_root)
monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir",
ReturnValue(tmp_root))
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]