This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/sbp by this push:
     new 6440e1b  Account for a root directory within extracted archives
6440e1b is described below

commit 6440e1b815e61ae17e12ac266acb9171dc720d46
Author: Sean B. Palmer <[email protected]>
AuthorDate: Thu Feb 5 16:51:39 2026 +0000

    Account for a root directory within extracted archives
---
 atr/tasks/checks/compare.py       |  60 ++++++++++++-
 tests/unit/test_checks_compare.py | 181 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 237 insertions(+), 4 deletions(-)

diff --git a/atr/tasks/checks/compare.py b/atr/tasks/checks/compare.py
index 3cf62c7..7cb41a6 100644
--- a/atr/tasks/checks/compare.py
+++ b/atr/tasks/checks/compare.py
@@ -50,6 +50,12 @@ _DEFAULT_EMAIL: Final[str] = "atr@localhost"
 _DEFAULT_USER: Final[str] = "atr"
 
 
+@dataclasses.dataclass
+class ArchiveRootResult:
+    root: str | None
+    extra_entries: list[str]
+
+
 class DetermineWantsForSha:
     def __init__(self, sha: str) -> None:
         self.sha = sha
@@ -103,16 +109,33 @@ async def source_trees(args: checks.FunctionArguments) -> results.Results | None
                     {"repo_url": f"https://github.com/{payload.repository}.git", "sha": payload.sha},
                 )
                 return None
-            if await _decompress_archive(primary_abs_path, archive_dir_path, max_extract_size, chunk_size):
-                archive_dir = str(archive_dir_path)
-            else:
+            if not await _decompress_archive(primary_abs_path, archive_dir_path, max_extract_size, chunk_size):
                 await recorder.failure(
                     "Failed to extract source archive for comparison",
                     {"archive_path": str(primary_abs_path), "extract_dir": str(archive_dir_path)},
                 )
                 return None
+            archive_root_result = await _find_archive_root(primary_abs_path, archive_dir_path)
+            if archive_root_result.root is None:
+                await recorder.failure(
+                    "Could not determine archive root directory for comparison",
+                    {"archive_path": str(primary_abs_path), "extract_dir": str(archive_dir_path)},
+                )
+                return None
+            if archive_root_result.extra_entries:
+                await recorder.failure(
+                    "Archive contains entries outside the root directory",
+                    {
+                        "archive_path": str(primary_abs_path),
+                        "root": archive_root_result.root,
+                        "extra_entries": sorted(archive_root_result.extra_entries),
+                    },
+                )
+                return None
+            archive_content_dir = archive_dir_path / archive_root_result.root
+            archive_dir = str(archive_content_dir)
             try:
-                comparison = await _compare_trees(github_dir, archive_dir_path)
+                comparison = await _compare_trees(github_dir, archive_content_dir)
             except RuntimeError as exc:
                 await recorder.failure(
                     "Failed to compare source tree against GitHub checkout",
@@ -301,6 +324,35 @@ def _ensure_clone_identity_env() -> None:
     os.environ["EMAIL"] = _DEFAULT_EMAIL
 
 
+async def _find_archive_root(archive_path: pathlib.Path, extract_dir: pathlib.Path) -> ArchiveRootResult:
+    entries = await aiofiles.os.listdir(extract_dir)
+    directories: list[str] = []
+    for entry in entries:
+        if entry.startswith("._"):
+            continue
+        entry_path = extract_dir / entry
+        if await aiofiles.os.path.isdir(entry_path):
+            directories.append(entry)
+    if len(directories) != 1:
+        log.warning(
+            "Expected exactly one root directory in archive for compare.source_trees",
+            archive_path=str(archive_path),
+            extract_dir=str(extract_dir),
+            directories=directories[:10],
+        )
+        return ArchiveRootResult(root=None, extra_entries=[])
+    found_root = directories[0]
+    extra_entries = [e for e in entries if (e != found_root) and (not e.startswith("._"))]
+    log.info(
+        "Found archive root directory for compare.source_trees",
+        archive_path=str(archive_path),
+        extract_dir=str(extract_dir),
+        root=found_root,
+        extra_entries=extra_entries,
+    )
+    return ArchiveRootResult(root=found_root, extra_entries=extra_entries)
+
+
 async def _load_tp_payload(
     project_name: str, version_name: str, revision_number: str
 ) -> github_models.TrustedPublisherPayload | None:
diff --git a/tests/unit/test_checks_compare.py 
b/tests/unit/test_checks_compare.py
index dbac4fc..f87248c 100644
--- a/tests/unit/test_checks_compare.py
+++ b/tests/unit/test_checks_compare.py
@@ -117,6 +117,19 @@ class ExtractRecorder:
         return self.extracted_size, []
 
 
+class FindArchiveRootRecorder:
+    def __init__(self, root: str | None = "artifact", extra_entries: list[str] 
| None = None) -> None:
+        self.calls: list[tuple[pathlib.Path, pathlib.Path]] = []
+        self.root = root
+        self.extra_entries = extra_entries or []
+
+    async def __call__(
+        self, archive_path: pathlib.Path, extract_dir: pathlib.Path
+    ) -> atr.tasks.checks.compare.ArchiveRootResult:
+        self.calls.append((archive_path, extract_dir))
+        return atr.tasks.checks.compare.ArchiveRootResult(root=self.root, 
extra_entries=self.extra_entries)
+
+
 class GitClientStub:
     def __init__(self) -> None:
         self.closed = False
@@ -522,6 +535,114 @@ async def 
test_decompress_archive_handles_extraction_error(
     assert result is False
 
 
+@pytest.mark.asyncio
+async def test_find_archive_root_finds_expected_root(tmp_path: pathlib.Path) 
-> None:
+    archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+    extract_dir = tmp_path / "extracted"
+    root_dir = extract_dir / "my-project-1.0.0"
+    root_dir.mkdir(parents=True)
+
+    result = await atr.tasks.checks.compare._find_archive_root(archive_path, 
extract_dir)
+
+    assert result.root == "my-project-1.0.0"
+    assert result.extra_entries == []
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_finds_root_with_source_suffix(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "my-project-1.0.0-source.tar.gz"
+    extract_dir = tmp_path / "extracted"
+    root_dir = extract_dir / "my-project-1.0.0-source"
+    root_dir.mkdir(parents=True)
+
+    result = await atr.tasks.checks.compare._find_archive_root(archive_path, 
extract_dir)
+
+    assert result.root == "my-project-1.0.0-source"
+    assert result.extra_entries == []
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_finds_root_without_source_suffix(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "my-project-1.0.0-source.tar.gz"
+    extract_dir = tmp_path / "extracted"
+    root_dir = extract_dir / "my-project-1.0.0"
+    root_dir.mkdir(parents=True)
+
+    result = await atr.tasks.checks.compare._find_archive_root(archive_path, 
extract_dir)
+
+    assert result.root == "my-project-1.0.0"
+    assert result.extra_entries == []
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_accepts_any_single_directory(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+    extract_dir = tmp_path / "extracted"
+    extract_dir.mkdir(parents=True)
+    root_dir = extract_dir / "package"
+    root_dir.mkdir()
+
+    result = await atr.tasks.checks.compare._find_archive_root(archive_path, 
extract_dir)
+
+    assert result.root == "package"
+    assert result.extra_entries == []
+
+
+@pytest.mark.asyncio
+async def 
test_find_archive_root_returns_none_when_multiple_directories(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+    extract_dir = tmp_path / "extracted"
+    extract_dir.mkdir(parents=True)
+    (extract_dir / "dir1").mkdir()
+    (extract_dir / "dir2").mkdir()
+
+    result = await atr.tasks.checks.compare._find_archive_root(archive_path, 
extract_dir)
+
+    assert result.root is None
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_returns_none_when_no_directories(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+    extract_dir = tmp_path / "extracted"
+    extract_dir.mkdir(parents=True)
+    (extract_dir / "file.txt").write_text("content")
+
+    result = await atr.tasks.checks.compare._find_archive_root(archive_path, 
extract_dir)
+
+    assert result.root is None
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_detects_extra_file_entries(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+    extract_dir = tmp_path / "extracted"
+    root_dir = extract_dir / "my-project-1.0.0"
+    root_dir.mkdir(parents=True)
+    (extract_dir / "extra.txt").write_text("extra")
+    (extract_dir / "README").write_text("readme")
+
+    result = await atr.tasks.checks.compare._find_archive_root(archive_path, 
extract_dir)
+
+    assert result.root == "my-project-1.0.0"
+    assert sorted(result.extra_entries) == ["README", "extra.txt"]
+
+
+@pytest.mark.asyncio
+async def test_find_archive_root_ignores_macos_metadata(tmp_path: 
pathlib.Path) -> None:
+    archive_path = tmp_path / "my-project-1.0.0.tar.gz"
+    extract_dir = tmp_path / "extracted"
+    root_dir = extract_dir / "my-project-1.0.0"
+    root_dir.mkdir(parents=True)
+    metadata_file = extract_dir / "._my-project-1.0.0"
+    metadata_file.write_text("metadata")
+
+    result = await atr.tasks.checks.compare._find_archive_root(archive_path, 
extract_dir)
+
+    assert result.root == "my-project-1.0.0"
+    assert result.extra_entries == []
+
+
 @pytest.mark.asyncio
 async def test_source_trees_creates_temp_workspace_and_cleans_up(
     monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
@@ -531,12 +652,14 @@ async def 
test_source_trees_creates_temp_workspace_and_cleans_up(
     payload = _make_payload()
     checkout = CheckoutRecorder()
     decompress = DecompressRecorder()
+    find_root = FindArchiveRootRecorder("artifact")
     compare = CompareRecorder(repo_only={"extra1.txt", "extra2.txt"})
     tmp_root = tmp_path / "temporary-root"
 
     monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", 
PayloadLoader(payload))
     monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", 
checkout)
     monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", 
decompress)
+    monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", 
find_root)
     monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
     monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", 
ReturnValue(tmp_root))
 
@@ -589,12 +712,14 @@ async def 
test_source_trees_records_failure_when_archive_has_invalid_files(
     payload = _make_payload()
     checkout = CheckoutRecorder()
     decompress = DecompressRecorder()
+    find_root = FindArchiveRootRecorder("artifact")
     compare = CompareRecorder(invalid={"bad1.txt", "bad2.txt"}, 
repo_only={"ok.txt"})
     tmp_root = tmp_path / "temporary-root"
 
     monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", 
PayloadLoader(payload))
     monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", 
checkout)
     monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", 
decompress)
+    monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", 
find_root)
     monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
     monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", 
ReturnValue(tmp_root))
 
@@ -609,6 +734,60 @@ async def 
test_source_trees_records_failure_when_archive_has_invalid_files(
     assert data["invalid_paths"] == ["bad1.txt", "bad2.txt"]
 
 
+@pytest.mark.asyncio
+async def test_source_trees_records_failure_when_archive_root_not_found(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
+) -> None:
+    recorder = RecorderStub(True)
+    args = _make_args(recorder)
+    payload = _make_payload()
+    checkout = CheckoutRecorder()
+    decompress = DecompressRecorder()
+    find_root = FindArchiveRootRecorder(root=None)
+    tmp_root = tmp_path / "temporary-root"
+
+    monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", 
PayloadLoader(payload))
+    monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", 
checkout)
+    monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", 
decompress)
+    monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", 
find_root)
+    monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", 
ReturnValue(tmp_root))
+
+    await atr.tasks.checks.compare.source_trees(args)
+
+    assert len(recorder.failure_calls) == 1
+    message, data = recorder.failure_calls[0]
+    assert message == "Could not determine archive root directory for comparison"
+    assert isinstance(data, dict)
+
+
+@pytest.mark.asyncio
+async def test_source_trees_records_failure_when_extra_entries_in_archive(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
+) -> None:
+    recorder = RecorderStub(True)
+    args = _make_args(recorder)
+    payload = _make_payload()
+    checkout = CheckoutRecorder()
+    decompress = DecompressRecorder()
+    find_root = FindArchiveRootRecorder(root="artifact", 
extra_entries=["README.txt", "extra.txt"])
+    tmp_root = tmp_path / "temporary-root"
+
+    monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", 
PayloadLoader(payload))
+    monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", 
checkout)
+    monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", 
decompress)
+    monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", 
find_root)
+    monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", 
ReturnValue(tmp_root))
+
+    await atr.tasks.checks.compare.source_trees(args)
+
+    assert len(recorder.failure_calls) == 1
+    message, data = recorder.failure_calls[0]
+    assert message == "Archive contains entries outside the root directory"
+    assert isinstance(data, dict)
+    assert data["root"] == "artifact"
+    assert data["extra_entries"] == ["README.txt", "extra.txt"]
+
+
 @pytest.mark.asyncio
 async def test_source_trees_records_failure_when_decompress_fails(
     monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
@@ -644,6 +823,7 @@ async def 
test_source_trees_reports_repo_only_sample_limited_to_five(
     payload = _make_payload()
     checkout = CheckoutRecorder()
     decompress = DecompressRecorder()
+    find_root = FindArchiveRootRecorder("artifact")
     repo_only_files = {f"file{i}.txt" for i in range(10)}
     compare = CompareRecorder(repo_only=repo_only_files)
     tmp_root = tmp_path / "temporary-root"
@@ -651,6 +831,7 @@ async def 
test_source_trees_reports_repo_only_sample_limited_to_five(
     monkeypatch.setattr(atr.tasks.checks.compare, "_load_tp_payload", 
PayloadLoader(payload))
     monkeypatch.setattr(atr.tasks.checks.compare, "_checkout_github_source", 
checkout)
     monkeypatch.setattr(atr.tasks.checks.compare, "_decompress_archive", 
decompress)
+    monkeypatch.setattr(atr.tasks.checks.compare, "_find_archive_root", 
find_root)
     monkeypatch.setattr(atr.tasks.checks.compare, "_compare_trees", compare)
     monkeypatch.setattr(atr.tasks.checks.compare.util, "get_tmp_dir", 
ReturnValue(tmp_root))
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to