This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/sbp by this push:
     new 7340df57 Add a method to perform quarantine checks when creating a 
revision
7340df57 is described below

commit 7340df576da4415fd1468c953e912f0e7fd14efd
Author: Sean B. Palmer <[email protected]>
AuthorDate: Mon Mar 2 19:43:58 2026 +0000

    Add a method to perform quarantine checks when creating a revision
---
 atr/detection.py                                |  14 +
 atr/models/sql.py                               |   1 +
 atr/storage/writers/revision.py                 | 234 +++++++++++++-
 migrations/versions/0054_2026.03.02_3799e8e6.py |  38 +++
 tests/unit/test_create_revision_quarantine.py   | 385 ++++++++++++++++++++++++
 tests/unit/test_detection.py                    |  38 +++
 6 files changed, 708 insertions(+), 2 deletions(-)

diff --git a/atr/detection.py b/atr/detection.py
index 07ecf22d..bedf8cee 100644
--- a/atr/detection.py
+++ b/atr/detection.py
@@ -83,6 +83,20 @@ def check_archive_safety(archive_path: str) -> list[str]:
     return errors
 
 
def deduplicate_quarantine_archives(archive_paths: list[str], path_to_hash: dict[str, str]) -> list[tuple[str, str]]:
    """Collapse archives sharing the same (content hash, archive suffix) pair.

    For each group of candidates with identical content and a recognised
    archive suffix, only the lexicographically smallest relative path is kept.
    Candidates whose basename has no recognised archive suffix are dropped.
    Returns (rel_path, content_hash) pairs sorted by rel_path.
    """
    best: dict[tuple[str, str], str] = {}
    for candidate in archive_paths:
        suffix = _quarantine_archive_suffix(_path_basename(candidate))
        if suffix is None:
            # Not a recognised archive type, so nothing to deduplicate
            continue
        key = (path_to_hash[candidate], suffix)
        current = best.get(key)
        if (current is None) or (candidate < current):
            best[key] = candidate
    return [(path, path_to_hash[path]) for path in sorted(best.values())]
+
 def detect_archives_requiring_quarantine(
     path_to_hash: dict[str, str], previous_attestable: models.AttestableV1 | 
None
 ) -> list[str]:
diff --git a/atr/models/sql.py b/atr/models/sql.py
index 86a5d39b..cbb875b8 100644
--- a/atr/models/sql.py
+++ b/atr/models/sql.py
@@ -165,6 +165,7 @@ class ProjectStatus(enum.StrEnum):
 
 
class QuarantineStatus(enum.Enum):
    # Lifecycle of a quarantine record:
    # STAGING: row created, but the staged files have not yet been moved into
    # place and the validation task has not yet been queued; flipped to
    # PENDING once staging completes (see the revision writer).
    STAGING = "STAGING"
    # PENDING: staging finished and a validation task has been queued.
    PENDING = "PENDING"
    # FAILED: quarantine did not complete successfully -- exact semantics are
    # assigned elsewhere; NOTE(review): confirm against the validation task.
    FAILED = "FAILED"
 
diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py
index a2fbf3ac..909a9bd2 100644
--- a/atr/storage/writers/revision.py
+++ b/atr/storage/writers/revision.py
@@ -25,7 +25,7 @@ import pathlib
 import secrets
 import tempfile
 import uuid
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Final
 
 import aiofiles.os
 import aioshutil
@@ -47,6 +47,9 @@ if TYPE_CHECKING:
 
     import atr.models.attestable
 
+_QUARANTINE_TOKEN_ALPHABET: Final[str] = "qpzry9x8gf2tvdw0s3jn54khce6mua7b"
+_QUARANTINE_TOKEN_LENGTH: Final[int] = 24
+
 
 class SafeSession:
     def __init__(self, temp_dir: str):
@@ -199,7 +202,7 @@ async def _commit_new_revision(
 
     # Commit to end the transaction started by data.begin_immediate
     # We must commit the revision before starting the checks
-    # This also releases the write lock
+    # This also releases the write lock obtained in _lock_and_merge
     await data.commit()
 
     async with data.begin():
@@ -214,6 +217,16 @@ async def _commit_new_revision(
     return new_revision
 
 
def _generate_quarantine_token() -> str:
    """Return a 24-character random quarantine token.

    Fifteen random bytes provide 120 bits of entropy, consumed five bits at a
    time, which yields exactly _QUARANTINE_TOKEN_LENGTH characters drawn from
    the 32-character _QUARANTINE_TOKEN_ALPHABET.
    """
    bits = int.from_bytes(secrets.token_bytes(15), "big")
    pieces: list[str] = []
    for _ in range(_QUARANTINE_TOKEN_LENGTH):
        bits, index = bits >> 5, bits & 0x1F
        pieces.append(_QUARANTINE_TOKEN_ALPHABET[index])
    return "".join(pieces)
+
+
 async def _lock_and_merge(
     data: db.Session,
     *,
@@ -424,6 +437,223 @@ class CommitteeParticipant(FoundationCommitter):
                 version_name=version_name,
             )
 
    async def create_revision_with_quarantine(  # noqa: C901
        self,
        project_name: str,
        version_name: str,
        asf_uid: str,
        description: str | None = None,
        set_local_cache: bool = False,
        reset_to_global_cache: bool = False,
        modify: Callable[[pathlib.Path, sql.Revision | None], Awaitable[None]] | None = None,
        clone_from: str | None = None,
    ) -> sql.Revision | sql.Quarantined:
        """Create a new revision, quarantining archives that require validation.

        The candidate tree is assembled in a temporary directory (hard linked
        from the prior revision when one exists, then mutated via *modify*),
        validated, hashed, and merged under the release write lock.  If
        detection reports archives requiring quarantine, the tree is staged as
        a sql.Quarantined record via _quarantine_archives; otherwise a normal
        sql.Revision is committed via _commit_new_revision.

        The temporary directory is removed on every failure path.  Raises
        types.FailedError when file validation fails.
        """
        release_name = sql.release_name(project_name, version_name)
        async with db.session() as data:
            release = await data.release(name=release_name, _release_policy=True, _project_release_policy=True).demand(
                RuntimeError("Release does not exist for new revision creation")
            )
            if clone_from is not None:
                old_revision = await data.revision(release_name=release_name, number=clone_from).demand(
                    RuntimeError(f"Revision {clone_from} does not exist")
                )
            else:
                old_revision = await interaction.latest_revision(release)
            if set_local_cache:
                release.check_cache_key = str(uuid.uuid4())
            if reset_to_global_cache:
                release.check_cache_key = None

        # Cloning from an explicit revision number disables the merge step below
        if clone_from is not None:
            old_release_dir = paths.release_directory_base(release) / clone_from
        else:
            old_release_dir = paths.release_directory(release)
        merge_enabled = clone_from is None

        # Create a temporary directory
        # We ensure, below, that it's removed on any exception
        # Use the tmp subdirectory of state, to ensure that it is on the same filesystem
        prefix_token = secrets.token_hex(16)
        temp_dir: str = await asyncio.to_thread(tempfile.mkdtemp, prefix=prefix_token + "-", dir=paths.get_tmp_dir())
        temp_dir_path = pathlib.Path(temp_dir)

        try:
            # The directory was created by mkdtemp, but it's empty
            if old_revision is not None:
                # If this is not the first revision, hard link the previous revision
                await util.create_hard_link_clone(old_release_dir, temp_dir_path, do_not_create_dest_dir=True)
            # The directory is either empty or its files are hard linked to the previous revision
            if modify is not None:
                await modify(temp_dir_path, old_revision)
        # NOTE(review): both handlers below are identical; presumably the
        # FailedError clause exists only for emphasis -- confirm whether
        # types.FailedError subclasses Exception
        except types.FailedError:
            await aioshutil.rmtree(temp_dir)
            raise
        except Exception:
            await aioshutil.rmtree(temp_dir)
            raise

        validation_errors = await asyncio.to_thread(detection.validate_directory, temp_dir_path)
        if validation_errors:
            await aioshutil.rmtree(temp_dir)
            raise types.FailedError("File validation failed:\n" + "\n".join(validation_errors))

        # Ensure that the permissions of every directory are 755
        try:
            await asyncio.to_thread(util.chmod_directories, temp_dir_path)
        except Exception:
            await aioshutil.rmtree(temp_dir)
            raise

        # Make files read only to prevent them from being modified through hard links
        try:
            await asyncio.to_thread(util.chmod_files, temp_dir_path, 0o444)
        except Exception:
            await aioshutil.rmtree(temp_dir)
            raise

        # Hash the candidate tree and gather the inode and hash data that the
        # merge step needs to compare against the previous revision
        try:
            path_to_hash, path_to_size = await attestable.paths_to_hashes_and_sizes(temp_dir_path)
            parent_revision_number = old_revision.number if old_revision else None
            previous_attestable = None
            if parent_revision_number is not None:
                previous_attestable = await attestable.load(project_name, version_name, parent_revision_number)
            base_inodes: dict[str, int] = {}
            base_hashes: dict[str, str] = {}
            if merge_enabled and (old_revision is not None):
                base_dir = old_release_dir
                base_inodes = await asyncio.to_thread(util.paths_to_inodes, base_dir)
                base_hashes = dict(previous_attestable.paths) if (previous_attestable is not None) else {}
            n_inodes = await asyncio.to_thread(util.paths_to_inodes, temp_dir_path)
        except Exception:
            await aioshutil.rmtree(temp_dir)
            raise

        async with SafeSession(temp_dir) as data:
            try:
                previous_attestable, prior_revision_name, merged_release = await _lock_and_merge(
                    data,
                    base_hashes=base_hashes,
                    base_inodes=base_inodes,
                    merge_enabled=merge_enabled,
                    n_inodes=n_inodes,
                    old_revision=old_revision,
                    path_to_hash=path_to_hash,
                    path_to_size=path_to_size,
                    previous_attestable=previous_attestable,
                    project_name=project_name,
                    release=release,
                    _release_name=release_name,
                    temp_dir_path=temp_dir_path,
                    version_name=version_name,
                )
            except Exception:
                await aioshutil.rmtree(temp_dir)
                raise

            # Quarantine takes precedence over committing a revision, but
            # deduplication may discard every candidate (no recognised archive
            # suffix), in which case we fall through to a normal commit
            archive_paths = detection.detect_archives_requiring_quarantine(path_to_hash, previous_attestable)
            if archive_paths:
                deduped = detection.deduplicate_quarantine_archives(archive_paths, path_to_hash)
                if deduped:
                    return await self._quarantine_archives(
                        data,
                        asf_uid=asf_uid,
                        deduped_archives=deduped,
                        description=description,
                        path_to_size=path_to_size,
                        prior_revision_name=prior_revision_name,
                        project_name=project_name,
                        release_name=release_name,
                        temp_dir=temp_dir,
                        version_name=version_name,
                    )

            return await _commit_new_revision(
                data,
                asf_uid=asf_uid,
                description=description,
                path_to_hash=path_to_hash,
                path_to_size=path_to_size,
                previous_attestable=previous_attestable,
                project_name=project_name,
                release=merged_release,
                release_name=release_name,
                temp_dir=temp_dir,
                version_name=version_name,
            )
+
    async def _quarantine_archives(
        self,
        data: db.Session,
        *,
        asf_uid: str,
        deduped_archives: list[tuple[str, str]],
        description: str | None,
        path_to_size: dict[str, int],
        prior_revision_name: str | None,
        project_name: str,
        release_name: str,
        temp_dir: str,
        version_name: str,
    ) -> sql.Quarantined:
        """Stage *deduped_archives* into quarantine and queue a validation task.

        The record is created with status STAGING, the candidate tree is moved
        from *temp_dir* into the quarantine directory, and the write lock is
        released by the first commit.  Only then is the QUARANTINE_VALIDATE
        task queued and the status flipped to PENDING in a second commit, so a
        record left in STAGING indicates an interrupted staging step.
        """
        file_metadata = [
            sql.QuarantineFileEntryV1(
                rel_path=rel_path,
                size_bytes=path_to_size[rel_path],
                content_hash=content_hash,
                errors=[],
            )
            for rel_path, content_hash in deduped_archives
        ]

        token = _generate_quarantine_token()
        quarantined = sql.Quarantined(
            release_name=release_name,
            asf_uid=asf_uid,
            prior_revision_name=prior_revision_name,
            status=sql.QuarantineStatus.STAGING,
            token=token,
            created=datetime.datetime.now(datetime.UTC),
            file_metadata=file_metadata,
            description=description,
        )

        try:
            data.add(quarantined)
            # Flush before deriving the quarantine path, presumably so that the
            # record's database-assigned fields are available -- confirm
            await data.flush()

            quarantine_dir = paths.quarantine_directory(quarantined)
            await aiofiles.os.makedirs(quarantine_dir.parent, exist_ok=True)
            await aiofiles.os.rename(temp_dir, quarantine_dir)
        except Exception:
            # NOTE(review): if the rename succeeded and a later step in this
            # try raised, temp_dir no longer exists and files remain at
            # quarantine_dir without a committed row -- confirm cleanup path
            await aioshutil.rmtree(temp_dir)
            raise

        # Release the write lock obtained in _lock_and_merge
        await data.commit()

        data.add(
            sql.Task(
                status=sql.TaskStatus.QUEUED,
                task_type=sql.TaskType.QUARANTINE_VALIDATE,
                task_args={
                    "quarantined_id": quarantined.id,
                    "archives": [
                        {"rel_path": entry.rel_path, "content_hash": entry.content_hash} for entry in file_metadata
                    ],
                },
                asf_uid=asf_uid,
                project_name=project_name,
                version_name=version_name,
                revision_number=None,
                primary_rel_path=None,
            )
        )
        # Staging is complete: the files are in place and the task is queued
        quarantined.status = sql.QuarantineStatus.PENDING
        await data.commit()

        return quarantined
+
 
 class CommitteeMember(CommitteeParticipant):
     def __init__(
diff --git a/migrations/versions/0054_2026.03.02_3799e8e6.py 
b/migrations/versions/0054_2026.03.02_3799e8e6.py
new file mode 100644
index 00000000..81bff007
--- /dev/null
+++ b/migrations/versions/0054_2026.03.02_3799e8e6.py
@@ -0,0 +1,38 @@
+"""Add a status for quarantine staging
+
+Revision ID: 0054_2026.03.02_3799e8e6
+Revises: 0053_2026.02.23_5e288b2d
+Create Date: 2026-03-02 19:40:37.748553+00:00
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+
+# Revision identifiers, used by Alembic
+revision: str = "0054_2026.03.02_3799e8e6"
+down_revision: str | None = "0053_2026.02.23_5e288b2d"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
def downgrade() -> None:
    """Remove the STAGING value from the quarantined.status enum."""
    # STAGING rows cannot survive the narrowed enum below, so demote them to
    # FAILED first (an interrupted staging step is a failed quarantine).
    op.execute("UPDATE quarantined SET status='FAILED' WHERE status='STAGING'")
    with op.batch_alter_table("quarantined", schema=None) as batch_op:
        batch_op.alter_column(
            "status",
            existing_type=sa.Enum("STAGING", "PENDING", "FAILED", name="quarantinestatus"),
            type_=sa.Enum("PENDING", "FAILED", name="quarantinestatus"),
            existing_nullable=False,
        )
+
+
def upgrade() -> None:
    """Widen the quarantined.status enum to accept the new STAGING value."""
    with op.batch_alter_table("quarantined", schema=None) as batch_op:
        batch_op.alter_column(
            "status",
            existing_type=sa.Enum("PENDING", "FAILED", name="quarantinestatus"),
            type_=sa.Enum("STAGING", "PENDING", "FAILED", name="quarantinestatus"),
            existing_nullable=False,
        )
diff --git a/tests/unit/test_create_revision_quarantine.py 
b/tests/unit/test_create_revision_quarantine.py
new file mode 100644
index 00000000..33342922
--- /dev/null
+++ b/tests/unit/test_create_revision_quarantine.py
@@ -0,0 +1,385 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import contextlib
+import pathlib
+import unittest.mock as mock
+from typing import Final
+
+import pytest
+
+import atr.models.sql as sql
+import atr.storage.writers.revision as revision
+
+_QUARANTINE_TOKEN_ALPHABET: Final[str] = "qpzry9x8gf2tvdw0s3jn54khce6mua7b"
+
+
class AsyncContextManager:
    """Trivial async context manager stub (used for ``data.begin()``).

    Entering yields ``None``; exiting returns ``False`` so any exception
    raised inside the ``async with`` body propagates to the caller.
    """

    async def __aenter__(self):
        return None

    async def __aexit__(self, exc_type, exc_value, traceback):
        return False
+
+
class FakeQuarantined:
    """Attribute-compatible stand-in for sql.Quarantined without ORM mapping.

    Accepts the same keyword arguments the production code passes to
    sql.Quarantined and stores them as plain attributes.
    """

    def __init__(self, **kwargs):
        # id is pre-assigned here, whereas the real model gets one on flush
        self.id = 42
        self.release_name = kwargs.get("release_name", "")
        self.asf_uid = kwargs.get("asf_uid", "")
        self.prior_revision_name = kwargs.get("prior_revision_name")
        self.status = kwargs.get("status", sql.QuarantineStatus.STAGING)
        self.token = kwargs.get("token", "")
        self.created = kwargs.get("created")
        self.file_metadata = kwargs.get("file_metadata")
        self.description = kwargs.get("description")
        # Relationship attribute replaced by an opaque mock
        self.release = mock.MagicMock()
+
+
class MockQuarantineData:
    """In-memory stand-in for the session object yielded by SafeSession.

    Records every object passed to add() and, on each commit(), snapshots the
    quarantined record's current status together with the number of
    non-Quarantined objects (tasks) added so far.  Tests use commit_snapshots
    to assert the STAGING -> PENDING commit ordering.
    """

    def __init__(self, latest_revision_name: str | None):
        self._added_objects: list[object] = []
        self._quarantined: FakeQuarantined | None = None
        self._latest_revision_name = latest_revision_name
        self._flush_count = 0
        self.add = mock.MagicMock(side_effect=self._add)
        self.begin = mock.MagicMock(return_value=AsyncContextManager())
        self.begin_immediate = mock.AsyncMock()
        self.commit = mock.AsyncMock(side_effect=self._commit)
        self.commit_snapshots: list[tuple[sql.QuarantineStatus | None, int]] = []
        self.flush = mock.AsyncMock(side_effect=self._flush)
        self.merge = mock.AsyncMock(side_effect=self._merge)
        self.refresh = mock.AsyncMock()

    def _add(self, obj: object) -> None:
        # Track the quarantined record separately so commits can read its status
        self._added_objects.append(obj)
        if isinstance(obj, FakeQuarantined):
            self._quarantined = obj

    async def _commit(self) -> None:
        # Everything added that is not the quarantined record counts as a task
        task_count = sum(1 for obj in self._added_objects if not isinstance(obj, FakeQuarantined))
        status = self._quarantined.status if (self._quarantined is not None) else None
        self.commit_snapshots.append((status, task_count))

    async def _flush(self) -> None:
        # Emulate the database assigning a primary key on flush
        self._flush_count += 1
        if (self._quarantined is not None) and (self._quarantined.id is None):
            self._quarantined.id = 42

    async def _merge(self, obj: object) -> object:
        # Identity merge: return the object unchanged
        return obj
+
+
class MockQuarantineSession:
    """Async context manager that yields a MockQuarantineData, like SafeSession."""

    def __init__(self, data: MockQuarantineData):
        self._data = data

    async def __aenter__(self) -> MockQuarantineData:
        return self._data

    async def __aexit__(self, _exc_type, _exc, _tb):
        # Never suppress exceptions raised inside the block
        return False
+
+
def test_generate_quarantine_token_length_and_alphabet():
    """Tokens are 24 characters long and use only the bech32-style alphabet."""
    token = revision._generate_quarantine_token()

    assert len(token) == 24
    assert set(token) <= set(_QUARANTINE_TOKEN_ALPHABET)
+
+
def test_generate_quarantine_token_uniqueness():
    """One hundred generated tokens should all be distinct."""
    generated = [revision._generate_quarantine_token() for _ in range(100)]

    assert len(set(generated)) == 100
+
+
@pytest.mark.asyncio
async def test_no_quarantine_returns_revision_when_no_archives(tmp_path: pathlib.Path):
    """When detection reports no archives, the normal revision commit path runs."""
    release = mock.MagicMock()
    release.phase = sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
    release.project = mock.MagicMock()
    release.project.release_policy = None
    release.release_policy = None
    release.name = sql.release_name("proj", "1.0")

    mock_session = _mock_db_session(release)
    participant = _make_participant()

    # Sentinel returned by the patched _commit_new_revision
    fake_revision = mock.MagicMock(spec=sql.Revision)

    patches = [
        mock.patch.object(revision.aiofiles.os, "makedirs", new_callable=mock.AsyncMock),
        mock.patch.object(revision.aiofiles.os, "rename", new_callable=mock.AsyncMock),
        mock.patch.object(
            revision.attestable,
            "paths_to_hashes_and_sizes",
            new_callable=mock.AsyncMock,
            return_value=({"README.md": "hash1"}, {"README.md": 100}),
        ),
        mock.patch.object(revision.attestable, "write_files_data", new_callable=mock.AsyncMock),
        mock.patch.object(revision.db, "session", return_value=mock_session),
        mock.patch.object(revision.detection, "validate_directory", return_value=[]),
        # No archives detected, so the quarantine branch must not be taken
        mock.patch.object(revision.detection, "detect_archives_requiring_quarantine", return_value=[]),
        mock.patch.object(revision.interaction, "latest_revision", new_callable=mock.AsyncMock, return_value=None),
        mock.patch.object(revision, "_commit_new_revision", new_callable=mock.AsyncMock, return_value=fake_revision),
        mock.patch.object(revision, "_lock_and_merge", new_callable=mock.AsyncMock, return_value=(None, None, release)),
        mock.patch.object(revision, "SafeSession", return_value=MockQuarantineSession(MockQuarantineData(None))),
        mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path),
        mock.patch.object(revision.util, "chmod_directories"),
        mock.patch.object(revision.util, "chmod_files"),
        mock.patch.object(revision.util, "paths_to_inodes", return_value={}),
        mock.patch.object(revision.attestable, "load", new_callable=mock.AsyncMock, return_value=None),
    ]

    with contextlib.ExitStack() as stack:
        _apply_patches(stack, patches)
        result = await participant.create_revision_with_quarantine("proj", "1.0", "test")

    assert result is fake_revision
+
+
@pytest.mark.asyncio
async def test_quarantine_branch_returns_quarantined_when_archives_detected(tmp_path: pathlib.Path):
    """A detected archive produces a PENDING Quarantined record, not a revision."""
    release = mock.MagicMock()
    release.phase = sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
    release.project = mock.MagicMock()
    release.name = sql.release_name("proj", "1.0")

    mock_session = _mock_db_session(release)
    participant = _make_participant()
    safe_data = MockQuarantineData(latest_revision_name=None)

    quarantine_dir = tmp_path / "quarantine" / "proj" / "1.0" / "testtoken"

    patches = [
        mock.patch.object(revision.aiofiles.os, "makedirs", new_callable=mock.AsyncMock),
        mock.patch.object(revision.aiofiles.os, "rename", new_callable=mock.AsyncMock),
        mock.patch.object(
            revision.attestable,
            "paths_to_hashes_and_sizes",
            new_callable=mock.AsyncMock,
            return_value=({"dist/apache-test-1.0.tar.gz": "hash1"}, {"dist/apache-test-1.0.tar.gz": 1000}),
        ),
        mock.patch.object(revision.db, "session", return_value=mock_session),
        mock.patch.object(revision.detection, "validate_directory", return_value=[]),
        mock.patch.object(
            revision.detection,
            "detect_archives_requiring_quarantine",
            return_value=["dist/apache-test-1.0.tar.gz"],
        ),
        mock.patch.object(
            revision.detection,
            "deduplicate_quarantine_archives",
            return_value=[("dist/apache-test-1.0.tar.gz", "hash1")],
        ),
        mock.patch.object(revision.interaction, "latest_revision", new_callable=mock.AsyncMock, return_value=None),
        mock.patch.object(revision.sql, "Quarantined", side_effect=FakeQuarantined),
        mock.patch.object(revision.sql, "Task", side_effect=lambda **kwargs: mock.MagicMock(**kwargs)),
        mock.patch.object(revision, "SafeSession", return_value=MockQuarantineSession(safe_data)),
        mock.patch.object(revision, "_generate_quarantine_token", return_value="aaaaaaaaaaaaaaaa"),
        mock.patch.object(revision.paths, "quarantine_directory", return_value=quarantine_dir),
        mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path),
        mock.patch.object(revision.util, "chmod_directories"),
        mock.patch.object(revision.util, "chmod_files"),
        mock.patch.object(revision.util, "paths_to_inodes", return_value={}),
        mock.patch.object(revision.attestable, "load", new_callable=mock.AsyncMock, return_value=None),
    ]

    with contextlib.ExitStack() as stack:
        _apply_patches(stack, patches)
        result = await participant.create_revision_with_quarantine("proj", "1.0", "test")

    assert isinstance(result, FakeQuarantined)
    assert result.status == sql.QuarantineStatus.PENDING
    assert result.prior_revision_name is None
    assert result.file_metadata is not None
    assert len(result.file_metadata) == 1
    assert result.file_metadata[0].rel_path == "dist/apache-test-1.0.tar.gz"
    assert result.file_metadata[0].content_hash == "hash1"
    assert result.file_metadata[0].size_bytes == 1000

    # First commit happens while STAGING with no task queued; the second
    # commit happens after the status flip with exactly one task queued
    assert safe_data.commit_snapshots == [
        (sql.QuarantineStatus.STAGING, 0),
        (sql.QuarantineStatus.PENDING, 1),
    ]
+
+
@pytest.mark.asyncio
async def test_quarantine_dedup_applied_to_task_args(tmp_path: pathlib.Path):
    """The queued validation task must list only the deduplicated archives."""
    release = mock.MagicMock()
    release.phase = sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
    release.project = mock.MagicMock()
    release.name = sql.release_name("proj", "1.0")

    mock_session = _mock_db_session(release)
    participant = _make_participant()
    safe_data = MockQuarantineData(latest_revision_name=None)

    quarantine_dir = tmp_path / "quarantine" / "proj" / "1.0" / "testtoken"

    patches = [
        mock.patch.object(revision.aiofiles.os, "makedirs", new_callable=mock.AsyncMock),
        mock.patch.object(revision.aiofiles.os, "rename", new_callable=mock.AsyncMock),
        mock.patch.object(
            revision.attestable,
            "paths_to_hashes_and_sizes",
            new_callable=mock.AsyncMock,
            return_value=(
                {"a/test.tar.gz": "h1", "b/test.tar.gz": "h1", "c/other.zip": "h2"},
                {"a/test.tar.gz": 100, "b/test.tar.gz": 100, "c/other.zip": 200},
            ),
        ),
        mock.patch.object(revision.db, "session", return_value=mock_session),
        mock.patch.object(revision.detection, "validate_directory", return_value=[]),
        # Detection returns three candidates; dedup collapses the two h1 copies
        mock.patch.object(
            revision.detection,
            "detect_archives_requiring_quarantine",
            return_value=["a/test.tar.gz", "b/test.tar.gz", "c/other.zip"],
        ),
        mock.patch.object(
            revision.detection,
            "deduplicate_quarantine_archives",
            return_value=[("a/test.tar.gz", "h1"), ("c/other.zip", "h2")],
        ),
        mock.patch.object(revision.interaction, "latest_revision", new_callable=mock.AsyncMock, return_value=None),
        mock.patch.object(revision.sql, "Quarantined", side_effect=FakeQuarantined),
        mock.patch.object(revision.sql, "Task", side_effect=lambda **kwargs: mock.MagicMock(**kwargs)),
        mock.patch.object(revision, "SafeSession", return_value=MockQuarantineSession(safe_data)),
        mock.patch.object(revision, "_generate_quarantine_token", return_value="cccccccccccccccc"),
        mock.patch.object(revision.paths, "quarantine_directory", return_value=quarantine_dir),
        mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path),
        mock.patch.object(revision.util, "chmod_directories"),
        mock.patch.object(revision.util, "chmod_files"),
        mock.patch.object(revision.util, "paths_to_inodes", return_value={}),
        mock.patch.object(revision.attestable, "load", new_callable=mock.AsyncMock, return_value=None),
    ]

    with contextlib.ExitStack() as stack:
        _apply_patches(stack, patches)
        result = await participant.create_revision_with_quarantine("proj", "1.0", "test")

    assert isinstance(result, FakeQuarantined)

    assert safe_data.commit_snapshots == [
        (sql.QuarantineStatus.STAGING, 0),
        (sql.QuarantineStatus.PENDING, 1),
    ]

    # Exactly one task queued, carrying only the deduplicated archive list
    task_objects = [obj for obj in safe_data._added_objects if not isinstance(obj, FakeQuarantined)]
    assert len(task_objects) == 1
    task = task_objects[0]
    archives = task.task_args["archives"]
    assert len(archives) == 2
    assert archives[0] == {"rel_path": "a/test.tar.gz", "content_hash": "h1"}
    assert archives[1] == {"rel_path": "c/other.zip", "content_hash": "h2"}
+
+
@pytest.mark.asyncio
async def test_quarantine_stores_prior_revision_name_from_lock(tmp_path: pathlib.Path):
    """The quarantined record keeps the prior revision reported by the lock step."""
    release = mock.MagicMock()
    release.phase = sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
    release.project = mock.MagicMock()
    release.name = sql.release_name("proj", "1.0")

    old_revision = mock.MagicMock()
    old_revision.name = f"{release.name} 00003"
    old_revision.number = "00003"

    mock_session = _mock_db_session(release)
    participant = _make_participant()
    safe_data = MockQuarantineData(latest_revision_name=old_revision.name)

    quarantine_dir = tmp_path / "quarantine" / "proj" / "1.0" / "testtoken"

    patches = [
        mock.patch.object(revision.aiofiles.os, "makedirs", new_callable=mock.AsyncMock),
        mock.patch.object(revision.aiofiles.os, "rename", new_callable=mock.AsyncMock),
        mock.patch.object(
            revision.attestable,
            "paths_to_hashes_and_sizes",
            new_callable=mock.AsyncMock,
            return_value=({"dist/archive.tar.gz": "newhash"}, {"dist/archive.tar.gz": 500}),
        ),
        mock.patch.object(revision.db, "session", return_value=mock_session),
        mock.patch.object(revision.detection, "validate_directory", return_value=[]),
        mock.patch.object(
            revision.detection,
            "detect_archives_requiring_quarantine",
            return_value=["dist/archive.tar.gz"],
        ),
        mock.patch.object(
            revision.detection,
            "deduplicate_quarantine_archives",
            return_value=[("dist/archive.tar.gz", "newhash")],
        ),
        # latest_revision is presumably consulted twice on this path; return
        # the old revision both times -- confirm if call count changes
        mock.patch.object(
            revision.interaction,
            "latest_revision",
            new_callable=mock.AsyncMock,
            side_effect=[old_revision, old_revision],
        ),
        mock.patch.object(revision.sql, "Quarantined", side_effect=FakeQuarantined),
        mock.patch.object(revision.sql, "Task", side_effect=lambda **kwargs: mock.MagicMock(**kwargs)),
        mock.patch.object(revision, "SafeSession", return_value=MockQuarantineSession(safe_data)),
        mock.patch.object(revision, "_generate_quarantine_token", return_value="bbbbbbbbbbbbbbbb"),
        mock.patch.object(revision.paths, "quarantine_directory", return_value=quarantine_dir),
        mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path),
        mock.patch.object(revision.util, "chmod_directories"),
        mock.patch.object(revision.util, "chmod_files"),
        mock.patch.object(revision.util, "create_hard_link_clone", new_callable=mock.AsyncMock),
        mock.patch.object(revision.util, "paths_to_inodes", return_value={}),
        mock.patch.object(
            revision.attestable, "load", new_callable=mock.AsyncMock, return_value=mock.MagicMock(paths={})
        ),
        mock.patch.object(revision.paths, "release_directory_base", return_value=tmp_path / "releases"),
        mock.patch.object(revision.paths, "release_directory", return_value=tmp_path / "releases" / "00003"),
    ]

    with contextlib.ExitStack() as stack:
        _apply_patches(stack, patches)
        result = await participant.create_revision_with_quarantine("proj", "1.0", "test")

    assert isinstance(result, FakeQuarantined)
    assert result.prior_revision_name == f"{release.name} 00003"
    assert result.status == sql.QuarantineStatus.PENDING

    assert safe_data.commit_snapshots == [
        (sql.QuarantineStatus.STAGING, 0),
        (sql.QuarantineStatus.PENDING, 1),
    ]
+
+
def _apply_patches(stack: contextlib.ExitStack, patches: list[mock._patch]) -> None:
    """Enter every patcher into *stack* so they are all undone when it unwinds."""
    for patcher in patches:
        stack.enter_context(patcher)
+
+
def _make_participant() -> revision.CommitteeParticipant:
    """Construct a CommitteeParticipant wired to mock collaborators.

    Only write.authorisation.asf_uid is given a concrete value; the other
    constructor arguments are opaque mocks, presumably unused by the code
    paths under test -- confirm if new tests start exercising them.
    """
    mock_write = mock.MagicMock()
    mock_write.authorisation.asf_uid = "test"
    return revision.CommitteeParticipant(mock_write, mock.MagicMock(), mock.MagicMock(), "test")
+
+
def _mock_db_session(release: mock.MagicMock) -> mock.MagicMock:
    """Build an async-context-manager session mock.

    Entering the session yields the session itself, and
    ``session.release(...).demand(...)`` resolves to *release*.
    """
    query = mock.MagicMock()
    query.demand = mock.AsyncMock(return_value=release)
    session = mock.AsyncMock()
    session.release = mock.MagicMock(return_value=query)
    session.__aenter__ = mock.AsyncMock(return_value=session)
    session.__aexit__ = mock.AsyncMock(return_value=False)
    return session
diff --git a/tests/unit/test_detection.py b/tests/unit/test_detection.py
index 289c1b8c..97bc40bb 100644
--- a/tests/unit/test_detection.py
+++ b/tests/unit/test_detection.py
@@ -150,6 +150,44 @@ def 
test_check_archive_safety_rejects_symlink_target_outside_root(tmp_path):
     assert any("escapes root" in error for error in errors)
 
 
def test_deduplicate_quarantine_archives_different_extensions_kept_separately():
    """The same content hash under different archive suffixes is not merged."""
    path_to_hash = {"a/src.tar.gz": "h1", "a/src.zip": "h1"}

    result = detection.deduplicate_quarantine_archives(list(path_to_hash), path_to_hash)

    assert len(result) == 2
+
+
def test_deduplicate_quarantine_archives_empty_input():
    """No candidate archives in means no archives out."""
    assert detection.deduplicate_quarantine_archives([], {}) == []
+
+
def test_deduplicate_quarantine_archives_keeps_smallest_rel_path_per_hash_suffix():
    """Among duplicates of a (hash, suffix) pair, the smallest rel_path wins."""
    path_to_hash = {
        "a/archive.tar.gz": "h1",
        "b/archive.tar.gz": "h1",
        "c/other.zip": "h2",
    }
    candidates = ["b/archive.tar.gz", "a/archive.tar.gz", "c/other.zip"]

    result = detection.deduplicate_quarantine_archives(candidates, path_to_hash)

    assert result == [("a/archive.tar.gz", "h1"), ("c/other.zip", "h2")]
+
+
def test_deduplicate_quarantine_archives_tgz_normalises_to_tar_gz():
    """.tgz and .tar.gz archives with equal content collapse to one entry."""
    path_to_hash = {"a/src.tgz": "h1", "b/src.tar.gz": "h1"}

    result = detection.deduplicate_quarantine_archives(["a/src.tgz", "b/src.tar.gz"], path_to_hash)

    assert len(result) == 1
    assert result[0][1] == "h1"
+
+
 def 
test_detect_archives_requiring_quarantine_known_hash_and_different_extension():
     previous = models.AttestableV1(
         paths={"dist/apache-widget-1.0-src.tgz": "h1"},


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to