This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new 7340df57 Add a method to perform quarantine checks when creating a
revision
7340df57 is described below
commit 7340df576da4415fd1468c953e912f0e7fd14efd
Author: Sean B. Palmer <[email protected]>
AuthorDate: Mon Mar 2 19:43:58 2026 +0000
Add a method to perform quarantine checks when creating a revision
---
atr/detection.py | 14 +
atr/models/sql.py | 1 +
atr/storage/writers/revision.py | 234 +++++++++++++-
migrations/versions/0054_2026.03.02_3799e8e6.py | 38 +++
tests/unit/test_create_revision_quarantine.py | 385 ++++++++++++++++++++++++
tests/unit/test_detection.py | 38 +++
6 files changed, 708 insertions(+), 2 deletions(-)
diff --git a/atr/detection.py b/atr/detection.py
index 07ecf22d..bedf8cee 100644
--- a/atr/detection.py
+++ b/atr/detection.py
@@ -83,6 +83,20 @@ def check_archive_safety(archive_path: str) -> list[str]:
return errors
def deduplicate_quarantine_archives(archive_paths: list[str], path_to_hash: dict[str, str]) -> list[tuple[str, str]]:
    """Collapse archives that share a content hash and archive suffix.

    Each path in ``archive_paths`` is grouped by the pair of its content hash
    (looked up in ``path_to_hash``) and the suffix reported by
    ``_quarantine_archive_suffix`` for its basename. Paths for which that helper
    returns ``None`` are skipped. Within a group only the lexicographically
    smallest path is kept.

    Returns:
        ``(rel_path, content_hash)`` tuples for the kept paths, sorted by path.

    Raises:
        KeyError: If a path in ``archive_paths`` is missing from ``path_to_hash``.
    """
    winners: dict[tuple[str, str], str] = {}
    for candidate in archive_paths:
        # The hash lookup happens first, so an unknown path fails even when
        # its suffix would have caused it to be skipped
        digest = path_to_hash[candidate]
        archive_suffix = _quarantine_archive_suffix(_path_basename(candidate))
        if archive_suffix is not None:
            group = (digest, archive_suffix)
            current = winners.get(group)
            if (current is None) or (candidate < current):
                winners[group] = candidate
    ordered = list(winners.values())
    ordered.sort()
    return [(kept, path_to_hash[kept]) for kept in ordered]
+
+
def detect_archives_requiring_quarantine(
path_to_hash: dict[str, str], previous_attestable: models.AttestableV1 |
None
) -> list[str]:
diff --git a/atr/models/sql.py b/atr/models/sql.py
index 86a5d39b..cbb875b8 100644
--- a/atr/models/sql.py
+++ b/atr/models/sql.py
@@ -165,6 +165,7 @@ class ProjectStatus(enum.StrEnum):
class QuarantineStatus(enum.Enum):
+ STAGING = "STAGING"
PENDING = "PENDING"
FAILED = "FAILED"
diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py
index a2fbf3ac..909a9bd2 100644
--- a/atr/storage/writers/revision.py
+++ b/atr/storage/writers/revision.py
@@ -25,7 +25,7 @@ import pathlib
import secrets
import tempfile
import uuid
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Final
import aiofiles.os
import aioshutil
@@ -47,6 +47,9 @@ if TYPE_CHECKING:
import atr.models.attestable
+_QUARANTINE_TOKEN_ALPHABET: Final[str] = "qpzry9x8gf2tvdw0s3jn54khce6mua7b"
+_QUARANTINE_TOKEN_LENGTH: Final[int] = 24
+
class SafeSession:
def __init__(self, temp_dir: str):
@@ -199,7 +202,7 @@ async def _commit_new_revision(
# Commit to end the transaction started by data.begin_immediate
# We must commit the revision before starting the checks
- # This also releases the write lock
+ # This also releases the write lock obtained in _lock_and_merge
await data.commit()
async with data.begin():
@@ -214,6 +217,16 @@ async def _commit_new_revision(
return new_revision
def _generate_quarantine_token() -> str:
    """Return a fresh random quarantine token.

    The token has ``_QUARANTINE_TOKEN_LENGTH`` characters, each drawn
    independently and uniformly from ``_QUARANTINE_TOKEN_ALPHABET`` using the
    ``secrets`` module.
    """
    # Drawing one character at a time ties the entropy to the configured
    # length and alphabet, rather than to a literal byte count and 5-bit mask
    # that would have to be kept in sync with those constants by hand
    return "".join(secrets.choice(_QUARANTINE_TOKEN_ALPHABET) for _ in range(_QUARANTINE_TOKEN_LENGTH))
+
+
async def _lock_and_merge(
data: db.Session,
*,
@@ -424,6 +437,223 @@ class CommitteeParticipant(FoundationCommitter):
version_name=version_name,
)
async def create_revision_with_quarantine(  # noqa: C901
    self,
    project_name: str,
    version_name: str,
    asf_uid: str,
    description: str | None = None,
    set_local_cache: bool = False,
    reset_to_global_cache: bool = False,
    modify: Callable[[pathlib.Path, sql.Revision | None], Awaitable[None]] | None = None,
    clone_from: str | None = None,
) -> sql.Revision | sql.Quarantined:
    """Create a new revision, quarantining archives that require validation.

    The files of the new revision are staged in a temporary directory under
    ``paths.get_tmp_dir()``. When a previous revision exists it is hard linked
    into that directory first, and ``modify`` (if given) is then awaited with
    the directory and the previous revision. The staged tree is validated,
    its permissions are normalised, and its files are hashed before the write
    lock is taken in ``_lock_and_merge``.

    Args:
        project_name: Project part of the release name.
        version_name: Version part of the release name.
        asf_uid: User id recorded on the revision or quarantine record.
        description: Optional description passed through unchanged.
        set_local_cache: If true, assign a fresh UUID to ``release.check_cache_key``.
        reset_to_global_cache: If true, set ``release.check_cache_key`` to ``None``
            (applied after ``set_local_cache``).
        modify: Optional coroutine function that edits the staged directory.
        clone_from: Revision number to clone from instead of the latest
            revision; merging is disabled when this is given.

    Returns:
        The result of ``_commit_new_revision`` when no archive needs to be
        quarantined, otherwise the ``sql.Quarantined`` record returned by
        ``_quarantine_archives``.

    Raises:
        types.FailedError: If ``detection.validate_directory`` reports errors.

    The release and ``clone_from`` lookups hand a ``RuntimeError`` to
    ``demand``; presumably that error is raised when the lookup finds nothing
    (confirm against ``demand``).
    """
    release_name = sql.release_name(project_name, version_name)
    async with db.session() as data:
        release = await data.release(name=release_name, _release_policy=True, _project_release_policy=True).demand(
            RuntimeError("Release does not exist for new revision creation")
        )
        if clone_from is not None:
            old_revision = await data.revision(release_name=release_name, number=clone_from).demand(
                RuntimeError(f"Revision {clone_from} does not exist")
            )
        else:
            old_revision = await interaction.latest_revision(release)
        if set_local_cache:
            release.check_cache_key = str(uuid.uuid4())
        if reset_to_global_cache:
            release.check_cache_key = None

    if clone_from is not None:
        old_release_dir = paths.release_directory_base(release) / clone_from
    else:
        old_release_dir = paths.release_directory(release)
    merge_enabled = clone_from is None

    # Create a temporary directory
    # We ensure, below, that it's removed on any exception
    # Use the tmp subdirectory of state, to ensure that it is on the same filesystem
    prefix_token = secrets.token_hex(16)
    temp_dir: str = await asyncio.to_thread(tempfile.mkdtemp, prefix=prefix_token + "-", dir=paths.get_tmp_dir())
    temp_dir_path = pathlib.Path(temp_dir)

    # Every step up to taking the write lock shares one cleanup handler, so a
    # failure in any of them, including the validation call itself, removes
    # the temporary directory
    try:
        # The directory was created by mkdtemp, but it's empty
        if old_revision is not None:
            # If this is not the first revision, hard link the previous revision
            await util.create_hard_link_clone(old_release_dir, temp_dir_path, do_not_create_dest_dir=True)
        # The directory is either empty or its files are hard linked to the previous revision
        if modify is not None:
            await modify(temp_dir_path, old_revision)

        validation_errors = await asyncio.to_thread(detection.validate_directory, temp_dir_path)
        if validation_errors:
            raise types.FailedError("File validation failed:\n" + "\n".join(validation_errors))

        # Ensure that the permissions of every directory are 755
        await asyncio.to_thread(util.chmod_directories, temp_dir_path)

        # Make files read only to prevent them from being modified through hard links
        await asyncio.to_thread(util.chmod_files, temp_dir_path, 0o444)

        path_to_hash, path_to_size = await attestable.paths_to_hashes_and_sizes(temp_dir_path)
        parent_revision_number = old_revision.number if old_revision else None
        previous_attestable = None
        if parent_revision_number is not None:
            previous_attestable = await attestable.load(project_name, version_name, parent_revision_number)
        base_inodes: dict[str, int] = {}
        base_hashes: dict[str, str] = {}
        if merge_enabled and (old_revision is not None):
            base_inodes = await asyncio.to_thread(util.paths_to_inodes, old_release_dir)
            base_hashes = dict(previous_attestable.paths) if (previous_attestable is not None) else {}
        n_inodes = await asyncio.to_thread(util.paths_to_inodes, temp_dir_path)
    except (types.FailedError, Exception):
        # FailedError is named explicitly, as it was in the original handler,
        # in case it does not derive from Exception
        await aioshutil.rmtree(temp_dir)
        raise

    async with SafeSession(temp_dir) as data:
        try:
            previous_attestable, prior_revision_name, merged_release = await _lock_and_merge(
                data,
                base_hashes=base_hashes,
                base_inodes=base_inodes,
                merge_enabled=merge_enabled,
                n_inodes=n_inodes,
                old_revision=old_revision,
                path_to_hash=path_to_hash,
                path_to_size=path_to_size,
                previous_attestable=previous_attestable,
                project_name=project_name,
                release=release,
                _release_name=release_name,
                temp_dir_path=temp_dir_path,
                version_name=version_name,
            )
        except Exception:
            await aioshutil.rmtree(temp_dir)
            raise

        # Quarantine only if deduplication leaves at least one archive
        archive_paths = detection.detect_archives_requiring_quarantine(path_to_hash, previous_attestable)
        if archive_paths:
            deduped = detection.deduplicate_quarantine_archives(archive_paths, path_to_hash)
            if deduped:
                return await self._quarantine_archives(
                    data,
                    asf_uid=asf_uid,
                    deduped_archives=deduped,
                    description=description,
                    path_to_size=path_to_size,
                    prior_revision_name=prior_revision_name,
                    project_name=project_name,
                    release_name=release_name,
                    temp_dir=temp_dir,
                    version_name=version_name,
                )

        return await _commit_new_revision(
            data,
            asf_uid=asf_uid,
            description=description,
            path_to_hash=path_to_hash,
            path_to_size=path_to_size,
            previous_attestable=previous_attestable,
            project_name=project_name,
            release=merged_release,
            release_name=release_name,
            temp_dir=temp_dir,
            version_name=version_name,
        )
+
async def _quarantine_archives(
    self,
    data: db.Session,
    *,
    asf_uid: str,
    deduped_archives: list[tuple[str, str]],
    description: str | None,
    path_to_size: dict[str, int],
    prior_revision_name: str | None,
    project_name: str,
    release_name: str,
    temp_dir: str,
    version_name: str,
) -> sql.Quarantined:
    """Move a staged directory into quarantine and queue its validation task.

    A ``sql.Quarantined`` row is added with status ``STAGING`` and a fresh
    token, carrying one ``sql.QuarantineFileEntryV1`` per entry of
    ``deduped_archives``. After a flush, ``temp_dir`` is renamed to
    ``paths.quarantine_directory(quarantined)``. The row is committed, then a
    ``QUARANTINE_VALIDATE`` task is added, the status is switched to
    ``PENDING``, and a second commit is made.

    Returns:
        The quarantine record, with status ``PENDING``.

    Raises:
        Exception: Any error from adding, flushing or moving the directory is
            re-raised after ``temp_dir`` has been removed.
    """
    entries = []
    for archive_path, archive_hash in deduped_archives:
        entries.append(
            sql.QuarantineFileEntryV1(
                rel_path=archive_path,
                size_bytes=path_to_size[archive_path],
                content_hash=archive_hash,
                errors=[],
            )
        )

    quarantined = sql.Quarantined(
        release_name=release_name,
        asf_uid=asf_uid,
        prior_revision_name=prior_revision_name,
        status=sql.QuarantineStatus.STAGING,
        token=_generate_quarantine_token(),
        created=datetime.datetime.now(datetime.UTC),
        file_metadata=entries,
        description=description,
    )

    try:
        data.add(quarantined)
        # NOTE(review): the flush presumably populates fields that
        # quarantine_directory needs; confirm against paths.quarantine_directory
        await data.flush()

        destination = paths.quarantine_directory(quarantined)
        await aiofiles.os.makedirs(destination.parent, exist_ok=True)
        await aiofiles.os.rename(temp_dir, destination)
    except Exception:
        await aioshutil.rmtree(temp_dir)
        raise

    # Release the write lock obtained in _lock_and_merge
    await data.commit()

    # The task is built only after the first commit, which is when quarantined.id is read
    validation_task = sql.Task(
        status=sql.TaskStatus.QUEUED,
        task_type=sql.TaskType.QUARANTINE_VALIDATE,
        task_args={
            "quarantined_id": quarantined.id,
            "archives": [{"rel_path": entry.rel_path, "content_hash": entry.content_hash} for entry in entries],
        },
        asf_uid=asf_uid,
        project_name=project_name,
        version_name=version_name,
        revision_number=None,
        primary_rel_path=None,
    )
    data.add(validation_task)
    quarantined.status = sql.QuarantineStatus.PENDING
    await data.commit()

    return quarantined
+
class CommitteeMember(CommitteeParticipant):
def __init__(
diff --git a/migrations/versions/0054_2026.03.02_3799e8e6.py
b/migrations/versions/0054_2026.03.02_3799e8e6.py
new file mode 100644
index 00000000..81bff007
--- /dev/null
+++ b/migrations/versions/0054_2026.03.02_3799e8e6.py
@@ -0,0 +1,38 @@
+"""Add a status for quarantine staging
+
+Revision ID: 0054_2026.03.02_3799e8e6
+Revises: 0053_2026.02.23_5e288b2d
+Create Date: 2026-03-02 19:40:37.748553+00:00
+"""
+
+from collections.abc import Sequence
+
+import sqlalchemy as sa
+from alembic import op
+
+# Revision identifiers, used by Alembic
+revision: str = "0054_2026.03.02_3799e8e6"
+down_revision: str | None = "0053_2026.02.23_5e288b2d"
+branch_labels: str | Sequence[str] | None = None
+depends_on: str | Sequence[str] | None = None
+
+
def downgrade() -> None:
    """Remove STAGING from the quarantine status enum.

    Rows whose status is STAGING are rewritten to FAILED first, and the
    ``quarantined.status`` column is then altered to the two-value enum.
    """
    op.execute("UPDATE quarantined SET status='FAILED' WHERE status='STAGING'")

    with_staging = sa.Enum("STAGING", "PENDING", "FAILED", name="quarantinestatus")
    without_staging = sa.Enum("PENDING", "FAILED", name="quarantinestatus")
    with op.batch_alter_table("quarantined", schema=None) as batch_op:
        batch_op.alter_column(
            "status",
            existing_type=with_staging,
            type_=without_staging,
            existing_nullable=False,
        )
+
+
def upgrade() -> None:
    """Add STAGING to the quarantine status enum on ``quarantined.status``."""
    without_staging = sa.Enum("PENDING", "FAILED", name="quarantinestatus")
    with_staging = sa.Enum("STAGING", "PENDING", "FAILED", name="quarantinestatus")
    with op.batch_alter_table("quarantined", schema=None) as batch_op:
        batch_op.alter_column(
            "status",
            existing_type=without_staging,
            type_=with_staging,
            existing_nullable=False,
        )
diff --git a/tests/unit/test_create_revision_quarantine.py
b/tests/unit/test_create_revision_quarantine.py
new file mode 100644
index 00000000..33342922
--- /dev/null
+++ b/tests/unit/test_create_revision_quarantine.py
@@ -0,0 +1,385 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import contextlib
+import pathlib
+import unittest.mock as mock
+from typing import Final
+
+import pytest
+
+import atr.models.sql as sql
+import atr.storage.writers.revision as revision
+
+_QUARANTINE_TOKEN_ALPHABET: Final[str] = "qpzry9x8gf2tvdw0s3jn54khce6mua7b"
+
+
class AsyncContextManager:
    """Async context manager that yields ``None`` and never suppresses exceptions."""

    async def __aenter__(self):
        return None

    async def __aexit__(self, *_exc_info):
        # A false return value lets any exception from the body propagate
        return False
+
+
class FakeQuarantined:
    """Plain-object substitute for ``sql.Quarantined`` used by these tests.

    Keyword arguments are copied onto same-named attributes. The ``id`` is
    always 42, ``status`` defaults to STAGING, and ``release`` is a fresh
    ``MagicMock``.
    """

    def __init__(self, **kwargs):
        pick = kwargs.get
        self.id = 42
        self.release_name = pick("release_name", "")
        self.asf_uid = pick("asf_uid", "")
        self.prior_revision_name = pick("prior_revision_name")
        self.status = pick("status", sql.QuarantineStatus.STAGING)
        self.token = pick("token", "")
        self.created = pick("created")
        self.file_metadata = pick("file_metadata")
        self.description = pick("description")
        self.release = mock.MagicMock()
+
+
class MockQuarantineData:
    """Session double that records what is added and what each commit sees.

    ``add``, ``commit``, ``flush`` and ``merge`` are mocks whose side effects
    are the private methods below. Every commit appends a
    ``(quarantine status, number of non-quarantine objects added)`` tuple to
    ``commit_snapshots``.
    """

    def __init__(self, latest_revision_name: str | None):
        self._latest_revision_name = latest_revision_name
        self._quarantined: FakeQuarantined | None = None
        self._added_objects: list[object] = []
        self._flush_count = 0
        self.commit_snapshots: list[tuple[sql.QuarantineStatus | None, int]] = []

        self.add = mock.MagicMock(side_effect=self._add)
        self.begin = mock.MagicMock(return_value=AsyncContextManager())
        self.begin_immediate = mock.AsyncMock()
        self.commit = mock.AsyncMock(side_effect=self._commit)
        self.flush = mock.AsyncMock(side_effect=self._flush)
        self.merge = mock.AsyncMock(side_effect=self._merge)
        self.refresh = mock.AsyncMock()

    def _add(self, obj: object) -> None:
        # Remember every object, and track the most recent quarantine record
        self._added_objects.append(obj)
        if isinstance(obj, FakeQuarantined):
            self._quarantined = obj

    async def _commit(self) -> None:
        others = [obj for obj in self._added_objects if not isinstance(obj, FakeQuarantined)]
        current = self._quarantined
        status = None if (current is None) else current.status
        self.commit_snapshots.append((status, len(others)))

    async def _flush(self) -> None:
        self._flush_count += 1
        current = self._quarantined
        if current is None:
            return
        if current.id is None:
            current.id = 42

    async def _merge(self, obj: object) -> object:
        return obj
+
+
class MockQuarantineSession:
    """Async context manager that yields the ``MockQuarantineData`` it was given."""

    def __init__(self, data: "MockQuarantineData"):
        self._data = data

    async def __aenter__(self) -> "MockQuarantineData":
        return self._data

    async def __aexit__(self, *_exc_info):
        # Never suppress exceptions raised inside the block
        return False
+
+
def test_generate_quarantine_token_length_and_alphabet():
    """A generated token has 24 characters, all from the expected alphabet."""
    generated = revision._generate_quarantine_token()
    allowed = set(_QUARANTINE_TOKEN_ALPHABET)

    assert len(generated) == 24
    assert set(generated) <= allowed
+
+
def test_generate_quarantine_token_uniqueness():
    """One hundred generated tokens are all distinct."""
    generated = [revision._generate_quarantine_token() for _ in range(100)]

    assert len(set(generated)) == 100
+
+
@pytest.mark.asyncio
async def test_no_quarantine_returns_revision_when_no_archives(tmp_path: pathlib.Path):
    """With no archive detected, the value from ``_commit_new_revision`` is returned."""
    release = mock.MagicMock()
    release.phase = sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
    release.project = mock.MagicMock()
    release.project.release_policy = None
    release.release_policy = None
    release.name = sql.release_name("proj", "1.0")

    db_session = _mock_db_session(release)
    writer = _make_participant()

    committed = mock.MagicMock(spec=sql.Revision)

    async_mock = mock.AsyncMock
    patchers = [
        mock.patch.object(revision.aiofiles.os, "makedirs", new_callable=async_mock),
        mock.patch.object(revision.aiofiles.os, "rename", new_callable=async_mock),
        mock.patch.object(
            revision.attestable,
            "paths_to_hashes_and_sizes",
            new_callable=async_mock,
            return_value=({"README.md": "hash1"}, {"README.md": 100}),
        ),
        mock.patch.object(revision.attestable, "write_files_data", new_callable=async_mock),
        mock.patch.object(revision.db, "session", return_value=db_session),
        mock.patch.object(revision.detection, "validate_directory", return_value=[]),
        mock.patch.object(revision.detection, "detect_archives_requiring_quarantine", return_value=[]),
        mock.patch.object(revision.interaction, "latest_revision", new_callable=async_mock, return_value=None),
        mock.patch.object(revision, "_commit_new_revision", new_callable=async_mock, return_value=committed),
        mock.patch.object(revision, "_lock_and_merge", new_callable=async_mock, return_value=(None, None, release)),
        mock.patch.object(revision, "SafeSession", return_value=MockQuarantineSession(MockQuarantineData(None))),
        mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path),
        mock.patch.object(revision.util, "chmod_directories"),
        mock.patch.object(revision.util, "chmod_files"),
        mock.patch.object(revision.util, "paths_to_inodes", return_value={}),
        mock.patch.object(revision.attestable, "load", new_callable=async_mock, return_value=None),
    ]

    with contextlib.ExitStack() as stack:
        _apply_patches(stack, patchers)
        outcome = await writer.create_revision_with_quarantine("proj", "1.0", "test")

    assert outcome is committed
+
+
@pytest.mark.asyncio
async def test_quarantine_branch_returns_quarantined_when_archives_detected(tmp_path: pathlib.Path):
    """A detected archive yields a PENDING quarantine record after two commits."""
    archive = "dist/apache-test-1.0.tar.gz"

    release = mock.MagicMock()
    release.phase = sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
    release.project = mock.MagicMock()
    release.name = sql.release_name("proj", "1.0")

    db_session = _mock_db_session(release)
    writer = _make_participant()
    session_data = MockQuarantineData(latest_revision_name=None)

    quarantine_dir = tmp_path / "quarantine" / "proj" / "1.0" / "testtoken"

    async_mock = mock.AsyncMock
    patchers = [
        mock.patch.object(revision.aiofiles.os, "makedirs", new_callable=async_mock),
        mock.patch.object(revision.aiofiles.os, "rename", new_callable=async_mock),
        mock.patch.object(
            revision.attestable,
            "paths_to_hashes_and_sizes",
            new_callable=async_mock,
            return_value=({archive: "hash1"}, {archive: 1000}),
        ),
        mock.patch.object(revision.db, "session", return_value=db_session),
        mock.patch.object(revision.detection, "validate_directory", return_value=[]),
        mock.patch.object(
            revision.detection,
            "detect_archives_requiring_quarantine",
            return_value=[archive],
        ),
        mock.patch.object(
            revision.detection,
            "deduplicate_quarantine_archives",
            return_value=[(archive, "hash1")],
        ),
        mock.patch.object(revision.interaction, "latest_revision", new_callable=async_mock, return_value=None),
        mock.patch.object(revision.sql, "Quarantined", side_effect=FakeQuarantined),
        mock.patch.object(revision.sql, "Task", side_effect=lambda **kwargs: mock.MagicMock(**kwargs)),
        mock.patch.object(revision, "SafeSession", return_value=MockQuarantineSession(session_data)),
        mock.patch.object(revision, "_generate_quarantine_token", return_value="aaaaaaaaaaaaaaaa"),
        mock.patch.object(revision.paths, "quarantine_directory", return_value=quarantine_dir),
        mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path),
        mock.patch.object(revision.util, "chmod_directories"),
        mock.patch.object(revision.util, "chmod_files"),
        mock.patch.object(revision.util, "paths_to_inodes", return_value={}),
        mock.patch.object(revision.attestable, "load", new_callable=async_mock, return_value=None),
    ]

    with contextlib.ExitStack() as stack:
        _apply_patches(stack, patchers)
        outcome = await writer.create_revision_with_quarantine("proj", "1.0", "test")

    assert isinstance(outcome, FakeQuarantined)
    assert outcome.status == sql.QuarantineStatus.PENDING
    assert outcome.prior_revision_name is None
    assert outcome.file_metadata is not None
    assert len(outcome.file_metadata) == 1
    entry = outcome.file_metadata[0]
    assert entry.rel_path == "dist/apache-test-1.0.tar.gz"
    assert entry.content_hash == "hash1"
    assert entry.size_bytes == 1000

    # First commit: record still STAGING, no task yet; second: PENDING with one task
    expected_commits = [
        (sql.QuarantineStatus.STAGING, 0),
        (sql.QuarantineStatus.PENDING, 1),
    ]
    assert session_data.commit_snapshots == expected_commits
+
+
@pytest.mark.asyncio
async def test_quarantine_dedup_applied_to_task_args(tmp_path: pathlib.Path):
    """The queued task lists only the archives that survive deduplication."""
    release = mock.MagicMock()
    release.phase = sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
    release.project = mock.MagicMock()
    release.name = sql.release_name("proj", "1.0")

    db_session = _mock_db_session(release)
    writer = _make_participant()
    session_data = MockQuarantineData(latest_revision_name=None)

    quarantine_dir = tmp_path / "quarantine" / "proj" / "1.0" / "testtoken"

    hashes = {"a/test.tar.gz": "h1", "b/test.tar.gz": "h1", "c/other.zip": "h2"}
    sizes = {"a/test.tar.gz": 100, "b/test.tar.gz": 100, "c/other.zip": 200}

    async_mock = mock.AsyncMock
    patchers = [
        mock.patch.object(revision.aiofiles.os, "makedirs", new_callable=async_mock),
        mock.patch.object(revision.aiofiles.os, "rename", new_callable=async_mock),
        mock.patch.object(
            revision.attestable,
            "paths_to_hashes_and_sizes",
            new_callable=async_mock,
            return_value=(hashes, sizes),
        ),
        mock.patch.object(revision.db, "session", return_value=db_session),
        mock.patch.object(revision.detection, "validate_directory", return_value=[]),
        mock.patch.object(
            revision.detection,
            "detect_archives_requiring_quarantine",
            return_value=["a/test.tar.gz", "b/test.tar.gz", "c/other.zip"],
        ),
        mock.patch.object(
            revision.detection,
            "deduplicate_quarantine_archives",
            return_value=[("a/test.tar.gz", "h1"), ("c/other.zip", "h2")],
        ),
        mock.patch.object(revision.interaction, "latest_revision", new_callable=async_mock, return_value=None),
        mock.patch.object(revision.sql, "Quarantined", side_effect=FakeQuarantined),
        mock.patch.object(revision.sql, "Task", side_effect=lambda **kwargs: mock.MagicMock(**kwargs)),
        mock.patch.object(revision, "SafeSession", return_value=MockQuarantineSession(session_data)),
        mock.patch.object(revision, "_generate_quarantine_token", return_value="cccccccccccccccc"),
        mock.patch.object(revision.paths, "quarantine_directory", return_value=quarantine_dir),
        mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path),
        mock.patch.object(revision.util, "chmod_directories"),
        mock.patch.object(revision.util, "chmod_files"),
        mock.patch.object(revision.util, "paths_to_inodes", return_value={}),
        mock.patch.object(revision.attestable, "load", new_callable=async_mock, return_value=None),
    ]

    with contextlib.ExitStack() as stack:
        _apply_patches(stack, patchers)
        outcome = await writer.create_revision_with_quarantine("proj", "1.0", "test")

    assert isinstance(outcome, FakeQuarantined)

    expected_commits = [
        (sql.QuarantineStatus.STAGING, 0),
        (sql.QuarantineStatus.PENDING, 1),
    ]
    assert session_data.commit_snapshots == expected_commits

    # Everything added to the session that is not the quarantine record is a task
    queued = [obj for obj in session_data._added_objects if not isinstance(obj, FakeQuarantined)]
    assert len(queued) == 1
    listed = queued[0].task_args["archives"]
    assert len(listed) == 2
    assert listed[0] == {"rel_path": "a/test.tar.gz", "content_hash": "h1"}
    assert listed[1] == {"rel_path": "c/other.zip", "content_hash": "h2"}
+
+
@pytest.mark.asyncio
async def test_quarantine_stores_prior_revision_name_from_lock(tmp_path: pathlib.Path):
    """With an existing revision 00003, the quarantine record names it as prior."""
    archive = "dist/archive.tar.gz"

    release = mock.MagicMock()
    release.phase = sql.ReleasePhase.RELEASE_CANDIDATE_DRAFT
    release.project = mock.MagicMock()
    release.name = sql.release_name("proj", "1.0")

    previous = mock.MagicMock()
    previous.name = f"{release.name} 00003"
    previous.number = "00003"

    db_session = _mock_db_session(release)
    writer = _make_participant()
    session_data = MockQuarantineData(latest_revision_name=previous.name)

    quarantine_dir = tmp_path / "quarantine" / "proj" / "1.0" / "testtoken"
    releases_dir = tmp_path / "releases"

    async_mock = mock.AsyncMock
    patchers = [
        mock.patch.object(revision.aiofiles.os, "makedirs", new_callable=async_mock),
        mock.patch.object(revision.aiofiles.os, "rename", new_callable=async_mock),
        mock.patch.object(
            revision.attestable,
            "paths_to_hashes_and_sizes",
            new_callable=async_mock,
            return_value=({archive: "newhash"}, {archive: 500}),
        ),
        mock.patch.object(revision.db, "session", return_value=db_session),
        mock.patch.object(revision.detection, "validate_directory", return_value=[]),
        mock.patch.object(
            revision.detection,
            "detect_archives_requiring_quarantine",
            return_value=[archive],
        ),
        mock.patch.object(
            revision.detection,
            "deduplicate_quarantine_archives",
            return_value=[(archive, "newhash")],
        ),
        # Two lookups are supplied for latest_revision
        mock.patch.object(
            revision.interaction,
            "latest_revision",
            new_callable=async_mock,
            side_effect=[previous, previous],
        ),
        mock.patch.object(revision.sql, "Quarantined", side_effect=FakeQuarantined),
        mock.patch.object(revision.sql, "Task", side_effect=lambda **kwargs: mock.MagicMock(**kwargs)),
        mock.patch.object(revision, "SafeSession", return_value=MockQuarantineSession(session_data)),
        mock.patch.object(revision, "_generate_quarantine_token", return_value="bbbbbbbbbbbbbbbb"),
        mock.patch.object(revision.paths, "quarantine_directory", return_value=quarantine_dir),
        mock.patch.object(revision.paths, "get_tmp_dir", return_value=tmp_path),
        mock.patch.object(revision.util, "chmod_directories"),
        mock.patch.object(revision.util, "chmod_files"),
        mock.patch.object(revision.util, "create_hard_link_clone", new_callable=async_mock),
        mock.patch.object(revision.util, "paths_to_inodes", return_value={}),
        mock.patch.object(
            revision.attestable, "load", new_callable=async_mock, return_value=mock.MagicMock(paths={})
        ),
        mock.patch.object(revision.paths, "release_directory_base", return_value=releases_dir),
        mock.patch.object(revision.paths, "release_directory", return_value=releases_dir / "00003"),
    ]

    with contextlib.ExitStack() as stack:
        _apply_patches(stack, patchers)
        outcome = await writer.create_revision_with_quarantine("proj", "1.0", "test")

    assert isinstance(outcome, FakeQuarantined)
    assert outcome.prior_revision_name == f"{release.name} 00003"
    assert outcome.status == sql.QuarantineStatus.PENDING

    expected_commits = [
        (sql.QuarantineStatus.STAGING, 0),
        (sql.QuarantineStatus.PENDING, 1),
    ]
    assert session_data.commit_snapshots == expected_commits
+
+
+def _apply_patches(stack: contextlib.ExitStack, patches: list[mock._patch]) ->
None:
+ for p in patches:
+ stack.enter_context(p)
+
+
def _make_participant() -> revision.CommitteeParticipant:
    """Build a ``CommitteeParticipant`` from mocks, with ``asf_uid`` set to "test"."""
    write = mock.MagicMock()
    write.authorisation.asf_uid = "test"
    write_as = mock.MagicMock()
    data = mock.MagicMock()
    return revision.CommitteeParticipant(write, write_as, data, "test")
+
+
+def _mock_db_session(release: mock.MagicMock) -> mock.MagicMock:
+ mock_query = mock.MagicMock()
+ mock_query.demand = mock.AsyncMock(return_value=release)
+ mock_data = mock.AsyncMock()
+ mock_data.release = mock.MagicMock(return_value=mock_query)
+ mock_data.__aenter__ = mock.AsyncMock(return_value=mock_data)
+ mock_data.__aexit__ = mock.AsyncMock(return_value=False)
+ return mock_data
diff --git a/tests/unit/test_detection.py b/tests/unit/test_detection.py
index 289c1b8c..97bc40bb 100644
--- a/tests/unit/test_detection.py
+++ b/tests/unit/test_detection.py
@@ -150,6 +150,44 @@ def
test_check_archive_safety_rejects_symlink_target_outside_root(tmp_path):
assert any("escapes root" in error for error in errors)
def test_deduplicate_quarantine_archives_different_extensions_kept_separately():
    """Two paths with the same hash but different archive types are both kept."""
    candidates = ["a/src.tar.gz", "a/src.zip"]
    hashes = dict.fromkeys(candidates, "h1")

    kept = detection.deduplicate_quarantine_archives(candidates, hashes)

    assert len(kept) == 2
+
+
def test_deduplicate_quarantine_archives_empty_input():
    """No paths in, no paths out."""
    assert detection.deduplicate_quarantine_archives([], {}) == []
+
+
def test_deduplicate_quarantine_archives_keeps_smallest_rel_path_per_hash_suffix():
    """Of two identical ``.tar.gz`` files the smaller path wins; the result is sorted."""
    hashes = {
        "a/archive.tar.gz": "h1",
        "b/archive.tar.gz": "h1",
        "c/other.zip": "h2",
    }
    # The input is deliberately not in sorted order
    candidates = ["b/archive.tar.gz", "a/archive.tar.gz", "c/other.zip"]

    kept = detection.deduplicate_quarantine_archives(candidates, hashes)

    expected = [("a/archive.tar.gz", "h1"), ("c/other.zip", "h2")]
    assert kept == expected
+
+
def test_deduplicate_quarantine_archives_tgz_normalises_to_tar_gz():
    """A ``.tgz`` and a ``.tar.gz`` with the same hash collapse to one entry."""
    candidates = ["a/src.tgz", "b/src.tar.gz"]
    hashes = dict.fromkeys(candidates, "h1")

    kept = detection.deduplicate_quarantine_archives(candidates, hashes)

    assert len(kept) == 1
    _, kept_hash = kept[0]
    assert kept_hash == "h1"
+
+
def
test_detect_archives_requiring_quarantine_known_hash_and_different_extension():
previous = models.AttestableV1(
paths={"dist/apache-widget-1.0-src.tgz": "h1"},
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]