This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git

commit 4e7d311192d2a568cc83393a1b4b6898e5b038b8
Author: Sean B. Palmer <[email protected]>
AuthorDate: Sun Mar 8 17:02:40 2026 +0000

    Always extract any archives uploaded before quarantine code was added
---
 atr/detection.py                       |  16 ++-
 atr/hashes.py                          |   9 ++
 atr/server.py                          |  12 +++
 atr/tasks/quarantine.py                | 168 ++++++++++++++++++++++++------
 tests/unit/test_detection.py           |  45 ++++++++
 tests/unit/test_quarantine_backfill.py | 181 +++++++++++++++++++++++++++++++++
 tests/unit/test_quarantine_task.py     |  19 ++--
 7 files changed, 407 insertions(+), 43 deletions(-)

diff --git a/atr/detection.py b/atr/detection.py
index 77ceba11..baf3a33d 100644
--- a/atr/detection.py
+++ b/atr/detection.py
@@ -25,6 +25,9 @@ import puremagic
 import atr.models.attestable as models
 import atr.tarzip as tarzip
 
+# TODO: Widen the range of types checked here
+QUARANTINE_ARCHIVE_SUFFIXES: Final[tuple[str, ...]] = (".tar.gz", ".tgz", ".zip")
+
 _BZIP2_TYPES: Final[set[str]] = {"application/x-bzip2"}
 _DEB_TYPES: Final[set[str]] = {"application/vnd.debian.binary-package", 
"application/x-archive"}
 _EXE_TYPES: Final[set[str]] = 
{"application/vnd.microsoft.portable-executable", "application/octet-stream"}
@@ -60,8 +63,6 @@ _EXPECTED: Final[dict[str, set[str]]] = {
 }
 
 _COMPOUND_SUFFIXES: Final = tuple(s for s in _EXPECTED if s.count(".") > 1)
-# TODO: Widen the range of types checked here
-_QUARANTINE_ARCHIVE_SUFFIXES: Final[tuple[str, ...]] = (".tar.gz", ".tgz", ".zip")
 _QUARANTINE_NORMALISED_SUFFIXES: Final[dict[str, str]] = {".tgz": ".tar.gz"}
 
 
@@ -72,6 +73,8 @@ def check_archive_safety(archive_path: str) -> list[str]:
             for member in archive:
                 if _archive_member_has_path_traversal(member.name):
                     errors.append(f"{member.name}: Archive member path 
traversal is not allowed")
+                elif _archive_member_has_disallowed_dotenv_path(member.name):
+                    errors.append(f"{member.name}: .env is only allowed at the archive root")
 
                 if (member.issym() or member.islnk()) and 
_archive_link_escapes_root(
                     member.name, member.linkname, is_hardlink=member.islnk()
@@ -161,6 +164,13 @@ def _archive_link_escapes_root(member_name: str, link_target: str | None, *, is_
     return False
 
 
+def _archive_member_has_disallowed_dotenv_path(path_key: str) -> bool:
+    parts = tuple(part for part in pathlib.PurePosixPath(path_key).parts if part not in ("", "."))
+    if not any(part.lower() == ".env" for part in parts):
+        return False
+    return parts != (".env",)
+
+
 def _archive_member_has_path_traversal(path_key: str) -> bool:
     if path_key.startswith("/"):
         return True
@@ -174,7 +184,7 @@ def _path_basename(path_key: str) -> str:
 
 def _quarantine_archive_suffix(filename: str) -> str | None:
     lower_name = filename.lower()
-    for suffix in _QUARANTINE_ARCHIVE_SUFFIXES:
+    for suffix in QUARANTINE_ARCHIVE_SUFFIXES:
         if lower_name.endswith(suffix):
             return _QUARANTINE_NORMALISED_SUFFIXES.get(suffix, suffix)
     return None
diff --git a/atr/hashes.py b/atr/hashes.py
index 274abf33..5b6cac71 100644
--- a/atr/hashes.py
+++ b/atr/hashes.py
@@ -42,6 +42,15 @@ async def compute_file_hash(path: str | pathlib.Path) -> str:
     return f"blake3:{hasher.hexdigest()}"
 
 
+def compute_file_hash_sync(path: str | pathlib.Path) -> str:
+    path = pathlib.Path(path)
+    hasher = blake3.blake3()
+    with open(path, "rb") as f:
+        while chunk := f.read(_HASH_CHUNK_SIZE):
+            hasher.update(chunk)
+    return f"blake3:{hasher.hexdigest()}"
+
+
 def compute_sha3_256(file_data: bytes) -> str:
     """Compute SHA3-256 hash of file data."""
     return hashlib.sha3_256(file_data).hexdigest()
diff --git a/atr/server.py b/atr/server.py
index 51318cb7..126c4680 100644
--- a/atr/server.py
+++ b/atr/server.py
@@ -62,6 +62,7 @@ import atr.preload as preload
 import atr.ssh as ssh
 import atr.svn.pubsub as pubsub
 import atr.tasks as tasks
+import atr.tasks.quarantine as quarantine
 import atr.template as template
 import atr.user as user
 import atr.util as util
@@ -283,6 +284,8 @@ def _app_setup_lifecycle(app: base.QuartApp, app_config: type[config.AppConfig])
         if migrated > 0:
             log.info(f"Migrated {migrated} attestable files to paths format")
 
+        await _backfill_archive_cache()
+
         await cache.admins_startup_load()
         admins_task = asyncio.create_task(cache.admins_refresh_loop())
         app.extensions["admins_task"] = admins_task
@@ -579,6 +582,15 @@ async def _app_shutdown_log_listeners(app):
         listener.stop()
 
 
+async def _backfill_archive_cache() -> None:
+    backfill_results = await asyncio.to_thread(quarantine.backfill_archive_cache)
+    if backfill_results:
+        total_duration = sum(d for _, _, d in backfill_results)
+        log.info(f"Backfilled {len(backfill_results)} archive cache entries in {total_duration:.1f}s")
+        for archive_path, cache_dir, duration in backfill_results:
+            log.info(f"  {cache_dir} ({duration:.1f}s) from {archive_path}")
+
+
 def _create_app(app_config: type[config.AppConfig]) -> base.QuartApp:
     """Create and configure the application."""
     if os.sep != "/":
diff --git a/atr/tasks/quarantine.py b/atr/tasks/quarantine.py
index 8cb699ea..9ce64daa 100644
--- a/atr/tasks/quarantine.py
+++ b/atr/tasks/quarantine.py
@@ -20,7 +20,10 @@ from __future__ import annotations
 import asyncio
 import datetime
 import errno
+import os
 import pathlib
+import shutil
+import time
 import uuid
 
 import aiofiles.os
@@ -54,6 +57,41 @@ class QuarantineValidate(schema.Strict):
     archives: list[QuarantineArchiveEntry]
 
 
+def backfill_archive_cache() -> list[tuple[str, pathlib.Path, float]]:
+    unfinished_dir = paths.get_unfinished_dir()
+    if not unfinished_dir.is_dir():
+        return []
+
+    cache_archives_dir = paths.get_cache_archives_dir()
+    staging_base = paths.get_tmp_dir()
+    staging_base.mkdir(parents=True, exist_ok=True)
+    extraction_cfg = _extraction_config()
+    seen_cache_keys: set[str] = set()
+    results_list: list[tuple[str, pathlib.Path, float]] = []
+
+    for project_dir in sorted(unfinished_dir.iterdir()):
+        if not project_dir.is_dir():
+            continue
+        for version_dir in sorted(project_dir.iterdir()):
+            if not version_dir.is_dir():
+                continue
+            for revision_dir in sorted(version_dir.iterdir()):
+                if not revision_dir.is_dir():
+                    continue
+                _backfill_revision(
+                    revision_dir,
+                    project_dir.name,
+                    version_dir.name,
+                    cache_archives_dir,
+                    staging_base,
+                    extraction_cfg,
+                    seen_cache_keys,
+                    results_list,
+                )
+
+    return results_list
+
+
 @checks.with_model(QuarantineValidate)
 async def validate(args: QuarantineValidate) -> results.Results | None:
     async with db.session() as data:
@@ -96,6 +134,73 @@ async def validate(args: QuarantineValidate) -> results.Results | None:
     return None
 
 
+def _backfill_extract_archive(
+    archive_path: pathlib.Path,
+    cache_dir: pathlib.Path,
+    staging_base: pathlib.Path,
+    extraction_cfg: exarch.SecurityConfig,
+    results_list: list[tuple[str, pathlib.Path, float]],
+) -> None:
+    try:
+        elapsed = _extract_archive_to_cache_dir(archive_path, cache_dir, staging_base, extraction_cfg)
+        results_list.append((str(archive_path), cache_dir, elapsed))
+    except Exception as exc:
+        log.warning(f"Backfill: failed to extract {archive_path}: {exc}")
+
+
+def _backfill_revision(
+    revision_dir: pathlib.Path,
+    project_name: str,
+    version_name: str,
+    cache_archives_dir: pathlib.Path,
+    staging_base: pathlib.Path,
+    extraction_cfg: exarch.SecurityConfig,
+    seen_cache_keys: set[str],
+    results_list: list[tuple[str, pathlib.Path, float]],
+) -> None:
+    cache_base = cache_archives_dir / project_name / version_name
+    for archive_path in sorted(revision_dir.rglob("*")):
+        if not archive_path.is_file():
+            continue
+        if not _is_archive_suffix(archive_path.name):
+            continue
+        content_hash = hashes.compute_file_hash_sync(archive_path)
+        cache_key = hashes.filesystem_cache_archives_key(content_hash)
+        dedupe_key = f"{project_name}/{version_name}/{cache_key}"
+        if dedupe_key in seen_cache_keys:
+            continue
+        seen_cache_keys.add(dedupe_key)
+        cache_dir = cache_base / cache_key
+        if cache_dir.is_dir():
+            continue
+        _backfill_extract_archive(archive_path, cache_dir, staging_base, extraction_cfg, results_list)
+
+
+def _extract_archive_to_cache_dir(
+    archive_path: pathlib.Path,
+    cache_dir: pathlib.Path,
+    staging_base: pathlib.Path,
+    extraction_cfg: exarch.SecurityConfig,
+) -> float:
+    staging_dir = staging_base / f"archive-extract-{uuid.uuid4().hex}"
+    try:
+        staging_dir.mkdir(parents=False, exist_ok=False)
+        start = time.monotonic()
+        exarch.extract_archive(str(archive_path), str(staging_dir), extraction_cfg)
+        cache_dir.parent.mkdir(parents=True, exist_ok=True)
+        try:
+            os.rename(staging_dir, cache_dir)
+        except OSError as err:
+            if isinstance(err, FileExistsError) or err.errno in {errno.EEXIST, errno.ENOTEMPTY}:
+                shutil.rmtree(staging_dir, ignore_errors=True)
+            else:
+                raise
+        return time.monotonic() - start
+    except Exception:
+        shutil.rmtree(staging_dir, ignore_errors=True)
+        raise
+
+
 async def _extract_archives_to_cache(
     archives: list[QuarantineArchiveEntry],
     quarantine_dir: pathlib.Path,
@@ -103,52 +208,28 @@ async def _extract_archives_to_cache(
     version_name: str,
     file_entries: list[sql.QuarantineFileEntryV1],
 ) -> None:
-    conf = config.get()
     cache_base = paths.get_cache_archives_dir() / project_name / version_name
     staging_base = paths.get_tmp_dir()
     await aiofiles.os.makedirs(cache_base, exist_ok=True)
     await aiofiles.os.makedirs(staging_base, exist_ok=True)
 
-    extraction_config = (
-        exarch.SecurityConfig()
-        .max_file_size(conf.MAX_EXTRACT_SIZE)
-        .max_total_size(conf.MAX_EXTRACT_SIZE)
-        .max_file_count(tarzip.MAX_ARCHIVE_MEMBERS)
-        .max_compression_ratio(100.0)
-        .max_path_depth(32)
-        # Escaping the root is still disallowed by exarch even when symlinks are allowed
-        .allow_symlinks(True)
-        .allow_hardlinks(False)
-        .allow_absolute_paths(False)
-        # Too many archives use this for us to disallow it
-        # We could set to 0o444 after extraction anyway
-        .allow_world_writable(True)
-    )
+    extraction_config = _extraction_config()
 
     for archive in archives:
         cache_dir = cache_base / 
hashes.filesystem_cache_archives_key(archive.content_hash)
         if await aiofiles.os.path.isdir(cache_dir):
             continue
-        archive_path = str(quarantine_dir / archive.rel_path)
-        staging_dir = staging_base / f"archive-extract-{uuid.uuid4().hex}"
-        await aiofiles.os.makedirs(staging_dir, exist_ok=False)
+        archive_path = quarantine_dir / archive.rel_path
         try:
             await asyncio.to_thread(
-                exarch.extract_archive,
+                _extract_archive_to_cache_dir,
                 archive_path,
-                str(staging_dir),
+                cache_dir,
+                staging_base,
                 extraction_config,
             )
-            try:
-                await aiofiles.os.rename(staging_dir, cache_dir)
-            except OSError as err:
-                if isinstance(err, FileExistsError) or err.errno in {errno.EEXIST, errno.ENOTEMPTY}:
-                    await aioshutil.rmtree(staging_dir, ignore_errors=True)
-                else:
-                    raise
         except Exception as exc:
             log.exception(f"Failed to extract archive {archive.rel_path} to 
cache")
-            await aioshutil.rmtree(staging_dir, ignore_errors=True)
             for entry in file_entries:
                 if entry.rel_path == archive.rel_path:
                     entry.errors.append(f"Extraction failed: {exc}")
@@ -156,6 +237,35 @@ async def _extract_archives_to_cache(
             raise
 
 
+def _extraction_config() -> exarch.SecurityConfig:
+    conf = config.get()
+    extraction_config = (
+        exarch.SecurityConfig()
+        .max_file_size(conf.MAX_EXTRACT_SIZE)
+        .max_total_size(conf.MAX_EXTRACT_SIZE)
+        .max_file_count(tarzip.MAX_ARCHIVE_MEMBERS)
+        .max_compression_ratio(100.0)
+        .max_path_depth(32)
+        # Escaping the root is still disallowed by exarch even when symlinks are allowed
+        .allow_symlinks(True)
+        .allow_hardlinks(False)
+        .allow_absolute_paths(False)
+        # Too many archives use this for us to disallow it
+        # We could set to 0o444 after extraction anyway
+        .allow_world_writable(True)
+    )
+    banned_path_components = extraction_config.banned_path_components  # pyright: ignore[reportAttributeAccessIssue]
+    extraction_config.banned_path_components = [  # pyright: ignore[reportAttributeAccessIssue]
+        component for component in banned_path_components if component.lower() != ".env"
+    ]
+    return extraction_config
+
+
+def _is_archive_suffix(filename: str) -> bool:
+    lower_name = filename.lower()
+    return any(lower_name.endswith(suffix) for suffix in detection.QUARANTINE_ARCHIVE_SUFFIXES)
+
+
 async def _mark_failed(
     quarantined: sql.Quarantined,
     file_entries: list[sql.QuarantineFileEntryV1] | None,
diff --git a/tests/unit/test_detection.py b/tests/unit/test_detection.py
index 97bc40bb..57cff789 100644
--- a/tests/unit/test_detection.py
+++ b/tests/unit/test_detection.py
@@ -26,6 +26,26 @@ import atr.models.attestable as models
 type TarArchiveEntry = tuple[str, str, bytes | str]
 
 
+def test_check_archive_safety_accepts_root_dotenv_in_tar_and_zip(tmp_path):
+    tar_path = tmp_path / "safe-dotenv.tar.gz"
+    _write_tar_gz(
+        tar_path,
+        [
+            _tar_regular_file(".env", b"ATR_STATUS=ALPHA\n"),
+        ],
+    )
+    zip_path = tmp_path / "safe-dotenv.zip"
+    _write_zip(
+        zip_path,
+        [
+            (".env", b"ATR_STATUS=ALPHA\n"),
+        ],
+    )
+
+    assert detection.check_archive_safety(str(tar_path)) == []
+    assert detection.check_archive_safety(str(zip_path)) == []
+
+
 def test_check_archive_safety_accepts_safe_tar_gz(tmp_path):
     archive_path = tmp_path / "safe.tar.gz"
     _write_tar_gz(
@@ -109,6 +129,31 @@ def test_check_archive_safety_rejects_hardlink_target_resolved_from_root(tmp_pat
     assert any("escapes root" in error for error in errors)
 
 
+def test_check_archive_safety_rejects_nested_dotenv_in_tar_and_zip(tmp_path):
+    tar_path = tmp_path / "unsafe-nested-dotenv.tar.gz"
+    _write_tar_gz(
+        tar_path,
+        [
+            _tar_regular_file("config/.env", b"SECRET=value\n"),
+        ],
+    )
+    zip_path = tmp_path / "unsafe-nested-dotenv.zip"
+    _write_zip(
+        zip_path,
+        [
+            ("config/.env", b"SECRET=value\n"),
+        ],
+    )
+
+    tar_errors = detection.check_archive_safety(str(tar_path))
+    zip_errors = detection.check_archive_safety(str(zip_path))
+
+    assert any("config/.env" in error for error in tar_errors)
+    assert any(".env is only allowed at the archive root" in error for error in tar_errors)
+    assert any("config/.env" in error for error in zip_errors)
+    assert any(".env is only allowed at the archive root" in error for error in zip_errors)
+
+
 def 
test_check_archive_safety_rejects_parent_path_traversal_in_tar_and_zip(tmp_path):
     tar_path = tmp_path / "unsafe-parent.tar.gz"
     _write_tar_gz(
diff --git a/tests/unit/test_quarantine_backfill.py b/tests/unit/test_quarantine_backfill.py
new file mode 100644
index 00000000..89f1b5b7
--- /dev/null
+++ b/tests/unit/test_quarantine_backfill.py
@@ -0,0 +1,181 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import io
+import pathlib
+import tarfile
+
+import pytest
+
+import atr.hashes as hashes
+import atr.tasks.quarantine as quarantine
+
+
+def test_backfill_already_cached(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
+    unfinished_dir, cache_dir = _setup_dirs(tmp_path)
+    _patch_paths(monkeypatch, tmp_path, unfinished_dir, cache_dir)
+
+    revision_dir = unfinished_dir / "proj" / "1.0" / "00001"
+    revision_dir.mkdir(parents=True)
+    archive_path = revision_dir / "artifact.tar.gz"
+    _create_tar_gz(archive_path)
+
+    content_hash = hashes.compute_file_hash_sync(archive_path)
+    cache_key = hashes.filesystem_cache_archives_key(content_hash)
+    existing_cache = cache_dir / "proj" / "1.0" / cache_key
+    existing_cache.mkdir(parents=True)
+
+    result = quarantine.backfill_archive_cache()
+
+    assert result == []
+
+
+def test_backfill_continues_after_extraction_failure(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
+    unfinished_dir, cache_dir = _setup_dirs(tmp_path)
+    _patch_paths(monkeypatch, tmp_path, unfinished_dir, cache_dir)
+
+    revision_dir = unfinished_dir / "proj" / "1.0" / "00001"
+    revision_dir.mkdir(parents=True)
+    (revision_dir / "bad.tar.gz").write_bytes(b"not a valid archive")
+    _create_tar_gz(revision_dir / "good.tar.gz")
+
+    result = quarantine.backfill_archive_cache()
+
+    assert len(result) == 1
+    assert "good.tar.gz" in result[0][0]
+
+    good_hash = hashes.compute_file_hash_sync(revision_dir / "good.tar.gz")
+    good_cache = cache_dir / "proj" / "1.0" / hashes.filesystem_cache_archives_key(good_hash)
+    assert good_cache.is_dir()
+
+    bad_hash = hashes.compute_file_hash_sync(revision_dir / "bad.tar.gz")
+    bad_cache = cache_dir / "proj" / "1.0" / hashes.filesystem_cache_archives_key(bad_hash)
+    assert not bad_cache.exists()
+
+
+def test_backfill_deduplicates_within_same_version(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
+    unfinished_dir, cache_dir = _setup_dirs(tmp_path)
+    _patch_paths(monkeypatch, tmp_path, unfinished_dir, cache_dir)
+
+    revision_1 = unfinished_dir / "proj" / "1.0" / "00001"
+    revision_1.mkdir(parents=True)
+    _create_tar_gz(revision_1 / "artifact.tar.gz")
+
+    revision_2 = unfinished_dir / "proj" / "1.0" / "00002"
+    revision_2.mkdir(parents=True)
+    (revision_2 / "artifact.tar.gz").write_bytes((revision_1 / "artifact.tar.gz").read_bytes())
+
+    result = quarantine.backfill_archive_cache()
+
+    assert len(result) == 1
+
+
+def test_backfill_empty_unfinished_dir(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
+    unfinished_dir, cache_dir = _setup_dirs(tmp_path)
+    _patch_paths(monkeypatch, tmp_path, unfinished_dir, cache_dir)
+
+    result = quarantine.backfill_archive_cache()
+
+    assert result == []
+
+
+def test_backfill_extracts_same_content_into_different_namespaces(
+    monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path
+) -> None:
+    unfinished_dir, cache_dir = _setup_dirs(tmp_path)
+    _patch_paths(monkeypatch, tmp_path, unfinished_dir, cache_dir)
+
+    revision_a = unfinished_dir / "projA" / "1.0" / "00001"
+    revision_a.mkdir(parents=True)
+    _create_tar_gz(revision_a / "artifact.tar.gz")
+
+    revision_b = unfinished_dir / "projB" / "2.0" / "00001"
+    revision_b.mkdir(parents=True)
+    (revision_b / "artifact.tar.gz").write_bytes((revision_a / "artifact.tar.gz").read_bytes())
+
+    result = quarantine.backfill_archive_cache()
+
+    assert len(result) == 2
+
+    content_hash = hashes.compute_file_hash_sync(revision_a / "artifact.tar.gz")
+    cache_key = hashes.filesystem_cache_archives_key(content_hash)
+    assert (cache_dir / "projA" / "1.0" / cache_key).is_dir()
+    assert (cache_dir / "projB" / "2.0" / cache_key).is_dir()
+
+
+def test_backfill_extracts_uncached_archive(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
+    unfinished_dir, cache_dir = _setup_dirs(tmp_path)
+    _patch_paths(monkeypatch, tmp_path, unfinished_dir, cache_dir)
+
+    revision_dir = unfinished_dir / "proj" / "1.0" / "00001"
+    revision_dir.mkdir(parents=True)
+    archive_path = revision_dir / "artifact.tar.gz"
+    _create_tar_gz(archive_path)
+    (revision_dir / "artifact.tar.gz.sha512").write_text("somehash  artifact.tar.gz")
+
+    result = quarantine.backfill_archive_cache()
+
+    assert len(result) == 1
+    archive_path_str, result_cache_dir, duration = result[0]
+    assert archive_path_str == str(archive_path)
+    assert result_cache_dir.is_dir()
+    assert (result_cache_dir / "README.txt").read_text() == "Hello"
+    assert duration >= 0
+
+
+def test_backfill_skips_non_archive_files(monkeypatch: pytest.MonkeyPatch, tmp_path: pathlib.Path) -> None:
+    unfinished_dir, cache_dir = _setup_dirs(tmp_path)
+    _patch_paths(monkeypatch, tmp_path, unfinished_dir, cache_dir)
+
+    revision_dir = unfinished_dir / "proj" / "1.0" / "00001"
+    revision_dir.mkdir(parents=True)
+    (revision_dir / "artifact.tar.gz.sha512").write_text("somehash  artifact.tar.gz")
+    (revision_dir / "artifact.tar.gz.asc").write_bytes(b"signature")
+
+    result = quarantine.backfill_archive_cache()
+
+    assert result == []
+
+
+def _create_tar_gz(path: pathlib.Path) -> None:
+    buf = io.BytesIO()
+    with tarfile.open(fileobj=buf, mode="w:gz") as tar:
+        info = tarfile.TarInfo(name="README.txt")
+        content = b"Hello"
+        info.size = len(content)
+        tar.addfile(info, io.BytesIO(content))
+    path.write_bytes(buf.getvalue())
+
+
+def _patch_paths(
+    monkeypatch: pytest.MonkeyPatch,
+    tmp_path: pathlib.Path,
+    unfinished_dir: pathlib.Path,
+    cache_dir: pathlib.Path,
+) -> None:
+    monkeypatch.setattr(quarantine.paths, "get_unfinished_dir", lambda: unfinished_dir)
+    monkeypatch.setattr(quarantine.paths, "get_cache_archives_dir", lambda: cache_dir)
+    monkeypatch.setattr(quarantine.paths, "get_tmp_dir", lambda: tmp_path / "temporary")
+
+
+def _setup_dirs(tmp_path: pathlib.Path) -> tuple[pathlib.Path, pathlib.Path]:
+    unfinished_dir = tmp_path / "unfinished"
+    cache_dir = tmp_path / "cache" / "archives"
+    staging_dir = tmp_path / "temporary"
+    for d in [unfinished_dir, cache_dir, staging_dir]:
+        d.mkdir(parents=True)
+    return unfinished_dir, cache_dir
diff --git a/tests/unit/test_quarantine_task.py b/tests/unit/test_quarantine_task.py
index c78600cf..dfbe1072 100644
--- a/tests/unit/test_quarantine_task.py
+++ b/tests/unit/test_quarantine_task.py
@@ -21,7 +21,6 @@ import pathlib
 import tarfile
 import unittest.mock as mock
 
-import aiofiles
 import pytest
 
 import atr.models.safe as safe
@@ -82,17 +81,16 @@ async def test_extract_archives_to_cache_discards_staging_dir_on_enotempty_colli
         recorded["staging_dir"] = staging_dir
         (staging_dir / "content.txt").write_text("staged")
 
-    async def rename(src: pathlib.Path | str, dst: pathlib.Path | str) -> None:
+    def rename(src: pathlib.Path | str, dst: pathlib.Path | str) -> None:
         dst_path = pathlib.Path(dst)
-        await aiofiles.os.makedirs(dst_path, exist_ok=True)
-        async with aiofiles.open(dst_path / "winner.txt", "w") as f:
-            await f.write("winner")
+        dst_path.mkdir(parents=True, exist_ok=True)
+        (dst_path / "winner.txt").write_text("winner")
         raise OSError(errno.ENOTEMPTY, "Directory not empty", str(dst_path))
 
     monkeypatch.setattr(quarantine.paths, "get_cache_archives_dir", lambda: 
cache_root)
     monkeypatch.setattr(quarantine.paths, "get_tmp_dir", lambda: tmp_root)
     monkeypatch.setattr(quarantine.exarch, "extract_archive", extract_archive)
-    monkeypatch.setattr(quarantine.aiofiles.os, "rename", rename)
+    monkeypatch.setattr(quarantine.os, "rename", rename)
 
     entries = [sql.QuarantineFileEntryV1(rel_path=archive_rel_path, 
size_bytes=7, content_hash="blake3:ghi", errors=[])]
 
@@ -128,17 +126,16 @@ async def test_extract_archives_to_cache_discards_staging_dir_when_other_worker_
         recorded["staging_dir"] = staging_dir
         (staging_dir / "content.txt").write_text("staged")
 
-    async def rename(src: pathlib.Path | str, dst: pathlib.Path | str) -> None:
+    def rename(src: pathlib.Path | str, dst: pathlib.Path | str) -> None:
         dst_path = pathlib.Path(dst)
-        await aiofiles.os.makedirs(dst_path, exist_ok=True)
-        async with aiofiles.open(dst_path / "winner.txt", "w") as f:
-            await f.write("winner")
+        dst_path.mkdir(parents=True, exist_ok=True)
+        (dst_path / "winner.txt").write_text("winner")
         raise FileExistsError(dst)
 
     monkeypatch.setattr(quarantine.paths, "get_cache_archives_dir", lambda: 
cache_root)
     monkeypatch.setattr(quarantine.paths, "get_tmp_dir", lambda: tmp_root)
     monkeypatch.setattr(quarantine.exarch, "extract_archive", extract_archive)
-    monkeypatch.setattr(quarantine.aiofiles.os, "rename", rename)
+    monkeypatch.setattr(quarantine.os, "rename", rename)
 
     entries = [sql.QuarantineFileEntryV1(rel_path=archive_rel_path, 
size_bytes=7, content_hash="blake3:def", errors=[])]
 


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to