This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/sbp by this push:
     new 47a304da Remove unnecessary archive integrity checks
47a304da is described below

commit 47a304da1816c42822884bc3aef675edbeb86b88
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Mar 11 19:23:25 2026 +0000

    Remove unnecessary archive integrity checks
---
 atr/tasks/__init__.py                   | 24 ++--------------
 atr/tasks/checks/targz.py               | 22 ---------------
 atr/tasks/checks/zipformat.py           | 43 ----------------------------
 playwright/test.py                      | 18 ++++--------
 tests/unit/test_archive_member_limit.py | 50 ---------------------------------
 5 files changed, 7 insertions(+), 150 deletions(-)

diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py
index a86cc8a0..db144349 100644
--- a/atr/tasks/__init__.py
+++ b/atr/tasks/__init__.py
@@ -336,7 +336,7 @@ def resolve(task_type: sql.TaskType) -> Callable[..., 
Awaitable[results.Results
         case sql.TaskType.SVN_IMPORT_FILES:
             return svn.import_files
         case sql.TaskType.TARGZ_INTEGRITY:
-            return targz.integrity
+            raise ValueError("TARGZ_INTEGRITY check has been removed; 
quarantine extraction validates integrity")
         case sql.TaskType.TARGZ_STRUCTURE:
             return targz.structure
         case sql.TaskType.VOTE_INITIATE:
@@ -344,7 +344,7 @@ def resolve(task_type: sql.TaskType) -> Callable[..., 
Awaitable[results.Results
         case sql.TaskType.WORKFLOW_STATUS:
             return gha.status_check
         case sql.TaskType.ZIPFORMAT_INTEGRITY:
-            return zipformat.integrity
+            raise ValueError("ZIPFORMAT_INTEGRITY check has been removed; 
quarantine extraction validates integrity")
         case sql.TaskType.ZIPFORMAT_STRUCTURE:
             return zipformat.structure
         # NOTE: Do NOT add "case _" here
@@ -421,15 +421,6 @@ async def tar_gz_checks(
         await checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
         file=path,
     )
-    targz_i_ck = await checks.resolve_cache_key(
-        resolve(sql.TaskType.TARGZ_INTEGRITY),
-        targz.CHECK_VERSION_INTEGRITY,
-        targz.INPUT_POLICY_KEYS,
-        release,
-        revision,
-        await checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
-        file=path,
-    )
     targz_s_ck = await checks.resolve_cache_key(
         resolve(sql.TaskType.TARGZ_STRUCTURE),
         targz.CHECK_VERSION_STRUCTURE,
@@ -452,7 +443,6 @@ async def tar_gz_checks(
         ),
         queued(asf_uid, sql.TaskType.LICENSE_HEADERS, release, revision, path, 
check_cache_key=license_h_ck),
         queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, path, 
check_cache_key=rat_ck),
-        queued(asf_uid, sql.TaskType.TARGZ_INTEGRITY, release, revision, path, 
check_cache_key=targz_i_ck),
         queued(asf_uid, sql.TaskType.TARGZ_STRUCTURE, release, revision, path, 
check_cache_key=targz_s_ck),
     ]
 
@@ -530,15 +520,6 @@ async def zip_checks(
         await checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
         file=path,
     )
-    zip_i_ck = await checks.resolve_cache_key(
-        resolve(sql.TaskType.ZIPFORMAT_INTEGRITY),
-        zipformat.CHECK_VERSION_INTEGRITY,
-        zipformat.INPUT_POLICY_KEYS,
-        release,
-        revision,
-        await checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
-        file=path,
-    )
     zip_s_ck = await checks.resolve_cache_key(
         resolve(sql.TaskType.ZIPFORMAT_STRUCTURE),
         zipformat.CHECK_VERSION_STRUCTURE,
@@ -562,7 +543,6 @@ async def zip_checks(
         ),
         queued(asf_uid, sql.TaskType.LICENSE_HEADERS, release, revision, path, 
check_cache_key=license_h_ck),
         queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, path, 
check_cache_key=rat_ck),
-        queued(asf_uid, sql.TaskType.ZIPFORMAT_INTEGRITY, release, revision, 
path, check_cache_key=zip_i_ck),
         queued(asf_uid, sql.TaskType.ZIPFORMAT_STRUCTURE, release, revision, 
path, check_cache_key=zip_s_ck),
     ]
     return await asyncio.gather(*tasks)
diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py
index 0565d957..c4d76bc4 100644
--- a/atr/tasks/checks/targz.py
+++ b/atr/tasks/checks/targz.py
@@ -22,17 +22,14 @@ import pathlib
 import stat
 from typing import Final
 
-import atr.archives as archives
 import atr.log as log
 import atr.models.results as results
-import atr.tarzip as tarzip
 import atr.tasks.checks as checks
 import atr.util as util
 
 # Release policy fields which this check relies on - used for result caching
 INPUT_POLICY_KEYS: Final[list[str]] = []
 INPUT_EXTRA_ARGS: Final[list[str]] = []
-CHECK_VERSION_INTEGRITY: Final[str] = "3"
 CHECK_VERSION_STRUCTURE: Final[str] = "3"
 
 
@@ -42,25 +39,6 @@ class RootDirectoryError(Exception):
     ...
 
 
-async def integrity(args: checks.FunctionArguments) -> results.Results | None:
-    """Check the integrity of a .tar.gz file."""
-    recorder = await args.recorder()
-    if not (artifact_abs_path := await recorder.abs_path()):
-        return None
-
-    log.info(f"Checking integrity for {artifact_abs_path} (rel: 
{args.primary_rel_path})")
-
-    chunk_size = 4096
-    try:
-        size = await asyncio.to_thread(archives.total_size, 
str(artifact_abs_path), chunk_size)
-        await recorder.success("Able to read all entries of the archive using 
tarfile", {"size": size})
-    except tarzip.ArchiveMemberLimitExceededError as e:
-        await recorder.failure(f"Archive has too many members: {e}", {"error": 
str(e)})
-    except Exception as e:
-        await recorder.failure("Unable to read all entries of the archive 
using tarfile", {"error": str(e)})
-    return None
-
-
 def root_directory(cache_dir: pathlib.Path) -> tuple[str, bytes | None]:
     """Find root directory and read package/package.json from the extracted 
tree."""
     # The ._ prefix is a metadata convention
diff --git a/atr/tasks/checks/zipformat.py b/atr/tasks/checks/zipformat.py
index 9c1294a7..d991d98c 100644
--- a/atr/tasks/checks/zipformat.py
+++ b/atr/tasks/checks/zipformat.py
@@ -19,44 +19,19 @@ import asyncio
 import os
 import pathlib
 import stat
-import zipfile
 from typing import Any, Final
 
 import atr.log as log
 import atr.models.results as results
-import atr.tarzip as tarzip
 import atr.tasks.checks as checks
 import atr.util as util
 
 # Release policy fields which this check relies on - used for result caching
 INPUT_POLICY_KEYS: Final[list[str]] = []
 INPUT_EXTRA_ARGS: Final[list[str]] = []
-CHECK_VERSION_INTEGRITY: Final[str] = "2"
 CHECK_VERSION_STRUCTURE: Final[str] = "2"
 
 
-async def integrity(args: checks.FunctionArguments) -> results.Results | None:
-    """Check that the zip archive is not corrupted and can be opened."""
-    recorder = await args.recorder()
-    if not (artifact_abs_path := await recorder.abs_path()):
-        return None
-
-    log.info(f"Checking zip integrity for {artifact_abs_path} (rel: 
{args.primary_rel_path})")
-
-    try:
-        result_data = await asyncio.to_thread(_integrity_check_core_logic, 
str(artifact_abs_path))
-        if result_data.get("error"):
-            await recorder.failure(result_data["error"], result_data)
-        else:
-            await recorder.success(
-                f"Zip archive integrity OK 
({util.plural(result_data['member_count'], 'member')})", result_data
-            )
-    except Exception as e:
-        await recorder.failure("Error checking zip integrity", {"error": 
str(e)})
-
-    return None
-
-
 async def structure(args: checks.FunctionArguments) -> results.Results | None:
     """Check that the zip archive has a single root directory matching the 
artifact name."""
     # TODO: We can merge this with the targz structure check
@@ -92,24 +67,6 @@ async def structure(args: checks.FunctionArguments) -> 
results.Results | None:
     return None
 
 
-def _integrity_check_core_logic(artifact_path: str) -> dict[str, Any]:
-    """Verify that a zip file can be opened and its members listed."""
-    try:
-        with tarzip.open_archive(artifact_path) as archive:
-            # This is a simple check using list members
-            # We can use zf.testzip() for CRC checks if needed, though this 
will be slower
-            member_count = sum(1 for _ in archive)
-            return {"member_count": member_count}
-    except tarzip.ArchiveMemberLimitExceededError as e:
-        return {"error": f"Archive has too many members: {e}"}
-    except zipfile.BadZipFile as e:
-        return {"error": f"Bad zip file: {e}"}
-    except FileNotFoundError:
-        return {"error": "File not found"}
-    except Exception as e:
-        return {"error": f"Unexpected error: {e}"}
-
-
 def _structure_check_core_logic(cache_dir: pathlib.Path, artifact_path: str) 
-> dict[str, Any]:
     """Verify the internal structure of the zip archive."""
     # The ._ prefix is a metadata convention
diff --git a/playwright/test.py b/playwright/test.py
index d7e4db10..b8798b7f 100755
--- a/playwright/test.py
+++ b/playwright/test.py
@@ -733,14 +733,6 @@ def test_checks_06_targz(page: Page, credentials: 
Credentials) -> None:
 
     ensure_success_results_are_visible(page, "primary")
 
-    logging.info("Verifying Targz Integrity status")
-    integrity_row_locator = 
page.locator("tr.atr-result-primary:has(th:has-text('Targz Integrity'))")
-    expect(integrity_row_locator).to_be_visible()
-    logging.info("Located Targz Integrity row")
-    integrity_success_badge = integrity_row_locator.locator("td 
span.badge.bg-success:text-is('Success')")
-    expect(integrity_success_badge).to_be_visible()
-    logging.info("Targz Integrity status verified as Success")
-
     logging.info("Verifying Targz Structure status")
     structure_row_locator = 
page.locator("tr.atr-result-primary:has(th:has-text('Targz Structure'))")
     expect(structure_row_locator).to_be_visible()
@@ -785,12 +777,12 @@ def test_checks_07_cache(page: Page, credentials: 
Credentials) -> None:
 
     ensure_success_results_are_visible(page, "primary")
 
-    logging.info("Verifying Targz Integrity status exists")
-    integrity_row_locator = 
page.locator("tr.atr-result-primary:has(th:has-text('Targz Integrity'))")
-    expect(integrity_row_locator).to_be_visible()
+    logging.info("Verifying Targz Structure status exists")
+    structure_row_locator = 
page.locator("tr.atr-result-primary:has(th:has-text('Targz Structure'))")
+    expect(structure_row_locator).to_be_visible()
 
-    logging.info("Verifying Targz Integrity result is from previous revision")
-    check_link_locator = integrity_row_locator.locator("th:has-text('Targz 
Integrity') a")
+    logging.info("Verifying Targz Structure result is from previous revision")
+    check_link_locator = structure_row_locator.locator("th:has-text('Targz 
Structure') a")
     check_link_url = (check_link_locator.get_attribute("href") or 
"").replace(ATR_BASE_URL, "")
     check_link_locator.click()
     logging.info(f"Waiting for raw check result to load: {check_link_url}")
diff --git a/tests/unit/test_archive_member_limit.py 
b/tests/unit/test_archive_member_limit.py
index dfa6dea0..89762ebc 100644
--- a/tests/unit/test_archive_member_limit.py
+++ b/tests/unit/test_archive_member_limit.py
@@ -23,10 +23,6 @@ import pytest
 
 import atr.archives as archives
 import atr.tarzip as tarzip
-import atr.tasks.checks as checks
-import atr.tasks.checks.targz as targz
-import atr.tasks.checks.zipformat as zipformat
-import tests.unit.recorders as recorders
 
 
 def test_extract_wraps_member_limit(tmp_path, monkeypatch):
@@ -86,52 +82,6 @@ def test_open_archive_enforces_member_limit_zip(tmp_path):
             list(archive)
 
 
[email protected]
-async def test_targz_integrity_reports_member_limit(tmp_path, monkeypatch):
-    archive_path = tmp_path / "sample.tar"
-    _make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
-    recorder = recorders.RecorderStub(archive_path, 
"tests.unit.test_archive_member_limit")
-
-    original_open = tarzip.open_archive
-
-    def limited_open(path: str, *args, **kwargs):
-        return original_open(path, max_members=2)
-
-    monkeypatch.setattr(tarzip, "open_archive", limited_open)
-
-    args = await _args_for(recorder)
-    await targz.integrity(args)
-
-    assert any("too many members" in message.lower() for _, message, _ in 
recorder.messages)
-
-
-def test_zipformat_integrity_reports_member_limit(tmp_path, monkeypatch):
-    archive_path = tmp_path / "sample.zip"
-    _make_zip(archive_path, ["a.txt", "b.txt", "c.txt"])
-
-    original_open = tarzip.open_archive
-
-    def limited_open(path: str, *args, **kwargs):
-        return original_open(path, max_members=2)
-
-    monkeypatch.setattr(tarzip, "open_archive", limited_open)
-
-    result = zipformat._integrity_check_core_logic(str(archive_path))
-    assert "too many members" in result.get("error", "").lower()
-
-
-async def _args_for(recorder: recorders.RecorderStub) -> 
checks.FunctionArguments:
-    return checks.FunctionArguments(
-        recorder=recorders.get_recorder(recorder),
-        asf_uid="",
-        project_name="test",
-        version_name="test",
-        revision_number="00001",
-        primary_rel_path=None,
-        extra_args={},
-    )
-
-
 def _make_tar(path, members: list[str]) -> None:
     with tarfile.open(path, "w") as tf:
         for name in members:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to