This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/sbp by this push:
new 47a304da Remove unnecessary archive integrity checks
47a304da is described below
commit 47a304da1816c42822884bc3aef675edbeb86b88
Author: Sean B. Palmer <[email protected]>
AuthorDate: Wed Mar 11 19:23:25 2026 +0000
Remove unnecessary archive integrity checks
---
atr/tasks/__init__.py | 24 ++--------------
atr/tasks/checks/targz.py | 22 ---------------
atr/tasks/checks/zipformat.py | 43 ----------------------------
playwright/test.py | 18 ++++--------
tests/unit/test_archive_member_limit.py | 50 ---------------------------------
5 files changed, 7 insertions(+), 150 deletions(-)
diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py
index a86cc8a0..db144349 100644
--- a/atr/tasks/__init__.py
+++ b/atr/tasks/__init__.py
@@ -336,7 +336,7 @@ def resolve(task_type: sql.TaskType) -> Callable[...,
Awaitable[results.Results
case sql.TaskType.SVN_IMPORT_FILES:
return svn.import_files
case sql.TaskType.TARGZ_INTEGRITY:
- return targz.integrity
+ raise ValueError("TARGZ_INTEGRITY check has been removed;
quarantine extraction validates integrity")
case sql.TaskType.TARGZ_STRUCTURE:
return targz.structure
case sql.TaskType.VOTE_INITIATE:
@@ -344,7 +344,7 @@ def resolve(task_type: sql.TaskType) -> Callable[...,
Awaitable[results.Results
case sql.TaskType.WORKFLOW_STATUS:
return gha.status_check
case sql.TaskType.ZIPFORMAT_INTEGRITY:
- return zipformat.integrity
+ raise ValueError("ZIPFORMAT_INTEGRITY check has been removed;
quarantine extraction validates integrity")
case sql.TaskType.ZIPFORMAT_STRUCTURE:
return zipformat.structure
# NOTE: Do NOT add "case _" here
@@ -421,15 +421,6 @@ async def tar_gz_checks(
await checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
file=path,
)
- targz_i_ck = await checks.resolve_cache_key(
- resolve(sql.TaskType.TARGZ_INTEGRITY),
- targz.CHECK_VERSION_INTEGRITY,
- targz.INPUT_POLICY_KEYS,
- release,
- revision,
- await checks.resolve_extra_args(targz.INPUT_EXTRA_ARGS, release),
- file=path,
- )
targz_s_ck = await checks.resolve_cache_key(
resolve(sql.TaskType.TARGZ_STRUCTURE),
targz.CHECK_VERSION_STRUCTURE,
@@ -452,7 +443,6 @@ async def tar_gz_checks(
),
queued(asf_uid, sql.TaskType.LICENSE_HEADERS, release, revision, path,
check_cache_key=license_h_ck),
queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, path,
check_cache_key=rat_ck),
- queued(asf_uid, sql.TaskType.TARGZ_INTEGRITY, release, revision, path,
check_cache_key=targz_i_ck),
queued(asf_uid, sql.TaskType.TARGZ_STRUCTURE, release, revision, path,
check_cache_key=targz_s_ck),
]
@@ -530,15 +520,6 @@ async def zip_checks(
await checks.resolve_extra_args(rat.INPUT_EXTRA_ARGS, release),
file=path,
)
- zip_i_ck = await checks.resolve_cache_key(
- resolve(sql.TaskType.ZIPFORMAT_INTEGRITY),
- zipformat.CHECK_VERSION_INTEGRITY,
- zipformat.INPUT_POLICY_KEYS,
- release,
- revision,
- await checks.resolve_extra_args(zipformat.INPUT_EXTRA_ARGS, release),
- file=path,
- )
zip_s_ck = await checks.resolve_cache_key(
resolve(sql.TaskType.ZIPFORMAT_STRUCTURE),
zipformat.CHECK_VERSION_STRUCTURE,
@@ -562,7 +543,6 @@ async def zip_checks(
),
queued(asf_uid, sql.TaskType.LICENSE_HEADERS, release, revision, path,
check_cache_key=license_h_ck),
queued(asf_uid, sql.TaskType.RAT_CHECK, release, revision, path,
check_cache_key=rat_ck),
- queued(asf_uid, sql.TaskType.ZIPFORMAT_INTEGRITY, release, revision,
path, check_cache_key=zip_i_ck),
queued(asf_uid, sql.TaskType.ZIPFORMAT_STRUCTURE, release, revision,
path, check_cache_key=zip_s_ck),
]
return await asyncio.gather(*tasks)
diff --git a/atr/tasks/checks/targz.py b/atr/tasks/checks/targz.py
index 0565d957..c4d76bc4 100644
--- a/atr/tasks/checks/targz.py
+++ b/atr/tasks/checks/targz.py
@@ -22,17 +22,14 @@ import pathlib
import stat
from typing import Final
-import atr.archives as archives
import atr.log as log
import atr.models.results as results
-import atr.tarzip as tarzip
import atr.tasks.checks as checks
import atr.util as util
# Release policy fields which this check relies on - used for result caching
INPUT_POLICY_KEYS: Final[list[str]] = []
INPUT_EXTRA_ARGS: Final[list[str]] = []
-CHECK_VERSION_INTEGRITY: Final[str] = "3"
CHECK_VERSION_STRUCTURE: Final[str] = "3"
@@ -42,25 +39,6 @@ class RootDirectoryError(Exception):
...
-async def integrity(args: checks.FunctionArguments) -> results.Results | None:
- """Check the integrity of a .tar.gz file."""
- recorder = await args.recorder()
- if not (artifact_abs_path := await recorder.abs_path()):
- return None
-
- log.info(f"Checking integrity for {artifact_abs_path} (rel:
{args.primary_rel_path})")
-
- chunk_size = 4096
- try:
- size = await asyncio.to_thread(archives.total_size,
str(artifact_abs_path), chunk_size)
- await recorder.success("Able to read all entries of the archive using
tarfile", {"size": size})
- except tarzip.ArchiveMemberLimitExceededError as e:
- await recorder.failure(f"Archive has too many members: {e}", {"error":
str(e)})
- except Exception as e:
- await recorder.failure("Unable to read all entries of the archive
using tarfile", {"error": str(e)})
- return None
-
-
def root_directory(cache_dir: pathlib.Path) -> tuple[str, bytes | None]:
"""Find root directory and read package/package.json from the extracted
tree."""
# The ._ prefix is a metadata convention
diff --git a/atr/tasks/checks/zipformat.py b/atr/tasks/checks/zipformat.py
index 9c1294a7..d991d98c 100644
--- a/atr/tasks/checks/zipformat.py
+++ b/atr/tasks/checks/zipformat.py
@@ -19,44 +19,19 @@ import asyncio
import os
import pathlib
import stat
-import zipfile
from typing import Any, Final
import atr.log as log
import atr.models.results as results
-import atr.tarzip as tarzip
import atr.tasks.checks as checks
import atr.util as util
# Release policy fields which this check relies on - used for result caching
INPUT_POLICY_KEYS: Final[list[str]] = []
INPUT_EXTRA_ARGS: Final[list[str]] = []
-CHECK_VERSION_INTEGRITY: Final[str] = "2"
CHECK_VERSION_STRUCTURE: Final[str] = "2"
-async def integrity(args: checks.FunctionArguments) -> results.Results | None:
- """Check that the zip archive is not corrupted and can be opened."""
- recorder = await args.recorder()
- if not (artifact_abs_path := await recorder.abs_path()):
- return None
-
- log.info(f"Checking zip integrity for {artifact_abs_path} (rel:
{args.primary_rel_path})")
-
- try:
- result_data = await asyncio.to_thread(_integrity_check_core_logic,
str(artifact_abs_path))
- if result_data.get("error"):
- await recorder.failure(result_data["error"], result_data)
- else:
- await recorder.success(
- f"Zip archive integrity OK
({util.plural(result_data['member_count'], 'member')})", result_data
- )
- except Exception as e:
- await recorder.failure("Error checking zip integrity", {"error":
str(e)})
-
- return None
-
-
async def structure(args: checks.FunctionArguments) -> results.Results | None:
"""Check that the zip archive has a single root directory matching the
artifact name."""
# TODO: We can merge this with the targz structure check
@@ -92,24 +67,6 @@ async def structure(args: checks.FunctionArguments) ->
results.Results | None:
return None
-def _integrity_check_core_logic(artifact_path: str) -> dict[str, Any]:
- """Verify that a zip file can be opened and its members listed."""
- try:
- with tarzip.open_archive(artifact_path) as archive:
- # This is a simple check using list members
- # We can use zf.testzip() for CRC checks if needed, though this
will be slower
- member_count = sum(1 for _ in archive)
- return {"member_count": member_count}
- except tarzip.ArchiveMemberLimitExceededError as e:
- return {"error": f"Archive has too many members: {e}"}
- except zipfile.BadZipFile as e:
- return {"error": f"Bad zip file: {e}"}
- except FileNotFoundError:
- return {"error": "File not found"}
- except Exception as e:
- return {"error": f"Unexpected error: {e}"}
-
-
def _structure_check_core_logic(cache_dir: pathlib.Path, artifact_path: str)
-> dict[str, Any]:
"""Verify the internal structure of the zip archive."""
# The ._ prefix is a metadata convention
diff --git a/playwright/test.py b/playwright/test.py
index d7e4db10..b8798b7f 100755
--- a/playwright/test.py
+++ b/playwright/test.py
@@ -733,14 +733,6 @@ def test_checks_06_targz(page: Page, credentials:
Credentials) -> None:
ensure_success_results_are_visible(page, "primary")
- logging.info("Verifying Targz Integrity status")
- integrity_row_locator =
page.locator("tr.atr-result-primary:has(th:has-text('Targz Integrity'))")
- expect(integrity_row_locator).to_be_visible()
- logging.info("Located Targz Integrity row")
- integrity_success_badge = integrity_row_locator.locator("td
span.badge.bg-success:text-is('Success')")
- expect(integrity_success_badge).to_be_visible()
- logging.info("Targz Integrity status verified as Success")
-
logging.info("Verifying Targz Structure status")
structure_row_locator =
page.locator("tr.atr-result-primary:has(th:has-text('Targz Structure'))")
expect(structure_row_locator).to_be_visible()
@@ -785,12 +777,12 @@ def test_checks_07_cache(page: Page, credentials:
Credentials) -> None:
ensure_success_results_are_visible(page, "primary")
- logging.info("Verifying Targz Integrity status exists")
- integrity_row_locator =
page.locator("tr.atr-result-primary:has(th:has-text('Targz Integrity'))")
- expect(integrity_row_locator).to_be_visible()
+ logging.info("Verifying Targz Structure status exists")
+ structure_row_locator =
page.locator("tr.atr-result-primary:has(th:has-text('Targz Structure'))")
+ expect(structure_row_locator).to_be_visible()
- logging.info("Verifying Targz Integrity result is from previous revision")
- check_link_locator = integrity_row_locator.locator("th:has-text('Targz
Integrity') a")
+ logging.info("Verifying Targz Structure result is from previous revision")
+ check_link_locator = structure_row_locator.locator("th:has-text('Targz
Structure') a")
check_link_url = (check_link_locator.get_attribute("href") or
"").replace(ATR_BASE_URL, "")
check_link_locator.click()
logging.info(f"Waiting for raw check result to load: {check_link_url}")
diff --git a/tests/unit/test_archive_member_limit.py
b/tests/unit/test_archive_member_limit.py
index dfa6dea0..89762ebc 100644
--- a/tests/unit/test_archive_member_limit.py
+++ b/tests/unit/test_archive_member_limit.py
@@ -23,10 +23,6 @@ import pytest
import atr.archives as archives
import atr.tarzip as tarzip
-import atr.tasks.checks as checks
-import atr.tasks.checks.targz as targz
-import atr.tasks.checks.zipformat as zipformat
-import tests.unit.recorders as recorders
def test_extract_wraps_member_limit(tmp_path, monkeypatch):
@@ -86,52 +82,6 @@ def test_open_archive_enforces_member_limit_zip(tmp_path):
list(archive)
[email protected]
-async def test_targz_integrity_reports_member_limit(tmp_path, monkeypatch):
- archive_path = tmp_path / "sample.tar"
- _make_tar(archive_path, ["a.txt", "b.txt", "c.txt"])
- recorder = recorders.RecorderStub(archive_path,
"tests.unit.test_archive_member_limit")
-
- original_open = tarzip.open_archive
-
- def limited_open(path: str, *args, **kwargs):
- return original_open(path, max_members=2)
-
- monkeypatch.setattr(tarzip, "open_archive", limited_open)
-
- args = await _args_for(recorder)
- await targz.integrity(args)
-
- assert any("too many members" in message.lower() for _, message, _ in
recorder.messages)
-
-
-def test_zipformat_integrity_reports_member_limit(tmp_path, monkeypatch):
- archive_path = tmp_path / "sample.zip"
- _make_zip(archive_path, ["a.txt", "b.txt", "c.txt"])
-
- original_open = tarzip.open_archive
-
- def limited_open(path: str, *args, **kwargs):
- return original_open(path, max_members=2)
-
- monkeypatch.setattr(tarzip, "open_archive", limited_open)
-
- result = zipformat._integrity_check_core_logic(str(archive_path))
- assert "too many members" in result.get("error", "").lower()
-
-
-async def _args_for(recorder: recorders.RecorderStub) ->
checks.FunctionArguments:
- return checks.FunctionArguments(
- recorder=recorders.get_recorder(recorder),
- asf_uid="",
- project_name="test",
- version_name="test",
- revision_number="00001",
- primary_rel_path=None,
- extra_args={},
- )
-
-
def _make_tar(path, members: list[str]) -> None:
with tarfile.open(path, "w") as tf:
for name in members:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]