This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-release.git
The following commit(s) were added to refs/heads/main by this push:
new 6180535 Add a check for .sha256 and .sha512 files
6180535 is described below
commit 61805357dc14aeb472183f2e8155a0f5ea5511a5
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue Mar 25 17:05:11 2025 +0200
Add a check for .sha256 and .sha512 files
---
atr/tasks/__init__.py | 31 +++++++++++++++++++++
atr/tasks/hashing.py | 77 +++++++++++++++++++++++++++++++++++++++++++++++++++
atr/tasks/rsync.py | 3 ++
atr/worker.py | 2 ++
4 files changed, 113 insertions(+)
diff --git a/atr/tasks/__init__.py b/atr/tasks/__init__.py
index 441ace6..d861398 100644
--- a/atr/tasks/__init__.py
+++ b/atr/tasks/__init__.py
@@ -21,6 +21,7 @@ import aiofiles.os
import atr.db.models as models
import atr.tasks.archive as archive
+import atr.tasks.hashing as hashing
import atr.util as util
@@ -55,6 +56,36 @@ async def asc_checks(release: models.Release,
signature_path: str) -> list[model
return tasks
async def sha_checks(release: models.Release, hash_file: str) -> list[models.Task]:
    """Queue a verify_file_hash task for a .sha256 or .sha512 file.

    The hash file is expected to sit next to the artifact it describes in the
    candidate draft directory; the artifact path is the hash file path with
    its suffix stripped. Raises RuntimeError for any other suffix.
    """
    draft_dir = util.get_candidate_draft_dir()
    full_hash_file_path = str(draft_dir / release.project.name / release.version / hash_file)
    modified = int(await aiofiles.os.path.getmtime(full_hash_file_path))

    # Map the file suffix to the digest algorithm it declares.
    for suffix in (".sha512", ".sha256"):
        if hash_file.endswith(suffix):
            algorithm = suffix.removeprefix(".")
            original_file = full_hash_file_path.removesuffix(suffix)
            break
    else:
        raise RuntimeError(f"Unsupported hash file: {hash_file}")

    check_args = hashing.Check(
        original_file=original_file,
        hash_file=full_hash_file_path,
        algorithm=algorithm,
    )
    return [
        models.Task(
            status=models.TaskStatus.QUEUED,
            task_type="verify_file_hash",
            task_args=check_args.model_dump(),
            release_name=release.name,
            path=hash_file,
            modified=modified,
        )
    ]
+
+
async def tar_gz_checks(release: models.Release, path: str, signature_path:
str | None = None) -> list[models.Task]:
# TODO: We should probably use an enum for task_type
full_path = str(util.get_candidate_draft_dir() / release.project.name /
release.version / path)
diff --git a/atr/tasks/hashing.py b/atr/tasks/hashing.py
new file mode 100644
index 0000000..f445f33
--- /dev/null
+++ b/atr/tasks/hashing.py
@@ -0,0 +1,77 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import hashlib
+import logging
+import secrets
+from typing import Any, Final
+
+import aiofiles
+import pydantic
+
+import atr.tasks.task as task
+from atr.db import models
+
+_LOGGER: Final = logging.getLogger(__name__)
+
+
class Check(pydantic.BaseModel):
    """Arguments for the verify_file_hash task, as stored in Task.task_args."""

    # Artifact whose digest is recomputed and compared.
    original_file: str = pydantic.Field(..., description="Path to the original file")
    # File containing the expected digest (optionally followed by a filename).
    hash_file: str = pydantic.Field(..., description="Path to the hash file")
    # Either "sha256" or "sha512"; anything else is rejected by the checker.
    algorithm: str = pydantic.Field(..., description="Hash algorithm to use")
+
+
async def check(args: dict[str, Any]) -> tuple[models.TaskStatus, str | None, tuple[Any, ...]]:
    """Verify a file against its .sha256/.sha512 hash file.

    args is the serialized ``Check`` model. Returns the (status, error,
    results) triple produced by the underlying check.
    """
    data = Check(**args)
    # Propagate the status from _check_core: the previous version returned
    # task.COMPLETED / None unconditionally, so a hash mismatch (FAILED) was
    # silently reported as success, and the log message claimed a "computed
    # size" that was actually the status value.
    status, error, results = await _check_core(data.original_file, data.hash_file, data.algorithm)
    # results already has the tuple[Any, ...] shape declared by _check_core,
    # so no task.results_as_tuple normalization is needed here.
    _LOGGER.info(f"Checked hash of {data.original_file}: status {status}")
    return status, error, results
+
+
async def _check_core(
    original_file: str, hash_file: str, algorithm: str
) -> tuple[models.TaskStatus, str | None, tuple[Any, ...]]:
    """Compare the digest of original_file with the value stored in hash_file.

    Returns (status, error, results) where results carries the computed and
    expected digests. Raises task.Error for an unsupported algorithm.
    """
    hash_funcs = {"sha256": hashlib.sha256, "sha512": hashlib.sha512}
    if algorithm not in hash_funcs:
        raise task.Error(f"Unsupported hash algorithm: {algorithm}")
    h = hash_funcs[algorithm]()
    # Stream the artifact in chunks so large files are not loaded into memory.
    async with aiofiles.open(original_file, mode="rb") as f:
        while chunk := await f.read(65536):
            h.update(chunk)
    computed_hash = h.hexdigest()
    async with aiofiles.open(hash_file) as f:
        content = await f.read()
    # Hash files may be "HASH" or "HASH  FILENAME\n". The previous version
    # indexed [0] directly, so an empty hash file raised an unhandled
    # IndexError instead of producing a FAILED result.
    fields = content.split()
    if not fields:
        return (
            task.FAILED,
            f"Empty hash file: {hash_file}",
            ({"computed_hash": computed_hash, "expected_hash": None},),
        )
    # Hex digests are case-insensitive; hexdigest() emits lowercase, so
    # normalize the expected value before the constant-time comparison.
    expected_hash = fields[0].lower()
    if secrets.compare_digest(computed_hash, expected_hash):
        return task.COMPLETED, None, ({"computed_hash": computed_hash, "expected_hash": expected_hash},)
    return (
        task.FAILED,
        f"Hash mismatch for {original_file}",
        ({"computed_hash": computed_hash, "expected_hash": expected_hash},),
    )
diff --git a/atr/tasks/rsync.py b/atr/tasks/rsync.py
index e30701f..764eb55 100644
--- a/atr/tasks/rsync.py
+++ b/atr/tasks/rsync.py
@@ -64,6 +64,9 @@ async def _analyse_core(asf_uid: str, project_name: str,
release_version: str) -
if path.name.endswith(".asc"):
for task in await tasks.asc_checks(release, str(path)):
data.add(task)
+ elif path.name.endswith(".sha256") or
path.name.endswith(".sha512"):
+ for task in await tasks.sha_checks(release, str(path)):
+ data.add(task)
elif path.name.endswith(".tar.gz"):
for task in await tasks.tar_gz_checks(release, str(path)):
data.add(task)
diff --git a/atr/worker.py b/atr/worker.py
index 968051e..a1d9911 100644
--- a/atr/worker.py
+++ b/atr/worker.py
@@ -38,6 +38,7 @@ import sqlmodel
import atr.db as db
import atr.tasks.archive as archive
import atr.tasks.bulk as bulk
+import atr.tasks.hashing as hashing
import atr.tasks.license as license
import atr.tasks.mailtest as mailtest
import atr.tasks.rat as rat
@@ -186,6 +187,7 @@ async def _task_process(task_id: int, task_type: str,
task_args: list[str] | dic
"verify_archive_integrity": archive.check_integrity,
"package_bulk_download": bulk.download,
"rsync_analyse": rsync.analyse,
+ "verify_file_hash": hashing.check,
}
# TODO: These are synchronous
# We plan to convert these to async dict handlers
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]