This is an automated email from the ASF dual-hosted git repository.
sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
The following commit(s) were added to refs/heads/main by this push:
new 81a5678 Record some attestable file metadata
81a5678 is described below
commit 81a5678539e41fdb002de278de4ba9a9f1e1dfa7
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue Dec 30 19:41:07 2025 +0000
Record some attestable file metadata
---
atr/attestable.py | 148 ++++++++++++++++++++++++++++++++++++++++
atr/config.py | 2 +
atr/models/attestable.py | 33 +++++++++
atr/storage/writers/revision.py | 6 ++
atr/util.py | 41 +++++------
5 files changed, 210 insertions(+), 20 deletions(-)
diff --git a/atr/attestable.py b/atr/attestable.py
new file mode 100644
index 0000000..01ed8ed
--- /dev/null
+++ b/atr/attestable.py
@@ -0,0 +1,148 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING, Final
+
+import aiofiles
+import aiofiles.os
+import blake3
+import pydantic
+
+import atr.log as log
+import atr.models.attestable as models
+import atr.util as util
+
+if TYPE_CHECKING:
+ import pathlib
+
+_HASH_CHUNK_SIZE: Final[int] = 4 * 1024 * 1024
+
+
+def attestable_path(project_name: str, version_name: str, revision_number: str) -> pathlib.Path:
+ return util.get_attestable_dir() / project_name / version_name / f"{revision_number}.json"
+
+
+async def compute_file_hash(path: pathlib.Path) -> str:
+ hasher = blake3.blake3()
+ async with aiofiles.open(path, "rb") as f:
+ while chunk := await f.read(_HASH_CHUNK_SIZE):
+ hasher.update(chunk)
+ return f"blake3:{hasher.hexdigest()}"
+
+
+async def load(
+ project_name: str,
+ version_name: str,
+ revision_number: str,
+) -> models.AttestableV1 | None:
+ file_path = attestable_path(project_name, version_name, revision_number)
+ if not await aiofiles.os.path.isfile(file_path):
+ return None
+ try:
+ async with aiofiles.open(file_path, encoding="utf-8") as f:
+ data = json.loads(await f.read())
+ return models.AttestableV1.model_validate(data)
+ except (json.JSONDecodeError, pydantic.ValidationError) as e:
+ log.warning(f"Could not parse {file_path}, starting fresh: {e}")
+ return None
+
+
+async def write(
+ release_directory: pathlib.Path,
+ project_name: str,
+ version_name: str,
+ revision_number: str,
+ uploader_uid: str,
+ parent_revision_number: str | None,
+) -> None:
+ previous: models.AttestableV1 | None = None
+ if parent_revision_number is not None:
+ previous = await load(project_name, version_name, parent_revision_number)
+ result = await _generate(release_directory, revision_number, uploader_uid, previous)
+ file_path = attestable_path(project_name, version_name, revision_number)
+ await util.atomic_write_file(file_path, result.model_dump_json(indent=2))
+
+
+def _compute_hashes_with_attribution(
+ current_hash_to_paths: dict[str, set[str]],
+ path_to_size: dict[str, int],
+ previous: models.AttestableV1 | None,
+ uploader_uid: str,
+ revision_number: str,
+) -> dict[str, models.HashEntry]:
+ previous_hash_to_paths: dict[str, set[str]] = {}
+ if previous is not None:
+ for path_key, hash_ref in previous.paths.items():
+ previous_hash_to_paths.setdefault(hash_ref, set()).add(path_key)
+
+ new_hashes: dict[str, models.HashEntry] = {}
+ if previous is not None:
+ for hash_key, hash_entry in previous.hashes.items():
+ new_hashes[hash_key] = hash_entry.model_copy(deep=True)
+
+ for hash_ref, current_paths in current_hash_to_paths.items():
+ previous_paths = previous_hash_to_paths.get(hash_ref, set())
+ sample_path = next(iter(current_paths))
+ file_size = path_to_size[sample_path]
+
+ if hash_ref not in new_hashes:
+ new_hashes[hash_ref] = models.HashEntry(
+ size=file_size,
+ uploaders=[(uploader_uid, revision_number)],
+ )
+ elif len(current_paths) > len(previous_paths):
+ existing_entries = set(new_hashes[hash_ref].uploaders)
+ if (uploader_uid, revision_number) not in existing_entries:
+ new_hashes[hash_ref].uploaders.append((uploader_uid, revision_number))
+
+ return new_hashes
+
+
+async def _generate(
+ directory: pathlib.Path,
+ revision_number: str,
+ uploader_uid: str,
+ previous: models.AttestableV1 | None,
+) -> models.AttestableV1:
+ current_path_to_hash: dict[str, str] = {}
+ current_hash_to_paths: dict[str, set[str]] = {}
+ path_to_size: dict[str, int] = {}
+
+ async for rel_path in util.paths_recursive(directory):
+ full_path = directory / rel_path
+ path_key = str(rel_path)
+ if "\\" in path_key:
+ # TODO: We should centralise this, and forbid some other characters too
+ raise ValueError(f"Backslash in path is forbidden: {path_key}")
+ hash_ref = await compute_file_hash(full_path)
+ file_size = (await aiofiles.os.stat(full_path)).st_size
+
+ current_path_to_hash[path_key] = hash_ref
+ path_to_size[path_key] = file_size
+ current_hash_to_paths.setdefault(hash_ref, set()).add(path_key)
+
+ new_hashes = _compute_hashes_with_attribution(
+ current_hash_to_paths, path_to_size, previous, uploader_uid, revision_number
+ )
+
+ return models.AttestableV1(
+ paths=dict(current_path_to_hash),
+ hashes=dict(new_hashes),
+ )
diff --git a/atr/config.py b/atr/config.py
index a62470a..77d7d32 100644
--- a/atr/config.py
+++ b/atr/config.py
@@ -70,6 +70,7 @@ class AppConfig:
# TODO: By convention this is at /x1/, but we can symlink it here perhaps?
# TODO: We need to get Puppet to check SVN out initially, or do it manually
SVN_STORAGE_DIR = os.path.join(STATE_DIR, "svn")
+ ATTESTABLE_STORAGE_DIR = os.path.join(STATE_DIR, "attestable")
SQLITE_DB_PATH = decouple.config("SQLITE_DB_PATH", default="atr.db")
STORAGE_AUDIT_LOG_FILE = os.path.join(STATE_DIR, "storage-audit.log")
PERFORMANCE_LOG_FILE = os.path.join(STATE_DIR, "route-performance.log")
@@ -150,6 +151,7 @@ def get() -> type[AppConfig]:
(config.FINISHED_STORAGE_DIR, "FINISHED_STORAGE_DIR"),
(config.UNFINISHED_STORAGE_DIR, "UNFINISHED_STORAGE_DIR"),
(config.SVN_STORAGE_DIR, "SVN_STORAGE_DIR"),
+ (config.ATTESTABLE_STORAGE_DIR, "ATTESTABLE_STORAGE_DIR"),
(config.STORAGE_AUDIT_LOG_FILE, "STORAGE_AUDIT_LOG_FILE"),
(config.PERFORMANCE_LOG_FILE, "PERFORMANCE_LOG_FILE"),
]
diff --git a/atr/models/attestable.py b/atr/models/attestable.py
new file mode 100644
index 0000000..f49cc1f
--- /dev/null
+++ b/atr/models/attestable.py
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Annotated, Literal
+
+import pydantic
+
+import atr.models.schema as schema
+
+
+class HashEntry(schema.Strict):
+ size: int
+ uploaders: list[Annotated[tuple[str, str], pydantic.BeforeValidator(tuple)]]
+
+
+class AttestableV1(schema.Strict):
+ version: Literal[1] = 1
+ paths: dict[str, str] = schema.factory(dict)
+ hashes: dict[str, HashEntry] = schema.factory(dict)
diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py
index 24e681a..ddc89e5 100644
--- a/atr/storage/writers/revision.py
+++ b/atr/storage/writers/revision.py
@@ -29,6 +29,7 @@ from typing import TYPE_CHECKING
import aiofiles.os
import aioshutil
+import atr.attestable as attestable
import atr.db as db
import atr.db.interaction as interaction
import atr.detection as detection
@@ -196,6 +197,11 @@ class CommitteeParticipant(FoundationCommitter):
await aioshutil.rmtree(temp_dir)
raise
+ parent_revision_number = old_revision.number if old_revision else None
+ await attestable.write(
+ new_revision_dir, project_name, version_name, new_revision.number, asf_uid, parent_revision_number
+ )
+
# Commit to end the transaction started by data.begin_immediate
# We must commit the revision before starting the checks
# This also releases the write lock
diff --git a/atr/util.py b/atr/util.py
index c3281f6..c3bc707 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -183,6 +183,22 @@ async def async_temporary_directory(
log.exception(f"Failed to remove temporary directory {temp_dir_path}")
+async def atomic_write_file(file_path: pathlib.Path, content: str, encoding: str = "utf-8") -> None:
+ """Atomically write content to a file using a temporary file."""
+ await aiofiles.os.makedirs(file_path.parent, exist_ok=True)
+ temp_path = file_path.parent / f".{file_path.name}.{uuid.uuid4()}.tmp"
+ try:
+ async with aiofiles.open(temp_path, "w", encoding=encoding) as f:
+ await f.write(content)
+ await f.flush()
+ await asyncio.to_thread(os.fsync, f.fileno())
+ await aiofiles.os.rename(temp_path, file_path)
+ except Exception:
+ with contextlib.suppress(FileNotFoundError):
+ await aiofiles.os.remove(temp_path)
+ raise
+
+
def chmod_directories(path: pathlib.Path, permissions: int = 0o755) -> None:
# codeql[py/overly-permissive-file]
os.chmod(path, permissions)
@@ -435,6 +451,10 @@ async def get_asf_id_or_die() -> str:
return web_session.uid
+def get_attestable_dir() -> pathlib.Path:
+ return pathlib.Path(config.get().ATTESTABLE_STORAGE_DIR)
+
+
def get_downloads_dir() -> pathlib.Path:
return pathlib.Path(config.get().DOWNLOADS_STORAGE_DIR)
@@ -818,26 +838,7 @@ async def session_cache_read() -> dict[str, dict]:
async def session_cache_write(cache_data: dict[str, dict]) -> None:
cache_path = pathlib.Path(config.get().STATE_DIR) / "user_session_cache.json"
-
- cache_dir = cache_path.parent
- await asyncio.to_thread(os.makedirs, cache_dir, exist_ok=True)
-
- # Use the same pattern as update_atomic_symlink for the temporary file name
- temp_path = cache_dir / f".{cache_path.name}.{uuid.uuid4()}.tmp"
-
- try:
- async with aiofiles.open(temp_path, "w") as f:
- await f.write(json.dumps(cache_data, indent=2))
- await f.flush()
- await asyncio.to_thread(os.fsync, f.fileno())
-
- await aiofiles.os.rename(temp_path, cache_path)
- except Exception:
- try:
- await aiofiles.os.remove(temp_path)
- except FileNotFoundError:
- pass
- raise
+ await atomic_write_file(cache_path, json.dumps(cache_data, indent=2))
def static_path(*args: str) -> str:
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]