This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


The following commit(s) were added to refs/heads/main by this push:
     new 81a5678  Record some attestable file metadata
81a5678 is described below

commit 81a5678539e41fdb002de278de4ba9a9f1e1dfa7
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue Dec 30 19:41:07 2025 +0000

    Record some attestable file metadata
---
 atr/attestable.py               | 148 ++++++++++++++++++++++++++++++++++++++++
 atr/config.py                   |   2 +
 atr/models/attestable.py        |  33 +++++++++
 atr/storage/writers/revision.py |   6 ++
 atr/util.py                     |  41 +++++------
 5 files changed, 210 insertions(+), 20 deletions(-)

diff --git a/atr/attestable.py b/atr/attestable.py
new file mode 100644
index 0000000..01ed8ed
--- /dev/null
+++ b/atr/attestable.py
@@ -0,0 +1,148 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from __future__ import annotations
+
+import json
+from typing import TYPE_CHECKING, Final
+
+import aiofiles
+import aiofiles.os
+import blake3
+import pydantic
+
+import atr.log as log
+import atr.models.attestable as models
+import atr.util as util
+
+if TYPE_CHECKING:
+    import pathlib
+
+_HASH_CHUNK_SIZE: Final[int] = 4 * 1024 * 1024
+
+
def attestable_path(project_name: str, version_name: str, revision_number: str) -> pathlib.Path:
    """Return the on-disk path of the attestable JSON record for one revision."""
    base = util.get_attestable_dir()
    return base / project_name / version_name / (revision_number + ".json")
+
+
async def compute_file_hash(path: pathlib.Path) -> str:
    """Hash the file at path with BLAKE3 and return a "blake3:<hex>" reference.

    The file is read in fixed-size chunks so arbitrarily large files can be
    hashed without loading them fully into memory.
    """
    digest = blake3.blake3()
    async with aiofiles.open(path, "rb") as handle:
        chunk = await handle.read(_HASH_CHUNK_SIZE)
        while chunk:
            digest.update(chunk)
            chunk = await handle.read(_HASH_CHUNK_SIZE)
    return "blake3:" + digest.hexdigest()
+
+
async def load(
    project_name: str,
    version_name: str,
    revision_number: str,
) -> models.AttestableV1 | None:
    """Read and validate the stored attestable record for a revision.

    Returns None when the file does not exist or cannot be parsed; a parse
    failure is logged and treated as "start fresh" rather than raised.
    """
    file_path = attestable_path(project_name, version_name, revision_number)
    if not await aiofiles.os.path.isfile(file_path):
        return None
    try:
        async with aiofiles.open(file_path, encoding="utf-8") as handle:
            raw = await handle.read()
        return models.AttestableV1.model_validate(json.loads(raw))
    except (json.JSONDecodeError, pydantic.ValidationError) as exc:
        log.warning(f"Could not parse {file_path}, starting fresh: {exc}")
        return None
+
+
async def write(
    release_directory: pathlib.Path,
    project_name: str,
    version_name: str,
    revision_number: str,
    uploader_uid: str,
    parent_revision_number: str | None,
) -> None:
    """Generate the attestable record for a new revision and persist it.

    Loads the parent revision's record (when one exists) so uploader
    attribution carries forward, then writes the result atomically as
    indented JSON under the attestable storage directory.
    """
    if parent_revision_number is None:
        previous: models.AttestableV1 | None = None
    else:
        previous = await load(project_name, version_name, parent_revision_number)
    record = await _generate(release_directory, revision_number, uploader_uid, previous)
    target = attestable_path(project_name, version_name, revision_number)
    await util.atomic_write_file(target, record.model_dump_json(indent=2))
+
+
def _compute_hashes_with_attribution(
    current_hash_to_paths: dict[str, set[str]],
    path_to_size: dict[str, int],
    previous: models.AttestableV1 | None,
    uploader_uid: str,
    revision_number: str,
) -> dict[str, models.HashEntry]:
    """Merge the previous revision's hash table with the current file set.

    Entries from the previous record are kept (deep-copied). The current
    uploader is credited for any hash that is new in this revision, or for
    a known hash whose number of paths grew compared to the parent.
    """
    # Invert the previous path -> hash mapping so path counts per hash can
    # be compared between revisions.
    prior_paths_by_hash: dict[str, set[str]] = {}
    merged: dict[str, models.HashEntry] = {}
    if previous is not None:
        for path_key, hash_ref in previous.paths.items():
            prior_paths_by_hash.setdefault(hash_ref, set()).add(path_key)
        merged = {key: entry.model_copy(deep=True) for key, entry in previous.hashes.items()}

    attribution = (uploader_uid, revision_number)
    for hash_ref, paths_now in current_hash_to_paths.items():
        # Paths sharing a hash share content, so any one path gives the size.
        any_path = next(iter(paths_now))
        entry = merged.get(hash_ref)
        if entry is None:
            merged[hash_ref] = models.HashEntry(
                size=path_to_size[any_path],
                uploaders=[attribution],
            )
        elif len(paths_now) > len(prior_paths_by_hash.get(hash_ref, set())):
            if attribution not in set(entry.uploaders):
                entry.uploaders.append(attribution)

    return merged
+
+
async def _generate(
    directory: pathlib.Path,
    revision_number: str,
    uploader_uid: str,
    previous: models.AttestableV1 | None,
) -> models.AttestableV1:
    """Walk directory and build the attestable record for this revision.

    Hashes every file, records its size, and merges uploader attribution
    with the previous record via _compute_hashes_with_attribution.
    """
    path_to_hash: dict[str, str] = {}
    hash_to_paths: dict[str, set[str]] = {}
    sizes: dict[str, int] = {}

    async for rel_path in util.paths_recursive(directory):
        key = str(rel_path)
        # TODO: We should centralise this, and forbid some other characters too
        if "\\" in key:
            raise ValueError(f"Backslash in path is forbidden: {key}")
        absolute = directory / rel_path
        hash_ref = await compute_file_hash(absolute)
        path_to_hash[key] = hash_ref
        sizes[key] = (await aiofiles.os.stat(absolute)).st_size
        hash_to_paths.setdefault(hash_ref, set()).add(key)

    merged_hashes = _compute_hashes_with_attribution(
        hash_to_paths, sizes, previous, uploader_uid, revision_number
    )

    return models.AttestableV1(
        paths=dict(path_to_hash),
        hashes=dict(merged_hashes),
    )
diff --git a/atr/config.py b/atr/config.py
index a62470a..77d7d32 100644
--- a/atr/config.py
+++ b/atr/config.py
@@ -70,6 +70,7 @@ class AppConfig:
     # TODO: By convention this is at /x1/, but we can symlink it here perhaps?
     # TODO: We need to get Puppet to check SVN out initially, or do it manually
     SVN_STORAGE_DIR = os.path.join(STATE_DIR, "svn")
+    ATTESTABLE_STORAGE_DIR = os.path.join(STATE_DIR, "attestable")
     SQLITE_DB_PATH = decouple.config("SQLITE_DB_PATH", default="atr.db")
     STORAGE_AUDIT_LOG_FILE = os.path.join(STATE_DIR, "storage-audit.log")
     PERFORMANCE_LOG_FILE = os.path.join(STATE_DIR, "route-performance.log")
@@ -150,6 +151,7 @@ def get() -> type[AppConfig]:
         (config.FINISHED_STORAGE_DIR, "FINISHED_STORAGE_DIR"),
         (config.UNFINISHED_STORAGE_DIR, "UNFINISHED_STORAGE_DIR"),
         (config.SVN_STORAGE_DIR, "SVN_STORAGE_DIR"),
+        (config.ATTESTABLE_STORAGE_DIR, "ATTESTABLE_STORAGE_DIR"),
         (config.STORAGE_AUDIT_LOG_FILE, "STORAGE_AUDIT_LOG_FILE"),
         (config.PERFORMANCE_LOG_FILE, "PERFORMANCE_LOG_FILE"),
     ]
diff --git a/atr/models/attestable.py b/atr/models/attestable.py
new file mode 100644
index 0000000..f49cc1f
--- /dev/null
+++ b/atr/models/attestable.py
@@ -0,0 +1,33 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+from typing import Annotated, Literal
+
+import pydantic
+
+import atr.models.schema as schema
+
+
class HashEntry(schema.Strict):
    """Metadata recorded for one unique file content hash."""

    # Size in bytes of the content identified by this hash.
    size: int
    # (uploader_uid, revision_number) pairs credited with this content; the
    # BeforeValidator coerces JSON arrays back into tuples on load.
    uploaders: list[Annotated[tuple[str, str], pydantic.BeforeValidator(tuple)]]
+
+
class AttestableV1(schema.Strict):
    """Version 1 of the attestable file-metadata record for a revision."""

    # Schema version tag, fixed at 1 for this model.
    version: Literal[1] = 1
    # Maps a relative file path to its content hash reference string.
    paths: dict[str, str] = schema.factory(dict)
    # Maps a content hash reference to its recorded metadata.
    hashes: dict[str, HashEntry] = schema.factory(dict)
diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py
index 24e681a..ddc89e5 100644
--- a/atr/storage/writers/revision.py
+++ b/atr/storage/writers/revision.py
@@ -29,6 +29,7 @@ from typing import TYPE_CHECKING
 import aiofiles.os
 import aioshutil
 
+import atr.attestable as attestable
 import atr.db as db
 import atr.db.interaction as interaction
 import atr.detection as detection
@@ -196,6 +197,11 @@ class CommitteeParticipant(FoundationCommitter):
                 await aioshutil.rmtree(temp_dir)
                 raise
 
+            parent_revision_number = old_revision.number if old_revision else 
None
+            await attestable.write(
+                new_revision_dir, project_name, version_name, 
new_revision.number, asf_uid, parent_revision_number
+            )
+
             # Commit to end the transaction started by data.begin_immediate
             # We must commit the revision before starting the checks
             # This also releases the write lock
diff --git a/atr/util.py b/atr/util.py
index c3281f6..c3bc707 100644
--- a/atr/util.py
+++ b/atr/util.py
@@ -183,6 +183,22 @@ async def async_temporary_directory(
             log.exception(f"Failed to remove temporary directory 
{temp_dir_path}")
 
 
async def atomic_write_file(file_path: pathlib.Path, content: str, encoding: str = "utf-8") -> None:
    """Atomically write content to a file using a temporary file.

    The content is first written and fsynced to a uniquely named sibling
    file, which is then renamed into place, so readers never observe a
    partially written file. On failure the temporary file is removed.
    """
    parent = file_path.parent
    await aiofiles.os.makedirs(parent, exist_ok=True)
    temp_path = parent / f".{file_path.name}.{uuid.uuid4()}.tmp"
    try:
        async with aiofiles.open(temp_path, "w", encoding=encoding) as handle:
            await handle.write(content)
            await handle.flush()
            # Force the bytes to disk before the rename makes them visible.
            await asyncio.to_thread(os.fsync, handle.fileno())
        await aiofiles.os.rename(temp_path, file_path)
    except Exception:
        with contextlib.suppress(FileNotFoundError):
            await aiofiles.os.remove(temp_path)
        raise
+
+
 def chmod_directories(path: pathlib.Path, permissions: int = 0o755) -> None:
     # codeql[py/overly-permissive-file]
     os.chmod(path, permissions)
@@ -435,6 +451,10 @@ async def get_asf_id_or_die() -> str:
     return web_session.uid
 
 
+def get_attestable_dir() -> pathlib.Path:
+    return pathlib.Path(config.get().ATTESTABLE_STORAGE_DIR)
+
+
 def get_downloads_dir() -> pathlib.Path:
     return pathlib.Path(config.get().DOWNLOADS_STORAGE_DIR)
 
@@ -818,26 +838,7 @@ async def session_cache_read() -> dict[str, dict]:
 
 async def session_cache_write(cache_data: dict[str, dict]) -> None:
     cache_path = pathlib.Path(config.get().STATE_DIR) / 
"user_session_cache.json"
-
-    cache_dir = cache_path.parent
-    await asyncio.to_thread(os.makedirs, cache_dir, exist_ok=True)
-
-    # Use the same pattern as update_atomic_symlink for the temporary file name
-    temp_path = cache_dir / f".{cache_path.name}.{uuid.uuid4()}.tmp"
-
-    try:
-        async with aiofiles.open(temp_path, "w") as f:
-            await f.write(json.dumps(cache_data, indent=2))
-            await f.flush()
-            await asyncio.to_thread(os.fsync, f.fileno())
-
-        await aiofiles.os.rename(temp_path, cache_path)
-    except Exception:
-        try:
-            await aiofiles.os.remove(temp_path)
-        except FileNotFoundError:
-            pass
-        raise
+    await atomic_write_file(cache_path, json.dumps(cache_data, indent=2))
 
 
 def static_path(*args: str) -> str:


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to