(tooling-trusted-releases) 02/02: Record file metadata change events in the database

sbp Tue, 17 Mar 2026 08:10:00 -0700

This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp
in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git


commit caf1fc3e36de9003324182b8191b4a3ca73b496d
Author: Sean B. Palmer <[email protected]>
AuthorDate: Tue Mar 17 15:08:11 2026 +0000

    Record file metadata change events in the database
---
 atr/attestable.py               | 107 +++++++++++++++++++++++++++++++++-------
 atr/post/upload.py              |   2 +-
 atr/storage/writers/release.py  |  10 ++++
 atr/storage/writers/revision.py |  16 +++++-
 4 files changed, 114 insertions(+), 21 deletions(-)

diff --git a/atr/attestable.py b/atr/attestable.py
index 03a08e16..b07bad25 100644
--- a/atr/attestable.py
+++ b/atr/attestable.py
@@ -30,6 +30,7 @@ import atr.hashes as hashes
 import atr.log as log
 import atr.models.attestable as models
 import atr.models.safe as safe
+import atr.models.sql as sql
 import atr.paths as paths
 import atr.util as util
 
@@ -52,6 +53,81 @@ def attestable_paths_path(
     return paths.get_attestable_dir() / str(project_name) / str(version_name) / f"{revision_number!s}.paths.json"
 
 
+def can_write_file_state_rows(
+    previous: models.Attestable | None,
+    parent_name: str | None,
+) -> bool:
+    is_first_revision = (previous is None) and (parent_name is None)
+    is_v2_continuation = isinstance(previous, models.AttestableV2)
+    return is_first_revision or is_v2_continuation
+
+
+def compute_classifications(
+    path_to_hash: dict[str, str],
+    release_policy: dict[str, Any] | None,
+    base_path: pathlib.Path,
+) -> dict[str, str]:
+    policy = release_policy or {}
+    source_matcher, binary_matcher = classify.matchers_from_policy(
+        policy.get("source_artifact_paths", []),
+        policy.get("binary_artifact_paths", []),
+        base_path,
+    )
+    return {
+        path_key: classify.classify(pathlib.Path(path_key), base_path, source_matcher, binary_matcher).value
+        for path_key in path_to_hash
+    }
+
+
+def compute_file_state_rows(
+    release_name: str,
+    since_revision_seq: int,
+    path_to_hash: dict[str, str],
+    classifications: dict[str, str],
+    previous: models.Attestable | None,
+) -> list[sql.ReleaseFileState]:
+    prev_hashes: dict[str, str] = {}
+    prev_classifications: dict[str, str] = {}
+    if previous is not None:
+        prev_hashes = path_hashes(previous)
+        if isinstance(previous, models.AttestableV2):
+            prev_classifications = {path_key: entry.classification for path_key, entry in previous.paths.items()}
+
+    rows: list[sql.ReleaseFileState] = []
+
+    for path_key in sorted(path_to_hash):
+        content_hash = path_to_hash[path_key]
+        classification = classifications[path_key]
+        # If all prior metadata properties are the same, we skip recording an event
+        if (prev_hashes.get(path_key) == content_hash) and (prev_classifications.get(path_key) == classification):
+            continue
+        rows.append(
+            sql.ReleaseFileState(
+                release_name=release_name,
+                path=path_key,
+                since_revision_seq=since_revision_seq,
+                present=True,
+                content_hash=content_hash,
+                classification=classification,
+            )
+        )
+
+    for path_key in sorted(prev_hashes):
+        if path_key not in path_to_hash:
+            rows.append(
+                sql.ReleaseFileState(
+                    release_name=release_name,
+                    path=path_key,
+                    since_revision_seq=since_revision_seq,
+                    present=False,
+                    content_hash=None,
+                    classification=None,
+                )
+            )
+
+    return rows
+
+
 def github_tp_payload_path(
     project_name: safe.ProjectKey, version_name: safe.VersionKey, revision_number: safe.RevisionNumber
 ) -> pathlib.Path:
@@ -227,9 +303,17 @@ async def write_files_data(
     path_to_hash: dict[str, str],
     path_to_size: dict[str, int],
     base_path: pathlib.Path,
+    classifications: dict[str, str] | None = None,
 ) -> None:
     result = _generate_files_data(
-        path_to_hash, path_to_size, revision_number, release_policy, uploader_uid, previous, base_path
+        path_to_hash,
+        path_to_size,
+        revision_number,
+        release_policy,
+        uploader_uid,
+        previous,
+        base_path,
+        classifications=classifications,
     )
     file_path = attestable_path(project_name, version_name, revision_number)
     await util.atomic_write_file(file_path, result.model_dump_json(indent=2))
@@ -242,23 +326,6 @@ async def write_files_data(
             await f.write(models.AttestableChecksV2().model_dump_json(indent=2))
 
 
-def _compute_classifications(
-    path_to_hash: dict[str, str],
-    release_policy: dict[str, Any] | None,
-    base_path: pathlib.Path,
-) -> dict[str, str]:
-    policy = release_policy or {}
-    source_matcher, binary_matcher = classify.matchers_from_policy(
-        policy.get("source_artifact_paths", []),
-        policy.get("binary_artifact_paths", []),
-        base_path,
-    )
-    return {
-        path_key: classify.classify(pathlib.Path(path_key), base_path, source_matcher, binary_matcher).value
-        for path_key in path_to_hash
-    }
-
-
 def _compute_hashes_with_attribution(  # noqa: C901
     current_hash_to_paths: dict[str, set[str]],
     path_to_size: dict[str, int],
@@ -312,6 +379,7 @@ def _generate_files_data(
     uploader_uid: str,
     previous: models.Attestable | None,
     base_path: pathlib.Path,
+    classifications: dict[str, str] | None = None,
 ) -> models.AttestableV2:
     current_hash_to_paths: dict[str, set[str]] = {}
     for path_key, hash_ref in path_to_hash.items():
@@ -321,7 +389,8 @@ def _generate_files_data(
         current_hash_to_paths, path_to_size, previous, uploader_uid, revision_number
     )
 
-    classifications = _compute_classifications(path_to_hash, release_policy, base_path)
+    if classifications is None:
+        classifications = compute_classifications(path_to_hash, release_policy, base_path)
     return models.AttestableV2(
         hashes=dict(new_hashes),
         paths={
diff --git a/atr/post/upload.py b/atr/post/upload.py
index ba46aa2f..59b80880 100644
--- a/atr/post/upload.py
+++ b/atr/post/upload.py
@@ -112,7 +112,7 @@ async def finalise(
             version_name=str(version_name),
         )
     except Exception as e:
-        log.exception("Error finalising upload:")
+        log.exception(f"Error finalising upload: {e!r}")
         return _json_error(f"Error finalising upload: {e!s}", 500)
 
 
diff --git a/atr/storage/writers/release.py b/atr/storage/writers/release.py
index 2aee86fc..8120a1e3 100644
--- a/atr/storage/writers/release.py
+++ b/atr/storage/writers/release.py
@@ -127,6 +127,16 @@ class CommitteeParticipant(FoundationCommitter):
         log.debug(f"Deleted {util.plural(task_count, 'task')} for {project_name!s} {version!s}")
 
         release_name = release.name
+
+        # These deletes would also be performed by database cascade
+        # We do them here before the commit instead to be explicit
+        rfs_delete_stmt = sqlmodel.delete(sql.ReleaseFileState).where(
+            via(sql.ReleaseFileState.release_name) == release_name,
+        )
+        rfs_result = await self.__data.execute(rfs_delete_stmt)
+        rfs_count = rfs_result.rowcount if isinstance(rfs_result, engine.CursorResult) else 0
+        log.debug(f"Deleted {util.plural(rfs_count, 'file state row')} for {project_name!s} {version!s}")
+
         await self.__data.delete(release)
         log.info(f"Deleted release record: {project_name!s} {version!s}")
 
diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py
index f55b4290..5fe9230a 100644
--- a/atr/storage/writers/revision.py
+++ b/atr/storage/writers/revision.py
@@ -192,19 +192,33 @@ async def _commit_new_revision(
     await asyncio.to_thread(util.chmod_directories, new_revision_dir, 0o555)
 
     policy = release.release_policy or release.project.release_policy
+    policy_dict = policy.model_dump() if policy else None
+
+    classifications = attestable.compute_classifications(path_to_hash, policy_dict, new_revision_dir)
 
     await attestable.write_files_data(
         project_name,
         version_name,
         new_revision.safe_number,
-        policy.model_dump() if policy else None,
+        policy_dict,
         asf_uid,
         previous_attestable,
         path_to_hash,
         path_to_size,
         new_revision_dir,
+        classifications=classifications,
     )
 
+    if attestable.can_write_file_state_rows(previous_attestable, new_revision.parent_name):
+        for row in attestable.compute_file_state_rows(
+            release_name,
+            new_revision.seq,
+            path_to_hash,
+            classifications,
+            previous_attestable,
+        ):
+            data.add(row)
+
     # Commit to end the transaction started by data.begin_immediate
     # We must commit the revision before starting the checks
     # This also releases the write lock obtained in _lock_and_merge


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

(tooling-trusted-releases) 02/02: Record file metadata change events in the database

Reply via email to