This is an automated email from the ASF dual-hosted git repository. sbp pushed a commit to branch sbp in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
commit caf1fc3e36de9003324182b8191b4a3ca73b496d Author: Sean B. Palmer <[email protected]> AuthorDate: Tue Mar 17 15:08:11 2026 +0000 Record file metadata change events in the database --- atr/attestable.py | 107 +++++++++++++++++++++++++++++++++------- atr/post/upload.py | 2 +- atr/storage/writers/release.py | 10 ++++ atr/storage/writers/revision.py | 16 +++++- 4 files changed, 114 insertions(+), 21 deletions(-) diff --git a/atr/attestable.py b/atr/attestable.py index 03a08e16..b07bad25 100644 --- a/atr/attestable.py +++ b/atr/attestable.py @@ -30,6 +30,7 @@ import atr.hashes as hashes import atr.log as log import atr.models.attestable as models import atr.models.safe as safe +import atr.models.sql as sql import atr.paths as paths import atr.util as util @@ -52,6 +53,81 @@ def attestable_paths_path( return paths.get_attestable_dir() / str(project_name) / str(version_name) / f"{revision_number!s}.paths.json" +def can_write_file_state_rows( + previous: models.Attestable | None, + parent_name: str | None, +) -> bool: + is_first_revision = (previous is None) and (parent_name is None) + is_v2_continuation = isinstance(previous, models.AttestableV2) + return is_first_revision or is_v2_continuation + + +def compute_classifications( + path_to_hash: dict[str, str], + release_policy: dict[str, Any] | None, + base_path: pathlib.Path, +) -> dict[str, str]: + policy = release_policy or {} + source_matcher, binary_matcher = classify.matchers_from_policy( + policy.get("source_artifact_paths", []), + policy.get("binary_artifact_paths", []), + base_path, + ) + return { + path_key: classify.classify(pathlib.Path(path_key), base_path, source_matcher, binary_matcher).value + for path_key in path_to_hash + } + + +def compute_file_state_rows( + release_name: str, + since_revision_seq: int, + path_to_hash: dict[str, str], + classifications: dict[str, str], + previous: models.Attestable | None, +) -> list[sql.ReleaseFileState]: + prev_hashes: dict[str, str] = {} + prev_classifications: dict[str, str] = {} + if previous is not None: + prev_hashes = path_hashes(previous) + if isinstance(previous, models.AttestableV2): + prev_classifications = {path_key: entry.classification for path_key, entry in previous.paths.items()} + + rows: list[sql.ReleaseFileState] = [] + + for path_key in sorted(path_to_hash): + content_hash = path_to_hash[path_key] + classification = classifications[path_key] + # If all prior metadata properties are the same, we skip recording an event + if (prev_hashes.get(path_key) == content_hash) and (prev_classifications.get(path_key) == classification): + continue + rows.append( + sql.ReleaseFileState( + release_name=release_name, + path=path_key, + since_revision_seq=since_revision_seq, + present=True, + content_hash=content_hash, + classification=classification, + ) + ) + + for path_key in sorted(prev_hashes): + if path_key not in path_to_hash: + rows.append( + sql.ReleaseFileState( + release_name=release_name, + path=path_key, + since_revision_seq=since_revision_seq, + present=False, + content_hash=None, + classification=None, + ) + ) + + return rows + + def github_tp_payload_path( project_name: safe.ProjectKey, version_name: safe.VersionKey, revision_number: safe.RevisionNumber ) -> pathlib.Path: @@ -227,9 +303,17 @@ async def write_files_data( path_to_hash: dict[str, str], path_to_size: dict[str, int], base_path: pathlib.Path, + classifications: dict[str, str] | None = None, ) -> None: result = _generate_files_data( - path_to_hash, path_to_size, revision_number, release_policy, uploader_uid, previous, base_path + path_to_hash, + path_to_size, + revision_number, + release_policy, + uploader_uid, + previous, + base_path, + classifications=classifications, ) file_path = attestable_path(project_name, version_name, revision_number) await util.atomic_write_file(file_path, result.model_dump_json(indent=2)) @@ -242,23 +326,6 @@ async def write_files_data( await f.write(models.AttestableChecksV2().model_dump_json(indent=2)) -def _compute_classifications( - path_to_hash: dict[str, str], - release_policy: dict[str, Any] | None, - base_path: pathlib.Path, -) -> dict[str, str]: - policy = release_policy or {} - source_matcher, binary_matcher = classify.matchers_from_policy( - policy.get("source_artifact_paths", []), - policy.get("binary_artifact_paths", []), - base_path, - ) - return { - path_key: classify.classify(pathlib.Path(path_key), base_path, source_matcher, binary_matcher).value - for path_key in path_to_hash - } - - def _compute_hashes_with_attribution( # noqa: C901 current_hash_to_paths: dict[str, set[str]], path_to_size: dict[str, int], @@ -312,6 +379,7 @@ def _generate_files_data( uploader_uid: str, previous: models.Attestable | None, base_path: pathlib.Path, + classifications: dict[str, str] | None = None, ) -> models.AttestableV2: current_hash_to_paths: dict[str, set[str]] = {} for path_key, hash_ref in path_to_hash.items(): @@ -321,7 +389,8 @@ def _generate_files_data( current_hash_to_paths, path_to_size, previous, uploader_uid, revision_number ) - classifications = _compute_classifications(path_to_hash, release_policy, base_path) + if classifications is None: + classifications = compute_classifications(path_to_hash, release_policy, base_path) return models.AttestableV2( hashes=dict(new_hashes), paths={ diff --git a/atr/post/upload.py b/atr/post/upload.py index ba46aa2f..59b80880 100644 --- a/atr/post/upload.py +++ b/atr/post/upload.py @@ -112,7 +112,7 @@ async def finalise( version_name=str(version_name), ) except Exception as e: - log.exception("Error finalising upload:") + log.exception(f"Error finalising upload: {e!r}") return _json_error(f"Error finalising upload: {e!s}", 500) diff --git a/atr/storage/writers/release.py b/atr/storage/writers/release.py index 2aee86fc..8120a1e3 100644 --- a/atr/storage/writers/release.py +++ b/atr/storage/writers/release.py @@ -127,6 +127,16 @@ class CommitteeParticipant(FoundationCommitter): log.debug(f"Deleted {util.plural(task_count, 'task')} for {project_name!s} {version!s}") release_name = release.name + + # These deletes would also be performed by database cascade + # We do them here before the commit instead to be explicit + rfs_delete_stmt = sqlmodel.delete(sql.ReleaseFileState).where( + via(sql.ReleaseFileState.release_name) == release_name, + ) + rfs_result = await self.__data.execute(rfs_delete_stmt) + rfs_count = rfs_result.rowcount if isinstance(rfs_result, engine.CursorResult) else 0 + log.debug(f"Deleted {util.plural(rfs_count, 'file state row')} for {project_name!s} {version!s}") + await self.__data.delete(release) log.info(f"Deleted release record: {project_name!s} {version!s}") diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py index f55b4290..5fe9230a 100644 --- a/atr/storage/writers/revision.py +++ b/atr/storage/writers/revision.py @@ -192,19 +192,33 @@ async def _commit_new_revision( await asyncio.to_thread(util.chmod_directories, new_revision_dir, 0o555) policy = release.release_policy or release.project.release_policy + policy_dict = policy.model_dump() if policy else None + + classifications = attestable.compute_classifications(path_to_hash, policy_dict, new_revision_dir) await attestable.write_files_data( project_name, version_name, new_revision.safe_number, - policy.model_dump() if policy else None, + policy_dict, asf_uid, previous_attestable, path_to_hash, path_to_size, new_revision_dir, + classifications=classifications, ) + if attestable.can_write_file_state_rows(previous_attestable, new_revision.parent_name): + for row in attestable.compute_file_state_rows( + release_name, + new_revision.seq, + path_to_hash, + classifications, + previous_attestable, + ): + data.add(row) + # Commit to end the transaction started by data.begin_immediate # We must commit the revision before starting the checks # This also releases the write lock obtained in _lock_and_merge --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
