This is an automated email from the ASF dual-hosted git repository.

sbp pushed a commit to branch sbp in repository https://gitbox.apache.org/repos/asf/tooling-trusted-releases.git
commit 472d371ea04117f328bfc9612dac2069ebbe351d Author: Sean B. Palmer <[email protected]> AuthorDate: Tue Mar 17 19:18:11 2026 +0000 Read hashes either from the database or by recomputing from disk --- atr/db/__init__.py | 52 +++++++++++++++++++++++++ atr/merge.py | 77 ++++++++++++++++++++++++-------------- atr/storage/writers/revision.py | 3 +- atr/tasks/checks/__init__.py | 24 +++++++++--- tests/unit/test_create_revision.py | 3 +- tests/unit/test_merge.py | 59 ++++++++++++++++++----------- 6 files changed, 159 insertions(+), 59 deletions(-) diff --git a/atr/db/__init__.py b/atr/db/__init__.py index 41f68796..ebc47462 100644 --- a/atr/db/__init__.py +++ b/atr/db/__init__.py @@ -569,6 +569,58 @@ class Session(sqlalchemy.ext.asyncio.AsyncSession): return Query(self, query) + async def release_file_hash_at( + self, + release_key: str, + path: str, + at_revision_seq: int, + ) -> str | None: + via = sql.validate_instrumented_attribute + query = ( + sqlmodel.select(sql.ReleaseFileState) + .where( + sql.ReleaseFileState.release_key == release_key, + sql.ReleaseFileState.path == path, + via(sql.ReleaseFileState.since_revision_seq) <= at_revision_seq, + ) + .order_by(via(sql.ReleaseFileState.since_revision_seq).desc()) + .limit(1) + ) + result = await self.execute(query) + row = result.scalar_one_or_none() + if (row is None) or (not row.present): + return None + return row.content_hash + + async def release_file_hashes_at( + self, + release_key: str, + at_revision_seq: int, + ) -> dict[str, str]: + via = sql.validate_instrumented_attribute + query = ( + sqlmodel.select(sql.ReleaseFileState) + .where( + sql.ReleaseFileState.release_key == release_key, + via(sql.ReleaseFileState.since_revision_seq) <= at_revision_seq, + ) + .order_by( + sql.ReleaseFileState.path, + via(sql.ReleaseFileState.since_revision_seq).desc(), + ) + ) + result = await self.execute(query) + rows = result.scalars().all() + hashes: dict[str, str] = {} + seen: set[str] = set() + for row in rows: + if 
row.path in seen: + continue + seen.add(row.path) + if row.present and (row.content_hash is not None): + hashes[row.path] = row.content_hash + return hashes + def release_file_state( self, release_key: Opt[str] = NOT_SET, diff --git a/atr/merge.py b/atr/merge.py index 8f929172..c9b4be66 100644 --- a/atr/merge.py +++ b/atr/merge.py @@ -23,9 +23,10 @@ from typing import TYPE_CHECKING import aiofiles.os -import atr.attestable as attestable +import atr.db as db import atr.hashes as hashes import atr.models.safe as safe +import atr.models.sql as sql import atr.util as util if TYPE_CHECKING: @@ -33,12 +34,13 @@ if TYPE_CHECKING: async def merge( + data: db.Session, base_inodes: dict[str, int], base_hashes: dict[str, str], prior_dir: pathlib.Path, project_key: safe.ProjectKey, version_key: safe.VersionKey, - prior_revision_number: safe.RevisionNumber, + prior_revision_seq: int, temp_dir: pathlib.Path, n_inodes: dict[str, int], n_hashes: dict[str, str], @@ -48,6 +50,7 @@ async def merge( # This happens in the _add_from_prior and _replace_with_prior calls somewhat below prior_inodes = await asyncio.to_thread(util.paths_to_inodes, prior_dir) prior_hashes: dict[str, str] | None = None + release_key = str(sql.release_key(project_key, version_key)) # Collect implicit directory paths from new (N) files for type conflict detection n_dirs: set[str] = set() @@ -70,15 +73,15 @@ async def merge( if await aiofiles.os.path.isdir(temp_dir / path): continue prior_hashes = await _add_from_prior( + data, prior_dir, temp_dir, path, n_hashes, n_sizes, prior_hashes, - project_key, - version_key, - prior_revision_number, + release_key, + prior_revision_seq, ) continue @@ -96,6 +99,7 @@ async def merge( # Cases 4, 5, 6, 8, 11, and 15: all three revisions have this path if (b_ino is not None) and (p_ino is not None) and (n_ino is not None): prior_hashes = await _merge_all_present( + data, base_inodes, base_hashes, prior_dir, @@ -107,30 +111,29 @@ async def merge( n_hashes, n_sizes, prior_hashes, 
- project_key, - version_key, - prior_revision_number, + release_key, + prior_revision_seq, ) async def _add_from_prior( + data: db.Session, prior_dir: pathlib.Path, temp_dir: pathlib.Path, path: str, n_hashes: dict[str, str], n_sizes: dict[str, int], prior_hashes: dict[str, str] | None, - project_key: safe.ProjectKey, - version_key: safe.VersionKey, - prior_revision_number: safe.RevisionNumber, + release_key: str, + prior_revision_seq: int, ) -> dict[str, str] | None: target = temp_dir / path await asyncio.to_thread(_makedirs_with_permissions, target.parent, temp_dir) await aiofiles.os.link(prior_dir / path, target) if prior_hashes is None: - prior_hashes = await attestable.load_paths(project_key, version_key, prior_revision_number) + prior_hashes = await _prior_hashes_load(data, release_key, prior_revision_seq, prior_dir) # Update n_hashes and n_sizes in place - if (prior_hashes is not None) and (path in prior_hashes): + if path in prior_hashes: n_hashes[path] = prior_hashes[path] else: n_hashes[path] = await hashes.compute_file_hash(target) @@ -166,6 +169,7 @@ def _makedirs_with_permissions(target_parent: pathlib.Path, root: pathlib.Path) async def _merge_all_present( + data: db.Session, _base_inodes: dict[str, int], base_hashes: dict[str, str], prior_dir: pathlib.Path, @@ -177,9 +181,8 @@ async def _merge_all_present( n_hashes: dict[str, str], n_sizes: dict[str, int], prior_hashes: dict[str, str] | None, - project_key: safe.ProjectKey, - version_key: safe.VersionKey, - prior_revision_number: safe.RevisionNumber, + release_key: str, + prior_revision_seq: int, ) -> dict[str, str] | None: # Cases 6, 8: prior and new share an inode so they already agree if p_ino == n_ino: @@ -192,15 +195,15 @@ async def _merge_all_present( # Case 11 via inode: base and new share an inode so prior wins if b_ino == n_ino: return await _replace_with_prior( + data, prior_dir, temp_dir, path, n_hashes, n_sizes, prior_hashes, - project_key, - version_key, - prior_revision_number, + 
release_key, + prior_revision_seq, ) # Cases 4, 5, 8, 11, 15: all inodes differ, so use hash to distinguish @@ -208,47 +211,63 @@ async def _merge_all_present( n_hash = n_hashes[path] if b_hash == n_hash: if prior_hashes is None: - prior_hashes = await attestable.load_paths(project_key, version_key, prior_revision_number) - if (prior_hashes is not None) and (path in prior_hashes): + prior_hashes = await _prior_hashes_load(data, release_key, prior_revision_seq, prior_dir) + if path in prior_hashes: p_hash = prior_hashes[path] else: p_hash = await hashes.compute_file_hash(prior_dir / path) if p_hash != b_hash: # Case 11 via hash: base and new have the same content but prior differs return await _replace_with_prior( + data, prior_dir, temp_dir, path, n_hashes, n_sizes, prior_hashes, - project_key, - version_key, - prior_revision_number, + release_key, + prior_revision_seq, ) # Cases 4, 5, 8, 15: no merge action needed so new wins return prior_hashes +async def _prior_hashes_load( + data: db.Session, + release_key: str, + prior_revision_seq: int, + prior_dir: pathlib.Path, +) -> dict[str, str]: + result = await data.release_file_hashes_at(release_key, prior_revision_seq) + if result: + return result + prior_hashes: dict[str, str] = {} + # Slow, but only applies to pre-AttestableV2 release revisions + async for rel_path in util.paths_recursive(prior_dir): + prior_hashes[str(rel_path)] = await hashes.compute_file_hash(prior_dir / rel_path) + return prior_hashes + + async def _replace_with_prior( + data: db.Session, prior_dir: pathlib.Path, temp_dir: pathlib.Path, path: str, n_hashes: dict[str, str], n_sizes: dict[str, int], prior_hashes: dict[str, str] | None, - project_key: safe.ProjectKey, - version_key: safe.VersionKey, - prior_revision_number: safe.RevisionNumber, + release_key: str, + prior_revision_seq: int, ) -> dict[str, str] | None: await aiofiles.os.remove(temp_dir / path) await aiofiles.os.link(prior_dir / path, temp_dir / path) if prior_hashes is None: - 
prior_hashes = await attestable.load_paths(project_key, version_key, prior_revision_number) + prior_hashes = await _prior_hashes_load(data, release_key, prior_revision_seq, prior_dir) # Update n_hashes and n_sizes in place file_path = temp_dir / path - if (prior_hashes is not None) and (path in prior_hashes): + if path in prior_hashes: n_hashes[path] = prior_hashes[path] else: n_hashes[path] = await hashes.compute_file_hash(file_path) diff --git a/atr/storage/writers/revision.py b/atr/storage/writers/revision.py index 9a10f118..51809f7c 100644 --- a/atr/storage/writers/revision.py +++ b/atr/storage/writers/revision.py @@ -287,12 +287,13 @@ async def _lock_and_merge( prior_number = latest.safe_number prior_dir = paths.release_directory_base(merged_release) / str(prior_number) await merge.merge( + data, base_inodes, base_hashes, prior_dir, project_key, version_key, - prior_number, + latest.seq, temp_dir_path, n_inodes, path_to_hash, diff --git a/atr/tasks/checks/__init__.py b/atr/tasks/checks/__init__.py index a488d3f5..9065281f 100644 --- a/atr/tasks/checks/__init__.py +++ b/atr/tasks/checks/__init__.py @@ -324,10 +324,16 @@ async def resolve_archive_dir(args: FunctionArguments) -> pathlib.Path | None: """Resolve the extracted archive directory for the primary archive.""" if args.primary_rel_path is None: return None - paths_data = await attestable.load_paths(args.project_key, args.version_key, args.revision_number) - if paths_data is None: - return None - content_hash = paths_data.get(args.primary_rel_path) + release_key = sql.release_key(str(args.project_key), str(args.version_key)) + revision_seq = int(str(args.revision_number)) + async with db.session() as data: + content_hash = await data.release_file_hash_at(release_key, args.primary_rel_path, revision_seq) + if content_hash is None: + abs_path = file_paths.revision_path_for_file( + args.project_key, args.version_key, args.revision_number, args.primary_rel_path + ) + if await aiofiles.os.path.isfile(abs_path): 
+ content_hash = await hashes.compute_file_hash(abs_path) if content_hash is None: return None archive_key = hashes.filesystem_archives_key(content_hash) @@ -337,7 +343,7 @@ async def resolve_archive_dir(args: FunctionArguments) -> pathlib.Path | None: return None -async def resolve_cache_key( +async def resolve_cache_key( # noqa: C901 checker: str | Callable[..., Any], checker_version: str, policy_keys: list[str], @@ -361,7 +367,13 @@ async def resolve_cache_key( else: # TODO: Is this fallback valid / necessary? Or should we bail out if there's no attestable data? policy = release.release_policy or release.project.release_policy - if not ignore_path: + if not ignore_path: + if file: + release_key = sql.release_key(str(release.safe_project_key), str(release.safe_version_key)) + revision_seq = int(str(revision)) + async with db.session() as data: + file_hash = await data.release_file_hash_at(release_key, file, revision_seq) + if file_hash is None: if path is None: path = file_paths.revision_path_for_file( release.safe_project_key, release.safe_version_key, revision, file or "" diff --git a/tests/unit/test_create_revision.py b/tests/unit/test_create_revision.py index 4c1d945e..a377baa7 100644 --- a/tests/unit/test_create_revision.py +++ b/tests/unit/test_create_revision.py @@ -209,6 +209,7 @@ async def test_intervening_revision_triggers_merge_and_uses_latest_parent(tmp_pa intervening_revision = mock.MagicMock() intervening_revision.key = f"{release_key} 00006" intervening_revision.number = "00006" + intervening_revision.seq = 6 intervening_revision.safe_number = safe.RevisionNumber("00006") first_attestable = mock.MagicMock(paths={"dist/a.tar.gz": "h1"}) @@ -259,7 +260,7 @@ async def test_intervening_revision_triggers_merge_and_uses_latest_parent(tmp_pa merge_await_args = merge_mock.await_args assert merge_await_args is not None - assert merge_await_args.args[5] == safe.RevisionNumber("00006") + assert merge_await_args.args[6] == 6 @pytest.mark.asyncio diff --git 
a/tests/unit/test_merge.py b/tests/unit/test_merge.py index 46fd2a82..61adbc0c 100644 --- a/tests/unit/test_merge.py +++ b/tests/unit/test_merge.py @@ -39,14 +39,15 @@ async def test_case_09_prior_adds_file(tmp_path: pathlib.Path): mock_prior_hashes = {"added.txt": "blake3:abc123"} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value=mock_prior_hashes): + with mock.patch("atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, return_value=mock_prior_hashes): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -74,14 +75,15 @@ async def test_case_09_prior_adds_file_in_subdirectory(tmp_path: pathlib.Path): mock_prior_hashes = {"apple/banana.txt": "blake3:xyz890"} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value=mock_prior_hashes): + with mock.patch("atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, return_value=mock_prior_hashes): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -108,14 +110,15 @@ async def test_case_10_prior_deletion_via_hash(tmp_path: pathlib.Path): n_hashes = {"removed.txt": "blake3:matching"} n_sizes = {"removed.txt": len("same content")} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value={}): + with mock.patch("atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, return_value={}): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -140,14 +143,15 @@ async def test_case_10_prior_deletion_via_inode(tmp_path: pathlib.Path): n_hashes = {"removed.txt": "blake3:aaa"} n_sizes = {"removed.txt": len("to be deleted")} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value={}): + with mock.patch("atr.merge._prior_hashes_load", 
new_callable=mock.AsyncMock, return_value={}): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -181,14 +185,15 @@ async def test_case_11_prior_replacement_via_hash(tmp_path: pathlib.Path): mock_prior_hashes = {"shared.txt": "blake3:updated"} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value=mock_prior_hashes): + with mock.patch("atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, return_value=mock_prior_hashes): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -216,14 +221,15 @@ async def test_case_11_prior_replacement_via_inode(tmp_path: pathlib.Path): mock_prior_hashes = {"shared.txt": "blake3:updated"} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value=mock_prior_hashes): + with mock.patch("atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, return_value=mock_prior_hashes): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -248,14 +254,15 @@ async def test_case_13_new_wins_when_prior_deletes(tmp_path: pathlib.Path): n_hashes = {"modified.txt": "blake3:new"} n_sizes = {"modified.txt": len("new content")} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value={}): + with mock.patch("atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, return_value={}): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -282,14 +289,15 @@ async def test_noop_when_base_and_prior_agree(tmp_path: pathlib.Path): n_hashes = {"unchanged.txt": "blake3:modified"} n_sizes = {"unchanged.txt": len("modified by new")} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, 
return_value={}) as mock_load: + with mock.patch("atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, return_value={}) as mock_load: await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -315,14 +323,15 @@ async def test_type_conflict_prior_file_vs_new_directory(tmp_path: pathlib.Path) n_hashes = {"docs/guide.txt": "blake3:guide"} n_sizes = {"docs/guide.txt": len("a file under a directory in new")} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value={"docs": "blake3:docs"}): + with mock.patch("atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, return_value={"docs": "blake3:docs"}): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -348,14 +357,17 @@ async def test_type_conflict_prior_file_vs_new_empty_directory(tmp_path: pathlib n_hashes: dict[str, str] = {} n_sizes: dict[str, int] = {} - with mock.patch("atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value={"empty": "blake3:empty"}): + with mock.patch( + "atr.merge._prior_hashes_load", new_callable=mock.AsyncMock, return_value={"empty": "blake3:empty"} + ): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, @@ -381,15 +393,18 @@ async def test_type_conflict_prior_subdir_vs_new_file(tmp_path: pathlib.Path): n_sizes = {"docs": len("a file in new")} with mock.patch( - "atr.attestable.load_paths", new_callable=mock.AsyncMock, return_value={"docs/guide.txt": "blake3:guide"} + "atr.merge._prior_hashes_load", + new_callable=mock.AsyncMock, + return_value={"docs/guide.txt": "blake3:guide"}, ): await merge.merge( + mock.AsyncMock(), base_inodes, base_hashes, prior_dir, "proj", "ver", - "00002", + 2, temp_dir, n_inodes, n_hashes, 
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]
