Ines Almeida has proposed merging ~ines-almeida/turnip:add-diff-stats-endpoint into turnip:master.
Commit message: Add diff stats endpoint This endpoint will list which files changed on a diff between 2 commits, allowing also cross-repo (using the same mechanisms as the already existing diff endpoints) and a diff from an empty tree Requested reviews: Launchpad code reviewers (launchpad-reviewers) For more details, see: https://code.launchpad.net/~ines-almeida/turnip/+git/turnip/+merge/491273 -- Your team Launchpad code reviewers is requested to review the proposed merge of ~ines-almeida/turnip:add-diff-stats-endpoint into turnip:master.
diff --git a/turnip/api/store.py b/turnip/api/store.py index 346c1f3..43e9078 100644 --- a/turnip/api/store.py +++ b/turnip/api/store.py @@ -13,6 +13,10 @@ from collections import defaultdict import six from contextlib2 import ExitStack, contextmanager from pygit2 import ( + GIT_DELTA_ADDED, + GIT_DELTA_DELETED, + GIT_DELTA_MODIFIED, + GIT_DELTA_RENAMED, GIT_OBJ_COMMIT, GIT_OBJ_TAG, GIT_REF_OID, @@ -1183,6 +1187,68 @@ def merge_async( ) +def _parse_diff(diff): + """Parse pygit2.Diff object and return stats + + Currently, only returning file stats (modified, added, deleted, renamed) + """ + + result = { + "added": [], + "modified": [], + "deleted": [], + "renamed": [], + } + + for delta in diff.deltas: + old_file = delta.old_file.path + new_file = delta.new_file.path + if delta.status == GIT_DELTA_ADDED: + result["added"].append(new_file) + elif delta.status == GIT_DELTA_DELETED: + result["deleted"].append(old_file) + elif delta.status == GIT_DELTA_MODIFIED: + result["modified"].append(new_file) + elif delta.status == GIT_DELTA_RENAMED: + result["renamed"].append({"old": old_file, "new": new_file}) + + return result + + +def get_diff_stats(repo_store, repo_name, sha1_from, sha1_to, diff_type): + """Get diff stats and associated commits of two sha1s. + + :param sha1_from: diff from sha1. If empty, diffs against empty tree. + :param sha1_to: diff to sha1. + :param diff_type: type of diff, either '..' or '...' + """ + + with open_repo(repo_store, repo_name) as repo: + # If we want to compare against the base (no source) + if sha1_from is None or sha1_from == "": + empty_tree_id = repo.TreeBuilder().write() + from_tree = repo[empty_tree_id] + + else: + # If we want to compare against a common ancester + if diff_type == "...": + common_ancestor = repo.merge_base(sha1_from, sha1_to) + if common_ancestor is not None: + # We have a merge base + sha1_from = common_ancestor + + from_tree = repo[sha1_from].tree + + to_tree = repo[sha1_to].tree + + diff = repo.diff(from_tree, to_tree) + # Enable rename/copy similarity detection so we can classify renames + diff.find_similar() + + return _parse_diff(diff) + return {} + + def get_diff(repo_store, repo_name, sha1_from, sha1_to, context_lines=3): """Get patch and associated commits of two sha1s. diff --git a/turnip/api/tests/test_api.py b/turnip/api/tests/test_api.py index dcb2d68..91c6ca8 100644 --- a/turnip/api/tests/test_api.py +++ b/turnip/api/tests/test_api.py @@ -1927,6 +1927,164 @@ class ApiTestCase(TestCase, ApiRepoStoreMixin): self.assertEqual(400, resp.status_code) mock_apply_async.assert_not_called() + def test_diff_stats_basic(self): + """Test diff stats for cases of adding, modifying and deleting files""" + repo = RepoFactory(self.repo_store) + c1 = repo.add_commit("a", "file.txt") + c2 = repo.add_commit("b", "file.txt", parents=[c1]) + path = f"/repo/{self.repo_path}/compare/{c1}..{c2}/stats" + resp = self.app.get(path) + self.assertEqual(200, resp.status_code) + self.assertIn("modified", resp.json) + self.assertIn("file.txt", resp.json["modified"]) + + c3 = repo.add_commit("x", "new.txt", parents=[c2]) + resp2 = self.app.get( + f"/repo/{self.repo_path}/compare/{c2}..{c3}/stats" + ) + self.assertIn("new.txt", resp2.json["added"]) + + # delete file.txt (remove then commit another file) + repo.repo.index.remove("file.txt") + c4 = repo.add_commit("y", "another.txt", parents=[c3]) + resp3 = self.app.get( + f"/repo/{self.repo_path}/compare/{c3}..{c4}/stats" + ) + self.assertIn("file.txt", resp3.json["deleted"]) + + def test_diff_stats_renamed(self): + """Test diff stats for renaming files""" + repo = RepoFactory(self.repo_store) + c1 = repo.add_commit("foo\n", "foo.txt") + repo.repo.index.remove("foo.txt") + c2 = repo.add_commit("foo\n", "bar.txt", parents=[c1]) + resp = self.app.get(f"/repo/{self.repo_path}/compare/{c1}..{c2}/stats") + self.assertEqual(200, resp.status_code) + self.assertEqual(1, len(resp.json["renamed"])) + self.assertEqual( + {"old": "foo.txt", "new": "bar.txt"}, resp.json["renamed"][0] + ) + + def test_diff_stats_triple_dot(self): + """Test triple dot diff stats""" + repo = RepoFactory(self.repo_store) + base = repo.add_commit("base", "base.txt") + left = repo.add_commit("left", "left.txt", parents=[base]) + repo.repo.index.remove("left.txt") + right = repo.add_commit("right", "right.txt", parents=[base]) + resp = self.app.get( + f"/repo/{self.repo_path}/compare/{left}...{right}/stats" + ) + self.assertEqual(200, resp.status_code) + self.assertIn("right.txt", resp.json["added"]) + self.assertNotIn("left.txt", resp.json["added"]) + + def test_diff_stats_invalid_separator(self): + """Test invalid separator returns 400 BadRequest""" + # invalid separator or invalid shas should result in 400/404 + RepoFactory(self.repo_store).build() + resp = self.app.get( + f"/repo/{self.repo_path}/compare/1++2/stats", expect_errors=True + ) + self.assertEqual(400, resp.status_code) + + def test_diff_stats_nonexistent_from_sha(self): + """Test diff stats with non-existent 'from' SHA""" + repo = RepoFactory(self.repo_store) + c1 = repo.add_commit("test", "file.txt") + + # Use a non-existent SHA for the 'from' parameter + resp = self.app.get( + f"/repo/{self.repo_path}/compare/bar..{c1.hex}/stats", + expect_errors=True, + ) + self.assertEqual(404, resp.status_code) + + def test_diff_stats_nonexistent_to_sha(self): + """Test diff stats with non-existent 'to' SHA""" + repo = RepoFactory(self.repo_store) + c1 = repo.add_commit("test", "file.txt") + + # Use a non-existent SHA for the 'to' parameter + resp = self.app.get( + f"/repo/{self.repo_path}/compare/{c1.hex}..foo/stats", + expect_errors=True, + ) + self.assertEqual(404, resp.status_code) + + def test_diff_stats_empty_to_sha(self): + """Test diff stats with empty 'to' SHA""" + repo = RepoFactory(self.repo_store) + c1 = repo.add_commit("a", "file.txt") + + resp = self.app.get( + f"/repo/{self.repo_path}/compare/{c1.hex}../stats", + expect_errors=True, + ) + self.assertEqual(400, resp.status_code) + + def test_diff_stats_empty_from_sha(self): + """Test diff stats with empty 'from' SHA""" + repo = RepoFactory(self.repo_store) + c1 = repo.add_commit("a", "file.txt") + + resp2 = self.app.get( + f"/repo/{self.repo_path}/compare/..{c1.hex}/stats", + expect_errors=True, + ) + self.assertEqual(200, resp2.status_code) + + def test_diff_stats_nonexistent_repo(self): + """Test diff stats with non-existent repository""" + resp = self.app.get( + "/repo/nonexistent/compare/abc123..def456/stats", + expect_errors=True, + ) + self.assertEqual(404, resp.status_code) + + def test_diff_stats_cross_repo_basic(self): + """Test diff stats across two repos via ephemeral alternates.""" + # Create target repo under expected name + target = RepoFactory(self.repo_store) + base = target.add_commit("base", "base.txt") + + # Create source repo under a different name and clone from target + source_repo_path = os.path.join(self.repo_root, "source") + source = RepoFactory(source_repo_path, clone_from=target) + source_commit = source.add_commit( + "source change", "right.txt", parents=[base] + ) + + # Cross-repo name format: <target>:<source> + path = ( + f"/repo/{self.repo_path}:source/compare/" + f"{base.hex}..{source_commit.hex}/stats" + ) + resp = self.app.get(path) + self.assertEqual(200, resp.status_code) + self.assertIn("right.txt", resp.json["added"]) + + def test_diff_stats_cross_repo_empty_from(self): + """Test cross-repo stats when 'from' is empty (diff from empty tree)""" + target = RepoFactory(self.repo_store) + base = target.add_commit("base", "base.txt") + + source_repo_path = os.path.join(self.repo_root, "source") + source = RepoFactory(source_repo_path, clone_from=target) + source_commit = source.add_commit( + "source change", "right.txt", parents=[base] + ) + + path = ( + f"/repo/{self.repo_path}:source/compare/" + f"..{source_commit.hex}/stats" + ) + resp = self.app.get(path) + self.assertEqual(200, resp.status_code) + # Entire tree of the 'to' commit should appear as added + self.assertIn("base.txt", resp.json["added"]) + self.assertIn("right.txt", resp.json["added"]) + class AsyncRepoCreationAPI(TestCase, ApiRepoStoreMixin): def setUp(self): diff --git a/turnip/api/tests/test_store.py b/turnip/api/tests/test_store.py index c5242f6..3f819f2 100644 --- a/turnip/api/tests/test_store.py +++ b/turnip/api/tests/test_store.py @@ -1815,3 +1815,215 @@ class RequestMergeTestCase(TestCase): commit = self.target_repo.get(ref.target) # No merge happended self.assertEqual(commit.hex, self.initial_commit.hex) + + +class DiffStatsStoreTestCase(TestCase): + """Test cases for the get_diff_stats function in the store module.""" + + def setUp(self): + super().setUp() + self.repo_store = self.useFixture(TempDir()).path + self.useFixture(EnvironmentVariable("REPO_STORE", self.repo_store)) + self.repo_path = os.path.join(self.repo_store, uuid.uuid1().hex) + self.factory = RepoFactory(self.repo_path) + + # Create base commits + c1 = self.factory.add_commit("d", "to_delete.txt") + c2 = self.factory.add_commit("a", "file.txt", parents=[c1]) + self.base = self.factory.add_commit("r", "to_rename.txt", parents=[c2]) + + def test_get_diff_stats_non_existing_sha1_from(self): + """Test file modifications are correctly identified in diff stats""" + c1 = self.factory.add_commit("b", "file.txt", parents=[self.base]) + self.assertRaises( + ValueError, + store.get_diff_stats, + self.repo_store, + self.repo_path, + "unkonwn", + c1.hex, + "..", + ) + + def test_get_diff_stats_non_existing_sha1_to(self): + """Test file modifications are correctly identified in diff stats""" + self.assertRaises( + ValueError, + store.get_diff_stats, + self.repo_store, + self.repo_path, + self.base, + "unkonwn", + "..", + ) + + def test_get_diff_stats_basic_modified(self): + """Test file modifications are correctly identified in diff stats""" + c1 = self.factory.add_commit("b", "file.txt", parents=[self.base]) + stats_mod = store.get_diff_stats( + self.repo_store, self.repo_path, self.base.hex, c1.hex, ".." + ) + self.assertEqual([], stats_mod["added"]) + self.assertIn("file.txt", stats_mod["modified"]) + self.assertEqual([], stats_mod["deleted"]) + self.assertEqual([], stats_mod["renamed"]) + + def test_get_diff_stats_basic_added(self): + """Test file additions are correctly identified in diff stats""" + c1 = self.factory.add_commit("n", "new.txt", parents=[self.base]) + stats_add = store.get_diff_stats( + self.repo_store, self.repo_path, self.base.hex, c1.hex, ".." + ) + self.assertIn("new.txt", stats_add["added"]) + self.assertEqual([], stats_add["modified"]) + self.assertEqual([], stats_add["deleted"]) + self.assertEqual([], stats_add["renamed"]) + + def test_get_diff_stats_basic_deleted(self): + """Test file deletions are correctly identified in diff stats""" + # Delete file.txt in next commit (by removing then committing a change) + self.factory.repo.index.remove("file.txt") + c1 = self.factory.add_commit("y", "another.txt", parents=[self.base]) + stats_del = store.get_diff_stats( + self.repo_store, self.repo_path, self.base.hex, c1.hex, ".." + ) + self.assertIn("another.txt", stats_del["added"]) + self.assertEqual([], stats_del["modified"]) + self.assertIn("file.txt", stats_del["deleted"]) + self.assertEqual([], stats_del["renamed"]) + + def test_get_diff_stats_renamed(self): + """Test file renames are correctly identified in diff stats""" + self.factory.repo.index.remove("to_rename.txt") + c1 = self.factory.add_commit("r", "renamed.txt", parents=[self.base]) + + stats = store.get_diff_stats( + self.repo_store, self.repo_path, self.base.hex, c1.hex, ".." + ) + self.assertEqual([], stats["added"]) + self.assertEqual([], stats["deleted"]) + self.assertEqual([], stats["modified"]) + self.assertEqual(1, len(stats["renamed"])) + self.assertEqual( + {"old": "to_rename.txt", "new": "renamed.txt"}, stats["renamed"][0] + ) + + def test_get_diff_stats_multiple_added_modified_deleted(self): + """Test diff stats with multiple changes across several commits""" + # Create commits to compare against + c1 = self.factory.add_commit("f", "file.txt", parents=[self.base]) + c2 = self.factory.add_commit("n", "new.txt", parents=[c1]) + self.factory.repo.index.remove("to_delete.txt") + c3 = self.factory.add_commit("a", "another.txt", parents=[c2]) + + self.factory.repo.index.remove("to_rename.txt") + c4 = self.factory.add_commit("r", "renamed.txt", parents=[c3]) + + stats = store.get_diff_stats( + self.repo_store, self.repo_path, self.base.hex, c4.hex, ".." + ) + self.assertIn("new.txt", stats["added"]) + self.assertIn("another.txt", stats["added"]) + self.assertIn("file.txt", stats["modified"]) + self.assertIn("to_delete.txt", stats["deleted"]) + self.assertIn( + {"old": "to_rename.txt", "new": "renamed.txt"}, stats["renamed"] + ) + + def test_get_diff_stats_no_from_sha(self): + """Test diff stats when comparing against no source commit""" + c1 = self.factory.add_commit("b", "file.txt", parents=[self.base]) + stats = store.get_diff_stats( + self.repo_store, self.repo_path, None, c1.hex, "..." + ) + self.assertIn("file.txt", stats["added"]) + self.assertEqual([], stats["modified"]) + self.assertEqual([], stats["deleted"]) + self.assertEqual([], stats["renamed"]) + + stats = store.get_diff_stats( + self.repo_store, self.repo_path, "", c1.hex, "..." + ) + self.assertIn("file.txt", stats["added"]) + self.assertEqual([], stats["modified"]) + self.assertEqual([], stats["deleted"]) + self.assertEqual([], stats["renamed"]) + + def test_get_diff_stats_triple_dot_uses_merge_base(self): + """Test that triple-dot diff notation correctly uses common base""" + left = self.factory.add_commit("left", "left.txt", parents=[self.base]) + self.factory.repo.index.remove("left.txt") + right = self.factory.add_commit( + "right", "right.txt", parents=[self.base] + ) + + # Compare left...right should use merge-base (base) vs right + stats = store.get_diff_stats( + self.repo_store, self.repo_path, left.hex, right.hex, "..." + ) + self.assertIn("right.txt", stats["added"]) + self.assertNotIn("left.txt", stats["added"]) + + def test_get_diff_stats_empty_diff(self): + """Test diff stats when comparing identical commits""" + stats = store.get_diff_stats( + self.repo_store, self.repo_path, self.base.hex, self.base.hex, ".." + ) + self.assertEqual([], stats["added"]) + self.assertEqual([], stats["modified"]) + self.assertEqual([], stats["deleted"]) + self.assertEqual([], stats["renamed"]) + + def test_cross_repo_basic_diff_stats(self): + """Compare commits across repos using ephemeral alternates.""" + # Create target repo + target_path = os.path.join(self.repo_store, "target") + target_factory = RepoFactory(target_path) + target_repo = target_factory.build() + shared_base = target_factory.add_commit("base", "base.txt") + target_repo.create_branch("main", target_repo.get(shared_base)) + target_repo.set_head("refs/heads/main") + + # Create source repo as a clone of target (shares history up to base) + source_path = os.path.join(self.repo_store, "source") + source_factory = RepoFactory(source_path, clone_from=target_factory) + source_change = source_factory.add_commit( + "source change", "right.txt", parents=[shared_base] + ) + + stats = store.get_diff_stats( + self.repo_store, + "target:source", + shared_base.hex, + source_change.hex, + "..", + ) + self.assertIn("right.txt", stats["added"]) + self.assertEqual([], stats["modified"]) + + def test_cross_repo_empty_from_diff_stats(self): + """Empty 'from' should diff against empty tree across repos.""" + # Create target repo + target_path = os.path.join(self.repo_store, "target") + target_factory = RepoFactory(target_path) + target_repo = target_factory.build() + shared_base = target_factory.add_commit("base", "base.txt") + target_repo.create_branch("main", target_repo.get(shared_base)) + target_repo.set_head("refs/heads/main") + + # Create source repo as a clone of target (shares history up to base) + source_path = os.path.join(self.repo_store, "source") + source_factory = RepoFactory(source_path, clone_from=target_factory) + source_change = source_factory.add_commit( + "source change", "right.txt", parents=[shared_base] + ) + + stats = store.get_diff_stats( + self.repo_store, + "target:source", + None, + source_change.hex, + "..", + ) + self.assertIn("base.txt", stats["added"]) # from initial commit + self.assertIn("right.txt", stats["added"]) # from source commit diff --git a/turnip/api/views.py b/turnip/api/views.py index 6931d17..116e0f4 100644 --- a/turnip/api/views.py +++ b/turnip/api/views.py @@ -414,6 +414,53 @@ class DiffAPI(BaseAPI): return patch +@resource(path="/repo/{name}/compare/{commits}/stats") +class DiffStatsAPI(BaseAPI): + """Provides HTTP API for rev-rev 'double' and 'triple dot' diff stats. + + {commits} can be in the form sha1..sha1 or sha1...sha1. + Two dots provides a simple diff, equivalent to `git diff A B`. + Three dots provides the symmetric or common ancestor diff, equivalent + to `git diff $(git-merge-base A B) B`. + The first sha1 can be empty, meaning we want the diff against an empty tree + {name} can be two : separated repositories, for a cross repository diff. + """ + + def __init__(self, request, context=None): + super().__init__() + self.request = request + + @validate_path + def get(self, repo_store, repo_name): + """Returns diff statas of two commits.""" + commits = re.split(r"(\.{2,3})", self.request.matchdict["commits"]) + if not len(commits) == 3: + return exc.HTTPBadRequest() + + sha1_from = commits[0] + diff_type = commits[1] + sha1_to = commits[2] + + # sha1_to shouldn't be empty + if sha1_to == "" or sha1_to is None: + return exc.HTTPBadRequest( + "You need to set the last commit against which to compare" + ) + + try: + diff_stats = store.get_diff_stats( + repo_store, + repo_name, + sha1_from, + sha1_to, + diff_type, + ) + except (KeyError, ValueError, GitError): + # invalid pygit2 sha1's return ValueError: 1: Ambiguous lookup + return exc.HTTPNotFound() + return diff_stats + + @resource(path="/repo/{name}/compare-merge/{base}:{head}") class DiffMergeAPI(BaseAPI): """Provides an HTTP API for merge previews.
_______________________________________________ Mailing list: https://launchpad.net/~launchpad-reviewers Post to : launchpad-reviewers@lists.launchpad.net Unsubscribe : https://launchpad.net/~launchpad-reviewers More help : https://help.launchpad.net/ListHelp