Colin Watson has proposed merging ~cjwatson/turnip:detect-merges-stop into 
turnip:master.

Commit message:
Add stop commits to detect-merges

Requested reviews:
  Launchpad code reviewers (launchpad-reviewers)
Related bugs:
  Bug #1955040 in turnip: "Merge detection times out during ref scans in 
repositories with deep history"
  https://bugs.launchpad.net/turnip/+bug/1955040

For more details, see:
https://code.launchpad.net/~cjwatson/turnip/+git/turnip/+merge/445341

The `detect-merges` API can be very slow for repositories with deep history, 
because as currently defined it has to walk through the whole ancestry of a 
given target commit looking for "mainline" (first parent only) commits that 
introduce a merge of any of the given source commits.  In particular, this 
often times out for Linux kernel repositories.

However, we can do much better.  Merge proposals are normally only created in 
Launchpad once the target branch has been scanned, and we can reasonably assume 
that when a merge proposal is created it won't yet have been merged.  This 
means that Launchpad could tell turnip's `detect-merges` API to stop walking 
through history whenever it encounters the commit that it had previously 
scanned on the target branch, and then we'd only have to look through the 
changes introduced by a push rather than the entire history of the branch.  
This should be much less likely to time out.
-- 
Your team Launchpad code reviewers is requested to review the proposed merge of 
~cjwatson/turnip:detect-merges-stop into turnip:master.
diff --git a/turnip/api/store.py b/turnip/api/store.py
index 7a2f883..72554ac 100644
--- a/turnip/api/store.py
+++ b/turnip/api/store.py
@@ -703,7 +703,7 @@ def get_commits(repo_store, repo_name, commit_oids):
         return commits
 
 
-def detect_merges(repo_store, repo_name, target_oid, source_oids):
+def detect_merges(repo_store, repo_name, target_oid, source_oids, stop_oids):
     """Check whether each of the requested commits has been merged."""
     with open_repo(repo_store, repo_name) as repo:
         target = repo.get(target_oid)
@@ -717,7 +717,10 @@ def detect_merges(repo_store, repo_name, target_oid, source_oids):
         merge_info = {}
         last_mainline = target_oid
         next_mainline = target_oid
-        for commit in repo.walk(target_oid, GIT_SORT_TOPOLOGICAL):
+        walker = repo.walk(target_oid, GIT_SORT_TOPOLOGICAL)
+        for stop_oid in stop_oids:
+            walker.hide(stop_oid)
+        for commit in walker:
             if commit.id.hex == next_mainline:
                 last_mainline = commit.id.hex
                 if commit.parent_ids:
diff --git a/turnip/api/tests/test_api.py b/turnip/api/tests/test_api.py
index 0e566b5..1a6f195 100644
--- a/turnip/api/tests/test_api.py
+++ b/turnip/api/tests/test_api.py
@@ -982,6 +982,36 @@ class ApiTestCase(TestCase, ApiRepoStoreMixin):
         self.assertEqual(200, resp.status_code)
         self.assertEqual({b.hex: c.hex, e.hex: g.hex}, resp.json)
 
+    def test_repo_detect_merges_stop(self):
+        """detect-merges stops walking at specified stop commits.
+
+        This reduces the depth of history it needs to scan, which can be
+        important for large repositories.
+        """
+        factory = RepoFactory(self.repo_store)
+        # A---C---D---G---H
+        #  \ /       /
+        #   B---E---F---I
+        a = factory.add_commit('a\n', 'file')
+        b = factory.add_commit('b\n', 'file', parents=[a])
+        c = factory.add_commit('c\n', 'file', parents=[a, b])
+        d = factory.add_commit('d\n', 'file', parents=[c])
+        e = factory.add_commit('e\n', 'file', parents=[b])
+        f = factory.add_commit('f\n', 'file', parents=[e])
+        g = factory.add_commit('g\n', 'file', parents=[d, f])
+        h = factory.add_commit('h\n', 'file', parents=[g])
+        i = factory.add_commit('i\n', 'file', parents=[f])
+        resp = self.app.post_json('/repo/{}/detect-merges/{}'.format(
+            self.repo_path, h),
+            {'sources': [b.hex, e.hex, i.hex], 'stop': [c.hex]})
+        self.assertEqual(200, resp.status_code)
+        self.assertEqual({e.hex: g.hex}, resp.json)
+        resp = self.app.post_json('/repo/{}/detect-merges/{}'.format(
+            self.repo_path, h),
+            {'sources': [b.hex, e.hex, i.hex], 'stop': [c.hex, g.hex]})
+        self.assertEqual(200, resp.status_code)
+        self.assertEqual({}, resp.json)
+
     def test_repo_blob(self):
         """Getting an existing blob works."""
         factory = RepoFactory(self.repo_store)
diff --git a/turnip/api/views.py b/turnip/api/views.py
index 2f199af..db98f05 100644
--- a/turnip/api/views.py
+++ b/turnip/api/views.py
@@ -426,10 +426,12 @@ class DetectMergesAPI(BaseAPI):
         omitted from the response.
         """
         target = self.request.matchdict['target']
-        sources = extract_cstruct(self.request)['body'].get('sources')
+        payload = extract_cstruct(self.request)['body']
+        sources = payload.get('sources')
+        stops = payload.get('stop', [])
         try:
             merges = store.detect_merges(
-                repo_store, repo_name, target, sources)
+                repo_store, repo_name, target, sources, stops)
         except GitError:
             return exc.HTTPNotFound()
         return merges
_______________________________________________
Mailing list: https://launchpad.net/~launchpad-reviewers
Post to     : launchpad-reviewers@lists.launchpad.net
Unsubscribe : https://launchpad.net/~launchpad-reviewers
More help   : https://help.launchpad.net/ListHelp

Reply via email to