From: Dave Borowitz <dborow...@google.com> Topological ordering is applied by correcting the original commit time order output. This is fairly slow, requiring O(N) additional time and storage compared to not reordering. (Note that the O(N) estimate is under the assumption that commits are not too far out of topological order.)
Change-Id: I77713a036ff69a4f5f7fb36a5bbbe7193e3e4721 --- dulwich/tests/test_walk.py | 57 ++++++++++++++++++++++++++++++++++++++++++++ dulwich/walk.py | 56 +++++++++++++++++++++++++++++++++++++++---- 2 files changed, 108 insertions(+), 5 deletions(-) diff --git a/dulwich/tests/test_walk.py b/dulwich/tests/test_walk.py index ead7bd1..c5d970e 100644 --- a/dulwich/tests/test_walk.py +++ b/dulwich/tests/test_walk.py @@ -18,6 +18,9 @@ """Tests for commit walking functionality.""" +from dulwich._compat import ( + permutations, + ) from dulwich.diff_tree import ( CHANGE_ADD, CHANGE_MODIFY, @@ -37,8 +40,10 @@ from dulwich.objects import ( Blob, ) from dulwich.walk import ( + ORDER_TOPO, WalkEntry, Walker, + _topo_reorder ) from dulwich.tests import TestCase from utils import ( @@ -135,6 +140,14 @@ class WalkerTest(TestCase): self.assertWalkYields([y4], [y4.id], exclude=[x3.id]) self.assertWalkYields([x3, x2], [x3.id], exclude=[y4.id]) + def test_merge(self): + c1, c2, c3, c4 = self.make_commits([[1], [2, 1], [3, 1], [4, 2, 3]]) + self.assertWalkYields([c4, c3, c2, c1], [c4.id]) + self.assertWalkYields([c3, c1], [c3.id]) + self.assertWalkYields([c2, c1], [c2.id]) + self.assertWalkYields([c4, c3], [c4.id], exclude=[c2.id]) + self.assertWalkYields([c4, c2], [c4.id], exclude=[c3.id]) + def test_reverse(self): c1, c2, c3 = self.make_linear_commits(3) self.assertWalkYields([c1, c2, c3], [c3.id], reverse=True) @@ -344,3 +357,47 @@ class WalkerTest(TestCase): # c1 would also match, but we've deleted it, and it should get pruned # even with over-scanning. self.assertWalkYields([c11, c10, c8], [c11.id], since=7) + + def assertTopoOrderEqual(self, expected_commits, commits): + entries = [TestWalkEntry(c, None) for c in commits] + actual_ids = [e.commit.id for e in list(_topo_reorder(entries))] + self.assertEqual([c.id for c in expected_commits], actual_ids) + + def test_topo_reorder_linear(self): + commits = self.make_linear_commits(5) + commits.reverse() + for perm in permutations(commits): + self.assertTopoOrderEqual(commits, perm) + + def test_topo_reorder_multiple_parents(self): + c1, c2, c3 = self.make_commits([[1], [2], [3, 1, 2]]) + # Already sorted, so totally FIFO. + self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1]) + self.assertTopoOrderEqual([c3, c1, c2], [c3, c1, c2]) + + # c3 causes one parent to be yielded. + self.assertTopoOrderEqual([c3, c2, c1], [c2, c3, c1]) + self.assertTopoOrderEqual([c3, c1, c2], [c1, c3, c2]) + + # c3 causes both parents to be yielded. + self.assertTopoOrderEqual([c3, c2, c1], [c1, c2, c3]) + self.assertTopoOrderEqual([c3, c2, c1], [c2, c1, c3]) + + def test_topo_reorder_multiple_children(self): + c1, c2, c3 = self.make_commits([[1], [2, 1], [3, 1]]) + + # c2 and c3 are FIFO but c1 moves to the end. + self.assertTopoOrderEqual([c3, c2, c1], [c3, c2, c1]) + self.assertTopoOrderEqual([c3, c2, c1], [c3, c1, c2]) + self.assertTopoOrderEqual([c3, c2, c1], [c1, c3, c2]) + + self.assertTopoOrderEqual([c2, c3, c1], [c2, c3, c1]) + self.assertTopoOrderEqual([c2, c3, c1], [c2, c1, c3]) + self.assertTopoOrderEqual([c2, c3, c1], [c1, c2, c3]) + + def test_out_of_order_children(self): + c1, c2, c3, c4, c5 = self.make_commits( + [[1], [2, 1], [3, 2], [4, 1], [5, 3, 4]], + times=[2, 1, 3, 4, 5]) + self.assertWalkYields([c5, c4, c3, c1, c2], [c5.id]) + self.assertWalkYields([c5, c4, c3, c2, c1], [c5.id], order=ORDER_TOPO) diff --git a/dulwich/walk.py b/dulwich/walk.py index a54f52d..83b2939 100644 --- a/dulwich/walk.py +++ b/dulwich/walk.py @@ -18,6 +18,13 @@ """General implementation of walking commits and their contents.""" + +try: + from collections import defaultdict +except ImportError: + from _compat import defaultdict + +import collections import heapq import itertools import os @@ -33,6 +40,9 @@ from dulwich.errors import ( ) ORDER_DATE = 'date' +ORDER_TOPO = 'topo' + +ALL_ORDERS = (ORDER_DATE, ORDER_TOPO) # Maximum number of commits to walk past a commit time boundary. _MAX_EXTRA_COMMITS = 5 @@ -170,7 +180,7 @@ class Walker(object): iterator protocol. The constructor takes a single argument, the Walker. """ - if order not in (ORDER_DATE,): + if order not in ALL_ORDERS: raise ValueError('Unknown walk order %s' % order) self.store = store self.include = include @@ -257,14 +267,50 @@ class Walker(object): def _reorder(self, results): """Possibly reorder a results iterator. - :param results: An iterator of results, in the order returned from the - queue_cls. - :return: An iterator or list of results, in the order required by the - Walker. + :param results: An iterator of WalkEntry objects, in the order returned + from the queue_cls. + :return: An iterator or list of WalkEntry objects, in the order required + by the Walker. """ + if self.order == ORDER_TOPO: + results = _topo_reorder(results) if self.reverse: results = reversed(list(results)) return results def __iter__(self): return iter(self._reorder(iter(self._next, None))) + + +def _topo_reorder(entries): + """Reorder an iterable of entries topologically. + + This works best assuming the entries are already in almost-topological + order, e.g. in commit time order. + + :param entries: An iterable of WalkEntry objects. + :yield: WalkEntry objects from entries in FIFO order, except where a parent + would be yielded before any of its children. + """ + todo = collections.deque() + pending = {} + num_children = defaultdict(int) + for entry in entries: + todo.append(entry) + for p in entry.commit.parents: + num_children[p] += 1 + + while todo: + entry = todo.popleft() + commit = entry.commit + commit_id = commit.id + if num_children[commit_id]: + pending[commit_id] = entry + continue + for parent_id in commit.parents: + num_children[parent_id] -= 1 + if not num_children[parent_id]: + parent_entry = pending.pop(parent_id, None) + if parent_entry: + todo.appendleft(parent_entry) + yield entry -- 1.7.3.1 _______________________________________________ Mailing list: https://launchpad.net/~dulwich-users Post to : dulwich-users@lists.launchpad.net Unsubscribe : https://launchpad.net/~dulwich-users More help : https://help.launchpad.net/ListHelp