On 02/25/2013 08:48 PM, Kai Willadsen wrote: > > You're talking about your suggestion of doing an input split followed > by a double compare? In that case, yes, you should always get the sync > point included. > > I was talking about a different option entirely of simply forcing the > algorithm through that point. Since the result should be almost > identical (bar some heuristic speed-ups maybe?), I think doing > whatever is easiest is the best plan. >
I am attaching a patch with SyncPointMyersSequenceMatcher class that does the split & merge as described before. There are also some unit tests included (at last). I have no idea what the UI should look like hence I did not even try to implement anything but the 'backend' stuff. Cheers, Piotr
>From 9bd7ceac635d50b658c501e143a820a9c5fd6cc2 Mon Sep 17 00:00:00 2001 From: Piotr Piastucki <[email protected]> Date: Tue, 19 Mar 2013 23:28:06 +0100 Subject: [PATCH] Add SyncPointMyersSequenceMatcher and matcher-related unit tests --- meld/matchers.py | 40 ++++++++++++++++++++++++++++++ meld/test_matchers.py | 66 +++++++++++++++++++++++++++++++++++++++++++++++++ 2 files changed, 106 insertions(+) create mode 100644 meld/test_matchers.py diff --git a/meld/matchers.py b/meld/matchers.py index 27b41cc..cf103b7 100644 --- a/meld/matchers.py +++ b/meld/matchers.py @@ -359,3 +359,43 @@ class InlineMyersSequenceMatcher(MyersSequenceMatcher): a = indexed_a b = indexed_b return (a, b) + +class SyncPointMyersSequenceMatcher(MyersSequenceMatcher): + + def __init__(self, isjunk=None, a="", b="", syncpoints=None): + MyersSequenceMatcher.__init__(self, isjunk, a, b) + self.isjunk = isjunk + self.syncpoints = syncpoints + + def initialise(self): + if self.syncpoints is None or len(self.syncpoints) == 0: + for i in MyersSequenceMatcher.initialise(self): + yield i + else: + chunks = [] + ai = 0 + bi = 0 + for aj, bj in self.syncpoints: + chunks.append((ai, bi, self.a[ai:aj], self.b[bi:bj])) + ai = aj + bi = bj + if ai < len(self.a) or bi < len(self.b): + chunks.append((ai, bi, self.a[ai:len(self.a)], self.b[bi:len(self.b)])) + self.matching_blocks = [] + for ai, bi, a, b in chunks: + matcher = MyersSequenceMatcher(self.isjunk, a, b) + for i in matcher.initialise(): + yield i + blocks = matcher.get_matching_blocks() + l = len(self.matching_blocks) - 1 + if l >= 0 and len(blocks) > 1: + aj = self.matching_blocks[l][0] + bj = self.matching_blocks[l][1] + bl = self.matching_blocks[l][2] + if aj + bl == ai and bj + bl == bi and blocks[0][0] == 0 and blocks[0][1] == 0: + block = blocks.pop(0) + self.matching_blocks[l] = (aj, bj, bl + block[2]) + for x, y, l in blocks[:-1]: + self.matching_blocks.append((ai + x, bi + y, l)) + self.matching_blocks.append((len(self.a), len(self.b), 0)) + diff --git a/meld/test_matchers.py b/meld/test_matchers.py new file mode 100644 index 0000000..9cb7069 --- /dev/null +++ b/meld/test_matchers.py @@ -0,0 +1,66 @@ + +import unittest +import matchers + +class MatchersTests(unittest.TestCase): + + def testBasicMatcher(self): + a = list('abcbdefgabcdefg') + b = list('gfabcdefcd') + r = [(0, 2, 3), (4, 5, 3), (10, 8, 2), (15, 10, 0)] + matcher = matchers.MyersSequenceMatcher(None, a, b) + blocks = matcher.get_matching_blocks() + self.assertEqual(len(blocks), len(r)) + for i in range(len(blocks)): + self.assertEqual(blocks[i], r[i]) + + def testPostprocessingCleanup(self): + a = list('abcfabgcd') + b = list('afabcgabgcabcd') + r = [(0, 2, 3), (4, 6, 3), (7, 12, 2), (9, 14, 0)] + matcher = matchers.MyersSequenceMatcher(None, a, b) + blocks = matcher.get_matching_blocks() + self.assertEqual(len(blocks), len(r)) + for i in range(len(blocks)): + self.assertEqual(blocks[i], r[i]) + + def testInlineMatcher(self): + a = 'red, blue, yellow, white' + b = 'black green, hue, white' + r = [(17, 16, 7), (24, 23, 0)] + matcher = matchers.InlineMyersSequenceMatcher(None, a, b) + blocks = matcher.get_matching_blocks() + self.assertEqual(len(blocks), len(r)) + for i in range(len(blocks)): + self.assertEqual(blocks[i], r[i]) + + def testSyncPointMatcher0(self): + a = list('012a3456c789') + b = list('0a3412b5678') + r = [(0, 0, 1), (3, 1, 3), (6, 7, 2), (9, 9, 2), (12, 11, 0)] + matcher = matchers.SyncPointMyersSequenceMatcher(None, a, b) + blocks = matcher.get_matching_blocks() + self.assertEqual(len(blocks), len(r)) + for i in range(len(blocks)): + self.assertEqual(blocks[i], r[i]) + + def testSyncPointMatcher1(self): + a = list('012a3456c789') + b = list('0a3412b5678') + r = [(0, 0, 1), (1, 4, 2), (6, 7, 2), (9, 9, 2), (12, 11, 0)] + matcher = matchers.SyncPointMyersSequenceMatcher(None, a, b, [(3,6)]) + blocks = matcher.get_matching_blocks() + self.assertEqual(len(blocks), len(r)) + for i in range(len(blocks)): + self.assertEqual(blocks[i], r[i]) + + def testSyncPointMatcher2(self): + a = list('012a3456c789') + b = list('02a341b5678') + r = [(0, 0, 1), (2, 1, 4), (9, 9, 2), (12, 11, 0)] + matcher = matchers.SyncPointMyersSequenceMatcher(None, a, b, [(3,2), (8,6)]) + blocks = matcher.get_matching_blocks() + self.assertEqual(len(blocks), len(r)) + self.assertEqual(blocks[0], r[0]) + self.assertEqual(blocks[1], r[1]) + \ No newline at end of file -- 1.7.10.4
_______________________________________________ meld-list mailing list [email protected] https://mail.gnome.org/mailman/listinfo/meld-list
