This is an automated email from the ASF dual-hosted git repository.

brondsem pushed a commit to branch db/8515
in repository https://gitbox.apache.org/repos/asf/allura.git

commit 31af18a0edd8cc8d2cc2abe2046f114deec3f495
Author: Dave Brondsema <[email protected]>
AuthorDate: Fri Jul 7 18:00:10 2023 -0400

    [#8515] convert to sxsdiff impl
---
 Allura/allura/lib/diff.py                          | 255 ++++++++++++++-------
 Allura/allura/tests/test_diff.py                   |  33 +--
 .../forgegit/tests/functional/test_controllers.py  |  24 +-
 requirements.in                                    |   1 +
 requirements.txt                                   |   5 +
 5 files changed, 209 insertions(+), 109 deletions(-)

diff --git a/Allura/allura/lib/diff.py b/Allura/allura/lib/diff.py
index 77cffdf26..000ecce01 100644
--- a/Allura/allura/lib/diff.py
+++ b/Allura/allura/lib/diff.py
@@ -14,17 +14,38 @@
 #       KIND, either express or implied.  See the License for the
 #       specific language governing permissions and limitations
 #       under the License.
+from __future__ import annotations
 
-import difflib
+import html
+import contextlib
+import logging
+from collections.abc import Iterable, Generator
 
+import sxsdiff
+from diff_match_patch import diff_match_patch
 import six
+from sxsdiff.calculator import LineChange, ElementsHolder, PlainElement, 
AdditionElement, DeletionElement
 
-from allura.lib import helpers as h
+log = logging.getLogger(__name__)
 
+# There are a few cases where we get empty trailing '' elements.  Maybe a bug 
within sxsdiff.calculator?
+# we work around them, using these constants in a few places below
+emptyUnchangedElem = PlainElement('')
+spaceUnchangedElem = PlainElement(' ')
+emptyAddElem = AdditionElement('')
+emptyDelElem = DeletionElement('')
+
+
+def is_single_chg(chg_parts: ElementsHolder) -> bool:
+    return len(chg_parts) <= 1 or (
+        len(chg_parts) == 2 and chg_parts.elements[-1] in [emptyUnchangedElem, 
spaceUnchangedElem]
+    )
 
-class HtmlSideBySideDiff:
 
-    table_tmpl = '''
+class SxsOutputGenerator(sxsdiff.BaseGenerator):
+    # based on sxsdiff.generators.github.GitHubStyledGenerator
+
+    table_tmpl_start = '''
 <table class="side-by-side-diff">
   <thead>
     <th class="lineno"></th>
@@ -32,89 +53,167 @@ class HtmlSideBySideDiff:
     <th class="lineno"></th>
     <th>%s</th>
   </thead>
-%s
-</table>
 '''.strip()
 
-    line_tmpl = '''
-<tr>
-  <td class="lineno">%s</td>
-  <td%s><pre>%s</pre></td>
-  <td class="lineno">%s</td>
-  <td%s><pre>%s</pre></td>
-</tr>'''.strip()
-
-    def __init__(self, tabsize=4):
-        self._tabsize = tabsize
-
-    def _render_change(self, aline, bline, anum=None, bnum=None, astyle=None, 
bstyle=None):
-        astyle = (' class="%s"' % astyle) if astyle else ''
-        bstyle = (' class="%s"' % bstyle) if bstyle else ''
-        anum = anum if anum is not None else ''
-        bnum = bnum if bnum is not None else ''
-        return self.line_tmpl % (anum, astyle, aline, bnum, bstyle, bline)
-
-    def _preprocess(self, line):
-        if not line:
-            return line
-        line = h.really_unicode(line).expandtabs(self._tabsize)
-        return line.replace('&', '&amp;').replace('<', '&lt;').replace('>', 
'&gt;')
-
-    def _replace_marks(self, line):
-        # if entire line was added/removed/changed
-        # we strip first mark and return corresponding flag
-        # this is needed to be able to highlight entire <td> in the table,
-        # rather then highlighting only chunk inside the <span>
-        flag = ''
-        if line.startswith('\0+') and line.endswith('\1'):
-            line = line.lstrip('\0+').rstrip('\1')
-            flag = 'diff-add'
-        elif line.startswith('\0-') and line.endswith('\1'):
-            line = line.lstrip('\0-').rstrip('\1')
-            flag = 'diff-rem'
-        elif '\0^' in line or '\0+' in line or '\0-' in line:
-            flag = 'diff-chg'
-
-        # replace all other marks with <span>'s
-        span = '<span class="%s">'
-        line = line.replace('\0+', span % 'diff-add')
-        line = line.replace('\0-', span % 'diff-rem')
-        line = line.replace('\0^', span % 'diff-chg')
-        line = line.replace('\1', '</span>')
-        return line, flag
-
-    def _make_line(self, diff):
-        aline, bline, changed = diff
-        if changed is None:
-            # context separation
-            return self._render_change('...', '...', '', '', 'diff-gap', 
'diff-gap')
-        anum, aline = aline
-        bnum, bline = bline
-        aline = self._preprocess(aline)
-        bline = self._preprocess(bline)
-        if not changed:
-            # line doesn't changed - render with default style
-            return self._render_change(aline, bline, anum, bnum)
-
-        aline, aflag = self._replace_marks(aline)
-        bline, bflag = self._replace_marks(bline)
-        return self._render_change(aline, bline, anum, bnum, aflag, bflag)
-
-    def make_table(self, a, b, adesc=None, bdesc=None, context=5):
+    table_tmpl_end = '</table>'
+
+    def __init__(self, adesc: str, bdesc: str):
+        self.adesc = adesc
+        self.bdesc = bdesc
+
+    def _spit(self, content):
+        self.out += content + '\n'
+
+    def run(self, diff_result: Iterable[LineChange | None]):
+        self.out = ''
+        super().run(diff_result)
+        return self.out
+
+    def visit_row(self, line_change: LineChange | None):
+        if line_change is None:
+            self._spit_context_marker()
+            self._spit_context_marker()
+        elif not line_change.changed:
+            self._spit_unchanged_side(line_change.left_no, line_change.left)
+            self._spit_unchanged_side(line_change.right_no, line_change.right)
+        else:
+            whole_line_changed = is_single_chg(line_change.left) and 
is_single_chg(line_change.right)
+            self._spit_changed_side('diff-rem' if whole_line_changed else 
'diff-chg',
+                                    line_change.left_no, line_change.left)
+            self._spit_changed_side('diff-add' if whole_line_changed else 
'diff-chg',
+                                    line_change.right_no, line_change.right)
+
+    @contextlib.contextmanager
+    def wrap_row(self, line_change):
+        self._spit('<tr>')
+        yield
+        self._spit('</tr>')
+
+    @contextlib.contextmanager
+    def wrap_result(self, sxs_result):
+        self._spit(self.table_tmpl_start % (self.adesc, self.bdesc))
+        yield
+        self._spit(self.table_tmpl_end)
+
+    def _spit_context_marker(self):
+        context = {
+            'mode': 'diff-gap',
+            'lineno': '',
+            'code': '...',
+        }
+        self._spit_side_from_context(context)
+
+    def _spit_unchanged_side(self, lineno, holder):
+        context = {
+            'mode': 'context',
+            'lineno': lineno,
+            'code': html.escape(str(holder)),
+        }
+        self._spit_side_from_context(context)
+
+    def _spit_changed_side(self, mode, lineno, holder):
+        if not holder:
+            self._spit_side_from_context({'lineno': '', 'code': '', 'mode': 
''})
+            return
+
+        bits = []
+        for elem in holder.elements:
+            piece = html.escape(str(elem))
+            if elem.is_changed and not is_single_chg(holder):
+                if elem.flag == diff_match_patch.DIFF_INSERT:
+                    clss = 'diff-add'
+                elif elem.flag == diff_match_patch.DIFF_DELETE:
+                    clss = 'diff-rem'
+                else:
+                    clss = ''
+                bits.append(f'<span class="{clss}">{piece}</span>')
+            else:
+                bits.append(piece)
+        code = ''.join(bits)
+
+        context = {
+            'mode': mode,
+            'lineno': lineno,
+            'code': code,
+        }
+        self._spit_side_from_context(context)
+
+    def _spit_side_from_context(self, context):
+        self._spit(f'  <td class="lineno">{context["lineno"]}</td>')
+        mode = context['mode']
+        clss = (' class="%s"' % mode) if mode not in ['context', ''] else ''
+        self._spit(f'  <td{clss}><pre>{context["code"]}</pre></td>')
+
+
+def sxsdiff_cleanup_trailing(input_lines: Iterable[LineChange]) -> 
Iterable[LineChange]:
+    cleaned_lines = list(input_lines)
+    if not cleaned_lines:
+        return []
+    lastline = cleaned_lines[-1]
+    if (
+            (lastline.left.elements == [emptyUnchangedElem] and 
lastline.right.elements == [emptyUnchangedElem]) or
+            (lastline.left.elements == [] and lastline.right.elements == 
[emptyAddElem]) or
+            (lastline.left.elements == [emptyDelElem] and 
lastline.right.elements == [])
+    ):
+        cleaned_lines.pop()
+    return cleaned_lines
+
+
+def update_with_context_chunks(input_lines: Iterable[LineChange], num_context: 
int) -> Iterable[LineChange | None]:
+    """
+    Remove identical lines, except several around any changed lines
+    """
+    lines_with_flag: list[list] = []  # [LineChange, bool] in each item
+    changed_line_idxs: list[int] = []
+
+    for current_line, line in enumerate(input_lines):
+        lines_with_flag.append([line, False])
+        if line.changed:
+            changed_line_idxs.append(current_line)
+
+    for changed_line_idx in changed_line_idxs:
+        for context_idx in range(changed_line_idx-num_context, 
changed_line_idx+num_context+1):
+            try:
+                lines_with_flag[context_idx][1] = True
+            except IndexError:
+                pass
+
+    output_lines: list[LineChange | None] = []
+    prev_was_shown = True
+    for line, show in lines_with_flag:
+        if show:
+            if not prev_was_shown:
+                output_lines.append(None)
+            output_lines.append(line)
+
+        prev_was_shown = show
+
+    return output_lines
+
+
+class HtmlSideBySideDiff:
+
+    def make_table(self, a: list[str], b: list[str], adesc=None, bdesc=None, 
context=5, tabsize=4) -> str:
         """Make html table that displays side-by-side diff
 
         Arguments:
-         - a -- list of text lines to be compared to b
-         - b -- list of text lines to be compared to a
+         - a -- list of text lines with \n, to be compared to b
+         - b -- list of text lines with \n, to be compared to a
          - adesc -- description of the 'a' lines (e.g. filename)
          - bdesc -- description of the 'b' lines (e.g. filename)
          - context -- number of context lines to display
 
-        Uses difflib._mdiff function to generate diff.
+        Uses sxsdiff/diff_match_patch which has much better performance than 
stdlib htmldiff/_mdiff
+            https://github.com/python/cpython/issues/51180
         """
         adesc = six.ensure_text(adesc) or ''
         bdesc = six.ensure_text(bdesc) or ''
-        diff = difflib._mdiff(a, b, context=context)
-        lines = [self._make_line(d) for d in diff]
-        return h.really_unicode(
-            self.table_tmpl % (adesc, bdesc, '\n'.join(lines)))
+
+        atext = ''.join(a).expandtabs(tabsize)
+        btext = ''.join(b).expandtabs(tabsize)
+
+        sxsdiff_result = sxsdiff.DiffCalculator().run(atext, btext)
+        sxsdiff_cleaned = sxsdiff_cleanup_trailing(sxsdiff_result)
+        sxsdiff_with_contexts = update_with_context_chunks(sxsdiff_cleaned, 
context)
+
+        return SxsOutputGenerator(adesc, bdesc).run(sxsdiff_with_contexts)
diff --git a/Allura/allura/tests/test_diff.py b/Allura/allura/tests/test_diff.py
index 46a9d8f21..153f37798 100644
--- a/Allura/allura/tests/test_diff.py
+++ b/Allura/allura/tests/test_diff.py
@@ -22,6 +22,10 @@ from allura.lib.diff import HtmlSideBySideDiff
 
 class TestHtmlSideBySideDiff:
 
+    # these tests are representative but not complete
+    # there are a lot of different nuanced situations (e.g. trailing blanks 
etc)
+    # and manually testing after changes is recommended too
+
     def test_make_table(self):
         a = 'line A1\nline 2\nline 3'.splitlines(keepends=True)
         b = 'line 1B\nline 
X\ntotalchg\n\tnew<script>&"'.splitlines(keepends=True)
@@ -35,27 +39,27 @@ class TestHtmlSideBySideDiff:
   </thead>
 <tr>
   <td class="lineno">1</td>
-  <td class="diff-chg"><pre>line <span class="diff-rem">A</span>1\n</pre></td>
+  <td class="diff-chg"><pre>line <span class="diff-rem">A</span>1</pre></td>
   <td class="lineno">1</td>
-  <td class="diff-chg"><pre>line 1<span class="diff-add">B</span>\n</pre></td>
+  <td class="diff-chg"><pre>line 1<span class="diff-add">B</span></pre></td>
 </tr>
 <tr>
   <td class="lineno">2</td>
-  <td class="diff-chg"><pre>line <span class="diff-chg">2</span>\n</pre></td>
+  <td class="diff-chg"><pre>line <span class="diff-rem">2</span></pre></td>
   <td class="lineno">2</td>
-  <td class="diff-chg"><pre>line <span class="diff-chg">X</span>\n</pre></td>
+  <td class="diff-chg"><pre>line <span class="diff-add">X</span></pre></td>
 </tr>
 <tr>
   <td class="lineno">3</td>
   <td class="diff-rem"><pre>line 3</pre></td>
   <td class="lineno">3</td>
-  <td class="diff-add"><pre>totalchg\n</pre></td>
+  <td class="diff-add"><pre>totalchg</pre></td>
 </tr>
 <tr>
   <td class="lineno"></td>
-  <td><pre>\n</pre></td>
+  <td><pre></pre></td>
   <td class="lineno">4</td>
-  <td class="diff-add"><pre>  new&lt;script&gt;&amp;"</pre></td>
+  <td class="diff-add"><pre>    new&lt;script&gt;&amp;&quot;</pre></td>
 </tr>
 </table>
 '''.strip()
@@ -81,11 +85,12 @@ class TestHtmlSideBySideDiff:
 </tr>
 <tr>
   <td class="lineno">2</td>
-  <td><pre>line 2\n</pre></td>
+  <td><pre>line 2</pre></td>
   <td class="lineno">2</td>
-  <td><pre>line 2\n</pre></td>
+  <td><pre>line 2</pre></td>
 </tr>
-'''.strip()
+'''.strip()  # more lines follow in full output
+
         html = HtmlSideBySideDiff().make_table(a, b, 'file a', 'file 
b').strip()
         assert start in html
 
@@ -95,9 +100,9 @@ class TestHtmlSideBySideDiff:
         middle = '''\
 <tr>
   <td class="lineno">6</td>
-  <td><pre>line 6\n</pre></td>
+  <td><pre>line 6</pre></td>
   <td class="lineno">6</td>
-  <td><pre>line 6\n</pre></td>
+  <td><pre>line 6</pre></td>
 </tr>
 <tr>
   <td class="lineno"></td>
@@ -107,9 +112,9 @@ class TestHtmlSideBySideDiff:
 </tr>
 <tr>
   <td class="lineno">8</td>
-  <td><pre>line 8\n</pre></td>
+  <td><pre>line 8</pre></td>
   <td class="lineno">8</td>
-  <td><pre>line 8\n</pre></td>
+  <td><pre>line 8</pre></td>
 </tr>
 '''.strip()
         html = HtmlSideBySideDiff().make_table(a, b, 'file a', 'file 
b').strip()
diff --git a/ForgeGit/forgegit/tests/functional/test_controllers.py 
b/ForgeGit/forgegit/tests/functional/test_controllers.py
index 24021ed40..cb5c7bba7 100644
--- a/ForgeGit/forgegit/tests/functional/test_controllers.py
+++ b/ForgeGit/forgegit/tests/functional/test_controllers.py
@@ -334,14 +334,6 @@ class TestRootController(_TestCase):
         ci = self._get_ci(repo='/p/test/weird-chars/')
         resp = self.app.get(h.urlquote(ci + 'tree/привіт.txt') + 
'?diff=407950e8fba4dbc108ffbce0128ed1085c52cfd7')
         diffhtml = str(resp.html.select_one('.diffbrowser'))
-        print("HTML", diffhtml)
-        print("=============================")
-        print(textwrap.dedent('''\
-                    <span class="gd">--- a/привіт.txt</span>
-                    <span class="gi">+++ b/привіт.txt</span>
-                    <span class="gu">@@ -1 +1,2 @@</span>
-                    <span class="w"> </span>Привіт!
-                    <span class="gi">+Which means Hello!</span>'''))
         assert (textwrap.dedent('''\
                     <span class="gd">--- a/привіт.txt</span>
                     <span class="gi">+++ b/привіт.txt</span>
@@ -361,20 +353,18 @@ class TestRootController(_TestCase):
                     </thead>
                     <tr>
                     <td class="lineno">1</td>
-                    <td><pre>Привіт!
-                    </pre></td>
+                    <td><pre>Привіт!</pre></td>
                     <td class="lineno">1</td>
-                    <td><pre>Привіт!
-                    </pre></td>
+                    <td><pre>Привіт!</pre></td>
                     </tr>
                     <tr>
-                    <td class="lineno"></td>
-                    <td><pre>
-                    </pre></td>
                     <td class="lineno">2</td>
-                    <td class="diff-add"><pre>Which means Hello!
-                    </pre></td>''') in
+                    <td class="diff-rem"><pre></pre></td>
+                    <td class="lineno">2</td>
+                    <td class="diff-add"><pre>Which means 
Hello!</pre></td>''') in
                 diffhtml)
+        # FIXME: we really shouldn't have: class="lineno">2 and 
class="diff-rem"><pre></pre>
+        # but I couldn't get sxsdiff to handle it correctly (maybe a bug or 
subtle issue with trailing \n at end)
 
     def test_diff_view_mode(self):
         ci = self._get_ci()
diff --git a/requirements.in b/requirements.in
index fe67af3b5..fd162d5c3 100644
--- a/requirements.in
+++ b/requirements.in
@@ -41,6 +41,7 @@ requests-oauthlib
 # for taskd proc name switching
 setproctitle
 six
+sxsdiff
 TimerMiddleware
 TurboGears2
 WebHelpers2
diff --git a/requirements.txt b/requirements.txt
index 3e49f9a8e..fc9e8f4a5 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -30,6 +30,8 @@ cryptography==41.0.1
     # via -r requirements.in
 decorator==5.1.1
     # via -r requirements.in
+diff-match-patch==20230430
+    # via sxsdiff
 docutils==0.19
     # via pypeline
 easywidgets==0.4.1
@@ -196,11 +198,14 @@ six==1.16.0
     #   paste
     #   pastescript
     #   python-dateutil
+    #   sxsdiff
     #   webhelpers2
 smmap==5.0.0
     # via gitdb
 soupsieve==2.4.1
     # via beautifulsoup4
+sxsdiff==0.3.0
+    # via -r requirements.in
 termcolor==2.2.0
     # via pytest-sugar
 testfixtures==7.1.0

Reply via email to