This is an automated email from the ASF dual-hosted git repository. brondsem pushed a commit to branch db/8515 in repository https://gitbox.apache.org/repos/asf/allura.git
commit 31af18a0edd8cc8d2cc2abe2046f114deec3f495 Author: Dave Brondsema <[email protected]> AuthorDate: Fri Jul 7 18:00:10 2023 -0400 [#8515] convert to sxsdiff impl --- Allura/allura/lib/diff.py | 255 ++++++++++++++------- Allura/allura/tests/test_diff.py | 33 +-- .../forgegit/tests/functional/test_controllers.py | 24 +- requirements.in | 1 + requirements.txt | 5 + 5 files changed, 209 insertions(+), 109 deletions(-) diff --git a/Allura/allura/lib/diff.py b/Allura/allura/lib/diff.py index 77cffdf26..000ecce01 100644 --- a/Allura/allura/lib/diff.py +++ b/Allura/allura/lib/diff.py @@ -14,17 +14,38 @@ # KIND, either express or implied. See the License for the # specific language governing permissions and limitations # under the License. +from __future__ import annotations -import difflib +import html +import contextlib +import logging +from collections.abc import Iterable, Generator +import sxsdiff +from diff_match_patch import diff_match_patch import six +from sxsdiff.calculator import LineChange, ElementsHolder, PlainElement, AdditionElement, DeletionElement -from allura.lib import helpers as h +log = logging.getLogger(__name__) +# There are a few cases where we get empty trailing '' elements. Maybe a bug within sxsdiff.calculator? +# we work around them, using these constants in a few places below +emptyUnchangedElem = PlainElement('') +spaceUnchangedElem = PlainElement(' ') +emptyAddElem = AdditionElement('') +emptyDelElem = DeletionElement('') + + +def is_single_chg(chg_parts: ElementsHolder) -> bool: + return len(chg_parts) <= 1 or ( + len(chg_parts) == 2 and chg_parts.elements[-1] in [emptyUnchangedElem, spaceUnchangedElem] + ) -class HtmlSideBySideDiff: - table_tmpl = ''' +class SxsOutputGenerator(sxsdiff.BaseGenerator): + # based on sxsdiff.generators.github.GitHubStyledGenerator + + table_tmpl_start = ''' <table class="side-by-side-diff"> <thead> <th class="lineno"></th> @@ -32,89 +53,167 @@ class HtmlSideBySideDiff: <th class="lineno"></th> <th>%s</th> </thead> -%s -</table> '''.strip() - line_tmpl = ''' -<tr> - <td class="lineno">%s</td> - <td%s><pre>%s</pre></td> - <td class="lineno">%s</td> - <td%s><pre>%s</pre></td> -</tr>'''.strip() - - def __init__(self, tabsize=4): - self._tabsize = tabsize - - def _render_change(self, aline, bline, anum=None, bnum=None, astyle=None, bstyle=None): - astyle = (' class="%s"' % astyle) if astyle else '' - bstyle = (' class="%s"' % bstyle) if bstyle else '' - anum = anum if anum is not None else '' - bnum = bnum if bnum is not None else '' - return self.line_tmpl % (anum, astyle, aline, bnum, bstyle, bline) - - def _preprocess(self, line): - if not line: - return line - line = h.really_unicode(line).expandtabs(self._tabsize) - return line.replace('&', '&').replace('<', '<').replace('>', '>') - - def _replace_marks(self, line): - # if entire line was added/removed/changed - # we strip first mark and return corresponding flag - # this is needed to be able to highlight entire <td> in the table, - # rather then highlighting only chunk inside the <span> - flag = '' - if line.startswith('\0+') and line.endswith('\1'): - line = line.lstrip('\0+').rstrip('\1') - flag = 'diff-add' - elif line.startswith('\0-') and line.endswith('\1'): - line = line.lstrip('\0-').rstrip('\1') - flag = 'diff-rem' - elif '\0^' in line or '\0+' in line or '\0-' in line: - flag = 'diff-chg' - - # replace all other marks with <span>'s - span = '<span class="%s">' - line = line.replace('\0+', span % 'diff-add') - line = line.replace('\0-', span % 'diff-rem') - line = line.replace('\0^', span % 'diff-chg') - line = line.replace('\1', '</span>') - return line, flag - - def _make_line(self, diff): - aline, bline, changed = diff - if changed is None: - # context separation - return self._render_change('...', '...', '', '', 'diff-gap', 'diff-gap') - anum, aline = aline - bnum, bline = bline - aline = self._preprocess(aline) - bline = self._preprocess(bline) - if not changed: - # line doesn't changed - render with default style - return self._render_change(aline, bline, anum, bnum) - - aline, aflag = self._replace_marks(aline) - bline, bflag = self._replace_marks(bline) - return self._render_change(aline, bline, anum, bnum, aflag, bflag) - - def make_table(self, a, b, adesc=None, bdesc=None, context=5): + table_tmpl_end = '</table>' + + def __init__(self, adesc: str, bdesc: str): + self.adesc = adesc + self.bdesc = bdesc + + def _spit(self, content): + self.out += content + '\n' + + def run(self, diff_result: Iterable[LineChange | None]): + self.out = '' + super().run(diff_result) + return self.out + + def visit_row(self, line_change: LineChange | None): + if line_change is None: + self._spit_context_marker() + self._spit_context_marker() + elif not line_change.changed: + self._spit_unchanged_side(line_change.left_no, line_change.left) + self._spit_unchanged_side(line_change.right_no, line_change.right) + else: + whole_line_changed = is_single_chg(line_change.left) and is_single_chg(line_change.right) + self._spit_changed_side('diff-rem' if whole_line_changed else 'diff-chg', + line_change.left_no, line_change.left) + self._spit_changed_side('diff-add' if whole_line_changed else 'diff-chg', + line_change.right_no, line_change.right) + + @contextlib.contextmanager + def wrap_row(self, line_change): + self._spit('<tr>') + yield + self._spit('</tr>') + + @contextlib.contextmanager + def wrap_result(self, sxs_result): + self._spit(self.table_tmpl_start % (self.adesc, self.bdesc)) + yield + self._spit(self.table_tmpl_end) + + def _spit_context_marker(self): + context = { + 'mode': 'diff-gap', + 'lineno': '', + 'code': '...', + } + self._spit_side_from_context(context) + + def _spit_unchanged_side(self, lineno, holder): + context = { + 'mode': 'context', + 'lineno': lineno, + 'code': html.escape(str(holder)), + } + self._spit_side_from_context(context) + + def _spit_changed_side(self, mode, lineno, holder): + if not holder: + self._spit_side_from_context({'lineno': '', 'code': '', 'mode': ''}) + return + + bits = [] + for elem in holder.elements: + piece = html.escape(str(elem)) + if elem.is_changed and not is_single_chg(holder): + if elem.flag == diff_match_patch.DIFF_INSERT: + clss = 'diff-add' + elif elem.flag == diff_match_patch.DIFF_DELETE: + clss = 'diff-rem' + else: + clss = '' + bits.append(f'<span class="{clss}">{piece}</span>') + else: + bits.append(piece) + code = ''.join(bits) + + context = { + 'mode': mode, + 'lineno': lineno, + 'code': code, + } + self._spit_side_from_context(context) + + def _spit_side_from_context(self, context): + self._spit(f' <td class="lineno">{context["lineno"]}</td>') + mode = context['mode'] + clss = (' class="%s"' % mode) if mode not in ['context', ''] else '' + self._spit(f' <td{clss}><pre>{context["code"]}</pre></td>') + + +def sxsdiff_cleanup_trailing(input_lines: Iterable[LineChange]) -> Iterable[LineChange]: + cleaned_lines = list(input_lines) + if not cleaned_lines: + return [] + lastline = cleaned_lines[-1] + if ( + (lastline.left.elements == [emptyUnchangedElem] and lastline.right.elements == [emptyUnchangedElem]) or + (lastline.left.elements == [] and lastline.right.elements == [emptyAddElem]) or + (lastline.left.elements == [emptyDelElem] and lastline.right.elements == []) + ): + cleaned_lines.pop() + return cleaned_lines + + +def update_with_context_chunks(input_lines: Iterable[LineChange], num_context: int) -> Iterable[LineChange | None]: + """ + Remove identical lines, except several around any changed lines + """ + lines_with_flag: list[list] = [] # [LineChange, bool] in each item + changed_line_idxs: list[int] = [] + + for current_line, line in enumerate(input_lines): + lines_with_flag.append([line, False]) + if line.changed: + changed_line_idxs.append(current_line) + + for changed_line_idx in changed_line_idxs: + for context_idx in range(changed_line_idx-num_context, changed_line_idx+num_context+1): + try: + lines_with_flag[context_idx][1] = True + except IndexError: + pass + + output_lines: list[LineChange | None] = [] + prev_was_shown = True + for line, show in lines_with_flag: + if show: + if not prev_was_shown: + output_lines.append(None) + output_lines.append(line) + + prev_was_shown = show + + return output_lines + + +class HtmlSideBySideDiff: + + def make_table(self, a: list[str], b: list[str], adesc=None, bdesc=None, context=5, tabsize=4) -> str: """Make html table that displays side-by-side diff Arguments: - - a -- list of text lines to be compared to b - - b -- list of text lines to be compared to a + - a -- list of text lines with \n, to be compared to b + - b -- list of text lines with \n, to be compared to a - adesc -- description of the 'a' lines (e.g. filename) - bdesc -- description of the 'b' lines (e.g. filename) - context -- number of context lines to display - Uses difflib._mdiff function to generate diff. + Uses sxsdiff/diff_match_patch which has much better performance than stdlib htmldiff/_mdiff + https://github.com/python/cpython/issues/51180 """ adesc = six.ensure_text(adesc) or '' bdesc = six.ensure_text(bdesc) or '' - diff = difflib._mdiff(a, b, context=context) - lines = [self._make_line(d) for d in diff] - return h.really_unicode( - self.table_tmpl % (adesc, bdesc, '\n'.join(lines))) + + atext = ''.join(a).expandtabs(tabsize) + btext = ''.join(b).expandtabs(tabsize) + + sxsdiff_result = sxsdiff.DiffCalculator().run(atext, btext) + sxsdiff_cleaned = sxsdiff_cleanup_trailing(sxsdiff_result) + sxsdiff_with_contexts = update_with_context_chunks(sxsdiff_cleaned, context) + + return SxsOutputGenerator(adesc, bdesc).run(sxsdiff_with_contexts) diff --git a/Allura/allura/tests/test_diff.py b/Allura/allura/tests/test_diff.py index 46a9d8f21..153f37798 100644 --- a/Allura/allura/tests/test_diff.py +++ b/Allura/allura/tests/test_diff.py @@ -22,6 +22,10 @@ from allura.lib.diff import HtmlSideBySideDiff class TestHtmlSideBySideDiff: + # these tests are representative but not complete + # there are a lot of different nuanced situations (e.g. trailing blanks etc) + # and manually testing after changes is recommended too + def test_make_table(self): a = 'line A1\nline 2\nline 3'.splitlines(keepends=True) b = 'line 1B\nline X\ntotalchg\n\tnew<script>&"'.splitlines(keepends=True) @@ -35,27 +39,27 @@ class TestHtmlSideBySideDiff: </thead> <tr> <td class="lineno">1</td> - <td class="diff-chg"><pre>line <span class="diff-rem">A</span>1\n</pre></td> + <td class="diff-chg"><pre>line <span class="diff-rem">A</span>1</pre></td> <td class="lineno">1</td> - <td class="diff-chg"><pre>line 1<span class="diff-add">B</span>\n</pre></td> + <td class="diff-chg"><pre>line 1<span class="diff-add">B</span></pre></td> </tr> <tr> <td class="lineno">2</td> - <td class="diff-chg"><pre>line <span class="diff-chg">2</span>\n</pre></td> + <td class="diff-chg"><pre>line <span class="diff-rem">2</span></pre></td> <td class="lineno">2</td> - <td class="diff-chg"><pre>line <span class="diff-chg">X</span>\n</pre></td> + <td class="diff-chg"><pre>line <span class="diff-add">X</span></pre></td> </tr> <tr> <td class="lineno">3</td> <td class="diff-rem"><pre>line 3</pre></td> <td class="lineno">3</td> - <td class="diff-add"><pre>totalchg\n</pre></td> + <td class="diff-add"><pre>totalchg</pre></td> </tr> <tr> <td class="lineno"></td> - <td><pre>\n</pre></td> + <td><pre></pre></td> <td class="lineno">4</td> - <td class="diff-add"><pre> new<script>&"</pre></td> + <td class="diff-add"><pre> new<script>&"</pre></td> </tr> </table> '''.strip() @@ -81,11 +85,12 @@ class TestHtmlSideBySideDiff: </tr> <tr> <td class="lineno">2</td> - <td><pre>line 2\n</pre></td> + <td><pre>line 2</pre></td> <td class="lineno">2</td> - <td><pre>line 2\n</pre></td> + <td><pre>line 2</pre></td> </tr> -'''.strip() +'''.strip() # more lines follow in full output + html = HtmlSideBySideDiff().make_table(a, b, 'file a', 'file b').strip() assert start in html @@ -95,9 +100,9 @@ class TestHtmlSideBySideDiff: middle = '''\ <tr> <td class="lineno">6</td> - <td><pre>line 6\n</pre></td> + <td><pre>line 6</pre></td> <td class="lineno">6</td> - <td><pre>line 6\n</pre></td> + <td><pre>line 6</pre></td> </tr> <tr> <td class="lineno"></td> @@ -107,9 +112,9 @@ class TestHtmlSideBySideDiff: </tr> <tr> <td class="lineno">8</td> - <td><pre>line 8\n</pre></td> + <td><pre>line 8</pre></td> <td class="lineno">8</td> - <td><pre>line 8\n</pre></td> + <td><pre>line 8</pre></td> </tr> '''.strip() html = HtmlSideBySideDiff().make_table(a, b, 'file a', 'file b').strip() diff --git a/ForgeGit/forgegit/tests/functional/test_controllers.py b/ForgeGit/forgegit/tests/functional/test_controllers.py index 24021ed40..cb5c7bba7 100644 --- a/ForgeGit/forgegit/tests/functional/test_controllers.py +++ b/ForgeGit/forgegit/tests/functional/test_controllers.py @@ -334,14 +334,6 @@ class TestRootController(_TestCase): ci = self._get_ci(repo='/p/test/weird-chars/') resp = self.app.get(h.urlquote(ci + 'tree/привіт.txt') + '?diff=407950e8fba4dbc108ffbce0128ed1085c52cfd7') diffhtml = str(resp.html.select_one('.diffbrowser')) - print("HTML", diffhtml) - print("=============================") - print(textwrap.dedent('''\ - <span class="gd">--- a/привіт.txt</span> - <span class="gi">+++ b/привіт.txt</span> - <span class="gu">@@ -1 +1,2 @@</span> - <span class="w"> </span>Привіт! - <span class="gi">+Which means Hello!</span>''')) assert (textwrap.dedent('''\ <span class="gd">--- a/привіт.txt</span> <span class="gi">+++ b/привіт.txt</span> @@ -361,20 +353,18 @@ class TestRootController(_TestCase): </thead> <tr> <td class="lineno">1</td> - <td><pre>Привіт! - </pre></td> + <td><pre>Привіт!</pre></td> <td class="lineno">1</td> - <td><pre>Привіт! - </pre></td> + <td><pre>Привіт!</pre></td> </tr> <tr> - <td class="lineno"></td> - <td><pre> - </pre></td> <td class="lineno">2</td> - <td class="diff-add"><pre>Which means Hello! - </pre></td>''') in + <td class="diff-rem"><pre></pre></td> + <td class="lineno">2</td> + <td class="diff-add"><pre>Which means Hello!</pre></td>''') in diffhtml) + # FIXME: we really shouldn't have: class="lineno">2 and class="diff-rem"><pre></pre> + # but I couldn't get sxsdiff to handle it correctly (maybe a bug or subtle issue with trailing \n at end) def test_diff_view_mode(self): ci = self._get_ci() diff --git a/requirements.in b/requirements.in index fe67af3b5..fd162d5c3 100644 --- a/requirements.in +++ b/requirements.in @@ -41,6 +41,7 @@ requests-oauthlib # for taskd proc name switching setproctitle six +sxsdiff TimerMiddleware TurboGears2 WebHelpers2 diff --git a/requirements.txt b/requirements.txt index 3e49f9a8e..fc9e8f4a5 100644 --- a/requirements.txt +++ b/requirements.txt @@ -30,6 +30,8 @@ cryptography==41.0.1 # via -r requirements.in decorator==5.1.1 # via -r requirements.in +diff-match-patch==20230430 + # via sxsdiff docutils==0.19 # via pypeline easywidgets==0.4.1 @@ -196,11 +198,14 @@ six==1.16.0 # paste # pastescript # python-dateutil + # sxsdiff # webhelpers2 smmap==5.0.0 # via gitdb soupsieve==2.4.1 # via beautifulsoup4 +sxsdiff==0.3.0 + # via -r requirements.in termcolor==2.2.0 # via pytest-sugar testfixtures==7.1.0
