I believe the attached patch would provide the requested functionality.
From 0ae6d16037cc4912e5a165ee050e31e99402c912 Mon Sep 17 00:00:00 2001 From: Maria Glukhova <siamez...@gmail.com> Date: Sat, 24 Dec 2016 12:29:57 +0200 Subject: [PATCH] Add detection of order-only difference in plain text format.
Detect if the text files' contents differ only in line ordering, and give an appropriate comment. Closes: #848049 --- diffoscope/comparators/text.py | 14 ++++++++++++++ tests/comparators/test_text.py | 8 ++++++++ tests/data/text_order1 | 7 +++++++ tests/data/text_order2 | 7 +++++++ tests/data/text_order_expected_diff | 11 +++++++++++ 5 files changed, 47 insertions(+) create mode 100644 tests/data/text_order1 create mode 100644 tests/data/text_order2 create mode 100644 tests/data/text_order_expected_diff diff --git a/diffoscope/comparators/text.py b/diffoscope/comparators/text.py index 909ff98..f7f423f 100644 --- a/diffoscope/comparators/text.py +++ b/diffoscope/comparators/text.py @@ -24,6 +24,17 @@ from diffoscope.difference import Difference from diffoscope.comparators.binary import File +def order_only_difference(unified_diff): + diff_lines = unified_diff.splitlines() + added_lines = [line[1:] for line in diff_lines if line.startswith('+')] + removed_lines = [line[1:] for line in diff_lines if line.startswith('-')] + # Faster check: does number of lines match? + if len(added_lines) != len(removed_lines): + return False + # Compare the sorted lists so that repeated lines must occur equally often on both sides. + return sorted(added_lines) == sorted(removed_lines) + + class TextFile(File): RE_FILE_TYPE = re.compile(r'\btext\b') @@ -44,6 +55,9 @@ class TextFile(File): with codecs.open(self.path, 'r', encoding=my_encoding) as my_content, \ codecs.open(other.path, 'r', encoding=other_encoding) as other_content: difference = Difference.from_text_readers(my_content, other_content, self.name, other.name, source) + # Check if difference is only in line order. 
+ if difference and order_only_difference(difference.unified_diff): + difference.add_comment("ordering differences only") if my_encoding != other_encoding: if difference is None: difference = Difference(None, self.path, other.path, source) diff --git a/tests/comparators/test_text.py b/tests/comparators/test_text.py index 9892826..afa0716 100644 --- a/tests/comparators/test_text.py +++ b/tests/comparators/test_text.py @@ -65,3 +65,11 @@ def test_difference_between_iso88591_and_unicode_only(iso8859, tmpdir): def test_compare_non_existing(monkeypatch, ascii1): assert_non_existing(monkeypatch, ascii1, has_null_source=False, has_details=False) + +text_order1 = load_fixture(data('text_order1')) +text_order2 = load_fixture(data('text_order2')) + +def test_ordering_differences(text_order1, text_order2): + difference = text_order1.compare(text_order2) + assert difference.comments == ['ordering differences only'] + assert difference.unified_diff == open(data('text_order_expected_diff')).read() diff --git a/tests/data/text_order1 b/tests/data/text_order1 new file mode 100644 index 0000000..9f85b81 --- /dev/null +++ b/tests/data/text_order1 @@ -0,0 +1,7 @@ +These +lines +follow +in +some +order +. diff --git a/tests/data/text_order2 b/tests/data/text_order2 new file mode 100644 index 0000000..7890b50 --- /dev/null +++ b/tests/data/text_order2 @@ -0,0 +1,7 @@ +These +some +order +follow +in +lines +. diff --git a/tests/data/text_order_expected_diff b/tests/data/text_order_expected_diff new file mode 100644 index 0000000..2d8b915 --- /dev/null +++ b/tests/data/text_order_expected_diff @@ -0,0 +1,11 @@ +@@ -1,7 +1,7 @@ + These +-lines +-follow +-in + some + order ++follow ++in ++lines + . -- 2.11.0
_______________________________________________ Reproducible-builds mailing list Reproducible-builds@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/reproducible-builds