commit python-unidiff for openSUSE:Factory

root Tue, 09 Jun 2020 15:53:04 -0700

Hello community,

here is the log from the commit of package python-unidiff for openSUSE:Factory 
checked in at 2020-06-10 00:52:00
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-unidiff (Old)
 and      /work/SRC/openSUSE:Factory/.python-unidiff.new.3606 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-unidiff"

Wed Jun 10 00:52:00 2020 rev:6 rq:809785 version:0.6.0

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-unidiff/python-unidiff.changes    
2020-05-19 14:48:35.760084953 +0200
+++ /work/SRC/openSUSE:Factory/.python-unidiff.new.3606/python-unidiff.changes  
2020-06-10 00:52:07.991527895 +0200
@@ -1,0 +2,10 @@
+Wed May 27 18:05:48 UTC 2020 - Martin Liška <[email protected]>
+
+- Update to version 0.6.0
+  * Updated PatchSet constructor to accept an optional (default to False)
+    metadata_only parameter to only keep diff metadata information without
+    the diff text data (better performance).
+  * Identify and track changed binary files.
+  * Added support for git rename syntax.
+
+-------------------------------------------------------------------

Old:
----
  v0.5.5.tar.gz

New:
----
  v0.6.0.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-unidiff.spec ++++++
--- /var/tmp/diff_new_pack.wEbg6a/_old  2020-06-10 00:52:09.183530979 +0200
+++ /var/tmp/diff_new_pack.wEbg6a/_new  2020-06-10 00:52:09.183530979 +0200
@@ -18,7 +18,7 @@
 
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-unidiff
-Version:        0.5.5
+Version:        0.6.0
 Release:        0
 Summary:        Unified diff parsing/metadata extraction library
 License:        MIT

++++++ v0.5.5.tar.gz -> v0.6.0.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/.travis.yml 
new/python-unidiff-0.6.0/.travis.yml
--- old/python-unidiff-0.5.5/.travis.yml        2018-01-03 22:14:18.000000000 
+0100
+++ new/python-unidiff-0.6.0/.travis.yml        2020-05-08 00:16:37.000000000 
+0200
@@ -1,9 +1,9 @@
 language: python
 python:
   - "2.7"
-  - "3.2"
-  - "3.3"
   - "3.4"
   - "3.5"
   - "3.6"
+  - "3.7"
+  - "3.8"
 script: ./run_tests.sh
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/AUTHORS 
new/python-unidiff-0.6.0/AUTHORS
--- old/python-unidiff-0.5.5/AUTHORS    2018-01-03 22:14:18.000000000 +0100
+++ new/python-unidiff-0.6.0/AUTHORS    2020-05-08 00:16:37.000000000 +0200
@@ -21,3 +21,8 @@
   * Dan Callaghan (`@danc86`_)
   * Max Bittker (`@MaxBittker`_)
   * Volo Zyko (`@volo-zyko`_)
+  * Robert Estelle (`@erydo`_)
+  * Dylan Grafmyre
+  * Povilas Kanapickas (`@p12tic`_)
+  * Snowhite (`@CirQ`_)
+  * earonesty (`@earonesty`_)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/HISTORY 
new/python-unidiff-0.6.0/HISTORY
--- old/python-unidiff-0.5.5/HISTORY    2018-01-03 22:14:18.000000000 +0100
+++ new/python-unidiff-0.6.0/HISTORY    2020-05-08 00:16:37.000000000 +0200
@@ -1,6 +1,15 @@
 History
 -------
 
+0.6.0 - 2020-05-07
+------------------
+
+* Updated PatchSet constructor to accept an optional (default to False)
+metadata_only parameter to only keep diff metadata information without
+the diff text data (better performance).
+* Identify and track changed binary files.
+* Added support for git rename syntax.
+
 0.5.5 - 2018-01-03
 ------------------
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/README.rst 
new/python-unidiff-0.6.0/README.rst
--- old/python-unidiff-0.5.5/README.rst 2018-01-03 22:14:18.000000000 +0100
+++ new/python-unidiff-0.6.0/README.rst 2020-05-08 00:16:37.000000000 +0200
@@ -19,10 +19,10 @@
 
 ::
 
-    >>> import urllib2
+    >>> import urllib.request
     >>> from unidiff import PatchSet
-    >>> diff = 
urllib2.urlopen('https://github.com/matiasb/python-unidiff/pull/3.diff')
-    >>> encoding = diff.headers.getparam('charset')
+    >>> diff = 
urllib.request.urlopen('https://github.com/matiasb/python-unidiff/pull/3.diff')
+    >>> encoding = diff.headers.get_charsets()[0]
     >>> patch = PatchSet(diff, encoding=encoding)
     >>> patch
     <PatchSet: [<PatchedFile: .gitignore>, <PatchedFile: unidiff/patch.py>, 
<PatchedFile: unidiff/utils.py>]>
@@ -42,19 +42,22 @@
     <Hunk: @@ 109,14 110,21 @@ def __repr__(self):>
     >>> patch[2]
     <PatchedFile: unidiff/utils.py>
-    >>> print patch[2]
+    >>> print(patch[2])
+    diff --git a/unidiff/utils.py b/unidiff/utils.py
+    index eae63e6..29c896a 100644
     --- a/unidiff/utils.py
     +++ b/unidiff/utils.py
     @@ -37,4 +37,3 @@
     # - deleted line
     # \ No newline case (ignore)
     RE_HUNK_BODY_LINE = re.compile(r'^([- \+\\])')
+    -
 
 
-Load unified diff data by instantiating PatchSet with a file-like object as
-argument, or using PatchSet.from_filename class method to read diff from file.
+Load unified diff data by instantiating :code:`PatchSet` with a file-like 
object as
+argument, or using :code:`PatchSet.from_filename` class method to read diff 
from file.
 
-A PatchSet is a list of files updated by the given patch. For each PatchedFile
+A :code:`PatchSet` is a list of files updated by the given patch. For each 
:code:`PatchedFile`
 you can get stats (if it is a new, removed or modified file; the source/target
 lines; etc), besides having access to each hunk (also like a list) and its
 respective info.
@@ -81,7 +84,7 @@
 Load a local diff file
 ----------------------
 
-To instantiate PatchSet from a local file, you can use:
+To instantiate :code:`PatchSet` from a local file, you can use:
 
 ::
 
@@ -90,7 +93,7 @@
     >>> patch
     <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, 
<PatchedFile: removed_file>]>
 
-Notice the (optional) encoding parameter. If not specified, unicode input will 
be expected. Or alternatively:
+Notice the (optional) :code:`encoding` parameter. If not specified, unicode 
input will be expected. Or alternatively:
 
 ::
 
@@ -102,7 +105,7 @@
     >>> patch
     <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, 
<PatchedFile: removed_file>]>
 
-Finally, you can also instantiate PatchSet passing any iterable (and encoding, 
if needed):
+Finally, you can also instantiate :code:`PatchSet` passing any iterable (and 
encoding, if needed):
 
 ::
 
@@ -110,10 +113,19 @@
     >>> with open('tests/samples/bzr.diff', 'r') as diff:
     ...     data = diff.readlines()
     ...
-    >>> patch = PatchSet(data, encoding='utf-8')
+    >>> patch = PatchSet(data)
     >>> patch
     <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, 
<PatchedFile: removed_file>]>
 
+If you don't need to be able to rebuild the original unified diff input, you 
can pass
+:code:`metadata_only=True` (defaults to :code:`False`), which should help 
making the
+parsing more efficient:
+
+::
+
+    >>> from unidiff import PatchSet
+    >>> patch = PatchSet.from_filename('tests/samples/bzr.diff', 
encoding='utf-8', metadata_only=True)
+
 
 References
 ----------
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/bin/unidiff 
new/python-unidiff-0.6.0/bin/unidiff
--- old/python-unidiff-0.5.5/bin/unidiff        2018-01-03 22:14:18.000000000 
+0100
+++ new/python-unidiff-0.6.0/bin/unidiff        2020-05-08 00:16:37.000000000 
+0200
@@ -45,7 +45,7 @@
     if PY2:
         diff_file = codecs.getreader(encoding)(diff_file)
 
-    patch = PatchSet(diff_file)
+    patch = PatchSet(diff_file, metadata_only=(not args.show_diff))
 
     if args.show_diff:
         print(patch)
@@ -55,14 +55,21 @@
     print('-------')
     additions = 0
     deletions = 0
+    renamed_files = 0
     for f in patch:
-        additions += f.added
-        deletions += f.removed
-        print('%s:' % f.path, '+%d additions,' % f.added,
-              '-%d deletions' % f.removed)
+        if f.is_binary_file:
+            print('%s:' % f.path, '(binary file)')
+        else:
+            additions += f.added
+            deletions += f.removed
+            print('%s:' % f.path, '+%d additions,' % f.added,
+                  '-%d deletions' % f.removed)
+        renamed_files = renamed_files + 1 if f.is_rename else renamed_files
 
     print()
     print('%d modified file(s), %d added file(s), %d removed file(s)' % (
         len(patch.modified_files), len(patch.added_files),
         len(patch.removed_files)))
+    if renamed_files:
+        print('%d file(s) renamed' % renamed_files)
     print('Total: %d addition(s), %d deletion(s)' % (additions, deletions))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/setup.py 
new/python-unidiff-0.6.0/setup.py
--- old/python-unidiff-0.5.5/setup.py   2018-01-03 22:14:18.000000000 +0100
+++ new/python-unidiff-0.6.0/setup.py   2020-05-08 00:16:37.000000000 +0200
@@ -48,9 +48,10 @@
         "Programming Language :: Python :: 2",
         'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
-        'Programming Language :: Python :: 3.3',
-        'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
         'Programming Language :: Python :: 3.6',
+        'Programming Language :: Python :: 3.7',
+        'Programming Language :: Python :: 3.8',
     ],
+    test_suite='tests',
 )
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/tests/samples/git_rename.diff 
new/python-unidiff-0.6.0/tests/samples/git_rename.diff
--- old/python-unidiff-0.5.5/tests/samples/git_rename.diff      1970-01-01 
01:00:00.000000000 +0100
+++ new/python-unidiff-0.6.0/tests/samples/git_rename.diff      2020-05-08 
00:16:37.000000000 +0200
@@ -0,0 +1,13 @@
+diff --git a/added b/moved
+similarity index 85%
+rename from added
+rename to moved
+index a071991..4dbab21 100644
+--- a/added
++++ b/moved
+@@ -9,4 +9,4 @@ Some content
+ Some content
+ Some content
+ Some content
+-Some content
++Some modified content
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/tests/samples/sample8.diff 
new/python-unidiff-0.6.0/tests/samples/sample8.diff
--- old/python-unidiff-0.5.5/tests/samples/sample8.diff 1970-01-01 
01:00:00.000000000 +0100
+++ new/python-unidiff-0.6.0/tests/samples/sample8.diff 2020-05-08 
00:16:37.000000000 +0200
@@ -0,0 +1,11 @@
+diff --git a/foo.bin b/foo.bin
+new file mode 100644
+index 0000000..af000000
+Binary files /dev/null and b/foo.bin differ
+diff --git a/bar.bin b/bar.bin
+index ad000000..ac000000 100644
+Binary files a/bar.bin and b/bar.bin differ
+diff --git a/baz.bin b/baz.bin
+deleted file mode 100644
+index af000000..0000000
+Binary files a/baz.bin and /dev/null differ
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/tests/test_parser.py 
new/python-unidiff-0.6.0/tests/test_parser.py
--- old/python-unidiff-0.5.5/tests/test_parser.py       2018-01-03 
22:14:18.000000000 +0100
+++ new/python-unidiff-0.6.0/tests/test_parser.py       2020-05-08 
00:16:37.000000000 +0200
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 # The MIT License (MIT)
-# Copyright (c) 2014-2017 Matias Bordese
+# Copyright (c) 2014-2020 Matias Bordese
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -114,10 +114,10 @@
         self.assertEqual(lines[12], '@@ -5,16 +11,10 @@')
         self.assertEqual(lines[31], '@@ -22,3 +22,7 @@')
 
-    def test_parse_sample(self):
+    def _test_parse_sample(self, metadata_only):
         """Parse sample file."""
         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
-            res = PatchSet(diff_file)
+            res = PatchSet(diff_file, metadata_only=metadata_only)
 
         # three file in the patch
         self.assertEqual(len(res), 3)
@@ -128,6 +128,7 @@
         self.assertTrue(res[0].is_modified_file)
         self.assertFalse(res[0].is_removed_file)
         self.assertFalse(res[0].is_added_file)
+        self.assertFalse(res[0].is_binary_file)
 
         # Hunk 1: five additions, no deletions, a section header
         self.assertEqual(res[0][0].added, 6)
@@ -152,15 +153,23 @@
         self.assertFalse(res[1].is_modified_file)
         self.assertFalse(res[1].is_removed_file)
         self.assertTrue(res[1].is_added_file)
+        self.assertFalse(res[1].is_binary_file)
 
         # third file is removed
         self.assertFalse(res[2].is_modified_file)
         self.assertTrue(res[2].is_removed_file)
         self.assertFalse(res[2].is_added_file)
+        self.assertFalse(res[2].is_binary_file)
 
         self.assertEqual(res.added, 21)
         self.assertEqual(res.removed, 17)
 
+    def test_parse_sample_full(self):
+        self._test_parse_sample(metadata_only=False)
+
+    def test_parse_sample_metadata_only(self):
+        self._test_parse_sample(metadata_only=True)
+
     def test_patchset_compare(self):
         with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file:
             ps1 = PatchSet(diff_file)
@@ -222,6 +231,42 @@
         with open(utf8_file, 'r') as diff_file:
             self.assertRaises(UnidiffParseError, PatchSet, diff_file)
 
+    def test_parse_diff_with_new_and_modified_binary_files(self):
+        """Parse git diff file with newly added and modified binary files."""
+        utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff')
+        with open(utf8_file, 'r') as diff_file:
+            res = PatchSet(diff_file)
+
+        # three files in the patch
+        self.assertEqual(len(res), 3)
+
+        # first file is added
+        self.assertFalse(res[0].is_modified_file)
+        self.assertFalse(res[0].is_removed_file)
+        self.assertTrue(res[0].is_added_file)
+        self.assertTrue(res[0].is_binary_file)
+
+        # second file is modified
+        self.assertTrue(res[1].is_modified_file)
+        self.assertFalse(res[1].is_removed_file)
+        self.assertFalse(res[1].is_added_file)
+        self.assertTrue(res[1].is_binary_file)
+
+        # third file is removed
+        self.assertFalse(res[2].is_modified_file)
+        self.assertTrue(res[2].is_removed_file)
+        self.assertFalse(res[2].is_added_file)
+        self.assertTrue(res[2].is_binary_file)
+
+    def test_parse_round_trip_with_binary_files_in_diff(self):
+        """Parse git diff with binary files through round trip"""
+        utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff')
+        with open(utf8_file, 'r') as diff_file:
+            res1 = PatchSet(diff_file)
+
+        res2 = PatchSet(str(res1))
+        self.assertEqual(res1, res2)
+
     def test_diff_lines_linenos(self):
         with open(self.sample_file, 'rb') as diff_file:
             res = PatchSet(diff_file, encoding='utf-8')
@@ -277,6 +322,38 @@
         self.assertEqual(source_line_nos, expected_source_line_nos)
         self.assertEqual(diff_line_nos, expected_diff_line_nos)
 
+    def test_diff_hunk_positions(self):
+        with open(self.sample_file, 'rb') as diff_file:
+            res = PatchSet(diff_file, encoding='utf-8')
+        self.do_test_diff_hunk_positions(res)
+
+    def test_diff_metadata_only(self):
+        with open(self.sample_file, 'rb') as diff_file:
+            res = PatchSet(diff_file, encoding='utf-8', metadata_only=True)
+        self.do_test_diff_hunk_positions(res)
+
+    def do_test_diff_hunk_positions(self, res):
+        hunk_positions = []
+        for diff_file in res:
+            for hunk in diff_file:
+                hunk_positions.append((hunk.source_start, hunk.target_start,
+                                       hunk.source_length, hunk.target_length))
+
+        expected_hunk_positions = [
+            # File: 1, Hunk: 1
+            (1, 1, 3, 9),
+            # File: 1, Hunk: 2
+            (5, 11, 16, 10),
+            # File: 1, Hunk: 3
+            (22, 22, 3, 7),
+            # File: 2, Hunk: 1
+            (0, 1, 0, 9),
+            # File: 3, Hunk: 1
+            (1, 0, 9, 0)
+        ]
+
+        self.assertEqual(hunk_positions, expected_hunk_positions)
+
 
 class TestVCSSamples(unittest.TestCase):
     """Tests for real examples from VCS."""
@@ -327,3 +404,24 @@
             # by unidiff are the same
             with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
                 self.assertEqual(diff_file.read(), str(res))
+
+    def test_git_renaming(self):
+        tests_dir = os.path.dirname(os.path.realpath(__file__))
+        file_path = os.path.join(tests_dir, 'samples/git_rename.diff')
+        with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
+            res = PatchSet(diff_file)
+
+        self.assertEqual(len(res), 1)
+
+        patch = res[0]
+        self.assertTrue(patch.is_rename)
+        self.assertEqual(patch.added, 1)
+        self.assertEqual(patch.removed, 1)
+        self.assertEqual(len(res.modified_files), 1)
+        self.assertEqual(len(res.added_files), 0)
+        self.assertEqual(len(res.removed_files), 0)
+
+        # check that original diffs and those produced
+        # by unidiff are the same
+        with codecs.open(file_path, 'r', encoding='utf-8') as diff_file:
+            self.assertEqual(diff_file.read(), str(res))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/unidiff/__version__.py 
new/python-unidiff-0.6.0/unidiff/__version__.py
--- old/python-unidiff-0.5.5/unidiff/__version__.py     2018-01-03 
22:14:18.000000000 +0100
+++ new/python-unidiff-0.6.0/unidiff/__version__.py     2020-05-08 
00:16:37.000000000 +0200
@@ -21,4 +21,4 @@
 # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE
 # OR OTHER DEALINGS IN THE SOFTWARE.
 
-__version__ = '0.5.5'
+__version__ = '0.6.0'
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/unidiff/constants.py 
new/python-unidiff-0.6.0/unidiff/constants.py
--- old/python-unidiff-0.5.5/unidiff/constants.py       2018-01-03 
22:14:18.000000000 +0100
+++ new/python-unidiff-0.6.0/unidiff/constants.py       2020-05-08 
00:16:37.000000000 +0200
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 # The MIT License (MIT)
-# Copyright (c) 2014-2017 Matias Bordese
+# Copyright (c) 2014-2020 Matias Bordese
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -34,6 +34,12 @@
 RE_TARGET_FILENAME = re.compile(
     r'^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?')
 
+
+# git renamed files support
+RE_RENAME_SOURCE_FILENAME = re.compile(r'^rename from (?P<filename>[^\t\n]+)')
+RE_RENAME_TARGET_FILENAME = re.compile(r'^rename to (?P<filename>[^\t\n]+)')
+
+
 # @@ (source offset, length) (target offset, length) @@ (section header)
 RE_HUNK_HEADER = re.compile(
     r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?\ @@[ ]?(.*)")
@@ -50,6 +56,11 @@
 
 RE_NO_NEWLINE_MARKER = re.compile(r'^\\ No newline at end of file')
 
+RE_BINARY_DIFF = re.compile(
+    r'^Binary files? '
+    '(?P<source_filename>[^\t]+?)(?:\t(?P<source_timestamp>[\s0-9:\+-]+))?'
+    '(?: and 
(?P<target_filename>[^\t]+?)(?:\t(?P<target_timestamp>[\s0-9:\+-]+))?)? 
(differ|has changed)')
+
 DEFAULT_ENCODING = 'UTF-8'
 
 LINE_TYPE_ADDED = '+'
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/python-unidiff-0.5.5/unidiff/patch.py 
new/python-unidiff-0.6.0/unidiff/patch.py
--- old/python-unidiff-0.5.5/unidiff/patch.py   2018-01-03 22:14:18.000000000 
+0100
+++ new/python-unidiff-0.6.0/unidiff/patch.py   2020-05-08 00:16:37.000000000 
+0200
@@ -1,7 +1,7 @@
 # -*- coding: utf-8 -*-
 
 # The MIT License (MIT)
-# Copyright (c) 2014-2017 Matias Bordese
+# Copyright (c) 2014-2020 Matias Bordese
 #
 # Permission is hereby granted, free of charge, to any person obtaining a copy
 # of this software and associated documentation files (the "Software"), to deal
@@ -40,9 +40,12 @@
     RE_HUNK_BODY_LINE,
     RE_HUNK_EMPTY_BODY_LINE,
     RE_HUNK_HEADER,
+    RE_RENAME_SOURCE_FILENAME,
+    RE_RENAME_TARGET_FILENAME,
     RE_SOURCE_FILENAME,
     RE_TARGET_FILENAME,
     RE_NO_NEWLINE_MARKER,
+    RE_BINARY_DIFF,
 )
 from unidiff.errors import UnidiffParseError
 
@@ -128,19 +131,18 @@
 
     def __init__(self, src_start=0, src_len=0, tgt_start=0, tgt_len=0,
                  section_header=''):
+        super(Hunk, self).__init__()
         if src_len is None:
             src_len = 1
         if tgt_len is None:
             tgt_len = 1
-        self.added = 0  # number of added lines
-        self.removed = 0  # number of removed lines
-        self.source = []
         self.source_start = int(src_start)
         self.source_length = int(src_len)
-        self.target = []
         self.target_start = int(tgt_start)
         self.target_length = int(tgt_len)
         self.section_header = section_header
+        self._added = None
+        self._removed = None
 
     def __repr__(self):
         value = "<Hunk: @@ %d,%d %d,%d @@ %s>" % (self.source_start,
@@ -161,17 +163,26 @@
 
     def append(self, line):
         """Append the line to hunk, and keep track of source/target lines."""
+        # Make sure the line is encoded correctly. This is a no-op except for
+        # potentially raising a UnicodeDecodeError.
+        str(line)
         super(Hunk, self).append(line)
-        s = str(line)
-        if line.is_added:
-            self.added += 1
-            self.target.append(s)
-        elif line.is_removed:
-            self.removed += 1
-            self.source.append(s)
-        elif line.is_context:
-            self.target.append(s)
-            self.source.append(s)
+
+    @property
+    def added(self):
+        if self._added is not None:
+            return self._added
+        # re-calculate each time to allow for hunk modifications
+        # (which should mean metadata_only switch wasn't used)
+        return sum(1 for line in self if line.is_added)
+
+    @property
+    def removed(self):
+        if self._removed is not None:
+            return self._removed
+        # re-calculate each time to allow for hunk modifications
+        # (which should mean metadata_only switch wasn't used)
+        return sum(1 for line in self if line.is_removed)
 
     def is_valid(self):
         """Check hunk header data matches entered lines info."""
@@ -182,39 +193,53 @@
         """Hunk lines from source file (generator)."""
         return (l for l in self if l.is_context or l.is_removed)
 
+    @property
+    def source(self):
+        return [str(l) for l in self.source_lines()]
+
     def target_lines(self):
         """Hunk lines from target file (generator)."""
         return (l for l in self if l.is_context or l.is_added)
 
+    @property
+    def target(self):
+        return [str(l) for l in self.target_lines()]
+
 
 class PatchedFile(list):
     """Patch updated file, it is a list of Hunks."""
 
     def __init__(self, patch_info=None, source='', target='',
-                 source_timestamp=None, target_timestamp=None):
+                 source_timestamp=None, target_timestamp=None,
+                 is_binary_file=False, is_rename=False):
         super(PatchedFile, self).__init__()
         self.patch_info = patch_info
         self.source_file = source
         self.source_timestamp = source_timestamp
         self.target_file = target
         self.target_timestamp = target_timestamp
+        self.is_binary_file = is_binary_file
+        self.is_rename = is_rename
 
     def __repr__(self):
         return make_str("<PatchedFile: %s>") % make_str(self.path)
 
     def __str__(self):
+        source = ''
+        target = ''
         # patch info is optional
         info = '' if self.patch_info is None else str(self.patch_info)
-        source = "--- %s%s\n" % (
-            self.source_file,
-            '\t' + self.source_timestamp if self.source_timestamp else '')
-        target = "+++ %s%s\n" % (
-            self.target_file,
-            '\t' + self.target_timestamp if self.target_timestamp else '')
+        if not self.is_binary_file and self:
+            source = "--- %s%s\n" % (
+                self.source_file,
+                '\t' + self.source_timestamp if self.source_timestamp else '')
+            target = "+++ %s%s\n" % (
+                self.target_file,
+                '\t' + self.target_timestamp if self.target_timestamp else '')
         hunks = ''.join(unicode(hunk) for hunk in self)
         return info + source + target + hunks
 
-    def _parse_hunk(self, header, diff, encoding):
+    def _parse_hunk(self, header, diff, encoding, metadata_only):
         """Parse hunk details."""
         header_info = RE_HUNK_HEADER.match(header)
         hunk_info = header_info.groups()
@@ -224,38 +249,68 @@
         target_line_no = hunk.target_start
         expected_source_end = source_line_no + hunk.source_length
         expected_target_end = target_line_no + hunk.target_length
+        added = 0
+        removed = 0
 
         for diff_line_no, line in diff:
             if encoding is not None:
                 line = line.decode(encoding)
 
-            valid_line = RE_HUNK_EMPTY_BODY_LINE.match(line)
-            if not valid_line:
-                valid_line = RE_HUNK_BODY_LINE.match(line)
+            if metadata_only:
+                # quick line type detection, no regex required
+                line_type = line[0] if line else LINE_TYPE_CONTEXT
+                if line_type not in (LINE_TYPE_ADDED,
+                                     LINE_TYPE_REMOVED,
+                                     LINE_TYPE_CONTEXT,
+                                     LINE_TYPE_NO_NEWLINE):
+                    raise UnidiffParseError(
+                        'Hunk diff line expected: %s' % line)
+
+                if line_type == LINE_TYPE_ADDED:
+                    target_line_no += 1
+                    added += 1
+                elif line_type == LINE_TYPE_REMOVED:
+                    source_line_no += 1
+                    removed += 1
+                elif line_type == LINE_TYPE_CONTEXT:
+                    target_line_no += 1
+                    source_line_no += 1
 
-            if not valid_line:
-                raise UnidiffParseError('Hunk diff line expected: %s' % line)
+                # no file content tracking
+                original_line = None
 
-            line_type = valid_line.group('line_type')
-            if line_type == LINE_TYPE_EMPTY:
-                line_type = LINE_TYPE_CONTEXT
-            value = valid_line.group('value')
-            original_line = Line(value, line_type=line_type)
-            if line_type == LINE_TYPE_ADDED:
-                original_line.target_line_no = target_line_no
-                target_line_no += 1
-            elif line_type == LINE_TYPE_REMOVED:
-                original_line.source_line_no = source_line_no
-                source_line_no += 1
-            elif line_type == LINE_TYPE_CONTEXT:
-                original_line.target_line_no = target_line_no
-                target_line_no += 1
-                original_line.source_line_no = source_line_no
-                source_line_no += 1
-            elif line_type == LINE_TYPE_NO_NEWLINE:
-                pass
             else:
-                original_line = None
+                # parse diff line content
+                valid_line = RE_HUNK_BODY_LINE.match(line)
+                if not valid_line:
+                    valid_line = RE_HUNK_EMPTY_BODY_LINE.match(line)
+
+                if not valid_line:
+                    raise UnidiffParseError(
+                        'Hunk diff line expected: %s' % line)
+
+                line_type = valid_line.group('line_type')
+                if line_type == LINE_TYPE_EMPTY:
+                    line_type = LINE_TYPE_CONTEXT
+
+                value = valid_line.group('value')
+                original_line = Line(value, line_type=line_type)
+
+                if line_type == LINE_TYPE_ADDED:
+                    original_line.target_line_no = target_line_no
+                    target_line_no += 1
+                elif line_type == LINE_TYPE_REMOVED:
+                    original_line.source_line_no = source_line_no
+                    source_line_no += 1
+                elif line_type == LINE_TYPE_CONTEXT:
+                    original_line.target_line_no = target_line_no
+                    original_line.source_line_no = source_line_no
+                    target_line_no += 1
+                    source_line_no += 1
+                elif line_type == LINE_TYPE_NO_NEWLINE:
+                    pass
+                else:
+                    original_line = None
 
             # stop parsing if we got past expected number of lines
             if (source_line_no > expected_source_end or
@@ -276,6 +331,11 @@
                 target_line_no < expected_target_end):
             raise UnidiffParseError('Hunk is shorter than expected')
 
+        if metadata_only:
+            # HACK: set fixed calculated values when metadata_only is enabled
+            hunk._added = added
+            hunk._removed = removed
+
         self.append(hunk)
 
     def _add_no_newline_marker_to_last_hunk(self):
@@ -301,7 +361,8 @@
         elif (self.source_file.startswith('a/') and
               self.target_file == '/dev/null'):
             filepath = self.source_file[2:]
-        elif (self.target_file.startswith('b/') and
+        elif (self.target_file is not None and
+              self.target_file.startswith('b/') and
               self.source_file == '/dev/null'):
             filepath = self.target_file[2:]
         else:
@@ -321,12 +382,16 @@
     @property
     def is_added_file(self):
         """Return True if this patch adds the file."""
+        if self.source_file == '/dev/null':
+            return True
         return (len(self) == 1 and self[0].source_start == 0 and
                 self[0].source_length == 0)
 
     @property
     def is_removed_file(self):
         """Return True if this patch removes the file."""
+        if self.target_file == '/dev/null':
+            return True
         return (len(self) == 1 and self[0].target_start == 0 and
                 self[0].target_length == 0)
 
@@ -340,7 +405,7 @@
 class PatchSet(list):
     """A list of PatchedFiles."""
 
-    def __init__(self, f, encoding=None):
+    def __init__(self, f, encoding=None, metadata_only=False):
         super(PatchSet, self).__init__()
 
         # convert string inputs to StringIO objects
@@ -350,7 +415,10 @@
         # make sure we pass an iterator object to parse
         data = iter(f)
         # if encoding is None, assume we are reading unicode data
-        self._parse(data, encoding=encoding)
+        # when metadata_only is True, only perform a minimal metadata parsing
+        # (ie. hunks without content) which is around 2.5-6 times faster;
+        # it will still validate the diff metadata consistency and get counts
+        self._parse(data, encoding=encoding, metadata_only=metadata_only)
 
     def __repr__(self):
         return make_str('<PatchSet: %s>') % super(PatchSet, self).__repr__()
@@ -358,7 +426,7 @@
     def __str__(self):
         return ''.join(unicode(patched_file) for patched_file in self)
 
-    def _parse(self, diff, encoding):
+    def _parse(self, diff, encoding, metadata_only):
         current_file = None
         patch_info = None
 
@@ -367,28 +435,61 @@
             if encoding is not None:
                 line = line.decode(encoding)
 
+            # check for a git rename, source file
+            is_rename_source_filename = RE_RENAME_SOURCE_FILENAME.match(line)
+            if is_rename_source_filename:
+                # prefix with 'a/' to match expected git source format
+                source_file = (
+                    'a/' + is_rename_source_filename.group('filename'))
+                # keep line as patch_info
+                patch_info.append(line)
+                # reset current file
+                current_file = None
+                continue
+
+            # check for a git rename, target file
+            is_rename_target_filename = RE_RENAME_TARGET_FILENAME.match(line)
+            if is_rename_target_filename:
+                if current_file is not None:
+                    raise UnidiffParseError('Target without source: %s' % line)
+                # prefix with 'b/' to match expected git source format
+                target_file = (
+                    'b/' + is_rename_target_filename.group('filename'))
+                # keep line as patch_info
+                patch_info.append(line)
+                # add current file to PatchSet
+                current_file = PatchedFile(
+                    patch_info, source_file, target_file, None, None,
+                    is_rename=True)
+                self.append(current_file)
+                continue
+
             # check for source file header
             is_source_filename = RE_SOURCE_FILENAME.match(line)
             if is_source_filename:
                 source_file = is_source_filename.group('filename')
                 source_timestamp = is_source_filename.group('timestamp')
-                # reset current file
-                current_file = None
+                # reset current file, unless we are processing a rename
+                # (in that case, source files should match)
+                if current_file is not None and not (current_file.is_rename and
+                        current_file.source_file == source_file):
+                    current_file = None
                 continue
 
             # check for target file header
             is_target_filename = RE_TARGET_FILENAME.match(line)
             if is_target_filename:
-                if current_file is not None:
+                if current_file is not None and not current_file.is_rename:
                     raise UnidiffParseError('Target without source: %s' % line)
                 target_file = is_target_filename.group('filename')
                 target_timestamp = is_target_filename.group('timestamp')
-                # add current file to PatchSet
-                current_file = PatchedFile(
-                    patch_info, source_file, target_file,
-                    source_timestamp, target_timestamp)
-                self.append(current_file)
-                patch_info = None
+                if current_file is None:
+                    # add current file to PatchSet
+                    current_file = PatchedFile(
+                        patch_info, source_file, target_file,
+                        source_timestamp, target_timestamp)
+                    self.append(current_file)
+                    patch_info = None
                 continue
 
             # check for hunk header
@@ -396,7 +497,7 @@
             if is_hunk_header:
                 if current_file is None:
                     raise UnidiffParseError('Unexpected hunk found: %s' % line)
-                current_file._parse_hunk(line, diff, encoding)
+                current_file._parse_hunk(line, diff, encoding, metadata_only)
                 continue
 
             # check for no newline marker
@@ -412,6 +513,18 @@
                 current_file._append_trailing_empty_line()
                 continue
 
+            is_binary_diff = RE_BINARY_DIFF.match(line)
+            if is_binary_diff:
+                source_file = is_binary_diff.group('source_filename')
+                target_file = is_binary_diff.group('target_filename')
+                patch_info.append(line)
+                current_file = PatchedFile(
+                    patch_info, source_file, target_file, is_binary_file=True)
+                self.append(current_file)
+                patch_info = None
+                current_file = None
+                continue
+
             # if nothing has matched above then this line is a patch info
             if patch_info is None:
                 current_file = None

commit python-unidiff for openSUSE:Factory

Reply via email to