Hello community, here is the log from the commit of package python-unidiff for openSUSE:Factory checked in at 2020-06-10 00:52:00 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-unidiff (Old) and /work/SRC/openSUSE:Factory/.python-unidiff.new.3606 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-unidiff" Wed Jun 10 00:52:00 2020 rev:6 rq:809785 version:0.6.0 Changes: -------- --- /work/SRC/openSUSE:Factory/python-unidiff/python-unidiff.changes 2020-05-19 14:48:35.760084953 +0200 +++ /work/SRC/openSUSE:Factory/.python-unidiff.new.3606/python-unidiff.changes 2020-06-10 00:52:07.991527895 +0200 @@ -1,0 +2,10 @@ +Wed May 27 18:05:48 UTC 2020 - Martin Liška <[email protected]> + +- Update to version 0.6.0 + * Updated PatchSet constructor to accept an optional (default to False) + metadata_only parameter to only keep diff metadata information without + the diff text data (better performance). + * Identify and track changed binary files. + * Added support for git rename syntax. + +------------------------------------------------------------------- Old: ---- v0.5.5.tar.gz New: ---- v0.6.0.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-unidiff.spec ++++++ --- /var/tmp/diff_new_pack.wEbg6a/_old 2020-06-10 00:52:09.183530979 +0200 +++ /var/tmp/diff_new_pack.wEbg6a/_new 2020-06-10 00:52:09.183530979 +0200 @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-unidiff -Version: 0.5.5 +Version: 0.6.0 Release: 0 Summary: Unified diff parsing/metadata extraction library License: MIT ++++++ v0.5.5.tar.gz -> v0.6.0.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/.travis.yml new/python-unidiff-0.6.0/.travis.yml --- old/python-unidiff-0.5.5/.travis.yml 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/.travis.yml 2020-05-08 00:16:37.000000000 +0200 @@ -1,9 +1,9 @@ language: python python: - "2.7" - - "3.2" - - "3.3" - "3.4" - "3.5" - "3.6" + - "3.7" + - "3.8" script: ./run_tests.sh diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/AUTHORS new/python-unidiff-0.6.0/AUTHORS --- old/python-unidiff-0.5.5/AUTHORS 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/AUTHORS 2020-05-08 00:16:37.000000000 +0200 @@ -21,3 +21,8 @@ * Dan Callaghan (`@danc86`_) * Max Bittker (`@MaxBittker`_) * Volo Zyko (`@volo-zyko`_) + * Robert Estelle (`@erydo`_) + * Dylan Grafmyre + * Povilas Kanapickas (`@p12tic`_) + * Snowhite (`@CirQ`_) + * earonesty (`@earonesty`_) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/HISTORY new/python-unidiff-0.6.0/HISTORY --- old/python-unidiff-0.5.5/HISTORY 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/HISTORY 2020-05-08 00:16:37.000000000 +0200 @@ -1,6 +1,15 @@ History ------- +0.6.0 - 2020-05-07 +---------------- + +* Updated PatchSet constructor to accept an optional (default to False) +metadata_only parameter to only keep diff metadata information without +the diff text data (better performance). +* Identify and track changed binary files. +* Added support for git rename syntax. + 0.5.5 - 2018-01-03 ------------------ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/README.rst new/python-unidiff-0.6.0/README.rst --- old/python-unidiff-0.5.5/README.rst 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/README.rst 2020-05-08 00:16:37.000000000 +0200 @@ -19,10 +19,10 @@ :: - >>> import urllib2 + >>> import urllib.request >>> from unidiff import PatchSet - >>> diff = urllib2.urlopen('https://github.com/matiasb/python-unidiff/pull/3.diff') - >>> encoding = diff.headers.getparam('charset') + >>> diff = urllib.request.urlopen('https://github.com/matiasb/python-unidiff/pull/3.diff') + >>> encoding = diff.headers.get_charsets()[0] >>> patch = PatchSet(diff, encoding=encoding) >>> patch <PatchSet: [<PatchedFile: .gitignore>, <PatchedFile: unidiff/patch.py>, <PatchedFile: unidiff/utils.py>]> @@ -42,19 +42,22 @@ <Hunk: @@ 109,14 110,21 @@ def __repr__(self):> >>> patch[2] <PatchedFile: unidiff/utils.py> - >>> print patch[2] + >>> print(patch[2]) + diff --git a/unidiff/utils.py b/unidiff/utils.py + index eae63e6..29c896a 100644 --- a/unidiff/utils.py +++ b/unidiff/utils.py @@ -37,4 +37,3 @@ # - deleted line # \ No newline case (ignore) RE_HUNK_BODY_LINE = re.compile(r'^([- \+\\])') + - -Load unified diff data by instantiating PatchSet with a file-like object as -argument, or using PatchSet.from_filename class method to read diff from file. +Load unified diff data by instantiating :code:`PatchSet` with a file-like object as +argument, or using :code:`PatchSet.from_filename` class method to read diff from file. -A PatchSet is a list of files updated by the given patch. For each PatchedFile +A :code:`PatchSet` is a list of files updated by the given patch. For each :code:`PatchedFile` you can get stats (if it is a new, removed or modified file; the source/target lines; etc), besides having access to each hunk (also like a list) and its respective info. @@ -81,7 +84,7 @@ Load a local diff file ---------------------- -To instantiate PatchSet from a local file, you can use: +To instantiate :code:`PatchSet` from a local file, you can use: :: @@ -90,7 +93,7 @@ >>> patch <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, <PatchedFile: removed_file>]> -Notice the (optional) encoding parameter. If not specified, unicode input will be expected. Or alternatively: +Notice the (optional) :code:`encoding` parameter. If not specified, unicode input will be expected. Or alternatively: :: @@ -102,7 +105,7 @@ >>> patch <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, <PatchedFile: removed_file>]> -Finally, you can also instantiate PatchSet passing any iterable (and encoding, if needed): +Finally, you can also instantiate :code:`PatchSet` passing any iterable (and encoding, if needed): :: @@ -110,10 +113,19 @@ >>> with open('tests/samples/bzr.diff', 'r') as diff: ... data = diff.readlines() ... - >>> patch = PatchSet(data, encoding='utf-8') + >>> patch = PatchSet(data) >>> patch <PatchSet: [<PatchedFile: added_file>, <PatchedFile: modified_file>, <PatchedFile: removed_file>]> +If you don't need to be able to rebuild the original unified diff input, you can pass +:code:`metadata_only=True` (defaults to :code:`False`), which should help making the +parsing more efficient: + +:: + + >>> from unidiff import PatchSet + >>> patch = PatchSet.from_filename('tests/samples/bzr.diff', encoding='utf-8', metadata_only=True) + References ---------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/bin/unidiff new/python-unidiff-0.6.0/bin/unidiff --- old/python-unidiff-0.5.5/bin/unidiff 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/bin/unidiff 2020-05-08 00:16:37.000000000 +0200 @@ -45,7 +45,7 @@ if PY2: diff_file = codecs.getreader(encoding)(diff_file) - patch = PatchSet(diff_file) + patch = PatchSet(diff_file, metadata_only=(not args.show_diff)) if args.show_diff: print(patch) @@ -55,14 +55,21 @@ print('-------') additions = 0 deletions = 0 + renamed_files = 0 for f in patch: - additions += f.added - deletions += f.removed - print('%s:' % f.path, '+%d additions,' % f.added, - '-%d deletions' % f.removed) + if f.is_binary_file: + print('%s:' % f.path, '(binary file)') + else: + additions += f.added + deletions += f.removed + print('%s:' % f.path, '+%d additions,' % f.added, + '-%d deletions' % f.removed) + renamed_files = renamed_files + 1 if f.is_rename else renamed_files print() print('%d modified file(s), %d added file(s), %d removed file(s)' % ( len(patch.modified_files), len(patch.added_files), len(patch.removed_files))) + if renamed_files: + print('%d file(s) renamed' % renamed_files) print('Total: %d addition(s), %d deletion(s)' % (additions, deletions)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/setup.py new/python-unidiff-0.6.0/setup.py --- old/python-unidiff-0.5.5/setup.py 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/setup.py 2020-05-08 00:16:37.000000000 +0200 @@ -48,9 +48,10 @@ "Programming Language :: Python :: 2", 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', - 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', + 'Programming Language :: Python :: 3.8', ], + test_suite='tests', ) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/tests/samples/git_rename.diff new/python-unidiff-0.6.0/tests/samples/git_rename.diff --- old/python-unidiff-0.5.5/tests/samples/git_rename.diff 1970-01-01 01:00:00.000000000 +0100 +++ new/python-unidiff-0.6.0/tests/samples/git_rename.diff 2020-05-08 00:16:37.000000000 +0200 @@ -0,0 +1,13 @@ +diff --git a/added b/moved +similarity index 85% +rename from added +rename to moved +index a071991..4dbab21 100644 +--- a/added ++++ b/moved +@@ -9,4 +9,4 @@ Some content + Some content + Some content + Some content +-Some content ++Some modified content diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/tests/samples/sample8.diff new/python-unidiff-0.6.0/tests/samples/sample8.diff --- old/python-unidiff-0.5.5/tests/samples/sample8.diff 1970-01-01 01:00:00.000000000 +0100 +++ new/python-unidiff-0.6.0/tests/samples/sample8.diff 2020-05-08 00:16:37.000000000 +0200 @@ -0,0 +1,11 @@ +diff --git a/foo.bin b/foo.bin +new file mode 100644 +index 0000000..af000000 +Binary files /dev/null and b/foo.bin differ +diff --git a/bar.bin b/bar.bin +index ad000000..ac000000 100644 +Binary files a/bar.bin and b/bar.bin differ +diff --git a/baz.bin b/baz.bin +deleted file mode 100644 +index af000000..0000000 +Binary files a/baz.bin and /dev/null differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/tests/test_parser.py new/python-unidiff-0.6.0/tests/test_parser.py --- old/python-unidiff-0.5.5/tests/test_parser.py 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/tests/test_parser.py 2020-05-08 00:16:37.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # The MIT License (MIT) -# Copyright (c) 2014-2017 Matias Bordese +# Copyright (c) 2014-2020 Matias Bordese # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -114,10 +114,10 @@ self.assertEqual(lines[12], '@@ -5,16 +11,10 @@') self.assertEqual(lines[31], '@@ -22,3 +22,7 @@') - def test_parse_sample(self): + def _test_parse_sample(self, metadata_only): """Parse sample file.""" with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: - res = PatchSet(diff_file) + res = PatchSet(diff_file, metadata_only=metadata_only) # three file in the patch self.assertEqual(len(res), 3) @@ -128,6 +128,7 @@ self.assertTrue(res[0].is_modified_file) self.assertFalse(res[0].is_removed_file) self.assertFalse(res[0].is_added_file) + self.assertFalse(res[0].is_binary_file) # Hunk 1: five additions, no deletions, a section header self.assertEqual(res[0][0].added, 6) @@ -152,15 +153,23 @@ self.assertFalse(res[1].is_modified_file) self.assertFalse(res[1].is_removed_file) self.assertTrue(res[1].is_added_file) + self.assertFalse(res[1].is_binary_file) # third file is removed self.assertFalse(res[2].is_modified_file) self.assertTrue(res[2].is_removed_file) self.assertFalse(res[2].is_added_file) + self.assertFalse(res[2].is_binary_file) self.assertEqual(res.added, 21) self.assertEqual(res.removed, 17) + def test_parse_sample_full(self): + self._test_parse_sample(metadata_only=False) + + def test_parse_sample_metadata_only(self): + self._test_parse_sample(metadata_only=True) + def test_patchset_compare(self): with codecs.open(self.sample_file, 'r', encoding='utf-8') as diff_file: ps1 = PatchSet(diff_file) @@ -222,6 +231,42 @@ with open(utf8_file, 'r') as diff_file: self.assertRaises(UnidiffParseError, PatchSet, diff_file) + def test_parse_diff_with_new_and_modified_binary_files(self): + """Parse git diff file with newly added and modified binaries files.""" + utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff') + with open(utf8_file, 'r') as diff_file: + res = PatchSet(diff_file) + + # three file in the patch + self.assertEqual(len(res), 3) + + # first file is added + self.assertFalse(res[0].is_modified_file) + self.assertFalse(res[0].is_removed_file) + self.assertTrue(res[0].is_added_file) + self.assertTrue(res[0].is_binary_file) + + # second file is added + self.assertTrue(res[1].is_modified_file) + self.assertFalse(res[1].is_removed_file) + self.assertFalse(res[1].is_added_file) + self.assertTrue(res[1].is_binary_file) + + # third file is removed + self.assertFalse(res[2].is_modified_file) + self.assertTrue(res[2].is_removed_file) + self.assertFalse(res[2].is_added_file) + self.assertTrue(res[2].is_binary_file) + + def test_parse_round_trip_with_binary_files_in_diff(self): + """Parse git diff with binary files though round trip""" + utf8_file = os.path.join(self.samples_dir, 'samples/sample8.diff') + with open(utf8_file, 'r') as diff_file: + res1 = PatchSet(diff_file) + + res2 = PatchSet(str(res1)) + self.assertEqual(res1, res2) + def test_diff_lines_linenos(self): with open(self.sample_file, 'rb') as diff_file: res = PatchSet(diff_file, encoding='utf-8') @@ -277,6 +322,38 @@ self.assertEqual(source_line_nos, expected_source_line_nos) self.assertEqual(diff_line_nos, expected_diff_line_nos) + def test_diff_hunk_positions(self): + with open(self.sample_file, 'rb') as diff_file: + res = PatchSet(diff_file, encoding='utf-8') + self.do_test_diff_hunk_positions(res) + + def test_diff_metadata_only(self): + with open(self.sample_file, 'rb') as diff_file: + res = PatchSet(diff_file, encoding='utf-8', metadata_only=True) + self.do_test_diff_hunk_positions(res) + + def do_test_diff_hunk_positions(self, res): + hunk_positions = [] + for diff_file in res: + for hunk in diff_file: + hunk_positions.append((hunk.source_start, hunk.target_start, + hunk.source_length, hunk.target_length)) + + expected_hunk_positions = [ + # File: 1, Hunk: 1 + (1, 1, 3, 9), + # File: 1, Hunk: 2 + (5, 11, 16, 10), + # File: 1, Hunk: 3 + (22, 22, 3, 7), + # File: 2, Hunk: 1 + (0, 1, 0, 9), + # File: 3, Hunk: 1 + (1, 0, 9, 0) + ] + + self.assertEqual(hunk_positions, expected_hunk_positions) + class TestVCSSamples(unittest.TestCase): """Tests for real examples from VCS.""" @@ -327,3 +404,24 @@ # by unidiff are the same with codecs.open(file_path, 'r', encoding='utf-8') as diff_file: self.assertEqual(diff_file.read(), str(res)) + + def test_git_renaming(self): + tests_dir = os.path.dirname(os.path.realpath(__file__)) + file_path = os.path.join(tests_dir, 'samples/git_rename.diff') + with codecs.open(file_path, 'r', encoding='utf-8') as diff_file: + res = PatchSet(diff_file) + + self.assertEqual(len(res), 1) + + patch = res[0] + self.assertTrue(patch.is_rename) + self.assertEqual(patch.added, 1) + self.assertEqual(patch.removed, 1) + self.assertEqual(len(res.modified_files), 1) + self.assertEqual(len(res.added_files), 0) + self.assertEqual(len(res.removed_files), 0) + + # check that original diffs and those produced + # by unidiff are the same + with codecs.open(file_path, 'r', encoding='utf-8') as diff_file: + self.assertEqual(diff_file.read(), str(res)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/unidiff/__version__.py new/python-unidiff-0.6.0/unidiff/__version__.py --- old/python-unidiff-0.5.5/unidiff/__version__.py 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/unidiff/__version__.py 2020-05-08 00:16:37.000000000 +0200 @@ -21,4 +21,4 @@ # OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE # OR OTHER DEALINGS IN THE SOFTWARE. -__version__ = '0.5.5' +__version__ = '0.6.0' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/unidiff/constants.py new/python-unidiff-0.6.0/unidiff/constants.py --- old/python-unidiff-0.5.5/unidiff/constants.py 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/unidiff/constants.py 2020-05-08 00:16:37.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # The MIT License (MIT) -# Copyright (c) 2014-2017 Matias Bordese +# Copyright (c) 2014-2020 Matias Bordese # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -34,6 +34,12 @@ RE_TARGET_FILENAME = re.compile( r'^\+\+\+ (?P<filename>[^\t\n]+)(?:\t(?P<timestamp>[^\n]+))?') + +# git renamed files support +RE_RENAME_SOURCE_FILENAME = re.compile(r'^rename from (?P<filename>[^\t\n]+)') +RE_RENAME_TARGET_FILENAME = re.compile(r'^rename to (?P<filename>[^\t\n]+)') + + # @@ (source offset, length) (target offset, length) @@ (section header) RE_HUNK_HEADER = re.compile( r"^@@ -(\d+)(?:,(\d+))? \+(\d+)(?:,(\d+))?\ @@[ ]?(.*)") @@ -50,6 +56,11 @@ RE_NO_NEWLINE_MARKER = re.compile(r'^\\ No newline at end of file') +RE_BINARY_DIFF = re.compile( + r'^Binary files? ' + '(?P<source_filename>[^\t]+?)(?:\t(?P<source_timestamp>[\s0-9:\+-]+))?' + '(?: and (?P<target_filename>[^\t]+?)(?:\t(?P<target_timestamp>[\s0-9:\+-]+))?)? (differ|has changed)') + DEFAULT_ENCODING = 'UTF-8' LINE_TYPE_ADDED = '+' diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/python-unidiff-0.5.5/unidiff/patch.py new/python-unidiff-0.6.0/unidiff/patch.py --- old/python-unidiff-0.5.5/unidiff/patch.py 2018-01-03 22:14:18.000000000 +0100 +++ new/python-unidiff-0.6.0/unidiff/patch.py 2020-05-08 00:16:37.000000000 +0200 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # The MIT License (MIT) -# Copyright (c) 2014-2017 Matias Bordese +# Copyright (c) 2014-2020 Matias Bordese # # Permission is hereby granted, free of charge, to any person obtaining a copy # of this software and associated documentation files (the "Software"), to deal @@ -40,9 +40,12 @@ RE_HUNK_BODY_LINE, RE_HUNK_EMPTY_BODY_LINE, RE_HUNK_HEADER, + RE_RENAME_SOURCE_FILENAME, + RE_RENAME_TARGET_FILENAME, RE_SOURCE_FILENAME, RE_TARGET_FILENAME, RE_NO_NEWLINE_MARKER, + RE_BINARY_DIFF, ) from unidiff.errors import UnidiffParseError @@ -128,19 +131,18 @@ def __init__(self, src_start=0, src_len=0, tgt_start=0, tgt_len=0, section_header=''): + super(Hunk, self).__init__() if src_len is None: src_len = 1 if tgt_len is None: tgt_len = 1 - self.added = 0 # number of added lines - self.removed = 0 # number of removed lines - self.source = [] self.source_start = int(src_start) self.source_length = int(src_len) - self.target = [] self.target_start = int(tgt_start) self.target_length = int(tgt_len) self.section_header = section_header + self._added = None + self._removed = None def __repr__(self): value = "<Hunk: @@ %d,%d %d,%d @@ %s>" % (self.source_start, @@ -161,17 +163,26 @@ def append(self, line): """Append the line to hunk, and keep track of source/target lines.""" + # Make sure the line is encoded correctly. This is a no-op except for + # potentially raising a UnicodeDecodeError. + str(line) super(Hunk, self).append(line) - s = str(line) - if line.is_added: - self.added += 1 - self.target.append(s) - elif line.is_removed: - self.removed += 1 - self.source.append(s) - elif line.is_context: - self.target.append(s) - self.source.append(s) + + @property + def added(self): + if self._added is not None: + return self._added + # re-calculate each time to allow for hunk modifications + # (which should mean metadata_only switch wasn't used) + return sum(1 for line in self if line.is_added) + + @property + def removed(self): + if self._removed is not None: + return self._removed + # re-calculate each time to allow for hunk modifications + # (which should mean metadata_only switch wasn't used) + return sum(1 for line in self if line.is_removed) def is_valid(self): """Check hunk header data matches entered lines info.""" @@ -182,39 +193,53 @@ """Hunk lines from source file (generator).""" return (l for l in self if l.is_context or l.is_removed) + @property + def source(self): + return [str(l) for l in self.source_lines()] + def target_lines(self): """Hunk lines from target file (generator).""" return (l for l in self if l.is_context or l.is_added) + @property + def target(self): + return [str(l) for l in self.target_lines()] + class PatchedFile(list): """Patch updated file, it is a list of Hunks.""" def __init__(self, patch_info=None, source='', target='', - source_timestamp=None, target_timestamp=None): + source_timestamp=None, target_timestamp=None, + is_binary_file=False, is_rename=False): super(PatchedFile, self).__init__() self.patch_info = patch_info self.source_file = source self.source_timestamp = source_timestamp self.target_file = target self.target_timestamp = target_timestamp + self.is_binary_file = is_binary_file + self.is_rename = is_rename def __repr__(self): return make_str("<PatchedFile: %s>") % make_str(self.path) def __str__(self): + source = '' + target = '' # patch info is optional info = '' if self.patch_info is None else str(self.patch_info) - source = "--- %s%s\n" % ( - self.source_file, - '\t' + self.source_timestamp if self.source_timestamp else '') - target = "+++ %s%s\n" % ( - self.target_file, - '\t' + self.target_timestamp if self.target_timestamp else '') + if not self.is_binary_file and self: + source = "--- %s%s\n" % ( + self.source_file, + '\t' + self.source_timestamp if self.source_timestamp else '') + target = "+++ %s%s\n" % ( + self.target_file, + '\t' + self.target_timestamp if self.target_timestamp else '') hunks = ''.join(unicode(hunk) for hunk in self) return info + source + target + hunks - def _parse_hunk(self, header, diff, encoding): + def _parse_hunk(self, header, diff, encoding, metadata_only): """Parse hunk details.""" header_info = RE_HUNK_HEADER.match(header) hunk_info = header_info.groups() @@ -224,38 +249,68 @@ target_line_no = hunk.target_start expected_source_end = source_line_no + hunk.source_length expected_target_end = target_line_no + hunk.target_length + added = 0 + removed = 0 for diff_line_no, line in diff: if encoding is not None: line = line.decode(encoding) - valid_line = RE_HUNK_EMPTY_BODY_LINE.match(line) - if not valid_line: - valid_line = RE_HUNK_BODY_LINE.match(line) + if metadata_only: + # quick line type detection, no regex required + line_type = line[0] if line else LINE_TYPE_CONTEXT + if line_type not in (LINE_TYPE_ADDED, + LINE_TYPE_REMOVED, + LINE_TYPE_CONTEXT, + LINE_TYPE_NO_NEWLINE): + raise UnidiffParseError( + 'Hunk diff line expected: %s' % line) + + if line_type == LINE_TYPE_ADDED: + target_line_no += 1 + added += 1 + elif line_type == LINE_TYPE_REMOVED: + source_line_no += 1 + removed += 1 + elif line_type == LINE_TYPE_CONTEXT: + target_line_no += 1 + source_line_no += 1 - if not valid_line: - raise UnidiffParseError('Hunk diff line expected: %s' % line) + # no file content tracking + original_line = None - line_type = valid_line.group('line_type') - if line_type == LINE_TYPE_EMPTY: - line_type = LINE_TYPE_CONTEXT - value = valid_line.group('value') - original_line = Line(value, line_type=line_type) - if line_type == LINE_TYPE_ADDED: - original_line.target_line_no = target_line_no - target_line_no += 1 - elif line_type == LINE_TYPE_REMOVED: - original_line.source_line_no = source_line_no - source_line_no += 1 - elif line_type == LINE_TYPE_CONTEXT: - original_line.target_line_no = target_line_no - target_line_no += 1 - original_line.source_line_no = source_line_no - source_line_no += 1 - elif line_type == LINE_TYPE_NO_NEWLINE: - pass else: - original_line = None + # parse diff line content + valid_line = RE_HUNK_BODY_LINE.match(line) + if not valid_line: + valid_line = RE_HUNK_EMPTY_BODY_LINE.match(line) + + if not valid_line: + raise UnidiffParseError( + 'Hunk diff line expected: %s' % line) + + line_type = valid_line.group('line_type') + if line_type == LINE_TYPE_EMPTY: + line_type = LINE_TYPE_CONTEXT + + value = valid_line.group('value') + original_line = Line(value, line_type=line_type) + + if line_type == LINE_TYPE_ADDED: + original_line.target_line_no = target_line_no + target_line_no += 1 + elif line_type == LINE_TYPE_REMOVED: + original_line.source_line_no = source_line_no + source_line_no += 1 + elif line_type == LINE_TYPE_CONTEXT: + original_line.target_line_no = target_line_no + original_line.source_line_no = source_line_no + target_line_no += 1 + source_line_no += 1 + elif line_type == LINE_TYPE_NO_NEWLINE: + pass + else: + original_line = None # stop parsing if we got past expected number of lines if (source_line_no > expected_source_end or @@ -276,6 +331,11 @@ target_line_no < expected_target_end): raise UnidiffParseError('Hunk is shorter than expected') + if metadata_only: + # HACK: set fixed calculated values when metadata_only is enabled + hunk._added = added + hunk._removed = removed + self.append(hunk) def _add_no_newline_marker_to_last_hunk(self): @@ -301,7 +361,8 @@ elif (self.source_file.startswith('a/') and self.target_file == '/dev/null'): filepath = self.source_file[2:] - elif (self.target_file.startswith('b/') and + elif (self.target_file is not None and + self.target_file.startswith('b/') and self.source_file == '/dev/null'): filepath = self.target_file[2:] else: @@ -321,12 +382,16 @@ @property def is_added_file(self): """Return True if this patch adds the file.""" + if self.source_file == '/dev/null': + return True return (len(self) == 1 and self[0].source_start == 0 and self[0].source_length == 0) @property def is_removed_file(self): """Return True if this patch removes the file.""" + if self.target_file == '/dev/null': + return True return (len(self) == 1 and self[0].target_start == 0 and self[0].target_length == 0) @@ -340,7 +405,7 @@ class PatchSet(list): """A list of PatchedFiles.""" - def __init__(self, f, encoding=None): + def __init__(self, f, encoding=None, metadata_only=False): super(PatchSet, self).__init__() # convert string inputs to StringIO objects @@ -350,7 +415,10 @@ # make sure we pass an iterator object to parse data = iter(f) # if encoding is None, assume we are reading unicode data - self._parse(data, encoding=encoding) + # when metadata_only is True, only perform a minimal metadata parsing + # (ie. hunks without content) which is around 2.5-6 times faster; + # it will still validate the diff metadata consistency and get counts + self._parse(data, encoding=encoding, metadata_only=metadata_only) def __repr__(self): return make_str('<PatchSet: %s>') % super(PatchSet, self).__repr__() @@ -358,7 +426,7 @@ def __str__(self): return ''.join(unicode(patched_file) for patched_file in self) - def _parse(self, diff, encoding): + def _parse(self, diff, encoding, metadata_only): current_file = None patch_info = None @@ -367,28 +435,61 @@ if encoding is not None: line = line.decode(encoding) + # check for a git rename, source file + is_rename_source_filename = RE_RENAME_SOURCE_FILENAME.match(line) + if is_rename_source_filename: + # prefix with 'a/' to match expected git source format + source_file = ( + 'a/' + is_rename_source_filename.group('filename')) + # keep line as patch_info + patch_info.append(line) + # reset current file + current_file = None + continue + + # check for a git rename, target file + is_rename_target_filename = RE_RENAME_TARGET_FILENAME.match(line) + if is_rename_target_filename: + if current_file is not None: + raise UnidiffParseError('Target without source: %s' % line) + # prefix with 'b/' to match expected git source format + target_file = ( + 'b/' + is_rename_target_filename.group('filename')) + # keep line as patch_info + patch_info.append(line) + # add current file to PatchSet + current_file = PatchedFile( + patch_info, source_file, target_file, None, None, + is_rename=True) + self.append(current_file) + continue + # check for source file header is_source_filename = RE_SOURCE_FILENAME.match(line) if is_source_filename: source_file = is_source_filename.group('filename') source_timestamp = is_source_filename.group('timestamp') - # reset current file - current_file = None + # reset current file, unless we are processing a rename + # (in that case, source files should match) + if current_file is not None and not (current_file.is_rename and + current_file.source_file == source_file): + current_file = None continue # check for target file header is_target_filename = RE_TARGET_FILENAME.match(line) if is_target_filename: - if current_file is not None: + if current_file is not None and not current_file.is_rename: raise UnidiffParseError('Target without source: %s' % line) target_file = is_target_filename.group('filename') target_timestamp = is_target_filename.group('timestamp') - # add current file to PatchSet - current_file = PatchedFile( - patch_info, source_file, target_file, - source_timestamp, target_timestamp) - self.append(current_file) - patch_info = None + if current_file is None: + # add current file to PatchSet + current_file = PatchedFile( + patch_info, source_file, target_file, + source_timestamp, target_timestamp) + self.append(current_file) + patch_info = None continue # check for hunk header @@ -396,7 +497,7 @@ if is_hunk_header: if current_file is None: raise UnidiffParseError('Unexpected hunk found: %s' % line) - current_file._parse_hunk(line, diff, encoding) + current_file._parse_hunk(line, diff, encoding, metadata_only) continue # check for no newline marker @@ -412,6 +513,18 @@ current_file._append_trailing_empty_line() continue + is_binary_diff = RE_BINARY_DIFF.match(line) + if is_binary_diff: + source_file = is_binary_diff.group('source_filename') + target_file = is_binary_diff.group('target_filename') + patch_info.append(line) + current_file = PatchedFile( + patch_info, source_file, target_file, is_binary_file=True) + self.append(current_file) + patch_info = None + current_file = None + continue + # if nothing has matched above then this line is a patch info if patch_info is None: current_file = None
