Hello community, here is the log from the commit of package python-ged4py for openSUSE:Factory checked in at 2020-04-16 23:00:22 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python-ged4py (Old) and /work/SRC/openSUSE:Factory/.python-ged4py.new.2738 (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-ged4py" Thu Apr 16 23:00:22 2020 rev:2 rq:791825 version:0.1.12 Changes: -------- --- /work/SRC/openSUSE:Factory/python-ged4py/python-ged4py.changes 2019-08-05 10:39:18.659313412 +0200 +++ /work/SRC/openSUSE:Factory/.python-ged4py.new.2738/python-ged4py.changes 2020-04-16 23:00:37.659554159 +0200 @@ -1,0 +2,6 @@ +Mon Apr 6 14:32:12 UTC 2020 - Marketa Calabkova <[email protected]> + +- Update to 0.1.12 + * Add support for a bunch of illegal encodings (thanks @Tuisto59 for report). + +------------------------------------------------------------------- Old: ---- ged4py-0.1.11.tar.gz New: ---- ged4py-0.1.12.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python-ged4py.spec ++++++ --- /var/tmp/diff_new_pack.HFmuk7/_old 2020-04-16 23:00:38.711554951 +0200 +++ /var/tmp/diff_new_pack.HFmuk7/_new 2020-04-16 23:00:38.715554954 +0200 @@ -1,7 +1,7 @@ # # spec file for package python-ged4py # -# Copyright (c) 2019 SUSE LINUX GmbH, Nuernberg, Germany. 
+# Copyright (c) 2020 SUSE LLC # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -18,7 +18,7 @@ %{?!python_module:%define python_module() python-%{**} python3-%{**}} Name: python-ged4py -Version: 0.1.11 +Version: 0.1.12 Release: 0 Summary: GEDCOM tools for Python License: MIT @@ -32,6 +32,7 @@ BuildArch: noarch # SECTION test requirements BuildRequires: %{python_module ansel} +BuildRequires: %{python_module pytest} # /SECTION %python_subpackages @@ -50,7 +51,7 @@ %python_expand %fdupes %{buildroot}%{$python_sitelib} %check -%python_exec setup.py test +%pytest %files %{python_files} %doc AUTHORS.rst README.rst ++++++ ged4py-0.1.11.tar.gz -> ged4py-0.1.12.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/HISTORY.rst new/ged4py-0.1.12/HISTORY.rst --- old/ged4py-0.1.11/HISTORY.rst 2019-01-06 09:23:02.000000000 +0100 +++ new/ged4py-0.1.12/HISTORY.rst 2020-03-01 20:19:35.000000000 +0100 @@ -2,6 +2,11 @@ History ======= +0.1.12 (2020-03-01) +------------------- + +* Add support for a bunch of illegal encodings (thanks @Tuisto59 for report). 
+ 0.1.11 (2019-01-06) ------------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/PKG-INFO new/ged4py-0.1.12/PKG-INFO --- old/ged4py-0.1.11/PKG-INFO 2019-01-06 09:26:36.000000000 +0100 +++ new/ged4py-0.1.12/PKG-INFO 2020-03-01 20:23:32.000000000 +0100 @@ -1,12 +1,11 @@ Metadata-Version: 1.1 Name: ged4py -Version: 0.1.11 +Version: 0.1.12 Summary: GEDCOM tools for Python Home-page: https://github.com/andy-z/ged4py Author: Andy Salnikov Author-email: [email protected] License: MIT License -Description-Content-Type: UNKNOWN Description: ======================== GEDCOM parser for Python ======================== @@ -50,6 +49,11 @@ History ======= + 0.1.12 (2020-03-01) + ------------------- + + * Add support for a bunch of illegal encodings (thanks @Tuisto59 for report). + 0.1.11 (2019-01-06) ------------------- @@ -121,8 +125,8 @@ Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 Classifier: Topic :: Sociology :: Genealogy Binary files old/ged4py-0.1.11/docs/_build/html/_static/ajax-loader.gif and new/ged4py-0.1.12/docs/_build/html/_static/ajax-loader.gif differ Binary files old/ged4py-0.1.11/docs/_build/html/_static/comment-bright.png and new/ged4py-0.1.12/docs/_build/html/_static/comment-bright.png differ Binary files old/ged4py-0.1.11/docs/_build/html/_static/comment-close.png and new/ged4py-0.1.12/docs/_build/html/_static/comment-close.png differ Binary files old/ged4py-0.1.11/docs/_build/html/_static/comment.png and new/ged4py-0.1.12/docs/_build/html/_static/comment.png differ Binary files old/ged4py-0.1.11/docs/_build/html/_static/down-pressed.png and 
new/ged4py-0.1.12/docs/_build/html/_static/down-pressed.png differ Binary files old/ged4py-0.1.11/docs/_build/html/_static/down.png and new/ged4py-0.1.12/docs/_build/html/_static/down.png differ Binary files old/ged4py-0.1.11/docs/_build/html/_static/up-pressed.png and new/ged4py-0.1.12/docs/_build/html/_static/up-pressed.png differ Binary files old/ged4py-0.1.11/docs/_build/html/_static/up.png and new/ged4py-0.1.12/docs/_build/html/_static/up.png differ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/docs/ged4py.detail.date.rst new/ged4py-0.1.12/docs/ged4py.detail.date.rst --- old/ged4py-0.1.11/docs/ged4py.detail.date.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/ged4py-0.1.12/docs/ged4py.detail.date.rst 2020-03-01 19:52:43.000000000 +0100 @@ -0,0 +1,7 @@ +ged4py.detail.date module +========================= + +.. automodule:: ged4py.detail.date + :members: + :undoc-members: + :show-inheritance: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/docs/ged4py.detail.io.rst new/ged4py-0.1.12/docs/ged4py.detail.io.rst --- old/ged4py-0.1.11/docs/ged4py.detail.io.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/ged4py-0.1.12/docs/ged4py.detail.io.rst 2020-03-01 19:52:43.000000000 +0100 @@ -0,0 +1,7 @@ +ged4py.detail.io module +======================= + +.. automodule:: ged4py.detail.io + :members: + :undoc-members: + :show-inheritance: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/docs/ged4py.detail.name.rst new/ged4py-0.1.12/docs/ged4py.detail.name.rst --- old/ged4py-0.1.11/docs/ged4py.detail.name.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/ged4py-0.1.12/docs/ged4py.detail.name.rst 2020-03-01 19:52:43.000000000 +0100 @@ -0,0 +1,7 @@ +ged4py.detail.name module +========================= + +.. 
automodule:: ged4py.detail.name + :members: + :undoc-members: + :show-inheritance: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/docs/ged4py.detail.rst new/ged4py-0.1.12/docs/ged4py.detail.rst --- old/ged4py-0.1.11/docs/ged4py.detail.rst 2017-11-26 06:33:33.000000000 +0100 +++ new/ged4py-0.1.12/docs/ged4py.detail.rst 2020-03-01 19:52:43.000000000 +0100 @@ -1,38 +1,19 @@ -ged4py\.detail package -====================== +ged4py.detail package +===================== Submodules ---------- -ged4py\.detail\.date module ---------------------------- - -.. automodule:: ged4py.detail.date - :members: - :undoc-members: - :show-inheritance: - -ged4py\.detail\.io module -------------------------- - -.. automodule:: ged4py.detail.io - :members: - :undoc-members: - :show-inheritance: - -ged4py\.detail\.name module ---------------------------- - -.. automodule:: ged4py.detail.name - :members: - :undoc-members: - :show-inheritance: +.. toctree:: + ged4py.detail.date + ged4py.detail.io + ged4py.detail.name Module contents --------------- .. automodule:: ged4py.detail - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/docs/ged4py.model.rst new/ged4py-0.1.12/docs/ged4py.model.rst --- old/ged4py-0.1.11/docs/ged4py.model.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/ged4py-0.1.12/docs/ged4py.model.rst 2020-03-01 19:52:43.000000000 +0100 @@ -0,0 +1,7 @@ +ged4py.model module +=================== + +.. 
automodule:: ged4py.model + :members: + :undoc-members: + :show-inheritance: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/docs/ged4py.parser.rst new/ged4py-0.1.12/docs/ged4py.parser.rst --- old/ged4py-0.1.11/docs/ged4py.parser.rst 1970-01-01 01:00:00.000000000 +0100 +++ new/ged4py-0.1.12/docs/ged4py.parser.rst 2020-03-01 19:52:43.000000000 +0100 @@ -0,0 +1,7 @@ +ged4py.parser module +==================== + +.. automodule:: ged4py.parser + :members: + :undoc-members: + :show-inheritance: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/docs/ged4py.rst new/ged4py-0.1.12/docs/ged4py.rst --- old/ged4py-0.1.11/docs/ged4py.rst 2017-11-26 08:13:17.000000000 +0100 +++ new/ged4py-0.1.12/docs/ged4py.rst 2020-03-01 19:52:43.000000000 +0100 @@ -6,40 +6,20 @@ .. toctree:: - ged4py.detail + ged4py.detail Submodules ---------- -ged4py\.codecs module ---------------------- - -.. automodule:: ged4py.codecs - :members: - :undoc-members: - :show-inheritance: - -ged4py\.model module --------------------- - -.. automodule:: ged4py.model - :members: - :undoc-members: - :show-inheritance: - -ged4py\.parser module ---------------------- - -.. automodule:: ged4py.parser - :members: - :undoc-members: - :show-inheritance: +.. toctree:: + ged4py.model + ged4py.parser Module contents --------------- .. automodule:: ged4py - :members: - :undoc-members: - :show-inheritance: + :members: + :undoc-members: + :show-inheritance: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/docs/techinfo.rst new/ged4py-0.1.12/docs/techinfo.rst --- old/ged4py-0.1.11/docs/techinfo.rst 2017-11-27 05:18:35.000000000 +0100 +++ new/ged4py-0.1.12/docs/techinfo.rst 2020-03-01 20:17:34.000000000 +0100 @@ -56,9 +56,12 @@ ``codecs`` module. This scheme applies to to both header (before ``CHAR`` record) and regular content. 
+See also Tamura Jones' excellent `article`_ summarizing many varieties of +illegal encodings that may be present in GEDCOM files. + .. _ANSEL: https://en.wikipedia.org/wiki/ANSEL .. _BOM: https://en.wikipedia.org/wiki/Byte_order_mark - +.. _article: https://www.tamurajones.net/GEDCOMCharacterEncodings.xhtml Name representation ------------------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/ged4py/__init__.py new/ged4py-0.1.12/ged4py/__init__.py --- old/ged4py-0.1.11/ged4py/__init__.py 2019-01-06 09:24:21.000000000 +0100 +++ new/ged4py-0.1.12/ged4py/__init__.py 2020-03-01 20:22:57.000000000 +0100 @@ -4,7 +4,7 @@ __author__ = """Andy Salnikov""" __email__ = '[email protected]' -__version__ = '0.1.11' +__version__ = '0.1.12' import ansel as _ansel _ansel.register() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/ged4py/detail/date.py new/ged4py-0.1.12/ged4py/detail/date.py --- old/ged4py-0.1.11/ged4py/detail/date.py 2018-10-18 08:34:01.000000000 +0200 +++ new/ged4py-0.1.12/ged4py/detail/date.py 2020-03-01 20:17:34.000000000 +0100 @@ -44,13 +44,14 @@ # DATE_PERIOD:= [ FROM <DATE> | TO <DATE> | FROM <DATE> TO <DATE> ] DATE_PERIOD_FROM = r"^FROM\s+(?P<date>" + DATE + ")$" DATE_PERIOD_TO = r"^TO\s+(?P<date>" + DATE + ")$" -DATE_PERIOD = r"^FROM\s+(?P<date1>" + DATE + ")\s+TO\s+(?P<date2>" + \ +DATE_PERIOD = r"^FROM\s+(?P<date1>" + DATE + r")\s+TO\s+(?P<date2>" + \ DATE + ")$" # DATE_RANGE:= [ BEF <DATE> | AFT <DATE> | BET <DATE> AND <DATE> ] DATE_RANGE_BEFORE = r"^BEF\s+(?P<date>" + DATE + ")$" DATE_RANGE_AFTER = r"^AFT\s+(?P<date>" + DATE + ")$" -DATE_RANGE = r"^BET\s+(?P<date1>" + DATE + ")\s+AND\s+(?P<date2>" + DATE + ")$" +DATE_RANGE = r"^BET\s+(?P<date1>" + DATE + r")\s+AND\s+(?P<date2>" + \ + DATE + ")$" # DATE_APPROXIMATED := [ ABT <DATE> | CAL <DATE> | EST <DATE> ] DATE_APPROX_ABOUT = r"^ABT\s+(?P<date>" + DATE + ")$" @@ -58,7 +59,7 @@ 
DATE_APPROX_EST = r"^EST\s+(?P<date>" + DATE + ")$" # INT <DATE> (<DATE_PHRASE>) -DATE_INTERP = r"^INT\s+(?P<date>" + DATE + ")\s+\((?P<phrase>.*)\)$" +DATE_INTERP = r"^INT\s+(?P<date>" + DATE + r")\s+\((?P<phrase>.*)\)$" DATE_PHRASE = r"^\((?P<phrase>.*)\)$" # INT <DATE> (<DATE_PHRASE>) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/ged4py/detail/name.py new/ged4py-0.1.12/ged4py/detail/name.py --- old/ged4py-0.1.11/ged4py/detail/name.py 2018-10-18 08:34:01.000000000 +0200 +++ new/ged4py-0.1.12/ged4py/detail/name.py 2020-03-01 20:17:34.000000000 +0100 @@ -126,6 +126,7 @@ maiden) return name_tuple + def parse_name_ancestris(record): """Parse NAME structure assuming ANCESTRIS dialect. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/ged4py/parser.py new/ged4py-0.1.12/ged4py/parser.py --- old/ged4py-0.1.11/ged4py/parser.py 2019-01-06 09:20:31.000000000 +0100 +++ new/ged4py-0.1.12/ged4py/parser.py 2020-03-01 20:17:34.000000000 +0100 @@ -57,7 +57,7 @@ pass -def guess_codec(file, errors="strict", require_char=False): +def guess_codec(file, errors="strict", require_char=False, warn=True): """Look at file contents and guess its correct encoding. File must be open in binary mode and positioned at offset 0. If BOM @@ -72,6 +72,8 @@ :param bool require_char: If True then exception is thrown if CHAR record is not found in a header, if False and CHAR is not in the header then codec determined from BOM or "gedcom" is returned. + :param bool warn: If True (default) then generate error/warning messages + for illegal encodings. :returns: Tuple (codec_name, bom_size) :raises: :py:class:`CodecError` when codec name in file is unknown or when codec name in file contradicts codec determined from BOM. @@ -79,10 +81,35 @@ input lines and `errors` is set to "strict" (default). 
""" - # mapping of gedcom character set specifiers to Python encoding names - gedcom_char_to_codec = { - 'ansel': 'gedcom', + # set of illegal but unambiguous encodings and their corresponding codecs + illegal_encodings = { + "windows-1250": "cp1250", + "windows-1251": "cp1251", + "cp1252": "cp1252", + "iso-8859-1": "iso8859-1", + "iso8859-1": "iso8859-1", } + # set of ambiguous (and illegal) encodings + ambiguous_encodings = { + 'ibmpc': 'cp437', + "ibm": "cp437", + "ibm-pc": "cp437", + "oem": "cp437", + "msdos": "cp850", + "ibm dos": "cp850", + "ms-dos": "cp850", + "ansi": "cp1252", + "windows": "cp1252", + "ibm_windows": "cp1252", + "ibm windows": "cp1252", + "iso8859": "iso8859-1", + "latin1": "iso8859-1", + "macintosh": "mac-roman", + } + illegal_encodings.update(ambiguous_encodings) + # full set of encodings, including legal ones + gedcom_char_to_codec = {"ansel": "gedcom"} + gedcom_char_to_codec.update(illegal_encodings) # check BOM first bom_codec = check_bom(file) @@ -90,8 +117,11 @@ codec = bom_codec or 'gedcom' # scan header until CHAR or end of header + lineno = 0 while True: + lineno += 1 + # this stops at '\n' line = file.readline() if not line: @@ -113,12 +143,18 @@ break elif len(words) >= 3 and words[0] == b"1" and words[1] == b"CHAR": try: - encoding = words[2].decode(codec, errors) - encoding = gedcom_char_to_codec.get(encoding.lower(), - encoding.lower()) + enc = b" ".join(words[2:]).decode(codec, errors) + encoding = gedcom_char_to_codec.get(enc.lower(), enc.lower()) + if enc.lower() in illegal_encodings and warn: + _log.error("Line %d: \"%s\" - \"%s\" is not a legal " + "character set or encoding.", lineno, line, enc) + if enc.lower() in ambiguous_encodings: + _log.warning("Character set (\"%s\") is ambiguous, it " + "will be interpreted as \"%s\"", + enc, encoding) new_codec = codecs.lookup(encoding).name except LookupError: - raise CodecError("Unknown codec name {0}".format(encoding)) + raise CodecError("Unknown codec name 
'{0}'".format(enc)) if bom_codec is None: codec = new_codec elif new_codec != bom_codec: @@ -170,7 +206,8 @@ try: encoding, self._bom_size = guess_codec(self._file, errors=self._errors, - require_char=require_char) + require_char=require_char, + warn=self._encoding is None) except Exception: self._file.close() raise diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/ged4py.egg-info/PKG-INFO new/ged4py-0.1.12/ged4py.egg-info/PKG-INFO --- old/ged4py-0.1.11/ged4py.egg-info/PKG-INFO 2019-01-06 09:26:36.000000000 +0100 +++ new/ged4py-0.1.12/ged4py.egg-info/PKG-INFO 2020-03-01 20:23:31.000000000 +0100 @@ -1,12 +1,11 @@ Metadata-Version: 1.1 Name: ged4py -Version: 0.1.11 +Version: 0.1.12 Summary: GEDCOM tools for Python Home-page: https://github.com/andy-z/ged4py Author: Andy Salnikov Author-email: [email protected] License: MIT License -Description-Content-Type: UNKNOWN Description: ======================== GEDCOM parser for Python ======================== @@ -50,6 +49,11 @@ History ======= + 0.1.12 (2020-03-01) + ------------------- + + * Add support for a bunch of illegal encodings (thanks @Tuisto59 for report). 
+ 0.1.11 (2019-01-06) ------------------- @@ -121,8 +125,8 @@ Classifier: Programming Language :: Python :: 2 Classifier: Programming Language :: Python :: 2.7 Classifier: Programming Language :: Python :: 3 -Classifier: Programming Language :: Python :: 3.3 Classifier: Programming Language :: Python :: 3.4 Classifier: Programming Language :: Python :: 3.5 Classifier: Programming Language :: Python :: 3.6 +Classifier: Programming Language :: Python :: 3.7 Classifier: Topic :: Sociology :: Genealogy diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/ged4py.egg-info/SOURCES.txt new/ged4py-0.1.12/ged4py.egg-info/SOURCES.txt --- old/ged4py-0.1.11/ged4py.egg-info/SOURCES.txt 2019-01-06 09:26:36.000000000 +0100 +++ new/ged4py-0.1.12/ged4py.egg-info/SOURCES.txt 2020-03-01 20:23:31.000000000 +0100 @@ -10,7 +10,12 @@ docs/authors.rst docs/conf.py docs/contributing.rst +docs/ged4py.detail.date.rst +docs/ged4py.detail.io.rst +docs/ged4py.detail.name.rst docs/ged4py.detail.rst +docs/ged4py.model.rst +docs/ged4py.parser.rst docs/ged4py.rst docs/history.rst docs/index.rst @@ -20,17 +25,9 @@ docs/readme.rst docs/techinfo.rst docs/usage.rst -docs/_build/html/_static/ajax-loader.gif -docs/_build/html/_static/comment-bright.png -docs/_build/html/_static/comment-close.png -docs/_build/html/_static/comment.png -docs/_build/html/_static/down-pressed.png -docs/_build/html/_static/down.png docs/_build/html/_static/file.png docs/_build/html/_static/minus.png docs/_build/html/_static/plus.png -docs/_build/html/_static/up-pressed.png -docs/_build/html/_static/up.png ged4py/__init__.py ged4py/model.py ged4py/parser.py @@ -50,4 +47,5 @@ tests/test_detail_io.py tests/test_detail_name.py tests/test_model.py -tests/test_parser.py \ No newline at end of file +tests/test_parser.py +tests/test_parser_encodings.py \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' 
old/ged4py-0.1.11/setup.cfg new/ged4py-0.1.12/setup.cfg --- old/ged4py-0.1.11/setup.cfg 2019-01-06 09:26:36.000000000 +0100 +++ new/ged4py-0.1.12/setup.cfg 2020-03-01 20:23:32.000000000 +0100 @@ -1,5 +1,5 @@ [bumpversion] -current_version = 0.1.11 +current_version = 0.1.12 commit = True tag = True @@ -16,6 +16,7 @@ [flake8] exclude = docs +max-line-length = 110 [aliases] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/setup.py new/ged4py-0.1.12/setup.py --- old/ged4py-0.1.11/setup.py 2019-01-06 09:24:21.000000000 +0100 +++ new/ged4py-0.1.12/setup.py 2020-03-01 20:22:57.000000000 +0100 @@ -23,7 +23,7 @@ setup( name='ged4py', - version='0.1.11', + version='0.1.12', description="GEDCOM tools for Python", long_description=readme + '\n\n' + history, @@ -51,10 +51,10 @@ "Programming Language :: Python :: 2", 'Programming Language :: Python :: 2.7', 'Programming Language :: Python :: 3', - 'Programming Language :: Python :: 3.3', 'Programming Language :: Python :: 3.4', 'Programming Language :: Python :: 3.5', 'Programming Language :: Python :: 3.6', + 'Programming Language :: Python :: 3.7', 'Topic :: Sociology :: Genealogy', ], test_suite='tests', diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/tests/test_detail_date.py new/ged4py-0.1.12/tests/test_detail_date.py --- old/ged4py-0.1.11/tests/test_detail_date.py 2018-10-18 08:34:01.000000000 +0200 +++ new/ged4py-0.1.12/tests/test_detail_date.py 2020-03-01 20:17:34.000000000 +0100 @@ -109,7 +109,7 @@ "date2": CalendarDate("2020")}) self.assertEqual(date.template, "FROM $date1 TO $date2") self.assertEqual(date.kw, {"date1": CalendarDate("2017"), - "date2": CalendarDate("2020")}) + "date2": CalendarDate("2020")}) self.assertEqual(date.fmt(), "FROM 2017 TO 2020") # "phrase" keyword corresponds to string @@ -153,14 +153,14 @@ self.assertTrue(date is not None) self.assertEqual(date.template, "FROM $date1 TO 
$date2") self.assertEqual(date.kw, {"date1": CalendarDate("1920"), - "date2": CalendarDate("2000")}) + "date2": CalendarDate("2000")}) self.assertEqual(date.fmt(), "FROM 1920 TO 2000") date = DateValue.parse("from mar 1920 to 1 apr 2000") self.assertTrue(date is not None) self.assertEqual(date.template, "FROM $date1 TO $date2") self.assertEqual(date.kw, {"date1": CalendarDate("1920", "MAR"), - "date2": CalendarDate("2000", "APR", 1)}) + "date2": CalendarDate("2000", "APR", 1)}) def test_013_date_parse_range(self): """Test detail.date.DateValue class.""" @@ -181,22 +181,22 @@ self.assertTrue(date is not None) self.assertEqual(date.template, "BETWEEN $date1 AND $date2") self.assertEqual(date.kw, {"date1": CalendarDate("1600"), - "date2": CalendarDate("2000")}) + "date2": CalendarDate("2000")}) self.assertEqual(date.fmt(), "BETWEEN 1600 AND 2000") date = DateValue.parse("bet mar 1920 and apr 2000") self.assertTrue(date is not None) self.assertEqual(date.template, "BETWEEN $date1 AND $date2") self.assertEqual(date.kw, {"date1": CalendarDate("1920", "MAR"), - "date2": CalendarDate("2000", "APR")}) + "date2": CalendarDate("2000", "APR")}) self.assertEqual(date.fmt(), "BETWEEN 1920 MAR AND 2000 APR") def test_014_date_parse_approx(self): """Test detail.date.DateValue class.""" - dates = {"500B.C." : CalendarDate("500B.C."), - "@#DGREGORIAN@ JAN 2017" : CalendarDate("2017", "JAN"), - "31 JAN 2017" : CalendarDate("2017", "JAN", 31)} + dates = {"500B.C.": CalendarDate("500B.C."), + "@#DGREGORIAN@ JAN 2017": CalendarDate("2017", "JAN"), + "31 JAN 2017": CalendarDate("2017", "JAN", 31)} approx = {"ABT": "ABOUT", "CAL": "CALCULATED", "EST": "ESTIMATED"} @@ -220,14 +220,14 @@ self.assertTrue(date is not None) self.assertEqual(date.template, "INTERPRETED $date ($phrase)") self.assertEqual(date.kw, {"date": CalendarDate("1967A.D."), - "phrase": "some phrase"}) + "phrase": "some phrase"}) self.assertEqual(date.fmt(), "INTERPRETED 1967A.D. 
(some phrase)") date = DateValue.parse("INT @#DGREGORIAN@ 1 JAN 2017 (some phrase)") self.assertTrue(date is not None) self.assertEqual(date.template, "INTERPRETED $date ($phrase)") self.assertEqual(date.kw, {"date": CalendarDate("2017", "JAN", 1), - "phrase": "some phrase"}) + "phrase": "some phrase"}) self.assertEqual(date.fmt(), "INTERPRETED 2017 JAN 1 (some phrase)") def test_016_date_parse_simple(self): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/tests/test_model.py new/ged4py-0.1.12/tests/test_model.py --- old/ged4py-0.1.11/tests/test_model.py 2018-10-18 08:34:01.000000000 +0200 +++ new/ged4py-0.1.12/tests/test_model.py 2020-03-01 20:17:34.000000000 +0100 @@ -343,6 +343,7 @@ def __init__(self): self.xref0 = {b"@pointer0@": (0, "TAG0"), b"@pointer1@": (1, "TAG1")} + def read_record(self, offset): return str(offset) @@ -366,6 +367,7 @@ def __init__(self): self.xref0 = {b"@pointer0@": (0, "TAG0"), b"@pointer1@": (1, "TAG1")} + def read_record(self, offset): return str(offset) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/tests/test_parser.py new/ged4py-0.1.12/tests/test_parser.py --- old/ged4py-0.1.11/tests/test_parser.py 2019-01-06 09:20:31.000000000 +0100 +++ new/ged4py-0.1.12/tests/test_parser.py 2020-03-01 20:17:34.000000000 +0100 @@ -1,7 +1,7 @@ #!/usr/bin/env python # -*- coding: utf-8 -*- -"""Tests for `ged4py.codecs` module.""" +"""Tests for `ged4py.parser` module.""" from contextlib import contextmanager import io @@ -381,7 +381,8 @@ def test_031_read_record_conc(self): # encoded string data = b"0 HEAD\n1 CHAR UTF8\n"\ - b"0 TAG \xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd \xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87" + b"0 TAG \xd0\x98\xd0\xb2\xd0\xb0\xd0\xbd \xd0\x98\xd0\xb2\xd0\xb0"\ + b"\xd0\xbd\xd0\xbe\xd0\xb2\xd0\xb8\xd1\x87" with _temp_file(data) as fname: with parser.GedcomReader(fname) as reader: @@ -500,7 +501,8 
@@ def test_042_rec_dialect(self): """Test records0 method""" - data = b"0 HEAD\n1 CHAR ASCII\n1 SOUR ALTREE\n0 INDI A\n1 SUBA A\n1 SUBB B\n2 SUBC C\n1 SUBD D\n0 STOP" + data = b"0 HEAD\n1 CHAR ASCII\n1 SOUR ALTREE\n"\ + b"0 INDI A\n1 SUBA A\n1 SUBB B\n2 SUBC C\n1 SUBD D\n0 STOP" with io.BytesIO(data) as file: with parser.GedcomReader(file) as reader: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/ged4py-0.1.11/tests/test_parser_encodings.py new/ged4py-0.1.12/tests/test_parser_encodings.py --- old/ged4py-0.1.11/tests/test_parser_encodings.py 1970-01-01 01:00:00.000000000 +0100 +++ new/ged4py-0.1.12/tests/test_parser_encodings.py 2020-03-01 20:17:34.000000000 +0100 @@ -0,0 +1,95 @@ +#!/usr/bin/env python +# -*- coding: utf-8 -*- + +"""Tests for ged4py encodings handling.""" + +import io +import logging +import pytest + +from ged4py.parser import GedcomReader, CodecError + + +def _check_log_rec(rec, level, msg, args): + assert rec.levelno == level + assert msg in rec.msg + assert rec.args == args + + +def test_001_standard(): + """Test standard encodings.""" + + file = io.BytesIO(b"0 HEAD\n1 CHAR ASCII\n0 TRLR") + reader = GedcomReader(file) + assert reader._encoding == "ascii" + + file = io.BytesIO(b"0 HEAD\n1 CHAR ANSEL\n0 TRLR") + reader = GedcomReader(file) + assert reader._encoding == "gedcom" + + file = io.BytesIO(b"0 HEAD\n1 CHAR UTF-8\n0 TRLR") + reader = GedcomReader(file) + assert reader._encoding == "utf-8" + + file = io.BytesIO(b"\xef\xbb\xbf0 HEAD\n1 CHAR UTF-8\n0 TRLR") + reader = GedcomReader(file) + assert reader._encoding == "utf-8" + + # UTF-16 is broken, do not use + + +@pytest.mark.parametrize('enc,pyenc,ambig', + [("IBMPC", "cp437", True), + ("IBM", "cp437", True), + ("IBM-PC", "cp437", True), + ("OEM", "cp437", True), + ("MSDOS", "cp850", True), + ("IBM DOS", "cp850", True), + ("MS-DOS", "cp850", True), + ("ANSI", "cp1252", True), + ("WINDOWS", "cp1252", True), + ("IBM WINDOWS", "cp1252", True), + 
("IBM_WINDOWS", "cp1252", True), + ("WINDOWS-1250", "cp1250", False), + ("WINDOWS-1251", "cp1251", False), + ("CP1252", "cp1252", False), + ("ISO-8859-1", "iso8859-1", False), + ("ISO8859-1", "iso8859-1", False), + ("ISO8859", "iso8859-1", True), + ("LATIN1", "iso8859-1", True), + ("MACINTOSH", "mac-roman", True), + ]) +def test_002_illegal(enc, pyenc, ambig, caplog): + """Test for illegal encodings. + """ + caplog.set_level(logging.WARNING) + + # %s formatting works in py27 and py3 + char = ("1 CHAR " + enc).encode() + file = io.BytesIO(b"0 HEAD\n" + char + b"\n0 TRLR") + reader = GedcomReader(file) + assert reader._encoding == pyenc + + # check logging + assert len(caplog.records) == (2 if ambig else 1) + _check_log_rec(caplog.records[0], logging.ERROR, + "is not a legal character set or encoding", + (2, char, enc)) + if ambig: + _check_log_rec(caplog.records[1], logging.WARNING, + "is ambiguous, it will be interpreted as", + (enc, pyenc)) + + +def test_003_codec_exceptions(): + """Test codecs-related exceptions.""" + + # unknown codec name + file = io.BytesIO(b"0 HEAD\n1 CHAR NOTCODEC\n0 TRLR") + with pytest.raises(CodecError): + GedcomReader(file) + + # BOM disagrees with CHAR + file = io.BytesIO(b"\xef\xbb\xbf0 HEAD\n1 CHAR ANSEL\n0 TRLR") + with pytest.raises(CodecError): + GedcomReader(file)
