commit python-html2text for openSUSE:Factory

root Thu, 09 Apr 2020 14:18:46 -0700

Hello community,

here is the log from the commit of package python-html2text for 
openSUSE:Factory checked in at 2020-04-09 23:18:09
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-html2text (Old)
 and      /work/SRC/openSUSE:Factory/.python-html2text.new.3248 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-html2text"

Thu Apr  9 23:18:09 2020 rev:22 rq:792732 version:2020.1.16

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-html2text/python-html2text.changes        2019-12-16 15:22:03.595096903 +0100
+++ /work/SRC/openSUSE:Factory/.python-html2text.new.3248/python-html2text.changes      2020-04-09 23:18:40.606356254 +0200
@@ -1,0 +2,9 @@
+Thu Apr  9 11:17:36 UTC 2020 - Marketa Calabkova <[email protected]>
+
+- Update to 2020.1.16
+  * Add type annotations.
+  * Add support for Python 3.8.
+  * Performance improvements when ``wrap_links`` is ``False`` (the default).
+  * Configure setuptools using setup.cfg.
+
+-------------------------------------------------------------------

Old:
----
  html2text-2019.9.26.tar.gz

New:
----
  html2text-2020.1.16.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-html2text.spec ++++++
--- /var/tmp/diff_new_pack.nA0zPg/_old  2020-04-09 23:18:41.410356719 +0200
+++ /var/tmp/diff_new_pack.nA0zPg/_new  2020-04-09 23:18:41.410356719 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package python-html2text
 #
-# Copyright (c) 2019 SUSE LLC
+# Copyright (c) 2020 SUSE LLC
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -20,7 +20,7 @@
 %define skip_python2 1
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
 Name:           python-%{upname}
-Version:        2019.9.26
+Version:        2020.1.16
 Release:        0
 Summary:        Python script for turning HTML into Markdown text
 License:        GPL-3.0-only

++++++ html2text-2019.9.26.tar.gz -> html2text-2020.1.16.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/ChangeLog.rst new/html2text-2020.1.16/ChangeLog.rst
--- old/html2text-2019.9.26/ChangeLog.rst       2019-09-26 12:36:15.000000000 +0200
+++ new/html2text-2020.1.16/ChangeLog.rst       2020-01-16 15:20:17.000000000 +0100
@@ -1,3 +1,13 @@
+2020.1.16
+=========
+----
+
+* Add type annotations.
+* Add support for Python 3.8.
+* Performance improvements when ``wrap_links`` is ``False`` (the default).
+* Configure setuptools using setup.cfg.
+
+
 2019.9.26
 =========
 ----
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/PKG-INFO new/html2text-2020.1.16/PKG-INFO
--- old/html2text-2019.9.26/PKG-INFO    2019-09-26 12:37:26.000000000 +0200
+++ new/html2text-2020.1.16/PKG-INFO    2020-01-16 15:21:10.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: html2text
-Version: 2019.9.26
+Version: 2020.1.16
 Summary: Turn HTML into equivalent Markdown-structured text.
 Home-page: https://github.com/Alir3z4/html2text/
 Author: Aaron Swartz
@@ -105,6 +105,7 @@
 Classifier: Programming Language :: Python :: 3.5
 Classifier: Programming Language :: Python :: 3.6
 Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/__init__.py new/html2text-2020.1.16/html2text/__init__.py
--- old/html2text-2019.9.26/html2text/__init__.py       2019-09-26 12:36:15.000000000 +0200
+++ new/html2text-2020.1.16/html2text/__init__.py       2020-01-16 15:20:17.000000000 +0100
@@ -5,9 +5,12 @@
 import re
 import urllib.parse as urlparse
 from textwrap import wrap
+from typing import Dict, List, Optional, Tuple, Union
 
-from html2text import config
-from html2text.utils import (
+from . import config
+from .elements import AnchorElement, ListElement
+from .typing import OutCallback
+from .utils import (
     dumb_css_parser,
     element_style,
     escape_md,
@@ -23,7 +26,7 @@
     unifiable_n,
 )
 
-__version__ = (2019, 9, 26)
+__version__ = (2020, 1, 16)
 
 
 # TODO:
@@ -31,7 +34,12 @@
 
 
 class HTML2Text(html.parser.HTMLParser):
-    def __init__(self, out=None, baseurl="", bodywidth=config.BODY_WIDTH):
+    def __init__(
+        self,
+        out: Optional[OutCallback] = None,
+        baseurl: str = "",
+        bodywidth: int = config.BODY_WIDTH,
+    ) -> None:
         """
         Input parameters:
             out: possible custom replacement for self.outtextf (which
@@ -82,20 +90,20 @@
             self.out = out
 
         # empty list to store output characters before they are "joined"
-        self.outtextlist = []
+        self.outtextlist = []  # type: List[str]
 
         self.quiet = 0
         self.p_p = 0  # number of newline character to print before next output
         self.outcount = 0
         self.start = True
         self.space = False
-        self.a = []
-        self.astack = []
-        self.maybe_automatic_link = None
+        self.a = []  # type: List[AnchorElement]
+        self.astack = []  # type: List[Optional[Dict[str, Optional[str]]]]
+        self.maybe_automatic_link = None  # type: Optional[str]
         self.empty_link = False
         self.absolute_url_matcher = re.compile(r"^[a-zA-Z+]+://")
         self.acount = 0
-        self.list = []
+        self.list = []  # type: List[ListElement]
         self.blockquote = 0
         self.pre = False
         self.startpre = False
@@ -105,42 +113,47 @@
         self.lastWasNL = False
         self.lastWasList = False
         self.style = 0
-        self.style_def = {}
-        self.tag_stack = []
+        self.style_def = {}  # type: Dict[str, Dict[str, str]]
+        self.tag_stack = (
+            []
+        )  # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
         self.emphasis = 0
         self.drop_white_space = 0
         self.inheader = False
-        self.abbr_title = None  # current abbreviation definition
-        self.abbr_data = None  # last inner HTML (for abbr being defined)
-        self.abbr_list = {}  # stack of abbreviations to write later
+        # Current abbreviation definition
+        self.abbr_title = None  # type: Optional[str]
+        # Last inner HTML (for abbr being defined)
+        self.abbr_data = None  # type: Optional[str]
+        # Stack of abbreviations to write later
+        self.abbr_list = {}  # type: Dict[str, str]
         self.baseurl = baseurl
         self.stressed = False
         self.preceding_stressed = False
-        self.preceding_data = None
-        self.current_tag = None
+        self.preceding_data = ""
+        self.current_tag = ""
 
         config.UNIFIABLE["nbsp"] = "&nbsp_place_holder;"
 
-    def feed(self, data):
+    def feed(self, data: str) -> None:
         data = data.replace("</' + 'script>", "</ignore>")
         super().feed(data)
 
-    def handle(self, data):
+    def handle(self, data: str) -> str:
         self.feed(data)
         self.feed("")
-        markdown = self.optwrap(self.close())
+        markdown = self.optwrap(self.finish())
         if self.pad_tables:
             return pad_tables_in_text(markdown)
         else:
             return markdown
 
-    def outtextf(self, s):
+    def outtextf(self, s: str) -> None:
         self.outtextlist.append(s)
         if s:
             self.lastWasNL = s[-1] == "\n"
 
-    def close(self):
-        super().close()
+    def finish(self) -> str:
+        self.close()
 
         self.pbr()
         self.o("", force="end")
@@ -159,10 +172,10 @@
 
         return outtext
 
-    def handle_charref(self, c):
+    def handle_charref(self, c: str) -> None:
         self.handle_data(self.charref(c), True)
 
-    def handle_entityref(self, c):
+    def handle_entityref(self, c: str) -> None:
         ref = self.entityref(c)
 
         # ref may be an empty string (e.g. for &lrm;/&rlm; markers that should
@@ -174,13 +187,13 @@
         if ref:
             self.handle_data(ref, True)
 
-    def handle_starttag(self, tag, attrs):
-        self.handle_tag(tag, attrs, start=True)
+    def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
+        self.handle_tag(tag, dict(attrs), start=True)
 
-    def handle_endtag(self, tag):
-        self.handle_tag(tag, None, start=False)
+    def handle_endtag(self, tag: str) -> None:
+        self.handle_tag(tag, {}, start=False)
 
-    def previousIndex(self, attrs):
+    def previousIndex(self, attrs: Dict[str, Optional[str]]) -> Optional[int]:
         """
         :type attrs: dict
 
@@ -193,12 +206,12 @@
 
         match = False
         for i, a in enumerate(self.a):
-            if "href" in a and a["href"] == attrs["href"]:
-                if "title" in a or "title" in attrs:
+            if "href" in a.attrs and a.attrs["href"] == attrs["href"]:
+                if "title" in a.attrs or "title" in attrs:
                     if (
-                        "title" in a
+                        "title" in a.attrs
                         and "title" in attrs
-                        and a["title"] == attrs["title"]
+                        and a.attrs["title"] == attrs["title"]
                     ):
                         match = True
                 else:
@@ -208,7 +221,9 @@
                 return i
         return None
 
-    def handle_emphasis(self, start, tag_style, parent_style):
+    def handle_emphasis(
+        self, start: bool, tag_style: Dict[str, str], parent_style: Dict[str, str]
+    ) -> None:
         """
         Handles various text emphases
         """
@@ -279,13 +294,10 @@
             if strikethrough:
                 self.quiet -= 1
 
-    def handle_tag(self, tag, attrs, start):
+    def handle_tag(
+        self, tag: str, attrs: Dict[str, Optional[str]], start: bool
+    ) -> None:
         self.current_tag = tag
-        # attrs is None for endtags
-        if attrs is None:
-            attrs = {}
-        else:
-            attrs = dict(attrs)
 
         if self.tag_callback is not None:
             if self.tag_callback(self, tag, attrs, start) is True:
@@ -308,7 +320,7 @@
             # need the attributes of the parent nodes in order to get a
             # complete style description for the current element. we assume
             # that google docs export well formed html.
-            parent_style = {}
+            parent_style = {}  # type: Dict[str, str]
             if start:
                 if self.tag_stack:
                     parent_style = self.tag_stack[-1][2]
@@ -377,8 +389,10 @@
                 self.blockquote -= 1
                 self.p()
 
-        def no_preceding_space(self):
-            return self.preceding_data and re.match(r"[^\s]", self.preceding_data[-1])
+        def no_preceding_space(self: HTML2Text) -> bool:
+            return bool(
+                self.preceding_data and re.match(r"[^\s]", self.preceding_data[-1])
+            )
 
         if tag in ["em", "i", "u"] and not self.ignore_emphasis:
             if start and no_preceding_space(self):
@@ -427,6 +441,7 @@
                     self.abbr_title = attrs["title"]
             else:
                 if self.abbr_title is not None:
+                    assert self.abbr_data is not None
                     self.abbr_list[self.abbr_data] = self.abbr_title
                     self.abbr_title = None
                 self.abbr_data = None
@@ -438,7 +453,7 @@
                 self.o(self.close_quote)
             self.quote = not self.quote
 
-        def link_url(self, link, title=""):
+        def link_url(self: HTML2Text, link: str, title: str = "") -> None:
             url = urlparse.urljoin(self.baseurl, link)
             title = ' "{}"'.format(title) if title.strip() else ""
             self.o("]({url}{title})".format(url=escape_md(url), title=title))
@@ -463,31 +478,28 @@
                     if self.maybe_automatic_link and not self.empty_link:
                         self.maybe_automatic_link = None
                     elif a:
+                        assert a["href"] is not None
                         if self.empty_link:
                             self.o("[")
                             self.empty_link = False
                             self.maybe_automatic_link = None
                         if self.inline_links:
-                            try:
-                                title = a["title"] if a["title"] else ""
-                                title = escape_md(title)
-                            except KeyError:
-                                link_url(self, a["href"], "")
-                            else:
-                                link_url(self, a["href"], title)
+                            title = a.get("title") or ""
+                            title = escape_md(title)
+                            link_url(self, a["href"], title)
                         else:
                             i = self.previousIndex(a)
                             if i is not None:
-                                a = self.a[i]
+                                a_props = self.a[i]
                             else:
                                 self.acount += 1
-                                a["count"] = self.acount
-                                a["outcount"] = self.outcount
-                                self.a.append(a)
-                            self.o("][" + str(a["count"]) + "]")
+                                a_props = AnchorElement(a, self.acount, self.outcount)
+                                self.a.append(a_props)
+                            self.o("][" + str(a_props.count) + "]")
 
         if tag == "img" and start and not self.ignore_images:
             if "src" in attrs:
+                assert attrs["src"] is not None
                 if not self.images_to_alt:
                     attrs["href"] = attrs["src"]
                 alt = attrs.get("alt") or self.default_image_alt
@@ -499,8 +511,10 @@
                 ):
                     self.o("<img src='" + attrs["src"] + "' ")
                     if "width" in attrs:
+                        assert attrs["width"] is not None
                         self.o("width='" + attrs["width"] + "' ")
                     if "height" in attrs:
+                        assert attrs["height"] is not None
                         self.o("height='" + attrs["height"] + "' ")
                     if alt:
                         self.o("alt='" + alt + "' ")
@@ -537,13 +551,12 @@
                     else:
                         i = self.previousIndex(attrs)
                         if i is not None:
-                            attrs = self.a[i]
+                            a_props = self.a[i]
                         else:
                             self.acount += 1
-                            attrs["count"] = self.acount
-                            attrs["outcount"] = self.outcount
-                            self.a.append(attrs)
-                        self.o("[" + str(attrs["count"]) + "]")
+                            a_props = AnchorElement(attrs, self.acount, self.outcount)
+                            self.a.append(a_props)
+                        self.o("[" + str(a_props.count) + "]")
 
         if tag == "dl" and start:
             self.p()
@@ -564,7 +577,7 @@
                 else:
                     list_style = tag
                 numbering_start = list_numbering_start(attrs)
-                self.list.append({"name": list_style, "num": numbering_start})
+                self.list.append(ListElement(list_style, numbering_start))
             else:
                 if self.list:
                     self.list.pop()
@@ -580,18 +593,18 @@
                 if self.list:
                     li = self.list[-1]
                 else:
-                    li = {"name": "ul", "num": 0}
+                    li = ListElement("ul", 0)
                 if self.google_doc:
                     nest_count = self.google_nest_count(tag_style)
                 else:
                     nest_count = len(self.list)
                 # TODO: line up <ol><li>s > 9 correctly.
                 self.o("  " * nest_count)
-                if li["name"] == "ul":
+                if li.name == "ul":
                     self.o(self.ul_item_mark + " ")
-                elif li["name"] == "ol":
-                    li["num"] += 1
-                    self.o(str(li["num"]) + ". ")
+                elif li.name == "ol":
+                    li.num += 1
+                    self.o(str(li.num) + ". ")
                 self.start = True
 
         if tag in ["table", "tr", "td", "th"]:
@@ -658,21 +671,23 @@
             self.p()
 
     # TODO: Add docstring for these one letter functions
-    def pbr(self):
+    def pbr(self) -> None:
         "Pretty print has a line break"
         if self.p_p == 0:
             self.p_p = 1
 
-    def p(self):
+    def p(self) -> None:
         "Set pretty print to 1 or 2 lines"
         self.p_p = 1 if self.single_line_break else 2
 
-    def soft_br(self):
+    def soft_br(self) -> None:
         "Soft breaks"
         self.pbr()
         self.br_toggle = "  "
 
-    def o(self, data, puredata=False, force=False):
+    def o(
+        self, data: str, puredata: bool = False, force: Union[bool, str] = False
+    ) -> None:
         """
         Deal with indentation and whitespace
         """
@@ -717,8 +732,7 @@
                 if not self.list:
                     bq += "    "
                 # else: list content is already partially indented
-                for i in range(len(self.list)):
-                    bq += "    "
+                bq += "    " * len(self.list)
                 data = data.replace("\n", "\n" + bq)
 
             if self.startpre:
@@ -756,15 +770,16 @@
 
                 newa = []
                 for link in self.a:
-                    if self.outcount > link["outcount"]:
+                    if self.outcount > link.outcount:
                         self.out(
                             "   ["
-                            + str(link["count"])
+                            + str(link.count)
                             + "]: "
-                            + urlparse.urljoin(self.baseurl, link["href"])
+                            + urlparse.urljoin(self.baseurl, link.attrs["href"])
                         )
-                        if "title" in link:
-                            self.out(" (" + link["title"] + ")")
+                        if "title" in link.attrs:
+                            assert link.attrs["title"] is not None
+                            self.out(" (" + link.attrs["title"] + ")")
                         self.out("\n")
                     else:
                         newa.append(link)
@@ -783,7 +798,7 @@
             self.out(data)
             self.outcount += 1
 
-    def handle_data(self, data, entity_char=False):
+    def handle_data(self, data: str, entity_char: bool = False) -> None:
         if not data:
             # Data may be empty for some HTML entities. For example,
             # LEFT-TO-RIGHT MARK.
@@ -826,7 +841,7 @@
         self.preceding_data = data
         self.o(data, puredata=True)
 
-    def charref(self, name):
+    def charref(self, name: str) -> str:
         if name[0] in ["x", "X"]:
             c = int(name[1:], 16)
         else:
@@ -840,7 +855,7 @@
             except ValueError:  # invalid unicode
                 return ""
 
-    def entityref(self, c):
+    def entityref(self, c: str) -> str:
         if not self.unicode_snob and c in config.UNIFIABLE:
             return config.UNIFIABLE[c]
         try:
@@ -849,7 +864,7 @@
             return "&" + c + ";"
         return config.UNIFIABLE[c] if c == "nbsp" else ch
 
-    def google_nest_count(self, style):
+    def google_nest_count(self, style: Dict[str, str]) -> int:
         """
         Calculate the nesting count of google doc lists
 
@@ -863,7 +878,7 @@
 
         return nest_count
 
-    def optwrap(self, text):
+    def optwrap(self, text: str) -> str:
         """
         Wrap all paragraphs in the provided text.
 
@@ -924,7 +939,7 @@
         return result
 
 
-def html2text(html, baseurl="", bodywidth=None):
+def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = None) -> str:
     if bodywidth is None:
         bodywidth = config.BODY_WIDTH
     h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/__main__.py new/html2text-2020.1.16/html2text/__main__.py
--- old/html2text-2019.9.26/html2text/__main__.py       2019-02-26 15:42:00.000000000 +0100
+++ new/html2text-2020.1.16/html2text/__main__.py       2019-10-12 17:55:30.000000000 +0200
@@ -1,3 +1,3 @@
-from html2text.cli import main
+from .cli import main
 
 main()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/cli.py new/html2text-2020.1.16/html2text/cli.py
--- old/html2text-2019.9.26/html2text/cli.py    2019-08-15 12:56:54.000000000 +0200
+++ new/html2text-2020.1.16/html2text/cli.py    2019-10-12 18:20:41.000000000 +0200
@@ -1,10 +1,10 @@
 import argparse
 import sys
 
-from html2text import HTML2Text, __version__, config
+from . import HTML2Text, __version__, config
 
 
-def main():
+def main() -> None:
     baseurl = ""
 
     class bcolors:
@@ -259,7 +259,7 @@
         data = sys.stdin.buffer.read()
 
     try:
-        data = data.decode(args.encoding, args.decode_errors)
+        html = data.decode(args.encoding, args.decode_errors)
     except UnicodeDecodeError as err:
         warning = bcolors.WARNING + "Warning:" + bcolors.ENDC
         warning += " Use the " + bcolors.OKGREEN
@@ -303,4 +303,4 @@
     h.open_quote = args.open_quote
     h.close_quote = args.close_quote
 
-    sys.stdout.write(h.handle(data))
+    sys.stdout.write(h.handle(html))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/elements.py new/html2text-2020.1.16/html2text/elements.py
--- old/html2text-2019.9.26/html2text/elements.py       1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2020.1.16/html2text/elements.py       2019-10-12 18:20:41.000000000 +0200
@@ -0,0 +1,18 @@
+from typing import Dict, Optional
+
+
+class AnchorElement:
+    __slots__ = ["attrs", "count", "outcount"]
+
+    def __init__(self, attrs: Dict[str, Optional[str]], count: int, outcount: int):
+        self.attrs = attrs
+        self.count = count
+        self.outcount = outcount
+
+
+class ListElement:
+    __slots__ = ["name", "num"]
+
+    def __init__(self, name: str, num: int):
+        self.name = name
+        self.num = num
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/typing.py new/html2text-2020.1.16/html2text/typing.py
--- old/html2text-2019.9.26/html2text/typing.py 1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2020.1.16/html2text/typing.py 2019-10-12 18:20:41.000000000 +0200
@@ -0,0 +1,3 @@
+class OutCallback:
+    def __call__(self, s: str) -> None:
+        ...
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/utils.py new/html2text-2020.1.16/html2text/utils.py
--- old/html2text-2019.9.26/html2text/utils.py  2019-08-15 12:56:54.000000000 +0200
+++ new/html2text-2020.1.16/html2text/utils.py  2020-01-16 15:08:28.000000000 +0100
@@ -1,6 +1,7 @@
 import html.entities
+from typing import Dict, List, Optional
 
-from html2text import config
+from . import config
 
 unifiable_n = {
     html.entities.name2codepoint[k]: v
@@ -9,7 +10,7 @@
 }
 
 
-def hn(tag):
+def hn(tag: str) -> int:
     if tag[0] == "h" and len(tag) == 2:
         n = tag[1]
         if "0" < n <= "9":
@@ -17,7 +18,7 @@
     return 0
 
 
-def dumb_property_dict(style):
+def dumb_property_dict(style: str) -> Dict[str, str]:
     """
     :returns: A hash of css attributes
     """
@@ -27,7 +28,7 @@
     }
 
 
-def dumb_css_parser(data):
+def dumb_css_parser(data: str) -> Dict[str, Dict[str, str]]:
     """
     :type data: str
 
@@ -44,16 +45,20 @@
 
     # parse the css. reverted from dictionary comprehension in order to
     # support older pythons
-    elements = [x.split("{") for x in data.split("}") if "{" in x.strip()]
+    pairs = [x.split("{") for x in data.split("}") if "{" in x.strip()]
     try:
-        elements = {a.strip(): dumb_property_dict(b) for a, b in elements}
+        elements = {a.strip(): dumb_property_dict(b) for a, b in pairs}
     except ValueError:
         elements = {}  # not that important
 
     return elements
 
 
-def element_style(attrs, style_def, parent_style):
+def element_style(
+    attrs: Dict[str, Optional[str]],
+    style_def: Dict[str, Dict[str, str]],
+    parent_style: Dict[str, str],
+) -> Dict[str, str]:
     """
     :type attrs: dict
     :type style_def: dict
@@ -64,17 +69,19 @@
     """
     style = parent_style.copy()
     if "class" in attrs:
+        assert attrs["class"] is not None
         for css_class in attrs["class"].split():
             css_style = style_def.get("." + css_class, {})
             style.update(css_style)
     if "style" in attrs:
+        assert attrs["style"] is not None
         immediate_style = dumb_property_dict(attrs["style"])
         style.update(immediate_style)
 
     return style
 
 
-def google_list_style(style):
+def google_list_style(style: Dict[str, str]) -> str:
     """
     Finds out whether this is an ordered or unordered list
 
@@ -90,7 +97,7 @@
     return "ol"
 
 
-def google_has_height(style):
+def google_has_height(style: Dict[str, str]) -> bool:
     """
     Check if the style of the element has the 'height' attribute
     explicitly defined
@@ -102,7 +109,7 @@
     return "height" in style
 
 
-def google_text_emphasis(style):
+def google_text_emphasis(style: Dict[str, str]) -> List[str]:
     """
     :type style: dict
 
@@ -120,7 +127,7 @@
     return emphasis
 
 
-def google_fixed_width_font(style):
+def google_fixed_width_font(style: Dict[str, str]) -> bool:
     """
     Check if the css of the current element defines a fixed width font
 
@@ -134,7 +141,7 @@
     return "courier new" == font_family or "consolas" == font_family
 
 
-def list_numbering_start(attrs):
+def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
     """
     Extract numbering from list element attributes
 
@@ -143,6 +150,7 @@
     :rtype: int or None
     """
     if "start" in attrs:
+        assert attrs["start"] is not None
         try:
             return int(attrs["start"]) - 1
         except ValueError:
@@ -151,10 +159,10 @@
     return 0
 
 
-def skipwrap(para, wrap_links, wrap_list_items):
+def skipwrap(para: str, wrap_links: bool, wrap_list_items: bool) -> bool:
     # If it appears to contain a link
     # don't wrap
-    if (len(config.RE_LINK.findall(para)) > 0) and not wrap_links:
+    if not wrap_links and config.RE_LINK.search(para):
         return True
     # If the text begins with four spaces or one tab, it's a code block;
     # don't wrap
@@ -182,7 +190,7 @@
     )
 
 
-def escape_md(text):
+def escape_md(text: str) -> str:
     """
     Escapes markdown-sensitive characters within other markdown
     constructs.
@@ -190,7 +198,7 @@
     return config.RE_MD_CHARS_MATCHER.sub(r"\\\1", text)
 
 
-def escape_md_section(text, snob=False):
+def escape_md_section(text: str, snob: bool = False) -> str:
     """
     Escapes markdown-sensitive characters across whole document sections.
     """
@@ -206,7 +214,7 @@
     return text
 
 
-def reformat_table(lines, right_margin):
+def reformat_table(lines: List[str], right_margin: int) -> List[str]:
     """
     Given the lines of a table
     padds the cells and returns the new lines
@@ -249,12 +257,13 @@
     return new_lines
 
 
-def pad_tables_in_text(text, right_margin=1):
+def pad_tables_in_text(text: str, right_margin: int = 1) -> str:
     """
     Provide padding for tables in the text
     """
     lines = text.split("\n")
-    table_buffer, table_started = [], False
+    table_buffer = []  # type: List[str]
+    table_started = False
     new_lines = []
     for line in lines:
         # Toggle table started
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text.egg-info/PKG-INFO new/html2text-2020.1.16/html2text.egg-info/PKG-INFO
--- old/html2text-2019.9.26/html2text.egg-info/PKG-INFO 2019-09-26 12:37:26.000000000 +0200
+++ new/html2text-2020.1.16/html2text.egg-info/PKG-INFO 2020-01-16 15:21:10.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 2.1
 Name: html2text
-Version: 2019.9.26
+Version: 2020.1.16
 Summary: Turn HTML into equivalent Markdown-structured text.
 Home-page: https://github.com/Alir3z4/html2text/
 Author: Aaron Swartz
@@ -105,6 +105,7 @@
 Classifier: Programming Language :: Python :: 3.5
 Classifier: Programming Language :: Python :: 3.6
 Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
 Classifier: Programming Language :: Python :: 3 :: Only
 Classifier: Programming Language :: Python :: Implementation :: CPython
 Classifier: Programming Language :: Python :: Implementation :: PyPy
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text.egg-info/SOURCES.txt new/html2text-2020.1.16/html2text.egg-info/SOURCES.txt
--- old/html2text-2019.9.26/html2text.egg-info/SOURCES.txt      2019-09-26 12:37:26.000000000 +0200
+++ new/html2text-2020.1.16/html2text.egg-info/SOURCES.txt      2020-01-16 15:21:10.000000000 +0100
@@ -10,11 +10,15 @@
 html2text/__main__.py
 html2text/cli.py
 html2text/config.py
+html2text/elements.py
+html2text/py.typed
+html2text/typing.py
 html2text/utils.py
 html2text.egg-info/PKG-INFO
 html2text.egg-info/SOURCES.txt
 html2text.egg-info/dependency_links.txt
 html2text.egg-info/entry_points.txt
+html2text.egg-info/not-zip-safe
 html2text.egg-info/top_level.txt
 test/GoogleDocMassDownload.html
 test/GoogleDocMassDownload.md
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text.egg-info/not-zip-safe new/html2text-2020.1.16/html2text.egg-info/not-zip-safe
--- old/html2text-2019.9.26/html2text.egg-info/not-zip-safe     1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2020.1.16/html2text.egg-info/not-zip-safe     2020-01-16 15:21:10.000000000 +0100
@@ -0,0 +1 @@
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/setup.cfg new/html2text-2020.1.16/setup.cfg
--- old/html2text-2019.9.26/setup.cfg   2019-09-26 12:37:26.000000000 +0200
+++ new/html2text-2020.1.16/setup.cfg   2020-01-16 15:21:10.000000000 +0100
@@ -1,3 +1,43 @@
+[metadata]
+name = html2text
+version = attr: html2text.__version__
+description = Turn HTML into equivalent Markdown-structured text.
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/Alir3z4/html2text/
+author = Aaron Swartz
+author_email = [email protected]
+maintainer = Alireza Savand
+maintainer_email = [email protected]
+license = GNU GPL 3
+classifiers = 
+       Development Status :: 5 - Production/Stable
+       Intended Audience :: Developers
+       License :: OSI Approved :: GNU General Public License (GPL)
+       Operating System :: OS Independent
+       Programming Language :: Python
+       Programming Language :: Python :: 3
+       Programming Language :: Python :: 3.5
+       Programming Language :: Python :: 3.6
+       Programming Language :: Python :: 3.7
+       Programming Language :: Python :: 3.8
+       Programming Language :: Python :: 3 :: Only
+       Programming Language :: Python :: Implementation :: CPython
+       Programming Language :: Python :: Implementation :: PyPy
+platform = OS Independent
+
+[options]
+zip_safe = False
+packages = html2text
+python_requires = >=3.5
+
+[options.entry_points]
+console_scripts = 
+       html2text = html2text.cli:main
+
+[options.package_data]
+html2text = py.typed
+
 [flake8]
 max_line_length = 88
 ignore = 
@@ -10,6 +50,9 @@
 line_length = 88
 multi_line_output = 3
 
+[mypy]
+python_version = 3.5
+
 [egg_info]
 tag_build = 
 tag_date = 0
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2019.9.26/setup.py 
new/html2text-2020.1.16/setup.py
--- old/html2text-2019.9.26/setup.py    2019-08-15 12:56:54.000000000 +0200
+++ new/html2text-2020.1.16/setup.py    2019-10-31 19:37:31.000000000 +0100
@@ -1,39 +1,3 @@
 from setuptools import setup
 
-
-def readall(f):
-    with open(f) as fp:
-        return fp.read()
-
-
-setup(
-    name="html2text",
-    version=".".join(map(str, __import__("html2text").__version__)),
-    description="Turn HTML into equivalent Markdown-structured text.",
-    long_description=readall("README.md"),
-    long_description_content_type="text/markdown",
-    author="Aaron Swartz",
-    author_email="[email protected]",
-    maintainer="Alireza Savand",
-    maintainer_email="[email protected]",
-    url="https://github.com/Alir3z4/html2text/",
-    platforms="OS Independent",
-    classifiers=[
-        "Development Status :: 5 - Production/Stable",
-        "Intended Audience :: Developers",
-        "License :: OSI Approved :: GNU General Public License (GPL)",
-        "Operating System :: OS Independent",
-        "Programming Language :: Python",
-        "Programming Language :: Python :: 3",
-        "Programming Language :: Python :: 3.5",
-        "Programming Language :: Python :: 3.6",
-        "Programming Language :: Python :: 3.7",
-        "Programming Language :: Python :: 3 :: Only",
-        "Programming Language :: Python :: Implementation :: CPython",
-        "Programming Language :: Python :: Implementation :: PyPy",
-    ],
-    python_requires=">=3.5",
-    entry_points={"console_scripts": ["html2text = html2text.cli:main"]},
-    license="GNU GPL 3",
-    packages=["html2text"],
-)
+setup()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2019.9.26/test/test_html2text.py 
new/html2text-2020.1.16/test/test_html2text.py
--- old/html2text-2019.9.26/test/test_html2text.py      2019-08-15 
12:56:54.000000000 +0200
+++ new/html2text-2020.1.16/test/test_html2text.py      2020-01-16 
15:08:28.000000000 +0100
@@ -40,8 +40,7 @@
 
         if base_fn.find("unicode") >= 0:
             module_args["unicode_snob"] = True
-            # There is no command-line option to control unicode_snob.
-            cmdline_args = skip
+            cmdline_args.append("--unicode-snob")
             func_args = skip
 
         if base_fn.find("flip_emphasis") >= 0:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2019.9.26/tox.ini 
new/html2text-2020.1.16/tox.ini
--- old/html2text-2019.9.26/tox.ini     2019-09-25 09:41:57.000000000 +0200
+++ new/html2text-2020.1.16/tox.ini     2019-10-31 19:37:31.000000000 +0100
@@ -3,7 +3,8 @@
     black
     flake8
     isort
-    py{35,36,37,py3}
+    mypy
+    py{35,36,37,38,py3}
 minversion = 1.9
 
 [testenv]
@@ -36,3 +37,8 @@
 deps =
     isort
 skip_install = true
+
+[testenv:mypy]
+commands = mypy --strict html2text
+deps = mypy
+skip_install = true

commit python-html2text for openSUSE:Factory

Reply via email to