Hello community,
here is the log from the commit of package python-html2text for
openSUSE:Factory checked in at 2020-04-09 23:18:09
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-html2text (Old)
and /work/SRC/openSUSE:Factory/.python-html2text.new.3248 (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python-html2text"
Thu Apr 9 23:18:09 2020 rev:22 rq:792732 version:2020.1.16
Changes:
--------
--- /work/SRC/openSUSE:Factory/python-html2text/python-html2text.changes 2019-12-16 15:22:03.595096903 +0100
+++ /work/SRC/openSUSE:Factory/.python-html2text.new.3248/python-html2text.changes 2020-04-09 23:18:40.606356254 +0200
@@ -1,0 +2,9 @@
+Thu Apr 9 11:17:36 UTC 2020 - Marketa Calabkova <[email protected]>
+
+- Update to 2020.1.16
+ * Add type annotations.
+ * Add support for Python 3.8.
+ * Performance improvements when ``wrap_links`` is ``False`` (the default).
+ * Configure setuptools using setup.cfg.
+
+-------------------------------------------------------------------
Old:
----
html2text-2019.9.26.tar.gz
New:
----
html2text-2020.1.16.tar.gz
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Other differences:
------------------
++++++ python-html2text.spec ++++++
--- /var/tmp/diff_new_pack.nA0zPg/_old 2020-04-09 23:18:41.410356719 +0200
+++ /var/tmp/diff_new_pack.nA0zPg/_new 2020-04-09 23:18:41.410356719 +0200
@@ -1,7 +1,7 @@
#
# spec file for package python-html2text
#
-# Copyright (c) 2019 SUSE LLC
+# Copyright (c) 2020 SUSE LLC
#
# All modifications and additions to the file contributed by third parties
# remain the property of their copyright owners, unless otherwise agreed
@@ -20,7 +20,7 @@
%define skip_python2 1
%{?!python_module:%define python_module() python-%{**} python3-%{**}}
Name: python-%{upname}
-Version: 2019.9.26
+Version: 2020.1.16
Release: 0
Summary: Python script for turning HTML into Markdown text
License: GPL-3.0-only
++++++ html2text-2019.9.26.tar.gz -> html2text-2020.1.16.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/ChangeLog.rst new/html2text-2020.1.16/ChangeLog.rst
--- old/html2text-2019.9.26/ChangeLog.rst 2019-09-26 12:36:15.000000000 +0200
+++ new/html2text-2020.1.16/ChangeLog.rst 2020-01-16 15:20:17.000000000 +0100
@@ -1,3 +1,13 @@
+2020.1.16
+=========
+----
+
+* Add type annotations.
+* Add support for Python 3.8.
+* Performance improvements when ``wrap_links`` is ``False`` (the default).
+* Configure setuptools using setup.cfg.
+
+
2019.9.26
=========
----
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/PKG-INFO new/html2text-2020.1.16/PKG-INFO
--- old/html2text-2019.9.26/PKG-INFO 2019-09-26 12:37:26.000000000 +0200
+++ new/html2text-2020.1.16/PKG-INFO 2020-01-16 15:21:10.000000000 +0100
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: html2text
-Version: 2019.9.26
+Version: 2020.1.16
Summary: Turn HTML into equivalent Markdown-structured text.
Home-page: https://github.com/Alir3z4/html2text/
Author: Aaron Swartz
@@ -105,6 +105,7 @@
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/__init__.py new/html2text-2020.1.16/html2text/__init__.py
--- old/html2text-2019.9.26/html2text/__init__.py 2019-09-26 12:36:15.000000000 +0200
+++ new/html2text-2020.1.16/html2text/__init__.py 2020-01-16 15:20:17.000000000 +0100
@@ -5,9 +5,12 @@
import re
import urllib.parse as urlparse
from textwrap import wrap
+from typing import Dict, List, Optional, Tuple, Union
-from html2text import config
-from html2text.utils import (
+from . import config
+from .elements import AnchorElement, ListElement
+from .typing import OutCallback
+from .utils import (
dumb_css_parser,
element_style,
escape_md,
@@ -23,7 +26,7 @@
unifiable_n,
)
-__version__ = (2019, 9, 26)
+__version__ = (2020, 1, 16)
# TODO:
@@ -31,7 +34,12 @@
class HTML2Text(html.parser.HTMLParser):
- def __init__(self, out=None, baseurl="", bodywidth=config.BODY_WIDTH):
+ def __init__(
+ self,
+ out: Optional[OutCallback] = None,
+ baseurl: str = "",
+ bodywidth: int = config.BODY_WIDTH,
+ ) -> None:
"""
Input parameters:
out: possible custom replacement for self.outtextf (which
@@ -82,20 +90,20 @@
self.out = out
# empty list to store output characters before they are "joined"
- self.outtextlist = []
+ self.outtextlist = [] # type: List[str]
self.quiet = 0
self.p_p = 0 # number of newline character to print before next output
self.outcount = 0
self.start = True
self.space = False
- self.a = []
- self.astack = []
- self.maybe_automatic_link = None
+ self.a = [] # type: List[AnchorElement]
+ self.astack = [] # type: List[Optional[Dict[str, Optional[str]]]]
+ self.maybe_automatic_link = None # type: Optional[str]
self.empty_link = False
self.absolute_url_matcher = re.compile(r"^[a-zA-Z+]+://")
self.acount = 0
- self.list = []
+ self.list = [] # type: List[ListElement]
self.blockquote = 0
self.pre = False
self.startpre = False
@@ -105,42 +113,47 @@
self.lastWasNL = False
self.lastWasList = False
self.style = 0
- self.style_def = {}
- self.tag_stack = []
+ self.style_def = {} # type: Dict[str, Dict[str, str]]
+ self.tag_stack = (
+ []
+ ) # type: List[Tuple[str, Dict[str, Optional[str]], Dict[str, str]]]
self.emphasis = 0
self.drop_white_space = 0
self.inheader = False
- self.abbr_title = None # current abbreviation definition
- self.abbr_data = None # last inner HTML (for abbr being defined)
- self.abbr_list = {} # stack of abbreviations to write later
+ # Current abbreviation definition
+ self.abbr_title = None # type: Optional[str]
+ # Last inner HTML (for abbr being defined)
+ self.abbr_data = None # type: Optional[str]
+ # Stack of abbreviations to write later
+ self.abbr_list = {} # type: Dict[str, str]
self.baseurl = baseurl
self.stressed = False
self.preceding_stressed = False
- self.preceding_data = None
- self.current_tag = None
+ self.preceding_data = ""
+ self.current_tag = ""
config.UNIFIABLE["nbsp"] = " _place_holder;"
- def feed(self, data):
+ def feed(self, data: str) -> None:
data = data.replace("</' + 'script>", "</ignore>")
super().feed(data)
- def handle(self, data):
+ def handle(self, data: str) -> str:
self.feed(data)
self.feed("")
- markdown = self.optwrap(self.close())
+ markdown = self.optwrap(self.finish())
if self.pad_tables:
return pad_tables_in_text(markdown)
else:
return markdown
- def outtextf(self, s):
+ def outtextf(self, s: str) -> None:
self.outtextlist.append(s)
if s:
self.lastWasNL = s[-1] == "\n"
- def close(self):
- super().close()
+ def finish(self) -> str:
+ self.close()
self.pbr()
self.o("", force="end")
@@ -159,10 +172,10 @@
return outtext
- def handle_charref(self, c):
+ def handle_charref(self, c: str) -> None:
self.handle_data(self.charref(c), True)
- def handle_entityref(self, c):
+ def handle_entityref(self, c: str) -> None:
ref = self.entityref(c)
# ref may be an empty string (e.g. for ‎/‏ markers that should
@@ -174,13 +187,13 @@
if ref:
self.handle_data(ref, True)
- def handle_starttag(self, tag, attrs):
- self.handle_tag(tag, attrs, start=True)
+ def handle_starttag(self, tag: str, attrs: List[Tuple[str, Optional[str]]]) -> None:
+ self.handle_tag(tag, dict(attrs), start=True)
- def handle_endtag(self, tag):
- self.handle_tag(tag, None, start=False)
+ def handle_endtag(self, tag: str) -> None:
+ self.handle_tag(tag, {}, start=False)
- def previousIndex(self, attrs):
+ def previousIndex(self, attrs: Dict[str, Optional[str]]) -> Optional[int]:
"""
:type attrs: dict
@@ -193,12 +206,12 @@
match = False
for i, a in enumerate(self.a):
- if "href" in a and a["href"] == attrs["href"]:
- if "title" in a or "title" in attrs:
+ if "href" in a.attrs and a.attrs["href"] == attrs["href"]:
+ if "title" in a.attrs or "title" in attrs:
if (
- "title" in a
+ "title" in a.attrs
and "title" in attrs
- and a["title"] == attrs["title"]
+ and a.attrs["title"] == attrs["title"]
):
match = True
else:
@@ -208,7 +221,9 @@
return i
return None
- def handle_emphasis(self, start, tag_style, parent_style):
+ def handle_emphasis(
+ self, start: bool, tag_style: Dict[str, str], parent_style: Dict[str, str]
+ ) -> None:
"""
Handles various text emphases
"""
@@ -279,13 +294,10 @@
if strikethrough:
self.quiet -= 1
- def handle_tag(self, tag, attrs, start):
+ def handle_tag(
+ self, tag: str, attrs: Dict[str, Optional[str]], start: bool
+ ) -> None:
self.current_tag = tag
- # attrs is None for endtags
- if attrs is None:
- attrs = {}
- else:
- attrs = dict(attrs)
if self.tag_callback is not None:
if self.tag_callback(self, tag, attrs, start) is True:
@@ -308,7 +320,7 @@
# need the attributes of the parent nodes in order to get a
# complete style description for the current element. we assume
# that google docs export well formed html.
- parent_style = {}
+ parent_style = {} # type: Dict[str, str]
if start:
if self.tag_stack:
parent_style = self.tag_stack[-1][2]
@@ -377,8 +389,10 @@
self.blockquote -= 1
self.p()
- def no_preceding_space(self):
- return self.preceding_data and re.match(r"[^\s]", self.preceding_data[-1])
+ def no_preceding_space(self: HTML2Text) -> bool:
+ return bool(
+ self.preceding_data and re.match(r"[^\s]", self.preceding_data[-1])
+ )
if tag in ["em", "i", "u"] and not self.ignore_emphasis:
if start and no_preceding_space(self):
@@ -427,6 +441,7 @@
self.abbr_title = attrs["title"]
else:
if self.abbr_title is not None:
+ assert self.abbr_data is not None
self.abbr_list[self.abbr_data] = self.abbr_title
self.abbr_title = None
self.abbr_data = None
@@ -438,7 +453,7 @@
self.o(self.close_quote)
self.quote = not self.quote
- def link_url(self, link, title=""):
+ def link_url(self: HTML2Text, link: str, title: str = "") -> None:
url = urlparse.urljoin(self.baseurl, link)
title = ' "{}"'.format(title) if title.strip() else ""
self.o("]({url}{title})".format(url=escape_md(url), title=title))
@@ -463,31 +478,28 @@
if self.maybe_automatic_link and not self.empty_link:
self.maybe_automatic_link = None
elif a:
+ assert a["href"] is not None
if self.empty_link:
self.o("[")
self.empty_link = False
self.maybe_automatic_link = None
if self.inline_links:
- try:
- title = a["title"] if a["title"] else ""
- title = escape_md(title)
- except KeyError:
- link_url(self, a["href"], "")
- else:
- link_url(self, a["href"], title)
+ title = a.get("title") or ""
+ title = escape_md(title)
+ link_url(self, a["href"], title)
else:
i = self.previousIndex(a)
if i is not None:
- a = self.a[i]
+ a_props = self.a[i]
else:
self.acount += 1
- a["count"] = self.acount
- a["outcount"] = self.outcount
- self.a.append(a)
- self.o("][" + str(a["count"]) + "]")
+ a_props = AnchorElement(a, self.acount, self.outcount)
+ self.a.append(a_props)
+ self.o("][" + str(a_props.count) + "]")
if tag == "img" and start and not self.ignore_images:
if "src" in attrs:
+ assert attrs["src"] is not None
if not self.images_to_alt:
attrs["href"] = attrs["src"]
alt = attrs.get("alt") or self.default_image_alt
@@ -499,8 +511,10 @@
):
self.o("<img src='" + attrs["src"] + "' ")
if "width" in attrs:
+ assert attrs["width"] is not None
self.o("width='" + attrs["width"] + "' ")
if "height" in attrs:
+ assert attrs["height"] is not None
self.o("height='" + attrs["height"] + "' ")
if alt:
self.o("alt='" + alt + "' ")
@@ -537,13 +551,12 @@
else:
i = self.previousIndex(attrs)
if i is not None:
- attrs = self.a[i]
+ a_props = self.a[i]
else:
self.acount += 1
- attrs["count"] = self.acount
- attrs["outcount"] = self.outcount
- self.a.append(attrs)
- self.o("[" + str(attrs["count"]) + "]")
+ a_props = AnchorElement(attrs, self.acount, self.outcount)
+ self.a.append(a_props)
+ self.o("[" + str(a_props.count) + "]")
if tag == "dl" and start:
self.p()
@@ -564,7 +577,7 @@
else:
list_style = tag
numbering_start = list_numbering_start(attrs)
- self.list.append({"name": list_style, "num": numbering_start})
+ self.list.append(ListElement(list_style, numbering_start))
else:
if self.list:
self.list.pop()
@@ -580,18 +593,18 @@
if self.list:
li = self.list[-1]
else:
- li = {"name": "ul", "num": 0}
+ li = ListElement("ul", 0)
if self.google_doc:
nest_count = self.google_nest_count(tag_style)
else:
nest_count = len(self.list)
# TODO: line up <ol><li>s > 9 correctly.
self.o(" " * nest_count)
- if li["name"] == "ul":
+ if li.name == "ul":
self.o(self.ul_item_mark + " ")
- elif li["name"] == "ol":
- li["num"] += 1
- self.o(str(li["num"]) + ". ")
+ elif li.name == "ol":
+ li.num += 1
+ self.o(str(li.num) + ". ")
self.start = True
if tag in ["table", "tr", "td", "th"]:
@@ -658,21 +671,23 @@
self.p()
# TODO: Add docstring for these one letter functions
- def pbr(self):
+ def pbr(self) -> None:
"Pretty print has a line break"
if self.p_p == 0:
self.p_p = 1
- def p(self):
+ def p(self) -> None:
"Set pretty print to 1 or 2 lines"
self.p_p = 1 if self.single_line_break else 2
- def soft_br(self):
+ def soft_br(self) -> None:
"Soft breaks"
self.pbr()
self.br_toggle = " "
- def o(self, data, puredata=False, force=False):
+ def o(
+ self, data: str, puredata: bool = False, force: Union[bool, str] = False
+ ) -> None:
"""
Deal with indentation and whitespace
"""
@@ -717,8 +732,7 @@
if not self.list:
bq += " "
# else: list content is already partially indented
- for i in range(len(self.list)):
- bq += " "
+ bq += " " * len(self.list)
data = data.replace("\n", "\n" + bq)
if self.startpre:
@@ -756,15 +770,16 @@
newa = []
for link in self.a:
- if self.outcount > link["outcount"]:
+ if self.outcount > link.outcount:
self.out(
" ["
- + str(link["count"])
+ + str(link.count)
+ "]: "
- + urlparse.urljoin(self.baseurl, link["href"])
+ + urlparse.urljoin(self.baseurl, link.attrs["href"])
)
- if "title" in link:
- self.out(" (" + link["title"] + ")")
+ if "title" in link.attrs:
+ assert link.attrs["title"] is not None
+ self.out(" (" + link.attrs["title"] + ")")
self.out("\n")
else:
newa.append(link)
@@ -783,7 +798,7 @@
self.out(data)
self.outcount += 1
- def handle_data(self, data, entity_char=False):
+ def handle_data(self, data: str, entity_char: bool = False) -> None:
if not data:
# Data may be empty for some HTML entities. For example,
# LEFT-TO-RIGHT MARK.
@@ -826,7 +841,7 @@
self.preceding_data = data
self.o(data, puredata=True)
- def charref(self, name):
+ def charref(self, name: str) -> str:
if name[0] in ["x", "X"]:
c = int(name[1:], 16)
else:
@@ -840,7 +855,7 @@
except ValueError: # invalid unicode
return ""
- def entityref(self, c):
+ def entityref(self, c: str) -> str:
if not self.unicode_snob and c in config.UNIFIABLE:
return config.UNIFIABLE[c]
try:
@@ -849,7 +864,7 @@
return "&" + c + ";"
return config.UNIFIABLE[c] if c == "nbsp" else ch
- def google_nest_count(self, style):
+ def google_nest_count(self, style: Dict[str, str]) -> int:
"""
Calculate the nesting count of google doc lists
@@ -863,7 +878,7 @@
return nest_count
- def optwrap(self, text):
+ def optwrap(self, text: str) -> str:
"""
Wrap all paragraphs in the provided text.
@@ -924,7 +939,7 @@
return result
-def html2text(html, baseurl="", bodywidth=None):
+def html2text(html: str, baseurl: str = "", bodywidth: Optional[int] = None) -> str:
if bodywidth is None:
bodywidth = config.BODY_WIDTH
h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/__main__.py new/html2text-2020.1.16/html2text/__main__.py
--- old/html2text-2019.9.26/html2text/__main__.py 2019-02-26 15:42:00.000000000 +0100
+++ new/html2text-2020.1.16/html2text/__main__.py 2019-10-12 17:55:30.000000000 +0200
@@ -1,3 +1,3 @@
-from html2text.cli import main
+from .cli import main
main()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/cli.py new/html2text-2020.1.16/html2text/cli.py
--- old/html2text-2019.9.26/html2text/cli.py 2019-08-15 12:56:54.000000000 +0200
+++ new/html2text-2020.1.16/html2text/cli.py 2019-10-12 18:20:41.000000000 +0200
@@ -1,10 +1,10 @@
import argparse
import sys
-from html2text import HTML2Text, __version__, config
+from . import HTML2Text, __version__, config
-def main():
+def main() -> None:
baseurl = ""
class bcolors:
@@ -259,7 +259,7 @@
data = sys.stdin.buffer.read()
try:
- data = data.decode(args.encoding, args.decode_errors)
+ html = data.decode(args.encoding, args.decode_errors)
except UnicodeDecodeError as err:
warning = bcolors.WARNING + "Warning:" + bcolors.ENDC
warning += " Use the " + bcolors.OKGREEN
@@ -303,4 +303,4 @@
h.open_quote = args.open_quote
h.close_quote = args.close_quote
- sys.stdout.write(h.handle(data))
+ sys.stdout.write(h.handle(html))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/elements.py new/html2text-2020.1.16/html2text/elements.py
--- old/html2text-2019.9.26/html2text/elements.py 1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2020.1.16/html2text/elements.py 2019-10-12 18:20:41.000000000 +0200
@@ -0,0 +1,18 @@
+from typing import Dict, Optional
+
+
+class AnchorElement:
+ __slots__ = ["attrs", "count", "outcount"]
+
+ def __init__(self, attrs: Dict[str, Optional[str]], count: int, outcount: int):
+ self.attrs = attrs
+ self.count = count
+ self.outcount = outcount
+
+
+class ListElement:
+ __slots__ = ["name", "num"]
+
+ def __init__(self, name: str, num: int):
+ self.name = name
+ self.num = num
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/typing.py new/html2text-2020.1.16/html2text/typing.py
--- old/html2text-2019.9.26/html2text/typing.py 1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2020.1.16/html2text/typing.py 2019-10-12 18:20:41.000000000 +0200
@@ -0,0 +1,3 @@
+class OutCallback:
+ def __call__(self, s: str) -> None:
+ ...
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text/utils.py new/html2text-2020.1.16/html2text/utils.py
--- old/html2text-2019.9.26/html2text/utils.py 2019-08-15 12:56:54.000000000 +0200
+++ new/html2text-2020.1.16/html2text/utils.py 2020-01-16 15:08:28.000000000 +0100
@@ -1,6 +1,7 @@
import html.entities
+from typing import Dict, List, Optional
-from html2text import config
+from . import config
unifiable_n = {
html.entities.name2codepoint[k]: v
@@ -9,7 +10,7 @@
}
-def hn(tag):
+def hn(tag: str) -> int:
if tag[0] == "h" and len(tag) == 2:
n = tag[1]
if "0" < n <= "9":
@@ -17,7 +18,7 @@
return 0
-def dumb_property_dict(style):
+def dumb_property_dict(style: str) -> Dict[str, str]:
"""
:returns: A hash of css attributes
"""
@@ -27,7 +28,7 @@
}
-def dumb_css_parser(data):
+def dumb_css_parser(data: str) -> Dict[str, Dict[str, str]]:
"""
:type data: str
@@ -44,16 +45,20 @@
# parse the css. reverted from dictionary comprehension in order to
# support older pythons
- elements = [x.split("{") for x in data.split("}") if "{" in x.strip()]
+ pairs = [x.split("{") for x in data.split("}") if "{" in x.strip()]
try:
- elements = {a.strip(): dumb_property_dict(b) for a, b in elements}
+ elements = {a.strip(): dumb_property_dict(b) for a, b in pairs}
except ValueError:
elements = {} # not that important
return elements
-def element_style(attrs, style_def, parent_style):
+def element_style(
+ attrs: Dict[str, Optional[str]],
+ style_def: Dict[str, Dict[str, str]],
+ parent_style: Dict[str, str],
+) -> Dict[str, str]:
"""
:type attrs: dict
:type style_def: dict
@@ -64,17 +69,19 @@
"""
style = parent_style.copy()
if "class" in attrs:
+ assert attrs["class"] is not None
for css_class in attrs["class"].split():
css_style = style_def.get("." + css_class, {})
style.update(css_style)
if "style" in attrs:
+ assert attrs["style"] is not None
immediate_style = dumb_property_dict(attrs["style"])
style.update(immediate_style)
return style
-def google_list_style(style):
+def google_list_style(style: Dict[str, str]) -> str:
"""
Finds out whether this is an ordered or unordered list
@@ -90,7 +97,7 @@
return "ol"
-def google_has_height(style):
+def google_has_height(style: Dict[str, str]) -> bool:
"""
Check if the style of the element has the 'height' attribute
explicitly defined
@@ -102,7 +109,7 @@
return "height" in style
-def google_text_emphasis(style):
+def google_text_emphasis(style: Dict[str, str]) -> List[str]:
"""
:type style: dict
@@ -120,7 +127,7 @@
return emphasis
-def google_fixed_width_font(style):
+def google_fixed_width_font(style: Dict[str, str]) -> bool:
"""
Check if the css of the current element defines a fixed width font
@@ -134,7 +141,7 @@
return "courier new" == font_family or "consolas" == font_family
-def list_numbering_start(attrs):
+def list_numbering_start(attrs: Dict[str, Optional[str]]) -> int:
"""
Extract numbering from list element attributes
@@ -143,6 +150,7 @@
:rtype: int or None
"""
if "start" in attrs:
+ assert attrs["start"] is not None
try:
return int(attrs["start"]) - 1
except ValueError:
@@ -151,10 +159,10 @@
return 0
-def skipwrap(para, wrap_links, wrap_list_items):
+def skipwrap(para: str, wrap_links: bool, wrap_list_items: bool) -> bool:
# If it appears to contain a link
# don't wrap
- if (len(config.RE_LINK.findall(para)) > 0) and not wrap_links:
+ if not wrap_links and config.RE_LINK.search(para):
return True
# If the text begins with four spaces or one tab, it's a code block;
# don't wrap
@@ -182,7 +190,7 @@
)
-def escape_md(text):
+def escape_md(text: str) -> str:
"""
Escapes markdown-sensitive characters within other markdown
constructs.
@@ -190,7 +198,7 @@
return config.RE_MD_CHARS_MATCHER.sub(r"\\\1", text)
-def escape_md_section(text, snob=False):
+def escape_md_section(text: str, snob: bool = False) -> str:
"""
Escapes markdown-sensitive characters across whole document sections.
"""
@@ -206,7 +214,7 @@
return text
-def reformat_table(lines, right_margin):
+def reformat_table(lines: List[str], right_margin: int) -> List[str]:
"""
Given the lines of a table
padds the cells and returns the new lines
@@ -249,12 +257,13 @@
return new_lines
-def pad_tables_in_text(text, right_margin=1):
+def pad_tables_in_text(text: str, right_margin: int = 1) -> str:
"""
Provide padding for tables in the text
"""
lines = text.split("\n")
- table_buffer, table_started = [], False
+ table_buffer = [] # type: List[str]
+ table_started = False
new_lines = []
for line in lines:
# Toggle table started
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text.egg-info/PKG-INFO new/html2text-2020.1.16/html2text.egg-info/PKG-INFO
--- old/html2text-2019.9.26/html2text.egg-info/PKG-INFO 2019-09-26 12:37:26.000000000 +0200
+++ new/html2text-2020.1.16/html2text.egg-info/PKG-INFO 2020-01-16 15:21:10.000000000 +0100
@@ -1,6 +1,6 @@
Metadata-Version: 2.1
Name: html2text
-Version: 2019.9.26
+Version: 2020.1.16
Summary: Turn HTML into equivalent Markdown-structured text.
Home-page: https://github.com/Alir3z4/html2text/
Author: Aaron Swartz
@@ -105,6 +105,7 @@
Classifier: Programming Language :: Python :: 3.5
Classifier: Programming Language :: Python :: 3.6
Classifier: Programming Language :: Python :: 3.7
+Classifier: Programming Language :: Python :: 3.8
Classifier: Programming Language :: Python :: 3 :: Only
Classifier: Programming Language :: Python :: Implementation :: CPython
Classifier: Programming Language :: Python :: Implementation :: PyPy
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text.egg-info/SOURCES.txt new/html2text-2020.1.16/html2text.egg-info/SOURCES.txt
--- old/html2text-2019.9.26/html2text.egg-info/SOURCES.txt 2019-09-26 12:37:26.000000000 +0200
+++ new/html2text-2020.1.16/html2text.egg-info/SOURCES.txt 2020-01-16 15:21:10.000000000 +0100
@@ -10,11 +10,15 @@
html2text/__main__.py
html2text/cli.py
html2text/config.py
+html2text/elements.py
+html2text/py.typed
+html2text/typing.py
html2text/utils.py
html2text.egg-info/PKG-INFO
html2text.egg-info/SOURCES.txt
html2text.egg-info/dependency_links.txt
html2text.egg-info/entry_points.txt
+html2text.egg-info/not-zip-safe
html2text.egg-info/top_level.txt
test/GoogleDocMassDownload.html
test/GoogleDocMassDownload.md
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/html2text.egg-info/not-zip-safe new/html2text-2020.1.16/html2text.egg-info/not-zip-safe
--- old/html2text-2019.9.26/html2text.egg-info/not-zip-safe 1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2020.1.16/html2text.egg-info/not-zip-safe 2020-01-16 15:21:10.000000000 +0100
@@ -0,0 +1 @@
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/setup.cfg new/html2text-2020.1.16/setup.cfg
--- old/html2text-2019.9.26/setup.cfg 2019-09-26 12:37:26.000000000 +0200
+++ new/html2text-2020.1.16/setup.cfg 2020-01-16 15:21:10.000000000 +0100
@@ -1,3 +1,43 @@
+[metadata]
+name = html2text
+version = attr: html2text.__version__
+description = Turn HTML into equivalent Markdown-structured text.
+long_description = file: README.md
+long_description_content_type = text/markdown
+url = https://github.com/Alir3z4/html2text/
+author = Aaron Swartz
+author_email = [email protected]
+maintainer = Alireza Savand
+maintainer_email = [email protected]
+license = GNU GPL 3
+classifiers =
+ Development Status :: 5 - Production/Stable
+ Intended Audience :: Developers
+ License :: OSI Approved :: GNU General Public License (GPL)
+ Operating System :: OS Independent
+ Programming Language :: Python
+ Programming Language :: Python :: 3
+ Programming Language :: Python :: 3.5
+ Programming Language :: Python :: 3.6
+ Programming Language :: Python :: 3.7
+ Programming Language :: Python :: 3.8
+ Programming Language :: Python :: 3 :: Only
+ Programming Language :: Python :: Implementation :: CPython
+ Programming Language :: Python :: Implementation :: PyPy
+platform = OS Independent
+
+[options]
+zip_safe = False
+packages = html2text
+python_requires = >=3.5
+
+[options.entry_points]
+console_scripts =
+ html2text = html2text.cli:main
+
+[options.package_data]
+html2text = py.typed
+
[flake8]
max_line_length = 88
ignore =
@@ -10,6 +50,9 @@
line_length = 88
multi_line_output = 3
+[mypy]
+python_version = 3.5
+
[egg_info]
tag_build =
tag_date = 0
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/setup.py new/html2text-2020.1.16/setup.py
--- old/html2text-2019.9.26/setup.py 2019-08-15 12:56:54.000000000 +0200
+++ new/html2text-2020.1.16/setup.py 2019-10-31 19:37:31.000000000 +0100
@@ -1,39 +1,3 @@
from setuptools import setup
-
-def readall(f):
- with open(f) as fp:
- return fp.read()
-
-
-setup(
- name="html2text",
- version=".".join(map(str, __import__("html2text").__version__)),
- description="Turn HTML into equivalent Markdown-structured text.",
- long_description=readall("README.md"),
- long_description_content_type="text/markdown",
- author="Aaron Swartz",
- author_email="[email protected]",
- maintainer="Alireza Savand",
- maintainer_email="[email protected]",
- url="https://github.com/Alir3z4/html2text/",
- platforms="OS Independent",
- classifiers=[
- "Development Status :: 5 - Production/Stable",
- "Intended Audience :: Developers",
- "License :: OSI Approved :: GNU General Public License (GPL)",
- "Operating System :: OS Independent",
- "Programming Language :: Python",
- "Programming Language :: Python :: 3",
- "Programming Language :: Python :: 3.5",
- "Programming Language :: Python :: 3.6",
- "Programming Language :: Python :: 3.7",
- "Programming Language :: Python :: 3 :: Only",
- "Programming Language :: Python :: Implementation :: CPython",
- "Programming Language :: Python :: Implementation :: PyPy",
- ],
- python_requires=">=3.5",
- entry_points={"console_scripts": ["html2text = html2text.cli:main"]},
- license="GNU GPL 3",
- packages=["html2text"],
-)
+setup()
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/test/test_html2text.py new/html2text-2020.1.16/test/test_html2text.py
--- old/html2text-2019.9.26/test/test_html2text.py 2019-08-15 12:56:54.000000000 +0200
+++ new/html2text-2020.1.16/test/test_html2text.py 2020-01-16 15:08:28.000000000 +0100
@@ -40,8 +40,7 @@
if base_fn.find("unicode") >= 0:
module_args["unicode_snob"] = True
- # There is no command-line option to control unicode_snob.
- cmdline_args = skip
+ cmdline_args.append("--unicode-snob")
func_args = skip
if base_fn.find("flip_emphasis") >= 0:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2019.9.26/tox.ini new/html2text-2020.1.16/tox.ini
--- old/html2text-2019.9.26/tox.ini 2019-09-25 09:41:57.000000000 +0200
+++ new/html2text-2020.1.16/tox.ini 2019-10-31 19:37:31.000000000 +0100
@@ -3,7 +3,8 @@
black
flake8
isort
- py{35,36,37,py3}
+ mypy
+ py{35,36,37,38,py3}
minversion = 1.9
[testenv]
@@ -36,3 +37,8 @@
deps =
isort
skip_install = true
+
+[testenv:mypy]
+commands = mypy --strict html2text
+deps = mypy
+skip_install = true