commit python-html2text for openSUSE:Factory

root Fri, 24 Aug 2018 08:05:03 -0700

Hello community,

here is the log from the commit of package python-html2text for 
openSUSE:Factory checked in at 2018-08-24 17:04:32
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python-html2text (Old)
 and      /work/SRC/openSUSE:Factory/.python-html2text.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python-html2text"

Fri Aug 24 17:04:32 2018 rev:18 rq:629541 version:2018.1.9

Changes:
--------
--- /work/SRC/openSUSE:Factory/python-html2text/python-html2text.changes        
2017-05-02 08:54:48.643017336 +0200
+++ /work/SRC/openSUSE:Factory/.python-html2text.new/python-html2text.changes   
2018-08-24 17:04:34.322104714 +0200
@@ -1,0 +2,36 @@
+Thu Aug 16 10:23:31 UTC 2018 - jeng...@inai.de
+
+- Use noun phrase for summary. Ensure accuracy and
+  neutrality of description.
+
+-------------------------------------------------------------------
+Mon Aug 13 11:39:14 UTC 2018 - mc...@suse.com
+
+- Upgrade to 2018.1.9:
+    Fix #188: Non-ASCII in title attribute causes encode error.
+    Feature #194: Add support for the <kbd> tag.
+    Feature #193: Add support for the <q> tag.
+    Fix #157: Fix images link with div wrap
+    Fix #55: Fix error when empty title tags
+    Fix #160: The html2text tests are failing on Windows and on
+        Cygwin due to differences in eol handling between
+        Windows/*nix
+    Feature #164: Housekeeping: Add flake8 to the travis build,
+        cleanup existing flake8 violations, add py3.6 and pypy3
+        to the travis build
+    Fix #109: Fix for unexpanded &lt; &gt; &amp;
+    Fix #143: Fix line wrapping for the lines starting with bold
+        Adds support for numeric bold text indication in font-weight,
+        as used by Google (and presumably others.)
+    Fix #173 and #142: Stripping whitespace in crucial markdown
+        and adding whitespace as necessary
+        Don't drop any cell data on tables with uneven row lengths
+        (e.g. colspan in use)
+
+-------------------------------------------------------------------
+Mon Aug 13 08:38:04 UTC 2018 - mc...@suse.com
+
+- Remove dependency on unittest2
+  Added remove_unittest2.patch to facilitate that
+
+-------------------------------------------------------------------

Old:
----
  html2text-2016.9.19.tar.gz

New:
----
  html2text-2018.1.9.tar.gz
  remove_unittest2.patch

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python-html2text.spec ++++++
--- /var/tmp/diff_new_pack.GorObB/_old  2018-08-24 17:04:35.334105915 +0200
+++ /var/tmp/diff_new_pack.GorObB/_new  2018-08-24 17:04:35.338105920 +0200
@@ -1,7 +1,7 @@
 #
 # spec file for package python-html2text
 #
-# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany.
+# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany.
 #
 # All modifications and additions to the file contributed by third parties
 # remain the property of their copyright owners, unless otherwise agreed
@@ -16,39 +16,38 @@
 #
 
 
-%bcond_without tests
+#
+%define upname html2text
 
 %{?!python_module:%define python_module() python-%{**} python3-%{**}}
-Name:           python-html2text
-Version:        2016.9.19
+%bcond_without tests
+Name:           python-%{upname}
+Version:        2018.1.9
 Release:        0
-Url:            https://github.com/Alir3z4/html2text/
-Summary:        Turn HTML into equivalent Markdown-structured text
-License:        GPL-3.0
+Summary:        Python script for turning HTML into Markdown text
+License:        GPL-3.0-only
 Group:          Development/Languages/Python
-Source:         
https://files.pythonhosted.org/packages/source/h/html2text/html2text-%{version}.tar.gz
-BuildRoot:      %{_tmppath}/%{name}-%{version}-build
-BuildRequires:  fdupes
-BuildRequires:  python-rpm-macros
+URL:            https://github.com/Alir3z4/html2text/
+Source:         
https://files.pythonhosted.org/packages/source/h/%{upname}/%{upname}-%{version}.tar.gz
+Patch0:         remove_unittest2.patch
 BuildRequires:  %{python_module devel}
 BuildRequires:  %{python_module setuptools}
-%if %{with tests}
-BuildRequires:  python2-unittest2
-%endif
+BuildRequires:  fdupes
+BuildRequires:  python-rpm-macros
 Requires(post):   update-alternatives
 Requires(preun):  update-alternatives
 BuildArch:      noarch
 %python_subpackages
 
 %description
-html2text is a Python script that converts a page of HTML into clean,
-easy-to-read plain ASCII text. Better yet, that ASCII also happens to
-be valid Markdown (a text-to-HTML format).
+html2text is a Python script that converts a page of HTML into
+Markdown (a text-to-HTML format).
 
 %prep
-%setup -q -n html2text-%{version}
+%setup -q -n %{upname}-%{version}
+%patch0 -p1
 # remove useless shebang
-sed -i '/^#!/d' html2text/__init__.py
+sed -i '/^#!/d' %{upname}/__init__.py
 
 %build
 %python_build
@@ -57,29 +56,26 @@
 %python_install
 %python_expand %fdupes %{buildroot}%{$python_sitelib}
 
-# To avoid conflicts with the rst2html5 package
-mv %{buildroot}%{_bindir}/html2text %{buildroot}%{_bindir}/html2text-python
-ln -s -f %{_sysconfdir}/alternatives/html2text %{buildroot}%{_bindir}/html2text
+%python_clone -a %{buildroot}%{_bindir}/%{upname}
+
+# remove executable bits from egg files
+%python_expand chmod -x %{buildroot}%{$python_sitelib}/%{upname}-*.egg-info/*
 
 %post
-update-alternatives --install %{_bindir}/html2text html2text 
%{_bindir}/html2text-python 15
+%python_install_alternative html2text
 
-%preun
-if [ ! -f %{_bindir}/html2text-python ] ; then
-   update-alternatives --remove html2text %{_bindir}/html2text-python
-fi
+%postun
+%python_uninstall_alternative html2text
 
 %if %{with tests}
 %check
 %python_exec setup.py test
 %endif
 
-%files %python_files
-%defattr(-,root,root,-)
-%doc COPYING README.md AUTHORS.rst ChangeLog.rst
-%python3_only %{_bindir}/html2text
-%python3_only %{_bindir}/html2text-python
-%python3_only %ghost %{_sysconfdir}/alternatives/html2text
+%files %{python_files}
+%license COPYING
+%doc README.md AUTHORS.rst ChangeLog.rst
+%python_alternative %{_bindir}/%{upname}
 %{python_sitelib}/*
 
 %changelog

++++++ html2text-2016.9.19.tar.gz -> html2text-2018.1.9.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/AUTHORS.rst 
new/html2text-2018.1.9/AUTHORS.rst
--- old/html2text-2016.9.19/AUTHORS.rst 2016-05-29 18:08:48.000000000 +0200
+++ new/html2text-2018.1.9/AUTHORS.rst  2018-01-09 05:43:43.000000000 +0100
@@ -20,7 +20,13 @@
 * Etienne Millon <m...@emillon.org>
 * John C F <gh: critiqjo>
 * Mikhail Melnik <by.zumz...@gmail.com>
-
+* Andres Rey
+* Ciprian Miclaus
+* Toshihiro Kamiya <kam...@mbj.nifty.com>
+* Matt Dennewitz <mattdennew...@gmail.com>
+* Jonathan Sundqvist <sundqvist.jonat...@gmail.com>
+* Simon Meers <gh: DrMeers>
+* Kurt McKee <contac...@kurtmckee.org>
 
 Maintainer:
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/ChangeLog.rst 
new/html2text-2018.1.9/ChangeLog.rst
--- old/html2text-2016.9.19/ChangeLog.rst       2016-09-19 00:03:35.000000000 
+0200
+++ new/html2text-2018.1.9/ChangeLog.rst        2018-01-09 05:43:43.000000000 
+0100
@@ -1,3 +1,28 @@
+2018.1.9
+========
+----
+
+* Fix #188: Non-ASCII in title attribute causes encode error.
+* Feature #194: Add support for the <kbd> tag.
+* Feature #193: Add support for the <q> tag.
+
+
+2017.10.4
+==========
+----
+
+* Fix #157: Fix images link with div wrap
+* Fix #55: Fix error when empty title tags
+* Fix #160: The html2text tests are failing on Windows and on Cygwin due to 
differences in eol handling between windows/*nix
+* Feature #164: Housekeeping: Add flake8 to the travis build, cleanup existing 
flake8 violations, add py3.6 and pypy3 to the travis build
+* Fix #109: Fix for unexpanded &lt; &gt; &amp;
+* Fix #143: Fix line wrapping for the lines starting with bold
+* Adds support for numeric bold text indication in `font-weight`,
+  as used by Google (and presumably others.)
+* Fix #173 and #142: Stripping whitespace in crucial markdown and adding 
whitespace as necessary
+* Don't drop any cell data on tables with uneven row lengths (e.g. colspan in use)
+
+
 2016.9.19
 =========
 ----
@@ -124,7 +149,7 @@
 ==========
 ----
 
-* Feature #49: Added a images_to_alt option to discard images and keep only 
their alt.
+* Feature #49: Added an images_to_alt option to discard images and keep only 
their alt.
 * Feature #50: Protect links, surrounding them with angle brackets to avoid 
breaking...
 * Feature: Add ``setup.cfg`` file.
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/PKG-INFO 
new/html2text-2018.1.9/PKG-INFO
--- old/html2text-2016.9.19/PKG-INFO    2016-09-19 00:08:46.000000000 +0200
+++ new/html2text-2018.1.9/PKG-INFO     2018-01-10 07:03:39.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: html2text
-Version: 2016.9.19
+Version: 2018.1.9
 Summary: Turn HTML into equivalent Markdown-structured text.
 Home-page: https://github.com/Alir3z4/html2text/
 Author: Alireza Savand
@@ -141,3 +141,4 @@
 Classifier: Programming Language :: Python :: 3.3
 Classifier: Programming Language :: Python :: 3.4
 Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/html2text/__init__.py 
new/html2text-2018.1.9/html2text/__init__.py
--- old/html2text-2016.9.19/html2text/__init__.py       2016-09-19 
00:03:55.000000000 +0200
+++ new/html2text-2018.1.9/html2text/__init__.py        2018-01-10 
06:58:34.000000000 +0100
@@ -2,6 +2,7 @@
 # coding: utf-8
 """html2text: Turn HTML into equivalent Markdown-structured text."""
 from __future__ import division
+from __future__ import unicode_literals
 import re
 import sys
 
@@ -10,7 +11,7 @@
 except ImportError:  # pragma: no cover
     pass
 
-from html2text.compat import urlparse, HTMLParser, html_escape
+from html2text.compat import urlparse, HTMLParser
 from html2text import config
 
 from html2text.utils import (
@@ -30,7 +31,14 @@
     pad_tables_in_text
 )
 
-__version__ = (2016, 9, 19)
+try:
+    chr = unichr
+    nochr = unicode('')
+except NameError:
+    # python3 uses chr
+    nochr = str('')
+
+__version__ = (2018, 1, 9)
 
 
 # TODO:
@@ -81,6 +89,8 @@
         self.pad_tables = config.PAD_TABLES  # covered in cli
         self.default_image_alt = config.DEFAULT_IMAGE_ALT  # covered in cli
         self.tag_callback = None
+        self.open_quote = config.OPEN_QUOTE  # covered in cli
+        self.close_quote = config.CLOSE_QUOTE  # covered in cli
 
         if out is None:  # pragma: no cover
             self.out = self.outtextf
@@ -106,6 +116,7 @@
         self.pre = 0
         self.startpre = 0
         self.code = False
+        self.quote = False
         self.br_toggle = ''
         self.lastWasNL = 0
         self.lastWasList = False
@@ -119,6 +130,10 @@
         self.abbr_data = None  # last inner HTML (for abbr being defined)
         self.abbr_list = {}  # stack of abbreviations to write later
         self.baseurl = baseurl
+        self.stressed = False
+        self.preceding_stressed = False
+        self.preceding_data = None
+        self.current_tag = None
 
         try:
             del unifiable_n[name2cp('nbsp')]
@@ -147,22 +162,15 @@
     def close(self):
         HTMLParser.HTMLParser.close(self)
 
-        try:
-            nochr = unicode('')
-            unicode_character = unichr
-        except NameError:
-            nochr = str('')
-            unicode_character = chr
-
         self.pbr()
         self.o('', 0, 'end')
 
         outtext = nochr.join(self.outtextlist)
 
         if self.unicode_snob:
-            nbsp = unicode_character(name2cp('nbsp'))
+            nbsp = chr(name2cp('nbsp'))
         else:
-            nbsp = unicode_character(32)
+            nbsp = chr(32)
         try:
             outtext = outtext.replace(unicode('&nbsp_place_holder;'), nbsp)
         except NameError:
@@ -175,17 +183,10 @@
         return outtext
 
     def handle_charref(self, c):
-        charref = self.charref(c)
-        if not self.code and not self.pre:
-            charref = html_escape(charref)
-        self.handle_data(charref, True)
+        self.handle_data(self.charref(c), True)
 
     def handle_entityref(self, c):
-        entityref = self.entityref(c)
-        if (not self.code and not self.pre
-                and entityref != '&nbsp_place_holder;'):
-            entityref = html_escape(entityref)
-        self.handle_data(entityref, True)
+        self.handle_data(self.entityref(c), True)
 
     def handle_starttag(self, tag, attrs):
         self.handle_tag(tag, attrs, 1)
@@ -208,10 +209,11 @@
             i += 1
             match = 0
 
-            if ('href' in a) and a['href'] == attrs['href']:
-                if ('title' in a) or ('title' in attrs):
-                    if (('title' in a) and ('title' in attrs) and
-                                a['title'] == attrs['title']):
+            if 'href' in a and a['href'] == attrs['href']:
+                if 'title' in a or 'title' in attrs:
+                    if 'title' in a and \
+                        'title' in attrs and \
+                            a['title'] == attrs['title']:
                         match = True
                 else:
                     match = True
@@ -229,8 +231,16 @@
         # handle Google's text emphasis
         strikethrough = 'line-through' in \
                         tag_emphasis and self.hide_strikethrough
-        bold = 'bold' in tag_emphasis and not 'bold' in parent_emphasis
-        italic = 'italic' in tag_emphasis and not 'italic' in parent_emphasis
+
+        # google and others may mark a font's weight as `bold` or `700`
+        bold = False
+        for bold_marker in config.BOLD_TEXT_STYLE_VALUES:
+            bold = (bold_marker in tag_emphasis
+                    and bold_marker not in parent_emphasis)
+            if bold:
+                break
+
+        italic = 'italic' in tag_emphasis and 'italic' not in parent_emphasis
         fixed = google_fixed_width_font(tag_style) and not \
             google_fixed_width_font(parent_style) and not self.pre
 
@@ -282,6 +292,7 @@
                 self.quiet -= 1
 
     def handle_tag(self, tag, attrs, start):
+        self.current_tag = tag
         # attrs is None for endtags
         if attrs is None:
             attrs = {}
@@ -292,10 +303,11 @@
             if self.tag_callback(self, tag, attrs, start) is True:
                 return
 
-        # first thing inside the anchor tag is another tag that produces some 
output
-        if (start and not self.maybe_automatic_link is None
-                and tag not in ['p', 'div', 'style', 'dl', 'dt']
-                and (tag != "img" or self.ignore_images)):
+        # first thing inside the anchor tag is another tag
+        # that produces some output
+        if (start and self.maybe_automatic_link is not None and
+                tag not in ['p', 'div', 'style', 'dl', 'dt'] and
+                (tag != "img" or self.ignore_images)):
             self.o("[")
             self.maybe_automatic_link = None
             self.empty_link = False
@@ -312,7 +324,8 @@
                 tag_style = element_style(attrs, self.style_def, parent_style)
                 self.tag_stack.append((tag, attrs, tag_style))
             else:
-                dummy, attrs, tag_style = self.tag_stack.pop() if 
self.tag_stack else (None, {}, {})
+                dummy, attrs, tag_style = self.tag_stack.pop() \
+                    if self.tag_stack else (None, {}, {})
                 if self.tag_stack:
                     parent_style = self.tag_stack[-1][2]
 
@@ -331,6 +344,8 @@
                     self.p()
                 else:
                     self.soft_br()
+            elif self.astack and tag == 'div':
+                pass
             else:
                 self.p()
 
@@ -370,24 +385,49 @@
                 self.blockquote -= 1
                 self.p()
 
+        def no_preceding_space(self):
+            return (self.preceding_data
+                    and re.match(r'[^\s]', self.preceding_data[-1]))
+
         if tag in ['em', 'i', 'u'] and not self.ignore_emphasis:
-            self.o(self.emphasis_mark)
+            if start and no_preceding_space(self):
+                emphasis = ' ' + self.emphasis_mark
+            else:
+                emphasis = self.emphasis_mark
+
+            self.o(emphasis)
+            if start:
+                self.stressed = True
+
         if tag in ['strong', 'b'] and not self.ignore_emphasis:
-            self.o(self.strong_mark)
-        if tag in ['del', 'strike', 's']:
+            if start and no_preceding_space(self):
+                strong = ' ' + self.strong_mark
+            else:
+                strong = self.strong_mark
+
+            self.o(strong)
             if start:
-                self.o('~~')
+                self.stressed = True
+
+        if tag in ['del', 'strike', 's']:
+            if start and no_preceding_space(self):
+                strike = ' ~~'
             else:
-                self.o('~~')
+                strike = '~~'
+
+            self.o(strike)
+            if start:
+                self.stressed = True
 
         if self.google_doc:
             if not self.inheader:
                 # handle some font attributes, but leave headers clean
                 self.handle_emphasis(start, tag_style, parent_style)
 
-        if tag in ["code", "tt"] and not self.pre:
+        if tag in ["kbd", "code", "tt"] and not self.pre:
             self.o('`')  # TODO: `` `this` ``
             self.code = not self.code
+
         if tag == "abbr":
             if start:
                 self.abbr_title = None
@@ -400,17 +440,30 @@
                     self.abbr_title = None
                 self.abbr_data = ''
 
+        if tag == "q":
+            if not self.quote:
+                self.o(self.open_quote)
+            else:
+                self.o(self.close_quote)
+            self.quote = not self.quote
+
+        def link_url(self, link, title=""):
+            url = urlparse.urljoin(self.baseurl, link)
+            title = ' "{0}"'.format(title) if title.strip() else ''
+            self.o(']({url}{title})'.format(url=escape_md(url),
+                                            title=title))
+
         if tag == "a" and not self.ignore_links:
             if start:
-                if ('href' in attrs) and \
-                        (attrs['href'] is not None) and \
-                        not (self.skip_internal_links and
-                                 attrs['href'].startswith('#')):
+                if 'href' in attrs and \
+                    attrs['href'] is not None and not \
+                        (self.skip_internal_links and
+                            attrs['href'].startswith('#')):
                     self.astack.append(attrs)
                     self.maybe_automatic_link = attrs['href']
                     self.empty_link = True
                     if self.protect_links:
-                        attrs['href'] = '<'+attrs['href']+'>'
+                        attrs['href'] = '<' + attrs['href'] + '>'
                 else:
                     self.astack.append(None)
             else:
@@ -425,12 +478,12 @@
                             self.maybe_automatic_link = None
                         if self.inline_links:
                             try:
-                                title = escape_md(a['title'])
+                                title = a['title'] if a['title'] else ''
+                                title = escape_md(title)
                             except KeyError:
-                                self.o("](" + 
escape_md(urlparse.urljoin(self.baseurl, a['href'])) + ")")
+                                link_url(self, a['href'], '')
                             else:
-                                self.o("](" + 
escape_md(urlparse.urljoin(self.baseurl, a['href']))
-                                       + ' "' + title + '" )')
+                                link_url(self, a['href'], title)
                         else:
                             i = self.previousIndex(a)
                             if i is not None:
@@ -463,7 +516,7 @@
                     return
 
                 # If we have a link to create, output the start
-                if not self.maybe_automatic_link is None:
+                if self.maybe_automatic_link is not None:
                     href = self.maybe_automatic_link
                     if self.images_to_alt and escape_md(alt) == href and \
                             self.absolute_url_matcher.match(href):
@@ -483,7 +536,16 @@
                     self.o("![" + escape_md(alt) + "]")
                     if self.inline_links:
                         href = attrs.get('href') or ''
-                        self.o("(" + escape_md(urlparse.urljoin(self.baseurl, 
href)) + ")")
+                        self.o(
+                            "(" +
+                            escape_md(
+                                urlparse.urljoin(
+                                    self.baseurl,
+                                    href
+                                )
+                            ) +
+                            ")"
+                        )
                     else:
                         i = self.previousIndex(attrs)
                         if i is not None:
@@ -576,11 +638,11 @@
                     if start:
                         self.table_start = True
                         if self.pad_tables:
-                            self.o("<"+config.TABLE_MARKER_FOR_PAD+">")
+                            self.o("<" + config.TABLE_MARKER_FOR_PAD + ">")
                             self.o("  \n")
                     else:
                         if self.pad_tables:
-                            self.o("</"+config.TABLE_MARKER_FOR_PAD+">")
+                            self.o("</" + config.TABLE_MARKER_FOR_PAD + ">")
                             self.o("  \n")
                 if tag in ["td", "th"] and start:
                     if self.split_next_td:
@@ -654,8 +716,9 @@
                 return
 
             if self.startpre:
-                #self.out(" :") #TODO: not output when already one there
-                if not data.startswith("\n"):  # <pre>stuff...
+                # self.out(" :") #TODO: not output when already one there
+                if not data.startswith("\n") and not data.startswith("\r\n"):
+                    # <pre>stuff...
                     data = "\n" + data
                 if self.mark_code:
                     self.out("\n[code]")
@@ -668,7 +731,7 @@
             if self.pre:
                 if not self.list:
                     bq += "    "
-                #else: list content is already partially indented
+                # else: list content is already partially indented
                 for i in range(len(self.list)):
                     bq += "    "
                 data = data.replace("\n", "\n" + bq)
@@ -700,8 +763,8 @@
                     self.out(' ')
                 self.space = 0
 
-            if self.a and ((self.p_p == 2 and self.links_each_paragraph)
-                           or force == "end"):
+            if self.a and ((self.p_p == 2 and self.links_each_paragraph) or
+                           force == "end"):
                 if force == "end":
                     self.out("\n")
 
@@ -731,13 +794,25 @@
             self.outcount += 1
 
     def handle_data(self, data, entity_char=False):
+        if self.stressed:
+            data = data.strip()
+            self.stressed = False
+            self.preceding_stressed = True
+        elif (self.preceding_stressed
+              and re.match(r'[^\s.!?]', data[0])
+              and not hn(self.current_tag)
+              and self.current_tag not in ['a', 'code', 'pre']):
+            # should match a letter or common punctuation
+            data = ' ' + data
+            self.preceding_stressed = False
+
         if self.style:
             self.style_def.update(dumb_css_parser(data))
 
-        if not self.maybe_automatic_link is None:
+        if self.maybe_automatic_link is not None:
             href = self.maybe_automatic_link
-            if (href == data and self.absolute_url_matcher.match(href)
-                    and self.use_automatic_links):
+            if (href == data and self.absolute_url_matcher.match(href) and
+                    self.use_automatic_links):
                 self.o("<" + data + ">")
                 self.empty_link = False
                 return
@@ -748,6 +823,7 @@
 
         if not self.code and not self.pre and not entity_char:
             data = escape_md_section(data, snob=self.escape_snob)
+        self.preceding_data = data
         self.o(data, 1)
 
     def unknown_decl(self, data):  # pragma: no cover
@@ -764,10 +840,7 @@
             return unifiable_n[c]
         else:
             try:
-                try:
-                    return unichr(c)
-                except NameError:  # Python3
-                    return chr(c)
+                return chr(c)
             except ValueError:  # invalid unicode
                 return ''
 
@@ -783,10 +856,7 @@
                 if c == 'nbsp':
                     return config.UNIFIABLE[c]
                 else:
-                    try:
-                        return unichr(name2cp(c))
-                    except NameError:  # Python3
-                        return chr(name2cp(c))
+                    return chr(name2cp(c))
 
     def replaceEntities(self, s):
         s = s.group(1)
@@ -809,7 +879,7 @@
         nest_count = 0
         if 'margin-left' in style:
             nest_count = int(style['margin-left'][:-2]) \
-                         // self.google_list_indent
+                // self.google_list_indent
 
         return nest_count
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/html2text/cli.py 
new/html2text-2018.1.9/html2text/cli.py
--- old/html2text-2016.9.19/html2text/cli.py    2016-09-18 23:51:18.000000000 
+0200
+++ new/html2text-2018.1.9/html2text/cli.py     2018-01-09 05:43:43.000000000 
+0100
@@ -158,7 +158,8 @@
         action="store_true",
         dest="ignore_tables",
         default=config.IGNORE_TABLES,
-        help="Ignore table-related tags (table, th, td, tr) while keeping 
rows."
+        help="Ignore table-related tags (table, th, td, tr) "
+             "while keeping rows."
     )
     p.add_option(
         "--single-line-break",
@@ -211,7 +212,24 @@
         action="store",
         type="string",
         default=config.DECODE_ERRORS,
-        help="What to do in case of decode errors.'ignore', 'strict' and 
'replace' are acceptable values"
+        help="What to do in case of decode errors.'ignore', 'strict' and "
+             "'replace' are acceptable values"
+    )
+    p.add_option(
+        "--open-quote",
+        dest="open_quote",
+        action="store",
+        type="str",
+        default=config.OPEN_QUOTE,
+        help="The character used to open quotes",
+    )
+    p.add_option(
+        "--close-quote",
+        dest="close_quote",
+        action="store",
+        type="str",
+        default=config.CLOSE_QUOTE,
+        help="The character used to close quotes",
     )
     (options, args) = p.parse_args()
 
@@ -226,8 +244,11 @@
         file_ = args[0]
 
         if file_.startswith('http://') or file_.startswith('https://'):
-            warnings.warn("Support for retrieving html over network is set for 
deprecation by version (2017, 1, x)",
-                    DeprecationWarning)
+            warnings.warn(
+                "Support for retrieving html over network is set for "
+                "deprecation by version (2017, 1, x)",
+                DeprecationWarning
+            )
             baseurl = file_
             j = urllib.urlopen(baseurl)
             data = j.read()
@@ -235,7 +256,8 @@
                 try:
                     from feedparser import _getCharacterEncoding as enc
                 except ImportError:
-                    enc = lambda x, y: ('utf-8', 1)
+                    def enc(x, y):
+                        return ('utf-8', 1)
                 encoding = enc(j.headers, data)[0]
                 if encoding == 'us-ascii':
                     encoding = 'utf-8'
@@ -245,7 +267,8 @@
                 try:
                     from chardet import detect
                 except ImportError:
-                    detect = lambda x: {'encoding': 'utf-8'}
+                    def detect(x):
+                        return {'encoding': 'utf-8'}
                 encoding = detect(data)['encoding']
     else:
         data = wrap_read()
@@ -295,5 +318,7 @@
     h.wrap_links = options.wrap_links
     h.pad_tables = options.pad_tables
     h.default_image_alt = options.default_image_alt
+    h.open_quote = options.open_quote
+    h.close_quote = options.close_quote
 
     wrapwrite(h.handle(data))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/html2text/compat.py 
new/html2text-2018.1.9/html2text/compat.py
--- old/html2text-2016.9.19/html2text/compat.py 2016-09-18 23:51:18.000000000 
+0200
+++ new/html2text-2018.1.9/html2text/compat.py  2017-10-04 08:29:40.000000000 
+0200
@@ -13,5 +13,9 @@
     import html.parser as HTMLParser
     import urllib.request as urllib
     from html import escape
+
     def html_escape(s):
         return escape(s, quote=False)
+
+
+__all__ = ['HTMLParser', 'html_escape', 'htmlentitydefs', 'urllib', 'urlparse']
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/html2text/config.py 
new/html2text-2018.1.9/html2text/config.py
--- old/html2text-2016.9.19/html2text/config.py 2016-09-18 23:51:18.000000000 
+0200
+++ new/html2text-2018.1.9/html2text/config.py  2018-01-09 05:43:43.000000000 
+0100
@@ -1,6 +1,8 @@
+from __future__ import unicode_literals
+
 import re
 
-# Use Unicode characters instead of their ascii psuedo-replacements
+# Use Unicode characters instead of their ascii pseudo-replacements
 UNICODE_SNOB = 0
 
 # Marker to use for marking tables for padding post processing
@@ -31,6 +33,9 @@
 # Number of pixels Google indents nested lists
 GOOGLE_LIST_INDENT = 36
 
+# Values Google and others may use to indicate bold text
+BOLD_TEXT_STYLE_VALUES = ('bold', '700', '800', '900')
+
 IGNORE_ANCHORS = False
 IGNORE_IMAGES = False
 IMAGES_TO_ALT = False
@@ -41,7 +46,8 @@
 DEFAULT_IMAGE_ALT = ''
 PAD_TABLES = False
 
-# Convert links with same href and text to <href> format if they are absolute 
links
+# Convert links with same href and text to <href> format
+# if they are absolute links
 USE_AUTOMATIC_LINKS = True
 
 # For checking space-only lines on line 771
@@ -52,7 +58,10 @@
 RE_UNORDERED_LIST_MATCHER = re.compile(r'[-\*\+]\s')
 RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])")
 RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])")
-RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")  # to find links in 
the text
+
+# to find links in the text
+RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")
+
 RE_MD_DOT_MATCHER = re.compile(r"""
     ^             # start of line
     (\s*\d+)      # optional whitespace and a number
@@ -126,6 +135,11 @@
 IGNORE_TABLES = False
 
 
-# Use a single line break after a block element rather an two line breaks.
+# Use a single line break after a block element rather than two line breaks.
 # NOTE: Requires body width setting to be 0.
 SINGLE_LINE_BREAK = False
+
+
+# Use double quotation marks when converting the <q> tag.
+OPEN_QUOTE = '"'
+CLOSE_QUOTE = '"'
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/html2text/utils.py 
new/html2text-2018.1.9/html2text/utils.py
--- old/html2text-2016.9.19/html2text/utils.py  2016-09-18 23:51:18.000000000 
+0200
+++ new/html2text-2018.1.9/html2text/utils.py   2017-10-04 08:29:40.000000000 
+0200
@@ -12,7 +12,6 @@
 
 
 unifiable_n = {}
-
 for k in config.UNIFIABLE.keys():
     unifiable_n[name2cp(k)] = config.UNIFIABLE[k]
 
@@ -191,7 +190,7 @@
     # I'm not sure what this is for; I thought it was to detect lists,
     # but there's a <br>-inside-<span> case in one of the tests that
     # also depends upon it.
-    if stripped[0:1] == '-' or stripped[0:1] == '*':
+    if stripped[0:1] in ('-', '*') and not stripped[0:2] == '**':
         return True
 
     # If the text begins with a single -, *, or +, followed by a space,
@@ -245,6 +244,7 @@
 
     return text
 
+
 def reformat_table(lines, right_margin):
     """
     Given the lines of a table
@@ -252,11 +252,24 @@
     """
     # find the maximum width of the columns
     max_width = [len(x.rstrip()) + right_margin for x in lines[0].split('|')]
+    max_cols = len(max_width)
     for line in lines:
         cols = [x.rstrip() for x in line.split('|')]
+        num_cols = len(cols)
+
+        # don't drop any data if colspan attributes result in unequal lengths
+        if num_cols < max_cols:
+            cols += [''] * (max_cols - num_cols)
+        elif max_cols < num_cols:
+            max_width += [
+                len(x) + right_margin for x in
+                cols[-(num_cols - max_cols):]
+            ]
+            max_cols = num_cols
+
         max_width = [max(len(x) + right_margin, old_len)
                      for x, old_len in zip(cols, max_width)]
-    
+
     # reformat
     new_lines = []
     for line in lines:
@@ -272,15 +285,16 @@
         new_lines.append('|'.join(new_cols))
     return new_lines
 
+
 def pad_tables_in_text(text, right_margin=1):
     """
     Provide padding for tables in the text
     """
     lines = text.split('\n')
-    table_buffer, altered_lines, table_widths, table_started = [], [], [], False
+    table_buffer, table_started = [], False
     new_lines = []
     for line in lines:
-        # Toogle table started
+        # Toggle table started
         if (config.TABLE_MARKER_FOR_PAD in line):
             table_started = not table_started
             if not table_started:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/html2text.egg-info/PKG-INFO 
new/html2text-2018.1.9/html2text.egg-info/PKG-INFO
--- old/html2text-2016.9.19/html2text.egg-info/PKG-INFO 2016-09-19 
00:08:46.000000000 +0200
+++ new/html2text-2018.1.9/html2text.egg-info/PKG-INFO  2018-01-10 
07:03:39.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: html2text
-Version: 2016.9.19
+Version: 2018.1.9
 Summary: Turn HTML into equivalent Markdown-structured text.
 Home-page: https://github.com/Alir3z4/html2text/
 Author: Alireza Savand
@@ -141,3 +141,4 @@
 Classifier: Programming Language :: Python :: 3.3
 Classifier: Programming Language :: Python :: 3.4
 Classifier: Programming Language :: Python :: 3.5
+Classifier: Programming Language :: Python :: 3.6
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/html2text.egg-info/SOURCES.txt 
new/html2text-2018.1.9/html2text.egg-info/SOURCES.txt
--- old/html2text-2016.9.19/html2text.egg-info/SOURCES.txt      2016-09-19 
00:08:46.000000000 +0200
+++ new/html2text-2018.1.9/html2text.egg-info/SOURCES.txt       2018-01-10 
07:03:39.000000000 +0100
@@ -35,6 +35,8 @@
 test/bodywidth_newline.md
 test/bold_inside_link.html
 test/bold_inside_link.md
+test/bold_long_line.html
+test/bold_long_line.md
 test/break_preserved_in_blockquote.html
 test/break_preserved_in_blockquote.md
 test/css_import_no_semicolon.html
@@ -49,8 +51,12 @@
 test/doc_with_table_bypass.md
 test/emdash-para.html
 test/emdash-para.md
+test/emphasis_preserved_whitespace.html
+test/emphasis_preserved_whitespace.md
 test/empty-link.html
 test/empty-link.md
+test/empty-title-tag.html
+test/empty-title-tag.md
 test/flip_emphasis.html
 test/flip_emphasis.md
 test/google-like_font-properties.html
@@ -65,6 +71,8 @@
 test/html_entities_out_of_text.md
 test/images_to_alt.html
 test/images_to_alt.md
+test/images_with_div_wrap.html
+test/images_with_div_wrap.md
 test/images_with_size.html
 test/images_with_size.md
 test/img-tag-with-link.html
@@ -75,6 +83,8 @@
 test/invalid_start.md
 test/invalid_unicode.html
 test/invalid_unicode.md
+test/kbd_tag.html
+test/kbd_tag.md
 test/link_titles.html
 test/link_titles.md
 test/list_tags_example.html
@@ -109,6 +119,8 @@
 test/preformatted_in_list.md
 test/protect_links.html
 test/protect_links.md
+test/q_tag.html
+test/q_tag.md
 test/single_line_break.html
 test/single_line_break.md
 test/table_ignore.html
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/setup.py 
new/html2text-2018.1.9/setup.py
--- old/html2text-2016.9.19/setup.py    2016-05-29 18:13:44.000000000 +0200
+++ new/html2text-2018.1.9/setup.py     2017-10-04 08:31:57.000000000 +0200
@@ -3,11 +3,20 @@
 
 from setuptools import setup, Command, find_packages
 
+
+def read_md_convert(f):
+    return convert(f, 'rst')
+
+
+def read_md_open(f):
+    return open(f, 'r').read()
+
+
 try:
     from pypandoc import convert
-    read_md = lambda f: convert(f, 'rst')
+    read_md = read_md_convert
 except ImportError:
-    read_md = lambda f: open(f, 'r').read()
+    read_md = read_md_open
 
 requires_list = []
 try:
@@ -69,6 +78,7 @@
         'Programming Language :: Python :: 3.3',
         'Programming Language :: Python :: 3.4',
         'Programming Language :: Python :: 3.5',
+        'Programming Language :: Python :: 3.6',
     ],
     entry_points="""
         [console_scripts]
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/bold_long_line.html 
new/html2text-2018.1.9/test/bold_long_line.html
--- old/html2text-2016.9.19/test/bold_long_line.html    1970-01-01 
01:00:00.000000000 +0100
+++ new/html2text-2018.1.9/test/bold_long_line.html     2017-10-04 
08:29:40.000000000 +0200
@@ -0,0 +1,3 @@
+<p>
+<b>text</b> and a very long long long long long long long long long long long long long long long long long long long long line
+</p>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/bold_long_line.md 
new/html2text-2018.1.9/test/bold_long_line.md
--- old/html2text-2016.9.19/test/bold_long_line.md      1970-01-01 
01:00:00.000000000 +0100
+++ new/html2text-2018.1.9/test/bold_long_line.md       2017-10-04 
08:29:40.000000000 +0200
@@ -0,0 +1,3 @@
+**text** and a very long long long long long long long long long long long
+long long long long long long long long long line
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/html2text-2016.9.19/test/emphasis_preserved_whitespace.html 
new/html2text-2018.1.9/test/emphasis_preserved_whitespace.html
--- old/html2text-2016.9.19/test/emphasis_preserved_whitespace.html     
1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2018.1.9/test/emphasis_preserved_whitespace.html      
2017-10-04 08:29:40.000000000 +0200
@@ -0,0 +1,20 @@
+<p><em> emphasis </em></p>
+<p><em>emphasis: </em>some text</p>
+<p><em>repeat: </em>again</p>
+
+<p><b> bold </b></p>
+<p><b>bold: </b>some text</p>
+<p><b>repeat: </b>again</p>
+
+<p><strike> strike </strike></p>
+<p><strike>strike: </strike>some text</p>
+<p><strike>strike: </strike>again</p>
+
+<p>separate<em> emphasis</em> some more text</p>
+
+<!-- Various punctuation has no space  -->
+<p><em>emphasis</em>.</p>
+<p><em>emphasis</em>?</p>
+<p><em>emphasis</em>!</p>
+
+<p><em>em1</em><em>em2</em></p>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/html2text-2016.9.19/test/emphasis_preserved_whitespace.md 
new/html2text-2018.1.9/test/emphasis_preserved_whitespace.md
--- old/html2text-2016.9.19/test/emphasis_preserved_whitespace.md       
1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2018.1.9/test/emphasis_preserved_whitespace.md        
2017-10-04 08:29:40.000000000 +0200
@@ -0,0 +1,28 @@
+_emphasis_
+
+_emphasis:_ some text
+
+_repeat:_ again
+
+**bold**
+
+**bold:** some text
+
+**repeat:** again
+
+~~strike~~
+
+~~strike:~~ some text
+
+~~strike:~~ again
+
+separate _emphasis_ some more text
+
+_emphasis_.
+
+_emphasis_?
+
+_emphasis_!
+
+_em1_ _em2_
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/empty-link.html 
new/html2text-2018.1.9/test/empty-link.html
--- old/html2text-2016.9.19/test/empty-link.html        2016-05-29 
18:08:43.000000000 +0200
+++ new/html2text-2018.1.9/test/empty-link.html 2017-10-04 08:29:40.000000000 
+0200
@@ -1,6 +1,6 @@
 <h1>Processing empty hyperlinks</h1>
 
-<p>This test checks wheter empty hyperlinks still appear in the markdown result.</p>
+<p>This test checks whether empty hyperlinks still appear in the markdown result.</p>
 
 <a href="http://some.link"></a>
 <a href="http://some.link"><p></p></a>
\ No newline at end of file
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/empty-link.md 
new/html2text-2018.1.9/test/empty-link.md
--- old/html2text-2016.9.19/test/empty-link.md  2016-05-29 18:08:43.000000000 
+0200
+++ new/html2text-2018.1.9/test/empty-link.md   2017-10-04 08:29:40.000000000 
+0200
@@ -1,6 +1,6 @@
 # Processing empty hyperlinks
 
-This test checks wheter empty hyperlinks still appear in the markdown result.
+This test checks whether empty hyperlinks still appear in the markdown result.
 
 [](http://some.link)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/empty-title-tag.html 
new/html2text-2018.1.9/test/empty-title-tag.html
--- old/html2text-2016.9.19/test/empty-title-tag.html   1970-01-01 
01:00:00.000000000 +0100
+++ new/html2text-2018.1.9/test/empty-title-tag.html    2017-10-04 
08:29:40.000000000 +0200
@@ -0,0 +1 @@
+<a href="test.html" title>This is an A tag with an empty title property</a>
\ No newline at end of file
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/empty-title-tag.md 
new/html2text-2018.1.9/test/empty-title-tag.md
--- old/html2text-2016.9.19/test/empty-title-tag.md     1970-01-01 
01:00:00.000000000 +0100
+++ new/html2text-2018.1.9/test/empty-title-tag.md      2017-10-04 
08:29:40.000000000 +0200
@@ -0,0 +1,2 @@
+[This is an A tag with an empty title property](test.html)
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/html2text-2016.9.19/test/google-like_font-properties.html 
new/html2text-2018.1.9/test/google-like_font-properties.html
--- old/html2text-2016.9.19/test/google-like_font-properties.html       
2016-09-18 23:51:18.000000000 +0200
+++ new/html2text-2018.1.9/test/google-like_font-properties.html        
2017-10-04 08:29:40.000000000 +0200
@@ -5,6 +5,12 @@
   <BODY>
     <p><span style="font-weight: bold">font-weight: bold</span></p>
     <P><SPAN STYLE="FONT-WEIGHT: BOLD">FONT-WEIGHT: BOLD</SPAN></P>
+    <P><SPAN STYLE="font-weight: 700">font-weight: 700</SPAN></P>
+    <P><SPAN STYLE="FONT-WEIGHT: 700">FONT-WEIGHT: 700</SPAN></P>
+    <P><SPAN STYLE="font-weight: 800">font-weight: 800</SPAN></P>
+    <P><SPAN STYLE="FONT-WEIGHT: 800">FONT-WEIGHT: 800</SPAN></P>
+    <P><SPAN STYLE="font-weight: 900">font-weight: 900</SPAN></P>
+    <P><SPAN STYLE="FONT-WEIGHT: 900">FONT-WEIGHT: 900</SPAN></P>
     <p><span style="font-style: italic">font-style: italic</span></p>
     <P><SPAN STYLE="FONT-STYLE: ITALIC">FONT-STYLE: ITALIC</SPAN></P>
     <p><span style="font-weight: bold;font-style: italic">
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' 
old/html2text-2016.9.19/test/google-like_font-properties.md 
new/html2text-2018.1.9/test/google-like_font-properties.md
--- old/html2text-2016.9.19/test/google-like_font-properties.md 2016-09-18 
23:51:18.000000000 +0200
+++ new/html2text-2018.1.9/test/google-like_font-properties.md  2017-10-04 
08:29:40.000000000 +0200
@@ -1,5 +1,11 @@
 **font-weight: bold**   
 **FONT-WEIGHT: BOLD**   
+**font-weight: 700**   
+**FONT-WEIGHT: 700**   
+**font-weight: 800**   
+**FONT-WEIGHT: 800**   
+**font-weight: 900**   
+**FONT-WEIGHT: 900**   
 _font-style: italic_   
 _FONT-STYLE: ITALIC_   
 _**font-weight: bold;font-style: italic**_   
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/html-escaping.html 
new/html2text-2018.1.9/test/html-escaping.html
--- old/html2text-2016.9.19/test/html-escaping.html     2016-05-29 
18:08:43.000000000 +0200
+++ new/html2text-2018.1.9/test/html-escaping.html      2017-10-04 
08:29:40.000000000 +0200
@@ -1,3 +1,3 @@
-<p>Escaped HTML like &lt;div&gt; or &amp; should remain escaped on output</p>
-<pre>...unless that escaped HTML is in a &lt;pre&gt; tag</pre>
+<p>Escaped HTML like &lt;div&gt; or &amp; should NOT remain escaped on output</p>
+<pre>...even when that escaped HTML is in a &lt;pre&gt; tag</pre>
 <code>...or a &lt;code&gt; tag</code>
\ No newline at end of file
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/html-escaping.md 
new/html2text-2018.1.9/test/html-escaping.md
--- old/html2text-2016.9.19/test/html-escaping.md       2016-05-29 
18:08:43.000000000 +0200
+++ new/html2text-2018.1.9/test/html-escaping.md        2017-10-04 
08:29:40.000000000 +0200
@@ -1,8 +1,8 @@
-Escaped HTML like &lt;div&gt; or &amp; should remain escaped on output
+Escaped HTML like <div> or & should NOT remain escaped on output
 
     
     
-    ...unless that escaped HTML is in a <pre> tag
+    ...even when that escaped HTML is in a <pre> tag
 
 `...or a <code> tag`
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/images_with_div_wrap.html 
new/html2text-2018.1.9/test/images_with_div_wrap.html
--- old/html2text-2016.9.19/test/images_with_div_wrap.html      1970-01-01 
01:00:00.000000000 +0100
+++ new/html2text-2018.1.9/test/images_with_div_wrap.html       2017-10-04 
08:29:40.000000000 +0200
@@ -0,0 +1 @@
+<a href="http://example.com"><div><img src="http://example.com/img.png"/></div></a>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/images_with_div_wrap.md 
new/html2text-2018.1.9/test/images_with_div_wrap.md
--- old/html2text-2016.9.19/test/images_with_div_wrap.md        1970-01-01 
01:00:00.000000000 +0100
+++ new/html2text-2018.1.9/test/images_with_div_wrap.md 2017-10-04 
08:29:40.000000000 +0200
@@ -0,0 +1,2 @@
+[![](http://example.com/img.png)](http://example.com)
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/kbd_tag.html 
new/html2text-2018.1.9/test/kbd_tag.html
--- old/html2text-2016.9.19/test/kbd_tag.html   1970-01-01 01:00:00.000000000 
+0100
+++ new/html2text-2018.1.9/test/kbd_tag.html    2018-01-09 05:43:43.000000000 
+0100
@@ -0,0 +1 @@
+Press <kbd>[CTRL]+c</kbd> to copy.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/kbd_tag.md 
new/html2text-2018.1.9/test/kbd_tag.md
--- old/html2text-2016.9.19/test/kbd_tag.md     1970-01-01 01:00:00.000000000 
+0100
+++ new/html2text-2018.1.9/test/kbd_tag.md      2018-01-09 05:43:43.000000000 
+0100
@@ -0,0 +1,2 @@
+Press `[CTRL]+c` to copy.
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/link_titles.md 
new/html2text-2018.1.9/test/link_titles.md
--- old/html2text-2016.9.19/test/link_titles.md 2016-05-29 18:08:43.000000000 
+0200
+++ new/html2text-2018.1.9/test/link_titles.md  2017-10-04 08:29:40.000000000 
+0200
@@ -1,3 +1,3 @@
-[ first example](http://example.com "MyTitle" )  
+[ first example](http://example.com "MyTitle")  
 [ second example](http://example.com)
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/list_tags_example.html 
new/html2text-2018.1.9/test/list_tags_example.html
--- old/html2text-2016.9.19/test/list_tags_example.html 2016-05-29 
18:08:43.000000000 +0200
+++ new/html2text-2018.1.9/test/list_tags_example.html  2017-10-04 
08:29:40.000000000 +0200
@@ -34,6 +34,6 @@
 </ol>
 
 <ul style="list-style-type:ordered;">
-<li>somthing else here</li>
+<li>something else here</li>
 <li>some item</li>
 </ul>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/list_tags_example.md 
new/html2text-2018.1.9/test/list_tags_example.md
--- old/html2text-2016.9.19/test/list_tags_example.md   2016-05-29 
18:08:48.000000000 +0200
+++ new/html2text-2018.1.9/test/list_tags_example.md    2017-10-04 
08:29:40.000000000 +0200
@@ -33,6 +33,6 @@
   2. some item
   3. some item
 
-  * somthing else here
+  * something else here
   * some item
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/pad_table.html 
new/html2text-2018.1.9/test/pad_table.html
--- old/html2text-2016.9.19/test/pad_table.html 2016-05-29 18:08:48.000000000 
+0200
+++ new/html2text-2018.1.9/test/pad_table.html  2017-10-04 08:29:40.000000000 
+0200
@@ -22,5 +22,30 @@
         <tr> <td>Content 1</td> <td>Content 2 longer</td> <td><img src="http://lorempixel.com/200/200" alt="200"/> Image!</td> </tr>
     </table>
 
-something else entirely
+something else entirely<br>
+
+    <table>
+      <thead>
+        <tr><th>One</th><th>Two</th><th>Three</th></tr>
+      </thead>
+      <tbody>
+        <tr><td>A</td><td>B</td><th>C</th></tr>
+        <tr><td>A</td><td colspan="2">B+C</td></tr>
+        <tr><td colspan="2">A+B</td><td>C</td></tr>
+        <tr><td colspan="3">A+B+C</td></tr>
+      </tbody>
+    </table>
+
+    <table>
+      <thead>
+        <tr><th colspan="2">One+Two</th><th>Three</th></tr>
+      </thead>
+      <tbody>
+        <tr><td>A</td><td>B</td><th>C</th></tr>
+        <tr><td>A</td><td colspan="2">B+C</td></tr>
+        <tr><td colspan="2">A+B</td><td>C</td></tr>
+        <tr><td colspan="3">A+B+C</td></tr>
+      </tbody>
+    </table>
+
 </body> </html>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/pad_table.md 
new/html2text-2018.1.9/test/pad_table.md
--- old/html2text-2016.9.19/test/pad_table.md   2016-05-29 18:08:48.000000000 
+0200
+++ new/html2text-2018.1.9/test/pad_table.md    2017-10-04 08:29:40.000000000 
+0200
@@ -24,5 +24,19 @@
 Content 1 | Content 2        | ![200](http://lorempixel.com/200/200) Image! 
 Content 1 | Content 2 longer | ![200](http://lorempixel.com/200/200) Image! 
 
-something else entirely
+something else entirely  
+One   | Two | Three 
+------|-----|-------
+A     | B   | C     
+A     | B+C 
+A+B   | C   
+A+B+C 
+
+One+Two | Three 
+--------|-------
+A       | B     | C 
+A       | B+C   
+A+B     | C     
+A+B+C   
+
 
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/pre.html 
new/html2text-2018.1.9/test/pre.html
--- old/html2text-2016.9.19/test/pre.html       2016-05-29 18:08:43.000000000 
+0200
+++ new/html2text-2018.1.9/test/pre.html        2017-10-04 08:29:40.000000000 
+0200
@@ -1,6 +1,6 @@
 <html>
   <head>
-    <title>initial crowsed pre handling test #1</title>
+    <title>initial crowded pre handling test #1</title>
   </head>
   <body>
 <pre>a
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/q_tag.html 
new/html2text-2018.1.9/test/q_tag.html
--- old/html2text-2016.9.19/test/q_tag.html     1970-01-01 01:00:00.000000000 
+0100
+++ new/html2text-2018.1.9/test/q_tag.html      2018-01-09 05:43:43.000000000 
+0100
@@ -0,0 +1 @@
+<q>If this is a test,</q> he said, <q>then it should pass</q>.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/q_tag.md 
new/html2text-2018.1.9/test/q_tag.md
--- old/html2text-2016.9.19/test/q_tag.md       1970-01-01 01:00:00.000000000 
+0100
+++ new/html2text-2018.1.9/test/q_tag.md        2018-01-09 05:43:43.000000000 
+0100
@@ -0,0 +1,2 @@
+"If this is a test," he said, "then it should pass".
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/test_html2text.py 
new/html2text-2018.1.9/test/test_html2text.py
--- old/html2text-2016.9.19/test/test_html2text.py      2016-09-18 
23:51:18.000000000 +0200
+++ new/html2text-2018.1.9/test/test_html2text.py       2017-10-04 
08:29:40.000000000 +0200
@@ -1,5 +1,7 @@
 import codecs
 import glob
+import html2text
+import logging
 import os
 import re
 import subprocess
@@ -9,12 +11,20 @@
     import unittest2 as unittest
 else:
     import unittest
-import logging
+
 
 logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                     level=logging.DEBUG)
 
-import html2text
+
+def cleanup_eol(clean_str):
+    if os.name == 'nt' or sys.platform == 'cygwin':
+        # Fix the unwanted CR to CRCRLF replacement
+        # during text pipelining on Windows/cygwin
+        # on cygwin, os.name == 'posix', not nt
+        clean_str = re.sub(r'\r+', '\r', clean_str)
+        clean_str = clean_str.replace('\r\n', '\n')
+    return clean_str
 
 
 def test_module(fn, google_doc=False, **kwargs):
@@ -31,9 +41,9 @@
         setattr(h, k, v)
 
     result = get_baseline(fn)
-    inf = open(fn)
-    actual = h.handle(inf.read())
-    inf.close()
+    with open(fn) as inf:
+        actual = cleanup_eol(inf.read())
+        actual = h.handle(actual)
     return result, actual
 
 
@@ -56,11 +66,7 @@
 
     actual = out.decode('utf8')
 
-    if os.name == 'nt':
-        # Fix the unwanted CR to CRCRLF replacement
-        # during text pipelining on Windows/cygwin
-        actual = re.sub(r'\r+', '\r', actual)
-        actual = actual.replace('\r\n', '\n')
+    actual = cleanup_eol(actual)
 
     return result, actual
 
@@ -82,30 +88,62 @@
 
 def get_baseline(fn):
     name = get_baseline_name(fn)
-    f = codecs.open(name, mode='r', encoding='utf8')
-    out = f.read()
-    f.close()
+    with codecs.open(name, mode='r', encoding='utf8') as f:
+        out = f.read()
+    out = cleanup_eol(out)
     return out
 
 
 class TestHTML2Text(unittest.TestCase):
-    pass
+
+    def test_html_escape(self):
+        self.assertEqual(
+            html2text.compat.html_escape('<pre>and then<div> & other tags'),
+            '&lt;pre&gt;and then&lt;div&gt; &amp; other tags'
+        )
+
+    def test_unescape(self):
+        self.assertEqual(
+            '<pre>and then<div> & other tags',
+            html2text.unescape(
+                '&lt;pre&gt;and then&lt;div&gt; &amp; other tags'
+            )
+        )
+
+    def _skip_certain_tags(self, h2t, tag, attrs, start):
+        if tag == 'b':
+            return True
+
+    def test_tag_callback(self):
+        h = html2text.HTML2Text()
+        h.tag_callback = self._skip_certain_tags
+        ret = h.handle(
+            'this is a <b>txt</b> and this is a'
+            ' <b class="skip">with text</b> and '
+            'some <i>italics</i> too.'
+        )
+        self.assertEqual(
+            ret,
+            'this is a txt and this is a'
+            ' with text and '
+            'some _italics_ too.\n\n'
+        )
 
 
 def generate_test(fn):
-    def test_mod(self):
+    def _test_mod(self):
         self.maxDiff = None
         result, actual = test_module(fn, **module_args)
         self.assertEqual(result, actual)
 
-    def test_cmd(self):
+    def _test_cmd(self):
         # Because there is no command-line option to control unicode_snob
         if 'unicode_snob' not in module_args:
             self.maxDiff = None
             result, actual = test_command(fn, *cmdline_args)
             self.assertEqual(result, actual)
 
-    def test_func(self):
+    def _test_func(self):
         result, actual = test_function(fn, **func_args)
         self.assertEqual(result, actual)
 
@@ -184,14 +222,19 @@
 
     if base_fn not in ['bodywidth_newline.html', 'abbr_tag.html']:
         test_func = None
+    else:
+        test_func = _test_func
 
     if base_fn == 'inplace_baseurl_substitution.html':
         module_args['baseurl'] = 'http://brettterpstra.com'
         module_args['body_width'] = 0
         # there is no way to specify baseurl in cli :(
         test_cmd = None
+    else:
+        test_cmd = _test_cmd
+
+    return _test_mod, test_cmd, test_func
 
-    return test_mod, test_cmd, test_func
 
 # Originally from http://stackoverflow.com/questions/32899/\
 #    how-to-generate-dynamic-parametrized-unit-tests-in-python
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' 
'--exclude=.svnignore' old/html2text-2016.9.19/test/test_memleak.py 
new/html2text-2018.1.9/test/test_memleak.py
--- old/html2text-2016.9.19/test/test_memleak.py        2016-05-29 
18:08:43.000000000 +0200
+++ new/html2text-2018.1.9/test/test_memleak.py 2017-10-04 08:29:40.000000000 
+0200
@@ -1,14 +1,15 @@
+import html2text
+import logging
 import sys
 if sys.version_info[:2] < (2, 7):
     import unittest2 as unittest
 else:
     import unittest
-import logging
+
+
 logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
                     level=logging.DEBUG)
 
-import html2text
-
 
 class TestMemleak(unittest.TestCase):
     """

++++++ remove_unittest2.patch ++++++
--- a/setup.py
+++ b/setup.py
@@ -18,14 +18,7 @@ try:
 except ImportError:
     read_md = read_md_open
 
-requires_list = []
-try:
-    import unittest2 as unittest
-except ImportError:
-    import unittest
-else:
-    if sys.version_info <= (2, 6):
-        requires_list.append("unittest2")
+import unittest
 
 
 class RunTests(Command):
@@ -68,9 +61,6 @@ setup(
         'Operating System :: OS Independent',
         'Programming Language :: Python',
         'Programming Language :: Python :: 2',
-        'Programming Language :: Python :: 2.4',
-        'Programming Language :: Python :: 2.5',
-        'Programming Language :: Python :: 2.6',
         'Programming Language :: Python :: 2.7',
         'Programming Language :: Python :: 3',
         'Programming Language :: Python :: 3.0',
@@ -85,7 +75,6 @@ setup(
         html2text=html2text.cli:main
     """,
     license='GNU GPL 3',
-    requires=requires_list,
     packages=find_packages(exclude=['test']),
     include_package_data=True,
     zip_safe=False,
--- a/test/test_html2text.py
+++ b/test/test_html2text.py
@@ -7,10 +7,7 @@ import re
 import subprocess
 import sys
 
-if sys.version_info[:2] < (2, 7):
-    import unittest2 as unittest
-else:
-    import unittest
+import unittest
 
 
 logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',
--- a/test/test_memleak.py
+++ b/test/test_memleak.py
@@ -1,10 +1,7 @@
 import html2text
 import logging
 import sys
-if sys.version_info[:2] < (2, 7):
-    import unittest2 as unittest
-else:
-    import unittest
+import unittest
 
 
 logging.basicConfig(format='%(levelname)s:%(funcName)s:%(message)s',

commit python-html2text for openSUSE:Factory

Reply via email to