commit python3-html2text for openSUSE:Factory

h_root Sun, 08 Nov 2015 02:27:07 -0800

Hello community,

here is the log from the commit of package python3-html2text for 
openSUSE:Factory checked in at 2015-11-08 11:26:24
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Comparing /work/SRC/openSUSE:Factory/python3-html2text (Old)
 and      /work/SRC/openSUSE:Factory/.python3-html2text.new (New)
++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++


Package is "python3-html2text"

Changes:
--------
--- /work/SRC/openSUSE:Factory/python3-html2text/python3-html2text.changes      2015-07-03 00:18:41.000000000 +0200
+++ /work/SRC/openSUSE:Factory/.python3-html2text.new/python3-html2text.changes 2015-11-08 11:26:25.000000000 +0100
@@ -1,0 +2,11 @@
+Wed Nov  4 16:40:42 UTC 2015 - [email protected]
+
+- update to version 2015.11.4:
+  * Fix #38: Long links wrapping controlled by --no-wrap-links.
+  * Note: --no-wrap-links implies --reference-links
+  * Feature #83: Add callback-on-tag.
+  * Fix #87: Decode errors can be handled via command line.
+  * Feature #95: Docs, decode errors spelling mistake.
+  * Fix #84: Make bodywidth kwarg overridable using config.
+
+-------------------------------------------------------------------

Old:
----
  html2text-2015.6.21.tar.gz

New:
----
  html2text-2015.11.4.tar.gz

++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++

Other differences:
------------------
++++++ python3-html2text.spec ++++++
--- /var/tmp/diff_new_pack.BnLolk/_old  2015-11-08 11:26:26.000000000 +0100
+++ /var/tmp/diff_new_pack.BnLolk/_new  2015-11-08 11:26:26.000000000 +0100
@@ -17,7 +17,7 @@
 
 
 Name:           python3-html2text
-Version:        2015.6.21
+Version:        2015.11.4
 Release:        0
 Url:            https://github.com/Alir3z4/html2text/
 Summary:        Turn HTML into equivalent Markdown-structured text

++++++ html2text-2015.6.21.tar.gz -> html2text-2015.11.4.tar.gz ++++++
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/AUTHORS.rst new/html2text-2015.11.4/AUTHORS.rst
--- old/html2text-2015.6.21/AUTHORS.rst 2015-06-12 08:58:49.000000000 +0200
+++ new/html2text-2015.11.4/AUTHORS.rst 2015-11-04 15:32:38.000000000 +0100
@@ -17,6 +17,8 @@
 * Arjoonn Sharma <gh: theSage21>
 * Ali Mohammad <gh: alawibaba>
 * Albert Berger <gh: nbdsp>
+* Etienne Millon <[email protected]>
+* John C F <gh: critiqjo>
 
 
 Maintainer:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/ChangeLog.rst new/html2text-2015.11.4/ChangeLog.rst
--- old/html2text-2015.6.21/ChangeLog.rst       2015-06-21 16:36:23.000000000 +0200
+++ new/html2text-2015.11.4/ChangeLog.rst       2015-11-04 15:48:46.000000000 +0100
@@ -1,3 +1,15 @@
+2015.11.4
+=========
+----
+
+* Fix #38: Long links wrapping controlled by `--no-wrap-links`.
+* Note: `--no-wrap-links` implies `--reference-links`
+* Feature #83: Add callback-on-tag.
+* Fix #87: Decode errors can be handled via command line.
+* Feature #95: Docs, decode errors spelling mistake.
+* Fix #84: Make bodywidth kwarg overridable using config.
+
+
 2015.6.21
 =========
 ----
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/PKG-INFO new/html2text-2015.11.4/PKG-INFO
--- old/html2text-2015.6.21/PKG-INFO    2015-06-21 16:43:23.000000000 +0200
+++ new/html2text-2015.11.4/PKG-INFO    2015-11-04 16:23:02.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: html2text
-Version: 2015.6.21
+Version: 2015.11.4
 Summary: Turn HTML into equivalent Markdown-structured text.
 Home-page: https://github.com/Alir3z4/html2text/
 Author: Alireza Savand
@@ -23,3 +23,4 @@
 Classifier: Programming Language :: Python :: 3.1
 Classifier: Programming Language :: Python :: 3.2
 Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/README.md new/html2text-2015.11.4/README.md
--- old/html2text-2015.6.21/README.md   2015-06-12 08:58:23.000000000 +0200
+++ new/html2text-2015.11.4/README.md   2015-11-04 15:32:38.000000000 +0100
@@ -15,27 +15,16 @@
 
 Usage: `html2text [(filename|url) [encoding]]`
 
-
 | Option                                                 | Description
 |--------------------------------------------------------|---------------------------------------------------
 | `--version`                                            | Show program's version number and exit
 | `-h`, `--help`                                         | Show this help message and exit
 | `--ignore-links`                                       | Don't include any formatting for links
-|`--protect-links`                                       | Protect links from line breaks surrounding them "+" with angle brackets
-|`--ignore-images`                                       | Don't include any formatting for images
-|`--images-to-alt`                                       | Discard image data, only keep alt text
-|`--images-with-size`                                    | Write image tags with height and width attrs as raw html to retain dimensions
-|`-g`, `--google-doc`                                    | Convert an html-exported Google Document
-|`-d`, `--dash-unordered-list`                           | Use a dash rather than a star for unordered list items
-|`-b` `BODY_WIDTH`, `--body-width`=`BODY_WIDTH`          | Number of characters per output line, `0` for no wrap
-|`-i` `LIST_INDENT`, `--google-list-indent`=`LIST_INDENT`| Number of pixels Google indents nested lists
-|`-s`, `--hide-strikethrough`                            | Hide strike-through text. only relevent when `-g` is specified as well
 |`--escape-all`                                          | Escape all special characters.  Output is less readable, but avoids corner case formatting issues.
-| `--bypass-tables`                                      | Format tables in HTML rather than Markdown syntax.
-| `--single-line-break`                                  | Use a single line break after a block element rather than two.
 | `--reference-links`                                    | Use reference links instead of links to create markdown
 | `--mark-code`                                          | Mark preformatted and code blocks with [code]...[/code]
 
+For a complete list of options see the [docs](docs/usage.md)
 
 
 Or you can use it from within `Python`:
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text/__init__.py new/html2text-2015.11.4/html2text/__init__.py
--- old/html2text-2015.6.21/html2text/__init__.py       2015-06-21 16:36:01.000000000 +0200
+++ new/html2text-2015.11.4/html2text/__init__.py       2015-11-04 15:48:14.000000000 +0100
@@ -29,7 +29,7 @@
     skipwrap
 )
 
-__version__ = (2015, 6, 21)
+__version__ = (2015, 11, 4)
 
 
 # TODO:
@@ -72,6 +72,10 @@
         self.use_automatic_links = config.USE_AUTOMATIC_LINKS  # covered in cli
         self.hide_strikethrough = False  # covered in cli
         self.mark_code = config.MARK_CODE
+        self.single_line_break = config.SINGLE_LINE_BREAK
+        self.use_automatic_links = config.USE_AUTOMATIC_LINKS
+        self.wrap_links = config.WRAP_LINKS  # covered in cli
+        self.tag_callback = None
 
         if out is None:  # pragma: no cover
             self.out = self.outtextf
@@ -278,6 +282,10 @@
         else:
             attrs = dict(attrs)
 
+        if self.tag_callback is not None:
+            if self.tag_callback(self, tag, attrs, start) is True:
+                return
+
         # first thing inside the anchor tag is another tag that produces some output
         if (start and not self.maybe_automatic_link is None
                 and tag not in ['p', 'div', 'style', 'dl', 'dt']
@@ -794,9 +802,14 @@
         assert wrap, "Requires Python 2.3."
         result = ''
         newlines = 0
+        # I cannot think of a better solution for now.
+        # To avoid the non-wrap behaviour for entire paras
+        # because of the presence of a link in it
+        if not self.wrap_links:
+            self.inline_links = False
         for para in text.split("\n"):
             if len(para) > 0:
-                if not skipwrap(para):
+                if not skipwrap(para, self.wrap_links):
                     result += "\n".join(wrap(para, self.body_width))
                     if para.endswith('  '):
                         result += "  \n"
@@ -819,7 +832,9 @@
         return result
 
 
-def html2text(html, baseurl='', bodywidth=config.BODY_WIDTH):
+def html2text(html, baseurl='', bodywidth=None):
+    if bodywidth is None:
+        bodywidth = config.BODY_WIDTH
     h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth)
 
     return h.handle(html)
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text/cli.py new/html2text-2015.11.4/html2text/cli.py
--- old/html2text-2015.6.21/html2text/cli.py    2015-06-21 16:21:11.000000000 +0200
+++ new/html2text-2015.11.4/html2text/cli.py    2015-11-04 15:32:38.000000000 +0100
@@ -8,11 +8,28 @@
 def main():
     baseurl = ''
 
+    class bcolors:  # pragma: no cover
+        HEADER = '\033[95m'
+        OKBLUE = '\033[94m'
+        OKGREEN = '\033[92m'
+        WARNING = '\033[93m'
+        FAIL = '\033[91m'
+        ENDC = '\033[0m'
+        BOLD = '\033[1m'
+        UNDERLINE = '\033[4m'
+
     p = optparse.OptionParser(
         '%prog [(filename|url) [encoding]]',
         version='%prog ' + ".".join(map(str, __version__))
     )
     p.add_option(
+        "--no-wrap-links",
+        dest="wrap_links",
+        action="store_false",
+        default=config.WRAP_LINKS,
+        help="wrap links during conversion"
+    )
+    p.add_option(
         "--ignore-emphasis",
         dest="ignore_emphasis",
         action="store_true",
@@ -165,7 +182,15 @@
         dest="mark_code",
         default=config.MARK_CODE,
         help="Mark program code blocks with [code]...[/code]"
-    )    
+    )
+    p.add_option(
+        "--decode-errors",
+        dest="decode_errors",
+        action="store",
+        type="string",
+        default=config.DECODE_ERRORS,
+        help="What to do in case of decode errors.'ignore', 'strict' and 'replace' are acceptable values"
+    )
     (options, args) = p.parse_args()
 
     # process input
@@ -201,7 +226,18 @@
         data = wrap_read()
 
     if hasattr(data, 'decode'):
-        data = data.decode(encoding)
+        try:
+            try:
+                data = data.decode(encoding, errors=options.decode_errors)
+            except TypeError:
+                # python 2.6.x does not have the errors option
+                data = data.decode(encoding)
+        except UnicodeDecodeError as err:
+            warning = bcolors.WARNING + "Warning:" + bcolors.ENDC
+            warning += ' Use the ' + bcolors.OKGREEN
+            warning += '--decode-errors=ignore' + bcolors.ENDC + 'flag.'
+            print(warning)
+            raise err
 
     h = HTML2Text(baseurl=baseurl)
     # handle options
@@ -230,5 +266,6 @@
     h.skip_internal_links = options.skip_internal_links
     h.links_each_paragraph = options.links_each_paragraph
     h.mark_code = options.mark_code
+    h.wrap_links = options.wrap_links
 
     wrapwrite(h.handle(data))
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text/config.py new/html2text-2015.11.4/html2text/config.py
--- old/html2text-2015.6.21/html2text/config.py 2015-06-12 08:58:23.000000000 +0200
+++ new/html2text-2015.11.4/html2text/config.py 2015-11-04 15:32:38.000000000 +0100
@@ -23,6 +23,8 @@
 # Protect links from line breaks surrounding them with angle brackets (in
 # addition to their square brackets)
 PROTECT_LINKS = False
+# WRAP_LINKS = True
+WRAP_LINKS = True
 
 # Number of pixels Google indents nested lists
 GOOGLE_LIST_INDENT = 36
@@ -33,6 +35,7 @@
 IMAGES_WITH_SIZE = False
 IGNORE_EMPHASIS = False
 MARK_CODE = False
+DECODE_ERRORS = 'strict'
 
 # Convert links with same href and text to <href> format if they are absolute links
 USE_AUTOMATIC_LINKS = True
@@ -45,6 +48,7 @@
 RE_UNORDERED_LIST_MATCHER = re.compile(r'[-\*\+]\s')
 RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])")
 RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])")
+RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)")  # to find links in the text
 RE_MD_DOT_MATCHER = re.compile(r"""
     ^             # start of line
     (\s*\d+)      # optional whitespace and a number
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text/utils.py new/html2text-2015.11.4/html2text/utils.py
--- old/html2text-2015.6.21/html2text/utils.py  2015-06-21 16:31:21.000000000 +0200
+++ new/html2text-2015.11.4/html2text/utils.py  2015-11-04 15:32:38.000000000 +0100
@@ -172,7 +172,11 @@
     return 0
 
 
-def skipwrap(para):
+def skipwrap(para, wrap_links):
+    # If it appears to contain a link
+    # don't wrap
+    if (len(config.RE_LINK.findall(para)) > 0) and not wrap_links:
+        return True
     # If the text begins with four spaces or one tab, it's a code block;
     # don't wrap
     if para[0:4] == '    ' or para[0] == '\t':
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text.egg-info/PKG-INFO new/html2text-2015.11.4/html2text.egg-info/PKG-INFO
--- old/html2text-2015.6.21/html2text.egg-info/PKG-INFO 2015-06-21 16:43:23.000000000 +0200
+++ new/html2text-2015.11.4/html2text.egg-info/PKG-INFO 2015-11-04 16:23:02.000000000 +0100
@@ -1,6 +1,6 @@
 Metadata-Version: 1.1
 Name: html2text
-Version: 2015.6.21
+Version: 2015.11.4
 Summary: Turn HTML into equivalent Markdown-structured text.
 Home-page: https://github.com/Alir3z4/html2text/
 Author: Alireza Savand
@@ -23,3 +23,4 @@
 Classifier: Programming Language :: Python :: 3.1
 Classifier: Programming Language :: Python :: 3.2
 Classifier: Programming Language :: Python :: 3.3
+Classifier: Programming Language :: Python :: 3.4
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text.egg-info/SOURCES.txt new/html2text-2015.11.4/html2text.egg-info/SOURCES.txt
--- old/html2text-2015.6.21/html2text.egg-info/SOURCES.txt      2015-06-21 16:43:23.000000000 +0200
+++ new/html2text-2015.11.4/html2text.egg-info/SOURCES.txt      2015-11-04 16:23:02.000000000 +0100
@@ -83,6 +83,10 @@
 test/no_inline_links_images_to_alt.md
 test/no_inline_links_nested.html
 test/no_inline_links_nested.md
+test/no_wrap_links.html
+test/no_wrap_links.md
+test/no_wrap_links_no_inline_links.html
+test/no_wrap_links_no_inline_links.md
 test/normal.html
 test/normal.md
 test/normal_escape_snob.html
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/setup.py new/html2text-2015.11.4/setup.py
--- old/html2text-2015.6.21/setup.py    2015-06-05 09:12:13.000000000 +0200
+++ new/html2text-2015.11.4/setup.py    2015-11-04 15:58:13.000000000 +0100
@@ -58,7 +58,8 @@
         'Programming Language :: Python :: 3.0',
         'Programming Language :: Python :: 3.1',
         'Programming Language :: Python :: 3.2',
-        'Programming Language :: Python :: 3.3'
+        'Programming Language :: Python :: 3.3',
+        'Programming Language :: Python :: 3.4',
     ],
     entry_points="""
         [console_scripts]
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/no_wrap_links.html new/html2text-2015.11.4/test/no_wrap_links.html
--- old/html2text-2015.6.21/test/no_wrap_links.html     1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2015.11.4/test/no_wrap_links.html     2015-11-04 15:32:38.000000000 +0100
@@ -0,0 +1 @@
+And <a href="http://bugs.debian.org/cgi-bin/pkgreport.cgi?tag=multiarch;[email protected]";>here</a> is a long link I had at hand.</p>
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/no_wrap_links.md new/html2text-2015.11.4/test/no_wrap_links.md
--- old/html2text-2015.6.21/test/no_wrap_links.md       1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2015.11.4/test/no_wrap_links.md       2015-11-04 15:32:38.000000000 +0100
@@ -0,0 +1,2 @@
+And [here](http://bugs.debian.org/cgi-bin/pkgreport.cgi?tag=multiarch;[email protected]) is a long link I had at hand.
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/no_wrap_links_no_inline_links.html new/html2text-2015.11.4/test/no_wrap_links_no_inline_links.html
--- old/html2text-2015.6.21/test/no_wrap_links_no_inline_links.html     1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2015.11.4/test/no_wrap_links_no_inline_links.html     2015-11-04 15:32:38.000000000 +0100
@@ -0,0 +1 @@
+And <a href="http://bugs.debian.org/cgi-bin/pkgreport.cgi?tag=multiarch;[email protected]";>here</a> is a long link I had at hand.
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/no_wrap_links_no_inline_links.md new/html2text-2015.11.4/test/no_wrap_links_no_inline_links.md
--- old/html2text-2015.6.21/test/no_wrap_links_no_inline_links.md       1970-01-01 01:00:00.000000000 +0100
+++ new/html2text-2015.11.4/test/no_wrap_links_no_inline_links.md       2015-11-04 15:32:38.000000000 +0100
@@ -0,0 +1,2 @@
+And [here](http://bugs.debian.org/cgi-bin/pkgreport.cgi?tag=multiarch;[email protected]) is a long link I had at hand. 
+
diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/test_html2text.py new/html2text-2015.11.4/test/test_html2text.py
--- old/html2text-2015.6.21/test/test_html2text.py      2015-06-12 08:58:23.000000000 +0200
+++ new/html2text-2015.11.4/test/test_html2text.py      2015-11-04 15:32:38.000000000 +0100
@@ -65,6 +65,13 @@
     return result, actual
 
 
+def test_function(fn, **kwargs):
+    with open(fn) as inf:
+        actual = html2text.html2text(inf.read(), **kwargs)
+    result = get_baseline(fn)
+    return result, actual
+
+
 def get_dump_name(fn, suffix):
     return '%s-%s_output.md' % (os.path.splitext(fn)[0], suffix)
 
@@ -98,8 +105,13 @@
             result, actual = test_command(fn, *cmdline_args)
             self.assertEqual(result, actual)
 
+    def test_func(self):
+        result, actual = test_function(fn, **func_args)
+        self.assertEqual(result, actual)
+
     module_args = {}
     cmdline_args = []
+    func_args = {}
     base_fn = os.path.basename(fn).lower()
 
     if base_fn.startswith('google'):
@@ -126,6 +138,7 @@
         # module_args['unicode_snob'] = True
         module_args['body_width'] = 0
         cmdline_args.append('--body-width=0')
+        func_args['bodywidth'] = 0
 
     if base_fn.startswith('protect_links'):
         module_args['protect_links'] = True
@@ -148,22 +161,31 @@
     if base_fn.startswith('no_inline_links'):
         module_args['inline_links'] = False
         cmdline_args.append('--reference-links')
+    
+    if base_fn.startswith('no_wrap_links'):
+        module_args['wrap_links'] = False
+        cmdline_args.append('--no-wrap-links')
 
     if base_fn.startswith('mark_code'):
         module_args['mark_code'] = True
         cmdline_args.append('--mark-code')
-        
-    return test_mod, test_cmd
+
+    if base_fn not in ['bodywidth_newline.html', 'abbr_tag.html']:
+        test_func = None
+
+    return test_mod, test_cmd, test_func
 
 # Originally from http://stackoverflow.com/questions/32899/\
 #    how-to-generate-dynamic-parametrized-unit-tests-in-python
 test_dir_name = os.path.dirname(os.path.realpath(__file__))
 for fn in glob.glob("%s/*.html" % test_dir_name):
     test_name = 'test_%s' % os.path.splitext(os.path.basename(fn))[0].lower()
-    test_m, test_c = generate_test(fn)
+    test_m, test_c, test_func = generate_test(fn)
     setattr(TestHTML2Text, test_name + "_mod", test_m)
     if test_c:
         setattr(TestHTML2Text, test_name + "_cmd", test_c)
+    if test_func:
+        setattr(TestHTML2Text, test_name + "_func", test_func)
 
 if __name__ == "__main__":
     unittest.main()

commit python3-html2text for openSUSE:Factory

Reply via email to