Hello community, here is the log from the commit of package python3-html2text for openSUSE:Factory checked in at 2015-11-08 11:26:24 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/python3-html2text (Old) and /work/SRC/openSUSE:Factory/.python3-html2text.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "python3-html2text" Changes: -------- --- /work/SRC/openSUSE:Factory/python3-html2text/python3-html2text.changes 2015-07-03 00:18:41.000000000 +0200 +++ /work/SRC/openSUSE:Factory/.python3-html2text.new/python3-html2text.changes 2015-11-08 11:26:25.000000000 +0100 @@ -1,0 +2,11 @@ +Wed Nov 4 16:40:42 UTC 2015 - [email protected] + +- update to version 2015.11.4: + * Fix #38: Long links wrapping controlled by --no-wrap-links. + * Note: --no-wrap-links implies --reference-links + * Feature #83: Add callback-on-tag. + * Fix #87: Decode errors can be handled via command line. + * Feature #95: Docs, decode errors spelling mistake. + * Fix #84: Make bodywidth kwarg overridable using config. + +------------------------------------------------------------------- Old: ---- html2text-2015.6.21.tar.gz New: ---- html2text-2015.11.4.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ python3-html2text.spec ++++++ --- /var/tmp/diff_new_pack.BnLolk/_old 2015-11-08 11:26:26.000000000 +0100 +++ /var/tmp/diff_new_pack.BnLolk/_new 2015-11-08 11:26:26.000000000 +0100 @@ -17,7 +17,7 @@ Name: python3-html2text -Version: 2015.6.21 +Version: 2015.11.4 Release: 0 Url: https://github.com/Alir3z4/html2text/ Summary: Turn HTML into equivalent Markdown-structured text ++++++ html2text-2015.6.21.tar.gz -> html2text-2015.11.4.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/AUTHORS.rst new/html2text-2015.11.4/AUTHORS.rst --- old/html2text-2015.6.21/AUTHORS.rst 2015-06-12 08:58:49.000000000 +0200 +++ new/html2text-2015.11.4/AUTHORS.rst 2015-11-04 15:32:38.000000000 +0100 @@ -17,6 +17,8 @@ * Arjoonn Sharma <gh: theSage21> * Ali Mohammad <gh: alawibaba> * Albert Berger <gh: nbdsp> +* Etienne Millon <[email protected]> +* John C F <gh: critiqjo> Maintainer: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/ChangeLog.rst new/html2text-2015.11.4/ChangeLog.rst --- old/html2text-2015.6.21/ChangeLog.rst 2015-06-21 16:36:23.000000000 +0200 +++ new/html2text-2015.11.4/ChangeLog.rst 2015-11-04 15:48:46.000000000 +0100 @@ -1,3 +1,15 @@ +2015.11.4 +========= +---- + +* Fix #38: Long links wrapping controlled by `--no-wrap-links`. +* Note: `--no-wrap-links` implies `--reference-links` +* Feature #83: Add callback-on-tag. +* Fix #87: Decode errors can be handled via command line. +* Feature #95: Docs, decode errors spelling mistake. +* Fix #84: Make bodywidth kwarg overridable using config. + + 2015.6.21 ========= ---- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/PKG-INFO new/html2text-2015.11.4/PKG-INFO --- old/html2text-2015.6.21/PKG-INFO 2015-06-21 16:43:23.000000000 +0200 +++ new/html2text-2015.11.4/PKG-INFO 2015-11-04 16:23:02.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: html2text -Version: 2015.6.21 +Version: 2015.11.4 Summary: Turn HTML into equivalent Markdown-structured text. Home-page: https://github.com/Alir3z4/html2text/ Author: Alireza Savand @@ -23,3 +23,4 @@ Classifier: Programming Language :: Python :: 3.1 Classifier: Programming Language :: Python :: 3.2 Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/README.md new/html2text-2015.11.4/README.md --- old/html2text-2015.6.21/README.md 2015-06-12 08:58:23.000000000 +0200 +++ new/html2text-2015.11.4/README.md 2015-11-04 15:32:38.000000000 +0100 @@ -15,27 +15,16 @@ Usage: `html2text [(filename|url) [encoding]]` - | Option | Description |--------------------------------------------------------|--------------------------------------------------- | `--version` | Show program's version number and exit | `-h`, `--help` | Show this help message and exit | `--ignore-links` | Don't include any formatting for links -|`--protect-links` | Protect links from line breaks surrounding them "+" with angle brackets -|`--ignore-images` | Don't include any formatting for images -|`--images-to-alt` | Discard image data, only keep alt text -|`--images-with-size` | Write image tags with height and width attrs as raw html to retain dimensions -|`-g`, `--google-doc` | Convert an html-exported Google Document -|`-d`, `--dash-unordered-list` | Use a dash rather than a star for unordered list items -|`-b` `BODY_WIDTH`, `--body-width`=`BODY_WIDTH` | Number of characters per output line, `0` for no wrap -|`-i` `LIST_INDENT`, `--google-list-indent`=`LIST_INDENT`| Number of pixels Google indents nested lists -|`-s`, `--hide-strikethrough` | Hide strike-through text. only relevent when `-g` is specified as well |`--escape-all` | Escape all special characters. Output is less readable, but avoids corner case formatting issues. -| `--bypass-tables` | Format tables in HTML rather than Markdown syntax. -| `--single-line-break` | Use a single line break after a block element rather than two. | `--reference-links` | Use reference links instead of links to create markdown | `--mark-code` | Mark preformatted and code blocks with [code]...[/code] +For a complete list of options see the [docs](docs/usage.md) Or you can use it from within `Python`: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text/__init__.py new/html2text-2015.11.4/html2text/__init__.py --- old/html2text-2015.6.21/html2text/__init__.py 2015-06-21 16:36:01.000000000 +0200 +++ new/html2text-2015.11.4/html2text/__init__.py 2015-11-04 15:48:14.000000000 +0100 @@ -29,7 +29,7 @@ skipwrap ) -__version__ = (2015, 6, 21) +__version__ = (2015, 11, 4) # TODO: @@ -72,6 +72,10 @@ self.use_automatic_links = config.USE_AUTOMATIC_LINKS # covered in cli self.hide_strikethrough = False # covered in cli self.mark_code = config.MARK_CODE + self.single_line_break = config.SINGLE_LINE_BREAK + self.use_automatic_links = config.USE_AUTOMATIC_LINKS + self.wrap_links = config.WRAP_LINKS # covered in cli + self.tag_callback = None if out is None: # pragma: no cover self.out = self.outtextf @@ -278,6 +282,10 @@ else: attrs = dict(attrs) + if self.tag_callback is not None: + if self.tag_callback(self, tag, attrs, start) is True: + return + # first thing inside the anchor tag is another tag that produces some output if (start and not self.maybe_automatic_link is None and tag not in ['p', 'div', 'style', 'dl', 'dt'] @@ -794,9 +802,14 @@ assert wrap, "Requires Python 2.3." result = '' newlines = 0 + # I cannot think of a better solution for now. + # To avoid the non-wrap behaviour for entire paras + # because of the presence of a link in it + if not self.wrap_links: + self.inline_links = False for para in text.split("\n"): if len(para) > 0: - if not skipwrap(para): + if not skipwrap(para, self.wrap_links): result += "\n".join(wrap(para, self.body_width)) if para.endswith(' '): result += " \n" @@ -819,7 +832,9 @@ return result -def html2text(html, baseurl='', bodywidth=config.BODY_WIDTH): +def html2text(html, baseurl='', bodywidth=None): + if bodywidth is None: + bodywidth = config.BODY_WIDTH h = HTML2Text(baseurl=baseurl, bodywidth=bodywidth) return h.handle(html) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text/cli.py new/html2text-2015.11.4/html2text/cli.py --- old/html2text-2015.6.21/html2text/cli.py 2015-06-21 16:21:11.000000000 +0200 +++ new/html2text-2015.11.4/html2text/cli.py 2015-11-04 15:32:38.000000000 +0100 @@ -8,11 +8,28 @@ def main(): baseurl = '' + class bcolors: # pragma: no cover + HEADER = '\033[95m' + OKBLUE = '\033[94m' + OKGREEN = '\033[92m' + WARNING = '\033[93m' + FAIL = '\033[91m' + ENDC = '\033[0m' + BOLD = '\033[1m' + UNDERLINE = '\033[4m' + p = optparse.OptionParser( '%prog [(filename|url) [encoding]]', version='%prog ' + ".".join(map(str, __version__)) ) p.add_option( + "--no-wrap-links", + dest="wrap_links", + action="store_false", + default=config.WRAP_LINKS, + help="wrap links during conversion" + ) + p.add_option( "--ignore-emphasis", dest="ignore_emphasis", action="store_true", @@ -165,7 +182,15 @@ dest="mark_code", default=config.MARK_CODE, help="Mark program code blocks with [code]...[/code]" - ) + ) + p.add_option( + "--decode-errors", + dest="decode_errors", + action="store", + type="string", + default=config.DECODE_ERRORS, + help="What to do in case of decode errors.'ignore', 'strict' and 'replace' are acceptable values" + ) (options, args) = p.parse_args() # process input @@ -201,7 +226,18 @@ data = wrap_read() if hasattr(data, 'decode'): - data = data.decode(encoding) + try: + try: + data = data.decode(encoding, errors=options.decode_errors) + except TypeError: + # python 2.6.x does not have the errors option + data = data.decode(encoding) + except UnicodeDecodeError as err: + warning = bcolors.WARNING + "Warning:" + bcolors.ENDC + warning += ' Use the ' + bcolors.OKGREEN + warning += '--decode-errors=ignore' + bcolors.ENDC + 'flag.' + print(warning) + raise err h = HTML2Text(baseurl=baseurl) # handle options @@ -230,5 +266,6 @@ h.skip_internal_links = options.skip_internal_links h.links_each_paragraph = options.links_each_paragraph h.mark_code = options.mark_code + h.wrap_links = options.wrap_links wrapwrite(h.handle(data)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text/config.py new/html2text-2015.11.4/html2text/config.py --- old/html2text-2015.6.21/html2text/config.py 2015-06-12 08:58:23.000000000 +0200 +++ new/html2text-2015.11.4/html2text/config.py 2015-11-04 15:32:38.000000000 +0100 @@ -23,6 +23,8 @@ # Protect links from line breaks surrounding them with angle brackets (in # addition to their square brackets) PROTECT_LINKS = False +# WRAP_LINKS = True +WRAP_LINKS = True # Number of pixels Google indents nested lists GOOGLE_LIST_INDENT = 36 @@ -33,6 +35,7 @@ IMAGES_WITH_SIZE = False IGNORE_EMPHASIS = False MARK_CODE = False +DECODE_ERRORS = 'strict' # Convert links with same href and text to <href> format if they are absolute links USE_AUTOMATIC_LINKS = True @@ -45,6 +48,7 @@ RE_UNORDERED_LIST_MATCHER = re.compile(r'[-\*\+]\s') RE_MD_CHARS_MATCHER = re.compile(r"([\\\[\]\(\)])") RE_MD_CHARS_MATCHER_ALL = re.compile(r"([`\*_{}\[\]\(\)#!])") +RE_LINK = re.compile(r"(\[.*?\] ?\(.*?\))|(\[.*?\]:.*?)") # to find links in the text RE_MD_DOT_MATCHER = re.compile(r""" ^ # start of line (\s*\d+) # optional whitespace and a number diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text/utils.py new/html2text-2015.11.4/html2text/utils.py --- old/html2text-2015.6.21/html2text/utils.py 2015-06-21 16:31:21.000000000 +0200 +++ new/html2text-2015.11.4/html2text/utils.py 2015-11-04 15:32:38.000000000 +0100 @@ -172,7 +172,11 @@ return 0 -def skipwrap(para): +def skipwrap(para, wrap_links): + # If it appears to contain a link + # don't wrap + if (len(config.RE_LINK.findall(para)) > 0) and not wrap_links: + return True # If the text begins with four spaces or one tab, it's a code block; # don't wrap if para[0:4] == ' ' or para[0] == '\t': diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text.egg-info/PKG-INFO new/html2text-2015.11.4/html2text.egg-info/PKG-INFO --- old/html2text-2015.6.21/html2text.egg-info/PKG-INFO 2015-06-21 16:43:23.000000000 +0200 +++ new/html2text-2015.11.4/html2text.egg-info/PKG-INFO 2015-11-04 16:23:02.000000000 +0100 @@ -1,6 +1,6 @@ Metadata-Version: 1.1 Name: html2text -Version: 2015.6.21 +Version: 2015.11.4 Summary: Turn HTML into equivalent Markdown-structured text. Home-page: https://github.com/Alir3z4/html2text/ Author: Alireza Savand @@ -23,3 +23,4 @@ Classifier: Programming Language :: Python :: 3.1 Classifier: Programming Language :: Python :: 3.2 Classifier: Programming Language :: Python :: 3.3 +Classifier: Programming Language :: Python :: 3.4 diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/html2text.egg-info/SOURCES.txt new/html2text-2015.11.4/html2text.egg-info/SOURCES.txt --- old/html2text-2015.6.21/html2text.egg-info/SOURCES.txt 2015-06-21 16:43:23.000000000 +0200 +++ new/html2text-2015.11.4/html2text.egg-info/SOURCES.txt 2015-11-04 16:23:02.000000000 +0100 @@ -83,6 +83,10 @@ test/no_inline_links_images_to_alt.md test/no_inline_links_nested.html test/no_inline_links_nested.md +test/no_wrap_links.html +test/no_wrap_links.md +test/no_wrap_links_no_inline_links.html +test/no_wrap_links_no_inline_links.md test/normal.html test/normal.md test/normal_escape_snob.html diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/setup.py new/html2text-2015.11.4/setup.py --- old/html2text-2015.6.21/setup.py 2015-06-05 09:12:13.000000000 +0200 +++ new/html2text-2015.11.4/setup.py 2015-11-04 15:58:13.000000000 +0100 @@ -58,7 +58,8 @@ 'Programming Language :: Python :: 3.0', 'Programming Language :: Python :: 3.1', 'Programming Language :: Python :: 3.2', - 'Programming Language :: Python :: 3.3' + 'Programming Language :: Python :: 3.3', + 'Programming Language :: Python :: 3.4', ], entry_points=""" [console_scripts] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/no_wrap_links.html new/html2text-2015.11.4/test/no_wrap_links.html --- old/html2text-2015.6.21/test/no_wrap_links.html 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2015.11.4/test/no_wrap_links.html 2015-11-04 15:32:38.000000000 +0100 @@ -0,0 +1 @@ +And <a href="http://bugs.debian.org/cgi-bin/pkgreport.cgi?tag=multiarch;[email protected]">here</a> is a long link I had at hand.</p> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/no_wrap_links.md new/html2text-2015.11.4/test/no_wrap_links.md --- old/html2text-2015.6.21/test/no_wrap_links.md 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2015.11.4/test/no_wrap_links.md 2015-11-04 15:32:38.000000000 +0100 @@ -0,0 +1,2 @@ +And [here](http://bugs.debian.org/cgi-bin/pkgreport.cgi?tag=multiarch;[email protected]) is a long link I had at hand. + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/no_wrap_links_no_inline_links.html new/html2text-2015.11.4/test/no_wrap_links_no_inline_links.html --- old/html2text-2015.6.21/test/no_wrap_links_no_inline_links.html 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2015.11.4/test/no_wrap_links_no_inline_links.html 2015-11-04 15:32:38.000000000 +0100 @@ -0,0 +1 @@ +And <a href="http://bugs.debian.org/cgi-bin/pkgreport.cgi?tag=multiarch;[email protected]">here</a> is a long link I had at hand. diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/no_wrap_links_no_inline_links.md new/html2text-2015.11.4/test/no_wrap_links_no_inline_links.md --- old/html2text-2015.6.21/test/no_wrap_links_no_inline_links.md 1970-01-01 01:00:00.000000000 +0100 +++ new/html2text-2015.11.4/test/no_wrap_links_no_inline_links.md 2015-11-04 15:32:38.000000000 +0100 @@ -0,0 +1,2 @@ +And [here](http://bugs.debian.org/cgi-bin/pkgreport.cgi?tag=multiarch;[email protected]) is a long link I had at hand. + diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/html2text-2015.6.21/test/test_html2text.py new/html2text-2015.11.4/test/test_html2text.py --- old/html2text-2015.6.21/test/test_html2text.py 2015-06-12 08:58:23.000000000 +0200 +++ new/html2text-2015.11.4/test/test_html2text.py 2015-11-04 15:32:38.000000000 +0100 @@ -65,6 +65,13 @@ return result, actual +def test_function(fn, **kwargs): + with open(fn) as inf: + actual = html2text.html2text(inf.read(), **kwargs) + result = get_baseline(fn) + return result, actual + + def get_dump_name(fn, suffix): return '%s-%s_output.md' % (os.path.splitext(fn)[0], suffix) @@ -98,8 +105,13 @@ result, actual = test_command(fn, *cmdline_args) self.assertEqual(result, actual) + def test_func(self): + result, actual = test_function(fn, **func_args) + self.assertEqual(result, actual) + module_args = {} cmdline_args = [] + func_args = {} base_fn = os.path.basename(fn).lower() if base_fn.startswith('google'): @@ -126,6 +138,7 @@ # module_args['unicode_snob'] = True module_args['body_width'] = 0 cmdline_args.append('--body-width=0') + func_args['bodywidth'] = 0 if base_fn.startswith('protect_links'): module_args['protect_links'] = True @@ -148,22 +161,31 @@ if base_fn.startswith('no_inline_links'): module_args['inline_links'] = False cmdline_args.append('--reference-links') + + if base_fn.startswith('no_wrap_links'): + module_args['wrap_links'] = False + cmdline_args.append('--no-wrap-links') if base_fn.startswith('mark_code'): module_args['mark_code'] = True cmdline_args.append('--mark-code') - - return test_mod, test_cmd + + if base_fn not in ['bodywidth_newline.html', 'abbr_tag.html']: + test_func = None + + return test_mod, test_cmd, test_func # Originally from http://stackoverflow.com/questions/32899/\ # how-to-generate-dynamic-parametrized-unit-tests-in-python test_dir_name = os.path.dirname(os.path.realpath(__file__)) for fn in glob.glob("%s/*.html" % test_dir_name): test_name = 'test_%s' % os.path.splitext(os.path.basename(fn))[0].lower() - test_m, test_c = generate_test(fn) + test_m, test_c, test_func = generate_test(fn) setattr(TestHTML2Text, test_name + "_mod", test_m) if test_c: setattr(TestHTML2Text, test_name + "_cmd", test_c) + if test_func: + setattr(TestHTML2Text, test_name + "_func", test_func) if __name__ == "__main__": unittest.main()
