Hello community, here is the log from the commit of package urlwatch for openSUSE:Factory checked in at 2018-02-02 22:21:48 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Comparing /work/SRC/openSUSE:Factory/urlwatch (Old) and /work/SRC/openSUSE:Factory/.urlwatch.new (New) ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
Package is "urlwatch" Fri Feb 2 22:21:48 2018 rev:7 rq:571819 version:2.8 Changes: -------- --- /work/SRC/openSUSE:Factory/urlwatch/urlwatch.changes 2017-01-15 11:21:32.608953448 +0100 +++ /work/SRC/openSUSE:Factory/.urlwatch.new/urlwatch.changes 2018-02-02 22:21:49.299165623 +0100 @@ -1,0 +2,9 @@ +Wed Jan 31 08:58:58 UTC 2018 - [email protected] + +- Update to 2.8, please see + + /usr/share/doc/packages/urlwatch/ChangeLog + +- Added python3-appdirs as dependency + +------------------------------------------------------------------- Old: ---- urlwatch-2.6.tar.gz New: ---- urlwatch-2.8.tar.gz ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++ Other differences: ------------------ ++++++ urlwatch.spec ++++++ --- /var/tmp/diff_new_pack.DW3PiV/_old 2018-02-02 22:21:50.067129769 +0100 +++ /var/tmp/diff_new_pack.DW3PiV/_new 2018-02-02 22:21:50.071129583 +0100 @@ -1,7 +1,7 @@ # # spec file for package urlwatch # -# Copyright (c) 2017 SUSE LINUX GmbH, Nuernberg, Germany. +# Copyright (c) 2018 SUSE LINUX GmbH, Nuernberg, Germany. # # All modifications and additions to the file contributed by third parties # remain the property of their copyright owners, unless otherwise agreed @@ -17,18 +17,19 @@ Name: urlwatch -Version: 2.6 +Version: 2.8 Release: 0 Summary: A tool for monitoring webpages for updates License: BSD-3-Clause Group: Productivity/Networking/Web/Utilities Url: https://thp.io/2008/urlwatch/ -Source0: https://thp.io/2008/%{name}/%{name}-%{version}.tar.gz +Source0: https://github.com/thp/%{name}/archive/%{version}.tar.gz#/%{name}-%{version}.tar.gz BuildRequires: python-futures BuildRequires: python3-devel BuildRequires: python3-setuptools Requires: python-keyring Requires: python3-PyYAML +Requires: python3-appdirs Requires: python3-minidb Requires: python3-requests BuildArch: noarch ++++++ urlwatch-2.6.tar.gz -> urlwatch-2.8.tar.gz ++++++ diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/.gitignore new/urlwatch-2.8/.gitignore --- old/urlwatch-2.6/.gitignore 1970-01-01 01:00:00.000000000 +0100 +++ new/urlwatch-2.8/.gitignore 2018-01-28 20:48:05.000000000 +0100 @@ -0,0 +1,3 @@ +__pycache__ +.idea +build \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/.travis.yml new/urlwatch-2.8/.travis.yml --- old/urlwatch-2.6/.travis.yml 1970-01-01 01:00:00.000000000 +0100 +++ new/urlwatch-2.8/.travis.yml 2018-01-28 20:48:05.000000000 +0100 @@ -0,0 +1,9 @@ +language: python +python: + - "3.4" + - "3.5" + - "3.6" + - "nightly" +install: + - pip install pyyaml minidb requests keyring pycodestyle appdirs +script: nosetests -v diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/COPYING new/urlwatch-2.8/COPYING --- old/urlwatch-2.6/COPYING 2016-01-20 09:44:10.000000000 +0100 +++ new/urlwatch-2.8/COPYING 2018-01-28 20:48:05.000000000 +0100 @@ -1,4 +1,4 @@ -Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +Copyright (c) 2008-2018 Thomas Perl <[email protected]> All rights reserved. Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/ChangeLog new/urlwatch-2.8/ChangeLog --- old/urlwatch-2.6/ChangeLog 2016-12-04 11:28:00.000000000 +0100 +++ new/urlwatch-2.8/ChangeLog 2018-01-28 20:48:05.000000000 +0100 @@ -203,3 +203,19 @@ * Issue #108: Fix creation of example files on first startup * Issue #118: Fix match filters for missing keys * Small fixes by: Jakub Wilk, Marc Urben, Adam Dobrawy and Louis Sautier + +2017-11-08 Thomas Perl <thp.io/about> + * Issue #127: Fix error reporting + * ElementsByAttribute: look for matching tag in handle_endtag (by Gaetan Leurent) + * Paths: Add XDG_CONFIG_DIR support (by Jelle van der Waa) + * E-Mail: Fix encodings (by Seokjin Han), Allow 'user' parameter for SMTP (by Jay Sitter) + * HTTP: Option to avoid 304 responses, Content-Type header (by Vinicius Massuchetto) + * html2text: Configuration options (by Vinicius Massuchetto) + * Filtering: style (by gvandenbroucke), tag (by cmichi) + * New reporter: Telegram support (by gvandenbroucke) + +2018-01-28 Thomas Perl <[email protected]> + * Documentation: Mention appdirs (by e-dschungel) + * SMTP: Fix handling of missing user field (by e-dschungel) + * Manpage: Fix documentation of XDG environment variables (by Jelle van der Waa) + * Unit tests: Fix imports for out-of-source-tree tests (by Maxime Werlen) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/PKG-INFO new/urlwatch-2.8/PKG-INFO --- old/urlwatch-2.6/PKG-INFO 2016-12-04 11:31:11.000000000 +0100 +++ new/urlwatch-2.8/PKG-INFO 1970-01-01 01:00:00.000000000 +0100 @@ -1,14 +0,0 @@ -Metadata-Version: 1.1 -Name: urlwatch -Version: 2.6 -Summary: A tool for monitoring webpages for updates -Home-page: http://thp.io/2008/urlwatch/ -Author: Thomas Perl -Author-email: [email protected] -License: BSD -Download-URL: http://thp.io/2008/urlwatch/urlwatch-2.6.tar.gz -Description: urlwatch is intended to help you watch changes in webpages and get notified - (via email, in your terminal or with a custom-written reporter class) of any - changes. The change notification will include the URL that has changed and - a unified diff of what has changed. -Platform: UNKNOWN diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/README.md new/urlwatch-2.8/README.md --- old/urlwatch-2.6/README.md 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/README.md 2018-01-28 20:48:05.000000000 +0100 @@ -25,12 +25,13 @@ * [minidb](https://thp.io/2010/minidb/) * [requests](http://python-requests.org/) * [keyring](https://github.com/jaraco/keyring/) + * [appdirs](https://github.com/ActiveState/appdirs) * [chump](https://github.com/karanlyons/chump/) (for Pushover support) * [pushbullet.py](https://github.com/randomchars/pushbullet.py) (for Pushbullet support) The dependencies can be installed with (add `--user` to install to `$HOME`): -`python3 -m pip install pyyaml minidb requests keyring` +`python3 -m pip install pyyaml minidb requests keyring appdirs` For optional pushover support the chump package is required: @@ -108,6 +109,12 @@ your urls.yaml page without requiring a custom hook where previously you would have needed to write custom filtering code in Python. +If you want to extract only the body tag you can use this filer: +```yaml +url: http://thp.io/2008/urlwatch/ +filter: element-by-tag:body +``` + PUSHOVER -------- @@ -126,6 +133,19 @@ You'll need to add to the config your Pushbullet Access Token, which you can generate at https://www.pushbullet.com/#settings +TELEGRAM +-------- + +Telegram notifications are configured using the Telegram Bot API. +For this, you'll need a Bot API token and a chat id (see https://core.telegram.org/bots). +Sample configuration: +```yaml +telegram: + bot_token: '999999999:3tOhy2CuZE0pTaCtszRfKpnagOG8IQbP5gf' # your bot api token + chat_id: '88888888' # the chat id where the messages should be sent + enabled: true +``` + CONTACT ------- diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/__init__.py new/urlwatch-2.8/lib/urlwatch/__init__.py --- old/urlwatch-2.6/lib/urlwatch/__init__.py 2016-12-04 11:18:51.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/__init__.py 2018-01-28 20:48:05.000000000 +0100 @@ -8,9 +8,9 @@ pkgname = 'urlwatch' -__copyright__ = 'Copyright 2008-2016 Thomas Perl' +__copyright__ = 'Copyright 2008-2018 Thomas Perl' __author__ = 'Thomas Perl <[email protected]>' __license__ = 'BSD' __url__ = 'http://thp.io/2008/urlwatch/' -__version__ = '2.6' +__version__ = '2.8' __user_agent__ = '%s/%s (+http://thp.io/2008/urlwatch/info.html)' % (pkgname, __version__) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/command.py new/urlwatch-2.8/lib/urlwatch/command.py --- old/urlwatch-2.6/lib/urlwatch/command.py 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/command.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/config.py new/urlwatch-2.8/lib/urlwatch/config.py --- old/urlwatch-2.6/lib/urlwatch/config.py 2016-10-31 11:17:13.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/config.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/filters.py new/urlwatch-2.8/lib/urlwatch/filters.py --- old/urlwatch-2.6/lib/urlwatch/filters.py 2016-12-04 11:17:07.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/filters.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,6 +36,8 @@ import html.parser import hashlib +from enum import Enum + from .util import TrackSubClasses logger = logging.getLogger(__name__) @@ -78,6 +80,7 @@ @classmethod def process(cls, filter_kind, subfilter, state, data): + logger.info('Applying filter %r, subfilter %r to %s', filter_kind, subfilter, state.job.get_location()) filtercls = cls.__subclasses__.get(filter_kind, None) if filtercls is None: raise ValueError('Unknown filter kind: %s:%s' % (filter_kind, subfilter)) @@ -155,11 +158,18 @@ __kind__ = 'html2text' def filter(self, data, subfilter=None): - if subfilter is None: - subfilter = 're' + if subfilter is None: + method = 're' + options = {} + elif isinstance(subfilter, dict): + method = subfilter.pop('method') + options = subfilter + elif isinstance(subfilter, str): + method = subfilter + options = {} from .html2txt import html2text - return html2text(data, method=subfilter) + return html2text(data, method=method, options=options) class Ical2TextFilter(FilterBase): @@ -209,14 +219,24 @@ return data.strip() -class ElementsByAttribute(html.parser.HTMLParser): - def __init__(self, name, value): +class FilterBy(Enum): + ATTRIBUTE = 1 + TAG = 2 + + +class ElementsBy(html.parser.HTMLParser): + def __init__(self, filter_by, name, value=None): super().__init__() - self._attributes = {name: value} + self._filter_by = filter_by + if self._filter_by == FilterBy.ATTRIBUTE: + self._attributes = {name: value} + else: + self._name = name + self._result = [] self._inside = False - self._depth = 0 + self._elts = [] def get_html(self): return ''.join(self._result) @@ -224,19 +244,24 @@ def handle_starttag(self, tag, attrs): ad = dict(attrs) - if all(ad.get(k, None) == v for k, v in self._attributes.items()): + if self._filter_by == FilterBy.ATTRIBUTE and all(ad.get(k, None) == v for k, v in self._attributes.items()): + self._inside = True + elif self._filter_by == FilterBy.TAG and tag == self._name: self._inside = True if self._inside: self._result.append('<%s%s%s>' % (tag, ' ' if attrs else '', ' '.join('%s="%s"' % (k, v) for k, v in attrs))) - self._depth += 1 + self._elts.append(tag) def handle_endtag(self, tag): if self._inside: self._result.append('</%s>' % (tag,)) - self._depth -= 1 - if self._depth == 0: + if tag in self._elts: + t = self._elts.pop() + while t != tag and self._elts: + t = self._elts.pop() + if not self._elts: self._inside = False def handle_data(self, data): @@ -253,7 +278,7 @@ if subfilter is None: raise ValueError('Need an element ID for filtering') - element_by_id = ElementsByAttribute('id', subfilter) + element_by_id = ElementsBy(FilterBy.ATTRIBUTE, 'id', subfilter) element_by_id.feed(data) return element_by_id.get_html() @@ -267,11 +292,39 @@ if subfilter is None: raise ValueError('Need an element class for filtering') - element_by_class = ElementsByAttribute('class', subfilter) + element_by_class = ElementsBy(FilterBy.ATTRIBUTE, 'class', subfilter) element_by_class.feed(data) return element_by_class.get_html() +class GetElementByStyle(FilterBase): + """Get all HTML elements by style""" + + __kind__ = 'element-by-style' + + def filter(self, data, subfilter=None): + if subfilter is None: + raise ValueError('Need an element style for filtering') + + element_by_style = ElementsBy(FilterBy.ATTRIBUTE, 'style', subfilter) + element_by_style.feed(data) + return element_by_style.get_html() + + +class GetElementByTag(FilterBase): + """Get an HTML element by its tag""" + + __kind__ = 'element-by-tag' + + def filter(self, data, subfilter=None): + if subfilter is None: + raise ValueError('Need a tag for filtering') + + element_by_tag = ElementsBy(FilterBy.TAG, subfilter) + element_by_tag.feed(data) + return element_by_tag.get_html() + + class Sha1Filter(FilterBase): """Calculate the SHA-1 checksum of the content""" diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/handler.py new/urlwatch-2.8/lib/urlwatch/handler.py --- old/urlwatch-2.6/lib/urlwatch/handler.py 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/handler.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -67,17 +67,20 @@ # Apply any specified filters filter_list = self.job.filter - if filter_list is not None: - for filter_kind in filter_list.split(','): - if ':' in filter_kind: - filter_kind, subfilter = filter_kind.split(':', 2) - else: - subfilter = None - - logger.info('Applying filter %r, subfilter %r to %s', - filter_kind, subfilter, self.job.get_location()) - data = FilterBase.process(filter_kind, subfilter, self, data) + if filter_list is not None: + if isinstance(filter_list, list): + for item in filter_list: + key = next(iter(item)) + filter_kind, subfilter = key, item[key] + data = FilterBase.process(filter_kind, subfilter, self, data) + elif isinstance(filter_list, str): + for filter_kind in filter_list.split(','): + if ':' in filter_kind: + filter_kind, subfilter = filter_kind.split(':', 1) + else: + subfilter = None + data = FilterBase.process(filter_kind, subfilter, self, data) self.new_data = data except Exception as e: self.exception = e diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/html2txt.py new/urlwatch-2.8/lib/urlwatch/html2txt.py --- old/urlwatch-2.6/lib/urlwatch/html2txt.py 2016-10-15 20:45:09.000000000 +0200 +++ new/urlwatch-2.8/lib/urlwatch/html2txt.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,7 +36,7 @@ logger = logging.getLogger(__name__) -def html2text(data, method='lynx'): +def html2text(data, method='lynx', options=None): """ Convert a string consisting of HTML to plain text @@ -44,11 +44,19 @@ Method may be one of: 'lynx' (default) - Use "lynx -dump" for conversion + options: see "lynx -help" output for options that work with "-dump" 'html2text' - Use "html2text -nobs" for conversion + options: https://linux.die.net/man/1/html2text 'bs4' - Use Beautiful Soup library to prettify the HTML + options: "parser" only, bs4 supports "lxml", "html5lib", and "html.parser" + http://beautiful-soup-4.readthedocs.io/en/latest/#specifying-the-parser-to-use 're' - A simple regex-based HTML tag stripper - 'pyhtml2text' - Use Python module "html2text", keeps link targets + 'pyhtml2text' - Use Python module "html2text" + options: https://github.com/Alir3z4/html2text/blob/master/docs/usage.md#available-options """ + if options is None: + options = {} + if method == 're': stripped_tags = re.sub(r'<[^>]*>', '', data) d = '\n'.join((l.rstrip() for l in stripped_tags.splitlines() if l.strip() != '')) @@ -56,25 +64,31 @@ if method == 'pyhtml2text': import html2text - pyhtml2text = html2text.HTML2Text() - d = pyhtml2text.handle(data) + parser = html2text.HTML2Text() + for k, v in options.items(): + setattr(parser, k.lower(), v) + d = parser.handle(data) return d if method == 'bs4': from bs4 import BeautifulSoup - soup = BeautifulSoup(data, 'html.parser') + parser = options.pop('parser', 'html.parser') + soup = BeautifulSoup(data, parser) d = soup.prettify() return d if method == 'lynx': - cmd = ['lynx', '-nonumbers', '-dump', '-stdin', '-assume_charset=UTF-8', '-display_charset=UTF-8'] - stdout_encoding = 'utf-8' + cmd = ['lynx', '-nonumbers', '-dump', '-stdin', '-assume_charset UTF-8', '-display_charset UTF-8'] elif method == 'html2text': cmd = ['html2text', '-nobs', '-utf8'] - stdout_encoding = 'utf-8' else: raise ValueError('Unknown html2text method: %r' % (method,)) + stdout_encoding = 'utf-8' + + for k, v in options.items(): + cmd.append('-%s %s' % (k, v) if v is True else '-%s' % k) + logger.debug('Command: %r, stdout encoding: %s', cmd, stdout_encoding) env = {} diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/ical2txt.py new/urlwatch-2.8/lib/urlwatch/ical2txt.py --- old/urlwatch-2.6/lib/urlwatch/ical2txt.py 2016-01-20 09:44:10.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/ical2txt.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,7 +36,7 @@ else: try: parsedCal = vobject.readOne(ical_string) - except: + except Exception as e: parsedCal = vobject.readOne(ical_string.decode('utf-8', 'ignore')) for event in parsedCal.getChildren(): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/jobs.py new/urlwatch-2.8/lib/urlwatch/jobs.py --- old/urlwatch-2.6/lib/urlwatch/jobs.py 2016-10-31 11:17:13.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/jobs.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -179,7 +179,7 @@ __kind__ = 'url' __required__ = ('url',) - __optional__ = ('cookies', 'data', 'method', 'ssl_no_verify', 'http_proxy', 'https_proxy') + __optional__ = ('cookies', 'data', 'method', 'ssl_no_verify', 'ignore_cached', 'http_proxy', 'https_proxy') CHARSET_RE = re.compile('text/(html|plain); charset=([^;]*)') @@ -199,10 +199,16 @@ if job_state.timestamp is not None: headers['If-Modified-Since'] = email.utils.formatdate(job_state.timestamp) + if self.ignore_cached: + headers['If-Modified-Since'] = email.utils.formatdate(0) + headers['Cache-Control'] = 'max-age=172800' + headers['Expires'] = email.utils.formatdate() + if self.method is None: self.method = "GET" if self.data is not None: self.method = "POST" + headers['Content-type'] = 'application/x-www-form-urlencoded' logger.info('Sending POST request to %s', self.url) if self.http_proxy is not None: diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/mailer.py new/urlwatch-2.8/lib/urlwatch/mailer.py --- old/urlwatch-2.6/lib/urlwatch/mailer.py 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/mailer.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -50,7 +50,7 @@ raise NotImplementedError def msg_plain(self, from_email, to_email, subject, body): - msg = email.mime.text.MIMEText(body, 'plain', 'utf_8') + msg = email.mime.text.MIMEText(body, 'plain', 'utf-8') msg['Subject'] = subject msg['From'] = from_email msg['To'] = to_email @@ -65,15 +65,16 @@ msg['To'] = to_email msg['Date'] = email.utils.formatdate() - msg.attach(email.mime.text.MIMEText(body_text, 'plain', 'utf_8')) - msg.attach(email.mime.text.MIMEText(body_html, 'html', 'utf_8')) + msg.attach(email.mime.text.MIMEText(body_text, 'plain', 'utf-8')) + msg.attach(email.mime.text.MIMEText(body_html, 'html', 'utf-8')) return msg class SMTPMailer(Mailer): - def __init__(self, smtp_server, smtp_port, tls, auth): + def __init__(self, smtp_user, smtp_server, smtp_port, tls, auth): self.smtp_server = smtp_server + self.smtp_user = smtp_user self.smtp_port = smtp_port self.tls = tls self.auth = auth @@ -86,10 +87,10 @@ s.starttls() if self.auth and keyring is not None: - passwd = keyring.get_password(self.smtp_server, msg['From']) + passwd = keyring.get_password(self.smtp_server, self.smtp_user) if passwd is None: - raise ValueError('No password available in keyring for {}, {}'.format(self.smtp_server, msg['From'])) - s.login(msg['From'], passwd) + raise ValueError('No password available in keyring for {}, {}'.format(self.smtp_server, self.smtp_user)) + s.login(self.smtp_user, passwd) s.sendmail(msg['From'], [msg['To']], msg.as_string()) s.quit() diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/main.py new/urlwatch-2.8/lib/urlwatch/main.py --- old/urlwatch-2.6/lib/urlwatch/main.py 2016-10-31 11:17:13.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/main.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/migration.py new/urlwatch-2.8/lib/urlwatch/migration.py --- old/urlwatch-2.6/lib/urlwatch/migration.py 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/migration.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/reporters.py new/urlwatch-2.8/lib/urlwatch/reporters.py --- old/urlwatch-2.6/lib/urlwatch/reporters.py 2016-11-26 13:10:44.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/reporters.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,6 +1,6 @@ # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -206,7 +206,7 @@ '</pre>', )) else: - raise ValueError('Diff style not supported: %r', cfg['diff']) + raise ValueError('Diff style not supported: %r' % (difftype,)) class TextReporter(ReporterBase): @@ -343,7 +343,8 @@ logger.debug('Not sending e-mail (no changes)') return if self.config['method'] == "smtp": - mailer = SMTPMailer(self.config['smtp']['host'], self.config['smtp']['port'], + smtp_user = self.config['smtp'].get('user', self.config['from']) + mailer = SMTPMailer(smtp_user, self.config['smtp']['host'], self.config['smtp']['port'], self.config['smtp']['starttls'], self.config['smtp']['keyring']) elif self.config['method'] == "sendmail": mailer = SendmailMailer(self.config['sendmail']['path']) @@ -383,7 +384,7 @@ try: service = self.web_service_get() - except: + except Exception as e: logger.error('Failed to load or connect to %s - are the dependencies installed and configured?', self.__kind__, exc_info=True) return @@ -466,3 +467,49 @@ result.content)) return result + + +class TelegramReporter(TextReporter): + """Custom Telegram reporter""" + MAX_LENGTH = 4096 + + __kind__ = 'telegram' + + def submit(self): + + bot_token = self.config['bot_token'] + chat_id = self.config['chat_id'] + + text = '\n'.join(super().submit()) + + if not text: + logger.debug('Not calling telegram API (no changes)') + return + + result = None + + for chunk in self.chunkstring(text, self.MAX_LENGTH): + result = self.submitToTelegram(bot_token, chat_id, chunk) + + return result + + def submitToTelegram(self, bot_token, chat_id, text): + logger.debug("Sending telegram request to chat id:'{0}'".format(chat_id)) + result = requests.post( + "https://api.telegram.org/bot{0}/sendMessage".format(bot_token), + data={"chat_id": chat_id, "text": text, "disable_web_page_preview": "true"}) + try: + json_res = result.json() + + if (result.status_code == 200): + logger.info("Telegram response: ok '{0}'. {1}".format(json_res['ok'], json_res['result'])) + else: + logger.error("Telegram error: {0}".format(json_res['description'])) + except ValueError: + logger.error( + "Failed to parse telegram response. HTTP status code: {0}, content: {1}".format(result.status_code, + result.content)) + return result + + def chunkstring(self, string, length): + return (string[0 + i:length + i] for i in range(0, len(string), length)) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/storage.py new/urlwatch-2.8/lib/urlwatch/storage.py --- old/urlwatch-2.6/lib/urlwatch/storage.py 2016-11-26 13:10:44.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/storage.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -37,6 +37,7 @@ import shutil import subprocess +import shlex import yaml import json import minidb @@ -179,7 +180,9 @@ while True: try: - subprocess.check_call([editor, file_edit]) + editor = shlex.split(editor) + editor.append(file_edit) + subprocess.check_call(editor) # Check if we can still parse it if self.parse is not None: self.parse(file_edit) diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/util.py new/urlwatch-2.8/lib/urlwatch/util.py --- old/urlwatch-2.6/lib/urlwatch/util.py 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/util.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch/worker.py new/urlwatch-2.8/lib/urlwatch/worker.py --- old/urlwatch-2.6/lib/urlwatch/worker.py 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch/worker.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch.egg-info/PKG-INFO new/urlwatch-2.8/lib/urlwatch.egg-info/PKG-INFO --- old/urlwatch-2.6/lib/urlwatch.egg-info/PKG-INFO 2016-12-04 11:31:11.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch.egg-info/PKG-INFO 1970-01-01 01:00:00.000000000 +0100 @@ -1,14 +0,0 @@ -Metadata-Version: 1.1 -Name: urlwatch -Version: 2.6 -Summary: A tool for monitoring webpages for updates -Home-page: http://thp.io/2008/urlwatch/ -Author: Thomas Perl -Author-email: [email protected] -License: BSD -Download-URL: http://thp.io/2008/urlwatch/urlwatch-2.6.tar.gz -Description: urlwatch is intended to help you watch changes in webpages and get notified - (via email, in your terminal or with a custom-written reporter class) of any - changes. The change notification will include the URL that has changed and - a unified diff of what has changed. -Platform: UNKNOWN diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch.egg-info/SOURCES.txt new/urlwatch-2.8/lib/urlwatch.egg-info/SOURCES.txt --- old/urlwatch-2.6/lib/urlwatch.egg-info/SOURCES.txt 2016-12-04 11:31:11.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch.egg-info/SOURCES.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,36 +0,0 @@ -COPYING -ChangeLog -MANIFEST.in -README.md -setup.cfg -setup.py -urlwatch -lib/urlwatch/__init__.py -lib/urlwatch/command.py -lib/urlwatch/config.py -lib/urlwatch/filters.py -lib/urlwatch/handler.py -lib/urlwatch/html2txt.py -lib/urlwatch/ical2txt.py -lib/urlwatch/jobs.py -lib/urlwatch/mailer.py -lib/urlwatch/main.py -lib/urlwatch/migration.py -lib/urlwatch/reporters.py -lib/urlwatch/storage.py -lib/urlwatch/util.py -lib/urlwatch/worker.py -lib/urlwatch.egg-info/PKG-INFO -lib/urlwatch.egg-info/SOURCES.txt -lib/urlwatch.egg-info/dependency_links.txt -lib/urlwatch.egg-info/requires.txt -lib/urlwatch.egg-info/top_level.txt -share/man/man1/urlwatch.1 -share/urlwatch/examples/hooks.py.example -share/urlwatch/examples/urls.yaml.example -test/test_filters.py -test/test_handler.py -test/data/urls.json -test/data/urls.txt -test/data/urlwatch.json -test/data/urlwatch.yaml \ No newline at end of file diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch.egg-info/dependency_links.txt new/urlwatch-2.8/lib/urlwatch.egg-info/dependency_links.txt --- old/urlwatch-2.6/lib/urlwatch.egg-info/dependency_links.txt 2016-12-04 11:31:11.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch.egg-info/dependency_links.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch.egg-info/requires.txt new/urlwatch-2.8/lib/urlwatch.egg-info/requires.txt --- old/urlwatch-2.6/lib/urlwatch.egg-info/requires.txt 2016-12-04 11:31:11.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch.egg-info/requires.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1,5 +0,0 @@ -minidb -PyYAML -requests -keyring -pycodestyle diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/lib/urlwatch.egg-info/top_level.txt new/urlwatch-2.8/lib/urlwatch.egg-info/top_level.txt --- old/urlwatch-2.6/lib/urlwatch.egg-info/top_level.txt 2016-12-04 11:31:11.000000000 +0100 +++ new/urlwatch-2.8/lib/urlwatch.egg-info/top_level.txt 1970-01-01 01:00:00.000000000 +0100 @@ -1 +0,0 @@ -urlwatch diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/setup.cfg new/urlwatch-2.8/setup.cfg --- old/urlwatch-2.6/setup.cfg 2016-12-04 11:31:11.000000000 +0100 +++ new/urlwatch-2.8/setup.cfg 2018-01-28 20:48:05.000000000 +0100 @@ -1,8 +1,2 @@ [pep8] max-line-length = 120 - -[egg_info] -tag_build = -tag_date = 0 -tag_svn_revision = 0 - diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/setup.py new/urlwatch-2.8/setup.py --- old/urlwatch-2.6/setup.py 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/setup.py 2018-01-28 20:48:05.000000000 +0100 @@ -13,7 +13,7 @@ m['author'], m['author_email'] = re.match(r'(.*) <(.*)>', m['author']).groups() m['description'], m['long_description'] = docs[0].strip().split('\n\n', 1) m['download_url'] = '{url}urlwatch-{version}.tar.gz'.format(**m) -m['install_requires'] = ['minidb', 'PyYAML', 'requests', 'keyring', 'pycodestyle'] +m['install_requires'] = ['minidb', 'PyYAML', 'requests', 'keyring', 'pycodestyle', 'appdirs'] m['scripts'] = ['urlwatch'] m['package_dir'] = {'': 'lib'} m['packages'] = ['urlwatch'] diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/share/man/man1/urlwatch.1 new/urlwatch-2.8/share/man/man1/urlwatch.1 --- old/urlwatch-2.6/share/man/man1/urlwatch.1 2016-01-20 09:44:10.000000000 +0100 +++ new/urlwatch-2.8/share/man/man1/urlwatch.1 2018-01-28 20:48:05.000000000 +0100 @@ -71,13 +71,13 @@ remove old cache entries .SH "FILES" .TP -.B ~/.urlwatch/urls.yaml +.B $XDG_CONFIG_HOME/urlwatch/urls.yaml A list of URLs, commands and other jobs to watch .TP -.B ~/.urlwatch/hooks.py +.B $XDG_CONFIG_HOME/urlwatch/hooks.py A Python module that can implement new job types, filters and reporters .TP -.B ~/.urlwatch/cache.db +.B $XDG_CONFIG_HOME/urlwatch/cache.db A SQLite 3 database that contains the state history of jobs (for diffing) .SH AUTHOR Thomas Perl <thp.io/about> diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/share/urlwatch/examples/hooks.py.example new/urlwatch-2.8/share/urlwatch/examples/hooks.py.example --- old/urlwatch-2.6/share/urlwatch/examples/hooks.py.example 2016-02-11 18:38:27.000000000 +0100 +++ new/urlwatch-2.8/share/urlwatch/examples/hooks.py.example 2018-01-28 20:48:05.000000000 +0100 @@ -1,7 +1,7 @@ # # Example hooks file for urlwatch # -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/test/test_filters.py new/urlwatch-2.8/test/test_filters.py --- old/urlwatch-2.6/test/test_filters.py 2016-01-20 09:44:10.000000000 +0100 +++ new/urlwatch-2.8/test/test_filters.py 2018-01-28 20:48:05.000000000 +0100 @@ -1,4 +1,5 @@ from urlwatch.filters import GetElementById +from urlwatch.filters import GetElementByTag from nose.tools import eq_ @@ -13,3 +14,24 @@ """, 'bar') print(result) eq_(result, '<div id="bar">asdf <span>bar</span> hoho</div>') + + +def test_get_element_by_tag(): + get_element_by_tag = GetElementByTag(None, None) + result = get_element_by_tag.filter(""" + <html><head></head><body>foo</body></html> + """, 'body') + print(result) + eq_(result, '<body>foo</body>') + + +def test_get_element_by_tag_nested(): + get_element_by_tag = GetElementByTag(None, None) + result = get_element_by_tag.filter(""" + <html><head></head><body> + <div>foo</div> + <div>bar</div> + </body></html> + """, 'div') + print(result) + eq_(result, """<div>foo</div><div>bar</div>""") diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/test/test_handler.py new/urlwatch-2.8/test/test_handler.py --- old/urlwatch-2.6/test/test_handler.py 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/test/test_handler.py 2018-01-28 20:48:05.000000000 +0100 @@ -11,10 +11,10 @@ import os import imp -from lib.urlwatch import storage -from lib.urlwatch.config import BaseConfig -from lib.urlwatch.storage import JsonConfigStorage, YamlConfigStorage, UrlsJson, CacheMiniDBStorage -from lib.urlwatch.main import Urlwatch +from urlwatch import storage +from urlwatch.config import BaseConfig +from urlwatch.storage import JsonConfigStorage, YamlConfigStorage, UrlsJson, CacheMiniDBStorage +from urlwatch.main import Urlwatch def test_required_classattrs_in_subclasses(): diff -urN '--exclude=CVS' '--exclude=.cvsignore' '--exclude=.svn' '--exclude=.svnignore' old/urlwatch-2.6/urlwatch new/urlwatch-2.8/urlwatch --- old/urlwatch-2.6/urlwatch 2016-10-31 10:54:09.000000000 +0100 +++ new/urlwatch-2.8/urlwatch 2018-01-28 20:48:05.000000000 +0100 @@ -2,7 +2,7 @@ # -*- coding: utf-8 -*- # # This file is part of urlwatch (https://thp.io/2008/urlwatch/). -# Copyright (c) 2008-2016 Thomas Perl <thp.io/about> +# Copyright (c) 2008-2018 Thomas Perl <[email protected]> # All rights reserved. # # Redistribution and use in source and binary forms, with or without @@ -36,9 +36,14 @@ import socket import sys +from appdirs import AppDirs + pkgname = 'urlwatch' urlwatch_dir = os.path.expanduser(os.path.join('~', '.' + pkgname)) +if not os.path.exists(urlwatch_dir): + urlwatch_dir = AppDirs(pkgname).user_config_dir + # Check if we are installed in the system already (prefix, bindir) = os.path.split(os.path.dirname(os.path.abspath(sys.argv[0])))
