Eranroz has uploaded a new change for review. https://gerrit.wikimedia.org/r/204769
Change subject: Adding IRC recent changes listener ...................................................................... Adding IRC recent changes listener The IRC recent changes listener provides an API similar to the rcstream API, but is based on IRC. This allows pywikibot users to switch easily between the two. Change-Id: Iafa1996e67ba9398ff200beba8f3530e7f652ed8 --- A pywikibot/comms/irc_listener.py 1 file changed, 143 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/69/204769/1 diff --git a/pywikibot/comms/irc_listener.py b/pywikibot/comms/irc_listener.py new file mode 100644 index 0000000..0ce4382 --- /dev/null +++ b/pywikibot/comms/irc_listener.py @@ -0,0 +1,143 @@ +""" +IRC listener to recent changes of wikis of Wikimedia foundation with similar API to that of rcstream. + +(c) 2015 Eranroz + +License: MIT license +""" +import pywikibot +from pywikibot.botirc import IRCBot +import threading +import sys +import re +if sys.version_info[0] > 2: + from queue import Queue, Empty +else: + from Queue import Queue, Empty + + +class IRCRecentChangesBot(IRCBot): + """ + IRC bot for parsing recent changes IRC messages in similar way to rcstream. 
+ + @param site: the Pywikibot.Site object to yield changes for + @param channel: IRC channel to listen for + @param nickname: nickname in the IRC channel + @param server: IRC server + @param filter_generator: generator to use for filtering the yielded pages + """ + def __init__(self, site, channel, nickname, server, filter_generator=None): + super(IRCRecentChangesBot, self).__init__(site, channel, nickname, server) + self.re_new_page_diff = re.compile('.+?index\.php\?oldid=(?P<new>[0-9]+)') + self.re_edit_page_diff = re.compile('.+?index\.php\?diff=(?P<new>[0-9]+)&oldid=(?P<old>[0-9]+)') + self.queue = Queue() + if filter_generator is None: + filter_generator = lambda x: x + self.filter_generator = filter_generator + + def on_pubmsg(self, c, e): + match = self.re_edit.match(e.arguments()[0]) + if not match: + return + + try: + msg = e.arguments()[0].decode('utf-8') + except UnicodeDecodeError: + return + + page_title_end = msg.find(u'\x0314', 9) + if page_title_end == -1: + return + name = msg[8:page_title_end] + page = pywikibot.Page(self.site, name) + + is_new = 'N' in match.group('flags') + if is_new: + diff_match = self.re_new_page_diff.match(match.group('url')) + if not diff_match: + return + diff_revisions = {'new': int(diff_match.group('new')), 'old': 0} + else: + diff_match = self.re_edit_page_diff.match(match.group('url')) + if not diff_match: + return + + diff_revisions = {'new': int(diff_match.group('new')), 'old': int(diff_match.group('old'))} + + diff_data = { + 'type': 'edit', + 'comment': match.group('summary'), + 'user': match.group('user'), + 'namespace': page.namespace(), + 'revision': diff_revisions, + 'diff_bytes': int(match.group('bytes')), + 'bot': 'B' in match.group('flags') + } + page._rcinfo = diff_data + + # use of generator rather than simple if allow easy use of pagegenerators + try: + for filtered_page in self.filter_generator([page]): + self.queue.put(filtered_page) + except: + # whatever reason the filter fail we can ignore it + pass 
+ + +class IRCRcBotThread(threading.Thread): + """Wrapper thread for IRCRecentChangesBot + + @param site: the Pywikibot.Site object to yield changes for + @param channel: IRC channel to listen for + @param nickname: nickname in the IRC channel + @param server: IRC server + @param filter_generator: generator to use for filtering the yielded pages + """ + def __init__(self, site, channel, nickname, server, filter_generator=None): + super(IRCRcBotThread, self).__init__() + self.daemon = True + self.irc_bot = IRCRecentChangesBot(site, channel, nickname, server, filter_generator) + + def run(self): + self.irc_bot.start() + + def stop(self): + self.irc_bot.die() + + +def irc_rc_listener(site, filter_gen=None): + """RC Changes Generator. Yields changes received from IRC channel. + + @param site: the Pywikibot.Site object to yield live recent changes for + @type site: Pywikibot.BaseSite + @param filter_generator: generator to use for filtering the yielded pages + """ + channel = '#{}.{}'.format(site.lang, site.family.name) + server = 'irc.wikimedia.org' + nickname = site.username() + irc_thread = IRCRcBotThread(site, channel, nickname, server, filter_gen) + irc_thread.start() + while True: + try: + element = irc_thread.irc_bot.queue.get(timeout=0.1) + except Empty: + continue + if element is None: + return + yield element + + +def main(): + """Demo for using IRC recent changes listener + """ + print('creating site') + site = pywikibot.Site() + print('starting bot') + for p in irc_rc_listener(site): + print(p.title()) + print(p._rcinfo) + print('------') + + +if __name__ == '__main__': + main() -- To view, visit https://gerrit.wikimedia.org/r/204769 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: Iafa1996e67ba9398ff200beba8f3530e7f652ed8 Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Eranroz <eranro...@gmail.com> _______________________________________________ 
MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits