Eranroz has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/204769

Change subject: Adding IRC recent changes listener
......................................................................

Adding IRC recent changes listener

The IRC recent changes listener provides an API similar to the rcstream API,
but is based on IRC.
The shared API allows pywikibot users to switch easily between the two.

Change-Id: Iafa1996e67ba9398ff200beba8f3530e7f652ed8
---
A pywikibot/comms/irc_listener.py
1 file changed, 143 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/69/204769/1

diff --git a/pywikibot/comms/irc_listener.py b/pywikibot/comms/irc_listener.py
new file mode 100644
index 0000000..0ce4382
--- /dev/null
+++ b/pywikibot/comms/irc_listener.py
@@ -0,0 +1,143 @@
+"""
+IRC listener for recent changes on Wikimedia Foundation wikis, with an API
+similar to that of rcstream.
+
+(c) 2015 Eranroz
+
+License: MIT license
+"""
+import pywikibot
+from pywikibot.botirc import IRCBot
+import threading
+import sys
+import re
+if sys.version_info[0] > 2:
+    from queue import Queue, Empty
+else:
+    from Queue import Queue, Empty
+
+
+class IRCRecentChangesBot(IRCBot):
+    """
+    IRC bot for parsing recent changes IRC messages in similar way to rcstream.
+
+    Each public message that matches self.re_edit is converted to a
+    pywikibot.Page whose _rcinfo attribute describes the change. Pages that
+    pass filter_generator are put on self.queue for a consumer to read.
+
+    NOTE(review): re_edit is not assigned in this class; it is presumably
+    provided by IRCBot - confirm.
+
+    @param site: the Pywikibot.Site object to yield changes for
+    @param channel: IRC channel to listen for
+    @param nickname: nickname in the IRC channel
+    @param server: IRC server
+    @param filter_generator: generator to use for filtering the yielded pages
+    """
+
+    def __init__(self, site, channel, nickname, server, filter_generator=None):
+        """Set up the diff URL patterns, the output queue and the filter."""
+        super(IRCRecentChangesBot, self).__init__(site, channel, nickname,
+                                                  server)
+        # Raw strings keep the regex escapes (\. and \?) away from Python's
+        # own string-escape processing; the patterns themselves are unchanged.
+        self.re_new_page_diff = re.compile(
+            r'.+?index\.php\?oldid=(?P<new>[0-9]+)')
+        self.re_edit_page_diff = re.compile(
+            r'.+?index\.php\?diff=(?P<new>[0-9]+)&oldid=(?P<old>[0-9]+)')
+        self.queue = Queue()
+        if filter_generator is None:
+            # Identity filter: every parsed page is queued.
+            filter_generator = lambda x: x
+        self.filter_generator = filter_generator
+
+    def on_pubmsg(self, c, e):
+        """Parse one public channel message and queue the resulting page.
+
+        The message is dropped without any report when it does not match
+        re_edit, is not valid UTF-8, lacks the title terminator, or when
+        its URL contains no recognisable revision ids.
+
+        @param c: IRC connection (not used here)
+        @param e: IRC event; its first argument is the raw message
+        """
+        raw_msg = e.arguments()[0]
+        # NOTE(review): matching happens on the undecoded message, so the
+        # match groups used below are undecoded too - confirm this is
+        # intended under Python 3.
+        match = self.re_edit.match(raw_msg)
+        if not match:
+            return
+
+        try:
+            msg = raw_msg.decode('utf-8')
+        except UnicodeDecodeError:
+            return
+
+        # The title is taken from offset 8 up to the next u'\x0314' marker;
+        # presumably this skips the leading colour codes and '[[' (TODO
+        # confirm against the message format).
+        page_title_end = msg.find(u'\x0314', 9)
+        if page_title_end == -1:
+            return
+        name = msg[8:page_title_end]
+        page = pywikibot.Page(self.site, name)
+
+        is_new = 'N' in match.group('flags')
+        if is_new:
+            # The new-page URL pattern captures a single revision id, so
+            # the old revision is reported as 0.
+            diff_match = self.re_new_page_diff.match(match.group('url'))
+            if not diff_match:
+                return
+            diff_revisions = {'new': int(diff_match.group('new')), 'old': 0}
+        else:
+            diff_match = self.re_edit_page_diff.match(match.group('url'))
+            if not diff_match:
+                return
+
+            diff_revisions = {'new': int(diff_match.group('new')),
+                              'old': int(diff_match.group('old'))}
+
+        diff_data = {
+            'type': 'edit',
+            'comment': match.group('summary'),
+            'user': match.group('user'),
+            'namespace': page.namespace(),
+            'revision': diff_revisions,
+            'diff_bytes': int(match.group('bytes')),
+            'bot': 'B' in match.group('flags')
+        }
+        page._rcinfo = diff_data
+
+        # A generator, rather than a plain predicate, allows easy use of
+        # pagegenerators as filters.
+        try:
+            for filtered_page in self.filter_generator([page]):
+                self.queue.put(filtered_page)
+        except Exception:
+            # Best effort: a failing filter drops this change only.
+            # KeyboardInterrupt and SystemExit are deliberately not caught.
+            pass
+
+
+class IRCRcBotThread(threading.Thread):
+    """Daemon thread that runs an IRCRecentChangesBot.
+
+    The bot is built in the constructor and exposed as the irc_bot
+    attribute.
+
+    @param site: the Pywikibot.Site object to yield changes for
+    @param channel: IRC channel to listen for
+    @param nickname: nickname in the IRC channel
+    @param server: IRC server
+    @param filter_generator: generator to use for filtering the yielded pages
+    """
+
+    def __init__(self, site, channel, nickname, server, filter_generator=None):
+        super(IRCRcBotThread, self).__init__()
+        bot = IRCRecentChangesBot(
+            site, channel, nickname, server, filter_generator)
+        self.irc_bot = bot
+        self.daemon = True
+
+    def run(self):
+        """Thread body: start the wrapped bot."""
+        self.irc_bot.start()
+
+    def stop(self):
+        """Shut the wrapped bot down by calling its die() method."""
+        self.irc_bot.die()
+
+
+def irc_rc_listener(site, filter_gen=None):
+    """RC Changes Generator. Yields changes received from IRC channel.
+
+    Starts an IRCRcBotThread for the channel '#<site.lang>.<family name>'
+    on irc.wikimedia.org, using the site's username as nickname, and
+    yields the pages the bot puts on its queue. The generator ends when
+    None is read from the queue.
+
+    @param site: the Pywikibot.Site object to yield live recent changes for
+    @type site: Pywikibot.BaseSite
+    @param filter_gen: generator to use for filtering the yielded pages
+    """
+    # Explicit field indexes: auto-numbered '{}' fields need Python 2.7+.
+    channel = '#{0}.{1}'.format(site.lang, site.family.name)
+    server = 'irc.wikimedia.org'
+    nickname = site.username()
+    irc_thread = IRCRcBotThread(site, channel, nickname, server, filter_gen)
+    irc_thread.start()
+    while True:
+        try:
+            # Poll with a short timeout instead of blocking indefinitely.
+            element = irc_thread.irc_bot.queue.get(timeout=0.1)
+        except Empty:
+            continue
+        if element is None:
+            return
+        yield element
+
+
+def main():
+    """Demo: print each page received from the IRC listener."""
+    print('creating site')
+    default_site = pywikibot.Site()
+    print('starting bot')
+    changed_pages = irc_rc_listener(default_site)
+    for changed in changed_pages:
+        print(changed.title())
+        print(changed._rcinfo)
+        print('------')
+
+
+if __name__ == '__main__':
+    main()

-- 
To view, visit https://gerrit.wikimedia.org/r/204769
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: Iafa1996e67ba9398ff200beba8f3530e7f652ed8
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Eranroz <eranro...@gmail.com>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to