jenkins-bot has submitted this change and it was merged.

Change subject: Port patrol.py to core
......................................................................


Port patrol.py to core

Made minor necessary changes to make it compatible with core
Depends on mwlib

Bug: T74206
Change-Id: I8612ce905d149d0e440d819f62f923385a583920
---
A scripts/patrol.py
M setup.py
M tests/script_tests.py
3 files changed, 529 insertions(+), 1 deletion(-)

Approvals:
  John Vandenberg: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/patrol.py b/scripts/patrol.py
new file mode 100644
index 0000000..b347c1f
--- /dev/null
+++ b/scripts/patrol.py
@@ -0,0 +1,525 @@
+#!/usr/bin/python
+# -*- coding: utf-8  -*-
+"""
+The bot marks edits as patrolled, based on information obtained from a whitelist.
+
+This bot obtains a list of recent changes and newpages and marks the
+edits as patrolled based on a whitelist.
+See http://en.wikisource.org/wiki/User:JVbot/patrol_whitelist
+
+Commandline parameters that are supported:
+
+-namespace         Filter the page generator to only yield pages in
+                    specified namespaces
+-ask               If True, confirm each patrol action
+-whitelist         page title for whitelist (optional)
+-autopatroluserns  Takes user consent to automatically patrol
+-versionchecktime  Check versionchecktime lapse in sec
+
+"""
+#
+# (C) Pywikibot team, 2011-2015
+#
+# Distributed under the terms of the MIT license.
+#
+__version__ = '$Id$'
+import pywikibot
+from pywikibot import pagegenerators, Bot
+import mwlib.uparser  # used to parse the whitelist
+import mwlib.parser  # used to parse the whitelist
+import time
+
+_logger = 'patrol'
+
+# This is required for the text that is shown when you run this script
+# with the parameter -help.
+docuReplacements = {
+    '&params;': pagegenerators.parameterHelp
+}
+
+
class PatrolBot(Bot):

    """Bot marks the edits as patrolled based on info obtained by whitelist."""

    # Localised name of the whitelist page
    whitelist_subpage_name = {
        'en': u'patrol_whitelist',
    }

    def __init__(self, **kwargs):
        """
        Constructor.

        @kwarg feed            - The changes feed to work on (Newpages
                                  or Recentchanges)
        @kwarg ask             - If True, confirm each patrol action
        @kwarg whitelist       - page title for whitelist (optional)
        @kwarg autopatroluserns - Takes user consent to automatically patrol
        @kwarg versionchecktime - Check versionchecktime lapse in sec
        """
        self.availableOptions.update({
            'ask': False,
            'feed': None,
            'whitelist': None,
            'versionchecktime': 300,
            'autopatroluserns': False
        })
        super(PatrolBot, self).__init__(**kwargs)
        self.recent_gen = True
        self.user = None
        self.site = pywikibot.Site()
        if self.getOption('whitelist'):
            self.whitelist_pagename = self.getOption('whitelist')
        else:
            local_whitelist_subpage_name = pywikibot.translate(
                self.site, self.whitelist_subpage_name, fallback=True)
            self.whitelist_pagename = u'%s:%s/%s' % (
                self.site.namespace(2),
                self.site.username(),
                local_whitelist_subpage_name)
        # Parsed whitelist: dict mapping username -> list of page rules.
        # Must start as None so run() triggers load_whitelist().  (The
        # previous code assigned the raw 'whitelist' option here, i.e. a
        # page *title* string; that prevented the whitelist from ever
        # being fetched and made 'username in self.whitelist' perform a
        # substring test against the title.)
        self.whitelist = None
        self.whitelist_ts = 0  # timestamp of the whitelist page revision
        self.whitelist_load_ts = 0  # wall-clock time of the last (re)load

        self.highest_rcid = 0  # used to track loops
        self.last_rcid = 0
        self.repeat_start_ts = 0

        self.rc_item_counter = 0  # counts how many items have been reviewed
        self.patrol_counter = 0  # and how many times an action was taken

    def load_whitelist(self):
        """Fetch and parse the whitelist page, unless a recent copy is held."""
        # Check for a more recent version after versionchecktime in sec.
        if (self.whitelist_load_ts and (time.time() - self.whitelist_load_ts <
                                        self.getOption('versionchecktime'))):
            if pywikibot.config.verbose_output:
                pywikibot.output(u'Whitelist not stale yet')
            return

        whitelist_page = pywikibot.Page(self.site,
                                        self.whitelist_pagename)

        if not self.whitelist:
            pywikibot.output(u'Loading %s' % self.whitelist_pagename)

        try:
            if self.whitelist_ts:
                # check for a more recent version
                h = whitelist_page.revisions()
                last_edit_ts = next(h).timestamp
                if last_edit_ts == self.whitelist_ts:
                    # As there hasn't been any change to the whitelist
                    # it has been effectively reloaded 'now'
                    self.whitelist_load_ts = time.time()
                    if pywikibot.config.verbose_output:
                        pywikibot.output(u'Whitelist not modified')
                    return

            if self.whitelist:
                pywikibot.output(u'Reloading whitelist')

            # Fetch whitelist
            wikitext = whitelist_page.get()
            # Parse whitelist
            self.whitelist = self.parse_page_tuples(wikitext, self.user)
            # Record timestamp
            self.whitelist_ts = whitelist_page.editTime()
            self.whitelist_load_ts = time.time()
        except Exception as e:
            # Deliberately broad: fall back to the previously loaded
            # whitelist if one exists; cascade if there isnt a whitelist
            # to fallback on.
            if not self.whitelist:
                raise
            pywikibot.error(u'%s' % e)

    @staticmethod
    def add_to_tuples(tuples, user, page):
        """Update tuples 'user' key by adding page."""
        if pywikibot.config.verbose_output:
            pywikibot.output(u"Adding %s:%s" % (user, page.title()))

        tuples.setdefault(user, []).append(page)

    def in_list(self, pagelist, title):
        """Return the whitelist entry matching title, or None."""
        if pywikibot.config.verbose_output:
            pywikibot.output(u'Checking whitelist for: %s' % title)

        # quick check for exact match
        if title in pagelist:
            return title

        # quick check for wildcard
        if '' in pagelist:
            if pywikibot.config.verbose_output:
                pywikibot.output(u'wildcarded')
            return '.*'

        for item in pagelist:
            if pywikibot.config.verbose_output:
                pywikibot.output(u'checking against whitelist item = %s' % item)

            if isinstance(item, PatrolRule):
                if pywikibot.config.verbose_output:
                    pywikibot.output(u'invoking programmed rule')
                if item.match(title):
                    return item

            elif title_match(item, title):
                return item

        if pywikibot.config.verbose_output:
            pywikibot.output(u'not found')

    def parse_page_tuples(self, wikitext, user=None):
        """Parse a whitelist page into a dict of username -> page rules."""
        tuples = {}

        # for any structure, the only first 'user:' page
        # is registered as the user the rest of the structure
        # refers to.
        def process_children(obj, current_user):
            # Walk the mwlib parse tree depth-first, tracking the user.
            pywikibot.debug(u'Parsing node: %s' % obj, _logger)
            for c in obj.children:
                temp = process_node(c, current_user)
                if temp and not current_user:
                    current_user = temp

        def process_node(obj, current_user):
            # links are analysed; interwiki links are included because mwlib
            # incorrectly calls 'Wikisource:' namespace links an interwiki
            if isinstance(obj, (mwlib.parser.NamespaceLink,
                                mwlib.parser.InterwikiLink,
                                mwlib.parser.ArticleLink)):
                if obj.namespace == -1:
                    # the parser accepts 'special:prefixindex/' as a wildcard
                    # this allows a prefix that doesnt match an existing page
                    # to be a blue link, and can be clicked to see what pages
                    # will be included in the whitelist
                    if obj.target[:20].lower() == 'special:prefixindex/':
                        if len(obj.target) == 20:
                            if pywikibot.config.verbose_output:
                                pywikibot.output(u'Whitelist everything')
                            page = ''
                        else:
                            page = obj.target[20:]
                            if pywikibot.config.verbose_output:
                                pywikibot.output(u'Whitelist prefixindex hack '
                                                 u'for: %s' % page)
                            # p = pywikibot.Page(self.site, obj.target[20:])
                            # obj.namespace = p.namespace
                            # obj.target = p.title()
                    else:
                        # Any other special-page link cannot be whitelisted.
                        # (Previously 'page' was left unbound here, causing
                        # an UnboundLocalError further below.)
                        return

                elif obj.namespace == 2 and not current_user:
                    # if a target user hasn't been found yet, and the link is
                    # 'user:'
                    # the user will be the target of subsequent rules
                    page_prefix_len = len(self.site.namespace(2))
                    current_user = obj.target[(page_prefix_len + 1):]
                    if pywikibot.config.verbose_output:
                        pywikibot.output(u'Whitelist user: %s' % current_user)
                    return current_user
                else:
                    page = obj.target

                if current_user:
                    if not user or current_user == user:
                        if self.is_wikisource_author_page(page):
                            if pywikibot.config.verbose_output:
                                pywikibot.output(u'Whitelist author: %s' % page)
                            author = LinkedPagesRule(page)
                            self.add_to_tuples(tuples, current_user, author)
                        else:
                            if pywikibot.config.verbose_output:
                                pywikibot.output(u'Whitelist page: %s' % page)
                            self.add_to_tuples(tuples, current_user, page)
                    elif pywikibot.config.verbose_output:
                        pywikibot.output(u'Discarding whitelist page for '
                                         u'another user: %s' % page)
                else:
                    raise Exception(u'No user set for page %s' % page)
            else:
                process_children(obj, current_user)

        root = mwlib.uparser.parseString(title='Not used', raw=wikitext)
        process_children(root, None)

        return tuples

    def is_wikisource_author_page(self, title):
        """Return True if title is an author page on a wikisource site."""
        if self.site.family.name != 'wikisource':
            return

        author_ns = 0
        try:
            author_ns = self.site.family.authornamespaces[self.site.lang][0]
        except (AttributeError, KeyError, IndexError):
            # Family has no author namespace configured for this language.
            pass
        if not author_ns:
            # Without an author namespace nothing can match.  (Previously
            # a bare 'except:' was used and 'author_ns_prefix' was then
            # referenced unconditionally, raising NameError here.)
            return
        author_ns_prefix = self.site.namespace(author_ns)
        pywikibot.debug(u'Author ns: %d; name: %s'
                        % (author_ns, author_ns_prefix), _logger)
        if title.startswith(author_ns_prefix + ':'):
            if pywikibot.config.verbose_output:
                author_page_name = title[len(author_ns_prefix) + 1:]
                pywikibot.output(u'Found author %s' % author_page_name)
            return True

    def run(self, feed=None):
        """Process the given feed, loading the whitelist first if needed."""
        if self.whitelist is None:
            self.load_whitelist()
        if not feed:
            feed = self.getOption('feed')
        for page in feed:
            self.treat(page)

    def treat(self, page):
        """Patrol one recent-change/new-page item (an API result dict)."""
        choice = False
        try:
            # page: title, date, username, comment, loginfo, rcid, token
            username = page['user']
            # when the feed isnt from the API, it used to contain
            # '(not yet written)' or '(page does not exist)' when it was
            # a redlink
            rcid = page['rcid']
            title = page['title']
            if not rcid:
                raise Exception('rcid not present')

            # check whether we have wrapped around to higher rcids
            # which indicates a new RC feed is being processed
            if rcid > self.last_rcid:
                # refresh the whitelist
                self.load_whitelist()
                self.repeat_start_ts = time.time()

            if pywikibot.config.verbose_output or self.getOption('ask'):
                pywikibot.output(u'User %s has created or modified page %s'
                                 % (username, title))

            if self.getOption('autopatroluserns') and (page['ns'] == 2 or
                                                       page['ns'] == 3):
                # simple rule to whitelist any user editing their own userspace
                if title.partition(':')[2].split('/')[0].startswith(username):
                    if pywikibot.config.verbose_output:
                        pywikibot.output(u'%s is whitelisted to modify %s'
                                         % (username, title))
                    choice = True

            if not choice and username in self.whitelist:
                if self.in_list(self.whitelist[username], title):
                    if pywikibot.config.verbose_output:
                        pywikibot.output(u'%s is whitelisted to modify %s'
                                         % (username, title))
                    choice = True

            if self.getOption('ask'):
                choice = pywikibot.input_yn(
                    u'Do you want to mark page as patrolled?',
                    automatic_quit=False)

            # Patrol the page
            if choice:
                # list() iterates over patrol() which returns a generator
                list(self.site.patrol(rcid))
                self.patrol_counter += 1
                pywikibot.output(u'Patrolled %s (rcid %d) by user %s'
                                 % (title, rcid, username))
            else:
                if pywikibot.config.verbose_output:
                    pywikibot.output(u'Skipped')

            if rcid > self.highest_rcid:
                self.highest_rcid = rcid
            self.last_rcid = rcid
            self.rc_item_counter += 1

        # 'title' here is a plain string from the API dict, so the original
        # 'title(asLink=True)' call raised TypeError inside the handler.
        except pywikibot.NoPage:
            pywikibot.output(u'Page %s does not exist; skipping.'
                             % page['title'])
        except pywikibot.IsRedirectPage:
            pywikibot.output(u'Page %s is a redirect; skipping.'
                             % page['title'])
+
+
def title_match(prefix, title):
    """Return True when title starts with prefix, otherwise None."""
    verbose = pywikibot.config.verbose_output
    if verbose:
        pywikibot.output(u'Matching %s to prefix %s' % (title, prefix))
    if not title.startswith(prefix):
        return
    if verbose:
        pywikibot.output(u'substr match')
    return True
+
+
class PatrolRule(object):

    """Base class for programmed whitelist rules.

    A rule wraps a page title; subclasses implement match() to decide
    whether a changed page is covered by the rule.  (The original
    docstring here was garbled by line-wrapping in transit.)
    """

    def __init__(self, page_title):
        """
        Constructor.

        @param page_title: The page title for this rule
        @type  page_title: unicode
        """
        self.page_title = page_title

    def title(self):
        """Obtain page title."""
        return self.page_title

    def match(self, page):
        """Added for future use; base rule matches nothing."""
        pass
+
+
class LinkedPagesRule(PatrolRule):

    """Matches of page site title and linked pages title."""

    def __init__(self, page_title):
        """Constructor.

        @param page_title: The page title for this rule
        @type  page_title: unicode
        """
        self.site = pywikibot.Site()
        self.page_title = page_title
        # Titles linked from page_title, fetched lazily on first match().
        # None means 'not fetched yet'; an empty list is a valid cached
        # result.
        self.linkedpages = None

    def match(self, page_title):
        """Return a match if page_title is the rule page or linked from it."""
        if page_title == self.page_title:
            return True

        if self.site.family.name != 'wikisource':
            raise Exception('This is a wikisource rule')

        # Use 'is None' rather than truthiness: with 'if not ...' a page
        # without any links was re-fetched from the site on every call.
        if self.linkedpages is None:
            if pywikibot.config.verbose_output:
                pywikibot.output(u'loading page links on %s' % self.page_title)
            rule_page = pywikibot.Page(self.site, self.page_title)
            self.linkedpages = [linked.title()
                                for linked in rule_page.linkedPages()]
            if pywikibot.config.verbose_output:
                pywikibot.output(u'Loaded %d page links'
                                 % len(self.linkedpages))

        for linked_title in self.linkedpages:
            if pywikibot.config.verbose_output:
                pywikibot.output(u"Checking against '%s'" % linked_title)
            if title_match(linked_title, page_title):
                if pywikibot.config.verbose_output:
                    pywikibot.output(u'Matched.')
                return linked_title
+
+
def api_feed_repeater(gen, delay=0, repeat=False, number=1000, namespaces=None,
                      user=None, recent_new_gen=True):
    """Generator which loads pages details to be processed."""
    while True:
        # Both feed flavours share the same filter arguments; the
        # recentchanges flavour additionally asks for result dicts.
        kwargs = dict(step=number, namespaces=namespaces, user=user,
                      showPatrolled=False)
        if not recent_new_gen:
            kwargs['returndict'] = True
        for item in gen(**kwargs):
            # newpages yields the page directly; recentchanges yields a
            # (page, dict) tuple whose second element is wanted.
            yield item if recent_new_gen else item[1]
        if not repeat:
            break
        pywikibot.output(u'Sleeping for %d seconds' % delay)
        time.sleep(delay)
+
+
def main(*args):
    """
    Process command line arguments and invoke PatrolBot.

    If args is an empty list, sys.argv is used (via pywikibot.handle_args).

    @param args: command line arguments
    """
    # This factory is responsible for processing command line arguments
    # that are also used by other scripts and that determine on which pages
    # to work on.
    usercontribs = None
    gen = None
    recentchanges = False
    newpages = False
    repeat = False
    genFactory = pagegenerators.GeneratorFactory()
    options = {}

    # Parse command line arguments
    for arg in pywikibot.handle_args(args):
        if arg.startswith('-ask'):
            options['ask'] = True
        elif arg.startswith('-autopatroluserns'):
            options['autopatroluserns'] = True
        elif arg.startswith('-repeat'):
            repeat = True
        elif arg.startswith('-newpages'):
            newpages = True
        elif arg.startswith('-recentchanges'):
            recentchanges = True
        elif arg.startswith('-usercontribs:'):
            usercontribs = arg[14:]
        elif arg.startswith('-versionchecktime:'):
            versionchecktime = arg[len('-versionchecktime:'):]
            options['versionchecktime'] = int(versionchecktime)
        elif arg.startswith("-whitelist:"):
            options['whitelist'] = arg[len('-whitelist:'):]
        else:
            # Fall through to the page-generator factory (handles e.g.
            # -namespace).  Anything it does not consume is treated as a
            # generic 'key:value' bot option.
            generator = genFactory.handleArg(arg)
            if not generator:
                if ':' in arg:
                    # NOTE(review): the leading '-' is kept in the option
                    # key here (e.g. '-foo:bar' -> options['-foo']) —
                    # verify that Bot accepts such keys.
                    m = arg.split(':')
                    options[m[0]] = m[1]

    site = pywikibot.Site()
    site.login()

    if usercontribs:
        pywikibot.output(u'Processing user: %s' % usercontribs)

    # Default feed selection: on Wikipedia use the (larger) newpages feed,
    # elsewhere use recentchanges; explicit flags override this.
    newpage_count = 300
    if not newpages and not recentchanges and not usercontribs:
        if site.family.name == 'wikipedia':
            newpages = True
            newpage_count = 5000
        else:
            recentchanges = True

    bot = PatrolBot(**options)

    # With -usercontribs both feeds are processed for that user.
    if newpages or usercontribs:
        pywikibot.output(u'Newpages:')
        gen = site.newpages
        feed = api_feed_repeater(gen, delay=60, repeat=repeat,
                                 number=newpage_count, user=usercontribs,
                                 namespaces=genFactory.namespaces,
                                 recent_new_gen=False)
        bot.run(feed)

    if recentchanges or usercontribs:
        pywikibot.output(u'Recentchanges:')
        gen = site.recentchanges
        feed = api_feed_repeater(gen, delay=60, repeat=repeat, number=1000,
                                 namespaces=genFactory.namespaces,
                                 user=usercontribs)
        bot.run(feed)

    # Summary: actions taken / items reviewed.
    pywikibot.output(u'%d/%d patrolled'
                     % (bot.patrol_counter, bot.rc_item_counter))

if __name__ == '__main__':
    main()
diff --git a/setup.py b/setup.py
index 274cdc2..fe8bf18 100644
--- a/setup.py
+++ b/setup.py
@@ -39,7 +39,8 @@
     'script_wui.py': [irc_dep, 'lunatic-python', 'crontab'],
     # Note: None of the 'lunatic-python' repos on github support MS Windows.
     'flickrripper.py': ['Pillow'],
-    'states_redirect.py': ['pycountry']
+    'states_redirect.py': ['pycountry'],
+    'patrol': ['mwlib'],
 }
 # flickrapi 1.4.4 installs a root logger in verbose mode; 1.4.5 fixes this.
 # The problem doesnt exist in flickrapi 2.x.
diff --git a/tests/script_tests.py b/tests/script_tests.py
index a85088b..fb47dea 100644
--- a/tests/script_tests.py
+++ b/tests/script_tests.py
@@ -31,6 +31,7 @@
     'flickrripper': ['flickrapi'],
     'match_images': ['PIL.ImageTk'],
     'states_redirect': ['pycountry'],
+    'patrol': ['mwlib'],
 }
 
 if sys.version_info < (2, 7):
@@ -108,6 +109,7 @@
     'revertbot',
     'noreferences',
     'nowcommons',
+    'patrol',
     'script_wui',
     'shell',
     'states_redirect',

-- 
To view, visit https://gerrit.wikimedia.org/r/184118
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I8612ce905d149d0e440d819f62f923385a583920
Gerrit-PatchSet: 28
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Prianka <[email protected]>
Gerrit-Reviewer: John Vandenberg <[email protected]>
Gerrit-Reviewer: Ladsgroup <[email protected]>
Gerrit-Reviewer: Merlijn van Deen <[email protected]>
Gerrit-Reviewer: Mpaa <[email protected]>
Gerrit-Reviewer: Prianka <[email protected]>
Gerrit-Reviewer: XZise <[email protected]>
Gerrit-Reviewer: jenkins-bot <>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to