Xqt has uploaded a new change for review.
https://gerrit.wikimedia.org/r/281224
Change subject: [WIP] use pagegenerators options for checkimages
......................................................................
[WIP] use pagegenerators options for checkimages
DO NOT SUBMIT YET
Bug: T76429
Bug: T100628
Change-Id: I9e076673d1b6077fda459603142af08843e19e1c
---
M pywikibot/data/api.py
M pywikibot/pagegenerators.py
M scripts/checkimages.py
3 files changed, 133 insertions(+), 153 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/24/281224/1
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index c3f1ddf..c50964d 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -2625,7 +2625,7 @@
self.api_limit),
_logger)
- def set_namespace(self, namespaces):
+ def set_namespace(self, namespaces, quiet=False):
"""Set a namespace filter on this query.
@param namespaces: namespace identifiers to limit query results
@@ -2633,6 +2633,9 @@
or a single instance of those types. May be a '|' separated
list of namespace identifiers. An empty iterator clears any
namespace restriction.
+ @param quiet: if True, do not show a warning when the module does
+ not support a namespace parameter
+ @type quiet: bool
@raises KeyError: a namespace identifier was not resolved
@raises TypeError: a namespace identifier has an inappropriate
type such as NoneType or bool, or more than one namespace
@@ -2642,8 +2645,9 @@
param = self.site._paraminfo.parameter('query+' + self.limited_module,
'namespace')
if not param:
- pywikibot.warning(u'{0} module does not support a namespace '
- 'parameter'.format(self.limited_module))
+ if not quiet:
+ pywikibot.warning(u'{0} module does not support a namespace '
+ 'parameter'.format(self.limited_module))
return
if isinstance(namespaces, basestring):
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 224f93a..5ac5ad6 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -363,7 +363,7 @@
# When not in intersect mode, _filter_unique could be:
# functools.partial(filter_unique, container=global_seen_list)
- def __init__(self, site=None, positional_arg_name=None):
+ def __init__(self, site=None, positional_arg_name=None, ns=None):
"""
Constructor.
@@ -372,9 +372,17 @@
@param positional_arg_name: generator to use for positional args,
which do not begin with a hyphen
@type positional_arg_name: basestring
+ @param ns: list of namespace numbers
"""
self.gens = []
- self._namespaces = []
+ self._site = site
+ if ns is None:
+ self._namespaces = []
+ self.default_ns_setting = False
+ else:
+ self._namespaces = ns
+ self.namespaces # change the storage to immutable
+ self.default_ns_setting = True
self.limit = None
self.qualityfilter_list = []
self.articlefilter_list = []
@@ -383,7 +391,6 @@
self.catfilter_list = []
self.intersect = False
self.subpage_max_depth = None
- self._site = site
self._positional_arg_name = positional_arg_name
self._sparql = None
@@ -439,7 +446,8 @@
for i in range(len(self.gens)):
if isinstance(self.gens[i], pywikibot.data.api.QueryGenerator):
if self.namespaces:
- self.gens[i].set_namespace(self.namespaces)
+ self.gens[i].set_namespace(
+ self.namespaces, quiet=self.default_ns_setting)
if self.limit:
self.gens[i].set_maximum_items(self.limit)
else:
@@ -752,7 +760,8 @@
elif arg == '-page':
if not value:
value = pywikibot.input(u'What page do you want to use?')
- gen = [pywikibot.Page(pywikibot.Link(value, self.site))]
+ ns = list(self.namespaces)[0] if len(self.namespaces) == 1 else 0
+ gen = [pywikibot.Page(pywikibot.Link(value, self.site, ns))]
elif arg == '-uncatfiles':
gen = UnCategorizedImageGenerator(site=self.site)
elif arg == '-uncatcat':
diff --git a/scripts/checkimages.py b/scripts/checkimages.py
index 4a9a344..348065a 100755
--- a/scripts/checkimages.py
+++ b/scripts/checkimages.py
@@ -13,6 +13,8 @@
This script understands the following command-line arguments:
+&params;
+
-limit The number of images to check (default: 80)
-commons The Bot will check if an image on Commons has the same name
@@ -31,22 +33,13 @@
-sleep[:#] Time in seconds between repeat runs (default: 30)
--time[:#] The -time option is deprecated. Use -sleep instead.
-
-wait[:#] Wait x second before check the images (default: 0)
-skip[:#] The bot skip the first [:#] images (default: 0)
--start[:#] Use allpages() as generator
- (it starts already from File:[:#])
+-regex[:#] Use regex, must be used with -fromurl
--cat[:#] Use a category as generator
-
--regex[:#] Use regex, must be used with -url or -page
-
--page[:#] Define the name of the wikipage where are the images
-
--url[:#] Define the url where are the images
+-fromurl[:#] Define the url where the images are located
-nologerror If given, this option will disable the error that is risen
when the log is full.
@@ -97,10 +90,11 @@
import pywikibot
from pywikibot import i18n
-from pywikibot import pagegenerators as pg
+from pywikibot import pagegenerators
from pywikibot.exceptions import NotEmailableError
from pywikibot.family import Family
+from pywikibot.site import Namespace
from pywikibot.tools import deprecated, StringTypes
###############################################################################
@@ -509,6 +503,10 @@
# END OF CONFIGURATION.
+docuReplacements = {
+ '&params;': pagegenerators.parameterHelp,
+}
+
SETTINGS_REGEX = re.compile(r"""
<-------\ ------->\n
\*[Nn]ame\ ?=\ ?['"](.*?)['"]\n
@@ -629,7 +627,8 @@
try:
resPutMex = self.tag_image(unver)
except pywikibot.NoPage:
- pywikibot.output(u"The page has been deleted! Skip!")
+ pywikibot.output(
+ 'The page does not exist on current site! Skip!')
break
except pywikibot.EditConflict:
pywikibot.output(u"Edit conflict! Skip!")
@@ -673,7 +672,8 @@
try:
reportPageText = reportPageObject.get()
except pywikibot.NoPage:
- pywikibot.output(u'%s has been deleted...' % self.imageName)
+ pywikibot.output(
+ '{0} does not exist on current site...'.format(self.imageName))
return
# You can use this function also to find only the user that
# has upload the image (FixME: Rewrite a bit this part)
@@ -1505,8 +1505,9 @@
try:
self.imageCheckText = self.image.get()
except pywikibot.NoPage:
- pywikibot.output(u"Skipping %s because it has been deleted."
- % self.imageName)
+ pywikibot.output(
+ 'Skipping {0} because it does not exist on current site.'
+ ''.format(self.imageName))
return
except pywikibot.IsRedirectPage:
pywikibot.output(u"Skipping %s because it's a redirect."
@@ -1593,9 +1594,9 @@
skip_number = 0 # How many images to skip before checking?
waitTime = 0 # How many time sleep before the check?
commonsActive = False # Is there's an image with the same name at commons?
- normal = False # Check the new images or use another generator?
- urlUsed = False # Use the url-related function instead of the new-pages
- regexGen = False # Use the regex generator
+ default = False # Default generator is new images
+ url = None # Use the url-related function instead of the new-pages
+ regex = None # Use the regex for url
duplicatesActive = False # Use the duplicate option
duplicatesReport = False # Use the duplicate-report option
sendemailActive = False # Use the send-email
@@ -1603,147 +1604,113 @@
generator = None
local_args = pywikibot.handle_args(args)
+ genFactory = pagegenerators.GeneratorFactory(ns=[Namespace.FILE])
site = pywikibot.Site()
- # Here below there are the local parameters.
- for arg in local_args:
- if arg.startswith('-limit'):
- if len(arg) == 6:
- limit = int(pywikibot.input(
- u'How many files do you want to check?'))
- else:
- limit = int(arg[7:])
- if arg.startswith('-sleep') or arg.startswith('-time'):
- if arg.startswith('-sleep'):
- length = len('-sleep')
- else:
- pywikibot.tools.issue_deprecation_warning('-time', '-sleep', 2)
- length = len('-time')
- if len(arg) == length:
- time_sleep = int(pywikibot.input(
- 'How many seconds do you want runs to be apart?'))
- else:
- time_sleep = int(arg[length + 1:])
- elif arg == '-break':
- repeat = False
- elif arg == '-nologerror':
- logFullError = False
- elif arg == '-commons':
- commonsActive = True
- elif arg.startswith('-duplicates'):
- duplicatesActive = True
- if len(arg) == 11:
- duplicates_rollback = 1
- elif len(arg) > 11:
- duplicates_rollback = int(arg[12:])
- elif arg == '-duplicatereport':
- duplicatesReport = True
- elif arg == '-sendemail':
- sendemailActive = True
- elif arg.startswith('-skip'):
- if len(arg) == 5:
- skip_number = int(pywikibot.input(
- u'How many files do you want to skip?'))
- elif len(arg) > 5:
- skip_number = int(arg[6:])
- elif arg.startswith('-wait'):
- if len(arg) == 5:
- waitTime = int(pywikibot.input(
- u'How many time do you want to wait before checking the '
- u'files?'))
- elif len(arg) > 5:
- waitTime = int(arg[6:])
- elif arg.startswith('-start'):
- if len(arg) == 6:
- firstPageTitle = pywikibot.input(
- u'From which page do you want to start?')
- elif len(arg) > 6:
- firstPageTitle = arg[7:]
- image = pywikibot.FilePage(site, firstPageTitle)
- firstPageTitle = image.title(withNamespace=False)
- generator = site.allpages(start=firstPageTitle,
- namespace=image.namespace())
- repeat = False
- elif arg.startswith('-page'):
- if len(arg) == 5:
- regexPageName = str(pywikibot.input(
- u'Which page do you want to use for the regex?'))
- elif len(arg) > 5:
- regexPageName = str(arg[6:])
- repeat = False
- regexGen = True
- elif arg.startswith('-url'):
- if len(arg) == 4:
- regexPageUrl = str(pywikibot.input(
- u'Which url do you want to use for the regex?'))
- elif len(arg) > 4:
- regexPageUrl = str(arg[5:])
- urlUsed = True
- repeat = False
- regexGen = True
- elif arg.startswith('-regex'):
- if len(arg) == 6:
- regexpToUse = str(pywikibot.input(
- u'Which regex do you want to use?'))
- elif len(arg) > 6:
- regexpToUse = str(arg[7:])
- generator = 'regex'
- repeat = False
- elif arg.startswith('-cat'):
- if len(arg) == 4:
- catName = str(pywikibot.input(u'In which category do I work?'))
- elif len(arg) > 4:
- catName = str(arg[5:])
- catSelected = pywikibot.Category(pywikibot.Site(),
- 'Category:%s' % catName)
- generator = catSelected.articles(namespaces=[6])
- repeat = False
- elif arg.startswith('-ref'):
- if len(arg) == 4:
- refName = str(pywikibot.input(
- u'The references of what page should I parse?'))
- elif len(arg) > 4:
- refName = str(arg[5:])
- ref = pywikibot.Page(pywikibot.Site(), refName)
- generator = ref.getReferences(namespaces=[6])
- repeat = False
- if not generator:
- normal = True
-
- skip = skip_number > 0
-
- # A little block-statement to ensure that the bot will not start with
- # en-parameters
+ # A little block-statement to ensure that the bot will only start
+ # if the script is localized for a given site.
if site.code not in project_inserted:
pywikibot.output(u"Your project is not supported by this script.\n"
u"You have to edit the script and add it!")
return False
- # Reading the log of the new images if another generator is not given.
- if normal:
- if limit == 1:
- pywikibot.output(u"Retrieving the latest file for checking...")
+ # Here below there are the local parameters.
+ for arg in local_args:
+ option, sep, value = arg.partition(':')
+ if option == '-limit':
+ if not value:
+ limit = int(pywikibot.input(
+ u'How many files do you want to check?'))
+ else:
+ limit = int(value)
+ if option in ('-sleep', '-time'):
+ if option == '-time':
+ pywikibot.tools.issue_deprecation_warning('-time', '-sleep', 2)
+ if not value:
+ time_sleep = int(pywikibot.input(
+ 'How many seconds do you want runs to be apart?'))
+ else:
+ time_sleep = int(value)
+ elif option == '-break':
+ repeat = False
+ elif option == '-nologerror':
+ logFullError = False
+ elif option == '-commons':
+ commonsActive = True
+ elif option == '-duplicates':
+ duplicatesActive = True
+ if not value:
+ duplicates_rollback = 1
+ else:
+ duplicates_rollback = int(value)
+ elif option == '-duplicatereport':
+ duplicatesReport = True
+ elif option == '-sendemail':
+ sendemailActive = True
+ elif option == '-skip':
+ if not value:
+ skip_number = int(pywikibot.input(
+ u'How many files do you want to skip?'))
+ else:
+ skip_number = int(value)
+ elif option == '-wait':
+ if not value:
+ waitTime = int(pywikibot.input(
+ u'How many time do you want to wait before checking the '
+ u'files?'))
+ else:
+ waitTime = int(value)
+ elif option == '-page':
+ pywikibot.tools.issue_deprecation_warning(
+ '-page', '-imagesused', 2)
+ genFactory.handleArg('-imagesused:' + value)
+ elif option in ('-url', '-fromurl'):
+ if option == '-url':
+ pywikibot.tools.issue_deprecation_warning(
+ '-url', '-fromurl', 2)
+ url = value or pywikibot.input(
+ 'Which url do you want to use for the regex?')
+ repeat = False
+ elif option == '-regex':
+ regex = value or pywikibot.input(
+ 'Which regex do you want to use?')
+ generator = 'regex'
+ repeat = False
+ elif option == '-newpages':
+ pywikibot.warning(
+ '-newpages option is default. It may be omitted.')
else:
- pywikibot.output(u"Retrieving the latest %d files for checking..."
- % limit)
+ genFactory.handleArg(arg)
+
+ if bool(url) ^ bool(regex):
+ pywikibot.warning('-regex option must be given with -url option.')
+ return False
+
+ if not generator:
+ generator = genFactory.getCombinedGenerator()
+
+ if not generator:
+ default = True
+ else:
+ repeat = False
+
+ skip = skip_number > 0
+
while True:
# Defing the Main Class.
Bot = checkImagesBot(site, sendemailActive=sendemailActive,
duplicatesReport=duplicatesReport,
logFullError=logFullError)
- if normal:
- generator = pg.NewimagesPageGenerator(total=limit, site=site)
+
+ # Reading the log of the new images if another generator is not given.
+ if default:
+ if limit == 1:
+ pywikibot.output(u"Retrieving the latest file for checking...")
+ else:
+ pywikibot.output(
+ 'Retrieving the latest %d files for checking...' % limit)
+ generator = pagegenerators.NewimagesPageGenerator(total=limit,
+ site=site)
# if url and regex are given, get the source text for the generator
if url and regex:
- textRegex = site.getUrl(regexPageUrl, no_hostname=True)
- # Not an url but a wiki page as "source" for the regex
- elif regexGen:
- pageRegex = pywikibot.Page(site, regexPageName)
- try:
- textRegex = pageRegex.get()
- except pywikibot.NoPage:
- pywikibot.output(u"%s doesn't exist!" % pageRegex.title())
- textRegex = '' # No source, so the bot will quit later.
+ textRegex = site.getUrl(url, no_hostname=True)
# If generator is the regex' one, use your own Generator using an url
# or page and a regex.
if generator == 'regex' and url:
--
To view, visit https://gerrit.wikimedia.org/r/281224
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I9e076673d1b6077fda459603142af08843e19e1c
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits