Xqt has uploaded a new change for review.
https://gerrit.wikimedia.org/r/281224
Change subject: [WIP] use pagegenerators options for checkimages
......................................................................
[WIP] use pagegenerators options for checkimages
DO NOT SUBMIT YET
Bug: T76429
Bug: T100628
Change-Id: I9e076673d1b6077fda459603142af08843e19e1c
---
M pywikibot/data/api.py
M pywikibot/pagegenerators.py
M scripts/checkimages.py
3 files changed, 133 insertions(+), 153 deletions(-)
git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core
refs/changes/24/281224/1
diff --git a/pywikibot/data/api.py b/pywikibot/data/api.py
index c3f1ddf..c50964d 100644
--- a/pywikibot/data/api.py
+++ b/pywikibot/data/api.py
@@ -2625,7 +2625,7 @@
self.api_limit),
_logger)
- def set_namespace(self, namespaces):
+ def set_namespace(self, namespaces, quiet=False):
"""Set a namespace filter on this query.
@param namespaces: namespace identifiers to limit query results
@@ -2633,6 +2633,9 @@
or a single instance of those types. May be a '|' separated
list of namespace identifiers. An empty iterator clears any
namespace restriction.
+ @param quiet: if True, do not show a warning when the module does
+ not support a namespace parameter
+ @type quiet: bool
@raises KeyError: a namespace identifier was not resolved
@raises TypeError: a namespace identifier has an inappropriate
type such as NoneType or bool, or more than one namespace
@@ -2642,8 +2645,9 @@
param = self.site._paraminfo.parameter('query+' + self.limited_module,
'namespace')
if not param:
- pywikibot.warning(u'{0} module does not support a namespace '
- 'parameter'.format(self.limited_module))
+ if not quiet:
+ pywikibot.warning(u'{0} module does not support a namespace '
+ 'parameter'.format(self.limited_module))
return
if isinstance(namespaces, basestring):
diff --git a/pywikibot/pagegenerators.py b/pywikibot/pagegenerators.py
index 224f93a..5ac5ad6 100644
--- a/pywikibot/pagegenerators.py
+++ b/pywikibot/pagegenerators.py
@@ -363,7 +363,7 @@
# When not in intersect mode, _filter_unique could be:
# functools.partial(filter_unique, container=global_seen_list)
- def __init__(self, site=None, positional_arg_name=None):
+ def __init__(self, site=None, positional_arg_name=None, ns=None):
"""
Constructor.
@@ -372,9 +372,17 @@
@param positional_arg_name: generator to use for positional args,
which do not begin with a hyphen
@type positional_arg_name: basestring
+ @param ns: list of namespace numbers
"""
self.gens = []
- self._namespaces = []
+ self._site = site
+ if ns is None:
+ self._namespaces = []
+ self.default_ns_setting = False
+ else:
+ self._namespaces = ns
+ self.namespaces # change the storage to immutable
+ self.default_ns_setting = True
self.limit = None
self.qualityfilter_list = []
self.articlefilter_list = []
@@ -383,7 +391,6 @@
self.catfilter_list = []
self.intersect = False
self.subpage_max_depth = None
- self._site = site
self._positional_arg_name = positional_arg_name
self._sparql = None
@@ -439,7 +446,8 @@
for i in range(len(self.gens)):
if isinstance(self.gens[i], pywikibot.data.api.QueryGenerator):
if self.namespaces:
- self.gens[i].set_namespace(self.namespaces)
+ self.gens[i].set_namespace(
+ self.namespaces, quiet=self.default_ns_setting)
if self.limit:
self.gens[i].set_maximum_items(self.limit)
else:
@@ -752,7 +760,8 @@
elif arg == '-page':
if not value:
value = pywikibot.input(u'What page do you want to use?')
- gen = [pywikibot.Page(pywikibot.Link(value, self.site))]
+ ns = list(self.namespaces)[0] if len(self.namespaces) == 1 else 0
+ gen = [pywikibot.Page(pywikibot.Link(value, self.site, ns))]
elif arg == '-uncatfiles':
gen = UnCategorizedImageGenerator(site=self.site)
elif arg == '-uncatcat':
diff --git a/scripts/checkimages.py b/scripts/checkimages.py
index 4a9a344..348065a 100755
--- a/scripts/checkimages.py
+++ b/scripts/checkimages.py
@@ -13,6 +13,8 @@
This script understands the following command-line arguments:
+&params;
+
-limit The number of images to check (default: 80)
-commons The Bot will check if an image on Commons has the same name
@@ -31,22 +33,13 @@
-sleep[:#] Time in seconds between repeat runs (default: 30)
--time[:#] The -time option is deprecated. Use -sleep instead.
-
-wait[:#] Wait x second before check the images (default: 0)
-skip[:#] The bot skip the first [:#] images (default: 0)
--start[:#] Use allpages() as generator
- (it starts already from File:[:#])
+-regex[:#] Use regex, must be used with -fromurl
--cat[:#] Use a category as generator
-
--regex[:#] Use regex, must be used with -url or -page
-
--page[:#] Define the name of the wikipage where are the images
-
--url[:#] Define the url where are the images
+-fromurl[:#] Define the url where the images are located
-nologerror If given, this option will disable the error that is risen
when the log is full.
@@ -97,10 +90,11 @@
import pywikibot
from pywikibot import i18n
-from pywikibot import pagegenerators as pg
+from pywikibot import pagegenerators
from pywikibot.exceptions import NotEmailableError
from pywikibot.family import Family
+from pywikibot.site import Namespace
from pywikibot.tools import deprecated, StringTypes
###############################################################################
@@ -509,6 +503,10 @@
# END OF CONFIGURATION.
+docuReplacements = {
+ '&params;': pagegenerators.parameterHelp,
+}
+
SETTINGS_REGEX = re.compile(r"""
<-------\ ------->\n
\*[Nn]ame\ ?=\ ?['"](.*?)['"]\n
@@ -629,7 +627,8 @@
try:
resPutMex = self.tag_image(unver)
except pywikibot.NoPage:
- pywikibot.output(u"The page has been deleted! Skip!")
+ pywikibot.output(
+ 'The page does not exist on current site! Skip!')
break
except pywikibot.EditConflict:
pywikibot.output(u"Edit conflict! Skip!")
@@ -673,7 +672,8 @@
try:
reportPageText = reportPageObject.get()
except pywikibot.NoPage:
- pywikibot.output(u'%s has been deleted...' % self.imageName)
+ pywikibot.output(
+ '{0} does not exist on current site...'.format(self.imageName))
return
# You can use this function also to find only the user that
# has upload the image (FixME: Rewrite a bit this part)
@@ -1505,8 +1505,9 @@
try:
self.imageCheckText = self.image.get()
except pywikibot.NoPage:
- pywikibot.output(u"Skipping %s because it has been deleted."
- % self.imageName)
+ pywikibot.output(
+ 'Skipping {0} because it does not exist on current site.'
+ ''.format(self.imageName))
return
except pywikibot.IsRedirectPage:
pywikibot.output(u"Skipping %s because it's a redirect."
@@ -1593,9 +1594,9 @@
skip_number = 0 # How many images to skip before checking?
waitTime = 0 # How many time sleep before the check?
commonsActive = False # Is there's an image with the same name at commons?
- normal = False # Check the new images or use another generator?
- urlUsed = False # Use the url-related function instead of the new-pages
- regexGen = False # Use the regex generator
+ default = False # Default generator is new images
+ url = None # Use the url-related function instead of the new-pages
+ regex = None # Use the regex for url
duplicatesActive = False # Use the duplicate option
duplicatesReport = False # Use the duplicate-report option
sendemailActive = False # Use the send-email
@@ -1603,147 +1604,113 @@
generator = None
local_args = pywikibot.handle_args(args)
+ genFactory = pagegenerators.GeneratorFactory(ns=[Namespace.FILE])
site = pywikibot.Site()
- # Here below there are the local parameters.
- for arg in local_args:
- if arg.startswith('-limit'):
- if len(arg) == 6:
- limit = int(pywikibot.input(
- u'How many files do you want to check?'))
- else:
- limit = int(arg[7:])
- if arg.startswith('-sleep') or arg.startswith('-time'):
- if arg.startswith('-sleep'):
- length = len('-sleep')
- else:
- pywikibot.tools.issue_deprecation_warning('-time', '-sleep', 2)
- length = len('-time')
- if len(arg) == length:
- time_sleep = int(pywikibot.input(
- 'How many seconds do you want runs to be apart?'))
- else:
- time_sleep = int(arg[length + 1:])
- elif arg == '-break':
- repeat = False
- elif arg == '-nologerror':
- logFullError = False
- elif arg == '-commons':
- commonsActive = True
- elif arg.startswith('-duplicates'):
- duplicatesActive = True
- if len(arg) == 11:
- duplicates_rollback = 1
- elif len(arg) > 11:
- duplicates_rollback = int(arg[12:])
- elif arg == '-duplicatereport':
- duplicatesReport = True
- elif arg == '-sendemail':
- sendemailActive = True
- elif arg.startswith('-skip'):
- if len(arg) == 5:
- skip_number = int(pywikibot.input(
- u'How many files do you want to skip?'))
- elif len(arg) > 5:
- skip_number = int(arg[6:])
- elif arg.startswith('-wait'):
- if len(arg) == 5:
- waitTime = int(pywikibot.input(
- u'How many time do you want to wait before checking the '
- u'files?'))
- elif len(arg) > 5:
- waitTime = int(arg[6:])
- elif arg.startswith('-start'):
- if len(arg) == 6:
- firstPageTitle = pywikibot.input(
- u'From which page do you want to start?')
- elif len(arg) > 6:
- firstPageTitle = arg[7:]
- image = pywikibot.FilePage(site, firstPageTitle)
- firstPageTitle = image.title(withNamespace=False)
- generator = site.allpages(start=firstPageTitle,
- namespace=image.namespace())
- repeat = False
- elif arg.startswith('-page'):
- if len(arg) == 5:
- regexPageName = str(pywikibot.input(
- u'Which page do you want to use for the regex?'))
- elif len(arg) > 5:
- regexPageName = str(arg[6:])
- repeat = False
- regexGen = True
- elif arg.startswith('-url'):
- if len(arg) == 4:
- regexPageUrl = str(pywikibot.input(
- u'Which url do you want to use for the regex?'))
- elif len(arg) > 4:
- regexPageUrl = str(arg[5:])
- urlUsed = True
- repeat = False
- regexGen = True
- elif arg.startswith('-regex'):
- if len(arg) == 6:
- regexpToUse = str(pywikibot.input(
- u'Which regex do you want to use?'))
- elif len(arg) > 6:
- regexpToUse = str(arg[7:])
- generator = 'regex'
- repeat = False
- elif arg.startswith('-cat'):
- if len(arg) == 4:
- catName = str(pywikibot.input(u'In which category do I work?'))
- elif len(arg) > 4:
- catName = str(arg[5:])
- catSelected = pywikibot.Category(pywikibot.Site(),
- 'Category:%s' % catName)
- generator = catSelected.articles(namespaces=[6])
- repeat = False
- elif arg.startswith('-ref'):
- if len(arg) == 4:
- refName = str(pywikibot.input(
- u'The references of what page should I parse?'))
- elif len(arg) > 4:
- refName = str(arg[5:])
- ref = pywikibot.Page(pywikibot.Site(), refName)
- generator = ref.getReferences(namespaces=[6])
- repeat = False
- if not generator:
- normal = True
-
- skip = skip_number > 0
-
- # A little block-statement to ensure that the bot will not start with
- # en-parameters
+ # A little block-statement to ensure that the bot will only start
+ # if the script is localized for a given site.
if site.code not in project_inserted:
pywikibot.output(u"Your project is not supported by this script.\n"
u"You have to edit the script and add it!")
return False
- # Reading the log of the new images if another generator is not given.
- if normal:
- if limit == 1:
- pywikibot.output(u"Retrieving the latest file for checking...")
+ # Here below there are the local parameters.
+ for arg in local_args:
+ option, sep, value = arg.partition(':')
+ if option == '-limit':
+ if not value:
+ limit = int(pywikibot.input(
+ u'How many files do you want to check?'))
+ else:
+ limit = int(value)
+ if option in ('-sleep', '-time'):
+ if option == '-time':
+ pywikibot.tools.issue_deprecation_warning('-time', '-sleep', 2)
+ if not value:
+ time_sleep = int(pywikibot.input(
+ 'How many seconds do you want runs to be apart?'))
+ else:
+ time_sleep = int(value)
+ elif option == '-break':
+ repeat = False
+ elif option == '-nologerror':
+ logFullError = False
+ elif option == '-commons':
+ commonsActive = True
+ elif option == '-duplicates':
+ duplicatesActive = True
+ if not value:
+ duplicates_rollback = 1
+ else:
+ duplicates_rollback = int(value)
+ elif option == '-duplicatereport':
+ duplicatesReport = True
+ elif option == '-sendemail':
+ sendemailActive = True
+ elif option == '-skip':
+ if not value:
+ skip_number = int(pywikibot.input(
+ u'How many files do you want to skip?'))
+ else:
+ skip_number = int(value)
+ elif option == '-wait':
+ if not value:
+ waitTime = int(pywikibot.input(
+ u'How many time do you want to wait before checking the '
+ u'files?'))
+ else:
+ waitTime = int(value)
+ elif option == '-page':
+ pywikibot.tools.issue_deprecation_warning(
+ '-page', '-imagesused', 2)
+ genFactory.handleArg('-imagesused:' + value)
+ elif option in ('-url', '-fromurl'):
+ if option == '-url':
+ pywikibot.tools.issue_deprecation_warning(
+ '-url', '-fromurl', 2)
+ url = value or pywikibot.input(
+ 'Which url do you want to use for the regex?')
+ repeat = False
+ elif option == '-regex':
+ regex = value or pywikibot.input(
+ 'Which regex do you want to use?')
+ generator = 'regex'
+ repeat = False
+ elif option == '-newpages':
+ pywikibot.warning(
+ '-newpages option is default. It may be omitted.')
else:
- pywikibot.output(u"Retrieving the latest %d files for checking..."
- % limit)
+ genFactory.handleArg(arg)
+
+ if bool(url) ^ bool(regex):
+ pywikibot.warning('-regex option must be given with -url option.')
+ return False
+
+ if not generator:
+ generator = genFactory.getCombinedGenerator()
+
+ if not generator:
+ default = True
+ else:
+ repeat = False
+
+ skip = skip_number > 0
+
while True:
# Defing the Main Class.
Bot = checkImagesBot(site, sendemailActive=sendemailActive,
duplicatesReport=duplicatesReport,
logFullError=logFullError)
- if normal:
- generator = pg.NewimagesPageGenerator(total=limit, site=site)
+
+ # Reading the log of the new images if another generator is not given.
+ if default:
+ if limit == 1:
+ pywikibot.output(u"Retrieving the latest file for checking...")
+ else:
+ pywikibot.output(
+ 'Retrieving the latest %d files for checking...' % limit)
+ generator = pagegenerators.NewimagesPageGenerator(total=limit,
+ site=site)
# if url and regex are given, get the source text for the generator
if url and regex:
- textRegex = site.getUrl(regexPageUrl, no_hostname=True)
- # Not an url but a wiki page as "source" for the regex
- elif regexGen:
- pageRegex = pywikibot.Page(site, regexPageName)
- try:
- textRegex = pageRegex.get()
- except pywikibot.NoPage:
- pywikibot.output(u"%s doesn't exist!" % pageRegex.title())
- textRegex = '' # No source, so the bot will quit later.
+ textRegex = site.getUrl(url, no_hostname=True)
# If generator is the regex' one, use your own Generator using an url
# or page and a regex.
if generator == 'regex' and url:
--
To view, visit https://gerrit.wikimedia.org/r/281224
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: newchange
Gerrit-Change-Id: I9e076673d1b6077fda459603142af08843e19e1c
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits