Hello, is there any script in pywikipedia to remove non-existent images
from pages? If there isn't, could someone please commit the script which I
have attached to this mail? Thanks.
On 2011/09/20 09:31 ?.?, Mjbmr wrote:
Hello,
I'm interested in having commit access for pywikipedia. My last request
is in this
<http://lists.wikimedia.org/pipermail/pywikipedia-l/2011-February/>
archive, but no one ever answered it. I am active in the Persian
community, but I am also active in many other wikis: sometimes I invite
people to translate the pywikipedia framework into their language, and
sometimes I make patches for pywikipedia like this
<http://lists.wikimedia.org/pipermail/pywikipedia-l/2011-March/006768.html>
and someone else approves them
<http://lists.wikimedia.org/pipermail/pywikipedia-l/2011-March/006769.html>.
As I told you before, I have a bot which uses the pywikipedia framework,
and I sent you some examples of code I have written which is in use on WMF
projects. Another reason for requesting this access is that I would like to
improve forgotten family files like this
<http://svn.wikimedia.org/viewvc/pywikipedia/trunk/pywikipedia/families/uncyclopedia_family.py?view=markup>
(read the header), which is causing bad edits in those projects, like this
<http://bg.oxypedia.net/index.php?diff=2882&oldid=1248&uselang=en>.
For example, I tested it and there are several outdated namespaces in that
family file which I can fix. For these reasons I request commit access for
pywikipedia; if it's possible, please approve my request.
Thank you.
# -*- coding: utf-8 -*-
"""
This script can be used to remove non-existent images from pages.

Syntax: python remove_none_exist_images.py [options]

Command line options:

-summary:      Provide a custom edit summary. If the summary includes spaces,
               surround it with single quotes, such as:
               -summary:'My edit summary'
-always        Don't prompt to make changes, just do them.
-namespace:n   Number of the namespace to process. The parameter can be
               used multiple times.

Example:

    python remove_none_exist_images.py -always -namespace:0
"""
__version__ = '$Id:$'
import wikipedia as pywikibot
import pagegenerators, replace, re, query
#
# Distributed under the terms of the MIT license.
#
class remove_none_exist_images:
    """
    Robot that removes references to non-existent files from wiki pages.

    It reads the name of the tracking category from the wiki's
    'broken-file-category' system message, walks the members of that
    category, asks the API which of the files used on each member are
    missing, and strips every missing file from the pages that link to it.
    """

    # Edit summary messages keyed by language code; '%s' receives the
    # file name.
    # NOTE(review): the 'fa' text was restored from a mis-encoded copy that
    # was also split across two lines -- please confirm the wording.
    msg_remove = {
        'en': u'Robot: Removing none exist file %s',
        'fa': u'ربات: حذف پرونده ناموجود %s',
    }

    def __init__(self, summary='', always=False, namespaces=None):
        """
        Arguments:
            * summary    - A custom edit summary. It may contain '%s', which
                           is replaced by the name of the removed file.
            * always     - Don't prompt to make changes, just do them.
            * namespaces - Array number of namespaces to process; None or
                           an empty list means all namespaces.
        """
        self.always = always
        # Copy instead of sharing a mutable default between instances.
        self.namespaces = list(namespaces) if namespaces else []
        self.site = pywikibot.getSite()
        if summary:
            self.editSummary = summary
        else:
            self.editSummary = pywikibot.translate(self.site,
                                                   self.msg_remove)

    @staticmethod
    def _title_regex(text):
        """Return a regex for text in which '_' and ' ' match each other."""
        return '[_ ]'.join(re.escape(part)
                           for part in re.split('[_ ]', text))

    @classmethod
    def _file_name_regex(cls, imagename, first_letter_insensitive=True):
        """
        Return a regex matching imagename as it may be written in wikitext.

        When first_letter_insensitive is true the first character matches
        in both upper and lower case.
        """
        if (first_letter_insensitive and imagename
                and imagename[0] not in '_ '):
            first = imagename[0]
            return ('[' + re.escape(first.upper() + first.lower()) + ']'
                    + cls._title_regex(imagename[1:]))
        return cls._title_regex(imagename)

    @classmethod
    def _strip_namespace(cls, title, namespace_names):
        """
        Return title without its leading file namespace prefix.

        Falls back to the text after the first colon (or the whole title
        when there is none) if no known namespace name matches.
        """
        prefixes = '|'.join(cls._title_regex(name)
                            for name in namespace_names)
        match = re.match(r'(?:' + prefixes + r'):(.*)', title)
        if match:
            return match.group(1)
        return title.split(':', 1)[-1]

    @classmethod
    def _strip_image(cls, text, namespace_names, imagename,
                     first_letter_insensitive=True):
        """
        Return text with every use of the file imagename removed.

        Arguments:
            * text            - Wikitext of the page.
            * namespace_names - All names of the file namespace.
            * imagename       - File name without namespace prefix.
            * first_letter_insensitive - Match the first letter of the
                                name in both cases.
        """
        name = cls._file_name_regex(imagename, first_letter_insensitive)
        prefixes = '|'.join(cls._title_regex(ns) for ns in namespace_names)
        # A complete [[File:Name|options|caption]] link. The option part may
        # hold nested [[links]] and [external links] but never runs past the
        # closing brackets of this link, so later links on the same line
        # are left alone.
        link = re.compile(
            r'\[\[ *(?:' + prefixes + r')\s*:\s*' + name + r' *'
            r'(?:\|(?:[^\[\]\n]|\[\[[^\[\]\n]*\]\]|\[[^\[\]\n]*\])*)?'
            r'\]\]')
        text = link.sub('', text)
        # Remaining bare uses (gallery lines, template parameters). The
        # lookbehinds keep the name from matching the tail of a longer
        # file name such as "World Map.png" when removing "Map.png".
        bare = re.compile(r'(?<!\w)(?<!\w[ _])' + name, re.UNICODE)
        return bare.sub('', text)

    def _summary_for(self, imagename):
        """Return the edit summary, filled with imagename when possible."""
        try:
            return self.editSummary % imagename
        except (TypeError, ValueError):
            # A custom summary without (or with a malformed) placeholder.
            return self.editSummary

    def run(self):
        """
        Starts the robot's action.
        """
        site = self.site
        params = {
            'action': 'query',
            'meta': 'allmessages',
            'ammessages': 'broken-file-category',
            'amenableparser': '',
        }
        data = query.GetData(params, encodeTitle=True)
        self.categoryname = data['query']['allmessages'][0]['*']
        pywikibot.output(u"Getting list of pages from category '%s' ..."
                         % self.categoryname)
        # NOTE(review): only the first batch of category members returned
        # by the API is processed; continuation is not followed here.
        params = {
            'action': 'query',
            'list': 'categorymembers',
            'cmlimit': 'max',
            'cmtitle': u'Category:%s' % self.categoryname,
        }
        if self.namespaces:
            # Entries may be ints, so format them before joining.
            params['cmnamespace'] = '|'.join('%s' % ns
                                             for ns in self.namespaces)
        self.pageslist = query.GetData(params, encodeTitle=True)
        namespace_names = site.namespace(6, all=True)
        for pageitem in self.pageslist['query']['categorymembers']:
            params = {
                'action': 'query',
                'prop': 'images',
                'imlimit': 'max',
                'titles': pageitem['title'],
            }
            imagelist = query.GetData(params, encodeTitle=True)
            for pageinfo in imagelist['query']['pages'].values():
                # A page without any file has no 'images' entry.
                for image in pageinfo.get('images', []):
                    self._process_image(site, image['title'],
                                        namespace_names)

    def _process_image(self, site, title, namespace_names):
        """Remove the file called title from its users if it is missing."""
        imagename = self._strip_namespace(title, namespace_names)
        params = {
            'action': 'query',
            'prop': 'imageinfo',
            'titles': title,
        }
        imagesinfo = query.GetData(params, encodeTitle=True)
        for imageinfo in imagesinfo['query']['pages'].values():
            try:
                missing = (imageinfo['missing'] == "" and
                           imageinfo['imagerepository'] == "")
            except KeyError:
                # The API did not flag the file as missing.
                pywikibot.output(u"Skiping image '%s'..." % imagename)
                continue
            if not missing:
                continue
            pywikibot.output(u"Removing image '%s' ..." % imagename)
            try:
                self.remove_image(site, imagename)
            except Exception as error:
                # Keep the run going for the other files, but say why this
                # one failed; KeyboardInterrupt still stops the robot.
                pywikibot.output(u"Error while processing image '%s': %r"
                                 % (imagename, error))

    def remove_image(self, site, imagename):
        """
        Remove the file imagename from every page that links to it.

        Arguments:
            * site      - Site object the file belongs to.
            * imagename - File name without namespace prefix.

        Pages outside self.namespaces are skipped when that list is not
        empty. The check compares against page.namespace(), so
        presumably only numeric entries can match -- TODO confirm.
        """
        image_page = pywikibot.ImagePage(
            site, site.namespace(6) + ':%s' % imagename)
        gen = pagegenerators.FileLinksGenerator(image_page)
        namespace_names = site.namespace(6, all=True)
        for page in pagegenerators.PreloadingGenerator(gen):
            if self.namespaces and page.namespace() not in self.namespaces:
                continue
            try:
                original_text = page.get()
            except pywikibot.NoPage:
                pywikibot.output(u'Page %s has been deleted.'
                                 % page.title())
                continue
            new_text = self._strip_image(original_text, namespace_names,
                                         imagename, not site.nocapitalize)
            if new_text == original_text:
                pywikibot.output(u'No changes were necessary in %s'
                                 % page.title(asLink=True))
                continue
            pywikibot.output(u">>> %s <<<" % page.title())
            pywikibot.showDiff(original_text, new_text)
            if not self.always:
                choice = pywikibot.inputChoice(
                    u'Do you want to accept these changes?',
                    ['Yes', 'No', 'All'], ['y', 'N', 'a'], 'N')
                if choice == 'a':
                    # "All" accepts this change as well as later ones.
                    self.always = True
                elif choice != 'y':
                    continue
            self.save_page(page, new_text, self._summary_for(imagename))

    def save_page(self, page, text, summary):
        """
        Save text to page with the given summary.

        Edit conflicts, spam blacklist hits, save failures and locked
        pages are reported and skipped instead of being raised.
        """
        try:
            page.put(text, summary)
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict'
                             % (page.title(),))
        except pywikibot.SpamfilterError as e:
            pywikibot.output(
                u'Cannot change %s because of blacklist entry %s'
                % (page.title(), e.url))
        except pywikibot.PageNotSaved as error:
            pywikibot.output(u'Error putting page: %s'
                             % (error.args,))
        except pywikibot.LockedPage:
            pywikibot.output(u'Skipping %s (locked page)'
                             % (page.title(),))
def main():
    """
    Parse the command line and run the robot.

    Recognised arguments are -always, -summary[:text] (prompts for the
    text when none is given) and -namespace:n / -ns:n, which may be
    repeated.
    """
    custom_summary = ''
    skip_prompt = False
    wanted_namespaces = []
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            skip_prompt = True
            continue
        if arg.startswith('-summary'):
            if len(arg) > len('-summary'):
                custom_summary = arg[len('-summary:'):]
            else:
                custom_summary = pywikibot.input(
                    u'Choose an edit summary: ')
            continue
        if arg.startswith('-namespace:') or arg.startswith('-ns:'):
            # Both spellings carry the value after their first colon.
            value = arg.split(':', 1)[1]
            try:
                value = int(value)
            except ValueError:
                pass
            wanted_namespaces.append(value)
    bot = remove_none_exist_images(custom_summary, skip_prompt,
                                   wanted_namespaces)
    bot.run()
# Script entry point: always release the framework's resources, even when
# the robot stops with an error.
if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
Pywikipedia-l mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-l