Hello, is there any pywikipedia script that removes non-existent images from pages? If there isn't, could someone please commit the script which I have attached to this mail? Thanks.

On 2011/09/20 09:31 ?.?, Mjbmr wrote:
Hello,

I'm interested in having commit access for pywikipedia. My last request is in this <http://lists.wikimedia.org/pipermail/pywikipedia-l/2011-February/> archive, but no one ever answered it. I am active in the Persian community, but as I am also active in many other wikis, I sometimes invite people to translate the pywikipedia framework into their language, and I sometimes write patches for pywikipedia like this <http://lists.wikimedia.org/pipermail/pywikipedia-l/2011-March/006768.html>, which someone else then approves <http://lists.wikimedia.org/pipermail/pywikipedia-l/2011-March/006769.html>. As I told you before, I have a bot which uses the pywikipedia framework, and I sent you some examples of code I have written that is used in WMF projects. Another reason for requesting this access is that I would like to improve forgotten family files like this <http://svn.wikimedia.org/viewvc/pywikipedia/trunk/pywikipedia/families/uncyclopedia_family.py?view=markup> (read the header), which is causing bad edits in their projects like this <http://bg.oxypedia.net/index.php?diff=2882&oldid=1248&uselang=en>. For example, I tested it: there are several outdated namespaces in that family file which I can fix. Per these statements I request commit access for pywikipedia; if it is possible, please approve my request.


Thank you.


# -*- coding: utf-8 -*-
"""
This script can be used to remove links to non-existent images from pages

Syntax: python remove_none_exist_images.py [options]

Command line options:

-summary:      Provide a custom edit summary.  If the summary includes spaces,
               surround it with single quotes, such as:
               -summary:'My edit summary'

-always        Don't prompt to make changes, just do them.

-namespace:n   Number of namespace to process. The parameter can be
               used multiple times.

Example:
       python remove_none_exist_images.py -always -namespace:0

"""
__version__ = '$Id:$'
import wikipedia as pywikibot
import pagegenerators, replace, re, query
#
# Distributed under the terms of the MIT license.
#
class remove_none_exist_images:
    """
    Robot that removes references to non-existent files from pages.

    The pages to inspect are the members of the wiki's broken-file tracking
    category, whose name is read from the 'broken-file-category' system
    message.  Every file listed for such a page is looked up through the
    API; a file reported as 'missing' with an empty 'imagerepository' is
    removed from the pages yielded by pagegenerators.FileLinksGenerator.
    """
    # Edit Summary messages
    msg_remove={
        'en': u'Robot: Removing none exist file %s',
        'fa': u'ربات: حذف پرونده ناموجود %s',
    }

    def __init__(self,summary='',always=False,namespaces=None):
        """
        Arguments:
            * summary     - A custom edit summary.  It may contain '%s',
                            which is replaced by the removed file name.
            * always      - Don't prompt to make changes, just do them.
            * namespaces  - Array number of namespaces to process.
                            None or an empty list means no restriction.
        """
        self.always = always
        # Copy so that neither a shared default nor the caller's list is
        # aliased by the bot.
        self.namespaces = list(namespaces) if namespaces else []
        mysite = pywikibot.getSite()
        self.site = mysite
        if summary:
            self.editSummary = summary
        else:
            self.editSummary = pywikibot.translate(mysite, self.msg_remove)

    @staticmethod
    def _image_name(title, namespace_names):
        """
        Return the part of title after the file namespace prefix.

        namespace_names is the list of accepted prefixes.  Returns None
        when title does not start with '<prefix>:'.
        """
        prefixes = '|'.join([re.escape(name) for name in namespace_names])
        found = re.match(r'(?:' + prefixes + r'):(.*)', title)
        if found is None:
            return None
        return found.group(1)

    @staticmethod
    def _name_pattern(imagename, nocapitalize):
        """
        Return a regex fragment matching imagename as written in wikitext.

        Spaces and underscores are interchangeable.  Unless nocapitalize is
        true, the first character matches in either case.
        """
        if not imagename:
            return ''
        if nocapitalize:
            head, tail = '', imagename
        else:
            first = imagename[0]
            head = '[' + re.escape(first.upper() + first.lower()) + ']'
            tail = imagename[1:]
        # Escape the pieces between separators instead of post-processing
        # re.escape() output: whether '_' gets escaped differs between
        # Python versions.
        pieces = [re.escape(piece) for piece in re.split('[_ ]', tail)]
        return head + '[_ ]'.join(pieces)

    @staticmethod
    def _strip_image(text, namespace_names, name_pattern):
        """
        Return text with every reference to the image removed.

        First whole '[[File:name|...]]' links are dropped, then any
        remaining bare occurrence of the name (as matched by name_pattern,
        e.g. a gallery line) is deleted.
        """
        if not name_pattern:
            return text
        prefixes = '|'.join([re.escape(name) for name in namespace_names])
        # Link parameters may contain [[wiki links]] and [external links],
        # but must stop at the link's own closing brackets; a greedy
        # "rest of the line" match would also swallow later links.
        link_params = (r'(?:\|(?:[^\[\]\n]'
                       r'|\[\[[^\[\]\n]*\]\]'
                       r'|\[[^\[\]\n]*\])*)?')
        link = (r'\[\[ *(?:' + prefixes + r')\s*:\s*' + name_pattern
                + r' *' + link_params + r' *\]\]')
        text = re.sub(link, "", text)
        return re.sub(name_pattern, "", text)

    @staticmethod
    def _format_summary(template, imagename):
        """
        Return template with imagename filled in.

        A template that cannot be formatted with a single argument (for
        example a custom summary without '%s') is returned unchanged.
        """
        try:
            return template % imagename
        except (TypeError, ValueError):
            return template

    def run(self):
        """
        Starts the robot's action.
        """
        params = {
            'action': 'query',
            'meta': 'allmessages',
            'ammessages': 'broken-file-category',
            'amenableparser': '',
        }
        data = query.GetData(params, encodeTitle = True)
        self.categoryname = data['query']['allmessages'][0]['*']
        pywikibot.output(u"Getting list of pages from category '%s' ..." %
                         self.categoryname)
        params = {
            'action': 'query',
            'list': 'categorymembers',
            'cmlimit': 'max',
            'cmtitle': u'Category:%s' % self.categoryname,
        }

        if self.namespaces:
            # Namespaces are usually ints; join() only accepts strings.
            params['cmnamespace'] = '|'.join(
                ['%s' % ns for ns in self.namespaces])

        self.pageslist = query.GetData(params, encodeTitle = True)
        site = self.site
        namespace_names = site.namespace(6, all = True)
        for pageitem in self.pageslist['query']['categorymembers']:
            params = {
                'action': 'query',
                'prop': 'images',
                'imlimit': 'max',
                'titles': pageitem['title'],
            }
            imagelist = query.GetData(params, encodeTitle = True)
            for pageinfo in imagelist['query']['pages'].values():
                # The 'images' key may be absent from a page entry.
                for image in pageinfo.get('images', []):
                    self._process_image(site, namespace_names, image['title'])

    def _process_image(self, site, namespace_names, title):
        """
        Check one file title and remove the file if it does not exist.
        """
        imagename = self._image_name(title, namespace_names)
        if not imagename:
            pywikibot.output("Skiping image '%s'..." % title)
            return
        params = {
            'action': 'query',
            'prop': 'imageinfo',
            'titles': title,
        }
        imagesinfo = query.GetData(params, encodeTitle = True)
        for imageinfo in imagesinfo['query']['pages'].values():
            # Only act on entries that carry both an empty 'missing' and
            # an empty 'imagerepository' value.
            if (imageinfo.get('missing') == ""
                    and imageinfo.get('imagerepository') == ""):
                pywikibot.output("Removing image '%s' ..." % imagename)
                try:
                    self.remove_image(site, imagename)
                except Exception:
                    # Best effort: a failure on one image must not stop
                    # the run, but Ctrl-C and SystemExit still propagate.
                    pywikibot.output("Skiping image '%s'..." % imagename)
            else:
                pywikibot.output("Skiping image '%s'..." % imagename)

    def remove_image(self,site,imagename):
        """
        Remove imagename from the pages generated for its file page.

        Pages outside self.namespaces (when set) are left alone.  Unless
        self.always is set, every change is shown and must be confirmed;
        answering 'All' saves the current page and stops prompting.
        """
        if not imagename:
            return
        ImagePage = pywikibot.ImagePage(site,site.namespace(6)+':%s' %
                                        imagename)
        gen = pagegenerators.FileLinksGenerator(ImagePage)
        preloadingGen = pagegenerators.PreloadingGenerator(gen)
        escaped = self._name_pattern(imagename, site.nocapitalize)
        namespace_names = site.namespace(6, all = True)
        summary = self._format_summary(self.editSummary, imagename)
        for page in preloadingGen:
            if self.namespaces and page.namespace() not in self.namespaces:
                continue
            try:
                original_text = page.get()
            except pywikibot.NoPage:
                pywikibot.output(u'Page %s has been deleted.' %
                                 page.title())
                continue
            new_text = self._strip_image(original_text, namespace_names,
                                         escaped)
            if new_text == original_text:
                pywikibot.output(u'No changes were necessary in %s' %
                                 page.title(asLink=True))
                continue
            pywikibot.output(u">>> %s <<<" % page.title())
            pywikibot.showDiff(original_text, new_text)
            if not self.always:
                choice = pywikibot.inputChoice(
                            u'Do you want to accept these changes?',
                           ['Yes', 'No', 'All'],['y', 'N','a'], 'N')
                if choice == 'a':
                    # 'All' accepts this change too, not only later ones.
                    self.always = True
                elif choice != 'y':
                    continue
            self.save_page(page, new_text, summary)

    def save_page(self,page,text,summary):
        """
        Save text to page with the given summary.

        Edit conflicts, spam-filter hits, save failures and locked pages
        are reported through pywikibot.output and otherwise ignored.
        """
        try:
            page.put(text, summary)
        except pywikibot.EditConflict:
            pywikibot.output(u'Skipping %s because of edit conflict'
                % (page.title(),))
        except pywikibot.SpamfilterError as e:
            pywikibot.output(u'Cannot change %s because of blacklist entry %s'
                % (page.title(), e.url))
        except pywikibot.PageNotSaved as error:
            pywikibot.output(u'Error putting page: %s'
                % (error.args,))
        except pywikibot.LockedPage:
            pywikibot.output(u'Skipping %s (locked page)'
                % (page.title(),))

def main():
    """
    Parse the command line and run the bot.

    Recognised arguments: -always, -summary[:text] (prompts for the text
    when none is given) and -namespace:n / -ns:n, which may be repeated.
    Namespace values that are not integers are kept as strings.
    """
    summary = ''
    always = False
    namespaces = []
    namespace_prefixes = ('-namespace:', '-ns:')
    # read command line parameters
    for arg in pywikibot.handleArgs():
        if arg == '-always':
            always = True
            continue
        if arg.startswith('-summary'):
            if len(arg) == len('-summary'):
                summary = pywikibot.input(u'Choose an edit summary: ')
            else:
                summary = arg[len('-summary:'):]
            continue
        for prefix in namespace_prefixes:
            if not arg.startswith(prefix):
                continue
            value = arg[len(prefix):]
            try:
                value = int(value)
            except ValueError:
                pass
            namespaces.append(value)
            break

    bot = remove_none_exist_images(summary,always,namespaces)
    bot.run()

# Run the bot only when this file is executed as a script.  stopme() is
# called in the finally clause, so it runs even if main() raises.
if __name__ == "__main__":
    try:
        main()
    finally:
        pywikibot.stopme()
_______________________________________________
Pywikipedia-l mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-l

Reply via email to