imagecopy_self.py

multichill Sun, 08 Aug 2010 07:58:51 -0700

Revision: 8389
Author:   multichill
Date:     2010-08-08 14:58:29 +0000 (Sun, 08 Aug 2010)


Log Message:
-----------
Better name

Added Paths:
-----------
    trunk/pywikipedia/imagecopy_self.py

Copied: trunk/pywikipedia/imagecopy_self.py (from rev 8388, 
trunk/pywikipedia/imagecopy_enwp.py)
===================================================================
--- trunk/pywikipedia/imagecopy_self.py                         (rev 0)
+++ trunk/pywikipedia/imagecopy_self.py 2010-08-08 14:58:29 UTC (rev 8389)
@@ -0,0 +1,721 @@
+# -*- coding: utf-8 -*-
+"""
+Script to copy self published files from the English Wikipedia to Wikimedia 
Commons.
+
+This bot is based on imagecopy.py and intended to be used to empty out 
http://en.wikipedia.org/wiki/Category:Self-published_work
+
+This bot uses a graphical interface and may not work in a
+command-line-only environment.
+
+Examples
+
+Work on a single file
+ python imagecopy_self.py -page:file:<filename>
+Work on all images in a category:<cat>
+ python imagecopy_self.py -cat:<cat>
+Work on all images which transclude a template
+ python imagecopy_self.py -transcludes:<template>
+
+See pagegenerators.py for more ways to get a list of images.
+By default the bot works on your home wiki (set in user-config)
+
+This is a first test version and should be used with care.
+
+Use -nochecktemplate if you don't want to add the check template. Be sure to 
check it yourself.
+
+Todo:
+*Queues with threads have to be implemented for the information collecting 
part and for the upload part.
+*Categories are now on a single line. Something like hotcat would be nice.
+
+"""
+#
+# Based on upload.py by:
+# (C) Rob W.W. Hooft, Andre Engels 2003-2007
+# (C) Wikipedian, Keichwa, Leogregianin, Rikwade, Misza13 2003-2007
+#
+# New bot by:
+# (C) Kyle/Orgullomoore, Siebrand Mazeland 2007
+#
+# Another rewrite by:
+#  (C) Multichill 2008
+#
+# English Wikipedia specific bot by:
+#  (C) Multichill 2010
+#
+# Distributed under the terms of the MIT license.
+#
+__version__='$Id$'
+#
+
+from Tkinter import *
+import os, sys, re, codecs
+import urllib, httplib, urllib2
+import webbrowser
+from Queue import Queue
+import time, threading
+import wikipedia, config, socket
+import pagegenerators, add_text
+import imagerecat
+from datetime import datetime
+from upload import *
+from image import *
+# NOTE(review): NL is not referenced anywhere else in this file.
+NL=''
+
+# Wikitext appended to the local file page once the file exists on Commons,
+# keyed by language code; %s is replaced by the filename used on Commons.
+# NOTE(review): the uploader class also looks up a '_default' key, which is
+# not defined in this table.
+nowCommonsTemplate = {
+    'en': 
u'{{NowCommons|1=File:%s|date=~~~~~|reviewer={{subst:REVISIONUSER}}}}',
+}
+
+# Edit summary used when the NowCommons template is added, keyed by language
+# code (no '_default' entry here either).
+nowCommonsMessage = {
+    'en': u'File is now available on Wikimedia Commons.',
+}
+
+# Names of the "move to Commons" templates that are removed from the local
+# file page when it is tagged, keyed by language code.
+moveToCommonsTemplate = {
+    'en': [u'Commons ok', u'Copy to Wikimedia Commons', u'Move to commons', 
u'Movetocommons', u'To commons', u'Copy to Wikimedia Commons by BotMultichill'],
+}
+
+# Edit summary used when usage of a renamed file is replaced, keyed by
+# language code; filled with (old filename, new filename).
+imageMoveMessage = {
+    'en': u'[[:File:%s|File]] moved to [[:commons:File:%s|commons]].',
+}
+
+# A file page that uses any of these templates is skipped (see
+# imageFetcher.doiskip).
+skipTemplates = [u'Db-f1',
+                 u'Db-f2',
+                 u'Db-f3',
+                 u'Db-f7',
+                 u'Db-f8',
+                 u'Db-f9',
+                 u'Db-f10',
+                 u'NowCommons',
+                 u'CommonsNow',
+                 u'Nowcommons',
+                 u'NowCommonsThis',
+                 u'Nowcommons2',
+                 u'NCT',
+                 u'Nowcommonsthis',
+                 u'Moved to commons',
+                 u'Now Commons',
+                 u'Now at commons',
+                 u'Db-nowcommons',
+                 u'WikimediaCommons',
+                 u'Now commons',
+                 u'Di-no source',
+                 u'Di-no license',
+                 u'Di-no permission',
+                 u'Di-orphaned fair use',
+                 u'Di-no source no license',
+                 u'Di-replaceable fair use',
+                 u'Di-no fair use rationale',
+                 u'Di-disputed fair use rationale',
+                 u'Puf',
+                 u'PUI',
+                 u'Pui',
+                 u'Ffd',
+                 u'PD-user', # Only the self templates are supported for now.
+                 ]
+                 
+
+# (regex, replacement) pairs. The regex recognises a supported license
+# template in the source wikitext; the replacement is the template to use on
+# Commons. Backreferences (\\1, \\2) carry over the captured license
+# parameters, and %(lang)s, %(family)s and %(author)s are filled in by
+# imageFetcher.getNewLicensetemplate.
+licenseTemplates = [(u'\{\{(self|self2)\|([^\}]+)\}\}', 
u'{{Self|\\2|author=[[:%(lang)s:User:%(author)s|%(author)s]] at 
[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'),
+                    
(u'\{\{(GFDL-self|GFDL-self-no-disclaimers)\|([^\}]+)\}\}', 
u'{{Self|GFDL|\\2|author=[[:%(lang)s:User:%(author)s|%(author)s]] at 
[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'),
+                    (u'\{\{GFDL-self-with-disclaimers\|([^\}]+)\}\}', 
u'{{Self|GFDL-with-disclaimers|\\1|author=[[:%(lang)s:User:%(author)s|%(author)s]]
 at [http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'),
+                    (u'\{\{PD-self(\|date=[^\}]+)?\}\}', 
u'{{PD-user-w|%(lang)s|%(family)s|%(author)s}}'),
+                    #Multilicense replacing placeholder
+                    (u'\{\{Multilicense replacing placeholder 
new(\|class=[^\}]+)?\}\}', 
u'{{Self|GFDL|Cc-by-sa-3.0,2.5,2.0,1.0|author=[[:%(lang)s:User:%(author)s|%(author)s]]
 at [http://%(lang)s.%(family)s.org %(lang)s.%(family)s]}}'),
+                    ]
+
+# Regexes for wikitext that is stripped from a free-text description before
+# it is reused (see imageFetcher.getNewFieldsFromFreetext): section headers
+# and the various "move to Commons" templates.
+sourceGarbage =     [u'==\s*Summary\s*==',
+                     u'==\s*Licensing:?\s*==',
+                     u'\{\{(Copy to Wikimedia Commons|Move to Commons|Move to 
commons|Move to Wikimedia Commons|Copy to 
commons|Mtc|MtC|MTC|CWC|CtWC|CTWC|Ctwc|Tocommons|Copy to Commons|To 
Commons|Movetocommons|Move to Wikimedia commons|Move-to-commons|Commons 
ok|ToCommons|To commons|MoveToCommons|Copy to wikimedia commons|Upload to 
commons|CopyToCommons|Copytocommons|MITC|MovetoCommons|Do move to Commons)\}\}'
+                    ]
+
+class Tkdialog:
+    def __init__(self, imagepage, description, date, source, author, 
licensetemplate, categories):
+        self.root=Tk()
+        #"%dx%d%+d%+d" % (width, height, xoffset, yoffset)
+        #Always appear the same size and in the bottom-left corner
+        self.root.geometry("1000x400+100-100")
+        self.root.title(imagepage.titleWithoutNamespace())
+
+
+        self.url=imagepage.permalink()
+        self.scrollbar=Scrollbar(self.root, orient=VERTICAL)
+
+        self.old_description=Text(self.root)
+        self.old_description.insert(END, imagepage.get().encode('utf-8'))
+        self.old_description.config(state=DISABLED, height=8, width=140, 
padx=0, pady=0, wrap=WORD, yscrollcommand=self.scrollbar.set)
+
+        self.scrollbar.config(command=self.old_description.yview)
+
+        self.filename = imagepage.titleWithoutNamespace()
+
+        self.description = description
+        self.date = date
+        self.source = source
+        self.author = author 
+        self.licensetemplate = licensetemplate
+        self.categories = categories
+        self.skip = False
+
+        self.old_description_label=Label(self.root,text=u'The old description 
was : ')
+        self.new_description_label=Label(self.root,text=u'The new fields are : 
')
+        self.filename_label=Label(self.root,text=u'Filename : ')
+        self.information_description_label=Label(self.root,text=u'Description 
: ')
+        self.information_date_label=Label(self.root,text=u'Date : ')
+        self.information_source_label=Label(self.root,text=u'Source : ')
+        self.information_author_label=Label(self.root,text=u'Author : ')
+        self.information_licensetemplate_label=Label(self.root,text=u'License 
: ')
+        self.information_categories_label=Label(self.root,text=u'Categories : 
')
+
+        self.filename_field=Entry(self.root)
+        self.information_description=Entry(self.root)
+        self.information_date=Entry(self.root)
+        self.information_source=Entry(self.root)
+        self.information_author=Entry(self.root)
+        self.information_licensetemplate=Entry(self.root)
+        self.information_categories=Entry(self.root)
+
+        self.field_width=120
+
+        self.filename_field.config(width=self.field_width)
+        self.information_description.config(width=self.field_width)
+        self.information_date.config(width=self.field_width)
+        self.information_source.config(width=self.field_width)
+        self.information_author.config(width=self.field_width)
+        self.information_licensetemplate.config(width=self.field_width)
+        self.information_categories.config(width=self.field_width)
+
+
+        self.filename_field.insert(0, self.filename)
+        self.information_description.insert(0, self.description)
+        self.information_date.insert(0, self.date)
+        self.information_source.insert(0, self.source)
+        self.information_author.insert(0, self.author)
+        self.information_licensetemplate.insert(0, self.licensetemplate)
+        self.information_categories.insert(0, self.categories)
+
+        self.browserButton=Button(self.root, text='View in browser', 
command=self.openInBrowser)
+        self.skipButton=Button(self.root, text="Skip", command=self.skipFile)
+        self.okButton=Button(self.root, text="OK", command=self.okFile)
+
+        ##Start grid
+        self.old_description_label.grid(row=0, column=0, columnspan=3)
+
+        self.old_description.grid(row=1, column=0, columnspan=3)
+        self.scrollbar.grid(row=1, column=3)
+        self.new_description_label.grid(row=2, column=0, columnspan=3)
+        
+        self.filename_label.grid(row=3, column=0)
+        self.information_description_label.grid(row=4, column=0)
+        self.information_date_label.grid(row=5, column=0)
+        self.information_source_label.grid(row=6, column=0)
+        self.information_author_label.grid(row=7, column=0)
+        self.information_licensetemplate_label.grid(row=8, column=0)
+        self.information_categories_label.grid(row=9, column=0)
+
+        self.filename_field.grid(row=3, column=1, columnspan=3)
+        self.information_description.grid(row=4, column=1, columnspan=3)
+        self.information_date.grid(row=5, column=1, columnspan=3)
+        self.information_source.grid(row=6, column=1, columnspan=3)
+        self.information_author.grid(row=7, column=1, columnspan=3)
+        self.information_licensetemplate.grid(row=8, column=1, columnspan=3)
+        self.information_categories.grid(row=9, column=1, columnspan=3)
+
+        self.okButton.grid(row=10, column=3, rowspan=2)
+        self.skipButton.grid(row=10, column=2, rowspan=2)
+        self.browserButton.grid(row=10, column=1, rowspan=2)
+
+    def okFile(self):
+        '''
+        The user pressed the OK button.
+        '''
+        self.filename=self.filename_field.get()
+        self.description=self.information_description.get()
+        self.date=self.information_date.get()
+        self.source=self.information_source.get()
+        self.author=self.information_author.get()
+        self.licensetemplate=self.information_licensetemplate.get()
+        self.categories=self.information_categories.get()
+        
+        self.root.destroy()
+
+    def skipFile(self):
+        '''
+        The user pressed the Skip button.
+        '''
+        self.skip=1
+        self.root.destroy()
+
+    def openInBrowser(self):
+        '''
+        The user pressed the View in browser button.
+        '''
+        webbrowser.open(self.url)
+
+    def add2autoskip(self):
+        '''
+        The user pressed the Add to AutoSkip button.
+        '''
+        templateid=int(self.templatelist.curselection()[0])
+        template=self.templatelist.get(templateid)
+        toadd=codecs.open(archivo, 'a', 'utf-8')
+        toadd.write('{{'+template)
+        toadd.close()
+        self.skipFile()
+
+    def getnewmetadata(self):
+        '''
+        Activate the dialog and return the new name and if the image is 
skipped.
+        '''
+        self.root.mainloop()
+        return (self.filename, self.description, self.date, self.source, 
self.author, self.licensetemplate, self.categories, self.skip)
+
+
+class imageFetcher(threading.Thread):
+    '''
+    Tries to fetch information for all images in the generator
+    '''
+    def __init__ ( self, pagegenerator, prefetchQueue):
+        self.pagegenerator = pagegenerator
+        self.prefetchQueue = prefetchQueue
+        imagerecat.initLists()
+        threading.Thread.__init__ ( self )
+
+    def run(self):
+        for page in self.pagegenerator:
+            self.processImage(page)
+        self.prefetchQueue.put(None)
+        wikipedia.output(u'Fetched all images.')
+        return True
+
+    def processImage(self, page):
+        '''
+        Work on a single image
+        '''
+        if page.exists() and (page.namespace() == 6) and (not 
page.isRedirectPage()):
+            imagepage = wikipedia.ImagePage(page.site(), page.title())
+
+            #First do autoskip.
+            if self.doiskip(imagepage):
+                wikipedia.output(u'Skipping %s : Got a template on the skip 
list.' % page.title())
+                return False
+            
+            text = imagepage.get()
+            foundMatch = False
+            for (regex, replacement) in licenseTemplates:
+                match = re.search(regex, text, re.IGNORECASE)
+                if match:
+                    foundMatch = True
+            if not foundMatch:
+                wikipedia.output(u'Skipping %s : No suitable license template 
was found.' % page.title())
+                return False
+            self.prefetchQueue.put(self.getNewFields(imagepage))
+
+    def doiskip(self, imagepage):
+        '''
+        Skip this image or not.
+        Returns True if the image is on the skip list, otherwise False
+        '''
+        for template in imagepage.templates():
+            if template in skipTemplates:
+                wikipedia.output(u'Found ' + template + u' which is on the 
template skip list')
+                return True
+        return False
+
+    def getNewFields(self, imagepage):
+        '''
+        Build a new description based on the imagepage
+        '''
+        if u'{{Information' in imagepage.get() or u'{{information' in 
imagepage.get():
+            (description, date, source, author) = 
self.getNewFieldsFromInformation(imagepage)       
+        else:
+            (description, date, source, author) = 
self.getNewFieldsFromFreetext(imagepage)
+
+        licensetemplate = self.getNewLicensetemplate(imagepage)
+        categories = self.getNewCategories(imagepage)
+        return (imagepage, description, date, source, author, licensetemplate, 
categories)
+
+    def getNewFieldsFromInformation(self, imagepage):
+        '''
+        Try to extract fields from the current information template for the 
new information template.
+        '''
+        description = u''
+        date = u''
+        source = u''
+        author = u''
+        permission = u''
+        other_versions = u''
+        text = imagepage.get()
+        # Need to add the permission field
+        # Need to use pywikipedia template parser code
+        regexes 
=[u'\{\{Information[\s\r\n]*\|[\s\r\n]*description[\s\r\n]*=(?P<description>.*)\|[\s\r\n]*source[\s\r\n]*=(?P<source>.*)\|[\s\r\n]*date[\s\r\n]*=(?P<date>.*)\|[\s\r\n]*author[\s\r\n]*=(?P<author>.*)\|[\s\r\n]*permission.*=(?P<permission>[^\}]*)\|[\s\r\n]*other_versions.*=(?P<other_versions>[^\}]*)\}\}',
+                  
u'\{\{Information[\s\r\n]*\|[\s\r\n]*description[\s\r\n]*=(?P<description>.*)\|[\s\r\n]*source[\s\r\n]*=(?P<source>.*)\|[\s\r\n]*date[\s\r\n]*=(?P<date>.*)\|[\s\r\n]*author[\s\r\n]*=(?P<author>.*)\|[\s\r\n]*other_versions.*=(?P<other_versions>[^\}]*)\}\}',
              
+                  ]
+                
+        for regex in regexes:
+            match =re.search(regex, text, re.IGNORECASE|re.DOTALL)
+            if match:
+                description = 
self.convertLinks(match.group(u'description').strip(), imagepage.site())
+                
+                date = match.group(u'date').strip()
+                if date == u'':
+                    date = self.getUploadDate(imagepage)
+
+                source = self.getSource(imagepage, 
source=self.convertLinks(match.group(u'source').strip(), imagepage.site()))
+
+                author = self.convertLinks(match.group(u'author').strip(), 
imagepage.site())
+                if author == u'':
+                    author = self.getAuthorText(imagepage)
+                
+                if u'permission' in match.groupdict():
+                    permission = 
self.convertLinks(match.group(u'permission').strip(), imagepage.site())
+                if  u'other_versions' in match.groupdict():
+                    other_versions = 
self.convertLinks(match.group(u'other_versions').strip(), imagepage.site())
+                # Return the stuff we found
+                return (description, date, source, author)
+        
+        #We didn't find anything, return the empty strings
+        return (description, date, source, author)
+
+    def getNewFieldsFromFreetext(self, imagepage):
+        '''
+        Try to extract fields from free text for the new information template.
+        '''
+        text = imagepage.get()
+        #text = re.sub(u'== Summary ==', u'', text, re.IGNORECASE)
+        #text = re.sub(u'== Licensing ==', u'', text, re.IGNORECASE)
+        #text = re.sub(u'\{\{(self|self2)\|[^\}]+\}\}', u'', text, 
re.IGNORECASE)
+
+        for toRemove in sourceGarbage:
+            text = re.sub(toRemove, u'', text, re.IGNORECASE)
+        
+        for (regex, repl) in licenseTemplates:
+            text = re.sub(regex, u'', text, re.IGNORECASE)
+
+        text = wikipedia.removeCategoryLinks(text, imagepage.site()).strip()
+            
+        description = self.convertLinks(text.strip(), imagepage.site())
+        date = self.getUploadDate(imagepage)
+        source = self.getSource(imagepage)
+        author = self.getAuthorText(imagepage)
+        return (description, date, source, author)
+
+    def getUploadDate(self, imagepage):
+        '''
+        Get the original upload date to put in the date field of the new 
information template. If we really have nothing better.
+        '''
+        uploadtime = imagepage.getFileVersionHistory()[-1][0]
+        uploadDatetime = datetime.strptime(uploadtime, u'%Y-%m-%dT%H:%M:%SZ')
+        return u'{{Date|' + str(uploadDatetime.year) + u'|' + 
str(uploadDatetime.month) + u'|' + str(uploadDatetime.day) + u'}} (original 
upload date)'
+
+    def getSource(self, imagepage, source=u''):
+        '''
+        Get the text to put in the source field of the new information 
template.
+        '''
+        site = imagepage.site()
+        lang = site.language()
+        family = site.family.name
+        if source==u'':
+            source=u'{{Own}}'
+            
+        return source.strip() + u'<BR />Transferred from 
[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]' % {u'lang' : lang, 
u'family' : family}
+
+    def getAuthorText(self, imagepage):
+        '''
+        Get the original uploader to put in the author field of the new 
information template.
+        '''
+        site = imagepage.site()
+        lang = site.language()
+        family = site.family.name
+        
+        firstuploader = self.getAuthor(imagepage)
+        return u'[[:%(lang)s:User:%(firstuploader)s|%(firstuploader)s]] at 
[http://%(lang)s.%(family)s.org %(lang)s.%(family)s]' % {u'lang' : lang, 
u'family' : family , u'firstuploader' : firstuploader}
+
+    def getAuthor(self, imagepage):
+        '''
+        Get the first uploader.
+        '''
+        return imagepage.getFileVersionHistory()[-1][1].strip()
+
+    def convertLinks(self, text, sourceSite):
+        '''
+        Convert links from the current wiki to Commons.
+        '''
+        lang = sourceSite.language()
+        family = sourceSite.family.name
+        conversions =[(u'\[\[([^\[\]\|]+)\|([^\[\]\|]+)\]\]', 
u'[[:%(lang)s:\\1|\\2]]'),
+                      (u'\[\[([^\[\]\|]+)\]\]', u'[[:%(lang)s:\\1|\\1]]'),
+                      ]
+        
+        for (regex, replacement) in conversions:
+            text = re.sub(regex, replacement  % {u'lang' : lang, u'family' : 
family}, text)              
+
+        return text
+
+    def getNewLicensetemplate(self, imagepage):
+        '''
+        Get a license template to put on the image to be uploaded
+        '''
+        text = imagepage.get()
+        
+        site = imagepage.site()
+        lang = site.language()
+        family = site.family.name
+
+        result = u''   
+
+        for (regex, replacement) in licenseTemplates:
+            match = re.search(regex, text, re.IGNORECASE)
+            if match:
+                result = re.sub(regex, replacement, match.group(0), 
re.IGNORECASE)
+                return result % {u'author' : self.getAuthor(imagepage),
+                                 u'lang' : lang,
+                                 u'family' : family}
+            
+        return result
+        
+    def getNewCategories(self, imagepage):
+        '''
+        Get a categories for the image
+        Dont forget to filter
+        '''
+        result = u''
+        (commonshelperCats, usage, galleries) = 
imagerecat.getCommonshelperCats(imagepage)
+        newcats = imagerecat.applyAllFilters(commonshelperCats)
+        for newcat in newcats:
+            result = result + u'[[Category:' + newcat + u']] '
+        return result
+
+class userInteraction(threading.Thread):
+    '''
+    Prompt all images to the user.
+    '''
+    def __init__ ( self, prefetchQueue, uploadQueue):
+        self.prefetchQueue = prefetchQueue
+        self.uploadQueue = uploadQueue
+        threading.Thread.__init__ ( self )
+
+    def run(self):
+        while True:
+            fields = self.prefetchQueue.get()
+            if fields:
+                self.processImage(fields)
+            else:
+                break
+        self.uploadQueue.put(None)
+        wikipedia.output(u'User worked on all images.')
+        return True
+            
+    def processImage(self, fields):
+        '''
+        Work on a single image
+        '''
+        (imagepage, description, date, source, author, licensetemplate, 
categories) = fields
+        while True:
+            # Do the Tkdialog to accept/reject and change te name
+            (filename, description, date, source, author, licensetemplate, 
categories, skip)=Tkdialog(imagepage, description, date, source, author, 
licensetemplate, categories).getnewmetadata()
+
+            if skip:
+                wikipedia.output(u'Skipping %s : User pressed skip.' % 
imagepage.title())
+                return False
+                   
+            # Check if the image already exists
+            CommonsPage=wikipedia.Page(wikipedia.getSite('commons', 
'commons'), u'File:' + filename)
+            if not CommonsPage.exists():
+                break
+            else:
+                wikipedia.output('Image already exists, pick another name or 
skip this image')
+                # We dont overwrite images, pick another name, go to the start 
of the loop
+
+        self.uploadQueue.put((imagepage, filename, description, date, source, 
author, licensetemplate, categories))
+
+
+class uploader(threading.Thread):
+    '''
+    Upload all images
+    '''
+    def __init__ ( self, uploadQueue):
+        self.uploadQueue = uploadQueue
+        self.checktemplate = True
+        threading.Thread.__init__ ( self )
+
+    def run(self):
+        while True: #Change later
+            fields = self.uploadQueue.get()
+            if fields:
+                self.processImage(fields)
+            else:
+                break
+        return True
+
+    def nochecktemplate(self):
+        '''
+        Don't want to add {{BotMoveToCommons}}
+        '''
+        self.checktemplate = False
+        return
+    
+    def processImage(self, fields):
+        '''
+        Work on a single image
+        '''
+        (imagepage, filename, description, date, source, author, 
licensetemplate, categories) = fields
+        cid = self.buildNewImageDescription(imagepage, description, date, 
source, author, licensetemplate, categories)
+        wikipedia.output(cid)
+        bot = UploadRobot(url=imagepage.fileUrl(), description=cid, 
useFilename=filename, keepFilename=True, verifyDescription=False, ignoreWarning 
= True, targetSite = wikipedia.getSite('commons', 'commons'))
+        bot.run()
+        
+        self.tagNowcommons(imagepage, filename)
+        self.replaceUsage(imagepage, filename)
+       
+
+    def buildNewImageDescription(self, imagepage, description, date, source, 
author, licensetemplate, categories):
+        '''
+        Build a new information template 
+        '''
+        
+        site = imagepage.site()
+        lang = site.language()
+        family = site.family.name
+        
+        cid = u''
+        if self.checktemplate:
+            cid = cid + 
u'\n{{BotMoveToCommons|%(lang)s.%(family)s|year={{subst:CURRENTYEAR}}|month={{subst:CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}}}\n'
 % {u'lang' : lang, u'family' : family}
+        cid = cid + u'== {{int:filedesc}} ==\n'
+        cid = cid + u'{{Information\n'
+        cid = cid + u'|description={{%(lang)s|1=' % {u'lang' : lang, u'family' 
: family}
+        cid = cid + description + u'}}\n' 
+        cid = cid + u'|date=' + date + u'\n'
+        cid = cid + u'|source=' + source + u'\n'
+        cid = cid + u'|author=' + author + u'\n'
+        cid = cid + u'|permission=\n'
+        cid = cid + u'|other_versions=\n'
+        cid = cid + u'}}\n'
+        cid = cid + u'== {{int:license}} ==\n'
+        cid = cid + licensetemplate + u'\n'
+        cid = cid + u'\n'
+        cid = cid + self.getOriginalUploadLog(imagepage)
+        cid = cid + u'__NOTOC__\n'
+        if categories.strip()==u'':
+            cid = cid + u'{{Subst:Unc}}'
+        else:
+            cid = cid + categories
+        return cid
+
+    def getOriginalUploadLog(self, imagepage):
+        '''
+        Get the original upload log to put at the bottom of the image 
description page at Commons.
+        '''
+        filehistory = imagepage.getFileVersionHistory()
+        filehistory.reverse()
+
+        site = imagepage.site()
+        lang = site.language()
+        family = site.family.name
+
+        sourceimage = 
imagepage.site().get_address(imagepage.title()).replace(u'&redirect=no&useskin=monobook',
 u'')
+        
+        result = u'== {{Original upload log}} ==\n'
+        result = result + u'The original description page is/was 
[http://%(lang)s.%(family)s.org%(sourceimage)s here]. All following user names 
refer to %(lang)s.%(family)s.\n' % {u'lang' : lang, u'family' : family , 
u'sourceimage' : sourceimage}
+        for (timestamp, username, resolution, size, comment) in filehistory:
+            date = datetime.strptime(timestamp, 
u'%Y-%m-%dT%H:%M:%SZ').strftime('%Y-%m-%d %H:%M')
+            result = result + u'* %(date)s 
[[:%(lang)s:user:%(username)s|%(username)s]] %(resolution)s (%(size)s bytes) 
\'\'<nowiki>%(comment)s</nowiki>\'\'\n' % {
+                u'lang' : lang,
+                u'family' : family ,
+                u'date' : date,
+                u'username' : username,
+                u'resolution': resolution,
+                u'size': size,
+                u'comment' : comment}       
+            
+        return result
+
+    def tagNowcommons(self, imagepage, filename):
+        '''
+        Tagged the imag which has been moved to Commons for deletion.
+        '''
+        if wikipedia.Page(wikipedia.getSite('commons', 'commons'), u'File:' + 
filename).exists():
+            #Get a fresh copy, force to get the page so we dont run into edit 
conflicts
+            imtxt=imagepage.get(force=True)
+
+            #Remove the move to commons templates
+            if imagepage.site().language() in moveToCommonsTemplate:
+                for moveTemplate in 
moveToCommonsTemplate[imagepage.site().language()]:
+                    imtxt = re.sub(u'(?i)\{\{' + moveTemplate + u'[^\}]*\}\}', 
u'', imtxt)
+
+            #add {{NowCommons}}
+            if imagepage.site().language() in nowCommonsTemplate:
+                addTemplate = nowCommonsTemplate[imagepage.site().language()] 
% filename
+            else:
+                addTemplate = nowCommonsTemplate['_default'] % filename
+
+            if imagepage.site().language() in nowCommonsMessage:
+                commentText = nowCommonsMessage[imagepage.site().language()]
+            else:
+                commentText = nowCommonsMessage['_default']
+
+            wikipedia.showDiff(imagepage.get(), imtxt + addTemplate)
+            imagepage.put(imtxt + addTemplate, comment = commentText)
+
+    def replaceUsage(self, imagepage, filename):
+        '''
+        If the image is uploaded under a different name, replace all usage.
+        '''
+        if imagepage.titleWithoutNamespace() != filename:
+            gen = pagegenerators.FileLinksGenerator(imagepage)
+            preloadingGen = pagegenerators.PreloadingGenerator(gen)
+            
+            if imagepage.site().language() in imageMoveMessage:
+                moveSummary = imageMoveMessage[imagepage.site().language()] % 
(imagepage.titleWithoutNamespace(), filename)
+            else:
+                moveSummary = imageMoveMessage['_default'] % 
(imagepage.titleWithoutNamespace(), filename)
+            imagebot = ImageRobot(generator = preloadingGen, oldImage = 
imagepage.titleWithoutNamespace(), newImage = filename, summary = moveSummary, 
always = True, loose = True)
+            imagebot.run()   
+    
+
+def main(args):
+    wikipedia.output(u'WARNING: This is an experimental bot')
+    wikipedia.output(u'WARNING: It will only work on self published work 
images')
+    wikipedia.output(u'WARNING: This bot is still full of bugs')
+    wikipedia.output(u'WARNING: Use at your own risk!')
+
+    generator = None;
+    always = False
+    checkTemplate = True
+    
+    # Load a lot of default generators
+    genFactory = pagegenerators.GeneratorFactory()
+
+    for arg in wikipedia.handleArgs():
+        if arg == '-nochecktemplate':
+            checkTemplate = False
+        else:
+            genFactory.handleArg(arg)
+    
+    generator = genFactory.getCombinedGenerator()
+    if not generator:
+        raise add_text.NoEnoughData('You have to specify the generator you 
want to use for the script!')
+
+    pregenerator = pagegenerators.PreloadingGenerator(generator)
+
+    prefetchQueue = Queue(maxsize=50)
+    uploadQueue = Queue(maxsize=200)
+
+    imageFetcherThread = imageFetcher(pregenerator, prefetchQueue)
+    userInteractionThread = userInteraction(prefetchQueue, uploadQueue)
+    uploaderThread = uploader(uploadQueue)
+
+    imageFetcherThread.daemon=False
+    userInteractionThread.daemon=False
+    uploaderThread.daemon=False
+    
+    if not checkTemplate:
+        uploaderThread.nochecktemplate()
+    
+    fetchDone = imageFetcherThread.start()
+    userDone = userInteractionThread.start()
+    uploadDone = uploaderThread.start()
+
+
+# Script entry point: run main() and always call wikipedia.stopme()
+# afterwards, also when main() raises.
+# NOTE(review): main() only starts the worker threads and returns, so
+# stopme() is called while they may still be running - confirm this is
+# intended.
+if __name__ == "__main__":
+    try:
+        main(sys.argv[1:])
+    finally:
+        wikipedia.stopme()



_______________________________________________
Pywikipedia-svn mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

[Pywikipedia-svn] SVN: [8389] trunk/pywikipedia/imagecopy_self.py

Reply via email to