checkimages.py

alexsh Sat, 17 Oct 2009 05:50:50 -0700

Revision: 7488
Author:   alexsh
Date:     2009-10-17 12:50:32 +0000 (Sat, 17 Oct 2009)


Log Message:
-----------
Syntax cleanup and reformatting to make the code easier to read

Modified Paths:
--------------
    trunk/pywikipedia/checkimages.py

Modified: trunk/pywikipedia/checkimages.py
===================================================================
--- trunk/pywikipedia/checkimages.py    2009-10-17 12:09:43 UTC (rev 7487)
+++ trunk/pywikipedia/checkimages.py    2009-10-17 12:50:32 UTC (rev 7488)
@@ -276,27 +276,16 @@
 
 # The message that the bot will add the second time that find another license 
problem.
 second_message_without_license = {
-                'commons':None,
-                'de':None,
-                'en': None,
-                'ga': None,
+                '_default':None,
                 'hu':u'\nSzia! Úgy tűnik a [[:Kép:%s]] képpel is hasonló a 
probléma, mint az előbbivel. Kérlek olvasd el a [[WP:KÉPLIC|feltölthető 
képek]]ről szóló oldalunk, és segítségért fordulj a [[WP:KF-JO|Jogi 
kocsmafalhoz]]. Köszönöm --~~~~',
                 
'it':u':{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Senza 
licenza2|%s|__botnick__}} --~~~~',
-                'ja':None,
-                'ta':None,
-                'zh':None,
                 }
 # You can add some settings to wikipedia. In this way, you can change them 
without touching the code.
 # That's useful if you are running the bot on Toolserver.
 page_with_settings = {
+                    '_default':None,
                     'commons':u'User:Filbot/Settings',
-                    'de':None,
-                    'en':None,
-                    'ga':None,
-                    'hu':None,
                     
'it':u'Progetto:Coordinamento/Immagini/Bot/Settings#Settings',
-                    'ja':None,
-                    'ta':None,
                     'zh':u"User:Alexbot/cisettings#Settings",
                     }
 # The bot can report some images (like the images that have the same name of 
an image on commons)
@@ -367,38 +356,27 @@
         }
 # A page where there's a list of template to skip.
 PageWithHiddenTemplates = {
+    '_default':None,
     'commons': u'User:Filbot/White_templates#White_templates',
-    'en':None,
-    'ga':None,
     'it':u'Progetto:Coordinamento/Immagini/Bot/WhiteTemplates',
-    'ja':None,
     'ko': u'User:Kwjbot_IV/whitetemplates/list',
-    'ta':None,
-    'zh':None,
     }
 
 # A page where there's a list of template to consider as licenses.
 PageWithAllowedTemplates = {
+    '_default':None,
     'commons': u'User:Filbot/Allowed templates',
-    'en':None,
-    'ga':None,
     'it':u'Progetto:Coordinamento/Immagini/Bot/AllowedTemplates',
     'ko':u'User:Kwjbot_IV/AllowedTemplates',
-    'ta':None,
     }
 
 # Template added when the bot finds only an hidden template and nothing else.
 # Note: every __botnick__ will be repleaced with your bot's nickname (feel 
free not to use if you don't need it)
 HiddenTemplateNotification = {
+        '_default':None,
         'commons': u"""\n{{subst:User:Filnik/whitetemplate|File:%s}}\n\n''This 
message was '''added automatically by [[User:__botnick__|__botnick__]]''', if 
you need some help about it, ask its master (~~~) or go to the [[Commons:Help 
desk]]''. --~~~~""",
-        'de'     : None,
-        'en'     : None,
-        'ga'     : None,
         'it'     : 
u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Template_insufficiente|%s|__botnick__}}
 --~~~~",
-        'ja'     :None,
         'ko'     : u"\n{{subst:User:Kwj2772/whitetemplates|%s}} --~~~~", 
-        'ta'     :None,
-        'zh'     :None,
         }
 
 # In this part there are the parameters for the dupe images.
@@ -406,63 +384,38 @@
 # Put here the template that you want to put in the image to warn that it's a 
dupe
 # put __image__ if you want only one image, __images__ if you want the whole 
list
 duplicatesText = {
+        '_default':None,
         'commons': u'\n{{Dupe|__image__}}',
-        'en'     : None,
-        'ga'     : None,
         'it'     : u'\n{{Progetto:Coordinamento/Immagini/Bot/Template 
duplicati|__images__}}',
-        'ja'     :None,
-        'ta'     :None,
-        'zh'     :None,
         }
 # Head of the message given to the author
 duplicate_user_talk_head = {
-        'commons': None,
-        'en'     : None, 
-        'ga'     : None, 
+        '_default':None,
         'it'     : u'\n\n== File doppio ==\n',
-        'ja'     :None,
-        'ta'     :None,
-        'zh'     :None,
         }
 # Message to put in the talk
 duplicates_user_talk_text = {
+        '_default':None,
         'commons': u'{{subst:User:Filnik/duplicates|File:%s|File:%s}}', # 
FIXME: it doesn't exist
-        'en'     : None,
-        'ga'     : None,
         'it'     : 
u"{{subst:Progetto:Coordinamento/Immagini/Bot/Messaggi/Duplicati|%s|%s|__botnick__}}
 --~~~~",
-        'ja'     :None,
-        'ta'     :None,
-        'zh'     :None,
         }
 # Comment used by the bot while it reports the problem in the uploader's talk
 duplicates_comment_talk = {
+        '_default':None,
         'commons': u'Bot: Dupe file found',
-        'en'     : None,
-        'ga'     : None,
         'it'     : u"Bot: Notifico il file doppio trovato",
-        'ja'     :None,
-        'ta'     :None,
-        'zh'     :None,
         }
 # Comment used by the bot while it reports the problem in the image
 duplicates_comment_image = {
+        '_default':None,
         'commons': u'Bot: Tagging dupe file',
-        'en'     : None,
-        'ga'     : None,
         'it'     : u'Bot: File doppio, da cancellare',
-        'ja'     :None,
-        'ta'     :None,
-        'zh'     :None,
         }
 # Regex to detect the template put in the image's decription to find the dupe
 duplicatesRegex = {
+        '_default':None,
         'commons': r'\{\{(?:[Tt]emplate:|)[Dd]upe[|}]',
-        'en'     : None,
-        'ga'     : None,
         'it'     : 
r'\{\{(?:[Tt]emplate:|)[Pp]rogetto:[Cc]oordinamento/Immagini/Bot/Template 
duplicati[|}]',
-        'ja'     :None,
-        'ta'     :None,
-        'zh'     :None,
         }
 # Category with the licenses and / or with subcategories with the other 
licenses.
 category_with_licenses = {
@@ -480,29 +433,21 @@
 ## is still None.
 # Page where is stored the message to send as email to the users
 emailPageWithText = {
+        '_default':None,
         'de':'Benutzer:ABF/D3',
-        'en':None,
-        'ga':None,
-        'ja':None,
-        'ta':None,
-        'zh':None,
         }
 # Title of the email
 emailSubject = {
+        '_default':None,
         'de':'Problemen mit Deinem Bild auf der Deutschen Wikipedia',
-        'en':None,
-        'ga':None,
-        'ja':None,
-        'ta':None,
-        'zh':None,
         }
 
 # Seems that uploaderBots aren't interested to get messages regarding the
 # files that they upload.. strange, uh?
 # Format: [[user,regex], [user,regex]...] the regex is needed to match the 
user where to send the warning-msg
 uploadBots = {
+        '_default':None,
         'commons':[['File Upload Bot (Magnus 
Manske)',r'\|[Ss]ource=Transferred from .*?; transferred to Commons by 
\[\[User:(.*?)\]\]']],
-        'en':None,
 }
 
 # Add your project (in alphabetical order) if you want that the bot start
@@ -525,7 +470,6 @@
     """ Function to print the messages followed by the TimeZone encoded 
correctly. """
     if message[-1] != ' ':
         message = '%s ' % unicode(message)
-    time_zone = time.strftime("%d %b %Y %H:%M:%S (UTC)", time.localtime())
     if locale.getlocale()[1]:
         time_zone = unicode(time.strftime(u"%d %b %Y %H:%M:%S (UTC)", 
time.gmtime()), locale.getlocale()[1])
     else:
@@ -534,7 +478,21 @@
 
 class Global(object):
     # default environment settings
-    pass
+    # Command line configurable parameters
+    repeat = True # Restart after having check all the images?
+    limit = 80 # How many images check?
+    time_sleep = 30 # How many time sleep after the check?
+    skip_number = 0 # How many images to skip before checking?
+    waitTime = 0 # How many time sleep before the check?
+    commonsActive = False # Check if on commons there's an image with the same 
name?
+    normal = False # Check the new images or use another generator?
+    urlUsed = False # Use the url-related function instead of the new-pages 
generator
+    regexGen = False # Use the regex generator
+    untagged = False # Use the untagged generator   
+    duplicatesActive = False # Use the duplicate option
+    duplicatesReport = False # Use the duplicate-report option
+    sendemailActive = False # Use the send-email
+    logFullError = True # Raise an error when the log is full
     
     
 # Here there is the main class.
@@ -543,38 +501,56 @@
                  duplicatesReport = False, logFullError = True):
         """ Constructor, define some global variable """
         self.site = site
+        
         self.logFullError = logFullError
+        
         self.logFulNumber = logFulNumber
+        
         self.settings = wikipedia.translate(self.site, page_with_settings)
+        
         self.rep_page = wikipedia.translate(self.site, report_page)
+        
         self.rep_text = wikipedia.translate(self.site, report_text)
+        
         self.com = wikipedia.translate(self.site, comm10)
+        
         hiddentemplatesRaw = wikipedia.translate(self.site, HiddenTemplate)
-        self.hiddentemplates = list()
-        for template in hiddentemplatesRaw:
-            self.hiddentemplates.append(wikipedia.Page(self.site, template))
+        
+        self.hiddentemplates = [wikipedia.Page(self.site, tmp) for tmp in 
hiddentemplatesRaw]
+        
         self.pageHidden = wikipedia.translate(self.site, 
PageWithHiddenTemplates)
+        
         self.pageAllowed = wikipedia.translate(self.site, 
PageWithAllowedTemplates)        
         # Commento = Summary in italian
         self.commento = wikipedia.translate(self.site, comm)
         # Adding the bot's nickname at the notification text if needed.
         botolist = wikipedia.translate(self.site, bot_list)
+        
         project = wikipedia.getSite().family.name
+        
         self.project = project
+        
         bot = config.usernames[project]
         try:
             botnick = bot[self.site.lang]
         except KeyError:
             raise wikipedia.NoUsername(u"You have to specify an username for 
your bot in this project in the user-config.py file.")
+        
         self.botnick = botnick
         botolist.append(botnick)
+        
         self.botolist = botolist
+        
         self.sendemailActive = sendemailActive
-        self.skip_list = list() # Inizialize the skip list used below
+        # Inizialize the skip list used below
+        self.skip_list = list()
+        
         self.duplicatesReport = duplicatesReport
+        
         self.image_namespace = u"File:"
         # Load the licenses only once, so do it once
         self.list_licenses = self.load_licenses()
+    
     def setParameters(self, imageName, timestamp, uploader):
         """ Function to set parameters, now only image but maybe it can be 
used for others in "future" """
         self.imageName = imageName
@@ -582,6 +558,7 @@
         self.image = wikipedia.ImagePage(self.site, u'%s%s' % 
(self.image_namespace, self.imageName))
         self.timestamp = timestamp
         self.uploader = uploader
+    
     def report(self, newtext, image_to_report, notification = None, head = 
None,
                notification2 = None, unver = True, commTalk = None, commImage 
= None):
         """ Function to make the reports easier. """
@@ -591,18 +568,22 @@
         self.head = head
         self.notification = notification
         self.notification2 = notification2
-        if self.notification != None:
+        
+        if self.notification:
             self.notification = re.sub(r'__botnick__', self.botnick, 
notification)
-        if self.notification2 != None:
+        
+        if self.notification2:
             self.notification2 = re.sub(r'__botnick__', self.botnick, 
notification2)
         self.commTalk = commTalk
-        if commImage == None:
+        
+        if commImage:
+            self.commImage = commImage
+        else:
             self.commImage = self.commento
-        else:
-            self.commImage = commImage
+        
         # Ok, done, let's loop.
         while 1:
-            if unver == True:
+            if unver:
                 try:
                     resPutMex = self.tag_image()
                 except wikipedia.NoPage:
@@ -612,7 +593,7 @@
                     wikipedia.output(u"Edit conflict! Skip!")
                     break
                 else:
-                    if resPutMex == False:
+                    if not resPutMex:
                         break
             else:
                 try:
@@ -624,9 +605,10 @@
                     wikipedia.output(u"Edit conflict! Skip!")
                     break
                 else:
-                    if resPutMex == False:
+                    if not resPutMex:
                         break
-            if self.notification != None and self.head != None:
+            
+            if self.notification and self.head:
                 try:
                     self.put_mex_in_talk()
                 except wikipedia.EditConflict:
@@ -640,21 +622,24 @@
                     break
             else:
                 break
-
+    
     def uploadBotChangeFunction(self, reportPageText, upBotArray):
         """ Detect the user that has uploaded the file through the upload bot 
"""
         regex = upBotArray[1]
         results = re.findall(regex, reportPageText)
-        if results == []:
-            return upBotArray[0] # we can't find the user, report the problem 
to the bot
-        else:
+        
+        if results:
             luser = results[0]
             return luser
+        else:
+            return upBotArray[0] # we can't find the user, report the problem 
to the bot
+    
     def tag_image(self, put = True):
         """ Function to add the template in the image and to find out
         who's the user that has uploaded the file. """
         # Get the image's description
         reportPageObject = wikipedia.ImagePage(self.site, self.image_namespace 
+ self.image_to_report)
+        
         try:
             reportPageText = reportPageObject.get()
         except wikipedia.NoPage:
@@ -667,7 +652,7 @@
             reportPageObject.put(reportPageText + self.newtext, comment = 
self.commImage, minorEdit = True)
         # paginetta it's the image page object.        
         try:
-            if reportPageObject == self.image and self.uploader != None:
+            if reportPageObject == self.image and self.uploader:
                 nick = self.uploader
             else:
                 nick = reportPageObject.getLatestUploader()[0]
@@ -679,7 +664,8 @@
             return False
         upBots = wikipedia.translate(self.site, uploadBots)
         luser = wikipedia.url2link(nick, self.site, self.site)
-        if upBots != None:
+        
+        if upBots:
             for upBot in upBots:
                 if upBot[0] == luser:
                     luser = self.uploadBotChangeFunction(reportPageText, upBot)
@@ -687,15 +673,16 @@
         self.talk_page = talk_page
         self.luser = luser
         return True
+    
     def put_mex_in_talk(self):
         """ Function to put the warning in talk page of the uploader."""
         commento2 = wikipedia.translate(self.site, comm2)
         emailPageName = wikipedia.translate(self.site, emailPageWithText)
         emailSubj = wikipedia.translate(self.site, emailSubject)
-        if self.notification2 == None:
+        if self.notification2:
+            self.notification2 = self.notification2 % self.image_to_report
+        else:
             self.notification2 = self.notification
-        else:
-            self.notification2 = self.notification2 % self.image_to_report
         second_text = False
         # Getting the talk page's history, to check if there is another 
advise...
         # The try block is used to prevent error if you use an old 
wikipedia.py's version.
@@ -724,21 +711,23 @@
             second_text = False
             ti_es_ti = wikipedia.translate(self.site, empty)
             testoattuale = ti_es_ti
-        if self.commTalk == None:
+        
+        if self.commTalk:
+            commentox = self.commTalk
+        else:
             commentox = commento2
+        
+        if second_text:
+            newText = u"%s\n\n%s" % (testoattuale, self.notification2)
         else:
-            commentox = self.commTalk
-        if second_text == True:
-            try:
-                self.talk_page.put(u"%s\n\n%s" % (testoattuale, 
self.notification2), comment = commentox, minorEdit = False)
-            except wikipedia.LockedPage:
-                wikipedia.output(u'Talk page blocked, skip.')
-        elif second_text == False:
-            try:
-                self.talk_page.put(testoattuale + self.head + 
self.notification, comment = commentox, minorEdit = False)
-            except wikipedia.LockedPage:
-                wikipedia.output(u'Talk page blocked, skip.')
-        if emailPageName != None and emailSubj != None:
+            newText = testoattuale + self.head + self.notification
+        
+        try:
+            self.talk_page.put(newText, comment = commentox, minorEdit = False)
+        except wikipedia.LockedPage:
+            wikipedia.output(u'Talk page blocked, skip.')
+        
+        if emailPageName and emailSubj:
             emailPage = wikipedia.Page(self.site, emailPageName)
             try:
                 emailText = emailPage.get()
@@ -752,17 +741,19 @@
                 except userlib.UserActionRefuse:
                     wikipedia.output("User is not mailable, aborted")
                     return # exit
-
+    
     def untaggedGenerator(self, untaggedProject, limit):
         """ Generator that yield the files without license. It's based on a 
tool of the toolserver. """
         lang = untaggedProject.split('.', 1)[0]
         project = '.%s' % untaggedProject.split('.', 1)[1]
+        
         if lang == 'commons':
             link = 
'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?wikifam=commons.wikimedia.org&since=-100d&until=&img_user_text=&order=img_timestamp&max=100&order=img_timestamp&format=html'
         else:
             link = 
'http://toolserver.org/~daniel/WikiSense/UntaggedImages.php?wikilang=%s&wikifam=%s&order=img_timestamp&max=%s&ofs=0&max=%s'
 % (lang, project, limit, limit)
         text = self.site.getUrl(link, no_hostname = True)
         results = re.findall(r"""<td valign='top' title='Name'><a 
href='http://.*?\.org/w/index\.php\?title=(.*?)'>.*?</a></td>""", text)
+        
         if results:
             for result in results:
                 wikiPage = wikipedia.ImagePage(self.site, result)
@@ -770,31 +761,34 @@
         else:
             wikipedia.output(link)
             raise NothingFound(u'Nothing found! Try to use the tool by 
yourself to be sure that it works!')
-
+    
     def regexGenerator(self, regexp, textrun):
         """ Generator used when an user use a regex parsing a page to yield 
the results """
         regex = re.compile(r'%s' % regexp, re.UNICODE|re.DOTALL)
         results = regex.findall(textrun)
         for image in results:
             yield wikipedia.ImagePage(self.site, image)
-
+    
     def loadHiddenTemplates(self):
         """ Function to load the white templates """
         # A template as {{en is not a license! Adding also them in the 
whitelist template...
         for langK in wikipedia.Family(u'wikipedia').langs.keys():
             self.hiddentemplates.append(wikipedia.Page(self.site, 
u'Template:%s' % langK))
+        
         # The template #if: and #switch: aren't something to care about
         #self.hiddentemplates.extend([u'#if:', u'#switch:']) FIXME
+        
         # Hidden template loading
         if self.pageHidden:
             try:
                 pageHiddenText = wikipedia.Page(self.site, 
self.pageHidden).get()
             except (wikipedia.NoPage, wikipedia.IsRedirectPage):
                 pageHiddenText = ''
+            
             for element in self.load(pageHiddenText):
                 self.hiddentemplates.append(wikipedia.Page(self.site, element))
         return self.hiddentemplates
-
+    
     def returnOlderTime(self, listGiven, timeListGiven):
         """ Get some time and return the oldest of them """
         # print listGiven; print timeListGiven
@@ -813,26 +807,30 @@
                 max_usage = len(imageUsage)
                 num_older = num
             num += 1
+        
         if num_older:
             return listGiven[num_older][1]
+        
         for element in listGiven:
             time = element[0]
             imageName = element[1]
             not_the_oldest = False
+            
             for time_selected in timeListGiven:
                 if time > time_selected:
                     not_the_oldest = True
                     break
+            
             if not not_the_oldest:
                 return imageName
-
+    
     def convert_to_url(self, page):
         # Function stolen from wikipedia.py
         """The name of the page this Page refers to, in a form suitable for 
the URL of the page."""
         title = page.replace(u" ", u"_")
         encodedTitle = title.encode(self.site.encoding())
         return urllib.quote(encodedTitle)
-
+    
     def countEdits(self, pagename, userlist):
         """ Function to count the edit of a user or a list of users in a page. 
"""
         # self.botolist
@@ -841,20 +839,22 @@
         page = wikipedia.Page(self.site, pagename)
         history = page.getVersionHistory()
         user_list = list()
+        
         for data in history:
             user_list.append(data[2])
         number_edits = 0
+        
         for username in userlist:
             number_edits += user_list.count(username)
         return number_edits
-
+    
     def checkImageOnCommons(self):
         """ Checking if the file is on commons """
         wikipedia.output(u'Checking if %s is on commons...' % self.imageName)
         commons_site = wikipedia.getSite('commons', 'commons')
         regexOnCommons = r"\[\[:File:%s\]\] is also on '''Commons''': 
\[\[commons:File:.*?\]\](?: \(same name\)|)$" % re.escape(self.imageName)
         hash_found = self.image.getHash()
-        if hash_found == None:
+        if not hash_found:
             return False # Problems? Yes! Image deleted, no hash found. Skip 
the image.
         else:
             commons_image_with_this_hash = 
commons_site.getFilesFromAnHash(hash_found)
@@ -881,7 +881,7 @@
             else:
                 # Problems? No, return True
                 return True
-
+    
     def checkImageDuplicated(self, duplicates_rollback):
         """ Function to check the duplicated files. """
         # {{Dupe|File:Blanche_Montel.jpg}}
@@ -898,18 +898,23 @@
         imagePage = wikipedia.ImagePage(self.site, u'File:%s' % self.imageName)
         hash_found = imagePage.getHash()
         duplicates = self.site.getFilesFromAnHash(hash_found)
-        if duplicates == None:
+        
+        if not duplicates:
             return False # Error, image deleted, no hash found. Skip the image.
+        
         if len(duplicates) > 1:
             if len(duplicates) == 2:
                 wikipedia.output(u'%s has a duplicate! Reporting it...' % 
self.imageName)
             else:
                 wikipedia.output(u'%s has %s duplicates! Reporting them...' % 
(self.imageName, len(duplicates) - 1))
-            if not dupText == None and not dupRegex == None:
+            
+            if dupText and dupRegex:
                 time_image_list = list()
                 time_list = list()
+                
                 for duplicate in duplicates:
                     DupePage = wikipedia.ImagePage(self.site, u'File:%s' % 
duplicate)
+                    
                     if DupePage.urlname() == self.image.urlname() and 
self.timestamp != None:
                         imagedata = self.timestamp
                     else:
@@ -924,6 +929,7 @@
                 Page_oder_image = wikipedia.ImagePage(self.site, u'File:%s' % 
older_image)
                 string = ''
                 images_to_tag_list = []
+                
                 for duplicate in duplicates:
                     if wikipedia.ImagePage(self.site, u'%s:%s' % 
(self.image_namespace, duplicate)) == \
                        wikipedia.ImagePage(self.site, u'%s:%s' % 
(self.image_namespace, older_image)):
@@ -934,7 +940,8 @@
                         older_page_text = Page_oder_image.get()
                     except wikipedia.NoPage:
                         continue # The page doesn't exists
-                    if re.findall(dupRegex, DupPageText) == [] and 
re.findall(dupRegex, older_page_text) == []:
+                    
+                    if not re.findall(dupRegex, DupPageText) and not 
re.findall(dupRegex, older_page_text):
                         wikipedia.output(u'%s is a duplicate and has to be 
tagged...' % duplicate)
                         images_to_tag_list.append(duplicate)
                         #if duplicate != duplicates[-1]:
@@ -946,12 +953,13 @@
                         return True # Ok - No problem. Let's continue the 
checking phase
                 older_image_ns = u'%s%s' % (self.image_namespace, older_image) 
# adding the namespace
                 only_report = False # true if the image are not to be tagged 
as dupes
-
+                
                 # put only one image or the whole list according to the request
                 if u'__images__' in dupText:
                     text_for_the_report = re.sub(r'__images__', 
r'\n%s*[[:%s]]\n' % (string, older_image_ns), dupText)
                 else:
                     text_for_the_report = re.sub(r'__image__', r'%s' % 
older_image_ns, dupText)
+                
                 # Two iteration: report the "problem" to the user only once 
(the last)
                 if len(images_to_tag_list) > 1:
                     for image_to_tag in images_to_tag_list[:-1]:
@@ -964,6 +972,7 @@
                         text_for_the_report = re.sub(r'\n\*\[\[:%s\]\]' % 
re.escape(self.image_namespace + image_to_tag), '', text_for_the_report)
                         self.report(text_for_the_report, image_to_tag,
                                     commImage = dupComment_image, unver = True)
+                
                 if len(images_to_tag_list) != 0 and not only_report:
                     already_reported_in_past = self.countEdits(u'File:%s' % 
images_to_tag_list[-1], self.botolist)
                     image_to_resub = images_to_tag_list[-1]
@@ -977,11 +986,13 @@
                         self.report(text_for_the_report, 
images_to_tag_list[-1],
                             dupTalkText % (older_image_ns, string), 
dupTalkHead, commTalk = dupComment_talk,
                                 commImage = dupComment_image, unver = True)
+            
             if self.duplicatesReport or only_report:
                 if only_report:
                     repme = u"\n*[[:File:%s]] has the following duplicates 
('''forced mode'''):" % self.convert_to_url(self.imageName)
                 else:
                     repme = u"\n*[[:File:%s]] has the following duplicates:" % 
self.convert_to_url(self.imageName)
+                
                 for duplicate in duplicates:
                     if self.convert_to_url(duplicate) == 
self.convert_to_url(self.imageName):
                         continue # the image itself, not report also this as 
duplicate
@@ -989,16 +1000,19 @@
                 result = self.report_image(self.imageName, self.rep_page, 
self.com, repme, addings = False, regex = duplicateRegex)
                 if not result:
                     return True # If Errors, exit (but continue the check)     
           
+            
             if older_image != self.imageName:
                 return False # The image is a duplicate, it will be deleted. 
So skip the check-part, useless
         return True # Ok - No problem. Let's continue the checking phase
-
+    
     def report_image(self, image_to_report, rep_page = None, com = None, 
rep_text = None, addings = True, regex = None):
         """ Report the files to the report page when needed. """
         if not rep_page:
             rep_page = self.rep_page
+        
         if not com:
             com = self.com
+        
         if not rep_text:
             rep_text = self.rep_text
         
@@ -1012,6 +1026,7 @@
             text_get = ''
         except wikipedia.IsRedirectPage:            
             text_get = another_page.getRedirectTarget().get()
+        
         if len(text_get) >= self.logFulNumber:
             if self.logFullError:
                 raise LogIsFull(u"The log page (%s) is full! Please delete the 
old files reported." % another_page.title())
@@ -1021,6 +1036,7 @@
         # The talk page includes "_" between the two names, in this way i 
replace them to " "
         n = re.compile(regex, re.UNICODE|re.DOTALL)
         y = n.findall(text_get)
+        
         if y:
             wikipedia.output(u"%s is already in the report page." % 
image_to_report)
             reported = False
@@ -1032,7 +1048,7 @@
             wikipedia.output(u"...Reported...")
             reported = True
         return reported
-
+    
     def takesettings(self):
         """ Function to take the settings from the wiki. """
         try:
@@ -1046,6 +1062,7 @@
                     rxp = r"<------- ------->\n\*[Nn]ame ?= 
?['\"](.*?)['\"]\n\*([Ff]ind|[Ff]indonly)=(.*?)\n\*[Ii]magechanges=(.*?)\n\*[Ss]ummary=['\"](.*?)['\"]\n\*[Hh]ead=['\"](.*?)['\"]\n\*[Tt]ext
 ?= ?['\"](.*?)['\"]\n\*[Mm]ex ?= ?['\"]?([^\n]*?)['\"]?\n"
                     r = re.compile(rxp, re.UNICODE|re.DOTALL)
                     number = 1
+                    
                     for m in r.finditer(testo):
                         name = str(m.group(1))
                         find_tipe = str(m.group(2))
@@ -1058,6 +1075,7 @@
                         tupla = [number, name, find_tipe, find, imagechanges, 
summary, head, text, mexcatched]
                         self.settingsData += [tupla]
                         number += 1
+                    
                     if self.settingsData == list():
                         wikipedia.output(u"You've set wrongly your settings, 
please take a look to the relative page. (run without them)")
                         self.settingsData = None
@@ -1069,14 +1087,18 @@
             wikipedia.output(u'Problems with loading the settigs, run without 
them.')
             self.settingsData = None
             self.some_problem = False
-        if self.settingsData == []:
+        
+        if not self.settingsData:
             self.settingsData = None
+        
         # Real-Time page loaded
-        if self.settingsData != None: wikipedia.output(u'\t   >> Loaded the 
real-time page... <<')
+        if self.settingsData:
+            wikipedia.output(u'\t   >> Loaded the real-time page... <<')
         # No settings found, No problem, continue.
-        else: wikipedia.output(u'\t   >> No additional settings found! <<')
+        else:
+            wikipedia.output(u'\t   >> No additional settings found! <<')
         return self.settingsData # Useless, but it doesn't harm..
-
+    
     def load_licenses(self):
         """ Load the list of the licenses """
         """
@@ -1098,7 +1120,7 @@
         wikipedia.output(u'\n\t...Loading the licenses allowed...\n')
         list_licenses = catlib.categoryAllPageObjectsAPI(catName)
         wikipedia.output('') # blank line
-
+    
         # Add the licenses set in the default page as licenses
         # to check
         if self.pageAllowed:
@@ -1112,7 +1134,7 @@
                 if pageLicense not in list_licenses:
                     list_licenses.append(pageLicense) # the list has wiki-pages
         return list_licenses
-
+    
     def miniTemplateCheck(self, template):
         """
         Is the template given in the licenses allowed or in the licenses to 
skip?
@@ -1133,7 +1155,7 @@
             else:
                 self.whiteTemplatesFound = True
                 return False  
-
+    
     def templateInList(self):
         """
         The problem is the calls to the Mediawiki system because they can be 
pretty slow.
@@ -1155,7 +1177,7 @@
                         break                        
                 except wikipedia.NoPage:
                     continue          
-                
+    
     def smartDetection(self):
         """ The bot instead of checking if there's a simple template in the
             image's description, checks also if that template is a license or
@@ -1170,11 +1192,14 @@
             self.hiddentemplates = self.loadHiddenTemplates()      
             self.licenses_found = self.image.getTemplates()
             templatesInTheImageRaw = 
regex_find_licenses.findall(self.imageCheckText)
-            if self.licenses_found == [] and templatesInTheImageRaw != []:
+            
+            if not self.licenses_found and templatesInTheImageRaw:
                 raise wikipedia.Error("APIs seems down. No templates found 
with them but actually there are templates used in the image's page!")
             self.allLicenses = list()
-            if self.list_licenses == []:
+            
+            if not self.list_licenses:
                 raise wikipedia.Error(u'No licenses allowed provided, add that 
option to the code to make the script working correctly')
+            
             # Found the templates ONLY in the image's description
             for template_selected in templatesInTheImageRaw:
                 for templateReal in self.licenses_found:
@@ -1187,28 +1212,32 @@
             # for example: there's only self, and not GFDL and the other 
licenses.
             #if self.allLicenses == self.licenses_found and not dummy_edit and 
self.licenses_found != []:
             #    wikipedia.output(u"Seems that there's a problem regarding the 
Job queue, trying with a dummy edit to solve the problem.")
-            #    try:                    
+            #    try:
             #        self.imageCheckText = self.image.get()
             #        self.image.put(self.imageCheckText, 'Bot: Dummy edit,if 
you see this comment write [[User talk:%s|here]].' % self.botnick)
             #    except (wikipedia.NoPage, wikipedia.IsRedirectPage):
             #        return (None, list())
             #    dummy_edit = True
             #else:
-            break         
+            break
+        
         if self.licenses_found != []:
             self.templateInList()
+            
             if self.license_found == None and self.allLicenses != list():
                 # If only iterlist = self.AllLicenses if I remove something
                 # from iterlist it will be remove from self.AllLicenses too
                 iterlist = list(self.allLicenses)
+                
                 for template in iterlist:
                     try:
                         template.pageAPInfo()
                     except wikipedia.IsRedirectPage:
                         template = template.getRedirectTarget()
                     except wikipedia.NoPage:
-                        self.allLicenses.remove(template)            
-                if self.allLicenses != list():      
+                        self.allLicenses.remove(template)
+                
+                if self.allLicenses != list():
                     self.license_found = self.allLicenses[0].title()
         self.some_problem = False # If it has "some_problem" it must check
                   # the additional settings.
@@ -1245,7 +1274,7 @@
                     wikipedia.output(u"Skipping the file...")
                 self.some_problem = False
         return (self.license_found, self.whiteTemplatesFound)
-
+    
     def load(self, raw):
         """ Load a list of object from a string using regex. """
         list_loaded = list()
@@ -1349,7 +1378,7 @@
         else:
             wikipedia.output(u"The wait option is available only with the 
standard generator.")
             return generator
-     
+    
     def isTagged(self):
         """ Understand if a file is already tagged or not. """
         TextFind = wikipedia.translate(self.site, txt_find)
@@ -1365,7 +1394,7 @@
             elif i.lower() in self.imageCheckText:
                 return True
         return False # Nothing Found? Ok: False      
-
+    
     def findAdditionalProblems(self):
         # In every tupla there's a setting configuration
         for tupla in self.settingsData:
@@ -1410,7 +1439,7 @@
                         self.summary_used = summary
                         self.mex_used = mexCatched
                         continue
-
+    
     def checkStep(self):
         # nothing = Defining an empty image description
         nothing = ['', ' ', '  ', '   ', '\n', '\n ', '\n  ', '\n\n', '\n \n', 
' \n', ' \n ', ' \n \n']
@@ -1509,6 +1538,8 @@
             self.report(self.unvertext, self.imageName, notification, head, 
smwl)
             return True
 
+gbv = Global()
+
 def checkbot():
     """ Main function """
     # Command line configurable parameters
@@ -1574,7 +1605,7 @@
                 firstPageTitle = wikipedia.input(u'From witch page do you want 
to start?')
             elif len(arg) > 6:
                 firstPageTitle = arg[7:]
-            firstPageTitle = firstPageTitle.replace("File:", 
'').replace("file:", "")
+            firstPageTitle = firstPageTitle.split(":")[1:]
             generator = wikipedia.getSite().allpages(start=firstPageTitle, 
namespace=6)
             repeat = False
         elif arg.startswith('-page'):
@@ -1741,10 +1772,11 @@
         if repeat == True:
             printWithTimeZone(u"Waiting for %s seconds," % time_sleep)
             time.sleep(time_sleep)
-        elif repeat == False:
+        else:
             wikipedia.output(u"\t\t\t>> STOP! <<")
             break # Exit
 
+
 # Here there is the main loop. I'll take all the (name of the) images and then 
i'll check them.
 if __name__ == "__main__":
     old = 
datetime.datetime.strptime(str(datetime.datetime.utcnow()).split('.')[0], 
"%Y-%m-%d %H:%M:%S") #timezones are UTC



_______________________________________________
Pywikipedia-svn mailing list
Pywikipedia-svn@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-svn

[Pywikipedia-svn] SVN: [7488] trunk/pywikipedia/checkimages.py

Reply via email to