Xqt has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/282619

Change subject: [WIP] Cleanup code
......................................................................

[WIP] Cleanup code

- use SingleSiteBot, ExistingPageBot and NoRedirectPageBot class
- remove -hash parts. Service is not available anymore
- use treat_page
- TODO: check whether commonsImagePage exists
        rewrite filenameOnCommons, do not use transferby=wdwdbot as filename

DO NOT SUBMIT

Bug: T132297
Change-Id: I534f41a39097f47f6b3ae1d73eec3c87f3acec9d
---
M scripts/nowcommons.py
1 file changed, 116 insertions(+), 211 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core 
refs/changes/19/282619/1

diff --git a/scripts/nowcommons.py b/scripts/nowcommons.py
index 166b776..343253d 100755
--- a/scripts/nowcommons.py
+++ b/scripts/nowcommons.py
@@ -34,14 +34,9 @@
     -replaceonly    Use this if you do not have a local sysop account, but do
                     wish to replace links from the NowCommons template.
 
-    -hash           Use the hash to identify the images that are the same. It
-                    doesn't work always, so the bot opens two tabs to let to
-                    the user to check if the images are equal or not.
-
 -- Example --
 
-    python pwb.py nowcommons -replaceonly -replaceloose -replacealways \
-        -replace -hash
+    python pwb.py nowcommons -replaceonly -replaceloose -replacealways -replace
 
 -- Known issues --
 Please fix these if you are capable and motivated:
@@ -51,8 +46,8 @@
 #
 # (C) Wikipedian, 2006-2007
 # (C) Siebrand Mazeland, 2007-2008
-# (C) xqt, 2010-2014
-# (C) Pywikibot team, 2006-2015
+# (C) xqt, 2010-2016
+# (C) Pywikibot team, 2006-2016
 #
 # Distributed under the terms of the MIT license.
 #
@@ -67,7 +62,8 @@
 
 import pywikibot
 
-from pywikibot import i18n, Bot
+from pywikibot import i18n
+from pywikibot.bot import ExistingPageBot, NoRedirectPageBot, SingleSiteBot
 from pywikibot import pagegenerators as pg
 from pywikibot.tools.formatter import color_format
 
@@ -188,7 +184,7 @@
 }
 
 
-class NowCommonsDeleteBot(Bot):
+class NowCommonsDeleteBot(SingleSiteBot, ExistingPageBot, NoRedirectPageBot):
 
     """Bot to delete migrated files."""
 
@@ -199,13 +195,18 @@
             'replacealways': False,
             'replaceloose': False,
             'replaceonly': False,
-            'use_hash': False,
         })
         super(NowCommonsDeleteBot, self).__init__(**kwargs)
 
-        self.site = pywikibot.Site()
-        if repr(self.site) == 'commons:commons':
-            sys.exit('Do not run this bot on Commons!')
+        if not self.site.has_image_repository:
+             sys.exit('There must be a file repository to run this script')
+        else:
+            self.commons = self.site.image_repository()
+            if self.site == self.commons:
+                sys.exit(
+                    'You cannot run this bot on file repository like Commons.')
+        self.summary = i18n.twtranslate(self.site,
+                                       'imagetransfer-nowcommons_notice')
 
     def ncTemplates(self):
         """Return nowcommons templates."""
@@ -222,77 +223,15 @@
                                      for title in self.ncTemplates())
         return self._nc_templates
 
-    def useHashGenerator(self):
-        """Use hash generator."""
-        # 
https://toolserver.org/~multichill/nowcommons.php?language=it&page=2&filter=
-        lang = self.site.lang
-        num_page = 0
-        word_to_skip_translated = i18n.translate(self.site, word_to_skip)
-        images_processed = list()
-        while 1:
-            url = ('https://toolserver.org/~multichill/nowcommons.php?'
-                   'language=%s&page=%s&filter=') % (lang, num_page)
-            HTML_text = self.site.getUrl(url, no_hostname=True)
-            reg = r'<[Aa] href="(?P<urllocal>.*?)">(?P<imagelocal>.*?)</[Aa]> 
+?</td><td>\n\s*?'
-            reg += r'<[Aa] 
href="(?P<urlcommons>http[s]?://commons.wikimedia.org/.*?)" \
-                   >Image:(?P<imagecommons>.*?)</[Aa]> +?</td><td>'
-            regex = re.compile(reg, re.UNICODE)
-            found_something = False
-            change_page = True
-            for x in regex.finditer(HTML_text):
-                found_something = True
-                image_local = x.group('imagelocal')
-                image_commons = x.group('imagecommons')
-                if image_local in images_processed:
-                    continue
-                change_page = False
-                images_processed.append(image_local)
-                # Skip images that have something in the title (useful for 
it.wiki)
-                image_to_skip = False
-                for word in word_to_skip_translated:
-                    if word.lower() in image_local.lower():
-                        image_to_skip = True
-                if image_to_skip:
-                    continue
-                url_local = x.group('urllocal')
-                url_commons = x.group('urlcommons')
-                pywikibot.output(color_format(
-                    '\n\n>>> {lightpurple}{0}{default} <<<',
-                    image_local))
-                pywikibot.output(u'Local: %s\nCommons: %s\n'
-                                 % (url_local, url_commons))
-                webbrowser.open(url_local, 0, 1)
-                webbrowser.open(url_commons, 0, 1)
-                if image_local.split('Image:')[1] == image_commons:
-                    choice = pywikibot.input_yn(
-                        u'The local and the commons images have the same name, 
'
-                        'continue?', default=False, automatic_quit=False)
-                else:
-                    choice = pywikibot.input_yn(
-                        u'Are the two images equal?',
-                        default=False, automatic_quit=False)
-                if choice:
-                    yield [image_local, image_commons]
-                else:
-                    continue
-            # The page is dinamically updated, so we may don't need to change 
it
-            if change_page:
-                num_page += 1
-            # If no image found means that there aren't anymore, break.
-            if not found_something:
-                break
-
-    def getPageGenerator(self):
+    @property
+    def generator(self):
         """Generator method."""
-        if self.getOption('use_hash'):
-            gen = self.useHashGenerator()
-        else:
-            gens = [t.getReferences(follow_redirects=True, namespaces=[6],
-                                    onlyTemplateInclusion=True)
-                    for t in self.nc_templates]
-            gen = pg.CombinedPageGenerator(gens)
-            gen = pg.DuplicateFilterPageGenerator(gen)
-            gen = pg.PreloadingGenerator(gen)
+        gens = [t.getReferences(follow_redirects=True, namespaces=[6],
+                                onlyTemplateInclusion=True)
+                for t in self.nc_templates]
+        gen = pg.CombinedPageGenerator(gens)
+        gen = pg.DuplicateFilterPageGenerator(gen)
+        gen = pg.PreloadingGenerator(gen)
         return gen
 
     def findFilenameOnCommons(self, localImagePage):
@@ -324,133 +263,101 @@
                         filenameOnCommons = val[1].strip()
                 return filenameOnCommons
 
-    def run(self):
-        """Run the bot."""
-        commons = pywikibot.Site('commons', 'commons')
-        comment = i18n.twtranslate(self.site, 
'imagetransfer-nowcommons_notice')
-
-        for page in self.getPageGenerator():
-            if self.getOption('use_hash'):
-                # Page -> Has the namespace | commons image -> Not
-                images_list = page    # 0 -> local image, 1 -> commons image
-                page = pywikibot.Page(self.site, images_list[0])
-            else:
-                # If use_hash is true, we have already print this before, no 
need
-                self.current_page = page
-            try:
-                localImagePage = pywikibot.FilePage(self.site, page.title())
-                if localImagePage.fileIsShared():
-                    pywikibot.output(u'File is already on Commons.')
-                    continue
-                sha1 = localImagePage.latest_file_info.sha1
-                if self.getOption('use_hash'):
-                    filenameOnCommons = images_list[1]
-                else:
-                    filenameOnCommons = self.findFilenameOnCommons(
-                        localImagePage)
-                if not filenameOnCommons and not self.getOption('use_hash'):
-                    pywikibot.output(u'NowCommons template not found.')
-                    continue
-                commonsImagePage = pywikibot.FilePage(commons, 'Image:%s'
-                                                      % filenameOnCommons)
-                if (localImagePage.title(withNamespace=False) ==
-                        commonsImagePage.title(withNamespace=False) and
-                        self.getOption('use_hash')):
-                    pywikibot.output(
-                        u'The local and the commons images have the same name')
-                if (localImagePage.title(withNamespace=False) !=
-                        commonsImagePage.title(withNamespace=False)):
-                    usingPages = list(localImagePage.usingPages())
-                    if usingPages and usingPages != [localImagePage]:
+    def treat_page(self):
+        """Process a single file."""
+        localImagePage = pywikibot.FilePage(self.current_page)
+        if localImagePage.fileIsShared():
+            pywikibot.output(u'File is already on Commons.')
+            return
+        sha1 = localImagePage.latest_file_info.sha1
+        filenameOnCommons = self.findFilenameOnCommons(localImagePage)
+        if not filenameOnCommons:
+            pywikibot.output(u'NowCommons template not found.')
+            return
+        commonsImagePage = pywikibot.FilePage(self.commons, 'Image:%s'
+                                              % filenameOnCommons)
+        if (localImagePage.title(withNamespace=False) !=
+                commonsImagePage.title(withNamespace=False)):
+            usingPages = list(localImagePage.usingPages())
+            if usingPages and usingPages != [localImagePage]:
+                pywikibot.output(color_format(
+                    '"{lightred}{0}{default}" is still used in {1} pages.',
+                    localImagePage.title(withNamespace=False),
+                    len(usingPages)))
+                if self.getOption('replace') is True:
                         pywikibot.output(color_format(
-                            '"{lightred}{0}{default}" is still used in {1} 
pages.',
+                            'Replacing "{lightred}{0}{default}" by '
+                            '"{lightgreen}{1}{default}\".',
                             localImagePage.title(withNamespace=False),
-                            len(usingPages)))
-                        if self.getOption('replace') is True:
-                                pywikibot.output(color_format(
-                                    'Replacing "{lightred}{0}{default}" by '
-                                    '"{lightgreen}{1}{default}\".',
-                                    localImagePage.title(withNamespace=False),
-                                    
commonsImagePage.title(withNamespace=False)))
-                                bot = ImageBot(
-                                    pg.FileLinksGenerator(localImagePage),
-                                    localImagePage.title(withNamespace=False),
-                                    
commonsImagePage.title(withNamespace=False),
-                                    '', self.getOption('replacealways'),
-                                    self.getOption('replaceloose'))
-                                bot.run()
-                                # If the image is used with the urlname the
-                                # previous function won't work
-                                is_used = bool(list(pywikibot.FilePage(
-                                    self.site, 
page.title()).usingPages(total=1)))
-                                if is_used and self.getOption('replaceloose'):
-                                    bot = ImageBot(
-                                        pg.FileLinksGenerator(
-                                            localImagePage),
-                                        localImagePage.title(
-                                            withNamespace=False, asUrl=True),
-                                        commonsImagePage.title(
-                                            withNamespace=False),
-                                        '', self.getOption('replacealways'),
-                                        self.getOption('replaceloose'))
-                                    bot.run()
-                                # refresh because we want the updated list
-                                usingPages = len(list(pywikibot.FilePage(
-                                    self.site, page.title()).usingPages()))
-                                if usingPages > 0 and 
self.getOption('use_hash'):
-                                    # just an enter
-                                    pywikibot.input(
-                                        u'There are still %s pages with this \
-                                        image, confirm the manual removal from 
them please.'
-                                        % usingPages)
-
-                        else:
-                            pywikibot.output(u'Please change them manually.')
-                        continue
-                    else:
-                        pywikibot.output(color_format(
-                            'No page is using "{lightgreen}{0}{default}" '
-                            'anymore.',
-                            localImagePage.title(withNamespace=False)))
-                commonsText = commonsImagePage.get()
-                if self.getOption('replaceonly') is False:
-                    if sha1 == commonsImagePage.latest_file_info.sha1:
-                        pywikibot.output(
-                            u'The image is identical to the one on Commons.')
-                        if (len(localImagePage.getFileVersionHistory()) > 1 and
-                                not self.getOption('use_hash')):
-                            pywikibot.output(
-                                u"This image has a version history. Please \
-                                delete it manually after making sure that the \
-                                old versions are not worth keeping.""")
-                            continue
-                        if self.getOption('always') is False:
-                            format_str = color_format(
-                                '\n\n>>>> Description on {lightpurple}%s'
-                                '{default} <<<<\n')
-                            pywikibot.output(format_str % page.title())
-                            pywikibot.output(localImagePage.get())
-                            pywikibot.output(format_str %
-                                             commonsImagePage.title())
-                            pywikibot.output(commonsText)
-                            if pywikibot.input_yn(
-                                    u'Does the description on Commons contain '
-                                    'all required source and license\n'
-                                    'information?',
-                                    default=False, automatic_quit=False):
-                                localImagePage.delete(
-                                    '%s [[:commons:Image:%s]]'
-                                    % (comment, filenameOnCommons), 
prompt=False)
-                        else:
-                            localImagePage.delete(
-                                comment + ' [[:commons:Image:%s]]'
-                                % filenameOnCommons, prompt=False)
-                    else:
-                        pywikibot.output(
-                            u'The image is not identical to the one on 
Commons.')
-            except (pywikibot.NoPage, pywikibot.IsRedirectPage) as e:
-                pywikibot.output(u'%s' % e[0])
-                continue
+                            commonsImagePage.title(withNamespace=False)))
+                        bot = ImageBot(
+                            pg.FileLinksGenerator(localImagePage),
+                            localImagePage.title(withNamespace=False),
+                            commonsImagePage.title(withNamespace=False),
+                            '', self.getOption('replacealways'),
+                            self.getOption('replaceloose'))
+                        bot.run()
+                        # If the image is used with the urlname the
+                        # previous function won't work
+                        is_used = bool(list(pywikibot.FilePage(
+                            self.site, page.title()).usingPages(total=1)))
+                        if is_used and self.getOption('replaceloose'):
+                            bot = ImageBot(
+                                pg.FileLinksGenerator(
+                                    localImagePage),
+                                localImagePage.title(
+                                    withNamespace=False, asUrl=True),
+                                commonsImagePage.title(
+                                    withNamespace=False),
+                                '', self.getOption('replacealways'),
+                                self.getOption('replaceloose'))
+                            bot.run()
+                        # refresh because we want the updated list
+                        usingPages = len(list(pywikibot.FilePage(
+                            self.site, page.title()).usingPages()))
+                else:
+                    pywikibot.output(u'Please change them manually.')
+                return
+            else:
+                pywikibot.output(color_format(
+                    'No page is using "{lightgreen}{0}{default}" '
+                    'anymore.',
+                    localImagePage.title(withNamespace=False)))
+        commonsText = commonsImagePage.get()
+        if self.getOption('replaceonly') is False:
+            if sha1 == commonsImagePage.latest_file_info.sha1:
+                pywikibot.output(
+                    u'The image is identical to the one on Commons.')
+                if len(localImagePage.getFileVersionHistory()) > 1:
+                    pywikibot.output(
+                        u"This image has a version history. Please \
+                        delete it manually after making sure that the \
+                        old versions are not worth keeping.""")
+                    return
+                if self.getOption('always') is False:
+                    format_str = color_format(
+                        '\n\n>>>> Description on {lightpurple}%s'
+                        '{default} <<<<\n')
+                    pywikibot.output(format_str % page.title())
+                    pywikibot.output(localImagePage.get())
+                    pywikibot.output(format_str %
+                                     commonsImagePage.title())
+                    pywikibot.output(commonsText)
+                    if pywikibot.input_yn(
+                            u'Does the description on Commons contain '
+                            'all required source and license\n'
+                            'information?',
+                            default=False, automatic_quit=False):
+                        localImagePage.delete(
+                            '%s [[:commons:Image:%s]]'
+                            % (comment, filenameOnCommons), prompt=False)
+                else:
+                    localImagePage.delete(
+                        comment + ' [[:commons:Image:%s]]'
+                        % filenameOnCommons, prompt=False)
+            else:
+                pywikibot.output(
+                    u'The image is not identical to the one on Commons.')
 
 
 def main(*args):
@@ -468,8 +375,6 @@
         if arg == '-replacealways':
             options['replace'] = True
             options['replacealways'] = True
-        elif arg == '-hash':
-            options['use_hash'] = True
         elif arg == '-autonomous':
             pywikibot.warning(u"The '-autonomous' argument is DEPRECATED,"
                               u" use '-always' instead.")

-- 
To view, visit https://gerrit.wikimedia.org/r/282619
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I534f41a39097f47f6b3ae1d73eec3c87f3acec9d
Gerrit-PatchSet: 1
Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-Owner: Xqt <i...@gno.de>

_______________________________________________
MediaWiki-commits mailing list
MediaWiki-commits@lists.wikimedia.org
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to