Xqt has uploaded a new change for review. https://gerrit.wikimedia.org/r/282619
Change subject: [WIP] Cleanup code ...................................................................... [WIP] Cleanup code - use SingleSiteBot, ExistingPageBot and NoRedirectPageBot class - remove -hash parts. Service is not availlable anymore - use treat_page - TODO: check whether commonsImagePage exists rewrite filenameOnCommons, do not use transferby=wdwdbot as filename DO NOT SUBMIT Bug: T132297 Change-Id: I534f41a39097f47f6b3ae1d73eec3c87f3acec9d --- M scripts/nowcommons.py 1 file changed, 116 insertions(+), 211 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/pywikibot/core refs/changes/19/282619/1 diff --git a/scripts/nowcommons.py b/scripts/nowcommons.py index 166b776..343253d 100755 --- a/scripts/nowcommons.py +++ b/scripts/nowcommons.py @@ -34,14 +34,9 @@ -replaceonly Use this if you do not have a local sysop account, but do wish to replace links from the NowCommons template. - -hash Use the hash to identify the images that are the same. It - doesn't work always, so the bot opens two tabs to let to - the user to check if the images are equal or not. - -- Example -- - python pwb.py nowcommons -replaceonly -replaceloose -replacealways \ - -replace -hash + python pwb.py nowcommons -replaceonly -replaceloose -replacealways -replace -- Known issues -- Please fix these if you are capable and motivated: @@ -51,8 +46,8 @@ # # (C) Wikipedian, 2006-2007 # (C) Siebrand Mazeland, 2007-2008 -# (C) xqt, 2010-2014 -# (C) Pywikibot team, 2006-2015 +# (C) xqt, 2010-2016 +# (C) Pywikibot team, 2006-2016 # # Distributed under the terms of the MIT license. # @@ -67,7 +62,8 @@ import pywikibot -from pywikibot import i18n, Bot +from pywikibot import i18n +from pywikibot.bot import ExistingPageBot, NoRedirectPageBot, SingleSiteBot from pywikibot import pagegenerators as pg from pywikibot.tools.formatter import color_format @@ -188,7 +184,7 @@ } -class NowCommonsDeleteBot(Bot): +class NowCommonsDeleteBot(SingleSiteBot, ExistingPageBot, NoRedirectPageBot): """Bot to delete migrated files.""" @@ -199,13 +195,18 @@ 'replacealways': False, 'replaceloose': False, 'replaceonly': False, - 'use_hash': False, }) super(NowCommonsDeleteBot, self).__init__(**kwargs) - self.site = pywikibot.Site() - if repr(self.site) == 'commons:commons': - sys.exit('Do not run this bot on Commons!') + if not self.site.has_image_repository: + sys.exit('There must be a file repository to run this script') + else: + self.commons = self.site.image_repository() + if self.site == self.commons: + sys.exit( + 'You cannot run this bot on file repository like Commons.') + self.summary = i18n.twtranslate(self.site, + 'imagetransfer-nowcommons_notice') def ncTemplates(self): """Return nowcommons templates.""" @@ -222,77 +223,15 @@ for title in self.ncTemplates()) return self._nc_templates - def useHashGenerator(self): - """Use hash generator.""" - # https://toolserver.org/~multichill/nowcommons.php?language=it&page=2&filter= - lang = self.site.lang - num_page = 0 - word_to_skip_translated = i18n.translate(self.site, word_to_skip) - images_processed = list() - while 1: - url = ('https://toolserver.org/~multichill/nowcommons.php?' - 'language=%s&page=%s&filter=') % (lang, num_page) - HTML_text = self.site.getUrl(url, no_hostname=True) - reg = r'<[Aa] href="(?P<urllocal>.*?)">(?P<imagelocal>.*?)</[Aa]> +?</td><td>\n\s*?' - reg += r'<[Aa] href="(?P<urlcommons>http[s]?://commons.wikimedia.org/.*?)" \ - >Image:(?P<imagecommons>.*?)</[Aa]> +?</td><td>' - regex = re.compile(reg, re.UNICODE) - found_something = False - change_page = True - for x in regex.finditer(HTML_text): - found_something = True - image_local = x.group('imagelocal') - image_commons = x.group('imagecommons') - if image_local in images_processed: - continue - change_page = False - images_processed.append(image_local) - # Skip images that have something in the title (useful for it.wiki) - image_to_skip = False - for word in word_to_skip_translated: - if word.lower() in image_local.lower(): - image_to_skip = True - if image_to_skip: - continue - url_local = x.group('urllocal') - url_commons = x.group('urlcommons') - pywikibot.output(color_format( - '\n\n>>> {lightpurple}{0}{default} <<<', - image_local)) - pywikibot.output(u'Local: %s\nCommons: %s\n' - % (url_local, url_commons)) - webbrowser.open(url_local, 0, 1) - webbrowser.open(url_commons, 0, 1) - if image_local.split('Image:')[1] == image_commons: - choice = pywikibot.input_yn( - u'The local and the commons images have the same name, ' - 'continue?', default=False, automatic_quit=False) - else: - choice = pywikibot.input_yn( - u'Are the two images equal?', - default=False, automatic_quit=False) - if choice: - yield [image_local, image_commons] - else: - continue - # The page is dinamically updated, so we may don't need to change it - if change_page: - num_page += 1 - # If no image found means that there aren't anymore, break. - if not found_something: - break - - def getPageGenerator(self): + @property + def generator(self): """Generator method.""" - if self.getOption('use_hash'): - gen = self.useHashGenerator() - else: - gens = [t.getReferences(follow_redirects=True, namespaces=[6], - onlyTemplateInclusion=True) - for t in self.nc_templates] - gen = pg.CombinedPageGenerator(gens) - gen = pg.DuplicateFilterPageGenerator(gen) - gen = pg.PreloadingGenerator(gen) + gens = [t.getReferences(follow_redirects=True, namespaces=[6], + onlyTemplateInclusion=True) + for t in self.nc_templates] + gen = pg.CombinedPageGenerator(gens) + gen = pg.DuplicateFilterPageGenerator(gen) + gen = pg.PreloadingGenerator(gen) return gen def findFilenameOnCommons(self, localImagePage): @@ -324,133 +263,101 @@ filenameOnCommons = val[1].strip() return filenameOnCommons - def run(self): - """Run the bot.""" - commons = pywikibot.Site('commons', 'commons') - comment = i18n.twtranslate(self.site, 'imagetransfer-nowcommons_notice') - - for page in self.getPageGenerator(): - if self.getOption('use_hash'): - # Page -> Has the namespace | commons image -> Not - images_list = page # 0 -> local image, 1 -> commons image - page = pywikibot.Page(self.site, images_list[0]) - else: - # If use_hash is true, we have already print this before, no need - self.current_page = page - try: - localImagePage = pywikibot.FilePage(self.site, page.title()) - if localImagePage.fileIsShared(): - pywikibot.output(u'File is already on Commons.') - continue - sha1 = localImagePage.latest_file_info.sha1 - if self.getOption('use_hash'): - filenameOnCommons = images_list[1] - else: - filenameOnCommons = self.findFilenameOnCommons( - localImagePage) - if not filenameOnCommons and not self.getOption('use_hash'): - pywikibot.output(u'NowCommons template not found.') - continue - commonsImagePage = pywikibot.FilePage(commons, 'Image:%s' - % filenameOnCommons) - if (localImagePage.title(withNamespace=False) == - commonsImagePage.title(withNamespace=False) and - self.getOption('use_hash')): - pywikibot.output( - u'The local and the commons images have the same name') - if (localImagePage.title(withNamespace=False) != - commonsImagePage.title(withNamespace=False)): - usingPages = list(localImagePage.usingPages()) - if usingPages and usingPages != [localImagePage]: + def treat_page(self): + """Process a single file.""" + localImagePage = pywikibot.FilePage(self.current_page) + if localImagePage.fileIsShared(): + pywikibot.output(u'File is already on Commons.') + return + sha1 = localImagePage.latest_file_info.sha1 + filenameOnCommons = self.findFilenameOnCommons(localImagePage) + if not filenameOnCommons: + pywikibot.output(u'NowCommons template not found.') + return + commonsImagePage = pywikibot.FilePage(self.commons, 'Image:%s' + % filenameOnCommons) + if (localImagePage.title(withNamespace=False) != + commonsImagePage.title(withNamespace=False)): + usingPages = list(localImagePage.usingPages()) + if usingPages and usingPages != [localImagePage]: + pywikibot.output(color_format( + '"{lightred}{0}{default}" is still used in {1} pages.', + localImagePage.title(withNamespace=False), + len(usingPages))) + if self.getOption('replace') is True: pywikibot.output(color_format( - '"{lightred}{0}{default}" is still used in {1} pages.', + 'Replacing "{lightred}{0}{default}" by ' + '"{lightgreen}{1}{default}\".', localImagePage.title(withNamespace=False), - len(usingPages))) - if self.getOption('replace') is True: - pywikibot.output(color_format( - 'Replacing "{lightred}{0}{default}" by ' - '"{lightgreen}{1}{default}\".', - localImagePage.title(withNamespace=False), - commonsImagePage.title(withNamespace=False))) - bot = ImageBot( - pg.FileLinksGenerator(localImagePage), - localImagePage.title(withNamespace=False), - commonsImagePage.title(withNamespace=False), - '', self.getOption('replacealways'), - self.getOption('replaceloose')) - bot.run() - # If the image is used with the urlname the - # previous function won't work - is_used = bool(list(pywikibot.FilePage( - self.site, page.title()).usingPages(total=1))) - if is_used and self.getOption('replaceloose'): - bot = ImageBot( - pg.FileLinksGenerator( - localImagePage), - localImagePage.title( - withNamespace=False, asUrl=True), - commonsImagePage.title( - withNamespace=False), - '', self.getOption('replacealways'), - self.getOption('replaceloose')) - bot.run() - # refresh because we want the updated list - usingPages = len(list(pywikibot.FilePage( - self.site, page.title()).usingPages())) - if usingPages > 0 and self.getOption('use_hash'): - # just an enter - pywikibot.input( - u'There are still %s pages with this \ - image, confirm the manual removal from them please.' - % usingPages) - - else: - pywikibot.output(u'Please change them manually.') - continue - else: - pywikibot.output(color_format( - 'No page is using "{lightgreen}{0}{default}" ' - 'anymore.', - localImagePage.title(withNamespace=False))) - commonsText = commonsImagePage.get() - if self.getOption('replaceonly') is False: - if sha1 == commonsImagePage.latest_file_info.sha1: - pywikibot.output( - u'The image is identical to the one on Commons.') - if (len(localImagePage.getFileVersionHistory()) > 1 and - not self.getOption('use_hash')): - pywikibot.output( - u"This image has a version history. Please \ - delete it manually after making sure that the \ - old versions are not worth keeping.""") - continue - if self.getOption('always') is False: - format_str = color_format( - '\n\n>>>> Description on {lightpurple}%s' - '{default} <<<<\n') - pywikibot.output(format_str % page.title()) - pywikibot.output(localImagePage.get()) - pywikibot.output(format_str % - commonsImagePage.title()) - pywikibot.output(commonsText) - if pywikibot.input_yn( - u'Does the description on Commons contain ' - 'all required source and license\n' - 'information?', - default=False, automatic_quit=False): - localImagePage.delete( - '%s [[:commons:Image:%s]]' - % (comment, filenameOnCommons), prompt=False) - else: - localImagePage.delete( - comment + ' [[:commons:Image:%s]]' - % filenameOnCommons, prompt=False) - else: - pywikibot.output( - u'The image is not identical to the one on Commons.') - except (pywikibot.NoPage, pywikibot.IsRedirectPage) as e: - pywikibot.output(u'%s' % e[0]) - continue + commonsImagePage.title(withNamespace=False))) + bot = ImageBot( + pg.FileLinksGenerator(localImagePage), + localImagePage.title(withNamespace=False), + commonsImagePage.title(withNamespace=False), + '', self.getOption('replacealways'), + self.getOption('replaceloose')) + bot.run() + # If the image is used with the urlname the + # previous function won't work + is_used = bool(list(pywikibot.FilePage( + self.site, page.title()).usingPages(total=1))) + if is_used and self.getOption('replaceloose'): + bot = ImageBot( + pg.FileLinksGenerator( + localImagePage), + localImagePage.title( + withNamespace=False, asUrl=True), + commonsImagePage.title( + withNamespace=False), + '', self.getOption('replacealways'), + self.getOption('replaceloose')) + bot.run() + # refresh because we want the updated list + usingPages = len(list(pywikibot.FilePage( + self.site, page.title()).usingPages())) + else: + pywikibot.output(u'Please change them manually.') + return + else: + pywikibot.output(color_format( + 'No page is using "{lightgreen}{0}{default}" ' + 'anymore.', + localImagePage.title(withNamespace=False))) + commonsText = commonsImagePage.get() + if self.getOption('replaceonly') is False: + if sha1 == commonsImagePage.latest_file_info.sha1: + pywikibot.output( + u'The image is identical to the one on Commons.') + if len(localImagePage.getFileVersionHistory()) > 1: + pywikibot.output( + u"This image has a version history. Please \ + delete it manually after making sure that the \ + old versions are not worth keeping.""") + return + if self.getOption('always') is False: + format_str = color_format( + '\n\n>>>> Description on {lightpurple}%s' + '{default} <<<<\n') + pywikibot.output(format_str % page.title()) + pywikibot.output(localImagePage.get()) + pywikibot.output(format_str % + commonsImagePage.title()) + pywikibot.output(commonsText) + if pywikibot.input_yn( + u'Does the description on Commons contain ' + 'all required source and license\n' + 'information?', + default=False, automatic_quit=False): + localImagePage.delete( + '%s [[:commons:Image:%s]]' + % (comment, filenameOnCommons), prompt=False) + else: + localImagePage.delete( + comment + ' [[:commons:Image:%s]]' + % filenameOnCommons, prompt=False) + else: + pywikibot.output( + u'The image is not identical to the one on Commons.') def main(*args): @@ -468,8 +375,6 @@ if arg == '-replacealways': options['replace'] = True options['replacealways'] = True - elif arg == '-hash': - options['use_hash'] = True elif arg == '-autonomous': pywikibot.warning(u"The '-autonomous' argument is DEPRECATED," u" use '-always' instead.") -- To view, visit https://gerrit.wikimedia.org/r/282619 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I534f41a39097f47f6b3ae1d73eec3c87f3acec9d Gerrit-PatchSet: 1 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Xqt <i...@gno.de> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits