jenkins-bot has submitted this change and it was merged. ( 
https://gerrit.wikimedia.org/r/551812 )

Change subject: [cleanup] Remove commonshelper parts
......................................................................

[cleanup] Remove commonshelper parts

Remove commonshelper parts because CommonSense isn't available anymore.

Part 1 detached from I28d72f2

Bug: T195079
Change-Id: I765754366939b435b54a0340a1e518583b0a6f07
---
M scripts/imagerecat.py
1 file changed, 12 insertions(+), 190 deletions(-)

Approvals:
  Framawiki: Looks good to me, approved
  jenkins-bot: Verified



diff --git a/scripts/imagerecat.py b/scripts/imagerecat.py
index 04e3fd0..3a143ea 100755
--- a/scripts/imagerecat.py
+++ b/scripts/imagerecat.py
@@ -1,29 +1,15 @@
 #!/usr/bin/python
 # -*- coding: utf-8 -*-
 """
-Program to (re)categorize images at commons.
+Program to re-categorize images at commons.

-The program uses commonshelper for category suggestions.
-It takes the suggestions and the current categories. Put the categories through
+The program reads the current categories, puts the categories through
 some filters and adds the result.

 The following command line parameters are supported:

--onlyfilter     Don't use Commonsense to get categories, just filter the
-                current categories
-
 -onlyuncat      Only work on uncategorized images. Will prevent the bot from
                 working on an image multiple times.
-
--hint           Give Commonsense a hint.
-                For example -hint:li.wikipedia.org
-
--onlyhint       Give Commonsense a hint. And only work on this hint.
-                Syntax is the same as -hint. Some special hints are possible:
-                _20 : Work on the top 20 wikipedia's
-                _80 : Work on the top 80 wikipedia's
-                wps : Work on all wikipedia's
-
 """
 #
 # (C) Multichill, 2008-2011
@@ -33,7 +19,6 @@
 #
 from __future__ import absolute_import, division, unicode_literals

-import re
 import socket
 import xml.etree.ElementTree

@@ -52,9 +37,6 @@
 category_blacklist = []
 countries = []

-search_wikis = '_20'
-hint_wiki = ''
-

 def initLists():
     """Get the list of countries & the blacklist from Commons."""
@@ -73,7 +55,7 @@
     return


-def categorizeImages(generator, onlyFilter, onlyUncat):
+def categorizeImages(generator, onlyUncat):
     """Loop over all images in generator and try to categorize them.

     Get category suggestions from CommonSense.
@@ -93,19 +75,12 @@
             continue

         currentCats = getCurrentCats(imagepage)
-        if onlyFilter:
-            commonshelperCats = []
-            usage = []
-            galleries = []
-        else:
-            (commonshelperCats, usage,
-             galleries) = getCommonshelperCats(imagepage)
-        newcats = applyAllFilters(commonshelperCats + currentCats)
+        newcats = applyAllFilters(currentCats)

         if newcats and set(currentCats) != set(newcats):
             for cat in newcats:
                 pywikibot.output(' Found new cat: ' + cat)
-            saveImagePage(imagepage, newcats, usage, galleries, onlyFilter)
+            saveImagePage(imagepage, newcats)


 def getCurrentCats(imagepage):
@@ -116,91 +91,6 @@
     return list(set(result))


-def getCommonshelperCats(imagepage):
-    """Get category suggestions from CommonSense.
-
-    @rtype: list of unicode
-
-    """
-    commonshelperCats = []
-    usage = []
-    galleries = []
-
-    global search_wikis
-    global hint_wiki
-    site = imagepage.site
-    lang = site.code
-    family = site.family.name
-    if lang == 'commons' and family == 'commons':
-        parameters = urlencode(
-            {'i': imagepage.title(with_ns=False).encode('utf-8'),
-             'r': 'on',
-             'go-clean': 'Find+Categories',
-             'p': search_wikis,
-             'cl': hint_wiki})
-    elif family == 'wikipedia':
-        parameters = urlencode(
-            {'i': imagepage.title(with_ns=False).encode('utf-8'),
-             'r': 'on',
-             'go-move': 'Find+Categories',
-             'p': search_wikis,
-             'cl': hint_wiki,
-             'w': lang})
-    else:
-        # Can't handle other sites atm
-        return [], [], []
-
-    commonsenseRe = re.compile(
-        r'^#COMMONSENSE(.*)#USAGE(\s)+\((?P<usagenum>(\d)+)\)\s'
-        r'(?P<usage>(.*))\s'
-        r'#KEYWORDS(\s)+\((?P<keywords>(\d)+)\)(.*)'
-        r'#CATEGORIES(\s)+\((?P<catnum>(\d)+)\)\s(?P<cats>(.*))\s'
-        r'#GALLERIES(\s)+\((?P<galnum>(\d)+)\)\s(?P<gals>(.*))\s(.*)#EOF$',
-        re.MULTILINE + re.DOTALL)
-
-    gotInfo = False
-    matches = None
-    maxtries = 10
-    tries = 0
-    while not gotInfo:
-        try:
-            if tries < maxtries:
-                tries += 1
-                commonsHelperPage = fetch(
-                    'https://toolserver.org/~daniel/WikiSense/CommonSense.php?'
-                    + parameters)
-                matches = commonsenseRe.search(
-                    commonsHelperPage.text)
-                gotInfo = True
-            else:
-                break
-        except IOError:
-            pywikibot.output("Got an IOError, let's try again")
-        except socket.timeout:
-            pywikibot.output("Got a timeout, let's try again")
-
-    if matches and gotInfo:
-        if matches.group('usagenum') > 0:
-            used = matches.group('usage').splitlines()
-            for use in used:
-                usage = usage + getUsage(use)
-        if matches.group('catnum') > 0:
-            cats = matches.group('cats').splitlines()
-            for cat in cats:
-                commonshelperCats.append(cat.replace('_', ' '))
-                pywikibot.output('category : ' + cat)
-        if matches.group('galnum') > 0:
-            gals = matches.group('gals').splitlines()
-            for gal in gals:
-                galleries.append(gal.replace('_', ' '))
-                pywikibot.output('gallery : ' + gal)
-    commonshelperCats = list(set(commonshelperCats))
-    galleries = list(set(galleries))
-    for (lang, project, article) in usage:
-        pywikibot.output(lang + project + article)
-    return commonshelperCats, usage, galleries
-
-
 def getOpenStreetMapCats(latitude, longitude):
     """Get a list of location categories based on the OSM nomatim tool."""
     result = []
@@ -282,27 +172,6 @@
     return ''


-def getUsage(use):
-    """Parse the Commonsense output to get the usage."""
-    result = []
-    lang = ''
-    project = ''
-    articles = ''
-    usageRe = re.compile(
-        r'^(?P<lang>([\w-]+))\.(?P<project>([\w]+))\.org:(?P<articles>\s(.*))')
-    matches = usageRe.search(use)
-    if matches:
-        if matches.group('lang'):
-            lang = matches.group('lang')
-        if matches.group('project'):
-            project = matches.group('project')
-        if matches.group('articles'):
-            articles = matches.group('articles')
-    for article in articles.split():
-        result.append((lang, project, article))
-    return result
-
-
 def applyAllFilters(categories):
     """Apply all filters on categories."""
     result = filterDisambiguation(categories)
@@ -392,59 +261,22 @@
     return categories


-def saveImagePage(imagepage, newcats, usage, galleries, onlyFilter):
+def saveImagePage(imagepage, newcats):
     """Remove the old categories and add the new categories to the image."""
     newtext = textlib.removeCategoryLinks(imagepage.text, imagepage.site)
-    if not onlyFilter:
-        newtext = removeTemplates(newtext)
-        newtext = newtext + getCheckCategoriesTemplate(usage, galleries,
-                                                       len(newcats))
     newtext += '\n'
+
     for category in newcats:
         newtext = newtext + '[[Category:' + category + ']]\n'
-    if onlyFilter:
-        comment = 'Filtering categories'
-    else:
-        comment = ('Image is categorized by a bot using data from '
-                   '[[Commons:Tools#CommonSense|CommonSense]]')
+
+    comment = 'Filtering categories'
+
     pywikibot.showDiff(imagepage.text, newtext)
     imagepage.text = newtext
     imagepage.save(comment)
     return


-def removeTemplates(oldtext=''):
-    """Remove {{Uncategorized}} and {{Check categories}} templates."""
-    result = re.sub(
-        r'{{\s*([Uu]ncat(egori[sz]ed( image)?)?|'
-        r'[Nn]ocat|[Nn]eedscategory)[^}]*}}',
-        '', oldtext)
-    result = re.sub('<!-- Remove this line once you have added categories -->',
-                    '', result)
-    result = re.sub(r'\{\{\s*[Cc]heck categories[^}]*\}\}', '', result)
-    return result
-
-
-def getCheckCategoriesTemplate(usage, galleries, ncats):
-    """Build the check categories template with all parameters."""
-    result = ('{{Check categories|year={{subst:CURRENTYEAR}}|month={{subst:'
-              'CURRENTMONTHNAME}}|day={{subst:CURRENTDAY}}\n')
-    usageCounter = 1
-    for (lang, project, article) in usage:
-        result += '|lang%d=%s' % (usageCounter, lang)
-        result += '|wiki%d=%s' % (usageCounter, project)
-        result += '|article%d=%s' % (usageCounter, article)
-        result += '\n'
-        usageCounter += 1
-    galleryCounter = 1
-    for gallery in galleries:
-        result += '|gallery{}={}'.format(galleryCounter,
-                                         gallery.replace('_', ' ')) + '\n'
-        galleryCounter += 1
-    result += '|ncats={}\n}}\n'.format(ncats)
-    return result
-
-
 def main(*args):
     """
     Process command line arguments and invoke bot.
@@ -454,25 +286,15 @@
     @param args: command line arguments
     @type args: str
     """
-    onlyFilter = False
     onlyUncat = False

     # Process global args and prepare generator args parser
     local_args = pywikibot.handle_args(args)
     genFactory = pagegenerators.GeneratorFactory()

-    global search_wikis
-    global hint_wiki
-
     for arg in local_args:
-        if arg == '-onlyfilter':
-            onlyFilter = True
-        elif arg == '-onlyuncat':
+        if arg == '-onlyuncat':
             onlyUncat = True
-        elif arg.startswith('-hint:'):
-            hint_wiki = arg[len('-hint:'):]
-        elif arg.startswith('-onlyhint'):
-            search_wikis = arg[len('-onlyhint:'):]
         else:
             genFactory.handleArg(arg)

@@ -484,7 +306,7 @@
             recurse=True)

     initLists()
-    categorizeImages(generator, onlyFilter, onlyUncat)
+    categorizeImages(generator, onlyUncat)
     pywikibot.output('All done')



--
To view, visit https://gerrit.wikimedia.org/r/551812
To unsubscribe, or for help writing mail filters, visit 
https://gerrit.wikimedia.org/r/settings

Gerrit-Project: pywikibot/core
Gerrit-Branch: master
Gerrit-MessageType: merged
Gerrit-Change-Id: I765754366939b435b54a0340a1e518583b0a6f07
Gerrit-Change-Number: 551812
Gerrit-PatchSet: 3
Gerrit-Owner: Xqt <[email protected]>
Gerrit-Reviewer: D3r1ck01 <[email protected]>
Gerrit-Reviewer: Framawiki <[email protected]>
Gerrit-Reviewer: jenkins-bot (75)
_______________________________________________
Pywikibot-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikibot-commits

Reply via email to