-----BEGIN PGP SIGNED MESSAGE-----
Hash: SHA1

Hello all

Today DrTrigonBot complained about a

"HTTPError: HTTP Error 400: Bad Request"

that was originating from botlist.py. I was able to track this
down to the point where "タチコマ robot" should be used in a
URL as offset (line 96), which caused the error when putting in
this bot name. As far as I can see, a

urllib.quote(...)

should solve the problem, as done in the attached file. Could
someone please verify this (if there is time) and then commit
this change to SVN?

Btw: I was not able to find something similar to "urllib.quote"
in the bot framework so I had to import urllib.

Thanks a lot and Greetings
DrTrigon
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
Comment: Using GnuPG with Mozilla - http://enigmail.mozdev.org/

iEYEARECAAYFAk5xrygACgkQAXWvBxzBrDAxLwCfWtikjvv88d2Z+F49q37OcHtP
axcAniISk6qypZdT3WxlbAJPxfo9x4zn
=DTud
-----END PGP SIGNATURE-----
# -*- coding: utf-8 -*-
"""
Allows access to the site's bot user list.
 
The function refresh() downloads the current bot user list and saves
it to disk. It is run automatically when a bot first tries to get this
data.
"""
 
# (C) Daniel Herding, 2005
# (C) Dr. Trigon, 2009-2010
#
# DrTrigonBot: http://de.wikipedia.org/wiki/Benutzer:DrTrigonBot
#
# Distributed under the terms of the MIT license.
#
__version__='$Id: botlist.py 8989 2011-02-20 12:06:09Z xqt $'
#
 
import re, sys, pickle
import os.path
import time
import urllib
import wikipedia as pywikibot
 
# In-memory cache of already loaded bot user lists, keyed by site object.
# get() fills it, so the list file on disk is read only on the first request
# for a given site.
cache = {}
 
def get(site=None):
    """Return the bot user list for ``site``.

    The list is taken from the in-memory ``cache`` when present. Otherwise
    it is loaded from the pickled dump in the ``botlists`` data directory;
    that dump is (re)created through refresh() first if it does not exist
    yet or is older than one day.

    @param site: the site to get the list for; defaults to
        pywikibot.getSite() when None.
    @return: the unpickled bot list (refresh() stores a list of user names).
    """
    if site is None:
        site = pywikibot.getSite()
    if site in cache:
        # Use cached copy if it exists.
        return cache[site]

    fn = pywikibot.config.datafilepath('botlists',
              'botlist-%s-%s.dat' % (site.family.name, site.lang))
    try:
        # find out how old our saved dump is (in seconds)
        file_age = time.time() - os.path.getmtime(fn)
    except OSError:
        # no saved botlist exists yet, retrieve one
        refresh(site)
    else:
        # Only getmtime() is guarded above, so an OSError raised inside
        # refresh() propagates instead of triggering a second refresh.
        # if it's older than 1 day, reload it
        if file_age > 1 * 24 * 60 * 60:
            pywikibot.output(
                u'Copy of bot user list is one day old, reloading')
            refresh(site)

    # NOTE(review): pickle.load() must only be used on trusted files; this
    # reads the dump written by refresh() into the local data directory.
    # Text mode 'r' is kept on purpose to match the 'w' mode used by
    # refresh() when writing the dump.
    f = open(fn, 'r')
    try:
        botlist = pickle.load(f)
    finally:
        # close the file even if unpickling fails
        f.close()
    # create cached copy
    cache[site] = botlist
    return botlist
 
def isBot(user, site=None):
    """Tell whether ``user`` is contained in the bot user list of ``site``.

    ``site`` is handed to get() unchanged, so None selects the default site.
    """
    return user in get(site)
 
def refresh(site, sysop=False, witheditsonly=True):
    """Download the current bot user list of ``site`` and save it to disk.

    First the local members of the 'bot' group are queried through the API
    (list=allusers), then the 'Global_bot' group is scraped page by page
    from the HTML returned for site.globalusers_address(). The combined
    list of user names is pickled into the ``botlists`` data directory.

    @param site: the site whose bot list is retrieved.
    @param sysop: passed on to the login check and the API query; also
        selects the '-sysop' variant of the dump file name.
    @param witheditsonly: if true, 'auwitheditsonly' is added to the API
        query parameters.
    @raise RuntimeError: if the API response contains an 'error' entry.
    """
    #if not site.has_api() or site.versionnumber() < 10:
    #    _refreshOld(site)

    # get botlist special page's URL
    if not site.loggedInAs(sysop=sysop):
        site.forceLogin(sysop=sysop)

    params = {
        'action': 'query',
        'list': 'allusers',
        'augroup': 'bot',
    }
    if witheditsonly:
        params['auwitheditsonly'] = ''

    pywikibot.output(u'Retrieving bot user list for %s via API.' % repr(site))
    pywikibot.put_throttle() # It actually is a get, but a heavy one.
    botlist = []
    while True:
        data = pywikibot.query.GetData(params, site, sysop=sysop)
        if 'error' in data:
            raise RuntimeError('ERROR: %s' % data)
        botlist.extend([w['name'] for w in data['query']['allusers']])

        # follow the API continuation until the last batch was received
        if 'query-continue' in data:
            params['aufrom'] = data['query-continue']['allusers']['aufrom']
        else:
            break

    pywikibot.output(u'Retrieving global bot user list for %s.' % repr(site))
    pywikibot.put_throttle() # It actually is a get, but a heavy one.
    if site.versionnumber() >= 17:
        PATTERN = u'<li>(.*?) *\((.*?),\s(.*?)\)(?:.*?)</li>'
    else:
        PATTERN = u'<li>(.*?) *\((.*?),\s(.*?)\)</li>'

    # set of names already collected, for fast duplicate checks
    known = set(botlist)
    offset = ''
    while True:
        # The offset is a user name and may contain non-ASCII characters or
        # blanks, so it has to be percent-quoted before it goes into the URL
        # (otherwise the server answers "HTTP Error 400: Bad Request").
        text = site.getUrl(site.globalusers_address(
            offset=urllib.quote(offset), group='Global_bot'))

        last_bot = None
        for item in re.findall(u'<li>.*?</li>', text):
            m2 = re.search(PATTERN, item)
            if m2 is None:
                # list item that is not a user entry; ignore it
                continue
            # Only the user name (group 1) is needed; every entry is a
            # global bot since group='Global_bot' was requested.
            bot = m2.group(1)
            last_bot = bot
            if bot not in known:
                known.add(bot)
                botlist.append(bot)

        if last_bot is None:
            # page without user entries: end of the list reached
            break
        next_offset = last_bot.encode(site.encoding())
        if next_offset == offset:
            # The offset did not advance; requesting the same page again
            # would loop forever.
            break
        offset = next_offset

    # Save the botlist to disk
    # The file is stored in the botlists subdir. Create if necessary.
    if sysop:
        fn = pywikibot.config.datafilepath('botlists',
                 'botlist-%s-%s-sysop.dat' % (site.family.name, site.lang))
    else:
        fn = pywikibot.config.datafilepath('botlists',
                 'botlist-%s-%s.dat' % (site.family.name, site.lang))
    f = open(fn, 'w')
    try:
        pickle.dump(botlist, f)
    finally:
        # close the file even if pickling fails
        f.close()
 
#def refresh_all(new = False, sysop=False):
#    if new:
#        import config
#        pywikibot.output('Downloading All bot user lists for your accounts in user-config.py');
#        for family in config.usernames:
#            for lang in config.usernames[ family ]:
#                refresh(pywikibot.getSite( code = lang, fam = family ), sysop=sysop )
#        for family in config.sysopnames:
#            for lang in config.sysopnames[ family ]:
#                refresh(pywikibot.getSite( code = lang, fam = family ), sysop=sysop )
#
#    else:
#        import dircache, time
#        filenames = dircache.listdir(pywikibot.config.datafilepath('botlists'))
#        botlist_filenameR = re.compile('botlist-([a-z\-:]+).dat')
#        for filename in filenames:
#            match = botlist_filenameR.match(filename)
#            if match:
#                arr = match.group(1).split('-')
#                family = arr[0]
#                lang = '-'.join(arr[1:])
#                refresh(pywikibot.getSite(code = lang, fam = family))
#
#def main():
#    all = False
#    new = False
#    sysop = False
#    for arg in pywikibot.handleArgs():
#        if arg == '-all' or arg == '-update':
#            all = True
#        elif arg == '-new':
#            new = True
#        elif arg == '-sysop':
#            sysop = True
#    if all:
#        refresh_all(sysop=sysop)
#    elif new:
#        refresh_all(new, sysop=sysop)
#    else:
#        refresh(pywikibot.getSite(), sysop=sysop)
#
#        botlist = get(pywikibot.getSite())
#        pywikibot.output(u'%i pages in the bot user list.' % len(botlist))
#        for pageName in botlist:
#            pywikibot.output( pageName, toStdout = True )
#
#if __name__ == "__main__":
#    try:
#        main()
#    finally:
#        pywikibot.stopme()
 

_______________________________________________
Pywikipedia-l mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/pywikipedia-l

Reply via email to