jenkins-bot has submitted this change and it was merged. Change subject: (bug 55121) Port generate_family_file.py to core ......................................................................
(bug 55121) Port generate_family_file.py to core Now without BeautifulSoup and with Python 3 compatibility! Change-Id: I7a9ba232f9dee797347cde9c1792f068c64563fb --- A generate_family_file.py 1 file changed, 314 insertions(+), 0 deletions(-) Approvals: Xqt: Looks good to me, approved jenkins-bot: Verified diff --git a/generate_family_file.py b/generate_family_file.py new file mode 100644 index 0000000..6ad7fc3 --- /dev/null +++ b/generate_family_file.py @@ -0,0 +1,314 @@ +# -*- coding: utf-8 -*- +""" +This script generates a family file from a given URL. +Hackish, etc. Regexps, yes. Sorry, jwz. + +""" +from __future__ import (absolute_import, division, + print_function, unicode_literals) +# +# (C) Merlijn van Deen, 2010-2013 +# (C) Pywikibot team, 2010-2013 +# +# Distributed under the terms of the MIT license +# +__version__ = "$Id$" +# + +# system imports +import sys +import re +import os +import codecs +from collections import defaultdict +from distutils.version import LooseVersion as V + +if sys.version_info[0] == 3: + raw_input = input + +# creating & retrieving urls +if sys.version_info[0] == 2: + from urlparse import urlparse, urljoin, ParseResult + import urllib2 + from urllib2 import HTTPError +else: + from urllib.parse import urlparse, urljoin, ParseResult + from urllib.error import HTTPError + import urllib.request as urllib2 + + +def urlopen(url): + req = urllib2.Request( + url, + headers={'User-agent': 'Pywikibot Family File Generator 2.0 - https://www.mediawiki.org/wiki/pywikibot'}) + uo = urllib2.urlopen(req) + try: + if sys.version_info[0] == 2: + uo.charset = uo.headers.getfirstmatchingheader('Content-Type')[0].strip().split('charset=')[1] + else: + uo.charset = uo.headers.get_content_charset() + except IndexError: + uo.charset = 'latin-1' + return uo + +# parsing response data +import json +if sys.version_info[0] == 2: + from HTMLParser import HTMLParser +else: + from html.parser import HTMLParser + + +class WikiHTMLPageParser(HTMLParser): + def __init__(self, *args, **kwargs): + HTMLParser.__init__(self, *args, **kwargs) + self.generator = None + + def handle_starttag(self, tag, attrs): + attrs = defaultdict(lambda: None, attrs) + if tag == "meta": + if attrs["name"] == "generator": + self.generator = attrs["content"] + if tag == "link": + if attrs["rel"] == "EditURI": + self.edituri = attrs["href"] + + +class FamilyFileGenerator(object): + def __init__(self, url=None, name=None, dointerwiki=None): + if url is None: + url = raw_input("Please insert URL to wiki: ") + if name is None: + name = raw_input("Please insert a short name (eg: freeciv): ") + self.dointerwiki = dointerwiki + self.base_url = url + self.name = name + + self.wikis = {} # {'http://wiki/$1': Wiki('http://wiki/$1'), ...} + self.langs = [] # [Wiki('http://wiki/$1'), ...] + + def run(self): + print("Generating family file from %s" % self.base_url) + + w = Wiki(self.base_url) + self.wikis[w.iwpath] = w + print() + print("==================================") + print("api url: %s" % w.api) + print("MediaWiki version: %s" % w.version) + print("==================================") + print() + + self.getlangs(w) + self.getapis() + self.writefile() + + def getlangs(self, w): + print("Determining other languages...", end="") + try: + data = urlopen( + w.api + + "?action=query&meta=siteinfo&siprop=interwikimap&sifilteriw=local&format=json") + iw = json.loads(data.read().decode(data.charset)) + if 'error' in iw: + raise RuntimeError('%s - %s' % (iw['error']['code'], + iw['error']['info'])) + self.langs = [wiki for wiki in iw['query']['interwikimap'] + if u'language' in wiki] + print(u' '.join(sorted([wiki[u'prefix'] for wiki in self.langs]))) + except HTTPError as e: + self.langs = [] + print (e, "; continuing...") + + if len([lang for lang in self.langs if lang['url'] == w.iwpath]) == 0: + self.langs.append({u'language': w.lang, + u'local': u'', + u'prefix': w.lang, + u'url': w.iwpath}) + + if len(self.langs) > 1: + if self.dointerwiki is None: + makeiw = raw_input( + "\nThere are %i languages available.\nDo you want to generate interwiki links? This might take a long time. ([y]es/[N]o/[e]dit)" + % len(self.langs)).lower() + else: + makeiw = self.dointerwiki + + if makeiw == "y": + pass + elif makeiw == "e": + for wiki in self.langs: + print(wiki['prefix'], wiki['url']) + do_langs = raw_input("Which languages do you want: ") + self.langs = [wiki for wiki in self.langs + if wiki['prefix'] in do_langs + or wiki['url'] == w.iwpath] + else: + self.langs = [wiki for wiki in self.langs + if wiki[u'url'] == w.iwpath] + + def getapis(self): + print("Loading wikis... ") + for lang in self.langs: + print(" * %s... " % (lang[u'prefix']), end="") + if lang[u'url'] not in self.wikis: + try: + self.wikis[lang[u'url']] = Wiki(lang[u'url']) + print("downloaded") + except Exception as e: + print(e) + else: + print("in cache") + + def writefile(self): + fn = "pywikibot/families/%s_family.py" % self.name + print("Writing %s... " % fn) + try: + open(fn) + if raw_input("%s already exists. Overwrite? (y/n)" + % fn).lower() == 'n': + print("Terminating.") + sys.exit(1) + except IOError: # file not found + pass + f = codecs.open(fn, 'w', 'utf-8') + + f.write(""" +# -*- coding: utf-8 -*- +\"\"\" +This family file was auto-generated by $Id$ +Configuration parameters: + url = %(url)s + name = %(name)s + +Please do not commit this to the Git repository! +\"\"\" + +from pywikibot import family + +class Family(family.Family): + def __init__(self): + family.Family.__init__(self) + self.name = '%(name)s' + self.langs = { +""".lstrip() % {'url': self.base_url, 'name': self.name}) + + for w in self.wikis.values(): + f.write(" '%(lang)s': '%(hostname)s',\n" + % {'lang': w.lang, 'hostname': urlparse(w.server).netloc}) + + f.write(" }\n\n") + + f.write("\n\n") + + f.write(" def scriptpath(self, code):\n") + f.write(" return {\n") + + for w in self.wikis.values(): + f.write(" '%(lang)s': '%(path)s',\n" + % {'lang': w.lang, 'path': w.scriptpath}) + f.write(" }[code]\n") + f.write("\n") + + f.write(" def version(self, code):\n") + f.write(" return {\n") + for w in self.wikis.values(): + if w.version is None: + f.write(" '%(lang)s': None,\n" % {'lang': w.lang}) + else: + f.write(" '%(lang)s': u'%(ver)s',\n" + % {'lang': w.lang, 'ver': w.version}) + f.write(" }[code]\n") + + +class Wiki(object): + REwgEnableApi = re.compile(r'wgEnableAPI ?= ?true') + REwgServer = re.compile(r'wgServer ?= ?"([^"]*)"') + REwgScriptPath = re.compile(r'wgScriptPath ?= ?"([^"]*)"') + REwgArticlePath = re.compile(r'wgArticlePath ?= ?"([^"]*)"') + REwgContentLanguage = re.compile(r'wgContentLanguage ?= ?"([^"]*)"') + REwgVersion = re.compile(r'wgVersion ?= ?"([^"]*)"') + + def __init__(self, fromurl): + self.fromurl = fromurl + if fromurl.endswith("$1"): + fromurl = fromurl[:-2] + try: + uo = urlopen(fromurl) + data = uo.read().decode(uo.charset) + except HTTPError as e: + if e.code != 404: + raise + data = e.read().decode('latin-1') # don't care about mojibake for errors + pass + + wp = WikiHTMLPageParser() + wp.feed(data) + try: + self.version = wp.generator.replace("MediaWiki ", "") + except Exception: + self.version = "0.0" + + if V(self.version) < V("1.17.0"): + self._parse_pre_117(data) + else: + self._parse_post_117(wp, fromurl) + + def _parse_pre_117(self, data): + if not self.REwgEnableApi.search(data): + print("*** WARNING: Api does not seem to be enabled on %s" + % self.fromurl) + try: + self.version = self.REwgVersion.search(data).groups()[0] + except AttributeError: + self.version = None + + self.server = self.REwgServer.search(data).groups()[0] + self.scriptpath = self.REwgScriptPath.search(data).groups()[0] + self.articlepath = self.REwgArticlePath.search(data).groups()[0] + self.lang = self.REwgContentLanguage.search(data).groups()[0] + + if self.version is None: + # try to get version using api + try: + d = json.load(urlopen(self.api + "?version&format=json")) + self.version = filter( + lambda x: x.startswith("MediaWiki"), + [l.strip() + for l in d['error']['*'].split("\n")])[0].split()[1] + except Exception: + pass + + def _parse_post_117(self, wp, fromurl): + apipath = wp.edituri.split("?")[0] + fullurl = urljoin(fromurl, apipath) + data = urlopen(fullurl + "?action=query&meta=siteinfo&format=json") + info = json.loads(data.read().decode(data.charset))['query']['general'] + self.server = urljoin(fromurl, info['server']) + for item in ['scriptpath', 'articlepath', 'lang']: + setattr(self, item, info[item]) + + def __cmp__(self, other): + return (self.server + self.scriptpath == + other.server + other.scriptpath) + + def __hash__(self): + return hash(self.server + self.scriptpath) + + @property + def api(self): + return self.server + self.scriptpath + "/api.php" + + @property + def iwpath(self): + return self.server + self.articlepath + + +if __name__ == "__main__": + if len(sys.argv) != 3: + print("Usage: %s <url> <short name>" % sys.argv[0]) + print("Example: %s http://www.mywiki.bogus/wiki/Main_Page mywiki" + % sys.argv[0]) + print("This will create the file families/mywiki_family.py") + + FamilyFileGenerator(*sys.argv[1:]).run() -- To view, visit https://gerrit.wikimedia.org/r/107005 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: merged Gerrit-Change-Id: I7a9ba232f9dee797347cde9c1792f068c64563fb Gerrit-PatchSet: 7 Gerrit-Project: pywikibot/core Gerrit-Branch: master Gerrit-Owner: Merlijn van Deen <valhall...@arctus.nl> Gerrit-Reviewer: Ladsgroup <ladsgr...@gmail.com> Gerrit-Reviewer: Legoktm <legoktm.wikipe...@gmail.com> Gerrit-Reviewer: Merlijn van Deen <valhall...@arctus.nl> Gerrit-Reviewer: Xqt <i...@gno.de> Gerrit-Reviewer: jenkins-bot _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits