Alex Monk has uploaded a new change for review.

  https://gerrit.wikimedia.org/r/227505

Change subject: Add python3 script to populate meta_p
......................................................................

Add python3 script to populate meta_p

Bug: T107094
Change-Id: I380a3fdef334180ae32b2dad7f232fd5c5d1821a
---
A maintain-replicas/maintain-meta_p.py
1 file changed, 213 insertions(+), 0 deletions(-)


  git pull ssh://gerrit.wikimedia.org:29418/operations/software 
refs/changes/05/227505/1

diff --git a/maintain-replicas/maintain-meta_p.py 
b/maintain-replicas/maintain-meta_p.py
new file mode 100644
index 0000000..339438d
--- /dev/null
+++ b/maintain-replicas/maintain-meta_p.py
@@ -0,0 +1,213 @@
+#! /usr/bin/python3
+# -*- coding: utf-8 -*-
+
+#  Based on work by Marc-André Pelletier, ported to Python by Alex Monk
+#  Copyright © 2015 Alex Monk <[email protected]>
+#  Copyright © 2013 Marc-André Pelletier <[email protected]>
+#
+#  Permission to use, copy, modify, and/or distribute this software for any
+#  purpose with or without fee is hereby granted, provided that the above
+#  copyright notice and this permission notice appear in all copies.
+#
+#  THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
+#  WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
+#  MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
+#  ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
+#  WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
+#  ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
+#  OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
+#
+##
+##  This script sets up tables of metainformation on each slice (in the meta_p
+##  database).
+##
+##  The script expects to be invoked from a directory that contains a
+##  checkout of operations/mediawiki-config named "mediawiki-config". It
+##  updates that checkout with "git pull", gets most of its information
+##  from it, and connects to each wiki through the API to get the rest.
+##
+##  It connects to the slices with the credentials in the .my.cnf file
+##  found in the current directory, but is probably only useful if those
+##  credentials have full control over the meta_p database on each slice
+##  to be processed.
+##
+
+# (host, port) pairs of the database servers this script connects to; the
+# meta_p database and its tables are created/refreshed on each one in turn.
+slices = [
+    ('labsdb1001.eqiad.wmnet', 3306),
+    ('labsdb1002.eqiad.wmnet', 3306),
+    ('labsdb1003.eqiad.wmnet', 3306),
+    #('localhost', 3306)
+]
+
+import codecs, collections, json, pymysql, re, subprocess, urllib, 
urllib.request
+from configparser import ConfigParser
+
+def stripQuotes(value):
+    """Return value without one pair of matching surrounding quotes.
+
+    Values in .my.cnf may be written as 'value' or "value"; ConfigParser
+    hands the quote characters back as part of the string.  A value that is
+    not wrapped in a matching pair of quotes is returned unchanged, so an
+    unquoted user name or password is no longer truncated.
+    """
+    if len(value) >= 2 and value[0] == value[-1] and value[0] in "'\"":
+        return value[1:-1]
+    return value
+
+# Database credentials: the [client] section of the .my.cnf file in the
+# current working directory.
+config = ConfigParser()
+config.read('.my.cnf')
+dbuser = stripQuotes(config.get('client', 'user'))
+dbpassword = stripQuotes(config.get('client', 'password'))
+
+# Bring the mediawiki-config checkout up to date.  Running git directly with
+# cwd= (instead of "cd ...; git pull" through a shell) guarantees the pull
+# can never run in the wrong directory: if mediawiki-config is missing this
+# raises instead of pulling whatever repository the script was started in.
+subprocess.call(['git', 'pull'], cwd = 'mediawiki-config')
+
+# Maps each database name to a dict of the properties collected for it.
+dbs = collections.defaultdict(dict)
+
+def readList(listFname, prop, val):
+    """Set dbs[db][prop] = val for every database named in a .dblist file.
+
+    listFname -- name of the list; mediawiki-config/<listFname>.dblist is read
+    prop      -- property key to set on each listed database
+    val       -- value stored under that key
+
+    Lines starting with '#' are comments.  Blank lines are skipped and
+    surrounding whitespace is ignored, so they cannot create a bogus entry
+    with an empty or padded database name.  The file is closed when done.
+    Raises OSError (IOError) if the list cannot be opened.
+    """
+    with open('mediawiki-config/' + listFname + '.dblist') as listFile:
+        for line in listFile:
+            db = line.strip()
+            if db and not db.startswith('#'):
+                dbs[db][prop] = val
+
+# Properties taken straight from a single .dblist file, given as
+# (list name, property, value) and applied in exactly this order.
+for listName, listProp, listVal in [
+        ("closed", "closed", True),
+        ("deleted", "deleted", True),
+        ("small", "size", 1),
+        ("medium", "size", 2),
+        ("large", "size", 3),
+        ("private", "private", True),
+        ("special", "family", "special"),
+        ("flaggedrevs", "has_flaggedrevs", True),
+        ("visualeditor-default", "has_visualeditor", True),
+        ("wikidataclient", "has_wikidata", True)]:
+    readList(listName, listProp, listVal)
+
+# Record, for every database, which of the s1..s7 lists it appears in.
+for slice in ('s1', 's2', 's3', 's4', 's5', 's6', 's7'): # TODO: silver
+    readList(slice, "slice", slice)
+
+# One list per project family.  These run after the "special" assignment
+# above, so a database present in both ends up with the family set here.
+for family in ("wikibooks", "wikidata", "wikinews", "wikiquote", "wikisource",
+               "wikiversity", "wikivoyage", "wiktionary", "wikimania",
+               "wikimedia", "wikipedia"):
+    readList(family, "family", family)
+
+# Case sensitivity of titles is neither in a .dblist nor exposed through
+# the API, so it is hardcoded here to match InitialiseSettings.php.
+readList("wiktionary", "sensitive", True)
+dbs['jbowiki']['sensitive'] = True
+
+# Collect the entries of the 'wgCanonicalServer' array in
+# InitialiseSettings.php into a dict (array key -> server URL).  The file is
+# scanned line by line: entries are only picked up between the line that
+# opens that array and the next line consisting of "),".
+canonical = {}
+# Matches an indented   'key' => 'value',   line.
+entryRe = re.compile(r"^\s+'(.*)'\s+=>\s+'(.*)'\s*,\s*$")
+inCanonical = False
+# Decode as UTF-8 explicitly so the result does not depend on the locale,
+# and use a context manager so the file is closed again.
+with open('mediawiki-config/wmf-config/InitialiseSettings.php',
+          encoding = 'utf-8') as settingsFile:
+    for line in settingsFile.read().splitlines():
+        if line == "'wgCanonicalServer' => array(":
+            inCanonical = True
+        elif inCanonical and line == "),":
+            inCanonical = False
+        elif inCanonical:
+            matches = entryRe.match(line)
+            if matches:
+                canonical[matches.group(1)] = matches.group(2)
+
+# Siteinfo already fetched on earlier runs, keyed by canonical URL; each
+# value is a dict with the wiki's 'lang' and 'name'.
+cached = collections.defaultdict(dict)
+try:
+    with open('wiki-cache.json') as cacheFile:
+        # Merge into the defaultdict instead of rebinding the name:
+        # json.load() returns a plain dict, and code that later does
+        # cached[url][key] = ... for a new URL relies on missing entries
+        # being created automatically.
+        cached.update(json.load(cacheFile))
+except IOError:
+    # No readable cache file (e.g. first run): start with an empty cache.
+    pass
+
+# For every wiki that is neither private nor deleted, work out its canonical
+# URL and then its language code and site name, asking the wiki's API only
+# when the answer is not in the cache yet.
+for db, dbInfo in dbs.items():
+    if dbInfo.get('private') or dbInfo.get('deleted'):
+        continue
+
+    canon = None
+    if db in canonical:
+        # wgCanonicalServer has an entry for this exact database name
+        canon = canonical[db]
+    else:
+        # Otherwise use the entry keyed by the wiki's family, substituting
+        # the part of the database name in front of "wiki"/"wikt" for
+        # $lang.  A wiki without a family, or a family without an entry,
+        # is skipped instead of aborting the whole run with a KeyError.
+        matches = re.match("^(.*)(wik[it].*)", db)
+        template = canonical.get(dbInfo.get('family'))
+        if matches and template:
+            canon = template.replace('$lang', matches.group(1))
+
+    if not canon:
+        continue
+
+    canon = canon.replace('_', '-')
+    dbInfo['url'] = canon
+
+    entry = cached.get(canon)
+    if not entry or 'lang' not in entry or 'name' not in entry:
+        # Not cached yet (or only partially, from an interrupted earlier
+        # run): ask the wiki itself.
+        print("Querying", canon + "...")
+        try:
+            req = urllib.request.Request(canon + "/w/api.php?action=query&meta=siteinfo&siprop=general&format=json")
+            req.add_header("User-Agent", "operations/software.git maintain-meta_p.py")
+
+            with urllib.request.urlopen(req) as response:
+                result = json.load(codecs.getreader("utf-8")(response))['query']
+            # Read both values before storing anything, so a response that
+            # lacks one of them cannot leave a half-filled cache entry.
+            entry = {
+                'lang': result['general']['lang'],
+                'name': result['general']['sitename'],
+            }
+        except Exception as e:
+            # Best effort: report the problem and carry on with the next
+            # wiki; this one simply gets no lang/name.
+            print(e)
+            continue
+        # Plain assignment works whether or not cached is a defaultdict.
+        cached[canon] = entry
+
+    dbInfo['lang'] = entry['lang']
+    dbInfo['name'] = entry['name']
+
+# Save the siteinfo cache (including anything fetched during this run) so
+# later runs do not have to query those wikis again.
+with open('wiki-cache.json', 'w') as cacheFile:
+    cacheFile.write(json.dumps(cached))
+
+def wikiRow(db, dbInfo):
+    """Return the meta_p.wiki row for database db as a dict column -> value.
+
+    db     -- database name
+    dbInfo -- the properties collected for it; must contain 'slice'
+
+    Only columns for which dbInfo carries data are included, so the column
+    defaults declared in the table definition apply to everything else.
+    """
+    row = {
+        'dbname': db,
+        'slice': dbInfo['slice'] + '.labsdb',
+    }
+    # Columns copied as they are when the information is available
+    for column in ('url', 'family', 'lang', 'name', 'size'):
+        if column in dbInfo:
+            row[column] = dbInfo[column]
+    # Flag columns, written only when the flag is set: (property, column)
+    for prop, column in (('has_flaggedrevs', 'has_flaggedrevs'),
+                         ('has_visualeditor', 'has_visualeditor'),
+                         ('has_wikidata', 'has_wikidata'),
+                         ('closed', 'is_closed'),
+                         ('sensitive', 'is_sensitive')):
+        if dbInfo.get(prop):
+            row[column] = 1
+    return row
+
+# Create (if necessary) and repopulate the meta_p tables on every slice.
+for dbhost, dbport in slices:
+    dbh = pymysql.connect(host = dbhost, port = dbport, user = dbuser, passwd = dbpassword, charset = 'utf8')
+    # try/finally so the connection is closed even if a statement fails
+    try:
+        cursor = dbh.cursor()
+
+        print("Update/create meta tables on", dbhost + "...")
+        cursor.execute("CREATE DATABASE IF NOT EXISTS meta_p DEFAULT CHARACTER SET utf8;")
+        cursor.execute("""CREATE TABLE IF NOT EXISTS meta_p.wiki (
+            dbname varchar(32) PRIMARY KEY,
+            lang varchar(12) NOT NULL DEFAULT 'en',
+            name text,
+            family text,
+            url text,
+            size numeric(1) NOT NULL DEFAULT 1,
+            slice text NOT NULL,
+            is_closed numeric(1) NOT NULL DEFAULT 0,
+            has_echo numeric(1) NOT NULL DEFAULT 1,
+            has_flaggedrevs numeric(1) NOT NULL DEFAULT 0,
+            has_visualeditor numeric(1) NOT NULL DEFAULT 0,
+            has_wikidata numeric(1) NOT NULL DEFAULT 0,
+            is_sensitive numeric(1) NOT NULL DEFAULT 0);""")
+        # meta_p.legacy is a view over meta_p.wiki; several of its columns
+        # are constants or NULL.
+        cursor.execute("""CREATE OR REPLACE VIEW meta_p.legacy AS
+            SELECT dbname, lang, family, NULL AS domain, size, 0 AS is_meta,
+                   is_closed, 0 AS is_multilang, (family='wiktionary') AS is_sensitive,
+                   NULL AS root_category, slice AS server, '/w/' AS script_path
+                FROM meta_p.wiki;""")
+        cursor.execute("""CREATE TABLE IF NOT EXISTS meta_p.properties_anon_whitelist (
+            pw_property varbinary(255) PRIMARY KEY);""")
+
+        cursor.execute("START TRANSACTION;")
+        # DELETE rather than TRUNCATE: in MySQL, TRUNCATE TABLE causes an
+        # implicit commit, which would end the transaction right here and
+        # leave the table empty until the final COMMIT.  DELETE keeps the
+        # whole refresh inside one transaction, as is already done for
+        # properties_anon_whitelist below.
+        cursor.execute("DELETE FROM meta_p.wiki;")
+        for db, dbInfo in dbs.items():
+            if dbInfo.get('deleted') or dbInfo.get('private'):
+                continue
+            elif 'slice' not in dbInfo: # TODO: wikitech breaks here
+                continue
+
+            row = wikiRow(db, dbInfo)
+            # Column names come from the fixed set built in wikiRow(); the
+            # values are passed separately so that the driver does the
+            # quoting and escaping.
+            cursor.execute(
+                "INSERT INTO meta_p.wiki(" + ','.join(row.keys()) + ") VALUES ("
+                + ','.join(['%s'] * len(row)) + ");",
+                list(row.values()))
+        cursor.execute("COMMIT;")
+
+        cursor.execute("START TRANSACTION;")
+        cursor.execute("DELETE FROM meta_p.properties_anon_whitelist;")
+        # This is hardcoded for now.  No arguments are passed to execute()
+        # here, so the literal '%' is sent to the server unchanged.
+        cursor.execute("INSERT INTO meta_p.properties_anon_whitelist VALUES ('gadget-%');")
+        cursor.execute("COMMIT;")
+    finally:
+        dbh.close()
+
+print("All done.")
\ No newline at end of file

-- 
To view, visit https://gerrit.wikimedia.org/r/227505
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: newchange
Gerrit-Change-Id: I380a3fdef334180ae32b2dad7f232fd5c5d1821a
Gerrit-PatchSet: 1
Gerrit-Project: operations/software
Gerrit-Branch: master
Gerrit-Owner: Alex Monk <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to