Yuvipanda has submitted this change and it was merged.
Change subject: Add script to generate config about <wiki>_p viewdbs
......................................................................
Add script to generate config about <wiki>_p viewdbs
Change-Id: I8a4709e0b3803812fa85eb1f705f18426377bc9a
---
A auditor/__init__.py
A auditor/bootstrap.py
A auditor/dbreflector.py
A auditor/mwconfig.py
A auditor/table.py
A auditor/wikidatabase.py
6 files changed, 169 insertions(+), 0 deletions(-)
Approvals:
Yuvipanda: Verified; Looks good to me, approved
diff --git a/auditor/__init__.py b/auditor/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/auditor/__init__.py
diff --git a/auditor/bootstrap.py b/auditor/bootstrap.py
new file mode 100644
index 0000000..980af20
--- /dev/null
+++ b/auditor/bootstrap.py
@@ -0,0 +1,92 @@
+"""
+Generate dblists.yaml and tableschema.yaml from live database servers
+
+Databases found on the given hosts are compared against the names in
+all.dblist minus private.dblist (each with --db_suffix appended), and the
+columns of every table in the matching databases are written out.
+"""
+import sys
+import argparse
+import MySQLdb
+import yaml
+
+from dbreflector import DBReflector
+from mwconfig import MWConfig
+from wikidatabase import WikiDatabase
+from table import Table
+
+argparser = argparse.ArgumentParser()
+
+# --hosts and --mwconfig are dereferenced unconditionally below, so have
+# argparse reject a missing one instead of crashing later on None
+argparser.add_argument('--hosts', required=True, help='Hosts to connect to')
+argparser.add_argument('--mwconfig', required=True,
+                       help='Path to mediawiki-config repository')
+argparser.add_argument('--db_suffix',
+                       help='Suffix to use for each database name',
+                       default='')
+
+args = argparser.parse_args()
+
+mwconfig = MWConfig(args.mwconfig)
+
+# Expected databases: everything in 'all' that is not in 'private'
+raw_whitelist_dbs = (set(mwconfig.get_dblist('all')) -
+                     set(mwconfig.get_dblist('private')))
+whitelist_dbs = set(r + args.db_suffix for r in raw_whitelist_dbs)
+
+all_dbs = set()  # every database name seen on any host
+
+dbs = {}  # database name -> WikiDatabase, whitelisted names only
+tables = {}  # table name -> Table, shared by every database that has it
+
+# Comma separated entries, each either 'host' or 'host:port'
+hostspec = args.hosts.split(',')
+for host in hostspec:
+    if ':' in host:
+        hostname, port = host.split(':')
+        port = int(port)
+    else:
+        hostname, port = host, 3306
+    conn = MySQLdb.connect(host=hostname, port=port,
+                           read_default_file='~/.my.cnf')
+    try:
+        reflector = DBReflector(conn)
+        dbnames = reflector.get_databases()
+        for dbname in dbnames:
+            all_dbs.add(dbname)
+            if dbname not in whitelist_dbs:
+                continue
+            db = WikiDatabase(dbname)
+            dbs[dbname] = db
+            tablenames = reflector.get_tables(dbname)
+            for tablename in tablenames:
+                table = tables.get(tablename)
+                if table is None:
+                    # Columns are read once, from the first database in
+                    # which this table name turns up
+                    table = Table(tablename,
+                                  reflector.get_columns(dbname, tablename))
+                    tables[tablename] = table
+                # add_db() links both ways: it calls db.add_table() itself
+                table.add_db(db)
+    finally:
+        # Release the connection before moving on to the next host
+        conn.close()
+
+# Write out db lists, sorted so that reruns give identical files
+with open('dblists.yaml', 'w') as f:
+    yaml.dump({
+        'not-in-db': sorted(whitelist_dbs - set(dbs)),
+        'not-in-dblist': sorted(all_dbs - whitelist_dbs - raw_whitelist_dbs)
+    }, f)
+
+# Write out table schemas, every column marked as whitelisted
+schemadata = {}
+for name, table in tables.items():
+    schemadata[name] = {
+        columnname: {'whitelisted': True} for columnname in table.columns
+    }
+
+with open('tableschema.yaml', 'w') as f:
+    yaml.dump(schemadata, f)
diff --git a/auditor/dbreflector.py b/auditor/dbreflector.py
new file mode 100644
index 0000000..8e8a295
--- /dev/null
+++ b/auditor/dbreflector.py
@@ -0,0 +1,71 @@
+from contextlib import closing
+
+
+def _quote_identifier(name):
+    """
+    Quote a database or table name for use inside a SQL statement
+
+    :param name: Identifier to quote
+    :return: Name wrapped in backticks, with embedded backticks doubled
+    """
+    return '`' + name.replace('`', '``') + '`'
+
+
+class DBReflector(object):
+    """
+    Provides methods to gather metadata about objects in a database server
+    """
+    def __init__(self, conn):
+        """
+        :param conn: Connection object; only its cursor() method is used
+        """
+        self.conn = conn
+
+    def _first_column(self, *statements):
+        """
+        Run statements on one cursor and collect the last result's column 0
+
+        The cursor is closed even if one of the statements raises.
+
+        :param statements: SQL statements to execute, in order
+        :return: List with the first field of each row of the last result
+        """
+        with closing(self.conn.cursor()) as cur:
+            for statement in statements:
+                cur.execute(statement)
+            return [row[0] for row in cur.fetchall()]
+
+    def get_databases(self):
+        """
+        Get list of databases in given server
+
+        :return: List of databases from current server
+        """
+        return self._first_column("SHOW DATABASES")
+
+    def get_tables(self, dbname):
+        """
+        Get list of tables in given database
+
+        Switches the connection's default database to dbname.
+
+        :param dbname: Name of database to get tables list for
+        :return: List of tables in given database
+        """
+        # Can't do %s for USE, so the name is backtick-quoted instead
+        return self._first_column('USE ' + _quote_identifier(dbname),
+                                  "SHOW TABLES")
+
+    def get_columns(self, dbname, tablename):
+        """
+        Get list of columns in given table in given database
+
+        Switches the connection's default database to dbname.
+
+        :param dbname: Name of database to find table name
+        :param tablename: Table to get columns list for
+        :return: List of columns for the given table
+        """
+        # Can't do %s for USE or DESCRIBE targets, so quote the names instead
+        return self._first_column('USE ' + _quote_identifier(dbname),
+                                  'DESCRIBE ' + _quote_identifier(tablename))
diff --git a/auditor/mwconfig.py b/auditor/mwconfig.py
new file mode 100644
index 0000000..72a674c
--- /dev/null
+++ b/auditor/mwconfig.py
@@ -0,0 +1,34 @@
+import os
+
+
+class MWConfig(object):
+    """
+    Represents settings that can be read from mediawiki-config git repository
+    """
+
+    def __init__(self, path):
+        """
+        :param path: Directory the <listname>.dblist files are read from
+        """
+        self.path = path
+
+    def get_dblist(self, listname):
+        """
+        Get list of databases specified in a particular dblist
+
+        Blank lines are skipped and everything from a '#' to the end of a
+        line is dropped, so only database names are returned.
+
+        :param listname: Name of dblist (eg. wikipedia, special)
+        :return: List of databases in that dblist, in file order
+        :raises IOError: If the dblist file cannot be opened
+        """
+        path = os.path.join(self.path, listname + '.dblist')
+        dbnames = []
+        with open(path) as f:
+            for line in f:
+                # Cut off a trailing comment, then surrounding whitespace
+                dbname = line.split('#', 1)[0].strip()
+                if dbname:
+                    dbnames.append(dbname)
+        return dbnames
diff --git a/auditor/table.py b/auditor/table.py
new file mode 100644
index 0000000..8a94187
--- /dev/null
+++ b/auditor/table.py
@@ -0,0 +1,24 @@
+class Table(object):
+    """
+    A table name, its columns and the databases the table was found in
+    """
+    def __init__(self, name, columns):
+        """
+        :param name: Name of the table
+        :param columns: Columns of the table, stored as given
+        """
+        self.name, self.columns = name, columns
+        self.dbs = {}  # db.name -> db
+
+    def add_db(self, db):
+        """
+        Record a database this table exists in
+
+        The database is stored before db.add_table() is called, so the
+        add_table() -> add_db() call coming back returns immediately.
+
+        :param db: Database object exposing name and add_table()
+        """
+        if db.name not in self.dbs:
+            self.dbs[db.name] = db
+            db.add_table(self)
diff --git a/auditor/wikidatabase.py b/auditor/wikidatabase.py
new file mode 100644
index 0000000..478dc37
--- /dev/null
+++ b/auditor/wikidatabase.py
@@ -0,0 +1,35 @@
+class WikiDatabase(object):
+    """
+    Represents a particular wiki's database
+
+    Keeps the Table objects added to it in self.tables, keyed by table name.
+    """
+    def __init__(self, name):
+        """
+        :param name: Name of the database
+        """
+        self.name = name
+        self.tables = {}
+
+    @property
+    def publicdbname(self):
+        """
+        Name of this database with '_p' appended
+
+        :return: self.name followed by '_p'
+        """
+        return self.name + '_p'
+
+    def add_table(self, table):
+        """
+        Add a table to the current database
+
+        The table is stored before table.add_db() is called, so the
+        add_db() -> add_table() call coming back returns immediately.
+
+        :param table: Table object to add
+        """
+        if table.name in self.tables:
+            return
+        self.tables[table.name] = table
+        table.add_db(self)
--
To view, visit https://gerrit.wikimedia.org/r/179110
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings
Gerrit-MessageType: merged
Gerrit-Change-Id: I8a4709e0b3803812fa85eb1f705f18426377bc9a
Gerrit-PatchSet: 17
Gerrit-Project: operations/software/labsdb-auditor
Gerrit-Branch: master
Gerrit-Owner: Yuvipanda <[email protected]>
Gerrit-Reviewer: Yuvipanda <[email protected]>
_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits