Yuvipanda has submitted this change and it was merged.

Change subject: Add script to generate config about <wiki>_p viewdbs
......................................................................


Add script to generate config about <wiki>_p viewdbs

Change-Id: I8a4709e0b3803812fa85eb1f705f18426377bc9a
---
A auditor/__init__.py
A auditor/bootstrap.py
A auditor/dbreflector.py
A auditor/mwconfig.py
A auditor/table.py
A auditor/wikidatabase.py
6 files changed, 169 insertions(+), 0 deletions(-)

Approvals:
  Yuvipanda: Verified; Looks good to me, approved



diff --git a/auditor/__init__.py b/auditor/__init__.py
new file mode 100644
index 0000000..e69de29
--- /dev/null
+++ b/auditor/__init__.py
diff --git a/auditor/bootstrap.py b/auditor/bootstrap.py
new file mode 100644
index 0000000..980af20
--- /dev/null
+++ b/auditor/bootstrap.py
@@ -0,0 +1,95 @@
+"""
+Generate YAML config files describing the databases and tables found on a
+set of database hosts.
+
+Only databases listed in the 'all' dblist but not in the 'private' dblist
+(with --db_suffix appended) are inspected. Output is written to dblists.yaml
+and tableschema.yaml in the current working directory.
+"""
+import sys
+import argparse
+import MySQLdb
+import yaml
+
+from dbreflector import DBReflector
+from mwconfig import MWConfig
+from wikidatabase import WikiDatabase
+from table import Table
+
+argparser = argparse.ArgumentParser()
+
+# --hosts and --mwconfig are dereferenced unconditionally below, so have
+# argparse reject a missing one up front instead of crashing later on None.
+argparser.add_argument('--hosts', required=True,
+                       help='Hosts to connect to')
+argparser.add_argument('--mwconfig', required=True,
+                       help='Path to mediawiki-config repository')
+argparser.add_argument('--db_suffix', default='',
+                       help='Suffix to use for each database name')
+
+args = argparser.parse_args()
+
+mwconfig = MWConfig(args.mwconfig)
+
+# Databases listed in 'all' but not in 'private', as named in the dblists
+raw_whitelist_dbs = (set(mwconfig.get_dblist('all')) -
+                     set(mwconfig.get_dblist('private')))
+# The same names with the suffix appended, for matching against the hosts
+whitelist_dbs = set([r + args.db_suffix for r in raw_whitelist_dbs])
+
+all_dbs = set()  # every database name seen on any host
+
+dbs = {}     # database name -> WikiDatabase, whitelisted databases only
+tables = {}  # table name -> Table, shared by all databases that have it
+
+# --hosts is a comma separated list of "hostname" or "hostname:port" entries
+hostspec = args.hosts.split(',')
+for host in hostspec:
+    if ':' in host:
+        hostname, port = host.split(':')
+        port = int(port)
+    else:
+        hostname, port = host, 3306
+    conn = MySQLdb.connect(host=hostname, port=port,
+                           read_default_file='~/.my.cnf')
+    try:
+        reflector = DBReflector(conn)
+        dbnames = reflector.get_databases()
+        for dbname in dbnames:
+            all_dbs.add(dbname)
+            if dbname not in whitelist_dbs:
+                continue
+            db = WikiDatabase(dbname)
+            dbs[dbname] = db
+            tablenames = reflector.get_tables(dbname)
+            for tablename in tablenames:
+                if tablename in tables:
+                    tables[tablename].add_db(db)
+                else:
+                    # Columns are read only from the first database in
+                    # which a table name is encountered.
+                    table = Table(tablename,
+                                  reflector.get_columns(dbname, tablename))
+                    tables[tablename] = table
+                    db.add_table(table)
+    finally:
+        # Do not leave a connection open per host for the rest of the run
+        conn.close()
+
+# Write out db lists
+with open('dblists.yaml', 'w') as f:
+    yaml.dump({
+        'not-in-db': list(whitelist_dbs - set(dbs.keys())),
+        'not-in-dblist': list(all_dbs - whitelist_dbs - raw_whitelist_dbs)
+    }, f)
+
+# Write out table schemas
+schemadata = {}
+for name, table in tables.items():
+    tabledata = {}
+    for columnname in table.columns:
+        tabledata[columnname] = {'whitelisted': True}
+    schemadata[name] = tabledata
+
+with open('tableschema.yaml', 'w') as f:
+    yaml.dump(schemadata, f)
diff --git a/auditor/dbreflector.py b/auditor/dbreflector.py
new file mode 100644
index 0000000..8e8a295
--- /dev/null
+++ b/auditor/dbreflector.py
@@ -0,0 +1,68 @@
+class DBReflector(object):
+    """
+    Provides methods to gather metadata about objects in a database server
+    """
+    def __init__(self, conn):
+        """
+        :param conn: Database connection; only its cursor() method is
+                     used by this class
+        """
+        self.conn = conn
+
+    @staticmethod
+    def _quote(identifier):
+        """
+        Quote a database or table name for direct inclusion in a statement
+
+        Identifiers cannot be bound as query parameters, so they are wrapped
+        in backticks with any embedded backtick doubled.
+        """
+        return '`' + identifier.replace('`', '``') + '`'
+
+    def _first_column(self, *statements):
+        """
+        Run statements in order on one cursor and return the first column of
+        every row produced by the last one
+
+        The cursor is closed even if a statement raises.
+        """
+        cur = self.conn.cursor()
+        try:
+            for statement in statements:
+                cur.execute(statement)
+            return [r[0] for r in cur.fetchall()]
+        finally:
+            cur.close()
+
+    def get_databases(self):
+        """
+        Get list of databases in given server
+
+        :return: List of databases from current server
+        """
+        return self._first_column("SHOW DATABASES")
+
+    def get_tables(self, dbname):
+        """
+        Get list of tables in given database
+
+        Issues USE, so the connection's default database becomes dbname.
+
+        :param dbname: Name of database to get tables list for
+        :return: List of tables in given database
+        """
+        return self._first_column('USE ' + self._quote(dbname),
+                                  "SHOW TABLES")
+
+    def get_columns(self, dbname, tablename):
+        """
+        Get list of columns in given table in given database
+
+        Issues USE, so the connection's default database becomes dbname.
+
+        :param dbname: Name of database to find table name
+        :param tablename: Table to get columns list for
+        :return: List of columns for the given table
+        """
+        return self._first_column('USE ' + self._quote(dbname),
+                                  'DESCRIBE ' + self._quote(tablename))
diff --git a/auditor/mwconfig.py b/auditor/mwconfig.py
new file mode 100644
index 0000000..72a674c
--- /dev/null
+++ b/auditor/mwconfig.py
@@ -0,0 +1,29 @@
+import os
+
+
+class MWConfig(object):
+    """
+    Represents settings that can be read from mediawiki-config git repository
+    """
+
+    def __init__(self, path):
+        """
+        :param path: Directory in which <listname>.dblist files are looked up
+        """
+        self.path = path
+
+    def get_dblist(self, listname):
+        """
+        Get list of databases specified in a particular dblist
+
+        Reads <path>/<listname>.dblist, one database name per line.
+        Surrounding whitespace is stripped and blank lines are skipped, so
+        a trailing empty line does not produce an empty database name.
+
+        :param listname: Name of dblist (eg. wikipedia, special)
+        :return: List of databases in that dblist
+        """
+        path = os.path.join(self.path, listname + '.dblist')
+        with open(path) as f:
+            stripped = (line.strip() for line in f)
+            return [name for name in stripped if name]
diff --git a/auditor/table.py b/auditor/table.py
new file mode 100644
index 0000000..8a94187
--- /dev/null
+++ b/auditor/table.py
@@ -0,0 +1,25 @@
+class Table(object):
+    """
+    A named table with its columns and the databases it was added to
+    """
+    def __init__(self, name, columns):
+        """
+        :param name: Name of the table
+        :param columns: Columns of the table, stored as given
+        """
+        self.name = name
+        self.columns = columns
+        self.dbs = {}  # database name -> database object
+
+    def add_db(self, db):
+        """
+        Register a database as containing this table
+
+        Also calls db.add_table(self) the first time a given database name is
+        registered; repeated calls for the same name do nothing.
+
+        :param db: Object with a name attribute and an add_table() method
+        """
+        if db.name not in self.dbs:
+            self.dbs[db.name] = db
+            db.add_table(self)
diff --git a/auditor/wikidatabase.py b/auditor/wikidatabase.py
new file mode 100644
index 0000000..478dc37
--- /dev/null
+++ b/auditor/wikidatabase.py
@@ -0,0 +1,32 @@
+class WikiDatabase(object):
+    """
+    Represents a particular wiki's database
+    """
+    def __init__(self, name):
+        """
+        :param name: Name of the database
+        """
+        self.name = name
+        self.tables = {}  # table name -> Table object
+
+    @property
+    def publicdbname(self):
+        """
+        Name of the database with '_p' appended
+        """
+        # The name is stored as self.name; there is no dbname attribute
+        return self.name + '_p'
+
+    def add_table(self, table):
+        """
+        Add a table to the current database
+
+        Also calls table.add_db(self) the first time a given table name is
+        added; repeated calls for the same name do nothing.
+
+        :param table: Table object to add
+        """
+        if table.name in self.tables:
+            return
+        self.tables[table.name] = table
+        table.add_db(self)

-- 
To view, visit https://gerrit.wikimedia.org/r/179110
To unsubscribe, visit https://gerrit.wikimedia.org/r/settings

Gerrit-MessageType: merged
Gerrit-Change-Id: I8a4709e0b3803812fa85eb1f705f18426377bc9a
Gerrit-PatchSet: 17
Gerrit-Project: operations/software/labsdb-auditor
Gerrit-Branch: master
Gerrit-Owner: Yuvipanda <[email protected]>
Gerrit-Reviewer: Yuvipanda <[email protected]>

_______________________________________________
MediaWiki-commits mailing list
[email protected]
https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits

Reply via email to