Mwalker has uploaded a new change for review. https://gerrit.wikimedia.org/r/78023
Change subject: Adding a tool to import a CSV file into MySQL ...................................................................... Adding a tool to import a CSV file into MySQL Change-Id: I0a10ca8485676f99cf57483c0d58ed484e3b3414 --- A CsvToSql/csv2sql.py 1 file changed, 130 insertions(+), 0 deletions(-) git pull ssh://gerrit.wikimedia.org:29418/wikimedia/fundraising/tools refs/changes/23/78023/1 diff --git a/CsvToSql/csv2sql.py b/CsvToSql/csv2sql.py new file mode 100644 index 0000000..839b743 --- /dev/null +++ b/CsvToSql/csv2sql.py @@ -0,0 +1,130 @@ +#!/usr/bin/python2 + +from optparse import OptionParser +import MySQLdb as MySQL +import csv +import re +import getpass +from dateutil.parser import parse as dateparse +from time import time + +colproto = { + 'c': (lambda x: x, 'varchar(255)'), + 'i': (lambda x: int(x), 'integer'), + 'u': (lambda x: int(x), 'integer unsigned'), + 's': (lambda x: x, 'text'), + 'd': (lambda x: dateparse(x), 'datetime'), + 'f': (lambda x: float(x), 'float') +} + +def main(host, user, password, database, tablename, delete, autoindex, schema, filename): + global colproto + + # === Connection to MySQL === + db = MySQL.connect(host, user, password, database) + + # === Open the file === + f = open(filename, 'r') + c = csv.reader(f) + + headers = c.next() + + # === Create the schema === + if len(headers) != len(schema): + raise Exception('Num of columns must be the same as num of schema cols') + if re.match("[^%s]" % ''.join(colproto.keys()), schema): + raise Exception("Unknown schema column type detected") + + schemaStr = [] + insertCols = [] + schemaCopy = schema + if autoindex: + schemaStr.append("id int auto_increment primary key") + for header in headers: + # Normalize the Name + header = re.sub('(^[0-9])|[^0-9a-zA-Z]', '', header) + schemaStr.append("%s %s" % (header, colproto[schemaCopy[0]][1])) + insertCols.append(header) + schemaCopy = schemaCopy[1:] + + schemaStr = ', '.join(schemaStr) + insertCols = ', '.join(insertCols) + queryStr = "CREATE TABLE %s (%s);" % (tablename, schemaStr) + print("Executing: %s" %queryStr) + + cur = db.cursor() + if delete: + cur.execute("DROP TABLE IF EXISTS %s;" % tablename) + cur.execute(queryStr) + + # Now inject all the rows! + start = time() + count = 0 + queryStr = "INSERT INTO %s (%s) VALUES (%s);" % (tablename, insertCols, (','.join(['%s'] * len(headers)))) + for row in c: + rowVals = [] + for i in range(0, len(row)): + ctype = schema[i] + func = colproto[ctype][0] + rowVals.append(func(row[i])) + + cur.execute(queryStr, rowVals) + count += 1 + if count % 2500 == 0: + db.commit() + print("%s - %s qps" % (count, 2500/(time()-start))) + start = time() + print(count) + db.commit() + + cur.close() + db.close() + f.close() + + +if __name__ == "__main__": + user = getpass.getuser() + + # === Extract options === + parser = OptionParser(usage="usage: %prog [options] schema file.csv") + parser.add_option("-s", "--host", dest='host', default='localhost', help='MySQL server name') + parser.add_option("-d", "--database", dest='database', default=user, help='MySQL database name') + parser.add_option("-u", "--user", dest='user', default=user, help='MySQL username') + parser.add_option("-p", "--password", dest='password', default=None, help='MySQL password, if none will prompt') + parser.add_option("-t", "--table", dest='tablename', default=None, help='Name of table to create, otherwise will be sanitized file name') + parser.add_option("-x", "--explain", dest='explainSchema', default=False, action='store_true', help="Describe schema options") + parser.add_option("--delete", dest='delete', default=False, action='store_true', help="Delete table if exists") + parser.add_option("-i", "--autoindex", dest='autoindex', default=False, action='store_true', help="Add auto increment pri key") + (options, args) = parser.parse_args() + + if options.explainSchema: + print("A schema is a simple string of characters from the following set that expands into SQL types:") + for t in colproto: + print( "%s - %s" % (t, colproto[t][1])) + exit() + elif len(args) != 2: + parser.print_usage() + exit() + + if options.password is None: + password = getpass.getpass("MySQL password for %s> " % options.user) + else: + password = options.password + + if options.tablename is None: + tablename = re.sub('(^[0-9])|[^0-9a-zA-Z]', '_', args[1]) + else: + tablename = options.tablename + + # === Launch the application === + main( + host=options.host, + user=options.user, + password=password, + database=options.database, + tablename=tablename, + delete=options.delete, + autoindex=options.autoindex, + schema=args[0], + filename=args[1] + ) -- To view, visit https://gerrit.wikimedia.org/r/78023 To unsubscribe, visit https://gerrit.wikimedia.org/r/settings Gerrit-MessageType: newchange Gerrit-Change-Id: I0a10ca8485676f99cf57483c0d58ed484e3b3414 Gerrit-PatchSet: 1 Gerrit-Project: wikimedia/fundraising/tools Gerrit-Branch: master Gerrit-Owner: Mwalker <mwal...@wikimedia.org> _______________________________________________ MediaWiki-commits mailing list MediaWiki-commits@lists.wikimedia.org https://lists.wikimedia.org/mailman/listinfo/mediawiki-commits