Hi folks,
Today I made some enhancements to the web2py/scripts/extract_mysql_models.py
script that converts existing MySQL tables into a DAL representation. I've
attached a copy for anyone that might find it useful.
The major changes are as follows:
- I got rid of the dependencies on the external mysql and mysqldump
executables and now use MySQLdb directly. This simplifies the parsing a lot
and makes the tool a little more self-contained. (Linux users probably have
the mysql command line tools installed, but Windows users may not -- at
least not in the PATH)
- If your id column does not have the default "id" name, the script looks
for an AUTO INCREMENT column and marks that with type "id" instead.
- If you have a COMMENT specified in your MySQL table, it is added as a
Python comment to the Field line
- You are no longer restricted to localhost - you can read tables form
remote databases
- There are several new command line options, such as:
--dalname (specify the variable name used for the DAL; e.g. [var] =
DAL(...); [var].define_table(...))
--colcomments (add a comment to the end of each Field line showing the
original column specification from CREATE TABLE... useful for nothing things
like default values, original datatype, etc.)
--commentalign (aligns comments to some number of spaces in the generated
text)
--notnull (detects NOT NULL and adds notnull=True to Field)
--singlemigrate (use a single migrate=False in DAL() rather than in each
define_table())
--verbose (adds logging to stderr so that if it crashes, you have a
chance of figuring out which table failed)
You can see the full usage by running the script without any arguments.
Example use:
extract_mysql_models.py --user kevin --password secret --host example.com
--database mywiki --dalname wikidb --colcomments --singlemigrate > mywiki.py
Generated output (only looks pretty with fixed-width formatting!):
wikidb = DAL('mysql://kevin:[email protected]/mywiki', migrate=False)
wikidb.define_table('comment',
Field('id', 'id'), # (`id` int(11) NOT NULL
AUTO_INCREMENT,)
Field('page_id', 'integer'), # (`page_id` int(11) DEFAULT
NULL,)
Field('body', 'text'), # (`body` longtext,)
Field('created_on', 'datetime'), # (`created_on` datetime
DEFAULT NULL,)
Field('created_by', 'integer'), # (`created_by` int(11)
DEFAULT NULL,)
)
wikidb.define_table('document',
Field('id', 'id'), # (`id` int(11) NOT NULL
AUTO_INCREMENT,)
Field('page_id', 'integer'), # (`page_id` int(11) DEFAULT
NULL,)
Field('name', 'string'), # (`name` varchar(255)
DEFAULT NULL,)
Field('file', 'string'), # (`file` varchar(255)
DEFAULT NULL,)
Field('created_on', 'datetime'), # (`created_on` datetime
DEFAULT NULL,)
Field('created_by', 'integer'), # (`created_by` int(11)
DEFAULT NULL,)
)
wikidb.define_table('page',
Field('id', 'id'), # (`id` int(11) NOT NULL
AUTO_INCREMENT,)
Field('body', 'text'), # (`body` longtext,)
Field('created_on', 'datetime'), # (`created_on` datetime
DEFAULT NULL,)
Field('created_by', 'integer'), # (`created_by` int(11)
DEFAULT NULL,)
Field('title', 'string'), # (`title` varchar(255)
DEFAULT NULL,)
)
Massimo: feel free to integrate into the web2py source if you think it would
be useful.
Cheers,
Kevin
'''
Create the web2py code needed to access your mysql legacy db.
This plugin needs the MySQLdb module
Access your tables with:
legacy_db(legacy_db.mytable.id>0).select()
If the script crashes this is might be due to that fact that the data_type_map
dictionary below is incomplete. Please complete it, improve it and continue.
Created by Falko Krause, minor modifications by Massimo Di Pierro,
Ron McOuat, and Kevin Ivarsen
'''
from optparse import OptionParser
import re
import sys
import MySQLdb
# If true, write some diagnostic messages to stderr. Overridden by -v option
SHOW_LOG_MESSAGES = False
data_type_map = dict(
varchar = 'string',
int = 'integer',
integer = 'integer',
tinyint = 'integer',
smallint = 'integer',
mediumint = 'integer',
bigint = 'integer',
float = 'double',
double = 'double',
char = 'string',
decimal = 'integer',
date = 'date',
#year = 'date',
time = 'time',
timestamp = 'datetime',
datetime = 'datetime',
binary = 'blob',
blob = 'blob',
tinyblob = 'blob',
mediumblob = 'blob',
longblob = 'blob',
text = 'text',
tinytext = 'text',
mediumtext = 'text',
longtext = 'text',
enum = 'string',
)
def main():
options = parse_args()
print extract_mysql(options)
log("")
log("PROCESS COMPLETE!")
def parse_args():
global SHOW_LOG_MESSAGES
optparser = OptionParser(add_help_option=False) # -h for help interferes with -h for host
optparser.add_option("-u", "--user", dest="user", action="store",
type="string", default="root",
help="MySQL username (default: root)")
optparser.add_option("-p", "--password", dest="password", action="store",
type="string", default="",
help="MySQL password (default: none)")
optparser.add_option("-h", "--host", dest="host", action="store",
type="string", default="localhost",
help="MySQL host (default: localhost)")
optparser.add_option("-D", "--database", dest="schema", action="store",
type="string",
help="MySQL database schema to use")
optparser.add_option("-a", "--dalname", dest="dalname", action="store",
type="string", default="legacy_db",
help="Assign DAL to this variable name (default: legacy_db)")
optparser.add_option("-c", "--colcomments", dest="column_comments", action="store_true",
help="Add column definition comments")
optparser.add_option("-C", "--commentalign", dest="alignment", action="store",
type="int", default=45,
help="Align comments this many spaces from start of line")
optparser.add_option("-n", "--notnull", dest="notnulls", action="store_true",
help="Add notnull=True for NOT NULL columns")
optparser.add_option("-v", "--verbose", dest="verbose", action="store_true",
help="Verbose: show what is happening while tables are being parsed")
optparser.add_option("-s", "--singlemigrate", dest="single_migrate", action="store_true",
help="Use a single 'migrate=False' in the DAL() construtor instead of one in each define_table()")
(options, args) = optparser.parse_args()
if options.schema is None:
optparser.print_help()
print
print "Examples:"
print " Read the guestbook schema from localhost, user root, no password"
print " extract_mysql_models.py -D guestbook "
print
print " Do the same, but on a different host with login and password"
print " extract_mysql_models.py -u bob -p secret -h db.example.com -D guestbook"
print
print " Assign the DAL to variable 'db', use a global migrate=False, add comments"
print " describing each column (using long-style args), and add notnull args:"
print " extract_mysql_models.py --database=guestbook --dalname db \\"
print " --singlemigrate --colcomments --notnull"
print
print "You must at least specify a schema to read from."
print
print "Generated database is written to stdout. Add \"> filename.py\""
print "to save to a file"
print
sys.exit(1)
if options.verbose:
SHOW_LOG_MESSAGES = True
return options
def log(text):
if SHOW_LOG_MESSAGES:
sys.stderr.write(text + "\n")
def extract_mysql(opt):
db = MySQLdb.connect(user=opt.user,
passwd=opt.password,
host=opt.host,
db=opt.schema)
curs = db.cursor()
curs.execute("SHOW TABLES")
tables = curs.fetchall()
tables = [row[0] for row in tables] # Convert tuple of tuples to tuple of strings
if opt.single_migrate:
dal_migrate = ", migrate=False"
else:
dal_migrate = ""
connection_string = "%s = DAL('mysql://%s:%s@localhost/%s'%s)" % (
opt.dalname, opt.user, opt.password, opt.schema, dal_migrate)
legacy_db_table_web2py_code = []
for table_name in tables:
log("Processing table '%s'" % table_name)
curs.execute("SHOW CREATE TABLE " + table_name)
sql_create_stmnt = curs.fetchone()[1]
if 'CREATE' in sql_create_stmnt: # make sure returned statement looks sensible
sql_lines = sql_create_stmnt.split('\n')
# remove garbage lines from sql statement
# n.b. this is a relic of old mysqldump method -- may no longer be
# necessary. SHOW CREATE TABLE doesn't seem to produce any junk
sql_lines = [x for x in sql_lines if not(x.startswith('--') or x.startswith('/*') or x =='')]
log("Processing create statement:")
for line in sql_lines:
log(" " + line)
# generate the web2py code from the create statement
web2py_table_code = ''
table_name = re.search('CREATE TABLE .(\S+). \(', sql_lines[0]).group(1)
log("Extracted table name '%s'" % table_name)
fields = []
for line in sql_lines[1:-1]:
if re.search('PRIMARY', line) or re.search('KEY', line) or re.search(' ID', line) or line.startswith(')'):
continue
hit = re.search('(\S+)\s+(\S+)(,| )( .*)?', line)
if hit != None:
name, coltype = hit.group(1), hit.group(2)
name = re.sub('`', '', name)
coltype = re.sub(r'(\w+)\(.*',r'\1', coltype)
daltype = data_type_map[coltype]
# Is it a primary key with a non-default name?
if daltype == 'integer' and line.find("AUTO_INCREMENT") != -1:
daltype = 'id'
# Extract a column comment, if any.
# Also append the original column definition
comment_index = line.find(" COMMENT ")
if comment_index != -1:
comment = line[comment_index+9:]
comment = comment.strip().lstrip("'").rstrip(",").rstrip("'")
else:
comment = ""
# Check nullable status
if opt.notnulls and line.find(" NOT NULL") != -1:
notnull = ", notnull=True"
else:
notnull = ""
if opt.column_comments:
comment += " (%s)" % line.strip()
if comment != "":
comment = " # " + comment.strip()
web2py_table_code += "\n"
field_definition = " Field('%s', '%s'%s)," % (
name, daltype, notnull)
field_definition = field_definition.ljust(opt.alignment)
field_definition += comment
web2py_table_code += field_definition
if opt.single_migrate:
define_table_migrate = ""
else:
define_table_migrate = "migrate=False"
web2py_table_code = "%s.define_table('%s', %s\n %s)" % (
opt.dalname, table_name, web2py_table_code, define_table_migrate)
legacy_db_table_web2py_code.append(web2py_table_code)
# write the legacy db to file
legacy_db_web2py_code = connection_string+"\n\n"
legacy_db_web2py_code += "\n\n".join(legacy_db_table_web2py_code)
return legacy_db_web2py_code
if __name__ == '__main__':
main()