Hi folks,

Today I made some enhancements to the web2py/scripts/extract_mysql_models.py 
script that converts existing MySQL tables into a DAL representation. I've 
attached a copy for anyone that might find it useful.

The major changes are as follows:
 - I got rid of the dependencies on the external mysql and mysqldump 
executables and now use MySQLdb directly. This simplifies the parsing a lot 
and makes the tool a little more self-contained. (Linux users probably have 
the mysql command line tools installed, but Windows users may not -- at 
least not in the PATH)
 - If your id column does not have the default "id" name, the script looks 
for an AUTO INCREMENT column and marks that with type "id" instead.
 - If you have a COMMENT specified in your MySQL table, it is added as a 
Python comment to the Field line
 - You are no longer restricted to localhost - you can read tables form 
remote databases
 - There are several new command line options, such as:
   --dalname (specify the variable name used for the DAL; e.g. [var] = 
DAL(...); [var].define_table(...))
   --colcomments  (add a comment to the end of each Field line showing the 
original column specification from CREATE TABLE... useful for nothing things 
like default values, original datatype, etc.)
   --commentalign (aligns comments to some number of spaces in the generated 
text)
   --notnull (detects NOT NULL and adds notnull=True to Field)
   --singlemigrate (use a single migrate=False in DAL() rather than in each 
define_table())
   --verbose (adds logging to stderr so that if it crashes, you have a 
chance of figuring out which table failed)

You can see the full usage by running the script without any arguments.


Example use:

extract_mysql_models.py --user kevin --password secret --host example.com 
--database mywiki --dalname wikidb --colcomments --singlemigrate > mywiki.py

Generated output (only looks pretty with fixed-width formatting!):

wikidb = DAL('mysql://kevin:sec...@example.com/mywiki', migrate=False)

wikidb.define_table('comment', 
    Field('id', 'id'),                         # (`id` int(11) NOT NULL 
AUTO_INCREMENT,)
    Field('page_id', 'integer'),               # (`page_id` int(11) DEFAULT 
NULL,)
    Field('body', 'text'),                     # (`body` longtext,)
    Field('created_on', 'datetime'),           # (`created_on` datetime 
DEFAULT NULL,)
    Field('created_by', 'integer'),            # (`created_by` int(11) 
DEFAULT NULL,)
    )

wikidb.define_table('document', 
    Field('id', 'id'),                         # (`id` int(11) NOT NULL 
AUTO_INCREMENT,)
    Field('page_id', 'integer'),               # (`page_id` int(11) DEFAULT 
NULL,)
    Field('name', 'string'),                   # (`name` varchar(255) 
DEFAULT NULL,)
    Field('file', 'string'),                   # (`file` varchar(255) 
DEFAULT NULL,)
    Field('created_on', 'datetime'),           # (`created_on` datetime 
DEFAULT NULL,)
    Field('created_by', 'integer'),            # (`created_by` int(11) 
DEFAULT NULL,)
    )

wikidb.define_table('page', 
    Field('id', 'id'),                         # (`id` int(11) NOT NULL 
AUTO_INCREMENT,)
    Field('body', 'text'),                     # (`body` longtext,)
    Field('created_on', 'datetime'),           # (`created_on` datetime 
DEFAULT NULL,)
    Field('created_by', 'integer'),            # (`created_by` int(11) 
DEFAULT NULL,)
    Field('title', 'string'),                  # (`title` varchar(255) 
DEFAULT NULL,)
    )


Massimo: feel free to integrate into the web2py source if you think it would 
be useful.

Cheers,
Kevin
'''
Create the web2py code needed to access your mysql legacy db.

This plugin needs the MySQLdb module

Access your tables with:
legacy_db(legacy_db.mytable.id>0).select()

If the script crashes this is might be due to that fact that the data_type_map
dictionary below is incomplete. Please complete it, improve it and continue.

Created by Falko Krause, minor modifications by Massimo Di Pierro,
Ron McOuat, and Kevin Ivarsen
'''

from optparse import OptionParser
import re
import sys

import MySQLdb

# If true, write some diagnostic messages to stderr. Overridden by -v option
SHOW_LOG_MESSAGES = False

data_type_map = dict(
        varchar = 'string',
        int = 'integer',
        integer = 'integer',
        tinyint = 'integer',
        smallint = 'integer',
        mediumint = 'integer',
        bigint = 'integer',
        float = 'double',
        double = 'double',
        char = 'string',
        decimal = 'integer',
        date = 'date',
        #year = 'date',
        time = 'time',
        timestamp = 'datetime',
        datetime = 'datetime',
        binary = 'blob',
        blob = 'blob',
        tinyblob = 'blob',
        mediumblob = 'blob',
        longblob = 'blob',
        text = 'text',
        tinytext = 'text',
        mediumtext = 'text',
        longtext = 'text',
        enum = 'string',
        )

def main():
    options = parse_args()

    print extract_mysql(options)
    log("")
    log("PROCESS COMPLETE!")

def parse_args():
    global SHOW_LOG_MESSAGES

    optparser  = OptionParser(add_help_option=False)  # -h for help interferes with -h for host
    optparser.add_option("-u", "--user", dest="user", action="store",
                      type="string", default="root",
                      help="MySQL username (default: root)")

    optparser.add_option("-p", "--password", dest="password", action="store",
                      type="string", default="",
                      help="MySQL password (default: none)")

    optparser.add_option("-h", "--host", dest="host", action="store",
                      type="string", default="localhost",
                      help="MySQL host (default: localhost)")

    optparser.add_option("-D", "--database", dest="schema", action="store",
                      type="string",
                      help="MySQL database schema to use")

    optparser.add_option("-a", "--dalname", dest="dalname", action="store",
                      type="string", default="legacy_db",
                      help="Assign DAL to this variable name (default: legacy_db)")

    optparser.add_option("-c", "--colcomments", dest="column_comments", action="store_true",
                      help="Add column definition comments")

    optparser.add_option("-C", "--commentalign", dest="alignment", action="store",
                      type="int", default=45,
                      help="Align comments this many spaces from start of line")

    optparser.add_option("-n", "--notnull", dest="notnulls", action="store_true",
                      help="Add notnull=True for NOT NULL columns")

    optparser.add_option("-v", "--verbose", dest="verbose", action="store_true",
                          help="Verbose: show what is happening while tables are being parsed")

    optparser.add_option("-s", "--singlemigrate", dest="single_migrate", action="store_true",
                          help="Use a single 'migrate=False' in the DAL() construtor instead of one in each define_table()")


    (options, args) = optparser.parse_args()

    if options.schema is None:
        optparser.print_help()
        print
        print "Examples:"
        print "  Read the guestbook schema from localhost, user root, no password"
        print "    extract_mysql_models.py -D guestbook "
        print
        print "  Do the same, but on a different host with login and password"
        print "    extract_mysql_models.py -u bob -p secret -h db.example.com -D guestbook"
        print
        print "  Assign the DAL to variable 'db', use a global migrate=False, add comments"
        print "  describing each column (using long-style args), and add notnull args:"
        print "    extract_mysql_models.py --database=guestbook --dalname db \\"
        print "    --singlemigrate --colcomments --notnull"
        print
        print "You must at least specify a schema to read from."
        print
        print "Generated database is written to stdout. Add \"> filename.py\""
        print "to save to a file"
        print

        sys.exit(1)

    if options.verbose:
        SHOW_LOG_MESSAGES = True

    return options

def log(text):
    if SHOW_LOG_MESSAGES:
        sys.stderr.write(text + "\n")

def extract_mysql(opt):
    db = MySQLdb.connect(user=opt.user,
                         passwd=opt.password,
                         host=opt.host,
                         db=opt.schema)
    curs = db.cursor()
    curs.execute("SHOW TABLES")
    tables = curs.fetchall()
    tables = [row[0] for row in tables] # Convert tuple of tuples to tuple of strings

    if opt.single_migrate:
        dal_migrate = ", migrate=False"
    else:
        dal_migrate = ""

    connection_string = "%s = DAL('mysql://%s:%s@localhost/%s'%s)" % (
            opt.dalname, opt.user, opt.password, opt.schema, dal_migrate)

    legacy_db_table_web2py_code = []

    for table_name in tables:
        log("Processing table '%s'" % table_name)

        curs.execute("SHOW CREATE TABLE " + table_name)
        sql_create_stmnt = curs.fetchone()[1]

        if 'CREATE' in sql_create_stmnt: # make sure returned statement looks sensible
            sql_lines = sql_create_stmnt.split('\n')

            # remove garbage lines from sql statement
            # n.b. this is a relic of old mysqldump method -- may no longer be
            # necessary. SHOW CREATE TABLE doesn't seem to produce any junk
            sql_lines = [x for x in sql_lines if not(x.startswith('--') or x.startswith('/*') or x =='')]

            log("Processing create statement:")
            for line in sql_lines:
                log("   " + line)

            # generate the web2py code from the create statement
            web2py_table_code = ''

            table_name = re.search('CREATE TABLE .(\S+). \(', sql_lines[0]).group(1)
            log("Extracted table name '%s'" % table_name)

            fields = []
            for line in sql_lines[1:-1]:
                if re.search('PRIMARY', line) or re.search('KEY', line) or re.search(' ID', line) or line.startswith(')'):
                    continue
                hit = re.search('(\S+)\s+(\S+)(,| )( .*)?', line)
                if hit != None:
                    name, coltype = hit.group(1), hit.group(2)

                    name = re.sub('`', '', name)
                    coltype = re.sub(r'(\w+)\(.*',r'\1', coltype)
                    daltype = data_type_map[coltype]

                    # Is it a primary key with a non-default name?
                    if daltype == 'integer' and line.find("AUTO_INCREMENT") != -1:
                        daltype = 'id'

                    # Extract a column comment, if any.
                    # Also append the original column definition
                    comment_index = line.find(" COMMENT ")
                    if comment_index != -1:
                        comment = line[comment_index+9:]
                        comment = comment.strip().lstrip("'").rstrip(",").rstrip("'")
                    else:
                        comment = ""

                    # Check nullable status
                    if opt.notnulls and line.find(" NOT NULL") != -1:
                        notnull = ", notnull=True"
                    else:
                        notnull = ""

                    if opt.column_comments:
                        comment += "  (%s)" % line.strip()

                    if comment != "":
                        comment = "  # " + comment.strip()

                    web2py_table_code += "\n"

                    field_definition = "    Field('%s', '%s'%s)," % (
                            name, daltype, notnull)
                    field_definition = field_definition.ljust(opt.alignment)
                    field_definition += comment

                    web2py_table_code += field_definition

            if opt.single_migrate:
                define_table_migrate = ""
            else:
                define_table_migrate = "migrate=False"
            web2py_table_code = "%s.define_table('%s', %s\n    %s)" % (
                    opt.dalname, table_name, web2py_table_code, define_table_migrate)

            legacy_db_table_web2py_code.append(web2py_table_code)

    # write the legacy db to file
    legacy_db_web2py_code = connection_string+"\n\n"
    legacy_db_web2py_code += "\n\n".join(legacy_db_table_web2py_code)
    return legacy_db_web2py_code

if __name__ == '__main__':
    main()

Reply via email to