Hi all,
Solving a similar issue to the one described in this
thread<http://groups.google.com/group/web2py/browse_thread/thread/0d40a7d7a286a2da>,
I too first populated my model in appadmin and then required a way to
migrate existing sqlite database entries onto GAE datastore (either local or
remote).
I studied the way of
bulkloader<http://code.google.com/appengine/docs/python/tools/uploadingdata.html#Configuring_the_Bulk_Loader>and
came up with the solution that should reduce my required effort in the
future.
The [hopefully] attached script produces a bulkloader.yaml file (or a dict()
that can be yaml'd elsewhere if yaml module is not available) with
definitions for all defined database tables (DAL is expected in db variable)
and conversions/transforms for all supported
fields<http://www.web2py.com/book/default/chapter/06#Record-Representation>.
This bulkloader.yaml can be used as --config_file when running AppEngine's
appcfg.py.
The script comes with slight configuration as to how you want your 'result'
served. It runs as any controller.
I hereby give interested parties this script.
Its license is as follows:
Said script is to be considered public domain with the following exception:
if you add to or modify the script with purpose to improve/fix/update it,
and up to such time there is no implementation providing similar
functionality integrated into web2py itself, then you are required to send
me your new improved/fixed/updated version of said script.
# coding: utf-8
""" License:
This script is to be considered public domain with the following exception:
if you add to or modify the script with purpose to improve/fix/update it,
and up to such time there is no implementation providing similar functionality
integrated into web2py itself, then you are required to email me your new
improved/fixed/updated version of this script.
"""
def autocreate_bulkloader_yaml():
    """Build a bulkloader.yaml configuration for every table defined in ``db``.

    Expects a web2py DAL instance to be available in the global ``db``
    (this function is meant to run as a web2py controller action).  For each
    table it emits a csv-connector ``transformer`` entry with per-field
    import/export transforms matching web2py's csv representation, suitable
    for use as ``--config_file`` with App Engine's ``appcfg.py``.

    Returns:
        - the yaml string when ``out_type`` is False,
        - ``'ok'`` after writing to a file when ``out_type`` is a filename,
        - ``None`` after printing when ``out_type`` is True,
        - a ``str(dict)`` fallback when the ``yaml`` module is unavailable.

    Raises:
        Exception: if a table contains a field type with no known transform.
    """
    # out_type - how you want the result served; options:
    #   False                  - return the yaml string
    #   'bulkloader-auto.yaml' - write yaml to that file, return 'ok'
    #   True                   - print yaml to stdout
    out_type = True

    # Will be yaml.dump()'d as the bulkloader.yaml configuration file.
    bulkloader = {}
    bulkloader['python_preamble'] = [
        {'import': 'google.appengine.ext.bulkload.transform'},
        {'import': 'google.appengine.ext.db'},
        {'import': 'base64'},
        {'import': 're'},
    ]
    bulkloader['transformers'] = []  # transformer for various 'kinds' (db tables)

    IMPORT_TRANSFORM, EXPORT_TRANSFORM = 0, 1  # to avoid "magic indexes" :)
    # Maps a web2py field type to a (import_transform, export_transform) pair
    # of Python source snippets that end up verbatim in the generated yaml.
    transforms_map = {
        'id': ('int', 'str'),
        'reference': ('int', 'unicode'),
        'upload': ('unicode', 'unicode'),
        'password': ('unicode', 'unicode'),
        'string': ('unicode', 'unicode'),
        'text': ('db.Text', 'unicode'),
        'double': ('float', 'str'),
        'decimal': ('float', 'str'),
        'integer': ('lambda x: int(float(x))', 'str'),
        'boolean': ('transform.regexp_bool("[Tt]rue")', 'str'),
        # The list transform takes into account list:type and '|' chars in a
        # list-ed string, e.g. |1|2|3| becomes [1,2,3] and
        # |str1|str2_1||str2_2|str3| becomes ['str1', 'str2_1|str2_2', 'str3'].
        # NOTE: the closing ')' after ', x' was missing in the original,
        # producing a syntactically broken lambda in the generated yaml.
        'list': ("lambda x: [int(i[0]) if re.match(r'^[0-9]+$', i[0]) else i[0].replace(r'||', '|') for i in re.findall(r'\|?([^\|]*(\|\|[^\|]*)*)\|', x)]",
                 'lambda x: "|%s|" % "|".join([unicode(i).replace("|", "||") for i in x])'),
        # blob base64 encoded (future-compatible, see http://goo.gl/k9Rx6)
        'blob': ('transform.blobproperty_from_base64', 'base64.b64encode'),
        # hopefully, web2py uses these exact date/time formats when exporting
        # to csv (i.e. independent of locale settings)
        'date': ('transform.import_date_time("%Y-%m-%d")',
                 'transform.export_date_time("%Y-%m-%d")'),
        'time': ('transform.import_date_time("%H:%M:%S")',
                 'transform.export_date_time("%H:%M:%S")'),
        'datetime': ('transform.import_date_time("%Y-%m-%d %H:%M:%S")',
                     'transform.export_date_time("%Y-%m-%d %H:%M:%S")'),
    }

    for table in db['tables']:
        transformer = {}
        bulkloader['transformers'].append(transformer)
        transformer['connector'] = 'csv'
        transformer['connector_options'] = {
            'encoding': 'utf-8',
            'print_export_header_row': True,  # GAE orders columns so better use this
        }
        transformer['kind'] = table
        transformer['property_map'] = []  # list of field definitions (transforms)
        for field in db[table]['fields']:
            prop = {}  # renamed from 'property' to avoid shadowing the builtin
            transformer['property_map'].append(prop)
            prop['property'] = db[table][field].name
            prop['external_name'] = table + '.' + prop['property']
            # Collapse parameterized types (list:..., decimal(n,m),
            # reference <table>) to their base type for the transform lookup.
            field_type = db[table][field].type
            if field_type.startswith('list'):
                field_type = 'list'
            elif field_type.startswith('decimal'):
                field_type = 'decimal'
            elif field_type.startswith('reference'):
                field_type = 'reference'
            if field_type not in transforms_map:
                raise Exception('field type "%s" not defined' % field_type)
            # Wrap the import transform so the '<NULL>' marker maps to None.
            prop['import_transform'] = 'lambda v: transform.none_if_empty(%s)(v) if not re.match("^[<]NULL>$", v) else None' % \
                transforms_map[field_type][IMPORT_TRANSFORM]
            prop['export_transform'] = 'transform.empty_if_none(%s)' % \
                transforms_map[field_type][EXPORT_TRANSFORM]

    try:
        import yaml
    except ImportError:
        # No yaml module available; return a dict string to be yaml'd elsewhere.
        # (The original bare 'except:' also swallowed file-write errors.)
        return "''' yaml missing; here's a dict() instead '''\n\n" + str(bulkloader)

    if out_type is False:
        return yaml.dump(bulkloader, default_flow_style=False, width=200)
    elif out_type is True:
        print(yaml.dump(bulkloader, default_flow_style=False, width=200))
    elif isinstance(out_type, str):
        yaml.dump(bulkloader, open(out_type, 'w'), default_flow_style=False, width=200)
        return 'ok'