Hi all,

Solving similar issue as described in this
thread<http://groups.google.com/group/web2py/browse_thread/thread/0d40a7d7a286a2da>,
I too first populated my model in appadmin and then required a way to
migrate existing sqlite database entries onto GAE datastore (either local or
remote).
I studied the way of
bulkloader <http://code.google.com/appengine/docs/python/tools/uploadingdata.html#Configuring_the_Bulk_Loader> and
came up with the solution that should reduce my required effort in the
future.

The [hopefully] attached script produces a bulkloader.yaml file (or a dict()
that can be yaml'd elsewhere if yaml module is not available) with
definitions for all defined database tables (DAL is expected in db variable)
and conversions/transforms for all supported
fields<http://www.web2py.com/book/default/chapter/06#Record-Representation>.
This bulkloader.yaml can be used as --config_file when running AppEngine's
appcfg.py.

The script comes with slight configuration as to how you want your 'result'
served. It runs as any controller.

I hereby give interested parties this script.

Its license is as follows:
Said script is to be considered public domain with the following exception:
if you add to or modify the script with purpose to improve/fix/update it,
and up to such time there is no implementation providing similar
functionality integrated into web2py itself, then you are required to send
me your new improved/fixed/updated version of said script.
# coding: utf-8

""" License:
This script is to be considered public domain with the following exception: 
if you add to or modify the script with purpose to improve/fix/update it,
and up to such time there is no implementation providing similar functionality
integrated into web2py itself, then you are required to email me your new 
improved/fixed/updated version of this script.
"""

def autocreate_bulkloader_yaml():
  # out_type - how do you want the results served; options:
  #   - False       - return yaml string
  #   - 'file.yaml' - write yaml to file.yaml
  #   - True        - print yaml to sys.stdout
  out_type = True or 'bulkloader-auto.yaml' or None

  bulkloader = {}   # will be yaml.dump()'d as bulkloader.yaml configuration file
  bulkloader['python_preamble'] = [
    {'import': 'google.appengine.ext.bulkload.transform'},
    {'import': 'google.appengine.ext.db'},
    {'import': 'base64'},
    {'import': 're'},
  ]
  bulkloader['transformers'] = []   # transformer for various 'kinds' (db tables)

  IMPORT_TRANSFORM, EXPORT_TRANSFORM = 0, 1   # to avoid "magic indexes" :)
  transforms_map = {
    'id':       ('int',     'str'),
    'reference':('int',     'unicode'),
    'upload':   ('unicode', 'unicode'),
    'password': ('unicode', 'unicode'),
    'string':   ('unicode', 'unicode'),
    'text':     ('db.Text', 'unicode'),
    'double':   ('float',   'str'),
    'decimal':  ('float',   'str'),
    'integer':  ('lambda x: int(float(x))', 'str'),
    'boolean':  ('transform.regexp_bool("[Tt]rue")', 'str'),
    # below list transform takes into account list:type and '|' chars in list-ed string,
    # e.g. |1|2|3| becomes [1,2,3] and |str1|str2_1||str2_2|str3| becomes ['str1', 'str2_1|str2_2', 'str3']
    'list':     ( "lambda x: [int(i[0]) if re.match(r'^[0-9]+$', i[0]) else i[0].replace(r'||', '|') for i in re.findall(r'\|?([^\|]*(\|\|[^\|]*)*)\|', x]",
                  'lambda x: "|%s|" % "|".join([unicode(i).replace("|", "||") for i in x])'),
    # blob base64 encoded (future-compatible, see http://goo.gl/k9Rx6)
    'blob':     ('transform.blobproperty_from_base64', 'base64.b64encode'),
    # hopefully, web2py uses these exact date/time formats when exporting to csv (i.e. independent of locale settings)
    'date':     ( 'transform.import_date_time("%Y-%m-%d")', 
                  'transform.export_date_time("%Y-%m-%d")'),
    'time':     ( 'transform.import_date_time("%H:%M:%S")', 
                  'transform.export_date_time("%H:%M:%S")'),
    'datetime': ( 'transform.import_date_time("%Y-%m-%d %H:%M:%S")', 
                  'transform.export_date_time("%Y-%m-%d %H:%M:%S")'),
  }

  for table in db['tables']:
    transformer = {}
    bulkloader['transformers'].append(transformer)
    transformer['connector'] = 'csv'
    transformer['connector_options'] = {
      'encoding': 'utf-8',
      'print_export_header_row':True  # GAE orders columns so better use this
    }
    transformer['kind'] = table
    transformer['property_map'] = []  # list of field definitions (transforms)
    
    for field in db[table]['fields']:
      property = {}
      transformer['property_map'].append(property)
      property['property'] = db[table][field].name
      property['external_name'] = table + '.' + property['property']
      
      field_type = db[table][field].type
      
      if   field_type.startswith('list'):      field_type = 'list'
      elif field_type.startswith('decimal'):   field_type = 'decimal'
      elif field_type.startswith('reference'): field_type = 'reference'
      
      if field_type not in transforms_map:
        raise Exception('field type "%s" not defined' % field_type)
      
      # lambda function for mapping '<NULL>' string to None
      property['import_transform'] = 'lambda v: transform.none_if_empty(%s)(v) if not re.match("^[<]NULL>$", v) else None' % \
                                      transforms_map[field_type][IMPORT_TRANSFORM]
      property['export_transform'] = 'transform.empty_if_none(%s)' % \
                                      transforms_map[field_type][EXPORT_TRANSFORM]
  
  try:
    import yaml
    if out_type == False:
      return yaml.dump(bulkloader, default_flow_style=False, width=200)
    elif out_type == True:
      from sys import stdout
      print yaml.dump(bulkloader, default_flow_style=False, width=200)
    elif type(out_type) == str:
      yaml.dump(bulkloader, open(out_type, 'w'), default_flow_style=False, width=200)
    return 'ok'
  except: # no yaml, return dict string to be yaml'd elsewhere
    return "'''  yaml missing; here's a dict() instead  '''\n\n" + str(bulkloader)

Reply via email to