C. Mundi wrote: > Hi. I have scanned the list archives and Googled. I may have missed > something, but what I found was not sufficiently general or robust for my > needs. > > Happily, sqlite has a very nice .output mode for CSV. It correctly > double-quotes fields when they contain a space or a comma, and it correctly > 'escapes' double-quote literals as "". Great! I need to go the other way. > > I don't see an inverse .import mode in sqlite to handle the completely > general CSV syntax. As anyone who has ever tried knows, parsing compliant > CSV is a lot harder than writing compliant CSV. > > I'm hoping someone will (please) tell me I missed something in the sqlite > docs. Otherwise, I guess I'll be using python's csv module to turn my CSV > file into SQL insert statements. This is likely to be an infequent task, > but it has to be done perfectly. So if someone knows of a command-line tool > to turn CSV into SQL inserts, I would appreciate that too. Here's one I just wrote.
#!/usr/bin/env python """Utility to import CSV files into an SQLite database.""" from __future__ import division import csv import getopt import os import sqlite3 import sys USAGE = 'USAGE: csv2sqlite [-o dbfile] csvfile...' DEFAULT_OUTPUT_FILE = 'a.db' _INTEGER_MAX = (1 << 63) - 1 _INTEGER_MIN = -_INTEGER_MAX - 1 def identify_type(value): """ Identify the SQLite type for a value. """ try: int_value = int(value) if int_value < _INTEGER_MIN or int_value > _INTEGER_MAX: return 'REAL' return 'INTEGER' except ValueError: try: float(value) return 'REAL' except ValueError: return 'TEXT' def common_type(values): """ Return the common type for a list of values. """ types = set(identify_type(value) for value in values if value) if len(types) == 1: return types.pop() elif types == set(['INTEGER', 'REAL']): return 'NUMERIC' else: return 'TEXT' def add_table_from_file(dbconn, filename): """ Read a CSV file and add it as a database table. dbconn -- open database connection filename -- path to the CSV file """ table_name = os.path.splitext(os.path.basename(filename))[0] data = list(csv.reader(open(filename))) if not data: return headers = data.pop(0) num_columns = len(headers) # Replace all empty strings with NULL data = [[value or None for value in row] for row in data] # Ensure that all rows have the same number of columns for row in data: for dummy in xrange(num_columns - len(row)): row.append(None) # Determine the appropriate type for each column column_info = [(column_name, common_type(row[col] for row in data)) for (col, column_name) in enumerate(headers)] # Build the SQL statements sql_create_table = 'CREATE TABLE IF NOT EXISTS "%s" (%s)' % \ (table_name, ', '.join('%s %s' % column for column in column_info)) sql_insert = 'INSERT INTO "%s" VALUES (%s)' % \ (table_name, ', '.join(['?'] * num_columns)) # Build the database table dbconn.execute(sql_create_table) for row in data: dbconn.execute(sql_insert, row) def _main(argv=None): """ Executed when this module is run as a script. """ if argv is None: argv = sys.argv options, args = getopt.getopt(argv[1:], 'o:', ['help']) options = dict(options) if '--help' in options: print USAGE else: dbconn = sqlite3.connect(options.get('-o', DEFAULT_OUTPUT_FILE)) for filename in args: add_table_from_file(dbconn, filename) dbconn.commit() dbconn.close() if __name__ == '__main__': _main() _______________________________________________ sqlite-users mailing list sqlite-users@sqlite.org http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users