C. Mundi wrote:
> Hi.  I have scanned the list archives and Googled.  I may have missed
> something, but what I found was not sufficiently general or robust for my
> needs.
>
> Happily, sqlite has a very nice .output mode for CSV.  It correctly
> double-quotes fields when they contain a space or a comma, and it correctly
> 'escapes' double-quote literals as "".  Great!  I need to go the other way.
>
> I don't see an inverse .import mode in sqlite to handle the completely
> general CSV syntax.  As anyone who has ever tried knows, parsing compliant
> CSV is a lot harder than writing compliant CSV.
>
> I'm hoping someone will (please) tell me I missed something in the sqlite
> docs.  Otherwise, I guess I'll be using python's csv module to turn my CSV
> file into SQL insert statements.  This is likely to be an infequent task,
> but it has to be done perfectly.  So if someone knows of a command-line tool
> to turn CSV into SQL inserts, I would appreciate that too.
Here's one I just wrote.

#!/usr/bin/env python

"""Utility to import CSV files into an SQLite database."""

from __future__ import division

import csv
import getopt
import os
import sqlite3
import sys

USAGE = 'USAGE: csv2sqlite [-o dbfile] csvfile...'
DEFAULT_OUTPUT_FILE = 'a.db'

_INTEGER_MAX = (1 << 63) - 1
_INTEGER_MIN = -_INTEGER_MAX - 1

def identify_type(value):
    """
    Identify the SQLite type for a value.
    """
    try:
        int_value = int(value)
        if int_value < _INTEGER_MIN or int_value > _INTEGER_MAX:
            return 'REAL'
        return 'INTEGER'
    except ValueError:
        try:
            float(value)
            return 'REAL'
        except ValueError:
            return 'TEXT'

def common_type(values):
    """
    Return the common type for a list of values.
    """
    types = set(identify_type(value) for value in values if value)
    if len(types) == 1:
        return types.pop()
    elif types == set(['INTEGER', 'REAL']):
        return 'NUMERIC'
    else:
        return 'TEXT'

def add_table_from_file(dbconn, filename):
    """
    Read a CSV file and add it as a database table.

    dbconn   -- open database connection
    filename -- path to the CSV file
    """
    table_name = os.path.splitext(os.path.basename(filename))[0]
    data = list(csv.reader(open(filename)))
    if not data:
        return
    headers = data.pop(0)
    num_columns = len(headers)
    # Replace all empty strings with NULL
    data = [[value or None for value in row] for row in data]
    # Ensure that all rows have the same number of columns
    for row in data:
        for dummy in xrange(num_columns - len(row)):
            row.append(None)
    # Determine the appropriate type for each column
    column_info = [(column_name, common_type(row[col] for row in data))
                   for (col, column_name) in enumerate(headers)]
    # Build the SQL statements
    sql_create_table = 'CREATE TABLE IF NOT EXISTS "%s" (%s)' % \
        (table_name, ', '.join('%s %s' % column for column in column_info))
    sql_insert = 'INSERT INTO "%s" VALUES (%s)' % \
        (table_name, ', '.join(['?'] * num_columns))
    # Build the database table
    dbconn.execute(sql_create_table)
    for row in data:
        dbconn.execute(sql_insert, row)

def _main(argv=None):
    """
    Executed when this module is run as a script.
    """
    if argv is None:
        argv = sys.argv
    options, args = getopt.getopt(argv[1:], 'o:', ['help'])
    options = dict(options)
    if '--help' in options:
        print USAGE
    else:
        dbconn = sqlite3.connect(options.get('-o', DEFAULT_OUTPUT_FILE))
        for filename in args:
            add_table_from_file(dbconn, filename)
        dbconn.commit()
        dbconn.close()

if __name__ == '__main__':
    _main()

_______________________________________________
sqlite-users mailing list
sqlite-users@sqlite.org
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to