Hi everyone,

Recently I've been doing a lot of work using both GRASS databases (x, y, z, etc. vector data) and NumPy. To simplify things, I've written a Python function that extracts data from a database into NumPy arrays using db.select, making an educated guess at the data type using db.describe. I'm wondering whether this would be of any use to anyone else? Any feedback would be appreciated.

#!/usr/bin/env python

import os
import tempfile as pytempfile
import warnings

import numpy
import grass.script as grass

def db_select_numpy(tables, sql):
    """
    Read data from a GRASS database into a dictionary of numpy arrays, making
    an educated guess at the dtype from table descriptions.

    Parameters
    ----------
    tables : str or list of str
        Name(s) of the table(s) the query reads from. Each one is described
        with ``grass.db.db_describe`` to look up column types and lengths.
        If two tables share a column name, the later description wins and a
        warning is issued.
    sql : str
        SQL statement handed to ``db.select``.

    Returns
    -------
    dict
        Maps every column name found in the header line of the ``db.select``
        output to a 1-D numpy array with one element per returned row.
        DOUBLE PRECISION columns become float64, INTEGER columns become
        Python-int arrays and CHARACTER columns become fixed-width byte
        strings ('S<len>').

    Raises
    ------
    NameError
        If an output column is not described by any of ``tables``.
    KeyError
        If a column type is not DOUBLE PRECISION, INTEGER or CHARACTER.
    IndexError
        If ``db.select`` produced no output at all, or a row has fewer
        fields than the header.

    Notes
    -----
    An empty numeric cell triggers a warning; the cell is then NaN in float
    columns and 0 in integer columns.

    example:

    >>> data = db_select_numpy('gpspoints', 'SELECT x,y,z FROM gpspoints')
    >>> print(len(data['x']), data['x'].dtype)
    23 float64
    """
    # accept a single table name as well as a list of names
    if isinstance(tables, str):
        tables = [tables]

    # fetch dtypes from table descriptions
    cols = {}
    for table in tables:
        description = grass.db.db_describe(table)
        for col in description['cols']:
            name = col[0]
            if name in cols:
                warnings.warn(name + ' overwriting previous instance')
            cols[name] = {'type': col[1], 'len': col[2]}

    # dtypes translations (plain int is what the removed numpy.int aliased)
    dtypes = {'DOUBLE PRECISION': numpy.float64,
              'INTEGER': int,
              'CHARACTER': 'S'}

    # query database: reserve a temporary path, release our own handle so
    # db.select can overwrite the file, then read the result back as text
    tmp = pytempfile.NamedTemporaryFile(mode='w+b', delete=False)
    tmp.close()
    try:
        grass.core.run_command('db.select', overwrite=True, quiet=True,
                               sql=sql, output=tmp.name)
        with open(tmp.name, 'r') as result:
            database = result.readlines()
    finally:
        os.remove(tmp.name)

    if not database:
        # same exception type as indexing the empty list would raise,
        # but with a message that points at the cause
        raise IndexError('db.select returned no output for: ' + sql)

    # allocate numpy arrays (zero-filled so skipped cells are deterministic)
    columns = database[0].rstrip().split('|')
    nrows = len(database) - 1
    data = {}
    for name in columns:
        if name not in cols:
            raise NameError('column "' + name + '" not in specified tables')
        dtype = dtypes[cols[name]['type']]
        if dtype == 'S':
            dtype = 'S' + str(cols[name]['len'])  # e.g., 'S5'
        data[name] = numpy.zeros(nrows, dtype=dtype)

    # copy data into numpy arrays
    for row, line in enumerate(database[1:]):
        elements = line.rstrip().split('|')
        for m, name in enumerate(columns):
            column = data[name]
            value = elements[m]
            if column.dtype.kind == 'S' or value:
                column[row] = value
            else:
                warnings.warn('numeric cells cannot be empty')
                if column.dtype.kind == 'f':
                    # mark the missing value explicitly where the dtype allows
                    column[row] = numpy.nan
    return data
_______________________________________________
grass-user mailing list
[email protected]
http://lists.osgeo.org/mailman/listinfo/grass-user

Reply via email to