Hello All,

I currently use PyTables to generate a dataset that is indexed by a
timestamp and a symbol. The problem that I have is that the data is stored
at irregular intervals. For example:

*# See below for method ts_from_str*
data=[{'text_ts':'2012-01-04T15:00:00Z', 'symbol':'APPL', 'price':689.00,
'timestamp':ts_from_str('2012-01-04T15:00:00Z')},
{'text_ts':'2012-01-04T15:11:00Z', 'symbol':'APPL', 'price':687.24,
'timestamp':ts_from_str('2012-01-04T15:11:00Z')},
{'text_ts':'2012-01-05T15:33:00Z', 'symbol':'APPL', 'price':688.32,
'timestamp':ts_from_str('2012-01-05T15:33:00Z')},
{'text_ts':'2012-01-04T15:01:00Z', 'symbol':'MSFT', 'price':32.30,
'timestamp':ts_from_str('2012-01-04T15:01:00Z')},
{'text_ts':'2012-01-04T16:00:00Z', 'symbol':'MSFT', 'price':36.44,
'timestamp':ts_from_str('2012-01-04T16:00:00Z')},
{'text_ts':'2012-01-05T15:19:00Z', 'symbol':'MSFT', 'price':35.89,
'timestamp':ts_from_str('2012-01-05T15:19:00Z')}]

If I want to look up the price for Apple on for January 4, 2012 at
15:01:00, I will get an empty ndarry. *Is there a way to optimize the
search for data "asof" a specific time other than iterating until you find
data?* I've written my own price_asof method (See code below), that
produces the following output.

*In [63]: price_asof(dt,'APPL')*
*QUERY: (timestamp == 1325707380) & (symbol == "APPL") -- text_ts:
2012-01-04T15:03:00Z*
*QUERY: (timestamp == 1325707320) & (symbol == "APPL") -- text_ts:
2012-01-04T15:02:00Z*
*QUERY: (timestamp == 1325707260) & (symbol == "APPL") -- text_ts:
2012-01-04T15:01:00Z*
*QUERY: (timestamp == 1325707200) & (symbol == "APPL") -- text_ts:
2012-01-04T15:00:00Z*
*Out[63]: *
*array([(689.0, 'APPL', '2012-01-04T15:00:00Z', 1325707200)], *
*      dtype=[('price', '<f8'), ('symbol', 'S16'), ('text_ts', 'S26'),
('timestamp', '<i4')])*

*# Code to generate data*
import tables
from datetime import datetime, timedelta
from time import mktime
import numpy as np

def ts_from_str(ts_str, ts_format='%Y-%m-%dT%H:%M:%SZ'):
    """
    Create a Unix Timestamp from an ISO 8601 timestamp string
    """
    dt = datetime.strptime(ts_str, ts_format)
    return mktime(dt.timetuple())

class PriceData(tables.IsDescription):
    text_ts = tables.StringCol(len('2012-01-01T00:00:00+00:00 '))
    symbol = tables.StringCol(16)
    price = tables.Float64Col()
    timestamp = tables.Time32Col()
h5f = tables.openFile('test.h5','w', title='Price Data For Apple and
Microsoft')
group = h5f.createGroup('/','January', 'January Price Data')
tbl = h5f.createTable(group, 'Prices',PriceData,'Apple and Microsoft
Prices')

data=[{'text_ts':'2012-01-04T15:00:00Z', 'symbol':'APPL', 'price':689.00,
'timestamp':ts_from_str('2012-01-04T15:00:00Z')},
{'text_ts':'2012-01-04T15:11:00Z', 'symbol':'APPL', 'price':687.24,
'timestamp':ts_from_str('2012-01-04T15:11:00Z')},
{'text_ts':'2012-01-05T15:33:00Z', 'symbol':'APPL', 'price':688.32,
'timestamp':ts_from_str('2012-01-05T15:33:00Z')},
{'text_ts':'2012-01-04T15:01:00Z', 'symbol':'MSFT', 'price':32.30,
'timestamp':ts_from_str('2012-01-04T15:01:00Z')},
{'text_ts':'2012-01-04T16:00:00Z', 'symbol':'MSFT', 'price':36.44,
'timestamp':ts_from_str('2012-01-04T16:00:00Z')},
{'text_ts':'2012-01-05T15:19:00Z', 'symbol':'MSFT', 'price':35.89,
'timestamp':ts_from_str('2012-01-05T15:19:00Z')}]

price_data = tbl.row
for d in data:
    price_data['text_ts'] = d['text_ts']
    price_data['symbol'] = d['symbol']
    price_data['price'] = d['price']
    price_data['timestamp'] = d['timestamp']
    price_data.append()
tbl.flush()

*# This is my price_asof function*
def price_asof(dt, symbol, max_rec=1000):
    """
    Return the price of the time dt
    """
    ts = mktime(dt.timetuple())
    query = '(timestamp == %d)' % ts
    if symbol:
        query += ' & (symbol == "%s")' % symbol
    data = np.ndarray(0)
    count = 0
    while (not data) and (count <= max_rec):
        # print "QUERY: %s -- text_ts: %s" % (query,
dt.strftime('%Y-%m-%dT%H:%M:%SZ'))
        data = tbl.readWhere(query)
        dt = dt-timedelta(seconds=60)
        ts = mktime(dt.timetuple())
        query = '(timestamp == %d)' % ts
        if symbol:
            query += ' & (symbol == "%s")' % symbol
        count += 1
    return data

h5f.close()


-- 
Aquil H. Abdullah
aquil.abdul...@gmail.com
------------------------------------------------------------------------------
LogMeIn Rescue: Anywhere, Anytime Remote support for IT. Free Trial
Remotely access PCs and mobile devices and provide instant support
Improve your efficiency, and focus on delivering more value-add services
Discover what IT Professionals Know. Rescue delivers
http://p.sf.net/sfu/logmein_12329d2d
_______________________________________________
Pytables-users mailing list
Pytables-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/pytables-users

Reply via email to