On Nov 20, 2007 10:43 AM, Jordan Atlas <[EMAIL PROTECTED]> wrote: > Can someone recommend a way to save the data in such a way that the > columns (or rows) are labeled? In other words, it would be nice to be > able to open the saved data and know what each row is without having to > refer to the script that created it. (referring to the creating script > feels error prone when you have many rows of data being saved). I'm > currently using the 'pylab.save' function to save the data.
I suggest using numpy record arrays for this -- the columns have names and the data can be of different types. You can save and load them using pickle (numpy.load and numpy.save will use pickle under the hood). If you want to stick with ASCII flat file representation (e.g. for use with other programs), in matplotlib svn there are two nice functions to help here: rec2csv and csv2rec. They support saving numpy record arrays to CSV files with column names, and loading these back up later doing type introspection to figure out the types (datetime, str, float, int). In [1]: import numpy as n In [2]: import matplotlib.mlab as mlab In [3]: x = n.random.rand(20,4) In [4]: r = n.rec.fromrecords(x, names='age,weight,height,cash') In [5]: r.dtype Out[5]: dtype([('age', '<f8'), ('weight', '<f8'), ('height', '<f8'), ('cash', '<f8')]) In [7]: mlab.rec2csv(r, 'mydata.csv') In [8]: !head mydata.csv age,weight,height,cash 0.0449935,0.252057,0.316116,0.0635711 0.777189,0.155186,0.0537382,0.233598 0.731376,0.654577,0.977792,0.0171022 0.685975,0.373741,0.714592,0.620079 0.634548,0.956708,0.360962,0.885379 0.431011,0.359094,0.21484,0.961865 0.115155,0.78767,0.352753,0.769402 0.984747,0.720163,0.887608,0.316844 0.0478857,0.813668,0.882535,0.8837 In [9]: newr = mlab.csv2rec('mydata.csv') In [10]: newr.dtype Out[10]: dtype([('age', '<f8'), ('weight', '<f8'), ('height', '<f8'), ('cash', '<f8')]) In [11]: newr Out[11]: recarray([ (0.044993499999999999, 0.25205699999999998, 0.31611600000000001, 0.063571100000000005), (0.77718900000000002, 0.15518599999999999, 0.0537382, 0.233598), (0.73137600000000003, 0.65457699999999996, 0.97779199999999999, 0.017102200000000001), (0.685975, 0.37374099999999999, 0.714592, 0.62007900000000005), (0.634548, 0.956708, 0.36096200000000001, 0.88537900000000003), (0.43101099999999998, 0.35909400000000002, 0.21484, 0.96186499999999997), (0.11515499999999999, 0.78766999999999998, 0.35275299999999998, 0.76940200000000003), (0.98474700000000004, 0.720163, 
0.88760799999999995, 0.31684400000000001), (0.047885700000000003, 0.81366799999999995, 0.88253499999999996, 0.88370000000000004), (0.044475599999999997, 0.89918900000000002, 0.076484499999999997, 0.114994), (0.75139299999999998, 0.70954300000000003, 0.458505, 0.33839900000000001), (0.14619299999999999, 0.907717, 0.24915200000000001, 0.67030400000000001), (0.89663199999999998, 0.61957300000000004, 0.0060039200000000003, 0.048883500000000003), (0.20794000000000001, 0.56046499999999999, 0.078303899999999996, 0.216032), (0.28726000000000002, 0.14282500000000001, 0.51740200000000003, 0.553037), (0.96326999999999996, 0.21327299999999999, 0.72040999999999999, 0.181446), (0.31984000000000001, 0.39338299999999998, 0.45787899999999998, 0.33919199999999999), (0.42086200000000001, 0.98801499999999998, 0.53429000000000004, 0.074105699999999997), (0.104211, 0.15845100000000001, 0.13339200000000001, 0.99228300000000003), (0.73563299999999998, 0.948407, 0.44708900000000001, 0.79521399999999998)], dtype=[('age', '<f8'), ('weight', '<f8'), ('height', '<f8'), ('cash', '<f8')]) You can also work with non floating point data In [14]: url = 'http://ichart.finance.yahoo.com/table.csv?s=GE&d=10&e=20&f=2007&g=d&a=0&b=2&c=1962&ignore=.csv' In [15]: import urllib In [16]: urllib.urlretrieve(url, 'ge.csv') Out[16]: ('ge.csv', <httplib.HTTPMessage instance at 0x8fa7b2c>) In [17]: r = mlab.csv2rec('ge.csv') In [18]: !head ge.csv Date,Open,High,Low,Close,Volume,Adj Close 2007-11-19,38.48,38.51,38.00,38.16,35415000,38.16 2007-11-16,38.50,38.67,37.87,38.65,50181100,38.65 2007-11-15,38.93,38.93,38.13,38.31,41590000,38.31 2007-11-14,39.90,39.95,38.82,39.01,39650800,39.01 2007-11-13,38.50,39.25,38.25,39.21,42053400,39.21 2007-11-12,38.24,39.04,38.17,38.25,36968000,38.25 2007-11-09,38.52,38.75,38.11,38.38,42662200,38.38 2007-11-08,39.20,39.32,37.50,39.02,52970300,39.02 2007-11-07,39.90,39.93,38.99,39.08,46720100,39.08 In [20]: r[:10] Out[20]: recarray([ (datetime.datetime(2007, 11, 19, 0, 0), 
38.479999999999997, 38.509999999999998, 38.0, 38.159999999999997, 35415000, 38.159999999999997), (datetime.datetime(2007, 11, 16, 0, 0), 38.5, 38.670000000000002, 37.869999999999997, 38.649999999999999, 50181100, 38.649999999999999), (datetime.datetime(2007, 11, 15, 0, 0), 38.93, 38.93, 38.130000000000003, 38.310000000000002, 41590000, 38.310000000000002), (datetime.datetime(2007, 11, 14, 0, 0), 39.899999999999999, 39.950000000000003, 38.82, 39.009999999999998, 39650800, 39.009999999999998), (datetime.datetime(2007, 11, 13, 0, 0), 38.5, 39.25, 38.25, 39.210000000000001, 42053400, 39.210000000000001), (datetime.datetime(2007, 11, 12, 0, 0), 38.240000000000002, 39.039999999999999, 38.170000000000002, 38.25, 36968000, 38.25), (datetime.datetime(2007, 11, 9, 0, 0), 38.520000000000003, 38.75, 38.109999999999999, 38.380000000000003, 42662200, 38.380000000000003), (datetime.datetime(2007, 11, 8, 0, 0), 39.200000000000003, 39.32, 37.5, 39.020000000000003, 52970300, 39.020000000000003), (datetime.datetime(2007, 11, 7, 0, 0), 39.899999999999999, 39.93, 38.990000000000002, 39.079999999999998, 46720100, 39.079999999999998), (datetime.datetime(2007, 11, 6, 0, 0), 40.200000000000003, 40.490000000000002, 39.969999999999999, 40.18, 42131000, 40.18)], dtype=[('date', '|O4'), ('open', '<f8'), ('high', '<f8'), ('low', '<f8'), ('close', '<f8'), ('volume', '<i4'), ('adj_close', '<f8')]) ------------------------------------------------------------------------- This SF.net email is sponsored by: Microsoft Defy all challenges. Microsoft(R) Visual Studio 2005. http://clk.atdmt.com/MRT/go/vse0120000070mrt/direct/01/ _______________________________________________ Matplotlib-users mailing list Matplotlib-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/matplotlib-users