I have the following script, which transforms several PyTables files by changing the format of one field in a table from a StringCol to an Int32Col. I have a couple of problems:
1. The memory usage of the process keeps growing and then dies. I have to restart it several times
2. Is there any way to do this without converting it to a python list and make it faster.

import os
from tables import *
from time import clock, time
from path import path
from mx.DateTime import Parser, Time, DateTime
from attrdict import attrdict

# Compression settings passed to createTable() for newly created tables.
filterProps = Filters(complevel=2, complib='blosc')

#Old table description:
#class DataDescriptionDaily(IsDescription):
#    fullSymbol = StringCol(100, pos=0)
#    calcField = StringCol(100, pos=1)
#    hhmmss = StringCol(6, pos=2)
#    value = Float32Col(pos=3)

class DataDescriptionDaily(IsDescription):
    """New row layout for the '/data' table.

    Identical to the old description above except that ``hhmmss`` is an
    ``Int32Col`` instead of a ``StringCol(6)``.
    """
    fullSymbol = StringCol(100, pos=0)
    calcField = StringCol(100, pos=1)
    hhmmss = Int32Col(pos=2)
    value = Float32Col(pos=3)

def getTables(mapping):
    """Attach the '/data' table of ``mapping.hdf5`` to ``mapping.tblData``.

    The existing node is used when ``getNode('/data')`` succeeds; on any
    exception a new table is created with the ``DataDescriptionDaily`` layout
    and ``filterProps`` compression.
    """
    try:
        mapping.tblData = mapping.hdf5.getNode('/data')
    except:
        # NOTE(review): the bare except treats every failure as "table is
        # missing", not only a missing node.
        mapping.tblData = mapping.hdf5.createTable('/', 'data', DataDescriptionDaily, expectedrows=75000000, filters=filterProps)
        # NOTE(review): indentation was lost in the original post; this call
        # may instead belong at function level (run for existing tables too)
        # -- confirm against the author's file.
        createIndexes(mapping)

def createIndexes(mapping):
    """Index (or re-index) hhmmss, calcField and fullSymbol, then flush the file.

    For each column: ``reIndex()`` when the column is already indexed,
    otherwise ``createCSIndex()``. Ends with ``mapping.hdf5.flush()``.
    """
    if mapping.tblData.cols.hhmmss.is_indexed:
        mapping.tblData.cols.hhmmss.reIndex()
    else:
        mapping.tblData.cols.hhmmss.createCSIndex()
    if mapping.tblData.cols.calcField.is_indexed:
        mapping.tblData.cols.calcField.reIndex()
    else:
        mapping.tblData.cols.calcField.createCSIndex()
    if mapping.tblData.cols.fullSymbol.is_indexed:
        mapping.tblData.cols.fullSymbol.reIndex()
    else:
        mapping.tblData.cols.fullSymbol.createCSIndex()
    mapping.hdf5.flush()

# Convert every *.h5 file under each listed directory: copy '/data' into a
# "<file>_new" file with the new layout, then swap the files with `mv`.
dirPathList = [path("/calc")]
for dirPath in dirPathList:
    for f in dirPath.files("*.h5"):
        # A "<file>.old" sibling is taken to mean the file was already
        # converted, so the file is skipped.
        oldFileName = path("%s.old" % f)
        if oldFileName.exists():
            print 'Ignoring file: ', f
            continue
        # Source file, opened in append mode.
        mapping = attrdict()
        mapping.filePath = f
        mapping.hdf5 = openFile(mapping.filePath, "a")
        getTables(mapping)
        # Destination file, opened in write mode.
        mapping2 = attrdict()
        mapping2.filePath = f+"_new"
        mapping2.hdf5 = openFile(mapping2.filePath, "w")
        getTables(mapping2)
        #save symbol mapping
        # Every source row is accumulated as a Python tuple and appended in a
        # single call, so this list grows with the size of the source table.
        rowsToBeInserted = []
        for row in mapping.tblData.iterrows():
            # The converted value is assigned back into the source row before
            # the tuple is built.
            row[2] = int(row[2])
            rowsToBeInserted.append((row[0], row[1], row[2], row[3]))
        if rowsToBeInserted:
            mapping2.tblData.append(rowsToBeInserted)
            # NOTE(review): indentation was lost in the original post; this
            # flush may have been outside the `if` -- confirm.
            mapping2.tblData.flush()
        #create index and flush file
        createIndexes(mapping2)
        mapping2.hdf5.flush()
        # NOTE(review): no close() is called on mapping.hdf5 or mapping2.hdf5
        # anywhere in the visible code, so both handles are still open when
        # the files are moved and when the next iteration starts -- possibly
        # related to the memory growth described above; confirm.
        #get rid of old file
        # The file paths are interpolated directly into shell command strings.
        lines = os.popen("mv %s %s.old" % (mapping.filePath, mapping.filePath)).readlines()
        if lines:
            print lines
        lines = os.popen("mv %s %s" % (mapping2.filePath, mapping.filePath)).readlines()
        if lines:
            print lines

------------------------------------------------------------------------------
This SF.net email is sponsored by
Make an app they can't live without
Enter the BlackBerry Developer Challenge
http://p.sf.net/sfu/RIM-dev2dev
_______________________________________________
Pytables-users mailing list
Pytables-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/pytables-users