import os, gc, time, tables
import multiprocessing as mp
import numpy as np
from scipy.sparse import csr_matrix

# Directory TestTables uses as its experiment directory: the master
# 'results.h5' and one sub-directory per parameter-space point live here.
results_dir = './h5files'

class TestTables(object):
    """Write a master HDF5 file and run one simulation per parameter point.

    Typical use is ``prepare(points)`` followed by ``process()``.  The master
    file is opened in ``__init__`` and closed by ``process()``, so an
    instance is good for a single run.
    """

    def __init__(self, multiproc):
        """Create the experiment directory and open the master file.

        multiproc -- if true, process() distributes the simulations over a
                     multiprocessing pool; otherwise they run in-process.
        """
        self.multiproc = multiproc
        self.experiment_dir = results_dir
        if not os.path.exists(self.experiment_dir):
            os.makedirs(self.experiment_dir)
        filename = os.path.join(self.experiment_dir, 'results.h5')
        self.h5f = tables.openFile(filename, mode = "w", title = "Master file")

    def prepare(self, paramspace_pts):
        """Remember the parameter points and write the shared input data.

        Stores a random 500-element array ('somearray') and a random
        1000x1000 0/1 matrix in CSR form ('somesparse') in the master file.
        process() reads ``self.paramspace_pts``, so call this first.
        """
        self.paramspace_pts = paramspace_pts
        self.h5f.createArray(self.h5f.root, 'somearray', np.random.rand(500))
        conn = np.random.binomial(1,0.1,size=(1000,1000))
        set_sparse_array(self.h5f, self.h5f.root, 'somesparse', csr_matrix(conn))

    def _get_sparse(self, group, name):
        """Rebuild a csr_matrix from the child group *name* of *group*.

        The child is expected to hold 'data', 'indices', 'indptr' and
        'shape' nodes, i.e. the layout written by set_sparse_array().
        """
        csrgroup = group._f_getChild(name)
        data   = csrgroup.data.read()
        indices= csrgroup.indices.read()
        indptr = csrgroup.indptr.read()
        shape  = csrgroup.shape.read()
        return csr_matrix((data,indices,indptr),shape=shape)

    def prepare_subprocess(self, paramspace_pt):
        """Build the ``(paramspace_pt, params)`` task for one simulation.

        Reads the shared inputs back from the master file, makes sure the
        per-point output directory exists and returns the tuple that
        run_simulation_single() expects.
        """
        subprocdir = os.path.join(self.experiment_dir, str(paramspace_pt))
        params = {'sparse':self._get_sparse(self.h5f.root, 'somesparse'),
                  'array':self.h5f.root._f_getChild('somearray').read(),
                  'results_file':os.path.join(subprocdir, 'results.h5')}
        if not os.path.exists(subprocdir):
            os.makedirs(subprocdir)
        return (paramspace_pt, params)

    def process(self):
        """Run every prepared simulation, report progress, close the master file.

        The pool (if any) is always shut down and the master file is always
        closed, including when a simulation or task preparation raises; the
        exception then propagates to the caller.
        """
        num_sims = len(self.paramspace_pts)
        # Lazy on purpose: each task carries its own copy of the input data,
        # so tasks are built one at a time as they are consumed.
        all_simulations = (self.prepare_subprocess(paramspace_pt)
                                for paramspace_pt in self.paramspace_pts)
        tstart=time.time()
        pool = None
        finished_cleanly = False
        try:
            if self.multiproc:
                num_procs = 8
                # maxtasksperchild=1: every worker handles a single task and
                # is then replaced by a fresh process.
                pool = mp.Pool(num_procs,maxtasksperchild=1)
                results = pool.imap_unordered(run_simulation_single, all_simulations)
            else:
                num_procs = 1
                results = (run_simulation_single(p) for p in all_simulations)
            sl = "Starting multiprocessing pool with {0} processes for {1} simulations."
            print(sl.format(num_procs,num_sims))
            for finished_counter, _paramspace_pt in enumerate(results,1):
                msg = "{0:.1f} minutes elapsed, {1}/{2} simulations complete. "
                print(msg.format((time.time()-tstart)/60., finished_counter, num_sims))
                gc.collect()
            finished_cleanly = True
        finally:
            try:
                if pool is not None:
                    if finished_cleanly:
                        pool.close()
                    else:
                        # Do not wait for the remaining tasks after a failure.
                        pool.terminate()
                    pool.join()
            finally:
                self.h5f.close()

        el = "...{0} simulations done in {1:.1f} minutes."
        print(el.format(num_sims,(time.time()-tstart)/60.))

def set_sparse_array(h5f, group, name, csr):
    """Store *csr* as a new group *name* under *group* in the file *h5f*.

    The group receives four arrays, created in this order: 'data',
    'indptr', 'indices' and 'shape', each taken from the attribute of the
    same name on *csr*.
    """
    target = h5f.createGroup(group, name)
    for component in ('data', 'indptr', 'indices', 'shape'):
        h5f.createArray(target, component, getattr(csr, component))

def run_simulation_single(args):
    """Run one simulation and write its output to its own HDF5 file.

    args -- a ``(paramspace_pt, params)`` tuple.  It is passed as a single
            positional argument so the function can be used directly with
            ``Pool.imap_unordered``.  ``params`` must provide
            'results_file' (output path) and 'sparse' (a CSR matrix).

    The sparse matrix is copied into the output file, followed by 1000
    variable-length rows of random floats (0 to 499 values each).

    Returns ``paramspace_pt``.  Errors while running are reported as a
    printed traceback rather than raised, so one failing point does not
    abort the whole batch; the output file is closed either way.
    """
    import sys, traceback
    paramspace_pt, params = args
    try:
        filename = params['results_file']
        h5f = tables.openFile(filename, mode = "w", title = "Subprocess results file")
        try:
            set_sparse_array(h5f, h5f.root, 'sparse', params['sparse'])

            vlarray = h5f.createVLArray(h5f.root, 'vldata',tables.FloatAtom(shape=()))
            for _ in range(1000):
                vlarray.append(np.random.rand(np.random.randint(0,500)))
        finally:
            # Close even when writing failed, so the handle is not leaked.
            h5f.close()
    except Exception:
        traceback.print_exception(*sys.exc_info())
    return paramspace_pt

def main():
    """Run the demo: 2000 parameter points processed with multiprocessing."""
    experiment = TestTables(multiproc=True)
    points = range(2000)
    experiment.prepare(points)
    experiment.process()

# Start the experiment only when run as a script.  Merely importing this
# module must not launch it: multiprocessing may import the main module in
# worker processes on platforms that spawn instead of fork.
if __name__ == '__main__':
    main()
