I am using the Theano library to perform function fitting.
Unfortunately, the algorithm relies on numerical integration, which
requires the scan function.
Also, I am using gradient-descent method for optimization.
Right now, the computation time is quite slow and I would like to improve
the speed.
My goal is to obtain better speed on the GPU than on the CPU.
Can anybody give me some advice on this? (Either general advice or specific
advice on the code I have attached.)
Thank you,
--
---
You received this message because you are subscribed to the Google Groups
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.
import numpy
from numpy import genfromtxt
import theano
from theano import tensor
import time
import math
from scipy import interpolate
from scipy import optimize
import matplotlib.pyplot as plt
import ipdb
def main():
    """Fit a per-voxel tracer-kinetics model to interpolated lesion curves.

    Pipeline:
      1. Load time/AIF/lesion columns from 'DataSet.csv'.
      2. Spline-interpolate both curves onto a uniform grid (dt = 1).
      3. Build a Theano graph: a scan-based recurrence integrates a
         two-compartment concentration model for N voxels in parallel,
         a signal equation converts concentration to signal, and a
         sum-of-squares cost is differentiated w.r.t. the parameters.
      4. Run plain gradient descent with a decaying step size, then save
         the fitted parameters to 'solution.npy'.

    NOTE(review): the model appears to be a DCE-MRI pharmacokinetic model
    (ve, vb, Fp, PS parameters, AIF input) — confirm with the author.
    Python 2 only (print statements, xrange). No return value; side
    effects are console output and 'solution.npy'.
    """
    # Load the data
    # Columns assumed: 0 = sample times, 1 = arterial input function (AIF),
    # 2 = lesion signal. TODO confirm column layout against DataSet.csv.
    Rdata = genfromtxt('DataSet.csv',delimiter=",")
    rt = Rdata[:,0]
    aif = Rdata[:,1]
    lesion = Rdata[:,2]
    # Interpolation
    # Uniform time grid with unit spacing; scan below implicitly uses dt=1
    # (no explicit dt factor appears in the recurrence).
    t = numpy.arange(rt[-1]) # dt=1
    # 1) AIF Interpolation
    # Cubic smoothing spline with s=0 (exact interpolation through samples).
    tck1 = interpolate.splrep(rt,aif,s=0)
    aifci = interpolate.splev(t,tck1,der=0).astype('float32')
    # 2) Lesion Interpolation
    # (Excluding Dummy points and assuming nbase=5)
    # Normalize by the mean of the baseline samples.
    # NOTE(review): lesion[0:4] selects only 4 points, but the comment
    # above says nbase=5 — possible off-by-one; confirm intended slice.
    lesionN=lesion/numpy.mean(lesion[0:4])
    # lesionN[0:4]=1
    tck2 = interpolate.splrep(rt,lesionN,s=0)
    lesioni = interpolate.splev(t,tck2,der=0).astype('float32')
    N = 50000 # n voxels
    # Replicate the single measured curve across all N voxels
    # (every voxel is fitted against the same target curve here).
    lesioniM = numpy.tile(lesioni, [N,1])
    lesioniM = lesioniM.T
    # Initial Value
    # Random initial guesses for the 4 parameters per voxel:
    # vei, vbi presumably volume fractions; fpi a flow (per second, hence
    # /60); psi a small rate constant. TODO confirm units with the author.
    vei = 0.02*numpy.random.rand(1)+0.05
    vbi = 0.02*numpy.random.rand(1)+0.01
    fpi = (0.1*numpy.random.rand(1)+0.1)/60
    psi = numpy.array([0.001])
    p0_ = numpy.concatenate((vei,vbi,fpi,psi)).astype('float32')
    # Clamp into (1e-20, 2) to keep the parameters strictly positive.
    p0_ = numpy.minimum(numpy.maximum(p0_,1e-20),2)
    # Interleaved layout: p0_[0::4]=ve, [1::4]=vb, [2::4]=fp, [3::4]=ps
    # for each of the N voxels (all voxels share the same initial guess).
    p0_ = numpy.tile(p0_, [1,N]).flatten()
    # p0_[4:8]=numpy.array([0.06007994,0.02380165,0.00314048,0.001]).astype('float32')
    # Model Calcaulation
    # Shared variable so the compiled update function can mutate it in place
    # (important for GPU residency — avoids host<->device transfers).
    p0 = theano.shared(p0_,name='p0')
    # pC: 2x2xN per-voxel transition matrix of the discrete recurrence;
    # pA: 2xN per-voxel input gain applied to the AIF sample each step.
    pC =tensor.stack(([1-p0[2::4]-p0[3::4], p0[1::4]*p0[3::4]/p0[0::4]],
        [p0[0::4]*p0[3::4]/p0[1::4],1-p0[3::4]]),axis=0)
    pA = tensor.stack((
        p0[1::4]*p0[2::4],tensor.zeros(N)
        ),axis=0)
    def _scan_fn(aifci_t, C_t1):
        # One integration step: C_t = clip(pC @ C_{t-1}) + pA * AIF(t),
        # computed per voxel via broadcasting; the clamp at +/-3.4028e+34
        # guards against float32 overflow blowing up the recurrence.
        pC_t1 = (pC*C_t1).sum(1)
        C_t = tensor.maximum(tensor.minimum(pC_t1,3.4028e+34),-3.4028e+34) + pA*aifci_t
        return C_t
    # Iterate the recurrence over all time points; initial state is the
    # zero 2xN concentration matrix. Cts_ has shape (T, 2, N).
    Cts_, _ = theano.scan(_scan_fn, sequences=[aifci.T],outputs_info=[tensor.alloc(0.,2,N)])
    # Total concentration per time point/voxel: sum of the 2 compartments.
    Cts = Cts_.sum(1)
    def _exp(A):
        # Overflow-safe exp: cap the exponent at log(float32 max).
        return tensor.exp(tensor.minimum(A, tensor.log(3.4028e+34)))
    # Signal model: linear relaxation-rate change with concentration,
    # then an SPGR-style signal equation with a 12-degree flip angle and
    # TR = 0.005 — TODO confirm these acquisition constants.
    R1 = 1.5+4.3*Cts
    sin_const = numpy.sin(12*numpy.pi/180)
    cos_const = numpy.cos(12*numpy.pi/180)
    # float32 constants keep the graph single-precision (GPU-friendly).
    sin_const = tensor.constant(sin_const, dtype='float32')
    cos_const = tensor.constant(cos_const, dtype='float32')
    Eh_ =((1-_exp(-(0.005)*R1))*sin_const/
        (1-_exp(-(0.005)*R1)*cos_const))
    # Baseline-normalize the model signal the same way the data was.
    Eh = Eh_/tensor.mean(Eh_[0:4,:],0)
    # Fitting Function
    tumori_ = lesioniM/numpy.mean(lesioniM[0:4,:],0)
    tumori = theano.shared(tumori_,name='tumori')
    # Sum-of-squares cost, scaled by the number of time points.
    J = ((tumori-Eh)**2/Eh.shape[0]).sum()
    # disconnected_inputs='ignore' silences errors if some p0 entries do
    # not reach J through the graph.
    dJ = tensor.grad(J, p0, disconnected_inputs='ignore')
    step_size = tensor.scalar('step_size', dtype=theano.config.floatX)
    # Compiled update: one gradient-descent step applied in place to the
    # shared p0, re-clamped into [1e-10, 2]; returns the (pre-update) cost.
    # NOTE(review): updates is passed as a set of one (var, expr) pair —
    # Theano accepts it, but a dict or list of pairs is the usual form.
    f_up = theano.function([step_size],J, updates = {(p0,
        tensor.minimum(tensor.maximum(p0-step_size*dJ,1e-10),2))})
    # Optimization
    max_iter = 1000
    step_size0 = 0.01
    disp_freq = 10
    t0 = time.time()
    for n_updates in xrange(max_iter):
        # 1/(1 + 0.001*k) step-size decay; note this Python float shadows
        # the symbolic 'step_size' scalar defined above (harmless here).
        step_size = step_size0 / (n_updates*0.001 + 1)
        J_curr = f_up(step_size)
        if numpy.mod(n_updates, disp_freq)==0:
            print n_updates, 'Cost', J_curr
    t1 = time.time()
    print 'Done'
    print("Time taken %s" % (t1-t0))
    # Pull the fitted parameters back from the (possibly GPU-resident)
    # shared variable and persist them.
    solution = p0.get_value()
    numpy.save('solution.npy',solution)
# Run the fitting pipeline only when executed as a script, not on import.
if __name__== "__main__":
    main()