I am using the Theano library to perform function fitting.
Unfortunately, the algorithm relies on numerical integration, which
requires the scan function.
Also, I am using gradient-descent method for optimization.
Right now, the computation time is quite slow and I would like to improve
the speed.
My goal is to obtain better speed on the GPU than on the CPU.
Can anybody give me some advice on this? (Either general advice or specific
advice on the code I have attached.)
Thank you,
--
---
You received this message because you are subscribed to the Google Groups
"theano-users" group.
To unsubscribe from this group and stop receiving emails from it, send an email
to [email protected].
For more options, visit https://groups.google.com/d/optout.
import numpy
from numpy import genfromtxt
import theano
from theano import tensor
import time
import math
from scipy import interpolate
from scipy import optimize
import matplotlib.pyplot as plt
import ipdb
def main():
    """Fit a per-voxel tracer-kinetics model to interpolated lesion curves.

    Pipeline:
      1. Load time/AIF/lesion columns from 'DataSet.csv'.
      2. Spline-interpolate both curves onto a uniform grid (dt = 1).
      3. Build a Theano graph: a scan-based recurrence integrates a
         two-compartment concentration model for N voxels in parallel,
         a signal equation converts concentration to signal, and a
         sum-of-squares cost is differentiated w.r.t. the parameters.
      4. Run plain gradient descent with a decaying step size, then save
         the fitted parameters to 'solution.npy'.

    NOTE(review): the model appears to be a DCE-MRI pharmacokinetic model
    (ve, vb, Fp, PS parameters, AIF input) — confirm with the author.
    Python 2 only (print statements, xrange). No return value; side
    effects are console output and 'solution.npy'.
    """
    # Load the data
    # Columns assumed: 0 = sample times, 1 = arterial input function (AIF),
    # 2 = lesion signal. TODO confirm column layout against DataSet.csv.
    Rdata = genfromtxt('DataSet.csv',delimiter=",")
    rt = Rdata[:,0]
    aif = Rdata[:,1]
    lesion = Rdata[:,2]
    # Interpolation
    # Uniform time grid with unit spacing; scan below implicitly uses dt=1
    # (no explicit dt factor appears in the recurrence).
    t = numpy.arange(rt[-1]) # dt=1
    # 1) AIF Interpolation
    # Cubic smoothing spline with s=0 (exact interpolation through samples).
    tck1 = interpolate.splrep(rt,aif,s=0)
    aifci = interpolate.splev(t,tck1,der=0).astype('float32')
    # 2) Lesion Interpolation
    # (Excluding Dummy points and assuming nbase=5)
    # Normalize by the mean of the baseline samples.
    # NOTE(review): lesion[0:4] selects only 4 points, but the comment
    # above says nbase=5 — possible off-by-one; confirm intended slice.
    lesionN=lesion/numpy.mean(lesion[0:4])
    # lesionN[0:4]=1
    tck2 = interpolate.splrep(rt,lesionN,s=0)
    lesioni = interpolate.splev(t,tck2,der=0).astype('float32')
    N = 50000 # n voxels
    # Replicate the single measured curve across all N voxels
    # (every voxel is fitted against the same target curve here).
    lesioniM = numpy.tile(lesioni, [N,1])
    lesioniM = lesioniM.T
    # Initial Value
    # Random initial guesses for the 4 parameters per voxel:
    # vei, vbi presumably volume fractions; fpi a flow (per second, hence
    # /60); psi a small rate constant. TODO confirm units with the author.
    vei = 0.02*numpy.random.rand(1)+0.05
    vbi = 0.02*numpy.random.rand(1)+0.01
    fpi = (0.1*numpy.random.rand(1)+0.1)/60
    psi = numpy.array([0.001])
    p0_ = numpy.concatenate((vei,vbi,fpi,psi)).astype('float32')
    # Clamp into (1e-20, 2) to keep the parameters strictly positive.
    p0_ = numpy.minimum(numpy.maximum(p0_,1e-20),2)
    # Interleaved layout: p0_[0::4]=ve, [1::4]=vb, [2::4]=fp, [3::4]=ps
    # for each of the N voxels (all voxels share the same initial guess).
    p0_ = numpy.tile(p0_, [1,N]).flatten()
    # p0_[4:8]=numpy.array([0.06007994,0.02380165,0.00314048,0.001]).astype('float32')
    # Model Calcaulation
    # Shared variable so the compiled update function can mutate it in place
    # (important for GPU residency — avoids host<->device transfers).
    p0 = theano.shared(p0_,name='p0')
    # pC: 2x2xN per-voxel transition matrix of the discrete recurrence;
    # pA: 2xN per-voxel input gain applied to the AIF sample each step.
    pC =tensor.stack(([1-p0[2::4]-p0[3::4], p0[1::4]*p0[3::4]/p0[0::4]],
        [p0[0::4]*p0[3::4]/p0[1::4],1-p0[3::4]]),axis=0)
    pA = tensor.stack((
        p0[1::4]*p0[2::4],tensor.zeros(N)
        ),axis=0)
    def _scan_fn(aifci_t, C_t1):
        # One integration step: C_t = clip(pC @ C_{t-1}) + pA * AIF(t),
        # computed per voxel via broadcasting; the clamp at +/-3.4028e+34
        # guards against float32 overflow blowing up the recurrence.
        pC_t1 = (pC*C_t1).sum(1)
        C_t = tensor.maximum(tensor.minimum(pC_t1,3.4028e+34),-3.4028e+34) + pA*aifci_t
        return C_t
    # Iterate the recurrence over all time points; initial state is the
    # zero 2xN concentration matrix. Cts_ has shape (T, 2, N).
    Cts_, _ = theano.scan(_scan_fn, sequences=[aifci.T],outputs_info=[tensor.alloc(0.,2,N)])
    # Total concentration per time point/voxel: sum of the 2 compartments.
    Cts = Cts_.sum(1)
    def _exp(A):
        # Overflow-safe exp: cap the exponent at log(float32 max).
        return tensor.exp(tensor.minimum(A, tensor.log(3.4028e+34)))
    # Signal model: linear relaxation-rate change with concentration,
    # then an SPGR-style signal equation with a 12-degree flip angle and
    # TR = 0.005 — TODO confirm these acquisition constants.
    R1 = 1.5+4.3*Cts
    sin_const = numpy.sin(12*numpy.pi/180)
    cos_const = numpy.cos(12*numpy.pi/180)
    # float32 constants keep the graph single-precision (GPU-friendly).
    sin_const = tensor.constant(sin_const, dtype='float32')
    cos_const = tensor.constant(cos_const, dtype='float32')
    Eh_ =((1-_exp(-(0.005)*R1))*sin_const/
        (1-_exp(-(0.005)*R1)*cos_const))
    # Baseline-normalize the model signal the same way the data was.
    Eh = Eh_/tensor.mean(Eh_[0:4,:],0)
    # Fitting Function
    tumori_ = lesioniM/numpy.mean(lesioniM[0:4,:],0)
    tumori = theano.shared(tumori_,name='tumori')
    # Sum-of-squares cost, scaled by the number of time points.
    J = ((tumori-Eh)**2/Eh.shape[0]).sum()
    # disconnected_inputs='ignore' silences errors if some p0 entries do
    # not reach J through the graph.
    dJ = tensor.grad(J, p0, disconnected_inputs='ignore')
    step_size = tensor.scalar('step_size', dtype=theano.config.floatX)
    # Compiled update: one gradient-descent step applied in place to the
    # shared p0, re-clamped into [1e-10, 2]; returns the (pre-update) cost.
    # NOTE(review): updates is passed as a set of one (var, expr) pair —
    # Theano accepts it, but a dict or list of pairs is the usual form.
    f_up = theano.function([step_size],J, updates = {(p0,
        tensor.minimum(tensor.maximum(p0-step_size*dJ,1e-10),2))})
    # Optimization
    max_iter = 1000
    step_size0 = 0.01
    disp_freq = 10
    t0 = time.time()
    for n_updates in xrange(max_iter):
        # 1/(1 + 0.001*k) step-size decay; note this Python float shadows
        # the symbolic 'step_size' scalar defined above (harmless here).
        step_size = step_size0 / (n_updates*0.001 + 1)
        J_curr = f_up(step_size)
        if numpy.mod(n_updates, disp_freq)==0:
            print n_updates, 'Cost', J_curr
    t1 = time.time()
    print 'Done'
    print("Time taken %s" % (t1-t0))
    # Pull the fitted parameters back from the (possibly GPU-resident)
    # shared variable and persist them.
    solution = p0.get_value()
    numpy.save('solution.npy',solution)
# Run the fitting pipeline only when executed as a script, not on import.
if __name__== "__main__":
    main()