I'm trying to implement both momentum and Nesterov momentum in Theano, but I always get exploding gradients, even with a small learning rate.
With plain momentum I had to use a learning rate of 1e-6 to make it converge and avoid exploding gradients, but then the network performed worse than plain SGD. With Nesterov I tried 1e-8 and it is still exploding; even when I use gradient clipping it does not converge and the cost just keeps increasing.
Maybe there is something wrong in my implementation:
import numpy as np
import theano
import theano.tensor as T


class gd_optimizer():
    def __init__(self, classop, params):
        self.classop = classop
        if self.classop in ('momentum', 'nestrov'):
            # one velocity buffer per parameter, initialised to zeros
            self.memory_ = [theano.shared(np.zeros_like(p.get_value()))
                            for p in params]

    def update_param(self, grads, params, learning_rate, momentum):
        updates = []
        if self.classop == 'momentum':
            grad_clipped = grads  # clipping disabled: [T.clip(g, -1, 1) for g in grads]
            for n, (param_i, grad_i) in enumerate(zip(params, grad_clipped)):
                memory = self.memory_[n]
                # grad_clipped = T.clip(grad_i, -1, 1)
                # classical momentum: decay the stored velocity, then take a gradient step
                velocity = momentum * memory - learning_rate * grad_i
                updates.append((memory, velocity))
                updates.append((param_i, param_i + velocity))
                # plain SGD, kept for comparison:
                # updates.append((param_i, param_i - learning_rate * grad_i))
        elif self.classop == 'nestrov':
            grad_clipped = grads  # clipping disabled: [T.clip(g, -1, 1) for g in grads]
            for n, (param_i, grad_i) in enumerate(zip(params, grad_clipped)):
                memory = self.memory_[n]
                # update1 is the new velocity; update2 is the step subtracted from the parameter
                update1 = momentum * memory - learning_rate * grad_i
                update2 = momentum * memory + (1 + momentum) * update1
                updates.append((memory, update1))
                updates.append((param_i, param_i - update2))
        return updates
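
For completeness, here is a minimal sketch of how I wire the returned updates into theano.function. The toy logistic-regression model, the variable names (x, y, W, b), and the hyperparameter values below are only placeholders, not my actual network:

# Usage sketch: toy logistic regression; x, y, W, b and the hyperparameters
# are placeholders, not the real model.
import numpy as np
import theano
import theano.tensor as T

x = T.matrix('x')
y = T.ivector('y')
W = theano.shared(np.zeros((784, 10), dtype=theano.config.floatX), name='W')
b = theano.shared(np.zeros((10,), dtype=theano.config.floatX), name='b')
params = [W, b]

# negative log-likelihood of the correct class
p_y_given_x = T.nnet.softmax(T.dot(x, W) + b)
cost = -T.mean(T.log(p_y_given_x)[T.arange(y.shape[0]), y])
grads = T.grad(cost, params)

opt = gd_optimizer('momentum', params)
updates = opt.update_param(grads, params, learning_rate=1e-3, momentum=0.9)
train_fn = theano.function([x, y], cost, updates=updates)

train_fn is then called on minibatches in the usual way.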