Hey all, I'm doing classification for two classes using a denoising autoencoder (DAE), but I'm getting an error that I don't understand, and I can't see what is wrong with my code.
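For reference, my snippet assumes roughly the following setup (hedged: the module paths are from Lasagne and the Parmesan ladder-network example and may differ slightly; names such as init, noise, unit, num_inputs, num_labels, lambdas, sym_x, sym_t, optimizer, sh_lr, gradclip, batch_size and get_unlab are defined elsewhere in my script):

import numpy as np
import theano
import theano.tensor as T
import lasagne
import parmesan.utils  # for parmesan.utils.ConfusionMatrix used below
from lasagne.layers import (InputLayer, DenseLayer, NonlinearityLayer,
                            GaussianNoiseLayer)
from lasagne.nonlinearities import identity, softmax
from parmesan.layers import (NormalizeLayer, ScaleAndShiftLayer,
                             ListIndexLayer, DenoiseLayer,
                             DecoderNormalizeLayer)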
z_pre0 = InputLayer(shape=(None, num_inputs))
z0 = z_pre0 # for consistency with other layers
z_noise0 = GaussianNoiseLayer(z0, sigma=noise, name='enc_noise0')
h0 = z_noise0
def create_encoder(incoming, num_units, nonlinearity, layer_num):
    i = layer_num
    z_pre = DenseLayer(
        incoming=incoming, num_units=num_units, nonlinearity=identity,
        b=None, name='enc_dense%i' % i, W=init)
    norm_list = NormalizeLayer(
        z_pre, return_stats=True, name='enc_normalize%i' % i,
        stat_indices=unlabeled_slice)
    z = ListIndexLayer(norm_list, index=0, name='enc_index%i' % i)
    z_noise = GaussianNoiseLayer(z, sigma=noise, name='enc_noise%i' % i)
    h = NonlinearityLayer(
        ScaleAndShiftLayer(z_noise, name='enc_scale%i' % i),
        nonlinearity=nonlinearity, name='enc_nonlin%i' % i)
    return h, z, z_noise, norm_list
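# Each encoder layer returns its activation h together with the clean
# pre-activation z, the noisy pre-activation z_noise, and norm_list, whose
# batch mean/variance entries are reused by the matching decoder layer.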
def create_decoder(z_hat_in, z_noise, num_units, norm_list, layer_num):
    i = layer_num
    dense = DenseLayer(z_hat_in, num_units=num_units,
                       name='dec_dense%i' % i, W=init, nonlinearity=identity)
    normalize = NormalizeLayer(dense, name='dec_normalize%i' % i)
    u = ScaleAndShiftLayer(normalize, name='dec_scale%i' % i)
    z_hat = DenoiseLayer(u_net=u, z_net=get_unlab(z_noise),
                         name='dec_denoise%i' % i)
    mean = ListIndexLayer(norm_list, index=1, name='dec_index_mean%i' % i)
    var = ListIndexLayer(norm_list, index=2, name='dec_index_var%i' % i)
    z_hat_bn = DecoderNormalizeLayer(z_hat, mean=mean, var=var,
                                     name='dec_decnormalize%i' % i)
    return z_hat, z_hat_bn
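# Each decoder layer combines the top-down signal u with the lateral noisy z
# from the encoder (DenoiseLayer) to form z_hat; z_hat_bn renormalizes z_hat
# with the encoder's batch statistics so it can be compared to the clean z in
# the reconstruction cost.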
h1, z1, z_noise1, norm_list1 = create_encoder(
    h0, num_units=100, nonlinearity=unit, layer_num=1)
h2, z2, z_noise2, norm_list2 = create_encoder(
    h1, num_units=50, nonlinearity=unit, layer_num=2)
h3, z3, z_noise3, norm_list3 = create_encoder(
    h2, num_units=25, nonlinearity=unit, layer_num=3)
h4, z4, z_noise4, norm_list4 = create_encoder(
    h3, num_units=25, nonlinearity=unit, layer_num=4)
h5, z5, z_noise5, norm_list5 = create_encoder(
    h4, num_units=25, nonlinearity=unit, layer_num=5)
h6, z6, z_noise6, norm_list6 = create_encoder(
    h5, num_units=2, nonlinearity=softmax, layer_num=6)
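# Encoder stack: num_inputs -> 100 -> 50 -> 25 -> 25 -> 25 -> 2, with a
# softmax on the 2-unit top layer for the two-class prediction.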
l_out_enc = h6
h6_dec = l_out_enc
# print "y_weights_decoder:", lasagne.layers.get_output(h6_dec, sym_x).eval({sym_x: x_train[:200]}).shape
#
# ###############
# # DECODER #
# ###############
#
# ##### Decoder Layer 6
u6 = ScaleAndShiftLayer(NormalizeLayer(
    h6_dec, name='dec_normalize6'), name='dec_scale6')
z_hat6 = DenoiseLayer(u_net=u6, z_net=get_unlab(z_noise6),
                      name='dec_denoise6')
mean6 = ListIndexLayer(norm_list6, index=1, name='dec_index_mean6')
var6 = ListIndexLayer(norm_list6, index=2, name='dec_index_var6')
z_hat_bn6 = DecoderNormalizeLayer(
    z_hat6, mean=mean6, var=var6, name='dec_decnormalize6')
# ###########################
#
z_hat5, z_hat_bn5 = create_decoder(z_hat6, z_noise5, 25, norm_list5, 5)
z_hat4, z_hat_bn4 = create_decoder(z_hat5, z_noise4, 25, norm_list4, 4)
z_hat3, z_hat_bn3 = create_decoder(z_hat4, z_noise3, 25, norm_list3, 3)
z_hat2, z_hat_bn2 = create_decoder(z_hat3, z_noise2, 50, norm_list2, 2)
z_hat1, z_hat_bn1 = create_decoder(z_hat2, z_noise1, 100, norm_list1, 1)
#
#
# ############################# Decoder Layer 0
# I need this because I also have h0, i.e. the input layer...
u0 = ScaleAndShiftLayer(  # refactor this...
    NormalizeLayer(
        DenseLayer(z_hat1, num_units=num_inputs, name='dec_dense0',
                   W=init, nonlinearity=identity),
        name='dec_normalize0'), name='dec_scale0')
z_hat0 = DenoiseLayer(u_net=u0, z_net=get_unlab(z_noise0),
                      name='dec_denoise0')
z_hat_bn0 = z_hat0  # for consistency
# #############################
#
#
[enc_out_clean, z0_clean, z1_clean, z2_clean,
 z3_clean, z4_clean, z5_clean, z6_clean] = lasagne.layers.get_output(
    [l_out_enc, z0, z1, z2, z3, z4, z5, z6], sym_x, deterministic=True)
#
# Clean pass of the encoder; note that these contain both labeled
# and unlabeled samples, so we need to slice.
z0_clean = z0_clean[num_labels:]
z1_clean = z1_clean[num_labels:]
z2_clean = z2_clean[num_labels:]
z3_clean = z3_clean[num_labels:]
z4_clean = z4_clean[num_labels:]
z5_clean = z5_clean[num_labels:]
z6_clean = z6_clean[num_labels:]
#
# Noisy pass through encoder + decoder.
# The decoder output is unlabeled only, because we slice the top h.
[out_enc_noisy, z_hat_bn0_noisy, z_hat_bn1_noisy,
 z_hat_bn2_noisy, z_hat_bn3_noisy, z_hat_bn4_noisy,
 z_hat_bn5_noisy, z_hat_bn6_noisy] = lasagne.layers.get_output(
    [l_out_enc, z_hat_bn0, z_hat_bn1, z_hat_bn2,
     z_hat_bn3, z_hat_bn4, z_hat_bn5, z_hat_bn6],
    sym_x, deterministic=False)
#
#
# We need to cut out the samples with no labels before the cross-entropy.
out_enc_noisy = out_enc_noisy[:num_labels]
costs = [T.mean(T.nnet.categorical_crossentropy(out_enc_noisy, sym_t))]
#
# I checked the Blocks code - they sum over the feature dimension.
costs += [lambdas[6] * T.sqr(z6_clean.flatten(2) -
                             z_hat_bn6_noisy.flatten(2)).mean(axis=1).mean()]
costs += [lambdas[5] * T.sqr(z5_clean.flatten(2) -
                             z_hat_bn5_noisy.flatten(2)).mean(axis=1).mean()]
costs += [lambdas[4] * T.sqr(z4_clean.flatten(2) -
                             z_hat_bn4_noisy.flatten(2)).mean(axis=1).mean()]
costs += [lambdas[3] * T.sqr(z3_clean.flatten(2) -
                             z_hat_bn3_noisy.flatten(2)).mean(axis=1).mean()]
costs += [lambdas[2] * T.sqr(z2_clean.flatten(2) -
                             z_hat_bn2_noisy.flatten(2)).mean(axis=1).mean()]
costs += [lambdas[1] * T.sqr(z1_clean.flatten(2) -
                             z_hat_bn1_noisy.flatten(2)).mean(axis=1).mean()]
costs += [lambdas[0] * T.sqr(z0_clean.flatten(2) -
                             z_hat_bn0_noisy.flatten(2)).mean(axis=1).mean()]
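# Total cost below: supervised cross-entropy on the labeled samples plus the
# lambda-weighted per-layer denoising reconstruction terms (mean over the
# feature dimension, then over samples).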
cost = sum(costs)
# prediction passes
collect_out = lasagne.layers.get_output(l_out_enc, sym_x,
                                        deterministic=True, collect=True)
# Get list of all trainable parameters in the network.
all_params = lasagne.layers.get_all_params(z_hat_bn0, trainable=True)
# print ""*20 + "PARAMETERS" + "-"*20
# for p in all_params:
# print p.name, p.get_value().shape
# print "-"*60
if gradclip is not None:
    all_grads = [T.clip(g, -gradclip, gradclip)
                 for g in T.grad(cost, all_params)]
else:
    all_grads = T.grad(cost, all_params)
updates = optimizer(all_grads, all_params, learning_rate=sh_lr)
f_clean = theano.function([sym_x], enc_out_clean)
f_train = theano.function([sym_x, sym_t],
                          [cost, out_enc_noisy] + costs,
                          updates=updates, on_unused_input='warn')
# Our current implementation of batchnormalization collects the statistics
# by passing the entire training dataset through the network. This collects
# the correct statistics but is not possible for larger datasets...
f_collect = theano.function([sym_x],  # NO UPDATES !!!!!!! FOR COLLECT
                            [collect_out], on_unused_input='warn')
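# Note: f_clean returns the deterministic encoder output, while f_collect is
# meant to be run once over the full training set (no parameter updates) to
# gather the batch-norm statistics mentioned above, e.g. a single
# f_collect(<full training matrix>) call after training (illustrative only).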
def train_epoch_semisupervised(x,y):
    confusion_train = parmesan.utils.ConfusionMatrix(num_classes)
    losses = []
    shuffle = np.random.permutation(x.shape[0])
    x = x[shuffle]
    for i in range(num_batches_train):
        idx = range(i*batch_size, (i+1)*batch_size)
        x_unsup = x[idx]
        # add labels
        x_batch = np.concatenate([x_train_lab, x_unsup], axis=0)
        # nb same targets all the time...
        print(x_batch.shape, targets_train_lab.shape)
        output = f_train(x_batch, targets_train_lab)
        batch_loss, net_out = output[0], output[1]
        layer_costs = output[2:]
        # cut out preds with labels
        net_out = net_out[:num_labels]
        preds = np.argmax(net_out, axis=-1)
        confusion_train.batchadd(preds, targets_train_lab)
        losses += [batch_loss]
    return confusion_train, losses, layer_costs
train = np.memmap('/home/usr/train2', dtype='float32', mode='r',
                  shape=(26000, 784))
# print(x_train)
targets = np.memmap('/home/usr/label2', dtype='int32', mode='r',
                    shape=(26000, 1))
# targets = np.random.randint(2, size=26000)
# targets = np.reshape(targets, (26000,1)).astype('int32')
train_idxs = [i for i in range(train.shape[0])]
np.random.shuffle(train_idxs)
num_batches_train = 70
def next_batch(start, train, labels, batch_size=100):
    newstart = start + batch_size
    if newstart > train.shape[0]:
        newstart = 0
    idxs = train_idxs[start:start + batch_size]
    # print(idxs)
    temp = labels[idxs, :]
    temp = np.reshape(temp, (100,))
    return train[idxs, :], temp, newstart
np.random.seed(1)
shuffle = np.random.permutation(train.shape[0])
x_train_lab = train[:num_labels]
targets_train_lab = targets[:num_labels]
targets_train_lab = np.reshape(targets_train_lab, (100,))
labeled_slice = slice(0, num_labels)
unlabeled_slice = slice(num_labels, 2 * num_labels)
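# Note on the slices above: each batch passed to f_train is built as
# [labeled ; unlabeled] along axis 0 (see the np.concatenate in
# train_epoch_semisupervised), so labeled_slice covers the num_labels labeled
# rows and unlabeled_slice the num_labels unlabeled rows that follow.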
for i in range(num_batches_train):
    x_train, targets_train, newstart = next_batch(i*batch_size, train,
                                                  targets, batch_size=100)
    confusion_train, losses_train, layer_costs = \
        train_epoch_semisupervised(x_train, targets_train)
It gives me the following error:
  File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 618, in __call__
    storage_map=self.fn.storage_map)
  File "/usr/local/lib/python2.7/dist-packages/theano/gof/link.py", line 297, in raise_with_op
    reraise(exc_type, exc_value, exc_trace)
  File "/usr/local/lib/python2.7/dist-packages/theano/compile/function_module.py", line 607, in __call__
    outputs = self.fn()
ValueError: Input dimension mis-match. (input[2].shape[0] = 0, input[4].shape[0] = 100)
Apply node that caused the error: Elemwise{Composite{scalar_sigmoid((i0 + (i1 * i2) + (i3 * i4) + (i4 * i5 * i2)))}}(InplaceDimShuffle{x,0}.0, InplaceDimShuffle{x,0}.0, Subtensor{int64::}.0, InplaceDimShuffle{x,0}.0, Elemwise{Composite{((i0 * i1) + i2)}}.0, InplaceDimShuffle{x,0}.0)
Toposort index: 524
Inputs types: [TensorType(float64, row), TensorType(float64, row), TensorType(float64, matrix), TensorType(float64, row), TensorType(float64, matrix), TensorType(float64, row)]
Inputs shapes: [(1, 2), (1, 2), (0, 2), (1, 2), (100, 2), (1, 2)]
Inputs strides: [(16, 8), (16, 8), (16, 8), (16, 8), (16, 8), (16, 8)]
Inputs values: [array([[ 0., 0.]]), array([[ 1., 1.]]), array([], shape=(0, 2), dtype=float64), array([[ 0., 0.]]), 'not shown', array([[ 0., 0.]])]
Outputs clients: [[Elemwise{Composite{((i0 * i1 * i2) + (i3 * i2 * i4 * i5
* i1 * i6) + (i1 * i7) + (i8 * i4 * i5 * i1 *
i6))}}(InplaceDimShuffle{x,0}.0, Gemm{inplace}.0, Elemwise{Composite{((i0 *
i1) + i2)}}.0, InplaceDimShuffle{x,0}.0, Elemwise{sub}.0,
Elemwise{Composite{scalar_sigmoid((i0 + (i1 * i2) + (i3 * i4) + (i4 * i5 *
i2)))}}.0, InplaceDimShuffle{x,0}.0, InplaceDimShuffle{x,0}.0,
InplaceDimShuffle{x,0}.0), Elemwise{Composite{((i0 * i1 * i2) + (i0 * i3) +
(i4 * i5 * i6 * i0 * i7) + (i5 * i8 * i2 * i6 * i0 *
i7))}}(Gemm{inplace}.0, InplaceDimShuffle{x,0}.0, Subtensor{int64::}.0,
InplaceDimShuffle{x,0}.0, InplaceDimShuffle{x,0}.0, Elemwise{sub}.0,
Elemwise{Composite{scalar_sigmoid((i0 + (i1 * i2) + (i3 * i4) + (i4 * i5 *
i2)))}}.0, InplaceDimShuffle{x,0}.0, InplaceDimShuffle{x,0}.0),
Elemwise{Composite{((i0 * i1) + (i2 * i3) + (i3 * i4 * i5) + i6 + (i7 *
i5))}}(InplaceDimShuffle{x,0}.0, Elemwise{Composite{scalar_sigmoid((i0 +
(i1 * i2) + (i3 * i4) + (i4 * i5 * i2)))}}.0, InplaceDimShuffle{x,0}.0,
Elemwise{Composite{((i0 * i1) + i2)}}.0, InplaceDimShuffle{x,0}.0,
Subtensor{int64::}.0, InplaceDimShuffle{x,0}.0, InplaceDimShuffle{x,0}.0),
Elemwise{sub}(TensorConstant{(1, 1) of 1.0},
Elemwise{Composite{scalar_sigmoid((i0 + (i1 * i2) + (i3 * i4) + (i4 * i5 *
i2)))}}.0), Elemwise{mul}(Elemwise{sub}.0,
Elemwise{Composite{scalar_sigmoid((i0 + (i1 * i2) + (i3 * i4) + (i4 * i5 *
i2)))}}.0, Gemm{inplace}.0, InplaceDimShuffle{x,0}.0),
Elemwise{mul}(Subtensor{int64::}.0, Elemwise{sub}.0,
Elemwise{Composite{scalar_sigmoid((i0 + (i1 * i2) + (i3 * i4) + (i4 * i5 *
i2)))}}.0, Gemm{inplace}.0, InplaceDimShuffle{x,0}.0),
Elemwise{mul}(Elemwise{Composite{((i0 * i1) + i2)}}.0, Elemwise{sub}.0,
Elemwise{Composite{scalar_sigmoid((i0 + (i1 * i2) + (i3 * i4) + (i4 * i5 *
i2)))}}.0, Gemm{inplace}.0, InplaceDimShuffle{x,0}.0),
Elemwise{mul}(Subtensor{int64::}.0, Elemwise{Composite{((i0 * i1) +
i2)}}.0, Elemwise{sub}.0, Elemwise{Composite{scalar_sigmoid((i0 + (i1 * i2)
+ (i3 * i4) + (i4 * i5 * i2)))}}.0, Gemm{inplace}.0,
InplaceDimShuffle{x,0}.0), Elemwise{mul}(Gemm{inplace}.0,
Elemwise{Composite{scalar_sigmoid((i0 + (i1 * i2) + (i3 * i4) + (i4 * i5 *
i2)))}}.0)]]
HINT: Re-running with most Theano optimization disabled could give you a
back-trace of when this node was created. This can be done with by setting
the Theano flag 'optimizer=fast_compile'. If that does not work, Theano
optimizations can be disabled with 'optimizer=None'.
HINT: Use the Theano flag 'exception_verbosity=high' for a debugprint and
storage map footprint of this apply node.
Could you please help me?