Author: dligach Date: Mon Sep 19 15:40:48 2016 New Revision: 1761469 URL: http://svn.apache.org/viewvc?rev=1761469&view=rev Log: cleaned up Chen's scripts a bit more
Modified: ctakes/trunk/ctakes-temporal/scripts/nn/predict.py ctakes/trunk/ctakes-temporal/scripts/nn/train_and_package.py Modified: ctakes/trunk/ctakes-temporal/scripts/nn/predict.py URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/scripts/nn/predict.py?rev=1761469&r1=1761468&r2=1761469&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/scripts/nn/predict.py (original) +++ ctakes/trunk/ctakes-temporal/scripts/nn/predict.py Mon Sep 19 15:40:48 2016 @@ -21,12 +21,11 @@ def main(args): 2:'CONTAINS-1' } - ## Load models and weights: - #outcomes = ctk_io.get_outcome_array(working_dir) - model_dir = "/Users/Dima/Loyola/Workspaces/cTakes/ctakes/ctakes-temporal/target/eval/thyme/train_and_test/event-time" + ctakes_root = '/Users/Dima/Loyola/Workspaces/cTakes/ctakes/' + target_dir = 'ctakes-temporal/target/eval/thyme/train_and_test/event-time/' + model_dir = ctakes_root + target_dir maxlen = pickle.load(open(os.path.join(model_dir, "maxlen.p"), "rb")) alphabet = pickle.load(open(os.path.join(model_dir, "alphabet.p"), "rb")) - #print("Outcomes array is %s" % (outcomes) ) model = model_from_json(open(os.path.join(model_dir, "model_0.json")).read()) model.load_weights(os.path.join(model_dir, "model_0.h5")) @@ -36,26 +35,25 @@ def main(args): if not line: break - ## Convert the line of Strings to lists of indices feats=[] for unigram in line.rstrip().split(): if(alphabet.has_key(unigram)): feats.append(alphabet[unigram]) else: feats.append(alphabet["none"]) - if(len(feats)> maxlen): + + if(len(feats) > maxlen): feats=feats[0:maxlen] test_x = pad_sequences([feats], maxlen=maxlen) - #feats = np.reshape(feats, (1, 6, input_dims / 6)) - #feats = np.reshape(feats, (1, input_dims)) X_dup = [] X_dup.append(test_x) X_dup.append(test_x) X_dup.append(test_x) + X_dup.append(test_x) out = model.predict(X_dup, batch_size=50)[0] - # print("Out is %s and decision is %d" % (out, out.argmax())) + except 
KeyboardInterrupt: sys.stderr.write("Caught keyboard interrupt\n") break @@ -65,12 +63,10 @@ def main(args): break out_str = int2label[out.argmax()] - print(out_str) sys.stdout.flush() sys.exit(0) - if __name__ == "__main__": main(sys.argv[1:]) Modified: ctakes/trunk/ctakes-temporal/scripts/nn/train_and_package.py URL: http://svn.apache.org/viewvc/ctakes/trunk/ctakes-temporal/scripts/nn/train_and_package.py?rev=1761469&r1=1761468&r2=1761469&view=diff ============================================================================== --- ctakes/trunk/ctakes-temporal/scripts/nn/train_and_package.py (original) +++ ctakes/trunk/ctakes-temporal/scripts/nn/train_and_package.py Mon Sep 19 15:40:48 2016 @@ -1,18 +1,13 @@ #!/usr/bin/env python import sklearn as sk - import numpy as np np.random.seed(1337) - import et_cleartk_io as ctk_io import nn_models - import sys import os.path - import dataset - import keras as k from keras.utils.np_utils import to_categorical from keras.optimizers import RMSprop @@ -22,28 +17,20 @@ from keras.layers import Merge from keras.layers.core import Dense, Dropout, Activation, Flatten from keras.layers.convolutional import Convolution1D, MaxPooling1D from keras.layers.embeddings import Embedding - import pickle def main(args): if len(args) < 1: sys.stderr.write("Error - one required argument: <data directory>\n") sys.exit(-1) - + working_dir = args[0] - - #read in data file -# print("Reading data...") - #Y, X = ctk_io.read_liblinear(working_dir) # ('data_testing/multitask_assertion/train_and_test') data_file = os.path.join(working_dir, 'training-data.liblinear') - # learn alphabet from training and test data - dataset1 = dataset.DatasetProvider([data_file]) + # learn alphabet from training data + data_set = dataset.DatasetProvider([data_file]) # now load training examples and labels - train_x, train_y = dataset1.load(data_file) - - init_vectors = None #used for pre-trained embeddings - + train_x, train_y = data_set.load(data_file) # turn x and y 
into numpy array among other things maxlen = max([len(seq) for seq in train_x]) outcomes = set(train_y) @@ -53,58 +40,55 @@ def main(args): train_y = to_categorical(np.array(train_y), classes) pickle.dump(maxlen, open(os.path.join(working_dir, 'maxlen.p'),"wb")) - pickle.dump(dataset1.alphabet, open(os.path.join(working_dir, 'alphabet.p'),"wb")) - #test_x = pad_sequences(test_x, maxlen=maxlen) - #test_y = to_categorical(np.array(test_y), classes) + pickle.dump(data_set.alphabet, open(os.path.join(working_dir, 'alphabet.p'),"wb")) print 'train_x shape:', train_x.shape print 'train_y shape:', train_y.shape branches = [] # models to be merged train_xs = [] # train x for each branch - #test_xs = [] # test x for each branch - filtlens = "3,4,5" - for filter_len in filtlens.split(','): + for filter_len in '2,3,4,5'.split(','): + branch = Sequential() - branch.add(Embedding(len(dataset1.alphabet), - 300, - input_length=maxlen, - weights=init_vectors)) + branch.add(Embedding(len(data_set.alphabet), + 300, + input_length=maxlen, + weights=None)) branch.add(Convolution1D(nb_filter=200, - filter_length=int(filter_len), - border_mode='valid', - activation='relu', - subsample_length=1)) + filter_length=int(filter_len), + border_mode='valid', + activation='relu', + subsample_length=1)) branch.add(MaxPooling1D(pool_length=2)) branch.add(Flatten()) branches.append(branch) train_xs.append(train_x) - #test_xs.append(test_x) + model = Sequential() model.add(Merge(branches, mode='concat')) - model.add(Dense(250))#cfg.getint('cnn', 'hidden'))) - model.add(Dropout(0.25))#cfg.getfloat('cnn', 'dropout'))) + model.add(Dense(300)) + model.add(Dropout(0.25)) model.add(Activation('relu')) - model.add(Dropout(0.25))#cfg.getfloat('cnn', 'dropout'))) + model.add(Dropout(0.25)) model.add(Dense(classes)) model.add(Activation('softmax')) - optimizer = RMSprop(lr=0.0001,#cfg.getfloat('cnn', 'learnrt'), - rho=0.9, epsilon=1e-08) + optimizer = RMSprop(lr=0.0001, + rho=0.9, epsilon=1e-08) 
model.compile(loss='categorical_crossentropy', - optimizer=optimizer, - metrics=['accuracy']) + optimizer=optimizer, + metrics=['accuracy']) model.fit(train_xs, - train_y, - nb_epoch=3,#cfg.getint('cnn', 'epochs'), - batch_size=50,#cfg.getint('cnn', 'batches'), - verbose=1, - validation_split=0.1, - class_weight=None) + train_y, + nb_epoch=3, + batch_size=50, + verbose=1, + validation_split=0.1, + class_weight=None) model.summary() @@ -114,4 +98,4 @@ def main(args): sys.exit(0) if __name__ == "__main__": - main(sys.argv[1:]) \ No newline at end of file + main(sys.argv[1:])