[ https://issues.apache.org/jira/browse/OPENNLP-1009?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=16658132#comment-16658132 ]

ASF GitHub Bot commented on OPENNLP-1009:
-----------------------------------------

tteofili closed pull request #20: OPENNLP-1009 - upgrade to dl4j 1.0.0-beta2
URL: https://github.com/apache/opennlp-sandbox/pull/20
 
 
   

This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:

diff --git a/opennlp-dl/pom.xml b/opennlp-dl/pom.xml
index cfb1a1b..829cf6a 100644
--- a/opennlp-dl/pom.xml
+++ b/opennlp-dl/pom.xml
@@ -26,7 +26,7 @@
 
   <properties>
     <project.build.sourceEncoding>UTF-8</project.build.sourceEncoding>
-    <nd4j.version>0.9.1</nd4j.version>
+    <nd4j.version>1.0.0-beta2</nd4j.version>
   </properties>
 
   <dependencies>
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/DataReader.java b/opennlp-dl/src/main/java/opennlp/tools/dl/DataReader.java
index 86af123..4f7b5c3 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/DataReader.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/DataReader.java
@@ -232,11 +232,6 @@ public DataSet next(int batchSize) {
         return new DataSet(features, labels, featuresMask, labelsMask);
     }
 
-    @Override
-    public int totalExamples() {
-        return this.records.size();
-    }
-
     @Override
     public int inputColumns() {
         return this.embedder.getVectorSize();
@@ -271,16 +266,6 @@ public int batch() {
         return this.batchSize;
     }
 
-    @Override
-    public int cursor() {
-        return this.cursor;
-    }
-
-    @Override
-    public int numExamples() {
-        return totalExamples();
-    }
-
     @Override
     public void setPreProcessor(DataSetPreProcessor preProcessor) {
         throw new UnsupportedOperationException();
@@ -298,7 +283,7 @@ public DataSetPreProcessor getPreProcessor() {
 
     @Override
     public boolean hasNext() {
-        return cursor < totalExamples() - 1;
+        return cursor < this.records.size() - 1;
     }
 
     @Override
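
Context for the DataReader.java changes above: in dl4j/nd4j 1.0.0-beta2 the
DataSetIterator interface no longer declares totalExamples(), cursor() or
numExamples(), which is why the overrides are deleted and hasNext() reads the
record count directly. A minimal sketch of the resulting shape of an
implementation under beta2 (class and field names are illustrative, not taken
from the patch):

    import java.util.List;
    import org.nd4j.linalg.dataset.DataSet;
    import org.nd4j.linalg.dataset.api.iterator.DataSetIterator;

    // Sketch, assuming nd4j 1.0.0-beta2: the progress bookkeeping that the old
    // interface exposed via cursor()/totalExamples() is now private state.
    abstract class BookkeepingIterator implements DataSetIterator {
        private final List<DataSet> records;
        private int cursor; // no longer part of the interface

        BookkeepingIterator(List<DataSet> records) { this.records = records; }

        @Override public boolean hasNext() { return cursor < records.size(); }
        @Override public DataSet next() { return records.get(cursor++); }
        @Override public void reset() { cursor = 0; }
        // next(int), batch(), inputColumns(), totalOutcomes(), getLabels(),
        // resetSupported(), asyncSupported() and the preprocessor accessors
        // are still required, unchanged from 0.9.1.
    }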
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/NameFinderDL.java b/opennlp-dl/src/main/java/opennlp/tools/dl/NameFinderDL.java
index 7547196..3a0ad54 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/NameFinderDL.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/NameFinderDL.java
@@ -42,6 +42,7 @@
 import org.nd4j.linalg.factory.Nd4j;
 import org.nd4j.linalg.indexing.INDArrayIndex;
 import org.nd4j.linalg.indexing.NDArrayIndex;
+import org.nd4j.linalg.learning.config.RmsProp;
 import org.nd4j.linalg.lossfunctions.LossFunctions;
 
 import opennlp.tools.namefind.BioCodec;
@@ -159,12 +160,9 @@ public static MultiLayerNetwork train(WordVectors wordVectors, ObjectStream<Name
     int layerSize = 256;
 
     MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
-        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT).iterations(1)
-        .updater(Updater.RMSPROP)
-        .regularization(true).l2(0.001)
+        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
+        .updater(new RmsProp(0.01)).l2(0.001)
         .weightInit(WeightInit.XAVIER)
-        // .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue).gradientNormalizationThreshold(1.0)
-        .learningRate(0.01)
         .list()
         .layer(0, new GravesLSTM.Builder().nIn(vectorSize).nOut(layerSize)
             .activation(Activation.TANH).build())
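
The NameFinderDL.java hunk above reflects the configuration builder changes in
1.0.0-beta2: the Updater enum plus a separate learningRate(double) call give
way to IUpdater instances (org.nd4j.linalg.learning.config.RmsProp here) that
carry their own learning rate, and regularization(true) and iterations(int)
are gone. A before/after sketch, assuming the beta2 builder API and the
imports already present in NameFinderDL.java; vectorSize, layerSize and
nLabels are placeholders, and the output layer is illustrative rather than
copied from the patch:

    // 0.9.1:  .updater(Updater.RMSPROP).learningRate(0.01)
    //         .regularization(true).l2(0.001).iterations(1)
    // 1.0.0-beta2:
    MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
        .optimizationAlgo(OptimizationAlgorithm.STOCHASTIC_GRADIENT_DESCENT)
        .updater(new RmsProp(0.01))  // learning rate moves into the updater
        .l2(0.001)                   // applies directly, no regularization(true)
        .weightInit(WeightInit.XAVIER)
        .list()
        .layer(0, new GravesLSTM.Builder().nIn(vectorSize).nOut(layerSize)
            .activation(Activation.TANH).build())
        .layer(1, new RnnOutputLayer.Builder(LossFunctions.LossFunction.MCXENT)
            .activation(Activation.SOFTMAX).nIn(layerSize).nOut(nLabels).build())
        .build();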
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/NameSampleDataSetIterator.java b/opennlp-dl/src/main/java/opennlp/tools/dl/NameSampleDataSetIterator.java
index a420220..d6d171a 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/NameSampleDataSetIterator.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/NameSampleDataSetIterator.java
@@ -140,7 +140,7 @@ public DataSet next(int num) {
       }
 
       if (sample != null) {
-        INDArray feature = sample.getFeatureMatrix();
+        INDArray feature = sample.getFeatures();
         features.put(new INDArrayIndex[] {NDArrayIndex.point(i)}, feature.get(NDArrayIndex.point(0)));
 
         feature.get(new INDArrayIndex[] {NDArrayIndex.point(0), NDArrayIndex.all(),
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCat.java b/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCat.java
index 299a742..9e91484 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCat.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCat.java
@@ -60,7 +60,7 @@ public NeuralDocCat(NeuralDocCatModel model) {
         INDArray seqFeatures = this.model.getGloves().embed(text, this.model.getMaxSeqLen());
 
         INDArray networkOutput = this.model.getNetwork().output(seqFeatures);
-        int timeSeriesLength = networkOutput.size(2);
+        long timeSeriesLength = networkOutput.size(2);
         INDArray probsAtLastWord = networkOutput.get(NDArrayIndex.point(0),
                 NDArrayIndex.all(), NDArrayIndex.point(timeSeriesLength - 1));
 
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java b/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java
index 9ce3a3f..697bff0 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/NeuralDocCatTrainer.java
@@ -135,12 +135,11 @@ public MultiLayerNetwork createNetwork(int vectorSize) {
         //TODO: the below network params should be configurable from CLI or settings file
         //Set up network configuration
         MultiLayerConfiguration conf = new NeuralNetConfiguration.Builder()
-                .updater(new RmsProp(0.9)) // ADAM .adamMeanDecay(0.9).adamVarDecay(0.999)
-                .regularization(true).l2(1e-5)
+                .updater(new RmsProp(args.learningRate)) // ADAM .adamMeanDecay(0.9).adamVarDecay(0.999)
+                .l2(1e-5)
                 .weightInit(WeightInit.XAVIER)
                 .gradientNormalization(GradientNormalization.ClipElementWiseAbsoluteValue)
                 .gradientNormalizationThreshold(1.0)
-                .learningRate(args.learningRate)
                 .list()
                 .layer(0, new GravesLSTM.Builder()
                         .nIn(vectorSize)
@@ -177,8 +176,8 @@ public void train() {
     public void train(int nEpochs, DataReader train, DataReader validation) {
         assert model != null;
         assert train != null;
-        LOG.info("Starting training...\nTotal epochs={}, Training Size={}, Validation Size={}", nEpochs,
-                train.totalExamples(), validation == null ? null : validation.totalExamples());
+//        LOG.info("Starting training...\nTotal epochs={}, Training Size={}, Validation Size={}", nEpochs,
+//                train.(), validation == null ? null : validation.totalExamples());
         for (int i = 0; i < nEpochs; i++) {
             model.getNetwork().fit(train);
             train.reset();
@@ -190,7 +189,7 @@ public void train(int nEpochs, DataReader train, DataReader validation) {
                 Evaluation evaluation = new Evaluation();
                 while (validation.hasNext()) {
                     DataSet t = validation.next();
-                    INDArray features = t.getFeatureMatrix();
+                    INDArray features = t.getFeatures();
                     INDArray labels = t.getLabels();
                     INDArray inMask = t.getFeaturesMaskArray();
                     INDArray outMask = t.getLabelsMaskArray();
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java b/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java
index e297cc5..7547cce 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/RNN.java
@@ -35,6 +35,7 @@
 import org.apache.commons.math3.util.Pair;
 import org.nd4j.linalg.api.iter.NdIndexIterator;
 import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.impl.transforms.OldSoftMax;
 import org.nd4j.linalg.api.ops.impl.transforms.SetRange;
 import org.nd4j.linalg.api.ops.impl.transforms.SoftMax;
 import org.nd4j.linalg.factory.Nd4j;
@@ -241,7 +242,7 @@ private double lossFun(INDArray inputs, INDArray targets, INDArray dWxh, INDArra
         ys = init(inputs.length(), yst.shape());
       }
       ys.putRow(t, yst);
-      INDArray pst = Nd4j.getExecutioner().execAndReturn(new SoftMax(yst)); // probabilities for next chars
+      INDArray pst = Nd4j.getExecutioner().execAndReturn(new OldSoftMax(yst)); // probabilities for next chars
       if (ps == null) {
         ps = init(inputs.length(), pst.shape());
       }
@@ -251,7 +252,7 @@ private double lossFun(INDArray inputs, INDArray targets, INDArray dWxh, INDArra
 
     // backward pass: compute gradients going backwards
     INDArray dhNext = Nd4j.zerosLike(hPrev);
-    for (int t = inputs.length() - 1; t >= 0; t--) {
+    for (int t = (int) (inputs.length() - 1); t >= 0; t--) {
       INDArray dy = ps.getRow(t);
       dy.putRow(targets.getInt(t), dy.getRow(targets.getInt(t)).sub(1)); // backprop into y
       INDArray hst = hs.getRow(t);
@@ -271,9 +272,9 @@ private double lossFun(INDArray inputs, INDArray targets, INDArray dWxh, INDArra
     return loss;
   }
 
-  protected INDArray init(int t, int[] aShape) {
+  protected INDArray init(long t, long[] aShape) {
     INDArray as;
-    int[] shape = new int[1 + aShape.length];
+    long[] shape = new long[1 + aShape.length];
     shape[0] = t;
     System.arraycopy(aShape, 0, shape, 1, aShape.length);
     as = Nd4j.create(shape);
@@ -295,7 +296,7 @@ public String sample(int seedIx) {
     for (int t = 0; t < sampleSize; t++) {
       h = Transforms.tanh(wxh.mmul(x).add(whh.mmul(h)).add(bh));
       INDArray y = (why.mmul(h)).add(by);
-      INDArray pm = Nd4j.getExecutioner().execAndReturn(new SoftMax(y)).ravel();
+      INDArray pm = Nd4j.getExecutioner().execAndReturn(new OldSoftMax(y)).ravel();
 
       List<Pair<Integer, Double>> d = new LinkedList<>();
       for (int pi = 0; pi < vocabSize; pi++) {
@@ -321,11 +322,12 @@ protected String getSampleString(INDArray ixes) {
 
     NdIndexIterator ndIndexIterator = new NdIndexIterator(ixes.shape());
     while (ndIndexIterator.hasNext()) {
-      int[] next = ndIndexIterator.next();
+      long[] next = ndIndexIterator.next();
       if (!useChars && txt.length() > 0) {
         txt.append(' ');
       }
-      txt.append(ixToChar.get(ixes.getInt(next)));
+      int aDouble = (int) ixes.getDouble(next);
+      txt.append(ixToChar.get(aDouble));
     }
     return txt.toString();
   }
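
The SoftMax-to-OldSoftMax substitutions in RNN.java above and StackedRNN.java
below are a rename in 1.0.0-beta2: the legacy element-wise softmax op these
classes execute directly now lives at
org.nd4j.linalg.api.ops.impl.transforms.OldSoftMax. A self-contained sketch,
assuming beta2, showing that Transforms.softmax would yield the same
probabilities for a vector without naming the op class (it returns a copy,
whereas execAndReturn(new OldSoftMax(y)) operates on y in place):

    import org.nd4j.linalg.api.ndarray.INDArray;
    import org.nd4j.linalg.factory.Nd4j;
    import org.nd4j.linalg.ops.transforms.Transforms;

    public class SoftmaxSketch {
        public static void main(String[] args) {
            INDArray y = Nd4j.create(new double[] {1.0, 2.0, 3.0});
            INDArray p = Transforms.softmax(y); // y is left unmodified
            System.out.println(p);              // approx [0.0900, 0.2447, 0.6652]
        }
    }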
diff --git a/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java b/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
index fe56d8f..6a187c2 100644
--- a/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
+++ b/opennlp-dl/src/main/java/opennlp/tools/dl/StackedRNN.java
@@ -29,6 +29,7 @@
 import org.apache.commons.math3.distribution.EnumeratedDistribution;
 import org.apache.commons.math3.util.Pair;
 import org.nd4j.linalg.api.ndarray.INDArray;
+import org.nd4j.linalg.api.ops.impl.transforms.OldSoftMax;
 import org.nd4j.linalg.api.ops.impl.transforms.ReplaceNans;
 import org.nd4j.linalg.api.ops.impl.transforms.SoftMax;
 import org.nd4j.linalg.factory.Nd4j;
@@ -251,7 +252,7 @@ private double lossFun(INDArray inputs, INDArray targets, INDArray dWxh, INDArra
       }
       ys.putRow(t, yst);
 
-      INDArray pst = Nd4j.getExecutioner().execAndReturn(new ReplaceNans(Nd4j.getExecutioner().execAndReturn(new SoftMax(yst)), 0d)); // probabilities for next chars
+      INDArray pst = Nd4j.getExecutioner().execAndReturn(new ReplaceNans(Nd4j.getExecutioner().execAndReturn(new OldSoftMax(yst)), 0d)); // probabilities for next chars
       if (ps == null) {
         ps = init(seqLength, pst.shape());
       }
@@ -312,7 +313,7 @@ public String sample(int seedIx) {
       h = Transforms.tanh((wxh.mmul(x)).add(whh.mmul(h)).add(bh));
       h2 = Transforms.tanh((wxh2.mmul(h)).add(whh2.mmul(h2)).add(bh2));
       INDArray y = wh2y.mmul(h2).add(by);
-      INDArray pm = Nd4j.getExecutioner().execAndReturn(new SoftMax(y)).ravel();
+      INDArray pm = Nd4j.getExecutioner().execAndReturn(new OldSoftMax(y)).ravel();
 
       List<Pair<Integer, Double>> d = new LinkedList<>();
       for (int pi = 0; pi < vocabSize; pi++) {
diff --git a/opennlp-dl/src/test/java/opennlp/tools/dl/StackedRNNTest.java b/opennlp-dl/src/test/java/opennlp/tools/dl/StackedRNNTest.java
index 6a61642..8c81565 100644
--- a/opennlp-dl/src/test/java/opennlp/tools/dl/StackedRNNTest.java
+++ b/opennlp-dl/src/test/java/opennlp/tools/dl/StackedRNNTest.java
@@ -63,7 +63,7 @@ public void setUp() throws Exception {
   @Parameterized.Parameters
   public static Collection<Object[]> data() {
     return Arrays.asList(new Object[][] {
-        {1e-2f, 25, 50, 4},
+        {1e-3f, 25, 50, 4},
     });
   }
 
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
 
For queries about this service, please contact Infrastructure at:
[email protected]


> Experiment with deep learning based algorithms in OpenNLP
> ---------------------------------------------------------
>
>                 Key: OPENNLP-1009
>                 URL: https://issues.apache.org/jira/browse/OPENNLP-1009
>             Project: OpenNLP
>          Issue Type: Task
>            Reporter: Tommaso Teofili
>            Priority: Major
>
> It'd be interesting to experiment with deep learning algorithms in OpenNLP.
> Possible experiments can include using recurrent and / or convolutional 
> neural networks for NER, language detection, document categorization, etc.
> We could just leverage such algorithms during the training phase and leave the
> prediction phase as it is, so as to minimize impact on API / users.
> From the tooling perspective we could start experimenting with DeepLearning4J 
> and Nd4j.



--
This message was sent by Atlassian JIRA
(v7.6.3#76005)
