Author: joern
Date: Mon Oct 12 14:08:36 2015
New Revision: 1708123
URL: http://svn.apache.org/viewvc?rev=1708123&view=rev
Log:
OPENNLP-821 Now builds and runs with 1.6.0
Modified:
opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
Modified:
opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java?rev=1708123&r1=1708122&r2=1708123&view=diff
==============================================================================
--- opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java (original)
+++ opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/CRFTrainer.java Mon Oct 12 14:08:36 2015
@@ -29,8 +29,11 @@ import opennlp.tools.ml.model.Sequence;
import opennlp.tools.ml.model.SequenceClassificationModel;
import opennlp.tools.ml.model.SequenceStream;
import cc.mallet.fst.CRF;
+import cc.mallet.fst.CRFOptimizableByLabelLikelihood;
import cc.mallet.fst.CRFTrainerByLabelLikelihood;
+import cc.mallet.fst.CRFTrainerByValueGradients;
import cc.mallet.fst.Transducer;
+import cc.mallet.optimize.Optimizable;
import cc.mallet.types.Alphabet;
import cc.mallet.types.FeatureVector;
import cc.mallet.types.FeatureVectorSequence;
@@ -45,11 +48,6 @@ import cc.mallet.types.LabelSequence;
// Dummy feature generator ?!
public class CRFTrainer extends AbstractSequenceTrainer {
- public CRFTrainer(Map<String, String> trainParams,
- Map<String, String> reportMap) {
- super(trainParams, reportMap);
- }
-
private int[] getOrders() {
String[] ordersString = "0,1".split(",");
int[] orders = new int[ordersString.length];
@@ -71,7 +69,8 @@ public class CRFTrainer extends Abstract
InstanceList trainingData = new InstanceList(dataAlphabet, targetAlphabet);
int nameIndex = 0;
- for (Sequence sequence : sequences) {
+ Sequence sequence;
+ while ((sequence = sequences.read()) != null) {
FeatureVector featureVectors[] = new FeatureVector[sequence.getEvents().length];
Label malletOutcomes[] = new Label[sequence.getEvents().length];
@@ -132,23 +131,22 @@ public class CRFTrainer extends Abstract
crf);
crfTrainer.setGaussianPriorVariance(1.0);
- // CRFOptimizableByLabelLikelihood optLabel = new
- // CRFOptimizableByLabelLikelihood(
- // crf, trainingData);
-
- // CRF trainer
- // Optimizable.ByGradientValue[] opts = new Optimizable.ByGradientValue[] {
- // optLabel };
+// CRFOptimizableByLabelLikelihood optLabel = new
+// CRFOptimizableByLabelLikelihood(crf, trainingData);
+//
+// // CRF trainer
+// Optimizable.ByGradientValue[] opts = new Optimizable.ByGradientValue[] {
+// optLabel };
// by default, use L-BFGS as the optimizer
- // CRFTrainerByValueGradients crfTrainer = new CRFTrainerByValueGradients(
- // crf, opts);
- // crfTrainer.setMaxResets(0);
+// CRFTrainerByValueGradients crfTrainer = new CRFTrainerByValueGradients(
+// crf, opts);
+// crfTrainer.setMaxResets(0);
// SNIP
crfTrainer.train(trainingData, Integer.MAX_VALUE);
-
+
// can be very similar to the other model
// one important difference is that the feature gen needs to be integrated
// ...
Modified:
opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java?rev=1708123&r1=1708122&r2=1708123&view=diff
==============================================================================
--- opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java (original)
+++ opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/MaxentTrainer.java Mon Oct 12 14:08:36 2015
@@ -37,11 +37,6 @@ import cc.mallet.types.LabelAlphabet;
public class MaxentTrainer extends AbstractEventTrainer {
- public MaxentTrainer(Map<String, String> trainParams,
- Map<String, String> reportMap) {
- super(trainParams, reportMap);
- }
-
@Override
public boolean isSortAndMerge() {
return true;
Modified:
opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java
URL:
http://svn.apache.org/viewvc/opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java?rev=1708123&r1=1708122&r2=1708123&view=diff
==============================================================================
--- opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java (original)
+++ opennlp/sandbox/mallet-addon/src/main/java/opennlp/addons/mallet/TransducerModel.java Mon Oct 12 14:08:36 2015
@@ -27,7 +27,6 @@ import opennlp.tools.ml.model.SequenceCl
import opennlp.tools.util.BeamSearchContextGenerator;
import opennlp.tools.util.SequenceValidator;
import opennlp.tools.util.model.SerializableArtifact;
-import cc.mallet.fst.CRF;
import cc.mallet.fst.MaxLatticeDefault;
import cc.mallet.fst.Transducer;
import cc.mallet.types.Alphabet;
@@ -53,6 +52,14 @@ public class TransducerModel<T> implemen
return bestSequences(1, sequence, additionalContext, cg, validator)[0];
}
+ @Override
+ public opennlp.tools.util.Sequence[] bestSequences(int numSequences,
+ T[] sequence, Object[] additionalContext, double minSequenceScore,
+ BeamSearchContextGenerator<T> cg, SequenceValidator<T> validator) {
+ // TODO: How to implement min score filtering here?
+ return bestSequences(numSequences, sequence, additionalContext, cg, validator);
+ }
+
public opennlp.tools.util.Sequence[] bestSequences(int numSequences,
T[] sequence, Object[] additionalContext,
BeamSearchContextGenerator<T> cg, SequenceValidator<T> validator) {
@@ -121,4 +128,20 @@ public class TransducerModel<T> implemen
public Class<?> getArtifactSerializerClass() {
return TransducerModelSerializer.class;
}
+
+
+
+ @Override
+ public String[] getOutcomes() {
+
+ Alphabet targetAlphabet = model.getInputPipe().getTargetAlphabet();
+
+ String outcomes[] = new String[targetAlphabet.size()];
+
+ for (int i = 0; i < targetAlphabet.size(); i++) {
+ outcomes[i] = targetAlphabet.lookupObject(i).toString();
+ }
+
+ return outcomes;
+ }
}