This is an automated email from the ASF dual-hosted git repository.
kinow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new df91f275 OPENNLP-1401 Enhance JavaDoc in opennlp.tools.chunker package
df91f275 is described below
commit df91f275c15f0abd9c300fcc42dfa662615ab6f9
Author: Martin Wiesner <[email protected]>
AuthorDate: Sun Dec 4 10:13:27 2022 +0100
OPENNLP-1401 Enhance JavaDoc in opennlp.tools.chunker package
- adds missing JavaDoc
- improves existing documentation for clarity
- removes superfluous text
- fixes typos
- adds 'final' modifier where useful and applicable
- adds 'Override' annotation where useful and applicable
- removes deprecated and unused constructor in ChunkerME
---
.../java/opennlp/tools/chunker/ChunkSample.java | 51 +++++++-------
.../tools/chunker/ChunkSampleSequenceStream.java | 15 +++-
.../opennlp/tools/chunker/ChunkSampleStream.java | 1 +
.../tools/chunker/ChunkerContextGenerator.java | 18 ++---
.../tools/chunker/ChunkerCrossValidator.java | 33 ++++-----
.../tools/chunker/ChunkerEvaluationMonitor.java | 3 +
.../opennlp/tools/chunker/ChunkerEvaluator.java | 25 +++----
.../opennlp/tools/chunker/ChunkerEventStream.java | 9 +--
.../main/java/opennlp/tools/chunker/ChunkerME.java | 77 +++++++++------------
.../java/opennlp/tools/chunker/ChunkerModel.java | 80 +++++++++++++++++++---
.../chunker/DefaultChunkerContextGenerator.java | 14 ++--
.../chunker/DefaultChunkerSequenceValidator.java | 6 +-
.../util/eval/CrossValidationPartitioner.java | 12 ++--
13 files changed, 207 insertions(+), 137 deletions(-)
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java
index 216114e5..57a11768 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java
@@ -37,33 +37,29 @@ public class ChunkSample implements Sample {
private final List<String> preds;
/**
- * Initializes the current instance.
+ * Initializes a {@link ChunkSample} instance.
*
* @param sentence
- * training sentence
+ * The training sentence.
* @param tags
- * POS Tags for the sentence
+ * The POS tags for the {@code sentence}.
* @param preds
- * Chunk tags in B-* I-* notation
+ * The chunk tags in B-* I-* notation.
*/
public ChunkSample(String[] sentence, String[] tags, String[] preds) {
- validateArguments(sentence.length, tags.length, preds.length);
-
- this.sentence = Collections.unmodifiableList(new
ArrayList<>(Arrays.asList(sentence)));
- this.tags = Collections.unmodifiableList(new
ArrayList<>(Arrays.asList(tags)));
- this.preds = Collections.unmodifiableList(new
ArrayList<>(Arrays.asList(preds)));
+ this(Arrays.asList(sentence), Arrays.asList(tags), Arrays.asList(preds));
}
/**
- * Initializes the current instance.
+ * Initializes a {@link ChunkSample} instance.
*
* @param sentence
- * training sentence
+ * The training sentence.
* @param tags
- * POS Tags for the sentence
+ * The POS tags for the {@code sentence}.
* @param preds
- * Chunk tags in B-* I-* notation
+ * The chunk tags in B-* I-* notation.
*/
public ChunkSample(List<String> sentence, List<String> tags, List<String>
preds) {
@@ -74,40 +70,47 @@ public class ChunkSample implements Sample {
this.preds = Collections.unmodifiableList(new ArrayList<>(preds));
}
- /** Gets the training sentence */
+ /**
+ * @return The training sentence.
+ */
public String[] getSentence() {
return sentence.toArray(new String[sentence.size()]);
}
- /** Gets the POS Tags for the sentence */
+ /**
+ * @return The POS tags for the sentence.
+ */
public String[] getTags() {
return tags.toArray(new String[tags.size()]);
}
- /** Gets the Chunk tags in B-* I-* notation */
+ /**
+ * @return The chunk tags in B-* I-* notation.
+ */
public String[] getPreds() {
return preds.toArray(new String[preds.size()]);
}
- /** Gets the phrases as an array of spans */
+ /**
+ * @return the phrases as an array of spans.
+ */
public Span[] getPhrasesAsSpanList() {
return phrasesAsSpanList(getSentence(), getTags(), getPreds());
}
/**
- * Static method to create arrays of spans of phrases
+ * Creates an array of {@link Span spans} of phrases.
*
* @param aSentence
- * training sentence
+ * The training sentence.
* @param aTags
- * POS Tags for the sentence
+ * The POS tags for the {@code aSentence}.
* @param aPreds
- * Chunk tags in B-* I-* notation
+ * The chunk tags in B-* I-* notation.
*
* @return the phrases as an array of spans
*/
- public static Span[] phrasesAsSpanList(String[] aSentence, String[] aTags,
- String[] aPreds) {
+ public static Span[] phrasesAsSpanList(String[] aSentence, String[] aTags,
String[] aPreds) {
validateArguments(aSentence.length, aTags.length, aPreds.length);
@@ -160,7 +163,7 @@ public class ChunkSample implements Sample {
* [PP for_IN ] [NP the_DT planes_NNS ] ._.
* </code>
*
- * @return a nice to read string representation of the chunk phases
+ * @return A nice to read string representation of the chunk phrases
*/
public String nicePrint() {
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
index 4a6d2d51..d54fd456 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
@@ -25,15 +25,24 @@ import opennlp.tools.ml.model.Sequence;
import opennlp.tools.ml.model.SequenceStream;
import opennlp.tools.util.ObjectStream;
+/**
+ * A {@link SequenceStream} implementation encapsulating {@link ChunkSample
samples}.
+ */
public class ChunkSampleSequenceStream implements SequenceStream<ChunkSample> {
private final ObjectStream<ChunkSample> samples;
private final ChunkerContextGenerator contextGenerator;
- public ChunkSampleSequenceStream(ObjectStream<ChunkSample> samples,
- ChunkerContextGenerator contextGenerator) {
+ /**
+ * Creates a {@link ChunkSampleSequenceStream} with given {@code samples}
using
+ * a {@link ChunkerContextGenerator}.
+ *
+ * @param samples The data stream of {@link ChunkSample samples}.
+ * @param generator A {@link ChunkerContextGenerator} which shall be used.
+ */
+ public ChunkSampleSequenceStream(ObjectStream<ChunkSample> samples,
ChunkerContextGenerator generator) {
this.samples = samples;
- this.contextGenerator = contextGenerator;
+ this.contextGenerator = generator;
}
@Override
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
index 9a074ee5..3d70fa13 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
@@ -42,6 +42,7 @@ public class ChunkSampleStream extends
FilterObjectStream<String, ChunkSample> {
super(samples);
}
+ @Override
public ChunkSample read() throws IOException {
List<String> toks = new ArrayList<>();
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
index b666ad33..f5d84b5d 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
@@ -21,19 +21,21 @@ import opennlp.tools.util.BeamSearchContextGenerator;
import opennlp.tools.util.TokenTag;
/**
- * Interface for the context generator used in syntactic chunking.
+ * Interface for a {@link BeamSearchContextGenerator} used in syntactic
chunking.
*/
public interface ChunkerContextGenerator extends
BeamSearchContextGenerator<TokenTag> {
/**
- * Returns the contexts for chunking of the specified index.
- * @param i The index of the token in the specified toks array for which the
context should be constructed.
- * @param toks The tokens of the sentence. The <code>toString</code>
methods of these objects
+ * Retrieves the contexts for chunking of the specified {@code idx}.
+ *
+ * @param idx The index of the token in the specified {@code toks} array for
which the context
+ * should be constructed.
+ * @param toks The tokens of the sentence. The {@code toString} methods of
these objects
* should return the token text.
- * @param tags The POS tags for the the specified tokens.
- * @param preds The previous decisions made in the taging of this sequence.
- * Only indices less than i will be examined.
+ * @param tags The POS tags for the specified tokens.
+ * @param preds The previous decisions made in the tagging of this sequence.
+ * Only indices less than {@code idx} will be examined.
* @return An array of predictive contexts on which a model basis its
decisions.
*/
- String[] getContext(int i, String[] toks, String[] tags, String[] preds);
+ String[] getContext(int idx, String[] toks, String[] tags, String[] preds);
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
index 2a193f72..41b54cd5 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
@@ -29,10 +29,18 @@ public class ChunkerCrossValidator {
private final String languageCode;
private final TrainingParameters params;
- private FMeasure fmeasure = new FMeasure();
- private ChunkerEvaluationMonitor[] listeners;
- private ChunkerFactory chunkerFactory;
+ private final FMeasure fmeasure = new FMeasure();
+ private final ChunkerEvaluationMonitor[] listeners;
+ private final ChunkerFactory chunkerFactory;
+ /**
+ * Initializes a {@link ChunkerCrossValidator} instance via given parameters.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param factory The {@link ChunkerFactory} for creating related objects.
+ * @param params The {@link TrainingParameters} for the context of cross
validation.
+ * @param listeners the {@link ChunkerEvaluationMonitor evaluation
listeners}.
+ */
public ChunkerCrossValidator(String languageCode, TrainingParameters params,
ChunkerFactory factory, ChunkerEvaluationMonitor... listeners) {
this.chunkerFactory = factory;
@@ -44,29 +52,22 @@ public class ChunkerCrossValidator {
/**
* Starts the evaluation.
*
- * @param samples
- * the data to train and test
- * @param nFolds
- * number of folds
+ * @param samples The {@link ObjectStream} of {@link ChunkSample samples} to
train and test with.
+ * @param nFolds Number of folds. It must be greater than zero.
*
- * @throws IOException
+ * @throws IOException Thrown if IO errors occurred.
*/
- public void evaluate(ObjectStream<ChunkSample> samples, int nFolds)
- throws IOException {
- CrossValidationPartitioner<ChunkSample> partitioner = new
CrossValidationPartitioner<>(
- samples, nFolds);
+ public void evaluate(ObjectStream<ChunkSample> samples, int nFolds) throws
IOException {
+ CrossValidationPartitioner<ChunkSample> partitioner = new
CrossValidationPartitioner<>(samples, nFolds);
while (partitioner.hasNext()) {
- CrossValidationPartitioner.TrainingSampleStream<ChunkSample>
trainingSampleStream = partitioner
- .next();
-
+ CrossValidationPartitioner.TrainingSampleStream<ChunkSample>
trainingSampleStream = partitioner.next();
ChunkerModel model = ChunkerME.train(languageCode, trainingSampleStream,
params, chunkerFactory);
// do testing
ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model),
listeners);
-
evaluator.evaluate(trainingSampleStream.getTestSampleStream());
fmeasure.mergeInto(evaluator.getFMeasure());
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluationMonitor.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluationMonitor.java
index 6aec250f..273e8d71 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluationMonitor.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluationMonitor.java
@@ -19,6 +19,9 @@ package opennlp.tools.chunker;
import opennlp.tools.util.eval.EvaluationMonitor;
+/**
+ * A marker interface for evaluating {@link Chunker chunkers}.
+ */
public interface ChunkerEvaluationMonitor extends
EvaluationMonitor<ChunkSample> {
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
index 24f3f85e..3c8fa3ed 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
@@ -22,9 +22,8 @@ import opennlp.tools.util.eval.Evaluator;
import opennlp.tools.util.eval.FMeasure;
/**
- * The {@link ChunkerEvaluator} measures the performance
- * of the given {@link Chunker} with the provided
- * reference {@link ChunkSample}s.
+ * The {@link ChunkerEvaluator} measures the performance of the given {@link
Chunker} with the provided
+ * reference {@link ChunkSample samples}.
*
* @see Evaluator
* @see Chunker
@@ -32,20 +31,18 @@ import opennlp.tools.util.eval.FMeasure;
*/
public class ChunkerEvaluator extends Evaluator<ChunkSample> {
- private FMeasure fmeasure = new FMeasure();
+ private final FMeasure fmeasure = new FMeasure();
/**
- * The {@link Chunker} used to create the predicted
- * {@link ChunkSample} objects.
+ * The {@link Chunker} used to create the predicted {@link ChunkSample}
objects.
*/
- private Chunker chunker;
+ private final Chunker chunker;
/**
- * Initializes the current instance with the given
- * {@link Chunker}.
+ * Initializes the current instance with the given {@link Chunker}.
*
* @param chunker the {@link Chunker} to evaluate.
- * @param listeners evaluation listeners
+ * @param listeners the {@link ChunkerEvaluationMonitor evaluation
listeners}.
*/
public ChunkerEvaluator(Chunker chunker, ChunkerEvaluationMonitor...
listeners) {
super(listeners);
@@ -55,14 +52,12 @@ public class ChunkerEvaluator extends
Evaluator<ChunkSample> {
/**
* Evaluates the given reference {@link ChunkSample} object.
*
- * This is done by finding the phrases with the
- * {@link Chunker} in the sentence from the reference
- * {@link ChunkSample}. The found phrases are then used to
- * calculate and update the scores.
+ * This is done by finding the phrases with the {@link Chunker} in the
sentence from the reference
+ * {@link ChunkSample}. The found phrases are then used to calculate and
update the scores.
*
* @param reference the reference {@link ChunkSample}.
*
- * @return the predicted sample
+ * @return The predicted {@link ChunkSample}.
*/
@Override
protected ChunkSample processSample(ChunkSample reference) {
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
index d72d1158..2a2fc1f3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
@@ -27,16 +27,17 @@ import opennlp.tools.util.AbstractEventStream;
import opennlp.tools.util.ObjectStream;
/**
- * Class for creating an event stream out of data files for training a chunker.
+ * Class for creating an event stream out of data files for training a {@link
Chunker}.
*/
public class ChunkerEventStream extends AbstractEventStream<ChunkSample> {
- private ChunkerContextGenerator cg;
+ private final ChunkerContextGenerator cg;
/**
- * Creates a new event stream based on the specified data stream using the
specified context generator.
+ * Creates a new event stream based on the specified data stream using the
given context generator.
+ *
* @param d The data stream for this event stream.
- * @param cg The context generator which should be used in the creation of
events for this event stream.
+ * @param cg A {@link ChunkerContextGenerator} which should be used for the
event stream {@code d}.
*/
public ChunkerEventStream(ObjectStream<ChunkSample> d,
ChunkerContextGenerator cg) {
super(d);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
index 212a1ec0..ebaf6f62 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
@@ -39,7 +39,7 @@ import opennlp.tools.util.TokenTag;
import opennlp.tools.util.TrainingParameters;
/**
- * The class represents a maximum-entropy-based chunker. Such a chunker can be
used to
+ * The class represents a maximum-entropy-based {@link Chunker}. This chunker
can be used to
* find flat structures based on sequence inputs such as noun phrases or named
entities.
*/
public class ChunkerME implements Chunker {
@@ -60,38 +60,10 @@ public class ChunkerME implements Chunker {
* Initializes the current instance with the specified model and
* the specified beam size.
*
- * @param model The model for this chunker.
+ * @param model The model for this {@link Chunker}.
* @param beamSize The size of the beam that should be used when decoding
sequences.
- * @param sequenceValidator The {@link SequenceValidator} to determines
whether the outcome
- * is valid for the preceding sequence. This can be used to implement
constraints
- * on what sequences are valid.
- * @deprecated Use {@link #ChunkerME(ChunkerModel, int)} instead and use the
{@link ChunkerFactory}
- * to configure the {@link SequenceValidator} and {@link
ChunkerContextGenerator}.
- */
- @Deprecated
- private ChunkerME(ChunkerModel model, int beamSize,
SequenceValidator<TokenTag> sequenceValidator,
- ChunkerContextGenerator contextGenerator) {
-
- this.sequenceValidator = sequenceValidator;
- this.contextGenerator = contextGenerator;
-
- if (model.getChunkerSequenceModel() != null) {
- this.model = model.getChunkerSequenceModel();
- }
- else {
- this.model = new opennlp.tools.ml.BeamSearch<>(beamSize,
- model.getChunkerModel(), 0);
- }
- }
-
- /**
- * Initializes the current instance with the specified model and
- * the specified beam size.
*
- * @param model The model for this chunker.
- * @param beamSize The size of the beam that should be used when decoding
sequences.
- *
- * @deprecated beam size is now stored inside the model
+ * @deprecated {@code beamSize} is now stored inside the model
*/
@Deprecated
private ChunkerME(ChunkerModel model, int beamSize) {
@@ -109,7 +81,8 @@ public class ChunkerME implements Chunker {
}
/**
- * Initializes the chunker by downloading a default model.
+ * Initializes the {@link Chunker} by downloading a default model.
+ *
* @param language The language of the model.
* @throws IOException Thrown if the model cannot be downloaded or saved.
*/
@@ -119,15 +92,16 @@ public class ChunkerME implements Chunker {
}
/**
- * Initializes the current instance with the specified model.
- * The default beam size is used.
+ * Initializes the current instance with the specified {@link ChunkerModel}.
+ * The {@link #DEFAULT_BEAM_SIZE} is used.
*
- * @param model
+ * @param model A valid {@link ChunkerModel model} instance.
*/
public ChunkerME(ChunkerModel model) {
this(model, DEFAULT_BEAM_SIZE);
}
+ @Override
public String[] chunk(String[] toks, String[] tags) {
TokenTag[] tuples = TokenTag.create(toks, tags);
bestSequence = model.bestSequence(tuples, new Object[] {},
contextGenerator, sequenceValidator);
@@ -135,11 +109,13 @@ public class ChunkerME implements Chunker {
return c.toArray(new String[c.size()]);
}
+ @Override
public Span[] chunkAsSpans(String[] toks, String[] tags) {
String[] preds = chunk(toks, tags);
return ChunkSample.phrasesAsSpanList(toks, tags, preds);
}
+ @Override
public Sequence[] topKSequences(String[] sentence, String[] tags) {
TokenTag[] tuples = TokenTag.create(sentence, tags);
@@ -147,6 +123,7 @@ public class ChunkerME implements Chunker {
new Object[] { }, contextGenerator, sequenceValidator);
}
+ @Override
public Sequence[] topKSequences(String[] sentence, String[] tags, double
minSequenceScore) {
TokenTag[] tuples = TokenTag.create(sentence, tags);
return model.bestSequences(DEFAULT_BEAM_SIZE, tuples, new Object[] { },
minSequenceScore,
@@ -154,10 +131,10 @@ public class ChunkerME implements Chunker {
}
/**
- * Populates the specified array with the probabilities of the last decoded
sequence. The
- * sequence was determined based on the previous call to <code>chunk</code>.
The
- * specified array should be at least as large as the numbe of tokens in the
previous
- * call to <code>chunk</code>.
+ * Populates the specified array with the probabilities of the last decoded
sequence. The
+ * sequence was determined based on the previous call to {@code chunk}. The
+ * specified array should be at least as large as the number of tokens in
the previous
+ * call to {@code chunk}.
*
* @param probs An array used to hold the probabilities of the last decoded
sequence.
*/
@@ -166,15 +143,27 @@ public class ChunkerME implements Chunker {
}
/**
- * Returns an array with the probabilities of the last decoded sequence. The
- * sequence was determined based on the previous call to <code>chunk</code>.
- * @return An array with the same number of probabilities as tokens were
sent to <code>chunk</code>
- * when it was last called.
+ * Returns an array with the probabilities of the last decoded sequence. The
+ * sequence was determined based on the previous call to {@code chunk}.
+ *
+ * @return An array with the same number of probabilities as tokens when
+ * {@link ChunkerME#chunk(String[], String[])} was last called.
*/
public double[] probs() {
return bestSequence.getProbs();
}
+ /**
+ * Starts the training of a {@link ChunkerModel} with the given parameters.
+ *
+ * @param lang The ISO conform language code.
+ * @param in The {@link ObjectStream} of {@link ChunkSample} used as input
for training.
+ * @param mlParams The {@link TrainingParameters} for the context of the
training.
+ * @param factory The {@link ChunkerFactory} for creating related objects
defined via {@code mlParams}.
+ *
+ * @return A valid, trained {@link ChunkerModel} instance.
+ * @throws IOException Thrown if IO errors occurred.
+ */
public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in,
TrainingParameters mlParams, ChunkerFactory factory) throws IOException {
@@ -183,8 +172,6 @@ public class ChunkerME implements Chunker {
Map<String, String> manifestInfoEntries = new HashMap<>();
TrainerType trainerType = TrainerFactory.getTrainerType(mlParams);
-
-
MaxentModel chunkerModel = null;
SequenceClassificationModel<String> seqChunkerModel = null;
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
index 4ab01012..71f37c3f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
@@ -36,8 +36,7 @@ import opennlp.tools.util.TokenTag;
import opennlp.tools.util.model.BaseModel;
/**
- * The {@link ChunkerModel} is the model used
- * by a learnable {@link Chunker}.
+ * The {@link ChunkerModel} is the model used by a learnable {@link Chunker}.
*
* @see ChunkerME
*/
@@ -47,6 +46,14 @@ public class ChunkerModel extends BaseModel {
private static final String COMPONENT_NAME = "ChunkerME";
private static final String CHUNKER_MODEL_ENTRY_NAME = "chunker.model";
+ /**
+ * Initializes a {@link ChunkerModel} instance via given parameters.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param chunkerModel A valid {@link SequenceClassificationModel}.
+ * @param manifestInfoEntries Additional information kept in the manifest.
+ * @param factory The {@link ChunkerFactory} for creating related objects.
+ */
public ChunkerModel(String languageCode, SequenceClassificationModel<String>
chunkerModel,
Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
@@ -54,11 +61,28 @@ public class ChunkerModel extends BaseModel {
checkArtifactMap();
}
+ /**
+ * Initializes a {@link ChunkerModel} instance via given parameters.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param chunkerModel A valid {@link MaxentModel}.
+ * @param manifestInfoEntries Additional information kept in the manifest.
+ * @param factory The {@link ChunkerFactory} for creating related objects.
+ */
public ChunkerModel(String languageCode, MaxentModel chunkerModel,
Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
this(languageCode, chunkerModel, ChunkerME.DEFAULT_BEAM_SIZE,
manifestInfoEntries, factory);
}
+ /**
+ * Initializes a {@link ChunkerModel} instance via given parameters.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param chunkerModel A valid {@link MaxentModel}.
+ * @param beamSize The size of the beam that should be used when decoding
sequences.
+ * @param manifestInfoEntries Additional information kept in the manifest.
+ * @param factory The {@link ChunkerFactory} for creating related objects.
+ */
public ChunkerModel(String languageCode, MaxentModel chunkerModel, int
beamSize,
Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
@@ -70,23 +94,58 @@ public class ChunkerModel extends BaseModel {
checkArtifactMap();
}
+ /**
+ * Initializes a {@link ChunkerModel} instance via given parameters.
+ *
+ * @param languageCode An ISO conform language code.
+ * @param chunkerModel A valid {@link MaxentModel}.
+ * @param factory The {@link ChunkerFactory} for creating related objects.
+ */
public ChunkerModel(String languageCode, MaxentModel chunkerModel,
ChunkerFactory factory) {
this(languageCode, chunkerModel, null, factory);
}
- public ChunkerModel(InputStream in) throws IOException,
InvalidFormatException {
+ /**
+ * Initializes a {@link ChunkerModel} instance via a valid {@link
InputStream}.
+ *
+ * @param in The {@link InputStream} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
+ public ChunkerModel(InputStream in) throws IOException {
super(COMPONENT_NAME, in);
}
- public ChunkerModel(File modelFile) throws IOException,
InvalidFormatException {
+ /**
+ * Initializes a {@link ChunkerModel} instance via a valid {@link File}.
+ *
+ * @param modelFile The {@link File} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
+ public ChunkerModel(File modelFile) throws IOException {
super(COMPONENT_NAME, modelFile);
}
- public ChunkerModel(Path modelPath) throws IOException,
InvalidFormatException {
+ /**
+ * Initializes a {@link ChunkerModel} instance via a valid {@link Path}.
+ *
+ * @param modelPath The {@link Path} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
+ public ChunkerModel(Path modelPath) throws IOException {
this(modelPath.toFile());
}
- public ChunkerModel(URL modelURL) throws IOException, InvalidFormatException
{
+ /**
+ * Initializes a {@link ChunkerModel} instance via a valid {@link URL}.
+ *
+ * @param modelURL The {@link URL} used for loading the model.
+ *
+ * @throws IOException Thrown if IO errors occurred during initialization.
+ */
+ public ChunkerModel(URL modelURL) throws IOException {
super(COMPONENT_NAME, modelURL);
}
@@ -111,7 +170,7 @@ public class ChunkerModel extends BaseModel {
}
/**
- * @deprecated use getChunkerSequenceModel instead. This method will be
removed soon.
+ * @deprecated use {@link ChunkerModel#getChunkerSequenceModel()} instead.
This method will be removed soon.
*/
@Deprecated
public MaxentModel getChunkerModel() {
@@ -123,6 +182,9 @@ public class ChunkerModel extends BaseModel {
}
}
+ /**
+ * @return The {@link SequenceClassificationModel}.
+ */
public SequenceClassificationModel<TokenTag> getChunkerSequenceModel() {
Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
@@ -150,7 +212,9 @@ public class ChunkerModel extends BaseModel {
return ChunkerFactory.class;
}
-
+ /**
+ * @return The active {@link ChunkerFactory}.
+ */
public ChunkerFactory getFactory() {
return (ChunkerFactory) this.toolFactory;
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
index 74e4a77a..b715ccbc 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
@@ -20,14 +20,15 @@ package opennlp.tools.chunker;
import opennlp.tools.util.TokenTag;
-/** Features based on chunking model described in Fei Sha and Fernando
Pereira. Shallow
- * parsing with conditional random fields. In Proceedings of HLT-NAACL 2003.
Association
- * for Computational Linguistics, 2003.
+/**
+ * Features based on chunking model described in Fei Sha and Fernando Pereira.
Shallow
+ * parsing with conditional random fields. In Proceedings of HLT-NAACL 2003.
Association
+ * for Computational Linguistics, 2003.
*/
public class DefaultChunkerContextGenerator implements ChunkerContextGenerator
{
/**
- * Creates the default context generator a chunker.
+ * Initializes a {@link DefaultChunkerContextGenerator} instance.
*/
public DefaultChunkerContextGenerator() {
}
@@ -37,6 +38,7 @@ public class DefaultChunkerContextGenerator implements
ChunkerContextGenerator {
return getContext(index, tokens, postags, priorDecisions);
}
+ @Override
public String[] getContext(int i, String[] toks, String[] tags, String[]
preds) {
// Words in a 5-word window
String w_2, w_1, w0, w1, w2;
@@ -90,7 +92,7 @@ public class DefaultChunkerContextGenerator implements
ChunkerContextGenerator {
t2 = "t2=" + tags[i + 2];
}
- String[] features = new String[] {
+ return new String[] {
//add word features
w_2,
w_1,
@@ -142,8 +144,6 @@ public class DefaultChunkerContextGenerator implements
ChunkerContextGenerator {
p_1 + w_1 + w0,
p_1 + w0 + w1
};
-
- return features;
}
@Override
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
index e184d278..d6708f41 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
@@ -20,6 +20,9 @@ package opennlp.tools.chunker;
import opennlp.tools.util.SequenceValidator;
import opennlp.tools.util.TokenTag;
+/**
+ * The default chunker {@link SequenceValidator} implementation.
+ */
public class DefaultChunkerSequenceValidator implements
SequenceValidator<TokenTag> {
private boolean validOutcome(String outcome, String prevOutcome) {
@@ -37,7 +40,7 @@ public class DefaultChunkerSequenceValidator implements
SequenceValidator<TokenT
return true;
}
- protected boolean validOutcome(String outcome, String[] sequence) {
+ private boolean validOutcome(String outcome, String[] sequence) {
String prevOutcome = null;
if (sequence.length > 0) {
prevOutcome = sequence[sequence.length - 1];
@@ -45,6 +48,7 @@ public class DefaultChunkerSequenceValidator implements
SequenceValidator<TokenT
return validOutcome(outcome,prevOutcome);
}
+ @Override
public boolean validSequence(int i, TokenTag[] sequence, String[] s, String
outcome) {
return validOutcome(outcome, s);
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java
b/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java
index 2de72d94..61fd860c 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java
@@ -33,7 +33,7 @@ import opennlp.tools.util.ObjectStream;
* and the training / evaluation is performed n times on these parts.
* The training partition always consists of n -1 parts and one part is used
for testing.
* <p>
- * To use the <code>CrossValidationPartioner</code> a client iterates over the
n
+ * To use the <code>CrossValidationPartitioner</code> a client iterates over
the n
* <code>TrainingSampleStream</code>s. Each <code>TrainingSampleStream</code>
represents
* one partition and is used first for training and afterwards for testing.
* The <code>TestSampleStream</code> can be obtained from the
<code>TrainingSampleStream</code>
@@ -48,7 +48,7 @@ public class CrossValidationPartitioner<E> {
*/
private static class TestSampleStream<E> implements ObjectStream<E> {
- private ObjectStream<E> sampleStream;
+ private final ObjectStream<E> sampleStream;
private final int numberOfPartitions;
@@ -116,7 +116,7 @@ public class CrossValidationPartitioner<E> {
*/
public static class TrainingSampleStream<E> implements ObjectStream<E> {
- private ObjectStream<E> sampleStream;
+ private final ObjectStream<E> sampleStream;
private final int numberOfPartitions;
@@ -206,7 +206,7 @@ public class CrossValidationPartitioner<E> {
* An <code>ObjectStream</code> over the whole set of data samples which
* are used for the cross validation.
*/
- private ObjectStream<E> sampleStream;
+ private final ObjectStream<E> sampleStream;
/**
* The number of parts the data is divided into.
@@ -279,7 +279,7 @@ public class CrossValidationPartitioner<E> {
@Override
public String toString() {
- return "At partition" + Integer.toString(testIndex + 1) +
- " of " + Integer.toString(numberOfPartitions);
+ return "At partition" + (testIndex + 1) +
+ " of " + numberOfPartitions;
}
}