[opennlp] branch master updated: OPENNLP-1401 Enhance JavaDoc in opennlp.tools.chunker package

kinow Sun, 04 Dec 2022 01:30:48 -0800

This is an automated email from the ASF dual-hosted git repository.

kinow pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git



The following commit(s) were added to refs/heads/master by this push:
     new df91f275 OPENNLP-1401 Enhance JavaDoc in opennlp.tools.chunker package
df91f275 is described below

commit df91f275c15f0abd9c300fcc42dfa662615ab6f9
Author: Martin Wiesner <[email protected]>
AuthorDate: Sun Dec 4 10:13:27 2022 +0100

    OPENNLP-1401 Enhance JavaDoc in opennlp.tools.chunker package
    
    - adds missing JavaDoc
    - improves existing documentation for clarity
    - removes superfluous text
    - fixes typos
    - adds 'final' modifier where useful and applicable
    - adds 'Override' annotation where useful and applicable
    - removes deprecated and unused constructor in ChunkerME
---
 .../java/opennlp/tools/chunker/ChunkSample.java    | 51 +++++++-------
 .../tools/chunker/ChunkSampleSequenceStream.java   | 15 +++-
 .../opennlp/tools/chunker/ChunkSampleStream.java   |  1 +
 .../tools/chunker/ChunkerContextGenerator.java     | 18 ++---
 .../tools/chunker/ChunkerCrossValidator.java       | 33 ++++-----
 .../tools/chunker/ChunkerEvaluationMonitor.java    |  3 +
 .../opennlp/tools/chunker/ChunkerEvaluator.java    | 25 +++----
 .../opennlp/tools/chunker/ChunkerEventStream.java  |  9 +--
 .../main/java/opennlp/tools/chunker/ChunkerME.java | 77 +++++++++------------
 .../java/opennlp/tools/chunker/ChunkerModel.java   | 80 +++++++++++++++++++---
 .../chunker/DefaultChunkerContextGenerator.java    | 14 ++--
 .../chunker/DefaultChunkerSequenceValidator.java   |  6 +-
 .../util/eval/CrossValidationPartitioner.java      | 12 ++--
 13 files changed, 207 insertions(+), 137 deletions(-)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java
index 216114e5..57a11768 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSample.java
@@ -37,33 +37,29 @@ public class ChunkSample implements Sample {
   private final List<String> preds;
 
   /**
-   * Initializes the current instance.
+   * Initializes a {@link ChunkSample} instance.
    *
    * @param sentence
-   *          training sentence
+   *          The training sentence.
    * @param tags
-   *          POS Tags for the sentence
+   *          The POS tags for the {@code sentence}.
    * @param preds
-   *          Chunk tags in B-* I-* notation
+   *          The chunk tags in B-* I-* notation.
    */
   public ChunkSample(String[] sentence, String[] tags, String[] preds) {
 
-    validateArguments(sentence.length, tags.length, preds.length);
-
-    this.sentence = Collections.unmodifiableList(new 
ArrayList<>(Arrays.asList(sentence)));
-    this.tags = Collections.unmodifiableList(new 
ArrayList<>(Arrays.asList(tags)));
-    this.preds = Collections.unmodifiableList(new 
ArrayList<>(Arrays.asList(preds)));
+    this(Arrays.asList(sentence), Arrays.asList(tags), Arrays.asList(preds));
   }
 
   /**
-   * Initializes the current instance.
+   * Initializes a {@link ChunkSample} instance.
    *
    * @param sentence
-   *          training sentence
+   *          The training sentence.
    * @param tags
-   *          POS Tags for the sentence
+   *          The POS tags for the {@code sentence}.
    * @param preds
-   *          Chunk tags in B-* I-* notation
+   *          The chunk tags in B-* I-* notation.
    */
   public ChunkSample(List<String> sentence, List<String> tags, List<String> 
preds) {
 
@@ -74,40 +70,47 @@ public class ChunkSample implements Sample {
     this.preds = Collections.unmodifiableList(new ArrayList<>(preds));
   }
 
-  /** Gets the training sentence */
+  /**
+   * @return The training sentence.
+   */
   public String[] getSentence() {
     return sentence.toArray(new String[sentence.size()]);
   }
 
-  /** Gets the POS Tags for the sentence */
+  /**
+   * @return The POS tags for the sentence.
+   */
   public String[] getTags() {
     return tags.toArray(new String[tags.size()]);
   }
 
-  /** Gets the Chunk tags in B-* I-* notation */
+  /**
+   * @return The chunk tags in B-* I-* notation.
+   */
   public String[] getPreds() {
     return preds.toArray(new String[preds.size()]);
   }
 
-  /** Gets the phrases as an array of spans */
+  /**
+   * @return the phrases as an array of spans.
+   */
   public Span[] getPhrasesAsSpanList() {
     return phrasesAsSpanList(getSentence(), getTags(), getPreds());
   }
 
   /**
-   * Static method to create arrays of spans of phrases
+   * Creates an array of {@link Span spans} of phrases.
    *
    * @param aSentence
-   *          training sentence
+   *          The training sentence.
    * @param aTags
-   *          POS Tags for the sentence
+   *          The POS tags for the {@code aSentence}.
    * @param aPreds
-   *          Chunk tags in B-* I-* notation
+   *          The chunk tags in B-* I-* notation.
    *
    * @return the phrases as an array of spans
    */
-  public static Span[] phrasesAsSpanList(String[] aSentence, String[] aTags,
-      String[] aPreds) {
+  public static Span[] phrasesAsSpanList(String[] aSentence, String[] aTags, 
String[] aPreds) {
 
     validateArguments(aSentence.length, aTags.length, aPreds.length);
 
@@ -160,7 +163,7 @@ public class ChunkSample implements Sample {
    * [PP for_IN ] [NP the_DT planes_NNS ] ._.
    * </code>
    *
-   * @return a nice to read string representation of the chunk phases
+   * @return A nice to read string representation of the chunk phrases
    */
   public String nicePrint() {
 
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
index 4a6d2d51..d54fd456 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleSequenceStream.java
@@ -25,15 +25,24 @@ import opennlp.tools.ml.model.Sequence;
 import opennlp.tools.ml.model.SequenceStream;
 import opennlp.tools.util.ObjectStream;
 
+/**
+ * A {@link SequenceStream} implementation encapsulating {@link ChunkSample 
samples}.
+ */
 public class ChunkSampleSequenceStream implements SequenceStream<ChunkSample> {
 
   private final ObjectStream<ChunkSample> samples;
   private final ChunkerContextGenerator contextGenerator;
 
-  public ChunkSampleSequenceStream(ObjectStream<ChunkSample> samples,
-      ChunkerContextGenerator contextGenerator) {
+  /**
+   * Creates a {@link ChunkSampleSequenceStream} with given {@code samples} 
using
+   * a {@link ChunkerContextGenerator}.
+   *
+   * @param samples The data stream of {@link ChunkSample samples}.
+   * @param generator A {@link ChunkerContextGenerator} which shall be used.
+   */
+  public ChunkSampleSequenceStream(ObjectStream<ChunkSample> samples, 
ChunkerContextGenerator generator) {
     this.samples = samples;
-    this.contextGenerator = contextGenerator;
+    this.contextGenerator = generator;
   }
 
   @Override
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
index 9a074ee5..3d70fa13 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkSampleStream.java
@@ -42,6 +42,7 @@ public class ChunkSampleStream extends 
FilterObjectStream<String, ChunkSample> {
     super(samples);
   }
 
+  @Override
   public ChunkSample read() throws IOException {
 
     List<String> toks = new ArrayList<>();
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
index b666ad33..f5d84b5d 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerContextGenerator.java
@@ -21,19 +21,21 @@ import opennlp.tools.util.BeamSearchContextGenerator;
 import opennlp.tools.util.TokenTag;
 
 /**
- * Interface for the context generator used in syntactic chunking.
+ * Interface for a {@link BeamSearchContextGenerator} used in syntactic 
chunking.
  */
 public interface ChunkerContextGenerator extends 
BeamSearchContextGenerator<TokenTag> {
 
   /**
-   * Returns the contexts for chunking of the specified index.
-   * @param i The index of the token in the specified toks array for which the 
context should be constructed.
-   * @param toks The tokens of the sentence.  The <code>toString</code> 
methods of these objects
+   * Retrieves the contexts for chunking of the specified {@code idx}.
+   *
+   * @param idx The index of the token in the specified {@code toks} array for 
which the context
+   *            should be constructed.
+   * @param toks The tokens of the sentence. The {@code toString} methods of 
these objects
    *             should return the token text.
-   * @param tags The POS tags for the the specified tokens.
-   * @param preds The previous decisions made in the taging of this sequence.
-   *              Only indices less than i will be examined.
+   * @param tags The POS tags for the specified tokens.
+   * @param preds The previous decisions made in the tagging of this sequence.
+   *              Only indices less than {@code idx} will be examined.
    * @return An array of predictive contexts on which a model basis its 
decisions.
    */
-  String[] getContext(int i, String[] toks, String[] tags, String[] preds);
+  String[] getContext(int idx, String[] toks, String[] tags, String[] preds);
 }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
index 2a193f72..41b54cd5 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerCrossValidator.java
@@ -29,10 +29,18 @@ public class ChunkerCrossValidator {
   private final String languageCode;
   private final TrainingParameters params;
 
-  private FMeasure fmeasure = new FMeasure();
-  private ChunkerEvaluationMonitor[] listeners;
-  private ChunkerFactory chunkerFactory;
+  private final FMeasure fmeasure = new FMeasure();
+  private final ChunkerEvaluationMonitor[] listeners;
+  private final ChunkerFactory chunkerFactory;
 
+  /**
+   * Initializes a {@link ChunkerCrossValidator} instance via given parameters.
+   *
+   * @param languageCode An ISO conform language code.
+   * @param factory The {@link ChunkerFactory} for creating related objects.
+   * @param params The {@link TrainingParameters} for the context of cross 
validation.
+   * @param listeners the {@link ChunkerEvaluationMonitor evaluation 
listeners}.
+   */
   public ChunkerCrossValidator(String languageCode, TrainingParameters params,
       ChunkerFactory factory, ChunkerEvaluationMonitor... listeners) {
     this.chunkerFactory = factory;
@@ -44,29 +52,22 @@ public class ChunkerCrossValidator {
   /**
    * Starts the evaluation.
    *
-   * @param samples
-   *          the data to train and test
-   * @param nFolds
-   *          number of folds
+   * @param samples The {@link ObjectStream} of {@link ChunkSample samples} to 
train and test with.
+   * @param nFolds Number of folds. It must be greater than zero.
    *
-   * @throws IOException
+   * @throws IOException Thrown if IO errors occurred.
    */
-  public void evaluate(ObjectStream<ChunkSample> samples, int nFolds)
-      throws IOException {
-    CrossValidationPartitioner<ChunkSample> partitioner = new 
CrossValidationPartitioner<>(
-        samples, nFolds);
+  public void evaluate(ObjectStream<ChunkSample> samples, int nFolds) throws 
IOException {
+    CrossValidationPartitioner<ChunkSample> partitioner = new 
CrossValidationPartitioner<>(samples, nFolds);
 
     while (partitioner.hasNext()) {
 
-      CrossValidationPartitioner.TrainingSampleStream<ChunkSample> 
trainingSampleStream = partitioner
-          .next();
-
+      CrossValidationPartitioner.TrainingSampleStream<ChunkSample> 
trainingSampleStream = partitioner.next();
       ChunkerModel model = ChunkerME.train(languageCode, trainingSampleStream,
           params, chunkerFactory);
 
       // do testing
       ChunkerEvaluator evaluator = new ChunkerEvaluator(new ChunkerME(model), 
listeners);
-
       evaluator.evaluate(trainingSampleStream.getTestSampleStream());
 
       fmeasure.mergeInto(evaluator.getFMeasure());
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluationMonitor.java
 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluationMonitor.java
index 6aec250f..273e8d71 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluationMonitor.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluationMonitor.java
@@ -19,6 +19,9 @@ package opennlp.tools.chunker;
 
 import opennlp.tools.util.eval.EvaluationMonitor;
 
+/**
+ * A marker interface for evaluating {@link Chunker chunkers}.
+ */
 public interface ChunkerEvaluationMonitor extends 
EvaluationMonitor<ChunkSample> {
 
 }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
index 24f3f85e..3c8fa3ed 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEvaluator.java
@@ -22,9 +22,8 @@ import opennlp.tools.util.eval.Evaluator;
 import opennlp.tools.util.eval.FMeasure;
 
 /**
- * The {@link ChunkerEvaluator} measures the performance
- * of the given {@link Chunker} with the provided
- * reference {@link ChunkSample}s.
+ * The {@link ChunkerEvaluator} measures the performance of the given {@link 
Chunker} with the provided
+ * reference {@link ChunkSample samples}.
  *
  * @see Evaluator
  * @see Chunker
@@ -32,20 +31,18 @@ import opennlp.tools.util.eval.FMeasure;
  */
 public class ChunkerEvaluator extends Evaluator<ChunkSample> {
 
-  private FMeasure fmeasure = new FMeasure();
+  private final FMeasure fmeasure = new FMeasure();
 
   /**
-   * The {@link Chunker} used to create the predicted
-   * {@link ChunkSample} objects.
+   * The {@link Chunker} used to create the predicted {@link ChunkSample} 
objects.
    */
-  private Chunker chunker;
+  private final Chunker chunker;
 
   /**
-   * Initializes the current instance with the given
-   * {@link Chunker}.
+   * Initializes the current instance with the given {@link Chunker}.
    *
    * @param chunker the {@link Chunker} to evaluate.
-   * @param listeners evaluation listeners
+   * @param listeners the {@link ChunkerEvaluationMonitor evaluation 
listeners}.
    */
   public ChunkerEvaluator(Chunker chunker, ChunkerEvaluationMonitor... 
listeners) {
     super(listeners);
@@ -55,14 +52,12 @@ public class ChunkerEvaluator extends 
Evaluator<ChunkSample> {
   /**
    * Evaluates the given reference {@link ChunkSample} object.
    *
-   * This is done by finding the phrases with the
-   * {@link Chunker} in the sentence from the reference
-   * {@link ChunkSample}. The found phrases are then used to
-   * calculate and update the scores.
+   * This is done by finding the phrases with the {@link Chunker} in the 
sentence from the reference
+   * {@link ChunkSample}. The found phrases are then used to calculate and 
update the scores.
    *
    * @param reference the reference {@link ChunkSample}.
    *
-   * @return the predicted sample
+   * @return The predicted {@link ChunkSample}.
    */
   @Override
   protected ChunkSample processSample(ChunkSample reference) {
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
index d72d1158..2a2fc1f3 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerEventStream.java
@@ -27,16 +27,17 @@ import opennlp.tools.util.AbstractEventStream;
 import opennlp.tools.util.ObjectStream;
 
 /**
- * Class for creating an event stream out of data files for training a chunker.
+ * Class for creating an event stream out of data files for training a {@link 
Chunker}.
  */
 public class ChunkerEventStream extends AbstractEventStream<ChunkSample> {
 
-  private ChunkerContextGenerator cg;
+  private final ChunkerContextGenerator cg;
 
   /**
-   * Creates a new event stream based on the specified data stream using the 
specified context generator.
+   * Creates a new event stream based on the specified data stream using the 
given context generator.
+   *
    * @param d The data stream for this event stream.
-   * @param cg The context generator which should be used in the creation of 
events for this event stream.
+   * @param cg A {@link ChunkerContextGenerator} which should be used for the 
event stream {@code d}.
    */
   public ChunkerEventStream(ObjectStream<ChunkSample> d, 
ChunkerContextGenerator cg) {
     super(d);
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
index 212a1ec0..ebaf6f62 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerME.java
@@ -39,7 +39,7 @@ import opennlp.tools.util.TokenTag;
 import opennlp.tools.util.TrainingParameters;
 
 /**
- * The class represents a maximum-entropy-based chunker. Such a chunker can be 
used to
+ * The class represents a maximum-entropy-based {@link Chunker}. This chunker 
can be used to
  * find flat structures based on sequence inputs such as noun phrases or named 
entities.
  */
 public class ChunkerME implements Chunker {
@@ -60,38 +60,10 @@ public class ChunkerME implements Chunker {
    * Initializes the current instance with the specified model and
    * the specified beam size.
    *
-   * @param model The model for this chunker.
+   * @param model The model for this {@link Chunker}.
    * @param beamSize The size of the beam that should be used when decoding 
sequences.
-   * @param sequenceValidator  The {@link SequenceValidator} to determines 
whether the outcome
-   *        is valid for the preceding sequence. This can be used to implement 
constraints
-   *        on what sequences are valid.
-   * @deprecated Use {@link #ChunkerME(ChunkerModel, int)} instead and use the 
{@link ChunkerFactory}
-   *     to configure the {@link SequenceValidator} and {@link 
ChunkerContextGenerator}.
-   */
-  @Deprecated
-  private ChunkerME(ChunkerModel model, int beamSize, 
SequenceValidator<TokenTag> sequenceValidator,
-      ChunkerContextGenerator contextGenerator) {
-
-    this.sequenceValidator = sequenceValidator;
-    this.contextGenerator = contextGenerator;
-
-    if (model.getChunkerSequenceModel() != null) {
-      this.model = model.getChunkerSequenceModel();
-    }
-    else {
-      this.model = new opennlp.tools.ml.BeamSearch<>(beamSize,
-          model.getChunkerModel(), 0);
-    }
-  }
-
-  /**
-   * Initializes the current instance with the specified model and
-   * the specified beam size.
    *
-   * @param model The model for this chunker.
-   * @param beamSize The size of the beam that should be used when decoding 
sequences.
-   *
-   * @deprecated beam size is now stored inside the model
+   * @deprecated {@code beamSize} is now stored inside the model
    */
   @Deprecated
   private ChunkerME(ChunkerModel model, int beamSize) {
@@ -109,7 +81,8 @@ public class ChunkerME implements Chunker {
   }
 
   /**
-   * Initializes the chunker by downloading a default model.
+   * Initializes the {@link Chunker} by downloading a default model.
+   *
    * @param language The language of the model.
    * @throws IOException Thrown if the model cannot be downloaded or saved.
    */
@@ -119,15 +92,16 @@ public class ChunkerME implements Chunker {
   }
 
   /**
-   * Initializes the current instance with the specified model.
-   * The default beam size is used.
+   * Initializes the current instance with the specified {@link ChunkerModel}.
+   * The {@link #DEFAULT_BEAM_SIZE} is used.
    *
-   * @param model
+   * @param model A valid {@link ChunkerModel model} instance.
    */
   public ChunkerME(ChunkerModel model) {
     this(model, DEFAULT_BEAM_SIZE);
   }
 
+  @Override
   public String[] chunk(String[] toks, String[] tags) {
     TokenTag[] tuples = TokenTag.create(toks, tags);
     bestSequence = model.bestSequence(tuples, new Object[] {}, 
contextGenerator, sequenceValidator);
@@ -135,11 +109,13 @@ public class ChunkerME implements Chunker {
     return c.toArray(new String[c.size()]);
   }
 
+  @Override
   public Span[] chunkAsSpans(String[] toks, String[] tags) {
     String[] preds = chunk(toks, tags);
     return ChunkSample.phrasesAsSpanList(toks, tags, preds);
   }
 
+  @Override
   public Sequence[] topKSequences(String[] sentence, String[] tags) {
     TokenTag[] tuples = TokenTag.create(sentence, tags);
 
@@ -147,6 +123,7 @@ public class ChunkerME implements Chunker {
         new Object[] { }, contextGenerator, sequenceValidator);
   }
 
+  @Override
   public Sequence[] topKSequences(String[] sentence, String[] tags, double 
minSequenceScore) {
     TokenTag[] tuples = TokenTag.create(sentence, tags);
     return model.bestSequences(DEFAULT_BEAM_SIZE, tuples, new Object[] { }, 
minSequenceScore,
@@ -154,10 +131,10 @@ public class ChunkerME implements Chunker {
   }
 
   /**
-   * Populates the specified array with the probabilities of the last decoded 
sequence.  The
-   * sequence was determined based on the previous call to <code>chunk</code>. 
 The
-   * specified array should be at least as large as the numbe of tokens in the 
previous
-   * call to <code>chunk</code>.
+   * Populates the specified array with the probabilities of the last decoded 
sequence. The
+   * sequence was determined based on the previous call to {@code chunk}. The
+   * specified array should be at least as large as the number of tokens in 
the previous
+   * call to {@code chunk}.
    *
    * @param probs An array used to hold the probabilities of the last decoded 
sequence.
    */
@@ -166,15 +143,27 @@ public class ChunkerME implements Chunker {
   }
 
   /**
-   * Returns an array with the probabilities of the last decoded sequence.  The
-   * sequence was determined based on the previous call to <code>chunk</code>.
-   * @return An array with the same number of probabilities as tokens were 
sent to <code>chunk</code>
-   *     when it was last called.
+   * Returns an array with the probabilities of the last decoded sequence. The
+   * sequence was determined based on the previous call to {@code chunk}.
+   *
+   * @return An array with the same number of probabilities as tokens when
+   *         {@link ChunkerME#chunk(String[], String[])} was last called.
    */
   public double[] probs() {
     return bestSequence.getProbs();
   }
 
+  /**
+   * Starts the training of a {@link ChunkerModel} with the given parameters.
+   *
+   * @param lang The ISO conform language code.
+   * @param in The {@link ObjectStream} of {@link ChunkSample} used as input 
for training.
+   * @param mlParams The {@link TrainingParameters} for the context of the 
training.
+   * @param factory The {@link ChunkerFactory} for creating related objects 
defined via {@code mlParams}.
+   *
+   * @return A valid, trained {@link ChunkerModel} instance.
+   * @throws IOException Thrown if IO errors occurred.
+   */
   public static ChunkerModel train(String lang, ObjectStream<ChunkSample> in,
       TrainingParameters mlParams, ChunkerFactory factory) throws IOException {
 
@@ -183,8 +172,6 @@ public class ChunkerME implements Chunker {
     Map<String, String> manifestInfoEntries = new HashMap<>();
 
     TrainerType trainerType = TrainerFactory.getTrainerType(mlParams);
-
-
     MaxentModel chunkerModel = null;
     SequenceClassificationModel<String> seqChunkerModel = null;
 
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
index 4ab01012..71f37c3f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/ChunkerModel.java
@@ -36,8 +36,7 @@ import opennlp.tools.util.TokenTag;
 import opennlp.tools.util.model.BaseModel;
 
 /**
- * The {@link ChunkerModel} is the model used
- * by a learnable {@link Chunker}.
+ * The {@link ChunkerModel} is the model used by a learnable {@link Chunker}.
  *
  * @see ChunkerME
  */
@@ -47,6 +46,14 @@ public class ChunkerModel extends BaseModel {
   private static final String COMPONENT_NAME = "ChunkerME";
   private static final String CHUNKER_MODEL_ENTRY_NAME = "chunker.model";
 
+  /**
+   * Initializes a {@link ChunkerModel} instance via given parameters.
+   *
+   * @param languageCode An ISO conform language code.
+   * @param chunkerModel A valid {@link SequenceClassificationModel}.
+   * @param manifestInfoEntries Additional information kept in the manifest.
+   * @param factory The {@link ChunkerFactory} for creating related objects.
+   */
   public ChunkerModel(String languageCode, SequenceClassificationModel<String> 
chunkerModel,
       Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
@@ -54,11 +61,28 @@ public class ChunkerModel extends BaseModel {
     checkArtifactMap();
   }
 
+  /**
+   * Initializes a {@link ChunkerModel} instance via given parameters.
+   *
+   * @param languageCode An ISO conform language code.
+   * @param chunkerModel A valid {@link MaxentModel}.
+   * @param manifestInfoEntries Additional information kept in the manifest.
+   * @param factory The {@link ChunkerFactory} for creating related objects.
+   */
   public ChunkerModel(String languageCode, MaxentModel chunkerModel,
       Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
     this(languageCode, chunkerModel, ChunkerME.DEFAULT_BEAM_SIZE, 
manifestInfoEntries, factory);
   }
 
+  /**
+   * Initializes a {@link ChunkerModel} instance via given parameters.
+   *
+   * @param languageCode An ISO conform language code.
+   * @param chunkerModel A valid {@link MaxentModel}.
+   * @param beamSize The size of the beam that should be used when decoding 
sequences.
+   * @param manifestInfoEntries Additional information kept in the manifest.
+   * @param factory The {@link ChunkerFactory} for creating related objects.
+   */
   public ChunkerModel(String languageCode, MaxentModel chunkerModel, int 
beamSize,
       Map<String, String> manifestInfoEntries, ChunkerFactory factory) {
     super(COMPONENT_NAME, languageCode, manifestInfoEntries, factory);
@@ -70,23 +94,58 @@ public class ChunkerModel extends BaseModel {
     checkArtifactMap();
   }
 
+  /**
+   * Initializes a {@link ChunkerModel} instance via given parameters.
+   *
+   * @param languageCode An ISO conform language code.
+   * @param chunkerModel A valid {@link MaxentModel}.
+   * @param factory The {@link ChunkerFactory} for creating related objects.
+   */
   public ChunkerModel(String languageCode, MaxentModel chunkerModel, 
ChunkerFactory factory) {
     this(languageCode, chunkerModel, null, factory);
   }
 
-  public ChunkerModel(InputStream in) throws IOException, 
InvalidFormatException {
+  /**
+   * Initializes a {@link ChunkerModel} instance via a valid {@link 
InputStream}.
+   *
+   * @param in The {@link InputStream} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
+  public ChunkerModel(InputStream in) throws IOException {
     super(COMPONENT_NAME, in);
   }
 
-  public ChunkerModel(File modelFile) throws IOException, 
InvalidFormatException {
+  /**
+   * Initializes a {@link ChunkerModel} instance via a valid {@link File}.
+   *
+   * @param modelFile The {@link File} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
+  public ChunkerModel(File modelFile) throws IOException {
     super(COMPONENT_NAME, modelFile);
   }
 
-  public ChunkerModel(Path modelPath) throws IOException, 
InvalidFormatException {
+  /**
+   * Initializes a {@link ChunkerModel} instance via a valid {@link Path}.
+   *
+   * @param modelPath The {@link Path} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
+  public ChunkerModel(Path modelPath) throws IOException {
     this(modelPath.toFile());
   }
 
-  public ChunkerModel(URL modelURL) throws IOException, InvalidFormatException 
{
+  /**
+   * Initializes a {@link ChunkerModel} instance via a valid {@link URL}.
+   *
+   * @param modelURL The {@link URL} used for loading the model.
+   *
+   * @throws IOException Thrown if IO errors occurred during initialization.
+   */
+  public ChunkerModel(URL modelURL) throws IOException {
     super(COMPONENT_NAME, modelURL);
   }
 
@@ -111,7 +170,7 @@ public class ChunkerModel extends BaseModel {
   }
 
   /**
-   * @deprecated use getChunkerSequenceModel instead. This method will be 
removed soon.
+   * @deprecated use {@link ChunkerModel#getChunkerSequenceModel()} instead. 
This method will be removed soon.
    */
   @Deprecated
   public MaxentModel getChunkerModel() {
@@ -123,6 +182,9 @@ public class ChunkerModel extends BaseModel {
     }
   }
 
+  /**
+   * @return A {@link SequenceClassificationModel} instance.
+   */
   public SequenceClassificationModel<TokenTag> getChunkerSequenceModel() {
 
     Properties manifest = (Properties) artifactMap.get(MANIFEST_ENTRY);
@@ -150,7 +212,9 @@ public class ChunkerModel extends BaseModel {
     return ChunkerFactory.class;
   }
 
-
+  /**
+   * @return The active {@link ChunkerFactory}.
+   */
   public ChunkerFactory getFactory() {
     return (ChunkerFactory) this.toolFactory;
   }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
index 74e4a77a..b715ccbc 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerContextGenerator.java
@@ -20,14 +20,15 @@ package opennlp.tools.chunker;
 
 import opennlp.tools.util.TokenTag;
 
-/** Features based on chunking model described in Fei Sha and Fernando 
Pereira. Shallow
- *  parsing with conditional random fields. In Proceedings of HLT-NAACL 2003. 
Association
- *  for Computational Linguistics, 2003.
+/**
+ * Features based on chunking model described in Fei Sha and Fernando Pereira. 
Shallow
+ * parsing with conditional random fields. In Proceedings of HLT-NAACL 2003. 
Association
+ * for Computational Linguistics, 2003.
  */
 public class DefaultChunkerContextGenerator implements ChunkerContextGenerator 
{
 
   /**
-   * Creates the default context generator a chunker.
+   * Initializes a {@link DefaultChunkerContextGenerator} instance.
    */
   public DefaultChunkerContextGenerator() {
   }
@@ -37,6 +38,7 @@ public class DefaultChunkerContextGenerator implements ChunkerContextGenerator {
     return getContext(index, tokens, postags, priorDecisions);
   }
 
+  @Override
   public String[] getContext(int i, String[] toks, String[] tags, String[] preds) {
     // Words in a 5-word window
     String w_2, w_1, w0, w1, w2;
@@ -90,7 +92,7 @@ public class DefaultChunkerContextGenerator implements ChunkerContextGenerator {
       t2 = "t2=" + tags[i + 2];
     }
 
-    String[] features = new String[] {
+    return new String[] {
         //add word features
         w_2,
         w_1,
@@ -142,8 +144,6 @@ public class DefaultChunkerContextGenerator implements ChunkerContextGenerator {
         p_1 + w_1 + w0,
         p_1 + w0 + w1
     };
-
-    return features;
   }
 
   @Override
diff --git a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
index e184d278..d6708f41 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/chunker/DefaultChunkerSequenceValidator.java
@@ -20,6 +20,9 @@ package opennlp.tools.chunker;
 import opennlp.tools.util.SequenceValidator;
 import opennlp.tools.util.TokenTag;
 
+/**
+ * The default chunker {@link SequenceValidator} implementation.
+ */
 public class DefaultChunkerSequenceValidator implements SequenceValidator<TokenTag> {
 
   private boolean validOutcome(String outcome, String prevOutcome) {
@@ -37,7 +40,7 @@ public class DefaultChunkerSequenceValidator implements SequenceValidator<TokenT
     return true;
   }
 
-  protected boolean validOutcome(String outcome, String[] sequence) {
+  private boolean validOutcome(String outcome, String[] sequence) {
     String prevOutcome = null;
     if (sequence.length > 0) {
       prevOutcome = sequence[sequence.length - 1];
@@ -45,6 +48,7 @@ public class DefaultChunkerSequenceValidator implements SequenceValidator<TokenT
     return validOutcome(outcome,prevOutcome);
   }
 
+  @Override
   public boolean validSequence(int i, TokenTag[] sequence, String[] s, String outcome) {
     return validOutcome(outcome, s);
   }
diff --git a/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java b/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java
index 2de72d94..61fd860c 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/util/eval/CrossValidationPartitioner.java
@@ -33,7 +33,7 @@ import opennlp.tools.util.ObjectStream;
  * and the training / evaluation is performed n times on these parts.
  * The training partition always consists of n -1 parts and one part is used for testing.
  * <p>
- * To use the <code>CrossValidationPartioner</code> a client iterates over the n
+ * To use the <code>CrossValidationPartitioner</code> a client iterates over the n
  * <code>TrainingSampleStream</code>s. Each <code>TrainingSampleStream</code> represents
  * one partition and is used first for training and afterwards for testing.
  * The <code>TestSampleStream</code> can be obtained from the <code>TrainingSampleStream</code>
@@ -48,7 +48,7 @@ public class CrossValidationPartitioner<E> {
    */
   private static class TestSampleStream<E> implements ObjectStream<E> {
 
-    private ObjectStream<E> sampleStream;
+    private final ObjectStream<E> sampleStream;
 
     private final int numberOfPartitions;
 
@@ -116,7 +116,7 @@ public class CrossValidationPartitioner<E> {
    */
   public static class TrainingSampleStream<E> implements ObjectStream<E> {
 
-    private ObjectStream<E> sampleStream;
+    private final ObjectStream<E> sampleStream;
 
     private final int numberOfPartitions;
 
@@ -206,7 +206,7 @@ public class CrossValidationPartitioner<E> {
    * An <code>ObjectStream</code> over the whole set of data samples which
    * are used for the cross validation.
    */
-  private ObjectStream<E> sampleStream;
+  private final ObjectStream<E> sampleStream;
 
   /**
    * The number of parts the data is divided into.
@@ -279,7 +279,7 @@ public class CrossValidationPartitioner<E> {
 
   @Override
   public String toString() {
-    return "At partition" + Integer.toString(testIndex + 1) +
-        " of " + Integer.toString(numberOfPartitions);
+    return "At partition" + (testIndex + 1) +
+        " of " + numberOfPartitions;
   }
 }

[opennlp] branch master updated: OPENNLP-1401 Enhance JavaDoc in opennlp.tools.chunker package

Reply via email to