[opennlp] branch master updated: OPENNLP-1415 Enhance JavaDoc in opennlp.tools.formats.masc package (#460)

jzemerick Tue, 20 Dec 2022 08:24:24 -0800

This is an automated email from the ASF dual-hosted git repository.

jzemerick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git



The following commit(s) were added to refs/heads/master by this push:
     new d2ac7d24 OPENNLP-1415 Enhance JavaDoc in opennlp.tools.formats.masc package (#460)
d2ac7d24 is described below

commit d2ac7d24ffa94f278e6daf3952dc1c76e9445fdf
Author: Martin Wiesner <[email protected]>
AuthorDate: Tue Dec 20 17:24:09 2022 +0100

    OPENNLP-1415 Enhance JavaDoc in opennlp.tools.formats.masc package (#460)
    
    - adds missing JavaDoc
    - improves existing documentation for clarity
    - removes superfluous text
    - adds 'final' modifier where useful and applicable
    - adds 'Override' annotation where useful and applicable
    - fixes several typos
    - modernizes handling of AutoCloseable resources in `MascDocument`
    - reduces compiler warnings
---
 .../opennlp/tools/formats/masc/MascDocument.java   | 165 ++++++++++-----------
 .../tools/formats/masc/MascDocumentStream.java     |   2 +-
 .../tools/formats/masc/MascNamedEntityParser.java  |   7 +-
 .../formats/masc/MascNamedEntitySampleStream.java  |  24 +--
 .../masc/MascNamedEntitySampleStreamFactory.java   |   9 +-
 .../tools/formats/masc/MascPOSSampleStream.java    |  26 ++--
 .../formats/masc/MascPOSSampleStreamFactory.java   |   9 +-
 .../opennlp/tools/formats/masc/MascSentence.java   |  61 ++++----
 .../tools/formats/masc/MascSentenceParser.java     |   9 +-
 .../formats/masc/MascSentenceSampleStream.java     |   8 +-
 .../masc/MascSentenceSampleStreamFactory.java      |  11 +-
 .../java/opennlp/tools/formats/masc/MascToken.java |  37 +++--
 .../tools/formats/masc/MascTokenSampleStream.java  |  34 +++--
 .../formats/masc/MascTokenSampleStreamFactory.java |  12 +-
 .../java/opennlp/tools/formats/masc/MascWord.java  |  12 +-
 .../opennlp/tools/formats/masc/MascWordParser.java |   9 +-
 .../opennlp/tools/postag/POSTaggerFactory.java     |   2 +-
 .../java/opennlp/tools/tokenize/TokenizerME.java   |   4 +-
 18 files changed, 211 insertions(+), 230 deletions(-)

diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocument.java 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocument.java
index 0e7af1a2..01bea73f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocument.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocument.java
@@ -17,6 +17,7 @@
 
 package opennlp.tools.formats.masc;
 
+import java.io.BufferedInputStream;
 import java.io.BufferedReader;
 import java.io.IOException;
 import java.io.InputStream;
@@ -54,16 +55,16 @@ public class MascDocument {
   }
 
   /**
-   * Creates a MASC document with all of the stand-off annotations translated 
into the internal
-   * structure.
+   * Initializes a {@link MascDocument} with all the stand-off annotations 
translated into the
+   * internal structure.
    *
    * @param path      The path where the document header is.
-   * @param f_primary The file with the raw corpus text.
-   * @param f_seg     The file with segmentation into quarks.
-   * @param f_ne      The file with named entities.
-   * @param f_penn    The file with tokenization and Penn POS tags produced
+   * @param f_primary The {@link InputStream file} with the raw corpus text.
+   * @param f_seg     The {@link InputStream file} with segmentation into 
quarks.
+   * @param f_ne      The {@link InputStream file} with named entities.
+   * @param f_penn    The {@link InputStream file} with tokenization and Penn 
POS tags produced
    *                  by GATE-5.0 ANNIE application.
-   * @param f_s       The file with sentence boundaries.
+   * @param f_s       The {@link InputStream file} with sentence boundaries.
    * @return A document containing the text and its annotations. Immutability 
is not guaranteed yet.
    * @throws IOException if the raw data cannot be read or the alignment of 
the raw data
    *                     with annotations fails
@@ -77,7 +78,7 @@ public class MascDocument {
     List<Span> sentenceSpans = parseSentences(f_s);
 
     List<MascSentence> sentences = combineAnnotations(text, sentenceSpans, 
words);
-    MascDocument doc = new MascDocument(path, sentences);
+    final MascDocument doc = new MascDocument(path, sentences);
 
     // if the file has Penn POS tags, add them
     if (f_penn != null) {
@@ -88,21 +89,21 @@ public class MascDocument {
       doc.addNamedEntityTags(parseNamedEntity(f_ne));
     }
 
-    //todo: make the annotations immutable
-    //todo: should we cleanup the document (e.g. remove sentences without 
tokens?)
+    //TODO: make the annotations immutable
+    //TODO: should we cleanup the document (e.g. remove sentences without 
tokens?)
     return doc;
   }
 
   /**
-   * Read in the corpus file text
+   * Reads in the corpus file text.
    *
-   * @param stream The corpus file
-   * @return The text of the file
-   * @throws IOException if anything goes wrong
+   * @param stream A valid, open {@link InputStream stream} for a corpus file.
+   *
+   * @return The text of the file.
+   * @throws IOException Thrown if IO errors occurred.
    */
   private static String readText(InputStream stream) throws IOException {
-    try {
-      Reader reader = new BufferedReader(new InputStreamReader(stream, 
StandardCharsets.UTF_8));
+    try (Reader reader = new BufferedReader(new InputStreamReader(stream, 
StandardCharsets.UTF_8))) {
       StringBuilder contents = new StringBuilder();
       char[] buffer = new char[8192];
       int read;
@@ -110,9 +111,6 @@ public class MascDocument {
         contents.append(buffer, 0, read);
       }
       return contents.toString();
-    } finally {
-      // this may throw an exception
-      stream.close();
     }
   }
 
@@ -120,52 +118,51 @@ public class MascDocument {
   /**
    * Parses the word segmentation stand-off annotation
    *
-   * @param f_seg The file with segmentation
+   * @param f_seg A valid, open {@link InputStream stream} for a file with 
segmentation.
    * @return A list of individual quarks, expressed as MascWord-s
-   * @throws IOException if anything goes wrong
+   * @throws IOException Thrown if IO errors occurred.
    */
   private static List<MascWord> parseWords(InputStream f_seg) throws 
IOException {
 
-    try {
+    try (BufferedInputStream bStream = new BufferedInputStream(f_seg)) {
       SAXParser saxParser = XmlUtil.createSaxParser();
       MascWordParser handler = new MascWordParser();
       try {
-        saxParser.parse(f_seg, handler);
+        saxParser.parse(bStream, handler);
       } catch (SAXException e) {
         throw new IOException("Could not parse the region annotation file");
       }
 
       return Collections.unmodifiableList(handler.getAnchors());
-
-    } finally {
-      f_seg.close();
     }
   }
 
   /**
-   * Parse the sentence annotation file, align it with the raw text
+   * Parses the sentence annotation file, align it with the raw text
    *
-   * @param f_s the sentence annotation file
-   * @return the list of Spans delimiting each sentence
+   * @param f_s A valid, open {@link InputStream stream} for a sentence 
annotation file.
+   * @return The {@link List<Span>} delimiting each sentence.
    * @throws IOException if the sentence file cannot be parsed or closed
    */
   private static List<Span> parseSentences(InputStream f_s) throws IOException 
{
 
-    try {
+    try (BufferedInputStream bStream = new BufferedInputStream(f_s)) {
       SAXParser saxParser = XmlUtil.createSaxParser();
       MascSentenceParser handler = new MascSentenceParser();
       try {
-        saxParser.parse(f_s, handler);
+        saxParser.parse(bStream, handler);
       } catch (SAXException e) {
         throw new IOException("Could not parse the sentence annotation file");
       }
 
       List<Span> anchors = handler.getAnchors();
 
-      /*Filter out sentence overlaps.
-      Keep only those sentences  where sentence.end < nextsentence.beginning
-      avoid deleting in the middle and repeatedly shifting the list by copying 
into a new list*/
-      //todo: can we know a priori, if we need this filtering?
+      /*
+       * Filter out sentence overlaps.
+       * Keep only those sentences  where sentence.end < nextSentence.beginning
+       * avoid deleting in the middle and repeatedly shifting the list by 
copying into a new list
+       */
+      //TODO: can we know a priori, if we need this filtering?
       List<Span> filteredAnchors = new ArrayList<>();
       for (int i = 0; i < anchors.size() - 1; i++) {
         if (anchors.get(i).getEnd() < anchors.get(i + 1).getStart()) {
@@ -175,30 +172,28 @@ public class MascDocument {
       filteredAnchors.add(anchors.get(anchors.size() - 1));
 
       return Collections.unmodifiableList(filteredAnchors);
-
-    } finally {
-      f_s.close();
     }
 
   }
 
   /**
-   * Parses the Penn-POS (GATE5-ANNIE) stand-off annotation
+   * Parses the Penn-POS (GATE5-ANNIE) stand-off annotation.
    *
-   * @param f_penn The file with Penn POS tags
+   * @param f_penn A valid, open {@link InputStream stream} for a file with 
Penn POS tags.
+   *               
    * @return A map of three sub-maps: tokenToTag, from Penn token ID (int) to 
Penn POS-tag,
    * tokenToBase, from Penn token ID (int) to the base and tokenToQuarks, from 
Penn token ID
    * (int) to a List of quark IDs contained in that token.
-   * @throws IOException if anything goes wrong
+   * @throws IOException Thrown if IO errors occurred.
    */
-  private static Map<String, Map> parsePennTags(InputStream f_penn) throws 
IOException {
-    Map<String, Map> tagsAndBases = new HashMap<>();
+  private static Map<String, Map<Integer, ?>> parsePennTags(InputStream 
f_penn) throws IOException {
+    Map<String, Map<Integer, ?>> tagsAndBases = new HashMap<>();
 
-    try {
+    try (BufferedInputStream bStream = new BufferedInputStream(f_penn)) {
       SAXParser saxParser = XmlUtil.createSaxParser();
       MascPennTagParser handler = new MascPennTagParser();
       try {
-        saxParser.parse(f_penn, handler);
+        saxParser.parse(bStream, handler);
       } catch (SAXException e) {
         throw new IOException("Could not parse the Penn tag annotation file");
       }
@@ -208,28 +203,25 @@ public class MascDocument {
       tagsAndBases.put("tokenToQuarks", handler.getTokenToQuarks());
 
       return tagsAndBases;
-
-    } finally {
-      f_penn.close();
     }
   }
 
   /**
-   * Parses the named entity stand-off annotation
+   * Parses the named entity stand-off annotation.
    *
-   * @param f_ne The file with named entity annotations
+   * @param f_ne A valid, open {@link InputStream stream} for a file with 
named entity annotations.
    * @return A map with two sub-maps, entityIDtoEntityType, mapping entity ID 
integers
    * to entity type Strings, and entityIDsToTokens, mapping entity ID integers 
to Penn
-   * token ID integers
-   * @throws IOException if anything goes wrong
+   * token ID integers.
+   * @throws IOException Thrown if IO errors occurred.
    */
-  private static Map<String, Map> parseNamedEntity(InputStream f_ne) throws 
IOException {
+  private static Map<String, Map<Integer, ?>> parseNamedEntity(InputStream 
f_ne) throws IOException {
 
-    try {
+    try (BufferedInputStream bStream = new BufferedInputStream(f_ne)) {
       SAXParser saxParser = XmlUtil.createSaxParser();
       MascNamedEntityParser handler = new MascNamedEntityParser();
       try {
-        saxParser.parse(f_ne, handler);
+        saxParser.parse(bStream, handler);
       } catch (SAXException e) {
         System.out.println(e.getMessage());
         throw new IOException("Could not parse the named entity annotation 
file");
@@ -237,13 +229,10 @@ public class MascDocument {
 
       Map<Integer, String> entityIDtoEntityType = 
handler.getEntityIDtoEntityType();
       Map<Integer, List<Integer>> entityIDsToTokens = 
handler.getEntityIDsToTokens();
-      Map<String, Map> results = new HashMap<>();
+      Map<String, Map<Integer, ?>> results = new HashMap<>();
       results.put("entityIDtoEntityType", entityIDtoEntityType);
       results.put("entityIDsToTokens", entityIDsToTokens);
       return results;
-
-    } finally {
-      f_ne.close();
     }
   }
 
@@ -251,14 +240,13 @@ public class MascDocument {
    * Combines the raw text with annotations that every file should have.
    *
    * @param text          The raw text.
-   * @param sentenceSpans The spans definining individual sentences. Overlaps 
are not permitted.
+   * @param sentenceSpans The spans defining individual sentences. Overlaps 
are not permitted.
    * @param words         The quarks of the raw text.
    * @return A list of sentences, each of which is a list of quarks. Some 
quarks may belong to
    * more than one sentence. Quarks which do not belong to a single sentence 
are silently dropped.
    * @throws IOException If sentences and quarks cannot be aligned.
    */
-  private static List<MascSentence> combineAnnotations(String text,
-                                                       List<Span> 
sentenceSpans,
+  private static List<MascSentence> combineAnnotations(String text, List<Span> 
sentenceSpans,
                                                        List<MascWord> words) 
throws IOException {
 
     int wordIndex = 0;
@@ -270,21 +258,21 @@ public class MascDocument {
         int sentenceStart = s.getStart();
         int sentenceEnd = s.getEnd();
 
-        //todo: is it okay that quarks can cross sentence boundary? What are 
the implications?
+        // TODO: is it okay that quarks can cross sentence boundary? What are 
the implications?
         /*
-        Allow quarks to cross sentence boundary.
-        The decisive factor determining if a quark belongs to a sentence is if 
they overlap.
-        I.e. sent.getEnd() > quark.getStart() && sent.getStart() < 
quark.getEnd()
+         * Allow quarks to cross sentence boundary.
+         * The decisive factor determining if a quark belongs to a sentence is 
if they overlap.
+         * I.e. sent.getEnd() > quark.getStart() && sent.getStart() < 
quark.getEnd()
          */
         MascWord nextWord = words.get(wordIndex);
-        //Find sentence beginning, should not be needed unless overlaps occur
+        // Find sentence beginning, should not be needed unless overlaps occur
         while (sentenceStart < nextWord.getEnd() && wordIndex > 0) {
           wordIndex--;
           nextWord = words.get(wordIndex);
         }
 
-        //todo: can this be translated into Span's methods 
.crosses()/.contains()?
-        //find all quarks contained or crossing the span of that sentence
+        // TODO: can this be translated into Span's methods 
.crosses()/.contains()?
+        // Find all quarks contained or crossing the span of that sentence
         boolean sentenceOver = false;
         while ((!sentenceOver) && wordIndex < wordCount) {
           nextWord = words.get(wordIndex);
@@ -315,23 +303,23 @@ public class MascDocument {
         sentences.add(sentence);
       }
     }
-
     return Collections.unmodifiableList(sentences);
-
   }
 
 
   /**
-   * Attach the named entity labels to individual tokens
+   * Attaches the named entity labels to individual tokens.
    *
    * @param namedEntities A map with two sub-maps, entityIDtoEntityType, 
mapping entity ID integers
-   *                      * to entity type Strings, and entityIDsToTokens, 
mapping entity ID integers to Penn
-   *                      * token ID integers
+   *                      to entity type Strings, and entityIDsToTokens, 
mapping entity ID integers to Penn
+   *                      token ID integers
    */
-  private void addNamedEntityTags(Map<String, Map> namedEntities) {
+  private void addNamedEntityTags(Map<String, Map<Integer, ?>> namedEntities) {
     try {
-      Map<Integer, String> entityIDtoEntityType = 
namedEntities.get("entityIDtoEntityType");
-      Map<Integer, List<Integer>> entityIDsToTokens = 
namedEntities.get("entityIDsToTokens");
+      Map<Integer, String> entityIDtoEntityType =
+              (Map<Integer, String>) namedEntities.get("entityIDtoEntityType");
+      Map<Integer, List<Integer>> entityIDsToTokens =
+              (Map<Integer, List<Integer>>) 
namedEntities.get("entityIDsToTokens");
 
       for (MascSentence s : sentences) {
         boolean success = s.addNamedEntities(entityIDtoEntityType, 
entityIDsToTokens);
@@ -356,12 +344,12 @@ public class MascDocument {
    *                * tokenToBase, from Penn token ID (int) to the base and 
tokenToQuarks, from Penn token ID
    *                * (int) to a List of quark IDs contained in that token.
    */
-  private void addPennTags(Map<String, Map> tagMaps) throws IOException {
+  private void addPennTags(Map<String, Map<Integer, ?>> tagMaps) throws 
IOException {
     try {
       // Extract individual mappings
-      Map<Integer, String> tokenToTag = tagMaps.get("tokenToTag");
-      Map<Integer, String> tokenToBase = tagMaps.get("tokenToBase");
-      Map<Integer, int[]> tokenToQuarks = tagMaps.get("tokenToQuarks");
+      Map<Integer, String> tokenToTag = (Map<Integer, String>) 
tagMaps.get("tokenToTag");
+      Map<Integer, String> tokenToBase = (Map<Integer, String>) 
tagMaps.get("tokenToBase");
+      Map<Integer, int[]> tokenToQuarks = (Map<Integer, int[]>) 
tagMaps.get("tokenToQuarks");
 
       //Check that all tokens have at least one quark.
       for (Map.Entry<Integer, int[]> token : tokenToQuarks.entrySet()) {
@@ -409,22 +397,25 @@ public class MascDocument {
 
 
   /**
-   * Check whether there is Penn tagging produced by GATE-5.0 ANNIE
+   * Checks whether there is Penn tagging produced by GATE-5.0 ANNIE.
    *
-   * @return true if this file has aligned tags/tokens
+   * @return {@code true} if this file has aligned tags/tokens, {@code false} 
otherwise.
    */
   public boolean hasPennTags() {
     return hasPennTags;
   }
 
+  /**
+   * Checks whether there is NER by GATE-5.0 ANNIE.
+   *
+   * @return {@code true} if this file has named entities, {@code false} 
otherwise.
+   */
   public boolean hasNamedEntities() {
     return hasNamedEntities;
   }
 
   /**
-   * Get next sentence.
-   *
-   * @return Next sentence or null if end of document reached.
+   * @return Retrieves the next sentence or {@code null} if end of document 
reached.
    */
   public MascSentence read() {
     MascSentence next = null;
@@ -435,7 +426,7 @@ public class MascDocument {
   }
 
   /**
-   * Return the reading of sentences to the beginning of the document.
+   * Resets the reading of sentences to the beginning of the document.
    */
   public void reset() {
     this.sentenceIterator = this.sentences.iterator();
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
index 4dffcf43..96ec80d3 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
@@ -44,7 +44,7 @@ public class MascDocumentStream implements 
ObjectStream<MascDocument> {
   /**
    * A helper class to parse the header (.hdr) files.
    */
-  private class HeaderHandler extends DefaultHandler {
+  private static class HeaderHandler extends DefaultHandler {
     private HashMap<String, String> annotationFiles = null;
     private String file = null;
     private String fType = null;
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntityParser.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntityParser.java
index c1e22de0..f11ecdd2 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntityParser.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntityParser.java
@@ -31,9 +31,9 @@ import org.xml.sax.helpers.DefaultHandler;
  */
 public class MascNamedEntityParser extends DefaultHandler {
 
-  private Map<Integer, String> entityIDtoEntityType = new HashMap<>();
-  private Map<Integer, List<Integer>> entityIDsToTokens = new HashMap<>();
-  private Map<Integer, String> tokenToEntity = new HashMap<>();
+  private final Map<Integer, String> entityIDtoEntityType = new HashMap<>();
+  private final Map<Integer, List<Integer>> entityIDsToTokens = new 
HashMap<>();
+  private final Map<Integer, String> tokenToEntity = new HashMap<>();
 
   public Map<Integer, String> getEntityIDtoEntityType() {
     return entityIDtoEntityType;
@@ -86,7 +86,6 @@ public class MascNamedEntityParser extends DefaultHandler {
           System.out.println("[WARNING] One token assigned to different named 
entity types.\n" +
               "\tPenn-TokenID: " + tokenID + "\n\tToken types: \"" + type + 
"\", \"" +
               tokenToEntity.get(tokenID) + "\"\n\tKeeping only " + "\"type\"");
-          int i = 0;
         }
         tokenToEntity.put(tokenID, type);
       }
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStream.java
index dd7c6da9..826e5e44 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStream.java
@@ -27,13 +27,13 @@ import opennlp.tools.util.Span;
 
 public class MascNamedEntitySampleStream extends 
FilterObjectStream<MascDocument, NameSample> {
 
-  MascDocument buffer;
+  private MascDocument buffer;
 
   /**
-   * Create a stream of named entity samples from a stream of MascDocuments
+   * Initializes {@link MascNamedEntitySampleStream} from a stream of {@link 
MascDocument documents}.
    *
-   * @param samples a MascDocumentStream
-   * @throws IOException
+   * @param samples A {@link ObjectStream<MascDocument>} of samples.
+   * @throws IOException Thrown if none of the documents has NE labels.
    */
   public MascNamedEntitySampleStream(ObjectStream<MascDocument> samples) 
throws IOException {
     super(samples);
@@ -48,16 +48,18 @@ public class MascNamedEntitySampleStream extends 
FilterObjectStream<MascDocument
   }
 
   /**
-   * Get the next sample of named entities.
+   * Reads the next sample of named entities.
    *
-   * @return One sentence together with its named entity annotation
-   * @throws IOException if the sample cannot be extracted
+   * @return One {@link NameSample sentence together with its named entity 
annotation}.
+   * @throws IOException Thrown if the sample cannot be extracted
    */
+  @Override
   public NameSample read() throws IOException {
 
-    /* Read the documents one sentence at a time
-    If the document is over, move to the next one
-    If both document stream and sentence stream are over, return null
+    /*
+     * Read the documents one sentence at a time
+     * If the document is over, move to the next one
+     * If both document stream and sentence stream are over, return null
      */
     try {
       MascSentence sentence = buffer.read();
@@ -79,7 +81,7 @@ public class MascNamedEntitySampleStream extends 
FilterObjectStream<MascDocument
       Span[] namedEntitiesArray = new Span[namedEntities.size()];
       namedEntities.toArray(namedEntitiesArray);
 
-      //todo: should the user decide about clearAdaptiveData?
+      // TODO: should the user decide about clearAdaptiveData?
       return new NameSample(tokensArray, namedEntitiesArray, true);
 
     } catch (IOException e) {
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamFactory.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamFactory.java
index 258cac96..1adf3535 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamFactory.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamFactory.java
@@ -38,15 +38,14 @@ public class MascNamedEntitySampleStreamFactory<P> extends 
AbstractSampleStreamF
   public static void registerFactory() {
     StreamFactoryRegistry.registerFactory(NameSample.class,
         MASC_FORMAT,
-        new opennlp.tools.formats.masc.MascNamedEntitySampleStreamFactory<>(
-            
opennlp.tools.formats.masc.MascNamedEntitySampleStreamFactory.Parameters.class));
+        new MascNamedEntitySampleStreamFactory<>(
+            MascNamedEntitySampleStreamFactory.Parameters.class));
   }
 
   @Override
   public ObjectStream<NameSample> create(String[] args) {
-    opennlp.tools.formats.masc.MascNamedEntitySampleStreamFactory.Parameters 
params =
-        ArgumentParser.parse(args,
-            
opennlp.tools.formats.masc.MascNamedEntitySampleStreamFactory.Parameters.class);
+    MascNamedEntitySampleStreamFactory.Parameters params =
+        ArgumentParser.parse(args, 
MascNamedEntitySampleStreamFactory.Parameters.class);
 
     try {
       FileFilter fileFilter = pathname -> 
pathname.getName().contains(params.getFileFilter());
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStream.java
index 7d7b2958..d971c260 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStream.java
@@ -26,13 +26,13 @@ import opennlp.tools.util.ObjectStream;
 
 public class MascPOSSampleStream extends FilterObjectStream<MascDocument, 
POSSample> {
 
-  MascDocument buffer;
-
+  private MascDocument buffer;
+  
   /**
-   * Create a stream of POS-samples from a stream of MascDocuments.
+   * Initializes {@link MascPOSSampleStream} from a stream of {@link 
MascDocument documents}.
    *
-   * @param samples A MascDocumentStream.
-   * @throws IOException
+   * @param samples A {@link ObjectStream<MascDocument>} of samples.
+   * @throws IOException Thrown if none of the documents has POS tags.
    */
   public MascPOSSampleStream(ObjectStream<MascDocument> samples) throws 
IOException {
     super(samples);
@@ -45,18 +45,20 @@ public class MascPOSSampleStream extends 
FilterObjectStream<MascDocument, POSSam
           e.getMessage());
     }
   }
-
+  
   /**
-   * Get the next sample
+   * Reads the next sample.
    *
-   * @return One sentence together with its POS tags.
-   * @throws IOException if anything goes wrong.
+   * @return One {@link POSSample sentence together with its POS tags}.
+   * @throws IOException Thrown if the sample cannot be extracted.
    */
+  @Override
   public POSSample read() throws IOException {
 
-    /* Read the documents one sentence at a time
-    If the document is over, move to the next one
-    If both document stream and sentence stream are over, return null
+    /*
+     * Read the documents one sentence at a time
+     * If the document is over, move to the next one
+     * If both document stream and sentence stream are over, return null
      */
     try {
       MascSentence sentence = buffer.read();
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStreamFactory.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStreamFactory.java
index 85aba5d5..4e910c68 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStreamFactory.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStreamFactory.java
@@ -38,15 +38,14 @@ public class MascPOSSampleStreamFactory<P> extends 
AbstractSampleStreamFactory<P
   public static void registerFactory() {
     StreamFactoryRegistry.registerFactory(POSSample.class,
         MASC_FORMAT,
-        new opennlp.tools.formats.masc.MascPOSSampleStreamFactory<>(
-            
opennlp.tools.formats.masc.MascPOSSampleStreamFactory.Parameters.class));
+        new MascPOSSampleStreamFactory<>(
+            MascPOSSampleStreamFactory.Parameters.class));
   }
 
   @Override
   public ObjectStream<POSSample> create(String[] args) {
-    opennlp.tools.formats.masc.MascPOSSampleStreamFactory.Parameters params =
-        ArgumentParser.parse(args,
-            
opennlp.tools.formats.masc.MascPOSSampleStreamFactory.Parameters.class);
+    MascPOSSampleStreamFactory.Parameters params =
+        ArgumentParser.parse(args, 
MascPOSSampleStreamFactory.Parameters.class);
 
     try {
       FileFilter fileFilter = pathname -> 
pathname.getName().contains(params.getFileFilter());
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentence.java 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentence.java
index ba4a1ad9..06c75d5a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentence.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentence.java
@@ -54,12 +54,13 @@ public class MascSentence extends Span {
      * Extract a quark by its key
      *
      * @param key The quark's ID
-     * @return The quark reference
-     * @throws IOException if not found in the document
+     * @return The {@link MascWord quark reference}.
+     *
+     * @throws IOException Thrown if the {@code key} was not found in the 
document.
      */
     protected MascWord get(int key) throws IOException {
-      //We first check if this word is in the sentence
-      //todo: evaluate the necessity: HashMaps are O(1), right?
+      // First, check if this word is in the sentence
+      // TODO: evaluate the necessity: HashMaps are O(1), right?
       if (wordsById.containsKey(key)) {
         return wordsById.get(key);
       } else {
@@ -83,7 +84,7 @@ public class MascSentence extends Span {
   private List<Span> namedEntities = new ArrayList<>();
 
   /**
-   * Create a MascSentence, containing its associated text and quarks
+   * Initializes a {@link MascSentence} containing its associated text and 
quarks
    *
    * @param s              Start of the sentence within the corpus file
    * @param e              End of the sentence within the corpus file
@@ -107,14 +108,15 @@ public class MascSentence extends Span {
   }
 
   /**
-   * Add the Penn tokenization and POS tagging to the sentence
+   * Add the Penn tokenization and POS tagging to the sentence.
+   *
+   * @param tokenToQuarks A map from token ID to quarks in that token.
+   * @param quarkToTokens A map of quark IDs and the token IDs containing that 
quark.
+   * @param tokenToBase   Token ID to the token base.
+   * @param tokenToTag    Token ID to the POS tag.
    *
-   * @param tokenToQuarks A map from token ID to quarks in that token
-   * @param quarkToTokens A map of quark IDs and the token IDs containing that 
quark
-   * @param tokenToBase   Token ID to the token base
-   * @param tokenToTag    Token ID to the POS tag
-   * @return true if no issue encountered, false if tokens cross sentence 
boundaries
-   * @throws IOException If anything goes wrong
+   * @return {@code true} if no issue encountered, {@code false} if tokens 
cross sentence boundaries.
+   * @throws IOException Thrown if IO errors occurred.
    */
   boolean tokenizePenn(Map<Integer, int[]> tokenToQuarks,
                        Map<Integer, int[]> quarkToTokens,
@@ -151,7 +153,7 @@ public class MascSentence extends Span {
               }
             }
 
-            /*Because there are some quarks which are parts of tokens outside 
of a sentence
+            /*Because there are some quarks which are parts of tokens outside 
a sentence
             We need to check every time if that quark was actually assigned to 
the sentence
             If not, we need to extract it manually from the whole document*/
             MascWord[] quarks = new MascWord[quarksOfToken.length]; //Get the 
actual quark references
@@ -186,14 +188,15 @@ public class MascSentence extends Span {
    *
    * @param entityIDtoEntityType Maps the named entity ID to its type
    * @param entityIDsToTokens    A list of tokens covered by each named entity
-   * @return true if all went well, false if named entities overlap
-   * @throws IOException if anything goes wrong
+   *
+   * @return {@code true} if all went well, {@code false} if named entities 
overlap.
+   * @throws IOException Thrown if IO errors occurred.
    */
   boolean addNamedEntities(Map<Integer, String> entityIDtoEntityType,
                            Map<Integer, List<Integer>> entityIDsToTokens) 
throws IOException {
     boolean fileWithoutIssues = true;
     if (sentenceTokens == null) {
-      throw new IOException("Named entity labels provided for un untokenized 
sentence.");
+      throw new IOException("Named entity labels provided for an un-tokenized 
sentence.");
     }
 
     //for each named entity identify its span
@@ -264,27 +267,21 @@ public class MascSentence extends Span {
   }
 
   /**
-   * Get the named entities
-   *
-   * @return List of named entities defined as token span, e.g. Span(1,3, 
"org") for tokens [1,3)
+   * @return The {@link Span named entities} as a list, e.g. {@code 
Span(1,3, "org")} for tokens [1,3).
    */
   public List<Span> getNamedEntities() {
     return namedEntities;
   }
 
   /**
-   * Get the sentence text
-   *
-   * @return Text of the sentence as defined by the sentence segmentation 
annotation.
+   * @return Retrieves text of the sentence as defined by the sentence 
segmentation annotation.
    */
   public String getSentDetectText() {
     return text.substring(getStart(), getEnd());
   }
 
   /**
-   * Get the text of the sentence tokens
-   *
-   * @return Text of the sentence as defined by the tokens in it.
+   * @return Retrieves text of the sentence as defined by the tokens in it.
    */
   public String getTokenText() {
     if (sentenceTokens.isEmpty()) {
@@ -295,8 +292,6 @@ public class MascSentence extends Span {
   }
 
   /**
-   * Get the text of the sentence tokens
-   *
    * @return The texts of the individual tokens in the sentence
    */
   public List<String> getTokenStrings() {
@@ -310,9 +305,10 @@ public class MascSentence extends Span {
   }
 
   /**
-   * Get the boundaries of individual tokens
+   * Retrieves the boundaries of individual tokens.
    *
-   * @return Spans representing the tokens of the sentence (according to Penn 
tokenization)
+   * @return The list of {@link Span spans} representing the tokens of the 
sentence,
+   *         according to Penn tokenization.
    */
   public List<Span> getTokensSpans() {
 
@@ -327,10 +323,9 @@ public class MascSentence extends Span {
   }
 
   /**
-   * Get the tags of tokens in the sentence
-   *
-   * @return A list of individual tags
-   * @throws IOException if used on an untokenized sentence
+   * @return Get the (individual) tags of tokens in the sentence.
+   * 
+   * @throws IOException Thrown if used on an un-tokenized sentence.
    */
   public List<String> getTags() throws IOException {
     List<String> tags = new ArrayList<>();
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceParser.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceParser.java
index 7a679a0e..b970d6d2 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceParser.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceParser.java
@@ -27,11 +27,11 @@ import org.xml.sax.helpers.DefaultHandler;
 import opennlp.tools.util.Span;
 
 /**
- * A class to parse the sentence segmentation stand-off annotation
+ * A class to parse the sentence segmentation stand-off annotation.
  */
 class MascSentenceParser extends DefaultHandler {
 
-  private List<Span> sentenceAnchors = null;
+  private final List<Span> sentenceAnchors = new ArrayList<>();
 
   public List<Span> getAnchors() {
     return sentenceAnchors;
@@ -49,11 +49,6 @@ class MascSentenceParser extends DefaultHandler {
         int left = Integer.parseInt(anchors[0]);
         int right = Integer.parseInt(anchors[1]);
 
-        // initialize list
-        if (sentenceAnchors == null) {
-          sentenceAnchors = new ArrayList<Span>();
-        }
-
         sentenceAnchors.add(new Span(left, right));
       }
 
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStream.java
index 7e8a5dbc..39248390 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStream.java
@@ -39,10 +39,12 @@ public class MascSentenceSampleStream extends 
FilterObjectStream<MascDocument, S
   }
 
   /**
-   * Reads a new sample of sentences
+   * Reads a new {@link SentenceSample sample of sentences}.
    *
-   * @return The specified number of sentences. If fewer left, then return 
whatever is left.
-   * @throws IOException
+   * @return The {@link SentenceSample specified number of sentences}.
+   *         If fewer left, then return whatever is left.
+   *
+   * @throws IOException Thrown if IO errors occurred during read operation.
    */
   @Override
   public SentenceSample read() throws IOException {
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStreamFactory.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStreamFactory.java
index ce55cfe3..d44bf780 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStreamFactory.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStreamFactory.java
@@ -37,17 +37,14 @@ public class MascSentenceSampleStreamFactory<P> extends 
AbstractSampleStreamFact
   }
 
   public static void registerFactory() {
-    StreamFactoryRegistry.registerFactory(SentenceSample.class,
-        MASC_FORMAT,
-        new opennlp.tools.formats.masc.MascSentenceSampleStreamFactory<>(
-            
opennlp.tools.formats.masc.MascSentenceSampleStreamFactory.Parameters.class));
+    StreamFactoryRegistry.registerFactory(SentenceSample.class, MASC_FORMAT,
+        new 
MascSentenceSampleStreamFactory<>(MascSentenceSampleStreamFactory.Parameters.class));
   }
 
   @Override
   public ObjectStream<SentenceSample> create(String[] args) {
-    opennlp.tools.formats.masc.MascSentenceSampleStreamFactory.Parameters 
params =
-        ArgumentParser.parse(args,
-            
opennlp.tools.formats.masc.MascSentenceSampleStreamFactory.Parameters.class);
+    MascSentenceSampleStreamFactory.Parameters params =
+        ArgumentParser.parse(args, 
MascSentenceSampleStreamFactory.Parameters.class);
 
     try {
       FileFilter fileFilter = pathname -> 
pathname.getName().contains(params.getFileFilter());
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascToken.java 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascToken.java
index 593315cf..5fa96eeb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascToken.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascToken.java
@@ -19,6 +19,9 @@ package opennlp.tools.formats.masc;
 
 import opennlp.tools.util.Span;
 
+/**
+ * A specialized {@link Span} to express tokens in {@link MascDocument 
documents}.
+ */
 public class MascToken extends Span {
 
   private static final long serialVersionUID = -780646706788037041L;
@@ -28,14 +31,18 @@ public class MascToken extends Span {
   private final MascWord[] quarks;
 
   /**
-   * Create a MascToken, which may combine multiple quarks
+   * Initializes a {@link MascToken} which may combine multiple quarks.
+   *
+   * @param s      The start of the token in the corpus file.
+   *               Must be equal to or greater than {@code 0}.
+   * @param e      The end of the token in the corpus file.
+   *               Must be equal to or greater than {@code 0} and be greater 
than {@code s}.
+   * @param pennId The ID of the token as assigned by the Penn stand-off 
annotation.
+   * @param pos    The POS-tag.
+   * @param base   The base form.
+   * @param quarks The {@link MascWord array of Quarks} contained in the token.
    *
-   * @param s      The start of the token in the corpus file
-   * @param e      The end of the token in the corpus file
-   * @param pennId The ID of the token as assigned by the Penn stand-off 
annotation
-   * @param pos    The POS-tag
-   * @param base   The base form
-   * @param quarks Quarks contained in the token
+   * @throws IllegalArgumentException Thrown if one of the parameters is 
invalid.
    */
   public MascToken(int s, int e, int pennId, String pos, String base, 
MascWord[] quarks) {
     super(s, e);
@@ -46,36 +53,28 @@ public class MascToken extends Span {
   }
 
   /**
-   * Get ID of the token
-   *
-   * @return the ID
+   * @return Retrieves the ID of the token.
    */
   public int getTokenId() {
     return tokenId;
   }
 
   /**
-   * Get the base form
-   *
-   * @return the base form
+   * @return Retrieves the base form.
    */
   public String getBase() {
     return base;
   }
 
   /**
-   * Get the POS tag
-   *
-   * @return POS tag
+   * @return Retrieves the POS tag.
    */
   public String getPos() {
     return pos;
   }
 
   /**
-   * Get quarks of the token
-   *
-   * @return Array of quark references
+   * @return Retrieves quarks of the token.
    */
   public MascWord[] getQuarks() {
     return quarks;
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStream.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStream.java
index 93fd21d4..fccb15ef 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStream.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStream.java
@@ -27,8 +27,14 @@ import opennlp.tools.util.Span;
 
 public class MascTokenSampleStream extends FilterObjectStream<MascDocument, 
TokenSample> {
 
-  MascDocument buffer;
-
+  private MascDocument buffer;
+
+  /**
+   * Initializes a {@link MascTokenSampleStream}.
+   *
+   * @param samples The {@link ObjectStream} of {@link MascDocument} samples to process.
+   * @throws IOException Thrown if none of the {@link MascDocument documents} 
had Penn tokenization.
+   */
   public MascTokenSampleStream(ObjectStream<MascDocument> samples) throws 
IOException {
     super(samples);
     try {
@@ -41,16 +47,18 @@ public class MascTokenSampleStream extends 
FilterObjectStream<MascDocument, Toke
     }
   }
 
+  @Override
   public TokenSample read() throws IOException {
 
-    /* Read the documents one sentence at a time
-    If the document is over, move to the next one
-    If both document stream and sentence stream are over, return null
+    /*
+     * Read the documents one sentence at a time
+     * If the document is over, move to the next one
+     * If both document stream and sentence stream are over, return null
      */
     try {
       boolean sentenceFound = true;
       String sentenceString;
-      List<Span> tokensSpans;
+      List<Span> tokenSpans;
       MascSentence sentence;
       do {
         sentence = buffer.read();
@@ -65,23 +73,23 @@ public class MascTokenSampleStream extends 
FilterObjectStream<MascDocument, Toke
         }
 
         sentenceString = sentence.getTokenText();
-        tokensSpans = sentence.getTokensSpans();
+        tokenSpans = sentence.getTokensSpans();
 
         if (sentenceString.length() == 0) {
           System.err.println("[WARNING] Zero sentence found: " +
               "there is a sentence without any tokens.");
           System.err.println(sentenceString);
-          System.err.println(tokensSpans.toString());
+          System.err.println(tokenSpans.toString());
           sentenceFound = false;
         }
 
-        for (int i = 0; i < tokensSpans.size(); i++) {
-          Span t = tokensSpans.get(i);
+        for (int i = 0; i < tokenSpans.size(); i++) {
+          Span t = tokenSpans.get(i);
           if (t.getEnd() - t.getStart() == 0) {
             System.err.println("[WARNING] Zero token found: " +
                 "there is a token without any quarks.");
             System.err.println(sentenceString);
-            System.err.println(tokensSpans.toString());
+            System.err.println(tokenSpans);
             sentenceFound = false;
           }
         }
@@ -89,8 +97,8 @@ public class MascTokenSampleStream extends 
FilterObjectStream<MascDocument, Toke
 
       } while (!sentenceFound);
 
-      Span[] tokensSpansArray = new Span[tokensSpans.size()];
-      tokensSpans.toArray(tokensSpansArray);
+      Span[] tokensSpansArray = new Span[tokenSpans.size()];
+      tokenSpans.toArray(tokensSpansArray);
 
       return new TokenSample(sentenceString, tokensSpansArray);
 
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStreamFactory.java
 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStreamFactory.java
index 99bf1f58..c58eb133 100644
--- 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStreamFactory.java
+++ 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStreamFactory.java
@@ -32,24 +32,20 @@ public class MascTokenSampleStreamFactory<P> extends 
AbstractSampleStreamFactory
 
   public static final String MASC_FORMAT = "masc";
 
-
   protected MascTokenSampleStreamFactory(Class<P> params) {
     super(params);
   }
 
   public static void registerFactory() {
-    StreamFactoryRegistry.registerFactory(TokenSample.class,
-        MASC_FORMAT,
-        new opennlp.tools.formats.masc.MascTokenSampleStreamFactory<>(
-            
opennlp.tools.formats.masc.MascTokenSampleStreamFactory.Parameters.class));
+    StreamFactoryRegistry.registerFactory(TokenSample.class, MASC_FORMAT,
+        new 
MascTokenSampleStreamFactory<>(MascTokenSampleStreamFactory.Parameters.class));
   }
 
 
   @Override
   public ObjectStream<TokenSample> create(String[] args) {
-    opennlp.tools.formats.masc.MascTokenSampleStreamFactory.Parameters params =
-        ArgumentParser.parse(args,
-            
opennlp.tools.formats.masc.MascTokenSampleStreamFactory.Parameters.class);
+    MascTokenSampleStreamFactory.Parameters params =
+        ArgumentParser.parse(args, 
MascTokenSampleStreamFactory.Parameters.class);
 
     try {
       FileFilter fileFilter = pathname -> 
pathname.getName().contains(params.getFileFilter());
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWord.java 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWord.java
index 1f3cffc3..916f6d2d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWord.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWord.java
@@ -25,11 +25,15 @@ public class MascWord extends Span {
   private final int id;
 
   /**
-   * Saves one of MASC's quarks - basic-level units (may be sub-word)
+   * Holds one of MASC's quarks, that is: basic-level units (may be sub-word).
    *
-   * @param s  The beginning of the word in the corpus file
-   * @param e  The end of the word in the corpus file
-   * @param id The id as assigned by the stand-off annotation
+   * @param s  The beginning of the word in the corpus file.
+   *           Must be equal to or greater than {@code 0}.
+   * @param e  The end of the word in the corpus file.
+   *           Must be equal to or greater than {@code 0} and be greater than 
{@code s}.
+   * @param id The id as assigned by the stand-off annotation.
+   *           
+   * @throws IllegalArgumentException Thrown if one of the parameters is 
invalid.
    */
   public MascWord(int s, int e, int id) {
     super(s, e);
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWordParser.java 
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWordParser.java
index db57f82d..ea1cb3b7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWordParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWordParser.java
@@ -25,11 +25,11 @@ import org.xml.sax.SAXException;
 import org.xml.sax.helpers.DefaultHandler;
 
 /**
- * Class to parse the word ("quark") segmentation stand-off annotation
+ * Class to parse the word ("quark") segmentation stand-off annotation.
  */
 class MascWordParser extends DefaultHandler {
 
-  private List<MascWord> wordAnchors = null;
+  private final List<MascWord> wordAnchors = new ArrayList<>();
 
   public List<MascWord> getAnchors() {
     return wordAnchors;
@@ -48,11 +48,6 @@ class MascWordParser extends DefaultHandler {
         int left = Integer.parseInt(anchors[0]);
         int right = Integer.parseInt(anchors[1]);
 
-        // initialize list
-        if (wordAnchors == null) {
-          wordAnchors = new ArrayList<MascWord>();
-        }
-
         wordAnchors.add(new MascWord(left, right, id));
       }
 
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java 
b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
index 94fcd500..9efffbf6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
@@ -136,7 +136,7 @@ public class POSTaggerFactory extends BaseToolFactory {
    * Note:
    * The generators are created on every call to this method.
    *
-   * @return the feature generator or {@link null} if there is no descriptor 
in the model
+   * @return the feature generator or {@code null} if there is no descriptor 
in the model
    */
   public AdaptiveFeatureGenerator createFeatureGenerators() {
 
diff --git 
a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java 
b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
index 10086e9b..5a30dd60 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
@@ -53,7 +53,7 @@ import opennlp.tools.util.TrainingParameters;
  * must be instantiated which can share one {@link TokenizerModel} instance
  * to safe memory.
  * <p>
- * To train a new model, the {@link #train(ObjectStream, TokenizerFactory, 
TrainingParameters) method
+ * To train a new model, the {@link #train(ObjectStream, TokenizerFactory, 
TrainingParameters)} method
  * can be used.
  * <p>
  * Sample usage:
@@ -69,8 +69,6 @@ import opennlp.tools.util.TrainingParameters;
  * <br>
  * String tokens[] = tokenizer.tokenize("A sentence to be tokenized.");
  * </code>
- * <p>
- *   
  * @see Tokenizer
  * @see TokenizerModel
  * @see TokenSample

[opennlp] branch master updated: OPENNLP-1415 Enhance JavaDoc in opennlp.tools.formats.masc package (#460)

Reply via email to