This is an automated email from the ASF dual-hosted git repository.
jzemerick pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/opennlp.git
The following commit(s) were added to refs/heads/master by this push:
new d2ac7d24 OPENNLP-1415 Enhance JavaDoc in opennlp.tools.formats.masc
package (#460)
d2ac7d24 is described below
commit d2ac7d24ffa94f278e6daf3952dc1c76e9445fdf
Author: Martin Wiesner <[email protected]>
AuthorDate: Tue Dec 20 17:24:09 2022 +0100
OPENNLP-1415 Enhance JavaDoc in opennlp.tools.formats.masc package (#460)
- adds missing JavaDoc
- improves existing documentation for clarity
- removes superfluous text
- adds 'final' modifier where useful and applicable
- adds 'Override' annotation where useful and applicable
- fixes several typos
- modernizes handling of AutoCloseable resources in `MascDocument`
- reduces compiler warnings
---
.../opennlp/tools/formats/masc/MascDocument.java | 165 ++++++++++-----------
.../tools/formats/masc/MascDocumentStream.java | 2 +-
.../tools/formats/masc/MascNamedEntityParser.java | 7 +-
.../formats/masc/MascNamedEntitySampleStream.java | 24 +--
.../masc/MascNamedEntitySampleStreamFactory.java | 9 +-
.../tools/formats/masc/MascPOSSampleStream.java | 26 ++--
.../formats/masc/MascPOSSampleStreamFactory.java | 9 +-
.../opennlp/tools/formats/masc/MascSentence.java | 61 ++++----
.../tools/formats/masc/MascSentenceParser.java | 9 +-
.../formats/masc/MascSentenceSampleStream.java | 8 +-
.../masc/MascSentenceSampleStreamFactory.java | 11 +-
.../java/opennlp/tools/formats/masc/MascToken.java | 37 +++--
.../tools/formats/masc/MascTokenSampleStream.java | 34 +++--
.../formats/masc/MascTokenSampleStreamFactory.java | 12 +-
.../java/opennlp/tools/formats/masc/MascWord.java | 12 +-
.../opennlp/tools/formats/masc/MascWordParser.java | 9 +-
.../opennlp/tools/postag/POSTaggerFactory.java | 2 +-
.../java/opennlp/tools/tokenize/TokenizerME.java | 4 +-
18 files changed, 211 insertions(+), 230 deletions(-)
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocument.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocument.java
index 0e7af1a2..01bea73f 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocument.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocument.java
@@ -17,6 +17,7 @@
package opennlp.tools.formats.masc;
+import java.io.BufferedInputStream;
import java.io.BufferedReader;
import java.io.IOException;
import java.io.InputStream;
@@ -54,16 +55,16 @@ public class MascDocument {
}
/**
- * Creates a MASC document with all of the stand-off annotations translated
into the internal
- * structure.
+ * Initializes a {@link MascDocument} with all the stand-off annotations
translated into the
+ * internal structure.
*
* @param path The path where the document header is.
- * @param f_primary The file with the raw corpus text.
- * @param f_seg The file with segmentation into quarks.
- * @param f_ne The file with named entities.
- * @param f_penn The file with tokenization and Penn POS tags produced
+ * @param f_primary The {@link InputStream file} with the raw corpus text.
+ * @param f_seg The {@link InputStream file} with segmentation into
quarks.
+ * @param f_ne The {@link InputStream file} with named entities.
+ * @param f_penn The {@link InputStream file} with tokenization and Penn
POS tags produced
* by GATE-5.0 ANNIE application.
- * @param f_s The file with sentence boundaries.
+ * @param f_s The {@link InputStream file} with sentence boundaries.
* @return A document containing the text and its annotations. Immutability
is not guaranteed yet.
* @throws IOException if the raw data cannot be read or the alignment of
the raw data
* with annotations fails
@@ -77,7 +78,7 @@ public class MascDocument {
List<Span> sentenceSpans = parseSentences(f_s);
List<MascSentence> sentences = combineAnnotations(text, sentenceSpans,
words);
- MascDocument doc = new MascDocument(path, sentences);
+ final MascDocument doc = new MascDocument(path, sentences);
// if the file has Penn POS tags, add them
if (f_penn != null) {
@@ -88,21 +89,21 @@ public class MascDocument {
doc.addNamedEntityTags(parseNamedEntity(f_ne));
}
- //todo: make the annotations immutable
- //todo: should we cleanup the document (e.g. remove sentences without
tokens?)
+ //TODO: make the annotations immutable
+ //TODO: should we cleanup the document (e.g. remove sentences without
tokens?)
return doc;
}
/**
- * Read in the corpus file text
+ * Reads in the corpus file text.
*
- * @param stream The corpus file
- * @return The text of the file
- * @throws IOException if anything goes wrong
+ * @param stream A valid, open {@link InputStream stream} for a corpus file.
+ *
+ * @return The text of the file.
+ * @throws IOException Thrown if IO errors occurred.
*/
private static String readText(InputStream stream) throws IOException {
- try {
- Reader reader = new BufferedReader(new InputStreamReader(stream,
StandardCharsets.UTF_8));
+ try (Reader reader = new BufferedReader(new InputStreamReader(stream,
StandardCharsets.UTF_8))) {
StringBuilder contents = new StringBuilder();
char[] buffer = new char[8192];
int read;
@@ -110,9 +111,6 @@ public class MascDocument {
contents.append(buffer, 0, read);
}
return contents.toString();
- } finally {
- // this may throw an exception
- stream.close();
}
}
@@ -120,52 +118,51 @@ public class MascDocument {
/**
* Parses the word segmentation stand-off annotation
*
- * @param f_seg The file with segmentation
+ * @param f_seg A valid, open {@link InputStream stream} for a file with
segmentation.
* @return A list of individual quarks, expressed as MascWord-s
- * @throws IOException if anything goes wrong
+ * @throws IOException Thrown if IO errors occurred.
*/
private static List<MascWord> parseWords(InputStream f_seg) throws
IOException {
- try {
+ try (BufferedInputStream bStream = new BufferedInputStream(f_seg)) {
SAXParser saxParser = XmlUtil.createSaxParser();
MascWordParser handler = new MascWordParser();
try {
- saxParser.parse(f_seg, handler);
+ saxParser.parse(bStream, handler);
} catch (SAXException e) {
throw new IOException("Could not parse the region annotation file");
}
return Collections.unmodifiableList(handler.getAnchors());
-
- } finally {
- f_seg.close();
}
}
/**
- * Parse the sentence annotation file, align it with the raw text
+ * Parses the sentence annotation file, align it with the raw text
*
- * @param f_s the sentence annotation file
- * @return the list of Spans delimiting each sentence
+ * @param f_s A valid, open {@link InputStream stream} for a sentence
annotation file.
+ * @return The {@link List<Span>} delimiting each sentence.
* @throws IOException if the sentence file cannot be parsed or closed
*/
private static List<Span> parseSentences(InputStream f_s) throws IOException
{
- try {
+ try (BufferedInputStream bStream = new BufferedInputStream(f_s)) {
SAXParser saxParser = XmlUtil.createSaxParser();
MascSentenceParser handler = new MascSentenceParser();
try {
- saxParser.parse(f_s, handler);
+ saxParser.parse(bStream, handler);
} catch (SAXException e) {
throw new IOException("Could not parse the sentence annotation file");
}
List<Span> anchors = handler.getAnchors();
- /*Filter out sentence overlaps.
- Keep only those sentences where sentence.end < nextsentence.beginning
- avoid deleting in the middle and repeatedly shifting the list by copying
into a new list*/
- //todo: can we know a priori, if we need this filtering?
+ /*
+ * Filter out sentence overlaps.
+ * Keep only those sentences where sentence.end < nextSentence.beginning
+ * avoid deleting in the middle and repeatedly shifting the list by
copying into a new list
+ */
+ //TODO: can we know a priori, if we need this filtering?
List<Span> filteredAnchors = new ArrayList<>();
for (int i = 0; i < anchors.size() - 1; i++) {
if (anchors.get(i).getEnd() < anchors.get(i + 1).getStart()) {
@@ -175,30 +172,28 @@ public class MascDocument {
filteredAnchors.add(anchors.get(anchors.size() - 1));
return Collections.unmodifiableList(filteredAnchors);
-
- } finally {
- f_s.close();
}
}
/**
- * Parses the Penn-POS (GATE5-ANNIE) stand-off annotation
+ * Parses the Penn-POS (GATE5-ANNIE) stand-off annotation.
*
- * @param f_penn The file with Penn POS tags
+ * @param f_penn A valid, open {@link InputStream stream} for a file with
Penn POS tags.
+ *
* @return A map of three sub-maps: tokenToTag, from Penn token ID (int) to
Penn POS-tag,
* tokenToBase, from Penn token ID (int) to the base and tokenToQuarks, from
Penn token ID
* (int) to a List of quark IDs contained in that token.
- * @throws IOException if anything goes wrong
+ * @throws IOException Thrown if IO errors occurred.
*/
- private static Map<String, Map> parsePennTags(InputStream f_penn) throws
IOException {
- Map<String, Map> tagsAndBases = new HashMap<>();
+ private static Map<String, Map<Integer, ?>> parsePennTags(InputStream
f_penn) throws IOException {
+ Map<String, Map<Integer, ?>> tagsAndBases = new HashMap<>();
- try {
+ try (BufferedInputStream bStream = new BufferedInputStream(f_penn)) {
SAXParser saxParser = XmlUtil.createSaxParser();
MascPennTagParser handler = new MascPennTagParser();
try {
- saxParser.parse(f_penn, handler);
+ saxParser.parse(bStream, handler);
} catch (SAXException e) {
throw new IOException("Could not parse the Penn tag annotation file");
}
@@ -208,28 +203,25 @@ public class MascDocument {
tagsAndBases.put("tokenToQuarks", handler.getTokenToQuarks());
return tagsAndBases;
-
- } finally {
- f_penn.close();
}
}
/**
- * Parses the named entity stand-off annotation
+ * Parses the named entity stand-off annotation.
*
- * @param f_ne The file with named entity annotations
+ * @param f_ne A valid, open {@link InputStream stream} for a file with
named entity annotations.
* @return A map with two sub-maps, entityIDtoEntityType, mapping entity ID
integers
* to entity type Strings, and entityIDsToTokens, mapping entity ID integers
to Penn
- * token ID integers
- * @throws IOException if anything goes wrong
+ * token ID integers.
+ * @throws IOException Thrown if IO errors occurred.
*/
- private static Map<String, Map> parseNamedEntity(InputStream f_ne) throws
IOException {
+ private static Map<String, Map<Integer, ?>> parseNamedEntity(InputStream
f_ne) throws IOException {
- try {
+ try (BufferedInputStream bStream = new BufferedInputStream(f_ne)) {
SAXParser saxParser = XmlUtil.createSaxParser();
MascNamedEntityParser handler = new MascNamedEntityParser();
try {
- saxParser.parse(f_ne, handler);
+ saxParser.parse(bStream, handler);
} catch (SAXException e) {
System.out.println(e.getMessage());
throw new IOException("Could not parse the named entity annotation
file");
@@ -237,13 +229,10 @@ public class MascDocument {
Map<Integer, String> entityIDtoEntityType =
handler.getEntityIDtoEntityType();
Map<Integer, List<Integer>> entityIDsToTokens =
handler.getEntityIDsToTokens();
- Map<String, Map> results = new HashMap<>();
+ Map<String, Map<Integer, ?>> results = new HashMap<>();
results.put("entityIDtoEntityType", entityIDtoEntityType);
results.put("entityIDsToTokens", entityIDsToTokens);
return results;
-
- } finally {
- f_ne.close();
}
}
@@ -251,14 +240,13 @@ public class MascDocument {
* Combines the raw text with annotations that every file should have.
*
* @param text The raw text.
- * @param sentenceSpans The spans definining individual sentences. Overlaps
are not permitted.
+ * @param sentenceSpans The spans defining individual sentences. Overlaps
are not permitted.
* @param words The quarks of the raw text.
* @return A list of sentences, each of which is a list of quarks. Some
quarks may belong to
* more than one sentence. Quarks which do not belong to a single sentence
are silently dropped.
* @throws IOException If sentences and quarks cannot be aligned.
*/
- private static List<MascSentence> combineAnnotations(String text,
- List<Span>
sentenceSpans,
+ private static List<MascSentence> combineAnnotations(String text, List<Span>
sentenceSpans,
List<MascWord> words)
throws IOException {
int wordIndex = 0;
@@ -270,21 +258,21 @@ public class MascDocument {
int sentenceStart = s.getStart();
int sentenceEnd = s.getEnd();
- //todo: is it okay that quarks can cross sentence boundary? What are
the implications?
+ // TODO: is it okay that quarks can cross sentence boundary? What are
the implications?
/*
- Allow quarks to cross sentence boundary.
- The decisive factor determining if a quark belongs to a sentence is if
they overlap.
- I.e. sent.getEnd() > quark.getStart() && sent.getStart() <
quark.getEnd()
+ * Allow quarks to cross sentence boundary.
+ * The decisive factor determining if a quark belongs to a sentence is
if they overlap.
+ * I.e. sent.getEnd() > quark.getStart() && sent.getStart() <
quark.getEnd()
*/
MascWord nextWord = words.get(wordIndex);
- //Find sentence beginning, should not be needed unless overlaps occur
+ // Find sentence beginning, should not be needed unless overlaps occur
while (sentenceStart < nextWord.getEnd() && wordIndex > 0) {
wordIndex--;
nextWord = words.get(wordIndex);
}
- //todo: can this be translated into Span's methods
.crosses()/.contains()?
- //find all quarks contained or crossing the span of that sentence
+ // TODO: can this be translated into Span's methods
.crosses()/.contains()?
+ // Find all quarks contained or crossing the span of that sentence
boolean sentenceOver = false;
while ((!sentenceOver) && wordIndex < wordCount) {
nextWord = words.get(wordIndex);
@@ -315,23 +303,23 @@ public class MascDocument {
sentences.add(sentence);
}
}
-
return Collections.unmodifiableList(sentences);
-
}
/**
- * Attach the named entity labels to individual tokens
+ * Attaches the named entity labels to individual tokens.
*
* @param namedEntities A map with two sub-maps, entityIDtoEntityType,
mapping entity ID integers
- * * to entity type Strings, and entityIDsToTokens,
mapping entity ID integers to Penn
- * * token ID integers
+ * to entity type Strings, and entityIDsToTokens,
mapping entity ID integers to Penn
+ * token ID integers
*/
- private void addNamedEntityTags(Map<String, Map> namedEntities) {
+ private void addNamedEntityTags(Map<String, Map<Integer, ?>> namedEntities) {
try {
- Map<Integer, String> entityIDtoEntityType =
namedEntities.get("entityIDtoEntityType");
- Map<Integer, List<Integer>> entityIDsToTokens =
namedEntities.get("entityIDsToTokens");
+ Map<Integer, String> entityIDtoEntityType =
+ (Map<Integer, String>) namedEntities.get("entityIDtoEntityType");
+ Map<Integer, List<Integer>> entityIDsToTokens =
+ (Map<Integer, List<Integer>>)
namedEntities.get("entityIDsToTokens");
for (MascSentence s : sentences) {
boolean success = s.addNamedEntities(entityIDtoEntityType,
entityIDsToTokens);
@@ -356,12 +344,12 @@ public class MascDocument {
* * tokenToBase, from Penn token ID (int) to the base and
tokenToQuarks, from Penn token ID
* * (int) to a List of quark IDs contained in that token.
*/
- private void addPennTags(Map<String, Map> tagMaps) throws IOException {
+ private void addPennTags(Map<String, Map<Integer, ?>> tagMaps) throws
IOException {
try {
// Extract individual mappings
- Map<Integer, String> tokenToTag = tagMaps.get("tokenToTag");
- Map<Integer, String> tokenToBase = tagMaps.get("tokenToBase");
- Map<Integer, int[]> tokenToQuarks = tagMaps.get("tokenToQuarks");
+ Map<Integer, String> tokenToTag = (Map<Integer, String>)
tagMaps.get("tokenToTag");
+ Map<Integer, String> tokenToBase = (Map<Integer, String>)
tagMaps.get("tokenToBase");
+ Map<Integer, int[]> tokenToQuarks = (Map<Integer, int[]>)
tagMaps.get("tokenToQuarks");
//Check that all tokens have at least one quark.
for (Map.Entry<Integer, int[]> token : tokenToQuarks.entrySet()) {
@@ -409,22 +397,25 @@ public class MascDocument {
/**
- * Check whether there is Penn tagging produced by GATE-5.0 ANNIE
+ * Checks whether there is Penn tagging produced by GATE-5.0 ANNIE.
*
- * @return true if this file has aligned tags/tokens
+ * @return {@code true} if this file has aligned tags/tokens, {@code false}
otherwise.
*/
public boolean hasPennTags() {
return hasPennTags;
}
+ /**
+ * Checks whether there is NER by GATE-5.0 ANNIE.
+ *
+ * @return {@code true} if this file has named entities, {@code false}
otherwise.
+ */
public boolean hasNamedEntities() {
return hasNamedEntities;
}
/**
- * Get next sentence.
- *
- * @return Next sentence or null if end of document reached.
+ * @return Retrieves the next sentence or {@code null} if end of document
reached.
*/
public MascSentence read() {
MascSentence next = null;
@@ -435,7 +426,7 @@ public class MascDocument {
}
/**
- * Return the reading of sentences to the beginning of the document.
+ * Resets the reading of sentences to the beginning of the document.
*/
public void reset() {
this.sentenceIterator = this.sentences.iterator();
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
index 4dffcf43..96ec80d3 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascDocumentStream.java
@@ -44,7 +44,7 @@ public class MascDocumentStream implements
ObjectStream<MascDocument> {
/**
* A helper class to parse the header (.hdr) files.
*/
- private class HeaderHandler extends DefaultHandler {
+ private static class HeaderHandler extends DefaultHandler {
private HashMap<String, String> annotationFiles = null;
private String file = null;
private String fType = null;
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntityParser.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntityParser.java
index c1e22de0..f11ecdd2 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntityParser.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntityParser.java
@@ -31,9 +31,9 @@ import org.xml.sax.helpers.DefaultHandler;
*/
public class MascNamedEntityParser extends DefaultHandler {
- private Map<Integer, String> entityIDtoEntityType = new HashMap<>();
- private Map<Integer, List<Integer>> entityIDsToTokens = new HashMap<>();
- private Map<Integer, String> tokenToEntity = new HashMap<>();
+ private final Map<Integer, String> entityIDtoEntityType = new HashMap<>();
+ private final Map<Integer, List<Integer>> entityIDsToTokens = new
HashMap<>();
+ private final Map<Integer, String> tokenToEntity = new HashMap<>();
public Map<Integer, String> getEntityIDtoEntityType() {
return entityIDtoEntityType;
@@ -86,7 +86,6 @@ public class MascNamedEntityParser extends DefaultHandler {
System.out.println("[WARNING] One token assigned to different named
entity types.\n" +
"\tPenn-TokenID: " + tokenID + "\n\tToken types: \"" + type +
"\", \"" +
tokenToEntity.get(tokenID) + "\"\n\tKeeping only " + "\"type\"");
- int i = 0;
}
tokenToEntity.put(tokenID, type);
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStream.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStream.java
index dd7c6da9..826e5e44 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStream.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStream.java
@@ -27,13 +27,13 @@ import opennlp.tools.util.Span;
public class MascNamedEntitySampleStream extends
FilterObjectStream<MascDocument, NameSample> {
- MascDocument buffer;
+ private MascDocument buffer;
/**
- * Create a stream of named entity samples from a stream of MascDocuments
+ * Initializes {@link MascNamedEntitySampleStream} from a stream of {@link
MascDocument documents}.
*
- * @param samples a MascDocumentStream
- * @throws IOException
+ * @param samples A {@link ObjectStream<MascDocument>} of samples.
+ * @throws IOException Thrown if none of the documents has NE labels.
*/
public MascNamedEntitySampleStream(ObjectStream<MascDocument> samples)
throws IOException {
super(samples);
@@ -48,16 +48,18 @@ public class MascNamedEntitySampleStream extends
FilterObjectStream<MascDocument
}
/**
- * Get the next sample of named entities.
+ * Reads the next sample of named entities.
*
- * @return One sentence together with its named entity annotation
- * @throws IOException if the sample cannot be extracted
+ * @return One {@link NameSample sentence together with its named entity
annotation}.
+ * @throws IOException Thrown if the sample cannot be extracted
*/
+ @Override
public NameSample read() throws IOException {
- /* Read the documents one sentence at a time
- If the document is over, move to the next one
- If both document stream and sentence stream are over, return null
+ /*
+ * Read the documents one sentence at a time
+ * If the document is over, move to the next one
+ * If both document stream and sentence stream are over, return null
*/
try {
MascSentence sentence = buffer.read();
@@ -79,7 +81,7 @@ public class MascNamedEntitySampleStream extends
FilterObjectStream<MascDocument
Span[] namedEntitiesArray = new Span[namedEntities.size()];
namedEntities.toArray(namedEntitiesArray);
- //todo: should the user decide about clearAdaptiveData?
+ // TODO: should the user decide about clearAdaptiveData?
return new NameSample(tokensArray, namedEntitiesArray, true);
} catch (IOException e) {
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamFactory.java
index 258cac96..1adf3535 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamFactory.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascNamedEntitySampleStreamFactory.java
@@ -38,15 +38,14 @@ public class MascNamedEntitySampleStreamFactory<P> extends
AbstractSampleStreamF
public static void registerFactory() {
StreamFactoryRegistry.registerFactory(NameSample.class,
MASC_FORMAT,
- new opennlp.tools.formats.masc.MascNamedEntitySampleStreamFactory<>(
-
opennlp.tools.formats.masc.MascNamedEntitySampleStreamFactory.Parameters.class));
+ new MascNamedEntitySampleStreamFactory<>(
+ MascNamedEntitySampleStreamFactory.Parameters.class));
}
@Override
public ObjectStream<NameSample> create(String[] args) {
- opennlp.tools.formats.masc.MascNamedEntitySampleStreamFactory.Parameters
params =
- ArgumentParser.parse(args,
-
opennlp.tools.formats.masc.MascNamedEntitySampleStreamFactory.Parameters.class);
+ MascNamedEntitySampleStreamFactory.Parameters params =
+ ArgumentParser.parse(args,
MascNamedEntitySampleStreamFactory.Parameters.class);
try {
FileFilter fileFilter = pathname ->
pathname.getName().contains(params.getFileFilter());
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStream.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStream.java
index 7d7b2958..d971c260 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStream.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStream.java
@@ -26,13 +26,13 @@ import opennlp.tools.util.ObjectStream;
public class MascPOSSampleStream extends FilterObjectStream<MascDocument,
POSSample> {
- MascDocument buffer;
-
+ private MascDocument buffer;
+
/**
- * Create a stream of POS-samples from a stream of MascDocuments.
+ * Initializes {@link MascPOSSampleStream} from a stream of {@link
MascDocument documents}.
*
- * @param samples A MascDocumentStream.
- * @throws IOException
+ * @param samples A {@link ObjectStream<MascDocument>} of samples.
+ * @throws IOException Thrown if none of the documents has POS tags.
*/
public MascPOSSampleStream(ObjectStream<MascDocument> samples) throws
IOException {
super(samples);
@@ -45,18 +45,20 @@ public class MascPOSSampleStream extends
FilterObjectStream<MascDocument, POSSam
e.getMessage());
}
}
-
+
/**
- * Get the next sample
+ * Reads the next sample.
*
- * @return One sentence together with its POS tags.
- * @throws IOException if anything goes wrong.
+ * @return One {@link POSSample sentence together with its POS tags}.
+ * @throws IOException Thrown if the sample cannot be extracted.
*/
+ @Override
public POSSample read() throws IOException {
- /* Read the documents one sentence at a time
- If the document is over, move to the next one
- If both document stream and sentence stream are over, return null
+ /*
+ * Read the documents one sentence at a time
+ * If the document is over, move to the next one
+ * If both document stream and sentence stream are over, return null
*/
try {
MascSentence sentence = buffer.read();
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStreamFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStreamFactory.java
index 85aba5d5..4e910c68 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStreamFactory.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascPOSSampleStreamFactory.java
@@ -38,15 +38,14 @@ public class MascPOSSampleStreamFactory<P> extends
AbstractSampleStreamFactory<P
public static void registerFactory() {
StreamFactoryRegistry.registerFactory(POSSample.class,
MASC_FORMAT,
- new opennlp.tools.formats.masc.MascPOSSampleStreamFactory<>(
-
opennlp.tools.formats.masc.MascPOSSampleStreamFactory.Parameters.class));
+ new MascPOSSampleStreamFactory<>(
+ MascPOSSampleStreamFactory.Parameters.class));
}
@Override
public ObjectStream<POSSample> create(String[] args) {
- opennlp.tools.formats.masc.MascPOSSampleStreamFactory.Parameters params =
- ArgumentParser.parse(args,
-
opennlp.tools.formats.masc.MascPOSSampleStreamFactory.Parameters.class);
+ MascPOSSampleStreamFactory.Parameters params =
+ ArgumentParser.parse(args,
MascPOSSampleStreamFactory.Parameters.class);
try {
FileFilter fileFilter = pathname ->
pathname.getName().contains(params.getFileFilter());
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentence.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentence.java
index ba4a1ad9..06c75d5a 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentence.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentence.java
@@ -54,12 +54,13 @@ public class MascSentence extends Span {
* Extract a quark by its key
*
* @param key The quark's ID
- * @return The quark reference
- * @throws IOException if not found in the document
+ * @return The {@link MascWord quark reference}.
+ *
+ * @throws IOException Thrown if the {@code key} was not found in the
document.
*/
protected MascWord get(int key) throws IOException {
- //We first check if this word is in the sentence
- //todo: evaluate the necessity: HashMaps are O(1), right?
+ // First, check if this word is in the sentence
+ // TODO: evaluate the necessity: HashMaps are O(1), right?
if (wordsById.containsKey(key)) {
return wordsById.get(key);
} else {
@@ -83,7 +84,7 @@ public class MascSentence extends Span {
private List<Span> namedEntities = new ArrayList<>();
/**
- * Create a MascSentence, containing its associated text and quarks
+ * Initializes a {@link MascSentence} containing its associated text and
quarks
*
* @param s Start of the sentence within the corpus file
* @param e End of the sentence within the corpus file
@@ -107,14 +108,15 @@ public class MascSentence extends Span {
}
/**
- * Add the Penn tokenization and POS tagging to the sentence
+ * Add the Penn tokenization and POS tagging to the sentence.
+ *
+ * @param tokenToQuarks A map from token ID to quarks in that token.
+ * @param quarkToTokens A map of quark IDs and the token IDs containing that
quark.
+ * @param tokenToBase Token ID to the token base.
+ * @param tokenToTag Token ID to the POS tag.
*
- * @param tokenToQuarks A map from token ID to quarks in that token
- * @param quarkToTokens A map of quark IDs and the token IDs containing that
quark
- * @param tokenToBase Token ID to the token base
- * @param tokenToTag Token ID to the POS tag
- * @return true if no issue encountered, false if tokens cross sentence
boundaries
- * @throws IOException If anything goes wrong
+ * @return {@code true} if no issue encountered, {@code false} if tokens
cross sentence boundaries.
+ * @throws IOException Thrown if IO errors occurred.
*/
boolean tokenizePenn(Map<Integer, int[]> tokenToQuarks,
Map<Integer, int[]> quarkToTokens,
@@ -151,7 +153,7 @@ public class MascSentence extends Span {
}
}
- /*Because there are some quarks which are parts of tokens outside
of a sentence
+ /*Because there are some quarks which are parts of tokens outside
a sentence
We need to check every time if that quark was actually assigned to
the sentence
If not, we need to extract it manually from the whole document*/
MascWord[] quarks = new MascWord[quarksOfToken.length]; //Get the
actual quark references
@@ -186,14 +188,15 @@ public class MascSentence extends Span {
*
* @param entityIDtoEntityType Maps the named entity ID to its type
* @param entityIDsToTokens A list of tokens covered by each named entity
- * @return true if all went well, false if named entities overlap
- * @throws IOException if anything goes wrong
+ *
+ * @return {@code true} if all went well, {@code false} if named entities
overlap.
+ * @throws IOException Thrown if IO errors occurred.
*/
boolean addNamedEntities(Map<Integer, String> entityIDtoEntityType,
Map<Integer, List<Integer>> entityIDsToTokens)
throws IOException {
boolean fileWithoutIssues = true;
if (sentenceTokens == null) {
- throw new IOException("Named entity labels provided for un untokenized
sentence.");
+ throw new IOException("Named entity labels provided for an un-tokenized
sentence.");
}
//for each named entity identify its span
@@ -264,27 +267,21 @@ public class MascSentence extends Span {
}
/**
- * Get the named entities
- *
- * @return List of named entities defined as token span, e.g. Span(1,3,
"org") for tokens [1,3)
+ * @return Retrieves the {@link List<Span> named entities}, e.g. {@code
Span(1,3, "org")} for tokens [1,3).
*/
public List<Span> getNamedEntities() {
return namedEntities;
}
/**
- * Get the sentence text
- *
- * @return Text of the sentence as defined by the sentence segmentation
annotation.
+ * @return Retrieves text of the sentence as defined by the sentence
segmentation annotation.
*/
public String getSentDetectText() {
return text.substring(getStart(), getEnd());
}
/**
- * Get the text of the sentence tokens
- *
- * @return Text of the sentence as defined by the tokens in it.
+ * @return Retrieves text of the sentence as defined by the tokens in it.
*/
public String getTokenText() {
if (sentenceTokens.isEmpty()) {
@@ -295,8 +292,6 @@ public class MascSentence extends Span {
}
/**
- * Get the text of the sentence tokens
- *
* @return The texts of the individual tokens in the sentence
*/
public List<String> getTokenStrings() {
@@ -310,9 +305,10 @@ public class MascSentence extends Span {
}
/**
- * Get the boundaries of individual tokens
+ * Retrieves the boundaries of individual tokens.
*
- * @return Spans representing the tokens of the sentence (according to Penn
tokenization)
+ * @return The {@link List<Span> spans} representing the tokens of the
sentence,
+ * according to Penn tokenization.
*/
public List<Span> getTokensSpans() {
@@ -327,10 +323,9 @@ public class MascSentence extends Span {
}
/**
- * Get the tags of tokens in the sentence
- *
- * @return A list of individual tags
- * @throws IOException if used on an untokenized sentence
+ * @return Get the (individual) tags of tokens in the sentence.
+ *
+ * @throws IOException Thrown if used on an un-tokenized sentence.
*/
public List<String> getTags() throws IOException {
List<String> tags = new ArrayList<>();
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceParser.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceParser.java
index 7a679a0e..b970d6d2 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceParser.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceParser.java
@@ -27,11 +27,11 @@ import org.xml.sax.helpers.DefaultHandler;
import opennlp.tools.util.Span;
/**
- * A class to parse the sentence segmentation stand-off annotation
+ * A class to parse the sentence segmentation stand-off annotation.
*/
class MascSentenceParser extends DefaultHandler {
- private List<Span> sentenceAnchors = null;
+ private final List<Span> sentenceAnchors = new ArrayList<>();
public List<Span> getAnchors() {
return sentenceAnchors;
@@ -49,11 +49,6 @@ class MascSentenceParser extends DefaultHandler {
int left = Integer.parseInt(anchors[0]);
int right = Integer.parseInt(anchors[1]);
- // initialize list
- if (sentenceAnchors == null) {
- sentenceAnchors = new ArrayList<Span>();
- }
-
sentenceAnchors.add(new Span(left, right));
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStream.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStream.java
index 7e8a5dbc..39248390 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStream.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStream.java
@@ -39,10 +39,12 @@ public class MascSentenceSampleStream extends
FilterObjectStream<MascDocument, S
}
/**
- * Reads a new sample of sentences
+ * Reads a new {@link SentenceSample sample of sentences}.
*
- * @return The specified number of sentences. If fewer left, then return
whatever is left.
- * @throws IOException
+ * @return The {@link SentenceSample specified number of sentences}.
+ * If fewer left, then return whatever is left.
+ *
+ * @throws IOException Thrown if IO errors occurred during read operation.
*/
@Override
public SentenceSample read() throws IOException {
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStreamFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStreamFactory.java
index ce55cfe3..d44bf780 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStreamFactory.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascSentenceSampleStreamFactory.java
@@ -37,17 +37,14 @@ public class MascSentenceSampleStreamFactory<P> extends
AbstractSampleStreamFact
}
public static void registerFactory() {
- StreamFactoryRegistry.registerFactory(SentenceSample.class,
- MASC_FORMAT,
- new opennlp.tools.formats.masc.MascSentenceSampleStreamFactory<>(
-
opennlp.tools.formats.masc.MascSentenceSampleStreamFactory.Parameters.class));
+ StreamFactoryRegistry.registerFactory(SentenceSample.class, MASC_FORMAT,
+ new
MascSentenceSampleStreamFactory<>(MascSentenceSampleStreamFactory.Parameters.class));
}
@Override
public ObjectStream<SentenceSample> create(String[] args) {
- opennlp.tools.formats.masc.MascSentenceSampleStreamFactory.Parameters
params =
- ArgumentParser.parse(args,
-
opennlp.tools.formats.masc.MascSentenceSampleStreamFactory.Parameters.class);
+ MascSentenceSampleStreamFactory.Parameters params =
+ ArgumentParser.parse(args,
MascSentenceSampleStreamFactory.Parameters.class);
try {
FileFilter fileFilter = pathname ->
pathname.getName().contains(params.getFileFilter());
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascToken.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascToken.java
index 593315cf..5fa96eeb 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascToken.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascToken.java
@@ -19,6 +19,9 @@ package opennlp.tools.formats.masc;
import opennlp.tools.util.Span;
+/**
+ * A specialized {@link Span} to express tokens in {@link MascDocument
documents}.
+ */
public class MascToken extends Span {
private static final long serialVersionUID = -780646706788037041L;
@@ -28,14 +31,18 @@ public class MascToken extends Span {
private final MascWord[] quarks;
/**
- * Create a MascToken, which may combine multiple quarks
+ * Initializes a {@link MascToken} which may combine multiple quarks.
+ *
+ * @param s The start of the token in the corpus file.
+ * Must be equal to or greater than {@code 0}.
+ * @param e The end of the token in the corpus file.
+ * Must be equal to or greater than {@code 0} and be greater
than {@code s}.
+ * @param pennId The ID of the token as assigned by the Penn stand-off
annotation.
+ * @param pos The POS-tag.
+ * @param base The base form.
+ * @param quarks The {@link MascWord array of Quarks} contained in the token.
*
- * @param s The start of the token in the corpus file
- * @param e The end of the token in the corpus file
- * @param pennId The ID of the token as assigned by the Penn stand-off
annotation
- * @param pos The POS-tag
- * @param base The base form
- * @param quarks Quarks contained in the token
+ * @throws IllegalArgumentException Thrown if one of the parameters is
invalid.
*/
public MascToken(int s, int e, int pennId, String pos, String base,
MascWord[] quarks) {
super(s, e);
@@ -46,36 +53,28 @@ public class MascToken extends Span {
}
/**
- * Get ID of the token
- *
- * @return the ID
+ * @return Retrieves the ID of the token.
*/
public int getTokenId() {
return tokenId;
}
/**
- * Get the base form
- *
- * @return the base form
+ * @return Retrieves the base form.
*/
public String getBase() {
return base;
}
/**
- * Get the POS tag
- *
- * @return POS tag
+ * @return Retrieves the POS tag.
*/
public String getPos() {
return pos;
}
/**
- * Get quarks of the token
- *
- * @return Array of quark references
+ * @return Retrieves quarks of the token.
*/
public MascWord[] getQuarks() {
return quarks;
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStream.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStream.java
index 93fd21d4..fccb15ef 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStream.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStream.java
@@ -27,8 +27,14 @@ import opennlp.tools.util.Span;
public class MascTokenSampleStream extends FilterObjectStream<MascDocument,
TokenSample> {
- MascDocument buffer;
-
+ private MascDocument buffer;
+
+ /**
+ * Initializes a {@link MascTokenSampleStream}.
+ *
+ * @param samples The {@link ObjectStream} of {@link MascDocument} samples to process.
+ * @throws IOException Thrown if none of the {@link MascDocument documents}
had Penn tokenization.
+ */
public MascTokenSampleStream(ObjectStream<MascDocument> samples) throws
IOException {
super(samples);
try {
@@ -41,16 +47,18 @@ public class MascTokenSampleStream extends
FilterObjectStream<MascDocument, Toke
}
}
+ @Override
public TokenSample read() throws IOException {
- /* Read the documents one sentence at a time
- If the document is over, move to the next one
- If both document stream and sentence stream are over, return null
+ /*
+ * Read the documents one sentence at a time
+ * If the document is over, move to the next one
+ * If both document stream and sentence stream are over, return null
*/
try {
boolean sentenceFound = true;
String sentenceString;
- List<Span> tokensSpans;
+ List<Span> tokenSpans;
MascSentence sentence;
do {
sentence = buffer.read();
@@ -65,23 +73,23 @@ public class MascTokenSampleStream extends
FilterObjectStream<MascDocument, Toke
}
sentenceString = sentence.getTokenText();
- tokensSpans = sentence.getTokensSpans();
+ tokenSpans = sentence.getTokensSpans();
if (sentenceString.length() == 0) {
System.err.println("[WARNING] Zero sentence found: " +
"there is a sentence without any tokens.");
System.err.println(sentenceString);
- System.err.println(tokensSpans.toString());
+ System.err.println(tokenSpans.toString());
sentenceFound = false;
}
- for (int i = 0; i < tokensSpans.size(); i++) {
- Span t = tokensSpans.get(i);
+ for (int i = 0; i < tokenSpans.size(); i++) {
+ Span t = tokenSpans.get(i);
if (t.getEnd() - t.getStart() == 0) {
System.err.println("[WARNING] Zero token found: " +
"there is a token without any quarks.");
System.err.println(sentenceString);
- System.err.println(tokensSpans.toString());
+ System.err.println(tokenSpans);
sentenceFound = false;
}
}
@@ -89,8 +97,8 @@ public class MascTokenSampleStream extends
FilterObjectStream<MascDocument, Toke
} while (!sentenceFound);
- Span[] tokensSpansArray = new Span[tokensSpans.size()];
- tokensSpans.toArray(tokensSpansArray);
+ Span[] tokensSpansArray = new Span[tokenSpans.size()];
+ tokenSpans.toArray(tokensSpansArray);
return new TokenSample(sentenceString, tokensSpansArray);
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStreamFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStreamFactory.java
index 99bf1f58..c58eb133 100644
---
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStreamFactory.java
+++
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascTokenSampleStreamFactory.java
@@ -32,24 +32,20 @@ public class MascTokenSampleStreamFactory<P> extends
AbstractSampleStreamFactory
public static final String MASC_FORMAT = "masc";
-
protected MascTokenSampleStreamFactory(Class<P> params) {
super(params);
}
public static void registerFactory() {
- StreamFactoryRegistry.registerFactory(TokenSample.class,
- MASC_FORMAT,
- new opennlp.tools.formats.masc.MascTokenSampleStreamFactory<>(
-
opennlp.tools.formats.masc.MascTokenSampleStreamFactory.Parameters.class));
+ StreamFactoryRegistry.registerFactory(TokenSample.class, MASC_FORMAT,
+ new
MascTokenSampleStreamFactory<>(MascTokenSampleStreamFactory.Parameters.class));
}
@Override
public ObjectStream<TokenSample> create(String[] args) {
- opennlp.tools.formats.masc.MascTokenSampleStreamFactory.Parameters params =
- ArgumentParser.parse(args,
-
opennlp.tools.formats.masc.MascTokenSampleStreamFactory.Parameters.class);
+ MascTokenSampleStreamFactory.Parameters params =
+ ArgumentParser.parse(args,
MascTokenSampleStreamFactory.Parameters.class);
try {
FileFilter fileFilter = pathname ->
pathname.getName().contains(params.getFileFilter());
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWord.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWord.java
index 1f3cffc3..916f6d2d 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWord.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWord.java
@@ -25,11 +25,15 @@ public class MascWord extends Span {
private final int id;
/**
- * Saves one of MASC's quarks - basic-level units (may be sub-word)
+ * Holds one of MASC's quarks, that is: basic-level units (may be sub-word).
*
- * @param s The beginning of the word in the corpus file
- * @param e The end of the word in the corpus file
- * @param id The id as assigned by the stand-off annotation
+ * @param s The beginning of the word in the corpus file.
+ * Must be equal to or greater than {@code 0}.
+ * @param e The end of the word in the corpus file.
+ * Must be equal to or greater than {@code 0} and be greater than
{@code s}.
+ * @param id The id as assigned by the stand-off annotation.
+ *
+ * @throws IllegalArgumentException Thrown if one of the parameters is
invalid.
*/
public MascWord(int s, int e, int id) {
super(s, e);
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWordParser.java
b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWordParser.java
index db57f82d..ea1cb3b7 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWordParser.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/formats/masc/MascWordParser.java
@@ -25,11 +25,11 @@ import org.xml.sax.SAXException;
import org.xml.sax.helpers.DefaultHandler;
/**
- * Class to parse the word ("quark") segmentation stand-off annotation
+ * Class to parse the word ("quark") segmentation stand-off annotation.
*/
class MascWordParser extends DefaultHandler {
- private List<MascWord> wordAnchors = null;
+ private final List<MascWord> wordAnchors = new ArrayList<>();
public List<MascWord> getAnchors() {
return wordAnchors;
@@ -48,11 +48,6 @@ class MascWordParser extends DefaultHandler {
int left = Integer.parseInt(anchors[0]);
int right = Integer.parseInt(anchors[1]);
- // initialize list
- if (wordAnchors == null) {
- wordAnchors = new ArrayList<MascWord>();
- }
-
wordAnchors.add(new MascWord(left, right, id));
}
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
index 94fcd500..9efffbf6 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/postag/POSTaggerFactory.java
@@ -136,7 +136,7 @@ public class POSTaggerFactory extends BaseToolFactory {
* Note:
* The generators are created on every call to this method.
*
- * @return the feature generator or {@link null} if there is no descriptor
in the model
+ * @return the feature generator or {@code null} if there is no descriptor
in the model
*/
public AdaptiveFeatureGenerator createFeatureGenerators() {
diff --git
a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
index 10086e9b..5a30dd60 100644
--- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
+++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/TokenizerME.java
@@ -53,7 +53,7 @@ import opennlp.tools.util.TrainingParameters;
* must be instantiated which can share one {@link TokenizerModel} instance
* to safe memory.
* <p>
- * To train a new model, the {@link #train(ObjectStream, TokenizerFactory,
TrainingParameters) method
+ * To train a new model, the {@link #train(ObjectStream, TokenizerFactory,
TrainingParameters)} method
* can be used.
* <p>
* Sample usage:
@@ -69,8 +69,6 @@ import opennlp.tools.util.TrainingParameters;
* <br>
* String tokens[] = tokenizer.tokenize("A sentence to be tokenized.");
* </code>
- * <p>
- *
* @see Tokenizer
* @see TokenizerModel
* @see TokenSample