This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch OPENNLP-1757-Expose-probs()-method-in-thread-safe-probabilistic-ME-classes in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 0ebd9d10dc1fcd717f889a87d1d1b24deeef6db3 Author: Martin Wiesner <martin.wies...@hs-heilbronn.de> AuthorDate: Mon Jul 7 21:49:00 2025 +0200 OPENNLP-1757: Expose probs() method in thread-safe probabilistic ME classes - adds Probabilistic marker interface to API - adjusts classic ME related thread-safe classes to implement common probs() method via Probabilistic interface - declares non-common probs-like methods 'getSentenceProbabilities' and 'getTokenProbabilities' deprecated as 'probs()' must now be implemented by all ME classes. - improves JavaDoc along the path --- .../main/java/opennlp/tools/ml/Probabilistic.java | 32 ++++++++++++++++++++++ .../main/java/opennlp/tools/chunker/ChunkerME.java | 13 ++++++--- .../opennlp/tools/chunker/ThreadSafeChunkerME.java | 15 +++++++--- .../langdetect/ThreadSafeLanguageDetectorME.java | 7 +++-- .../opennlp/tools/lemmatizer/LemmatizerME.java | 12 +++++--- .../tools/lemmatizer/ThreadSafeLemmatizerME.java | 15 +++++++--- .../java/opennlp/tools/namefind/NameFinderME.java | 12 ++++++-- .../tools/namefind/ThreadSafeNameFinderME.java | 15 +++++++--- .../java/opennlp/tools/postag/POSTaggerME.java | 23 ++++++++++------ .../tools/postag/ThreadSafePOSTaggerME.java | 16 ++++++++--- .../tools/sentdetect/SentenceDetectorME.java | 27 ++++++++++++++---- .../sentdetect/ThreadSafeSentenceDetectorME.java | 28 +++++++++++++------ .../tools/tokenize/ThreadSafeTokenizerME.java | 22 +++++++++++---- .../java/opennlp/tools/tokenize/TokenizerME.java | 26 ++++++++++++++++-- .../sentdetect/SentenceDetectorMEDutchTest.java | 6 ++-- .../sentdetect/SentenceDetectorMEFrenchTest.java | 4 +-- .../sentdetect/SentenceDetectorMEGermanTest.java | 8 +++--- .../tools/sentdetect/SentenceDetectorMEIT.java | 14 +++++----- .../sentdetect/SentenceDetectorMEItalianTest.java | 4 +-- .../sentdetect/SentenceDetectorMEPolishTest.java | 4 +-- .../SentenceDetectorMEPortugueseTest.java | 4 +-- .../sentdetect/SentenceDetectorMESpanishTest.java | 4 +-- .../tools/sentdetect/SentenceDetectorMETest.java | 16 +++++------ .../opennlp/uima/sentdetect/SentenceDetector.java | 2 +- .../main/java/opennlp/uima/tokenize/Tokenizer.java | 2 +- 25 files changed, 238 insertions(+), 93 deletions(-) diff --git a/opennlp-api/src/main/java/opennlp/tools/ml/Probabilistic.java b/opennlp-api/src/main/java/opennlp/tools/ml/Probabilistic.java new file mode 100644 index 00000000..87025681 --- /dev/null +++ b/opennlp-api/src/main/java/opennlp/tools/ml/Probabilistic.java @@ -0,0 +1,32 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.ml; + +/** + * A marker interface for classes with probabilistic capabilities. + */ +public interface Probabilistic { + + /** + * Retrieves the probabilities of the last decoded sequence. + * + * @return An array with the same number of probabilities as tokens were sent to + * the computational method when it was last called. + */ + double[] probs(); +} diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/chunker/ChunkerME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/chunker/ChunkerME.java index fc47b885..ee3f8103 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/chunker/ChunkerME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/chunker/ChunkerME.java @@ -24,6 +24,7 @@ import java.util.Map; import opennlp.tools.ml.BeamSearch; import opennlp.tools.ml.EventTrainer; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.ml.SequenceTrainer; import opennlp.tools.ml.TrainerFactory; import opennlp.tools.ml.TrainerFactory.TrainerType; @@ -40,10 +41,13 @@ import opennlp.tools.util.TokenTag; import opennlp.tools.util.TrainingParameters; /** - * The class represents a maximum-entropy-based {@link Chunker}. This chunker can be used to + * The class represents a maximum-entropy-based {@link Chunker}. A chunker can be used to * find flat structures based on sequence inputs such as noun phrases or named entities. + * + * @see Chunker + * @see Probabilistic */ -public class ChunkerME implements Chunker { +public class ChunkerME implements Chunker, Probabilistic { public static final int DEFAULT_BEAM_SIZE = 10; @@ -128,12 +132,13 @@ public class ChunkerME implements Chunker { } /** - * Returns an array with the probabilities of the last decoded sequence. The - * sequence was determined based on the previous call to {@link #chunk(String[], String[])}. + * {@inheritDoc} + * The sequence was determined based on the previous call to {@link #chunk(String[], String[])}. * * @return An array with the same number of probabilities as tokens when * {@link ChunkerME#chunk(String[], String[])} was last called. */ + @Override public double[] probs() { return bestSequence.getProbs(); } diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/chunker/ThreadSafeChunkerME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/chunker/ThreadSafeChunkerME.java index 5d92ba21..48d76f5f 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/chunker/ThreadSafeChunkerME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/chunker/ThreadSafeChunkerME.java @@ -18,27 +18,30 @@ package opennlp.tools.chunker; import opennlp.tools.commons.ThreadSafe; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.util.Sequence; import opennlp.tools.util.Span; /** * A thread-safe version of the {@link ChunkerME}. Using it is completely transparent. * You can use it in a single-threaded context as well, it only incurs a minimal overhead. - * - * @implNote + * <p> + * <b>Note:</b><br/> * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> - * The user is responsible for clearing the {@link ThreadLocal}. + * The user is responsible for clearing the {@link ThreadLocal} + * via calling {@link #close()}. * * @see Chunker * @see ChunkerME + * @see Probabilistic */ @ThreadSafe -public class ThreadSafeChunkerME implements Chunker, AutoCloseable { +public class ThreadSafeChunkerME implements Chunker, Probabilistic, AutoCloseable { private final ChunkerModel model; @@ -88,4 +91,8 @@ public class ThreadSafeChunkerME implements Chunker, AutoCloseable { threadLocal.remove(); } + @Override + public double[] probs() { + return getChunker().probs(); + } } diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/langdetect/ThreadSafeLanguageDetectorME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/langdetect/ThreadSafeLanguageDetectorME.java index c8058d65..6b545572 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/langdetect/ThreadSafeLanguageDetectorME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/langdetect/ThreadSafeLanguageDetectorME.java @@ -22,15 +22,16 @@ import opennlp.tools.commons.ThreadSafe; /** * A thread-safe version of the {@link LanguageDetectorME}. Using it is completely transparent. * You can use it in a single-threaded context as well, it only incurs a minimal overhead. - * - * @implNote + * <p> + * <b>Note:</b><br/> * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> - * The user is responsible for clearing the {@link ThreadLocal}. + * The user is responsible for clearing the {@link ThreadLocal} + * via calling {@link #close()}. * * @see LanguageDetector * @see LanguageDetectorME diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java index 87aee940..5b4282f6 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/lemmatizer/LemmatizerME.java @@ -27,6 +27,7 @@ import java.util.Map; import opennlp.tools.ml.BeamSearch; import opennlp.tools.ml.EventModelSequenceTrainer; import opennlp.tools.ml.EventTrainer; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.ml.SequenceTrainer; import opennlp.tools.ml.TrainerFactory; import opennlp.tools.ml.TrainerFactory.TrainerType; @@ -50,8 +51,10 @@ import opennlp.tools.util.TrainingParameters; * Towards a Machine-Learning Architecture for Lexical Functional Grammar Parsing. * </a> PhD dissertation, Dublin City University * + * @see Lemmatizer + * @see Probabilistic */ -public class LemmatizerME implements Lemmatizer { +public class LemmatizerME implements Lemmatizer, Probabilistic { public static final int LEMMA_NUMBER = 29; public static final int DEFAULT_BEAM_SIZE = 3; @@ -100,8 +103,7 @@ public class LemmatizerME implements Lemmatizer { } @Override - public List<List<String>> lemmatize(List<String> toks, - List<String> tags) { + public List<List<String>> lemmatize(List<String> toks, List<String> tags) { String[] tokens = toks.toArray(new String[0]); String[] posTags = tags.toArray(new String[0]); String[][] allLemmas = predictLemmas(LEMMA_NUMBER, tokens, posTags); @@ -225,13 +227,15 @@ public class LemmatizerME implements Lemmatizer { } /** - * Returns an array with the probabilities of the last decoded sequence. + * {@inheritDoc} + * * The sequence was determined based on the previous call to * {@link #lemmatize(String[], String[])}. * * @return An array with the same number of probabilities as tokens were sent to * {@link #lemmatize(String[], String[])} when it was last called. */ + @Override public double[] probs() { return bestSequence.getProbs(); } diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java index fc3aba08..3835d8f1 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java @@ -20,25 +20,27 @@ package opennlp.tools.lemmatizer; import java.util.List; import opennlp.tools.commons.ThreadSafe; +import opennlp.tools.ml.Probabilistic; /** * A thread-safe version of the {@link LemmatizerME}. Using it is completely transparent. * You can use it in a single-threaded context as well, it only incurs a minimal overhead. - * - * @implNote + * <p> + * <b>Note:</b><br/> * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> - * The user is responsible for clearing the {@link ThreadLocal}. + * The user is responsible for clearing the {@link ThreadLocal} + * via calling {@link #close()}. * * @see Lemmatizer * @see LemmatizerME */ @ThreadSafe -public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable { +public class ThreadSafeLemmatizerME implements Lemmatizer, Probabilistic, AutoCloseable { private final LemmatizerModel model; @@ -73,6 +75,11 @@ public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable { return getLemmatizer().lemmatize(toks, tags); } + @Override + public double[] probs() { + return getLemmatizer().probs(); + } + @Override public void close() { threadLocal.remove(); diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/namefind/NameFinderME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/namefind/NameFinderME.java index 04844702..37e04382 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/namefind/NameFinderME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/namefind/NameFinderME.java @@ -31,6 +31,7 @@ import opennlp.tools.ml.AlgorithmType; import opennlp.tools.ml.BeamSearch; import opennlp.tools.ml.EventModelSequenceTrainer; import opennlp.tools.ml.EventTrainer; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.ml.SequenceTrainer; import opennlp.tools.ml.TrainerFactory; import opennlp.tools.ml.TrainerFactory.TrainerType; @@ -49,8 +50,11 @@ import opennlp.tools.util.featuregen.WindowFeatureGenerator; /** * A maximum-entropy-based {@link TokenNameFinder name finder} implementation. + * + * @see Probabilistic + * @see TokenNameFinder */ -public class NameFinderME implements TokenNameFinder { +public class NameFinderME implements TokenNameFinder, Probabilistic { private static final String[][] EMPTY = new String[0][0]; public static final int DEFAULT_BEAM_SIZE = 3; @@ -136,12 +140,14 @@ public class NameFinderME implements TokenNameFinder { } /** - * Retrieves the probabilities of the last decoded sequence. The - * sequence was determined based on the previous call to {@link #find(String[])}. + * {@inheritDoc} + * + * The sequence was determined based on the previous call to {@link #find(String[])}. * * @return An array with the same number of probabilities as tokens were sent * to {@link #find(String[])} when it was last called. */ + @Override public double[] probs() { return bestSequence.getProbs(); } diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java index e820605c..0da6f964 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/namefind/ThreadSafeNameFinderME.java @@ -18,26 +18,28 @@ package opennlp.tools.namefind; import opennlp.tools.commons.ThreadSafe; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.util.Span; /** * A thread-safe version of {@link NameFinderME}. Using it is completely transparent. * You can use it in a single-threaded context as well, it only incurs a minimal overhead. - * - * @implNote + * <p> + * <b>Note:</b><br/> * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> - * The user is responsible for clearing the {@link ThreadLocal}. + * The user is responsible for clearing the {@link ThreadLocal} via calling {@link #close()}. * * @see NameFinderME + * @see Probabilistic * @see TokenNameFinder */ @ThreadSafe -public class ThreadSafeNameFinderME implements TokenNameFinder, AutoCloseable { +public class ThreadSafeNameFinderME implements TokenNameFinder, Probabilistic, AutoCloseable { private final TokenNameFinderModel model; @@ -68,6 +70,11 @@ public class ThreadSafeNameFinderME implements TokenNameFinder, AutoCloseable { return getNameFinder().find(tokens); } + @Override + public double[] probs() { + return getNameFinder().probs(); + } + @Override public void clearAdaptiveData() { getNameFinder().clearAdaptiveData(); diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/postag/POSTaggerME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/postag/POSTaggerME.java index fbe097da..e46cf194 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/postag/POSTaggerME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/postag/POSTaggerME.java @@ -33,6 +33,7 @@ import opennlp.tools.dictionary.Dictionary; import opennlp.tools.ml.BeamSearch; import opennlp.tools.ml.EventModelSequenceTrainer; import opennlp.tools.ml.EventTrainer; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.ml.SequenceTrainer; import opennlp.tools.ml.TrainerFactory; import opennlp.tools.ml.TrainerFactory.TrainerType; @@ -59,8 +60,9 @@ import opennlp.tools.util.featuregen.StringPattern; * @see POSModel * @see POSTagFormat * @see POSTagger + * @see Probabilistic */ -public class POSTaggerME implements POSTagger { +public class POSTaggerME implements POSTagger, Probabilistic { private static final Logger logger = LoggerFactory.getLogger(POSTaggerME.class); @@ -245,8 +247,14 @@ public class POSTaggerME implements POSTagger { } /** - * @return An array with the probabilities for each tag of the last tagged sentence. + * {@inheritDoc} + * + * The sequence was determined based on the previous call to {@link #tag(String[])}. + * + * @return An array with the same number of probabilities as tokens were sent + * to {@link #tag(String[])} when it was last called. */ + @Override public double[] probs() { return bestSequence.getProbs(); } @@ -324,7 +332,7 @@ public class POSTaggerME implements POSTagger { SequenceTrainer<TrainingParameters> trainer = TrainerFactory.getSequenceModelTrainer( mlParams, manifestInfoEntries); - // TODO: This will probably cause issue, since the feature generator uses the outcomes array + // TODO: This will probably cause issues, since the feature generator uses the outcomes array POSSampleSequenceStream ss = new POSSampleSequenceStream(samples, contextGenerator); seqPosModel = trainer.train(ss); @@ -340,7 +348,7 @@ public class POSTaggerME implements POSTagger { } /** - * Constructs a {@link Dictionary nGram dictionary} from an {@link ObjectStream} of samples. + * Constructs an {@link Dictionary nGram dictionary} from an {@link ObjectStream} of samples. * * @param samples The {@link ObjectStream} to process. * @param cutoff A non-negative cut-off value. @@ -379,8 +387,7 @@ public class POSTaggerME implements POSTagger { logger.info("Expanding POS Dictionary ..."); long start = System.nanoTime(); - // the data structure will store the word, the tag, and the number of - // occurrences + // the data structure will store the word, the tag, and the number of occurrences Map<String, Map<String, AtomicInteger>> newEntries = new HashMap<>(); POSSample sample; while ((sample = samples.read()) != null) { @@ -421,8 +428,8 @@ public class POSTaggerME implements POSTagger { } } - // now we check if the word + tag pairs have enough occurrences, if yes we - // add it to the dictionary + // now we check if the word + tag pairs have enough occurrences, + // if yes we add it to the dictionary for (Entry<String, Map<String, AtomicInteger>> wordEntry : newEntries .entrySet()) { List<String> tagsForWord = new ArrayList<>(); diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java index 39ad425e..cb8fd13d 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java @@ -20,6 +20,7 @@ package opennlp.tools.postag; import java.io.IOException; import opennlp.tools.commons.ThreadSafe; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.models.ModelType; import opennlp.tools.util.DownloadUtil; import opennlp.tools.util.Sequence; @@ -27,21 +28,23 @@ import opennlp.tools.util.Sequence; /** * A thread-safe version of the {@link POSTaggerME}. Using it is completely transparent. * You can use it in a single-threaded context as well, it only incurs a minimal overhead. - * - * @implNote + * <p> + * <b>Note:</b><br/> * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> - * The user is responsible for clearing the {@link ThreadLocal}. + * The user is responsible for clearing the {@link ThreadLocal} + * via calling {@link #close()}. * * @see POSTagger * @see POSTaggerME + * @see Probabilistic */ @ThreadSafe -public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable { +public class ThreadSafePOSTaggerME implements POSTagger, Probabilistic, AutoCloseable { private final POSModel model; @@ -122,6 +125,11 @@ public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable { return getTagger().topKSequences(sentence, additionaContext); } + @Override + public double[] probs() { + return getTagger().probs(); + } + @Override public void close() { threadLocal.remove(); diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java index bb48c7b2..e627f3cd 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/SentenceDetectorME.java @@ -28,6 +28,7 @@ import java.util.Set; import opennlp.tools.dictionary.Dictionary; import opennlp.tools.ml.ArrayMath; import opennlp.tools.ml.EventTrainer; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.ml.TrainerFactory; import opennlp.tools.ml.model.Event; import opennlp.tools.ml.model.MaxentModel; @@ -46,7 +47,7 @@ import opennlp.tools.util.TrainingParameters; * A maximum entropy model is used to evaluate end-of-sentence characters in a * string to determine if they signify the end of a sentence. */ -public class SentenceDetectorME implements SentenceDetector { +public class SentenceDetectorME implements SentenceDetector, Probabilistic { /** * Constant indicates a sentence split. @@ -294,15 +295,31 @@ public class SentenceDetectorME implements SentenceDetector { } /** - * Returns the probabilities associated with the most recent - * calls to {@link SentenceDetectorME#sentDetect(CharSequence)}. + * {@inheritDoc} + * + * The sequence was determined based on the previous call to + * {@link #sentDetect(CharSequence)}. + * + * @return An array with the same number of probabilities as tokens were sent to + * {@link #sentDetect(CharSequence)} when it was last called. + * If not applicable, an empty array is returned. + */ + @Override + public double[] probs() { + return ArrayMath.toDoubleArray(sentProbs); + } + + /** * * @return The probability for each sentence returned for the most recent - * call to {@link SentenceDetectorME#sentDetect(CharSequence)}. + * call to {@link #sentDetect(CharSequence)}. * If not applicable, an empty array is returned. + * + * @deprecated Use {@link #probs()} instead. */ + @Deprecated(forRemoval = true, since = "2.5.5") public double[] getSentenceProbabilities() { - return ArrayMath.toDoubleArray(sentProbs); + return probs(); } /** diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java index dae891c4..21773c68 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java @@ -21,6 +21,7 @@ import java.io.IOException; import opennlp.tools.commons.ThreadSafe; import opennlp.tools.dictionary.Dictionary; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.models.ModelType; import opennlp.tools.util.DownloadUtil; import opennlp.tools.util.Span; @@ -28,21 +29,23 @@ import opennlp.tools.util.Span; /** * A thread-safe version of {@link SentenceDetectorME}. Using it is completely transparent. * You can use it in a single-threaded context as well, it only incurs a minimal overhead. - * - * @implNote + * <p> + * <b>Note:</b><br/> * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> - * The user is responsible for clearing the {@link ThreadLocal}. + * The user is responsible for clearing the {@link ThreadLocal} + * via calling {@link #close()}. * + * @see Probabilistic * @see SentenceDetector * @see SentenceDetectorME */ @ThreadSafe -public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoCloseable { +public class ThreadSafeSentenceDetectorME implements SentenceDetector, Probabilistic, AutoCloseable { private final SentenceModel model; private final Dictionary abbDict; @@ -90,10 +93,6 @@ public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoClose return sd; } - public double[] getSentenceProbabilities() { - return getSD().getSentenceProbabilities(); - } - @Override public String[] sentDetect(CharSequence s) { return getSD().sentDetect(s); @@ -104,6 +103,19 @@ public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoClose return getSD().sentPosDetect(s); } + /** + * @deprecated Use {@link #probs()} instead. + */ + @Deprecated + public double[] getSentenceProbabilities() { + return probs(); + } + + @Override + public double[] probs() { + return getSD().probs(); + } + @Override public void close() { threadLocal.remove(); diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java index 6915f5fb..afaeef72 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java @@ -21,6 +21,7 @@ import java.io.IOException; import opennlp.tools.commons.ThreadSafe; import opennlp.tools.dictionary.Dictionary; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.models.ModelType; import opennlp.tools.util.DownloadUtil; import opennlp.tools.util.Span; @@ -28,21 +29,23 @@ import opennlp.tools.util.Span; /** * A thread-safe version of {@link TokenizerME}. Using it is completely transparent. * You can use it in a single-threaded context as well, it only incurs a minimal overhead. - * - * @implNote + * <p> + * <b>Note:</b><br/> * This implementation uses a {@link ThreadLocal}. Although the implementation is * lightweight because the model is not duplicated, if you have many long-running threads, * you may run into memory problems. * <p> * Be careful when using this in a Jakarta EE application, for example. * </p> - * The user is responsible for clearing the {@link ThreadLocal}. + * The user is responsible for clearing the {@link ThreadLocal} + * via calling {@link #close()}. * + * @see Probabilistic * @see Tokenizer * @see TokenizerME */ @ThreadSafe -public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable { +public class ThreadSafeTokenizerME implements Tokenizer, Probabilistic, AutoCloseable { private final TokenizerModel model; private final Dictionary abbDict; @@ -99,8 +102,17 @@ public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable { return getTokenizer().tokenizePos(s); } + @Override + public double[] probs() { + return getTokenizer().probs(); + } + + /** + * @deprecated Use {@link #probs()} instead. + */ + @Deprecated(forRemoval = true, since = "2.5.5") public double[] getProbabilities() { - return getTokenizer().getTokenProbabilities(); + return probs(); } @Override diff --git a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/TokenizerME.java b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/TokenizerME.java index 9655dbb1..4c87e645 100644 --- a/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/TokenizerME.java +++ b/opennlp-core/opennlp-runtime/src/main/java/opennlp/tools/tokenize/TokenizerME.java @@ -27,6 +27,7 @@ import java.util.regex.Pattern; import opennlp.tools.dictionary.Dictionary; import opennlp.tools.ml.ArrayMath; import opennlp.tools.ml.EventTrainer; +import opennlp.tools.ml.Probabilistic; import opennlp.tools.ml.TrainerFactory; import opennlp.tools.ml.model.Event; import opennlp.tools.ml.model.MaxentModel; @@ -69,11 +70,13 @@ import opennlp.tools.util.TrainingParameters; * <br> * String tokens[] = tokenizer.tokenize("A sentence to be tokenized."); * </code> + * * @see Tokenizer * @see TokenizerModel * @see TokenSample + * @see Probabilistic */ -public class TokenizerME extends AbstractTokenizer { +public class TokenizerME extends AbstractTokenizer implements Probabilistic { /** * Constant indicates a token split. @@ -152,13 +155,30 @@ public class TokenizerME extends AbstractTokenizer { tokProbs = new ArrayList<>(50); } + /** + * {@inheritDoc} + * + * The sequence was determined based on the previous call to {@link #tokenizePos(String)}. + * + * @return An array with the same number of probabilities as tokens were sent to + * the computational method when {@link #tokenizePos(String)} was last called. + * If not applicable an empty array is returned. + */ + @Override + public double[] probs() { + return ArrayMath.toDoubleArray(tokProbs); + } + /** * @return the probabilities associated with the most recent calls to - * {@link TokenizerME#tokenize(String)} or {@link TokenizerME#tokenizePos(String)}. + * {@link #tokenizePos(String)}. * If not applicable an empty array is returned. + * + * @deprecated Use {@link #probs()} instead. */ + @Deprecated(forRemoval = true, since = "2.5.5") public double[] getTokenProbabilities() { - return ArrayMath.toDoubleArray(tokProbs); + return probs(); } /** diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEDutchTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEDutchTest.java index eeff57cd..35703e82 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEDutchTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEDutchTest.java @@ -68,7 +68,7 @@ public class SentenceDetectorMEDutchTest extends AbstractSentenceDetectorTest { Assertions.assertEquals(2, sents.length); Assertions.assertEquals(sent1, sents[0]); Assertions.assertEquals(sent2, sents[1]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); } @@ -87,7 +87,7 @@ public class SentenceDetectorMEDutchTest extends AbstractSentenceDetectorTest { String[] sents = sentDetect.sentDetect(sent1); Assertions.assertEquals(1, sents.length); Assertions.assertEquals(sent1, sents[0]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(1, probs.length); } @@ -106,7 +106,7 @@ public class SentenceDetectorMEDutchTest extends AbstractSentenceDetectorTest { String[] sents = sentDetect.sentDetect(sent1); Assertions.assertEquals(1, sents.length); Assertions.assertEquals(sent1, sents[0]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(1, probs.length); } } diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEFrenchTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEFrenchTest.java index f439ddbe..db427775 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEFrenchTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEFrenchTest.java @@ -70,7 +70,7 @@ public class SentenceDetectorMEFrenchTest extends AbstractSentenceDetectorTest { String[] sents = sentDetect.sentDetect(input); Assertions.assertEquals(1, sents.length); Assertions.assertEquals(input, sents[0]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(1, probs.length); } @@ -94,7 +94,7 @@ public class SentenceDetectorMEFrenchTest extends AbstractSentenceDetectorTest { Assertions.assertEquals(2, sents.length); Assertions.assertEquals(sent1, sents[0]); Assertions.assertEquals(sent2, sents[1]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); } diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java index 7593100a..4f814da9 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEGermanTest.java @@ -80,7 +80,7 @@ public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest { SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel); String sampleSentences = sent1 + " " + sent2; String[] sents = sentDetect.sentDetect(sampleSentences); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); assertAll(() -> assertEquals(2, sents.length), () -> assertEquals(sent1, sents[0]), @@ -99,7 +99,7 @@ public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest { SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel); String[] sents = sentDetect.sentDetect(sent1); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); assertAll(() -> assertEquals(1, sents.length), () -> assertEquals(sent1, sents[0]), @@ -117,7 +117,7 @@ public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest { SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel); String[] sents = sentDetect.sentDetect(sent1); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); assertAll(() -> assertEquals(1, sents.length), () -> assertEquals(sent1, sents[0]), @@ -134,7 +134,7 @@ public class SentenceDetectorMEGermanTest extends AbstractSentenceDetectorTest { SentenceDetectorME sentDetect = new SentenceDetectorME(sentdetectModel); //There is no blank space before start of the second sentence. String[] sents = sentDetect.sentDetect(sent1 + sent2); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); assertAll(() -> assertEquals(2, sents.length), () -> assertEquals(sent1, sents[0]), diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEIT.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEIT.java index e722fdc3..bb6b29b8 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEIT.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEIT.java @@ -37,13 +37,13 @@ public class SentenceDetectorMEIT { Assertions.assertEquals(2, sents.length); Assertions.assertEquals("This is a test.", sents[0]); Assertions.assertEquals("There are many tests, this is the second.", sents[1]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); String sampleSentences2 = "This is a test. There are many tests, this is the second"; sents = sentDetect.sentDetect(sampleSentences2); Assertions.assertEquals(2, sents.length); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); Assertions.assertEquals("This is a test.", sents[0]); Assertions.assertEquals("There are many tests, this is the second", sents[1]); @@ -51,7 +51,7 @@ public class SentenceDetectorMEIT { String sampleSentences3 = "This is a \"test\". He said \"There are many tests, this is the second.\""; sents = sentDetect.sentDetect(sampleSentences3); Assertions.assertEquals(2, sents.length); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); Assertions.assertEquals("This is a \"test\".", sents[0]); Assertions.assertEquals("He said \"There are many tests, this is the second.\"", sents[1]); @@ -59,7 +59,7 @@ public class SentenceDetectorMEIT { String sampleSentences4 = "This is a \"test\". I said \"This is a test.\" Any questions?"; sents = sentDetect.sentDetect(sampleSentences4); Assertions.assertEquals(3, sents.length); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(3, probs.length); Assertions.assertEquals("This is a \"test\".", sents[0]); Assertions.assertEquals("I said \"This is a test.\"", sents[1]); @@ -67,7 +67,7 @@ public class SentenceDetectorMEIT { String sampleSentences5 = "This is a one sentence test space at the end. "; sents = sentDetect.sentDetect(sampleSentences5); - Assertions.assertEquals(1, sentDetect.getSentenceProbabilities().length); + Assertions.assertEquals(1, sentDetect.probs().length); Assertions.assertEquals("This is a one sentence test space at the end.", sents[0]); String sampleSentences6 = "This is a one sentences test with tab at the end. "; @@ -90,7 +90,7 @@ public class SentenceDetectorMEIT { String sampleSentences11 = "This is test sentence without a dot at the end and spaces "; sents = sentDetect.sentDetect(sampleSentences11); Assertions.assertEquals("This is test sentence without a dot at the end and spaces", sents[0]); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(1, probs.length); String sampleSentence12 = " This is a test."; @@ -104,7 +104,7 @@ public class SentenceDetectorMEIT { // Test that sentPosDetect also works Span[] pos = sentDetect.sentPosDetect(sampleSentences2); Assertions.assertEquals(2, pos.length); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); Assertions.assertEquals(new Span(0, 15), pos[0]); Assertions.assertEquals(new Span(16, 56), pos[1]); diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEItalianTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEItalianTest.java index 24330727..65c3d679 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEItalianTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEItalianTest.java @@ -75,7 +75,7 @@ public class SentenceDetectorMEItalianTest extends AbstractSentenceDetectorTest String[] sents = sentDetect.sentDetect(input); Assertions.assertEquals(1, sents.length); Assertions.assertEquals(input, sents[0]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(1, probs.length); } @@ -93,7 +93,7 @@ public class SentenceDetectorMEItalianTest extends AbstractSentenceDetectorTest Assertions.assertEquals(2, sents.length); Assertions.assertEquals(sent1, sents[0]); Assertions.assertEquals(sent2, sents[1]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); } diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEPolishTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEPolishTest.java index e4740aa7..c1a08a9d 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEPolishTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEPolishTest.java @@ -74,7 +74,7 @@ public class SentenceDetectorMEPolishTest extends AbstractSentenceDetectorTest { String[] sents = sentDetect.sentDetect(input); Assertions.assertEquals(1, sents.length); Assertions.assertEquals(input, sents[0]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(1, probs.length); } @@ -92,7 +92,7 @@ public class SentenceDetectorMEPolishTest extends AbstractSentenceDetectorTest { Assertions.assertEquals(2, sents.length); Assertions.assertEquals(sent1, sents[0]); Assertions.assertEquals(sent2, sents[1]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); } diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEPortugueseTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEPortugueseTest.java index a2faba93..40b0fca5 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEPortugueseTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMEPortugueseTest.java @@ -66,7 +66,7 @@ public class SentenceDetectorMEPortugueseTest extends AbstractSentenceDetectorTe String[] sents = sentDetect.sentDetect(input); Assertions.assertEquals(1, sents.length); Assertions.assertEquals(input, sents[0]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(1, probs.length); } @@ -84,7 +84,7 @@ public class SentenceDetectorMEPortugueseTest extends AbstractSentenceDetectorTe Assertions.assertEquals(2, sents.length); Assertions.assertEquals(sent1, sents[0]); Assertions.assertEquals(sent2, sents[1]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); } diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMESpanishTest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMESpanishTest.java index ef956fea..12ed4285 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMESpanishTest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMESpanishTest.java @@ -66,7 +66,7 @@ public class SentenceDetectorMESpanishTest extends AbstractSentenceDetectorTest String[] sents = sentDetect.sentDetect(input); Assertions.assertEquals(1, sents.length); Assertions.assertEquals(input, sents[0]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(1, probs.length); } @@ -89,7 +89,7 @@ public class SentenceDetectorMESpanishTest extends AbstractSentenceDetectorTest Assertions.assertEquals(2, sents.length); Assertions.assertEquals(sent1, sents[0]); Assertions.assertEquals(sent2, sents[1]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); } diff --git a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java index 56f2077f..1a28aa29 100644 --- a/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java +++ b/opennlp-core/opennlp-runtime/src/test/java/opennlp/tools/sentdetect/SentenceDetectorMETest.java @@ -61,13 +61,13 @@ public class SentenceDetectorMETest extends AbstractSentenceDetectorTest { Assertions.assertEquals(2, sents.length); Assertions.assertEquals("This is a test.", sents[0]); Assertions.assertEquals("There are many tests, this is the second.", sents[1]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); String sampleSentences2 = "This is a test. There are many tests, this is the second"; sents = sentDetect.sentDetect(sampleSentences2); Assertions.assertEquals(2, sents.length); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); Assertions.assertEquals("This is a test.", sents[0]); Assertions.assertEquals("There are many tests, this is the second", sents[1]); @@ -75,7 +75,7 @@ public class SentenceDetectorMETest extends AbstractSentenceDetectorTest { String sampleSentences3 = "This is a \"test\". He said \"There are many tests, this is the second.\""; sents = sentDetect.sentDetect(sampleSentences3); Assertions.assertEquals(2, sents.length); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); Assertions.assertEquals("This is a \"test\".", sents[0]); Assertions.assertEquals("He said \"There are many tests, this is the second.\"", sents[1]); @@ -83,7 +83,7 @@ public class SentenceDetectorMETest extends AbstractSentenceDetectorTest { String sampleSentences4 = "This is a \"test\". I said \"This is a test.\" Any questions?"; sents = sentDetect.sentDetect(sampleSentences4); Assertions.assertEquals(3, sents.length); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(3, probs.length); Assertions.assertEquals("This is a \"test\".", sents[0]); Assertions.assertEquals("I said \"This is a test.\"", sents[1]); @@ -91,7 +91,7 @@ public class SentenceDetectorMETest extends AbstractSentenceDetectorTest { String sampleSentences5 = "This is a one sentence test space at the end. "; sents = sentDetect.sentDetect(sampleSentences5); - Assertions.assertEquals(1, sentDetect.getSentenceProbabilities().length); + Assertions.assertEquals(1, sentDetect.probs().length); Assertions.assertEquals("This is a one sentence test space at the end.", sents[0]); String sampleSentences6 = "This is a one sentences test with tab at the end. "; @@ -114,7 +114,7 @@ public class SentenceDetectorMETest extends AbstractSentenceDetectorTest { String sampleSentences11 = "This is test sentence without a dot at the end and spaces "; sents = sentDetect.sentDetect(sampleSentences11); Assertions.assertEquals("This is test sentence without a dot at the end and spaces", sents[0]); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(1, probs.length); String sampleSentence12 = " This is a test."; @@ -128,7 +128,7 @@ public class SentenceDetectorMETest extends AbstractSentenceDetectorTest { // Test that sentPosDetect also works Span[] pos = sentDetect.sentPosDetect(sampleSentences2); Assertions.assertEquals(2, pos.length); - probs = sentDetect.getSentenceProbabilities(); + probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); Assertions.assertEquals(new Span(0, 15), pos[0]); Assertions.assertEquals(new Span(16, 56), pos[1]); @@ -149,7 +149,7 @@ public class SentenceDetectorMETest extends AbstractSentenceDetectorTest { Assertions.assertEquals(2, sents.length); Assertions.assertEquals("This is a test for Mr. Miller.", sents[0]); Assertions.assertEquals("His wife, Ms. Susan Miller, is also part of this test.", sents[1]); - double[] probs = sentDetect.getSentenceProbabilities(); + double[] probs = sentDetect.probs(); Assertions.assertEquals(2, probs.length); } diff --git a/opennlp-extensions/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java b/opennlp-extensions/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java index 59bc3e6a..c00faf08 100644 --- a/opennlp-extensions/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java +++ b/opennlp-extensions/opennlp-uima/src/main/java/opennlp/uima/sentdetect/SentenceDetector.java @@ -118,7 +118,7 @@ public final class SentenceDetector extends AbstractSentenceDetector { protected void postProcessAnnotations(AnnotationFS[] sentences) { if (probabilityFeature != null) { - double[] sentenceProbabilities = sentenceDetector.getSentenceProbabilities(); + double[] sentenceProbabilities = sentenceDetector.probs(); for (int i = 0; i < sentences.length; i++) { sentences[i].setDoubleValue(probabilityFeature, sentenceProbabilities[i]); diff --git a/opennlp-extensions/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java b/opennlp-extensions/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java index 38d2b343..2000196f 100644 --- a/opennlp-extensions/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java +++ b/opennlp-extensions/opennlp-uima/src/main/java/opennlp/uima/tokenize/Tokenizer.java @@ -125,7 +125,7 @@ public final class Tokenizer extends AbstractTokenizer { AnnotationFS[] tokenAnnotations) { // if interest if (probabilityFeature != null) { - double[] tokenProbabilities = tokenizer.getTokenProbabilities(); + double[] tokenProbabilities = tokenizer.probs(); for (int i = 0; i < tokenAnnotations.length; i++) { tokenAnnotations[i].setDoubleValue(probabilityFeature,