This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch OPENNLP-1620 in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 8129d0a74d26549300f7f044262b021024f8d338 Author: Richard Zowalla <[email protected]> AuthorDate: Tue Oct 15 13:25:30 2024 +0200 OPENNLP-1620 - It should be possible to remove the allocated ThreadLocal --- .../tools/postag/ThreadSafePOSTaggerME.java | 16 ++++++++++++++- .../sentdetect/ThreadSafeSentenceDetectorME.java | 24 +++++++++++++++------- .../tools/tokenize/ThreadSafeTokenizerME.java | 22 ++++++++++++++++---- 3 files changed, 50 insertions(+), 12 deletions(-) diff --git a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java index 52419ddf..b567f1ea 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/postag/ThreadSafePOSTaggerME.java @@ -23,9 +23,18 @@ import opennlp.tools.util.Sequence; /** * A thread-safe version of the POSTaggerME. Using it is completely transparent. You can use it in * a single-threaded context as well, it only incurs a minimal overhead. + * <p> + * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * lightweight because the model is not duplicated, if you have many long-running threads, + * you may run into memory problems. + * </p> + * <p> + * Be careful when using this in a Jakarta EE application, for example. + * </p> + * The user is responsible for clearing the {@link ThreadLocal}. */ @ThreadSafe -public class ThreadSafePOSTaggerME implements POSTagger { +public class ThreadSafePOSTaggerME implements POSTagger, AutoCloseable { private final POSModel model; @@ -64,4 +73,9 @@ public class ThreadSafePOSTaggerME implements POSTagger { public Sequence[] topKSequences(String[] sentence, Object[] additionaContext) { return getTagger().topKSequences(sentence, additionaContext); } + + @Override + public void close() { + threadLocal.remove(); + } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java index 99abc6fb..17ea14e8 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/sentdetect/ThreadSafeSentenceDetectorME.java @@ -24,16 +24,21 @@ import opennlp.tools.util.Span; * A thread-safe version of SentenceDetectorME. Using it is completely transparent. You can use it in * a single-threaded context as well, it only incurs a minimal overhead. * <p> - * Note, however, that this implementation uses a ThreadLocal. Although the implementation is - * lightweight as the model is not duplicated, if you have many long-running threads, you may run - * into memory issues. Be careful when you use this in a JEE application, for example. + * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * lightweight because the model is not duplicated, if you have many long-running threads, + * you may run into memory problems. + * </p> + * <p> + * Be careful when using this in a Jakarta EE application, for example. + * </p> + * The user is responsible for clearing the {@link ThreadLocal}. */ @ThreadSafe -public class ThreadSafeSentenceDetectorME implements SentenceDetector { +public class ThreadSafeSentenceDetectorME implements SentenceDetector, AutoCloseable { private final SentenceModel model; - private final ThreadLocal<SentenceDetectorME> sentenceDetectorThreadLocal = + private final ThreadLocal<SentenceDetectorME> threadLocal = new ThreadLocal<>(); public ThreadSafeSentenceDetectorME(SentenceModel model) { @@ -43,10 +48,10 @@ public class ThreadSafeSentenceDetectorME implements SentenceDetector { // If a thread-local version exists, return it. Otherwise, create, then return. private SentenceDetectorME getSD() { - SentenceDetectorME sd = sentenceDetectorThreadLocal.get(); + SentenceDetectorME sd = threadLocal.get(); if (sd == null) { sd = new SentenceDetectorME(model); - sentenceDetectorThreadLocal.set(sd); + threadLocal.set(sd); } return sd; } @@ -64,4 +69,9 @@ public class ThreadSafeSentenceDetectorME implements SentenceDetector { public Span[] sentPosDetect(CharSequence s) { return getSD().sentPosDetect(s); } + + @Override + public void close() { + threadLocal.remove(); + } } diff --git a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java index b92dd5e0..3ebbd1e3 100644 --- a/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java +++ b/opennlp-tools/src/main/java/opennlp/tools/tokenize/ThreadSafeTokenizerME.java @@ -23,13 +23,22 @@ import opennlp.tools.util.Span; /** * A thread-safe version of TokenizerME. Using it is completely transparent. You can use it in * a single-threaded context as well, it only incurs a minimal overhead. + * <p> + * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * lightweight because the model is not duplicated, if you have many long-running threads, + * you may run into memory problems. + * </p> + * <p> + * Be careful when using this in a Jakarta EE application, for example. + * </p> + * The user is responsible for clearing the {@link ThreadLocal}. */ @ThreadSafe -public class ThreadSafeTokenizerME implements Tokenizer { +public class ThreadSafeTokenizerME implements Tokenizer, AutoCloseable { private final TokenizerModel model; - private final ThreadLocal<TokenizerME> tokenizerThreadLocal = new ThreadLocal<>(); + private final ThreadLocal<TokenizerME> threadLocal = new ThreadLocal<>(); public ThreadSafeTokenizerME(TokenizerModel model) { super(); @@ -37,10 +46,10 @@ public class ThreadSafeTokenizerME implements Tokenizer { } private TokenizerME getTokenizer() { - TokenizerME tokenizer = tokenizerThreadLocal.get(); + TokenizerME tokenizer = threadLocal.get(); if (tokenizer == null) { tokenizer = new TokenizerME(model); - tokenizerThreadLocal.set(tokenizer); + threadLocal.set(tokenizer); } return tokenizer; } @@ -58,4 +67,9 @@ public class ThreadSafeTokenizerME implements Tokenizer { public double[] getProbabilities() { return getTokenizer().getTokenProbabilities(); } + + @Override + public void close() { + threadLocal.remove(); + } }
