This is an automated email from the ASF dual-hosted git repository. mawiesne pushed a commit to branch OPENNLP-1653-Add-thread-safe-version-of-LemmatizerME in repository https://gitbox.apache.org/repos/asf/opennlp.git
commit 9e8ed5bfbd26fa8be1d15f908314548cdc0e4ad1 Author: Martin Wiesner <[email protected]> AuthorDate: Sat Nov 23 23:11:16 2024 +0100 OPENNLP-1653 Add thread-safe version of LemmatizerME --- .../tools/lemmatizer/ThreadSafeLemmatizerME.java | 80 ++++++++++++++++++++++ 1 file changed, 80 insertions(+) diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java new file mode 100644 index 00000000..e63c27d3 --- /dev/null +++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java @@ -0,0 +1,80 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package opennlp.tools.lemmatizer; + +import java.util.List; + +import opennlp.tools.commons.ThreadSafe; + +/** + * A thread-safe version of the {@link LemmatizerME}. Using it is completely transparent. + * You can use it in a single-threaded context as well, it only incurs a minimal overhead. + * <p> + * Note, however, that this implementation uses a {@link ThreadLocal}. Although the implementation is + * lightweight because the model is not duplicated, if you have many long-running threads, + * you may run into memory problems. + * </p> + * <p> + * Be careful when using this in a Jakarta EE application, for example. + * </p> + * The user is responsible for clearing the {@link ThreadLocal}. + * + * @see Lemmatizer + */ +@ThreadSafe +public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable { + + private final LemmatizerModel model; + + private final ThreadLocal<LemmatizerME> threadLocal = new ThreadLocal<>(); + + /** + * Initializes a {@link ThreadSafeLemmatizerME} with the specified {@code model}. + * + * @param model A valid {@link LemmatizerModel}. + */ + public ThreadSafeLemmatizerME(LemmatizerModel model) { + super(); + this.model = model; + } + + private LemmatizerME getLemmatizer() { + LemmatizerME tagger = threadLocal.get(); + if (tagger == null) { + tagger = new LemmatizerME(model); + threadLocal.set(tagger); + } + return tagger; + } + + @Override + public String[] lemmatize(String[] toks, String[] tags) { + return getLemmatizer().lemmatize(toks, tags); + } + + @Override + public List<List<String>> lemmatize(List<String> toks, List<String> tags) { + return getLemmatizer().lemmatize(toks, tags); + } + + @Override + public void close() { + threadLocal.remove(); + } + +}
