This is an automated email from the ASF dual-hosted git repository.

mawiesne pushed a commit to branch OPENNLP-1653-Add-thread-safe-version-of-LemmatizerME
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit 9e8ed5bfbd26fa8be1d15f908314548cdc0e4ad1
Author: Martin Wiesner <[email protected]>
AuthorDate: Sat Nov 23 23:11:16 2024 +0100

    OPENNLP-1653 Add thread-safe version of LemmatizerME
---
 .../tools/lemmatizer/ThreadSafeLemmatizerME.java   | 80 ++++++++++++++++++++++
 1 file changed, 80 insertions(+)

diff --git a/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
new file mode 100644
index 00000000..e63c27d3
--- /dev/null
+++ b/opennlp-tools/src/main/java/opennlp/tools/lemmatizer/ThreadSafeLemmatizerME.java
@@ -0,0 +1,80 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package opennlp.tools.lemmatizer;
+
+import java.util.List;
+import java.util.Objects;
+
+import opennlp.tools.commons.ThreadSafe;
+
+/**
+ * A thread-safe version of the {@link LemmatizerME}. Using it is completely transparent.
+ * You can use it in a single-threaded context as well, it only incurs a minimal overhead.
+ * <p>
+ * Note, however, that this implementation uses a {@link ThreadLocal}: every thread that calls
+ * one of the {@code lemmatize} methods gets its own {@link LemmatizerME}, created from the one
+ * {@link LemmatizerModel} reference held by this object. Although the implementation is
+ * lightweight because the model is not duplicated, if you have many long-running threads,
+ * you may run into memory problems.
+ * </p>
+ * <p>
+ * Be careful when using this in a Jakarta EE application, for example.
+ * </p>
+ * The user is responsible for clearing the {@link ThreadLocal}: {@link #close()} has to be
+ * called on every thread that used this object, as it only affects the calling thread.
+ *
+ * @see Lemmatizer
+ */
+@ThreadSafe
+public class ThreadSafeLemmatizerME implements Lemmatizer, AutoCloseable {
+
+  private final LemmatizerModel model;
+
+  // Holds one lazily created LemmatizerME per calling thread; see getLemmatizer().
+  private final ThreadLocal<LemmatizerME> threadLocal = new ThreadLocal<>();
+
+  /**
+   * Initializes a {@link ThreadSafeLemmatizerME} with the specified {@code model}.
+   *
+   * @param model A valid {@link LemmatizerModel}. Must not be {@code null}.
+   * @throws NullPointerException Thrown if {@code model} is {@code null}.
+   */
+  public ThreadSafeLemmatizerME(LemmatizerModel model) {
+    // Fail fast: without this check a null model would only surface later, on whichever
+    // thread happens to call lemmatize(..) first.
+    this.model = Objects.requireNonNull(model, "model must not be null");
+  }
+
+  /**
+   * Retrieves the {@link LemmatizerME} bound to the calling thread, creating and binding
+   * a new one from the shared {@code model} if the thread has none yet
+   * (first use, or first use after {@link #close()}).
+   *
+   * @return The {@link LemmatizerME} of the calling thread, never {@code null}.
+   */
+  private LemmatizerME getLemmatizer() {
+    LemmatizerME lemmatizer = threadLocal.get();
+    if (lemmatizer == null) {
+      lemmatizer = new LemmatizerME(model);
+      threadLocal.set(lemmatizer);
+    }
+    return lemmatizer;
+  }
+
+  /**
+   * Delegates to {@link LemmatizerME#lemmatize(String[], String[])} of the instance
+   * bound to the calling thread.
+   *
+   * @param toks The tokens, handed to the delegate unchanged.
+   * @param tags The tags, handed to the delegate unchanged.
+   * @return The result produced by the delegate.
+   */
+  @Override
+  public String[] lemmatize(String[] toks, String[] tags) {
+    return getLemmatizer().lemmatize(toks, tags);
+  }
+
+  /**
+   * Delegates to {@link LemmatizerME#lemmatize(List, List)} of the instance
+   * bound to the calling thread.
+   *
+   * @param toks The tokens, handed to the delegate unchanged.
+   * @param tags The tags, handed to the delegate unchanged.
+   * @return The result produced by the delegate.
+   */
+  @Override
+  public List<List<String>> lemmatize(List<String> toks, List<String> tags) {
+    return getLemmatizer().lemmatize(toks, tags);
+  }
+
+  /**
+   * Removes the {@link LemmatizerME} bound to the calling thread. Instances bound to other
+   * threads are not affected. A later {@code lemmatize} call on the same thread creates
+   * a fresh instance, so this object remains usable after closing.
+   */
+  @Override
+  public void close() {
+    threadLocal.remove();
+  }
+
+}

Reply via email to