This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch 
OPENNLP-1598-Update-documentation-on-how-to-use-ClassPathModelFinder
in repository https://gitbox.apache.org/repos/asf/opennlp.git

commit 7ef465c807ad9deb0eaac560b1ce82da504b2caf
Author: Richard Zowalla <[email protected]>
AuthorDate: Mon Oct 28 20:53:59 2024 +0100

    OPENNLP-1598 - Update documentation on how to use ClassPathModelFinder
---
 opennlp-docs/src/docbkx/model-loading.xml | 147 ++++++++++++++++++++++++++++++
 opennlp-docs/src/docbkx/opennlp.xml       |   3 +-
 2 files changed, 149 insertions(+), 1 deletion(-)

diff --git a/opennlp-docs/src/docbkx/model-loading.xml 
b/opennlp-docs/src/docbkx/model-loading.xml
new file mode 100644
index 00000000..266dbe2d
--- /dev/null
+++ b/opennlp-docs/src/docbkx/model-loading.xml
@@ -0,0 +1,147 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!DOCTYPE chapter PUBLIC "-//OASIS//DTD DocBook XML V4.4//EN"
+        "http://www.oasis-open.org/docbook/xml/4.4/docbookx.dtd"[
+        ]>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+   http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<chapter id="tools.model">
+    <title>Classpath Loading of OpenNLP Models</title>
+    <para>
+        Since version 2.4.0, OpenNLP supports the ability to load pre-trained 
OpenNLP models from the classpath.
+        It relies on either a simple implementation using the application's 
classpath or on the
+        <ulink url="https://github.com/classgraph/classgraph">classgraph</ulink>
+        library to locate OpenNLP model JAR files.
+        Our pre-trained models are bundled from the <ulink url="https://github.com/apache/opennlp-models">OpenNLP Models
+        repository</ulink>.
+
+        This section describes
+
+        <itemizedlist>
+            <listitem>
+                <para>how to load and use a pre-trained OpenNLP model from the 
classpath.</para>
+            </listitem>
+            <listitem>
+                <para>how to bundle a custom OpenNLP model to be loadable as a 
JAR file from the classpath.</para>
+            </listitem>
+        </itemizedlist>
+
+    </para>
+
+       <section id="tools.model.load">
+               <title>Loading a pre-trained OpenNLP model from the 
classpath</title>
+        <para>
+            First, you need to add the following dependency to your classpath:
+
+            <programlisting language="xml">
+                <![CDATA[
+<dependency>
+  <groupId>org.apache.opennlp</groupId>
+  <artifactId>opennlp-tools-models</artifactId>
+  <version>CURRENT_OPENNLP_VERSION</version>
+</dependency>
+]]>
+            </programlisting>
+
+            You also need model JAR files on your classpath, either by using our pre-trained models or by building custom models as described later in this chapter.
+            If you need advanced classpath scanning capabilities, you should also add the classgraph library to your classpath.
+
+            <programlisting language="xml">
+                <![CDATA[
+<dependency>
+    <groupId>io.github.classgraph</groupId>
+    <artifactId>classgraph</artifactId>
+    <version>CURRENT_CLASSGRAPH_VERSION</version>
+</dependency>
+]]>
+            </programlisting>
+
+            Make sure you replace the placeholders with the appropriate 
version values.
+
+            Next, you can search for such a model and load it from the 
classpath:
+
+            <programlisting language="java">
+                <![CDATA[
+final ClassgraphModelFinder finder = new ClassgraphModelFinder(); // or use 
new SimpleClassPathModelFinder()
+final ClassPathModelLoader loader = new ClassPathModelLoader();
+final Set<ClassPathModelEntry> models = finder.findModels(false);
+for(ClassPathModelEntry entry : models) {
+
+   final ClassPathModel model = loader.load(entry);
+
+   if(model != null) {
+      System.out.println(model.name());
+      System.out.println(model.sha256());
+      System.out.println(model.version());
+      System.out.println(model.language());
+      // do something with the model by consuming the byte array
+   }
+}]]>
+            </programlisting>
+
+        </para>
+       </section>
+
+
+       <section id="tools.model.bundle">
+               <title>Bundling a custom trained OpenNLP model for the 
classpath</title>
+               <para>
+            If you intend to provide your own custom trained OpenNLP models as 
JAR files for classpath discovery,
+            we recommend that you have a look at our setup in the <ulink url="https://github.com/apache/opennlp-models">OpenNLP Models
+            repository</ulink>. We recommend putting one model per JAR file.
+
+            Make sure you add a model.properties file with the following content:
+
+       <programlisting language="java">
+                <![CDATA[
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
+model.language=${model.language}
+]]>
+            </programlisting>
+
+            Make sure to replace the values accordingly and configure your 
build tool to include the binary model and the model.properties
+            in the resulting JAR file.
+
+            To load such a custom model, you may need to adjust the pattern for classpath scanning. For example, if you name your model "custom-opennlp-model",
+            you would need the following code to successfully find and load it:
+
+            <programlisting language="java">
+                <![CDATA[
+final ClassgraphModelFinder finder = new 
ClassgraphModelFinder("custom-opennlp-model.jar"); // or use new 
SimpleClassPathModelFinder("custom-opennlp-model.jar")
+final ClassPathModelLoader loader = new ClassPathModelLoader();
+final Set<ClassPathModelEntry> models = finder.findModels(false);
+for(ClassPathModelEntry entry : models) {
+
+   final ClassPathModel model = loader.load(entry);
+
+   if(model != null) {
+      System.out.println(model.name());
+      System.out.println(model.sha256());
+      System.out.println(model.version());
+      System.out.println(model.language());
+      // do something with the model by consuming the byte array
+   }
+}]]>
+            </programlisting>
+
+        </para>
+    </section>
+</chapter>
\ No newline at end of file
diff --git a/opennlp-docs/src/docbkx/opennlp.xml 
b/opennlp-docs/src/docbkx/opennlp.xml
index 0d2115a2..1f62e894 100644
--- a/opennlp-docs/src/docbkx/opennlp.xml
+++ b/opennlp-docs/src/docbkx/opennlp.xml
@@ -94,7 +94,8 @@ under the License.
        <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./chunker.xml" />
        <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./parser.xml" />
        <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./coref.xml" />
-       <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./extension.xml" />        
+       <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./model-loading.xml" />
+       <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./extension.xml" />
        <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./corpora.xml" />
        <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./machine-learning.xml" />
        <xi:include xmlns:xi="http://www.w3.org/2001/XInclude" href="./uima-integration.xml" />

Reply via email to