This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp-models.git


The following commit(s) were added to refs/heads/main by this push:
     new 6af838c  OPENNLP-1557 - Create Model Jars on-the-fly (#1)
6af838c is described below

commit 6af838ce1b41f8ea7f2a62cf380b149c0f6461eb
Author: Richard Zowalla <[email protected]>
AuthorDate: Mon May 27 16:19:19 2024 +0200

    OPENNLP-1557 - Create Model Jars on-the-fly (#1)
---
 .github/workflows/maven.yml                        |  37 +++++++
 .gitignore                                         |   1 +
 README.md                                          |   3 +
 opennlp-models-langdetect/pom.xml                  |  41 +++++++-
 .../src/main/resources/langdetect-183.bin          |   3 -
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sentdetect-de/pom.xml           |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sentdetect-en/pom.xml           |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sentdetect-fr/pom.xml           |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sentdetect-it/pom.xml           |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sentdetect-nl/pom.xml           |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 opennlp-models-sentdetect/pom.xml                  |  46 +++++++++
 opennlp-models-test/pom.xml                        |  59 ++++++++++++
 .../java/org.apache.opennlp/ModelValidator.java    | 106 +++++++++++++++++++++
 .../src/main/resources/expected-models.txt         |  21 ++++
 pom.xml                                            |  57 ++++++++++-
 21 files changed, 850 insertions(+), 7 deletions(-)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
new file mode 100644
index 0000000..c995950
--- /dev/null
+++ b/.github/workflows/maven.yml
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Java CI
+
+on: [push, pull_request]
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v3
+      - uses: actions/cache@v3
+        with:
+          path: ~/.m2/repository
+          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-maven-
+      - name: Setup Java
+        uses: actions/setup-java@v3
+        with:
+          distribution: adopt
+          java-version: 17
+      - name: Build with Maven
+        run: mvn -V clean install --no-transfer-progress
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 81ef51f..035c795 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ nbactions.xml
 nb-configuration.xml
 *.DS_Store
 .checkstyle
+*.bin
\ No newline at end of file
diff --git a/README.md b/README.md
index 5f57255..6cc4748 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,9 @@ compile group: "org.apache.opennlp", name: "opennlp-models-langdetect", version:
 
 For more details please check our [documentation](http://opennlp.apache.org/docs/)
 
+## Adding a new Model
+
+Ensure to add a new model to the `expected-models.txt` file located in `opennlp-models-test`.
 
 ## Contributing
 
diff --git a/opennlp-models-langdetect/pom.xml b/opennlp-models-langdetect/pom.xml
index 518b455..54e55f6 100644
--- a/opennlp-models-langdetect/pom.xml
+++ b/opennlp-models-langdetect/pom.xml
@@ -30,8 +30,45 @@
   </parent>
 
   <artifactId>opennlp-models-langdetect</artifactId>
-  <version>0.1-SNAPSHOT</version>
 
-  <name>Apache OpenNLP Models Lang-Detect</name>
+  <name>Apache OpenNLP Models :: Lang-Detect</name>
 
+  <properties>
+    <dist.base>${asf.dist.base}</dist.base>
+    <model.family>langdetect</model.family>
+    <model.name>langdetect-183.bin</model.name>
+    <model.version>1.8.3</model.version>
+    <model.sha256>2ddf585fac2e02a9dcfb9a4a9cc9417562eaac351be2efb506a2eaa87f19e9d4</model.sha256>
+  </properties>
+
+  <build>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+        <filtering>true</filtering>
+        <includes>
+          <include>**/model.properties</include>
+        </includes>
+      </resource>
+      <resource>
+        <directory>src/main/resources</directory>
+        <includes>
+          <include>**/*.bin</include>
+        </includes>
+      </resource>
+    </resources>
+    <plugins>
+      <plugin>
+        <groupId>com.googlecode.maven-download-plugin</groupId>
+        <artifactId>download-maven-plugin</artifactId>
+        <configuration>
+          <url>${dist.base}/${model.family}/${model.version}/${model.name}</url>
+          </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
 </project>
diff --git a/opennlp-models-langdetect/src/main/resources/langdetect-183.bin b/opennlp-models-langdetect/src/main/resources/langdetect-183.bin
deleted file mode 100644
index 05dc88e..0000000
--- a/opennlp-models-langdetect/src/main/resources/langdetect-183.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2ddf585fac2e02a9dcfb9a4a9cc9417562eaac351be2efb506a2eaa87f19e9d4
-size 10568188
diff --git a/opennlp-models-langdetect/src/main/resources/model.properties b/opennlp-models-langdetect/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++ b/opennlp-models-langdetect/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-de/pom.xml b/opennlp-models-sentdetect/opennlp-models-sentdetect-de/pom.xml
new file mode 100644
index 0000000..e1108d8
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-de/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sentdetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sentdetect-de</artifactId>
+
+    <name>Apache OpenNLP Models :: Sent-Detect :: German</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.sha256>2d4da68109269aab96d128004fe0b88c8a0d226c0ead2b40afc0d10c6d5dc182</model.sha256>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-de/src/main/resources/model.properties b/opennlp-models-sentdetect/opennlp-models-sentdetect-de/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-de/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-en/pom.xml b/opennlp-models-sentdetect/opennlp-models-sentdetect-en/pom.xml
new file mode 100644
index 0000000..9cff087
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-en/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sentdetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sentdetect-en</artifactId>
+
+    <name>Apache OpenNLP Models :: Sent-Detect :: English</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.sha256>8dddd7e582cedef4a79a83fd340cefcdafe58419bd20520fec90492ea4bffd11</model.sha256>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-en/src/main/resources/model.properties b/opennlp-models-sentdetect/opennlp-models-sentdetect-en/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-en/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/pom.xml b/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/pom.xml
new file mode 100644
index 0000000..9e15d9d
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sentdetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sentdetect-fr</artifactId>
+
+    <name>Apache OpenNLP Models :: Sent-Detect :: French</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-1.0-1.9.3fr-ud-ftb-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.sha256>94e57992d4b671909ccb3c5d8e6cb81c84ac96c1c6a19b8b4e7b0197a41cc870</model.sha256>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/src/main/resources/model.properties b/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-it/pom.xml b/opennlp-models-sentdetect/opennlp-models-sentdetect-it/pom.xml
new file mode 100644
index 0000000..f5db512
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-it/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sentdetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sentdetect-it</artifactId>
+
+    <name>Apache OpenNLP Models :: Sent-Detect :: Italian</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-it-ud-vit-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.sha256>1f9c15b5bb6be611ba2cc7e0c50edce56416977ad8c0c4e53379a0c80606d3ad</model.sha256>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-it/src/main/resources/model.properties b/opennlp-models-sentdetect/opennlp-models-sentdetect-it/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-it/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/pom.xml b/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/pom.xml
new file mode 100644
index 0000000..3760149
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sentdetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sentdetect-nl</artifactId>
+
+    <name>Apache OpenNLP Models :: Sent-Detect :: Dutch</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.sha256>4e4d4ce182c0dc6f12f406077a967807828a523191fbb9199219e5d5969edfb7</model.sha256>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/src/main/resources/model.properties b/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/pom.xml b/opennlp-models-sentdetect/pom.xml
new file mode 100644
index 0000000..c445dbd
--- /dev/null
+++ b/opennlp-models-sentdetect/pom.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sentdetect</artifactId>
+
+    <name>Apache OpenNLP Models :: Sent-Detect</name>
+
+    <packaging>pom</packaging>
+
+    <modules>
+        <module>opennlp-models-sentdetect-de</module>
+        <module>opennlp-models-sentdetect-it</module>
+        <module>opennlp-models-sentdetect-en</module>
+        <module>opennlp-models-sentdetect-fr</module>
+        <module>opennlp-models-sentdetect-nl</module>
+    </modules>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-test/pom.xml b/opennlp-models-test/pom.xml
new file mode 100644
index 0000000..43ac0ef
--- /dev/null
+++ b/opennlp-models-test/pom.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-test</artifactId>
+    <name>Apache OpenNLP Models :: Tests</name>
+
+    <build>
+        <plugins>
+            <!--
+            Runs org.apache.opennlp.ModelValidator via the exec plugin's 'java' goal
+            during the 'verify' phase. The module's base directory is passed as the
+            single program argument.
+            -->
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>exec-maven-plugin</artifactId>
+                <version>${exec.plugin.version}</version>
+                <executions>
+                    <execution>
+                        <phase>verify</phase>
+                        <goals>
+                            <goal>java</goal>
+                        </goals>
+                    </execution>
+                </executions>
+                <configuration>
+                    <mainClass>org.apache.opennlp.ModelValidator</mainClass>
+                    <arguments>
+                        <argument>${project.basedir}</argument>
+                    </arguments>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git 
a/opennlp-models-test/src/main/java/org.apache.opennlp/ModelValidator.java 
b/opennlp-models-test/src/main/java/org.apache.opennlp/ModelValidator.java
new file mode 100644
index 0000000..82aa82e
--- /dev/null
+++ b/opennlp-models-test/src/main/java/org.apache.opennlp/ModelValidator.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.jar.JarFile;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+
+/**
+ * Command line tool performing basic sanity checks on the generated model JAR files.
+ * <p>
+ * It compares the model file names listed in the classpath resource
+ * {@code expected-models.txt} with the {@code opennlp-models*.jar} files found
+ * below the parent directory of the directory given as program argument.
+ */
+public class ModelValidator {
+
+  /**
+   * Validates that every expected model is contained in one of the generated JAR files.
+   *
+   * @param args Exactly one element: the base directory of the test module. JAR files
+   *             located inside this directory are ignored; its parent directory is
+   *             searched recursively.
+   * @throws IllegalArgumentException if the argument count is wrong, the directory has
+   *         no parent, the number of expected models differs from the number of JAR
+   *         files found, or an expected model is missing from all JAR files.
+   */
+  public static void main(String[] args) {
+    if (args.length != 1) {
+      throw new IllegalArgumentException("This tool expects exactly one argument");
+    }
+    System.err.println("Executing basic model validation checks.");
+
+    // Normalize to an absolute path so that a relative argument still has a parent
+    // and so that the startsWith(...) filtering below compares like with like.
+    final Path testBaseDir = Path.of(args[0]).toAbsolutePath().normalize();
+    final Path projectDir = testBaseDir.getParent();
+    if (projectDir == null) {
+      throw new IllegalArgumentException(
+          "The directory '" + testBaseDir + "' has no parent directory to search in");
+    }
+    final List<String> expectedModels = getExpectedModels();
+
+    final String pattern = "opennlp-models.*\\.jar";
+
+    final List<Path> availableModelJars = getAvailableModelJars(pattern, testBaseDir, projectDir);
+
+    if (expectedModels.size() != availableModelJars.size()) {
+      throw new IllegalArgumentException("Detected a mismatch between " +
+          "expected and available models! " +
+          "Expected: " + expectedModels.size() +
+          "; Actual: " + availableModelJars.size());
+    }
+
+    // Every expected model must be checked. Do not leave the method after the first
+    // hit, otherwise all remaining models would silently go unverified.
+    for (String model : expectedModels) {
+      final boolean found = availableModelJars.stream()
+          .anyMatch(availableJar -> isModelInJar(availableJar, model));
+      if (!found) {
+        throw new IllegalArgumentException(
+            "Expected model '" + model + "' could not be found inside the generated JAR files!");
+      }
+    }
+  }
+
+  /**
+   * Checks whether a JAR file contains an entry with exactly the given name.
+   *
+   * @param jarFilePath   The path of the JAR file to inspect.
+   * @param expectedModel The full entry name to look for.
+   * @return {@code true} if an entry with that name exists, {@code false} otherwise.
+   * @throws RuntimeException if the JAR file cannot be read.
+   */
+  public static boolean isModelInJar(Path jarFilePath, String expectedModel) {
+    try (JarFile jarFile = new JarFile(jarFilePath.toFile())) {
+      return jarFile.stream()
+          .anyMatch(entry -> entry.getName().equals(expectedModel));
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to read the JAR file: " + jarFilePath, e);
+    }
+  }
+
+  /**
+   * Recursively collects all regular files below {@code projectDir} whose file name
+   * fully matches {@code pattern}, skipping everything located under {@code testDir}.
+   *
+   * @param pattern    A regular expression applied to the file name only.
+   * @param testDir    The directory whose content is excluded from the result.
+   * @param projectDir The directory to walk.
+   * @return The matching paths.
+   * @throws RuntimeException if walking the directory tree fails.
+   */
+  private static List<Path> getAvailableModelJars(String pattern, Path testDir, Path projectDir) {
+    final Pattern regexPattern = Pattern.compile(pattern);
+    try (Stream<Path> stream = Files.walk(projectDir)) {
+      return stream
+          .filter(Files::isRegularFile)
+          .filter(path -> !path.startsWith(testDir))
+          .filter(path -> regexPattern.matcher(path.getFileName().toString()).matches())
+          .toList();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Reads the expected model names from the classpath resource
+   * {@code expected-models.txt}. Lines are trimmed; blank lines and lines starting
+   * with {@code #} are skipped.
+   *
+   * @return The expected model names in file order.
+   * @throws IllegalArgumentException if the resource cannot be found.
+   * @throws RuntimeException if reading the resource fails.
+   */
+  private static List<String> getExpectedModels() {
+    try (InputStream inputStream =
+             Thread.currentThread().getContextClassLoader().getResourceAsStream("expected-models.txt")) {
+      if (inputStream == null) {
+        throw new IllegalArgumentException("Expected model file could not be found!");
+      }
+
+      // Decode explicitly as UTF-8 instead of relying on the platform default charset.
+      try (BufferedReader reader = new BufferedReader(
+          new InputStreamReader(inputStream, java.nio.charset.StandardCharsets.UTF_8))) {
+        return reader.lines()
+            // Trim first so indented comments and trailing whitespace are handled.
+            .map(String::trim)
+            .filter(line -> !line.isEmpty() && !line.startsWith("#"))
+            .toList();
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+}
diff --git a/opennlp-models-test/src/main/resources/expected-models.txt 
b/opennlp-models-test/src/main/resources/expected-models.txt
new file mode 100644
index 0000000..68c678a
--- /dev/null
+++ b/opennlp-models-test/src/main/resources/expected-models.txt
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+langdetect-183.bin
+opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin
+opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin
+opennlp-1.0-1.9.3fr-ud-ftb-sentence-1.0-1.9.3.bin
+opennlp-it-ud-vit-sentence-1.0-1.9.3.bin
+opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index a931648..a5221d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -94,8 +94,20 @@
        <properties>
                <!-- Build Properties -->
                <java.version>17</java.version>
-               <maven.version>3.3.9</maven.version>
+               <maven.version>3.6.3</maven.version>
+
+               
<asf.dist.base>https://dist.apache.org/repos/dist/release/opennlp/models/</asf.dist.base>
+               
<sf.dist.base>https://opennlp.sourceforge.net/models-1.5/</sf.dist.base>
+
+               <!-- set a fixed value here to enable reproducible builds -->
+               
<project.build.outputTimestamp>2024-01-01T00:00:00Z</project.build.outputTimestamp>
+
+               <!-- maven plugin versions -->
                <enforcer.plugin.version>3.3.0</enforcer.plugin.version>
+               <download.plugin.version>1.9.0</download.plugin.version>
+               <compiler.plugin.version>3.10.1</compiler.plugin.version>
+               <build-helper.plugin.version>3.6.0</build-helper.plugin.version>
+               <exec.plugin.version>3.3.0</exec.plugin.version>
        </properties>
 
        <build>
@@ -111,6 +123,45 @@
                                                
<mavenExecutorId>forked-path</mavenExecutorId>
                                        </configuration>
                                </plugin>
+                               <plugin>
+                                       <groupId>org.codehaus.mojo</groupId>
+                                       
<artifactId>build-helper-maven-plugin</artifactId>
+                                       
<version>${build-helper.plugin.version}</version>
+                                       <executions>
+                                               <execution>
+                                                       <id>add-resource</id>
+                                                       
<phase>generate-resources</phase>
+                                                       <goals>
+                                                               
<goal>add-resource</goal>
+                                                       </goals>
+                                                       <configuration>
+                                                               <resources>
+                                                                       
<resource>
+                                                                               
<directory>${project.build.directory}/models/</directory>
+                                                                       
</resource>
+                                                               </resources>
+                                                       </configuration>
+                                               </execution>
+                                       </executions>
+                               </plugin>
+                               <plugin>
+                                       
<groupId>com.googlecode.maven-download-plugin</groupId>
+                                       
<artifactId>download-maven-plugin</artifactId>
+                                       
<version>${download.plugin.version}</version>
+                                       <executions>
+                                               <execution>
+                                                       <id>download-model</id>
+                                                       
<phase>generate-resources</phase>
+                                                       <goals>
+                                                               
<goal>wget</goal>
+                                                       </goals>
+                                               </execution>
+                                       </executions>
+                                       <configuration>
+                                               
<outputDirectory>${project.build.directory}/models</outputDirectory>
+                                               <sha256>${model.sha256}</sha256>
+                                       </configuration>
+                               </plugin>
                        </plugins>
                </pluginManagement>
 
@@ -118,7 +169,7 @@
                        <plugin>
                                <groupId>org.apache.maven.plugins</groupId>
                                <artifactId>maven-compiler-plugin</artifactId>
-                               <version>3.10.1</version>
+                               <version>${compiler.plugin.version}</version>
                                <configuration>
                                        <release>${java.version}</release>
                                        
<compilerArgument>-Xlint</compilerArgument>
@@ -201,6 +252,8 @@
 
        <modules>
                <module>opennlp-models-langdetect</module>
+               <module>opennlp-models-sentdetect</module>
+               <module>opennlp-models-test</module>
        </modules>
 
 </project>


Reply via email to