This is an automated email from the ASF dual-hosted git repository.

rzo1 pushed a commit to branch generative-artifact-creation
in repository https://gitbox.apache.org/repos/asf/opennlp-models.git

commit 137267ccb14cf566e40f5afb850a415dbb343759
Author: Richard Zowalla <[email protected]>
AuthorDate: Fri May 24 13:44:47 2024 +0200

    This is a conceptual draft proposing a way to automatically build JARs
    from our OpenNLP models during a Maven build. It still relies on the
    models published on dist.apache.org, but enables distributing them via
    Maven Central for easier consumption within the Java ecosystem.
---
 .github/workflows/maven.yml                        |  37 +++++++
 .gitignore                                         |   1 +
 opennlp-models-langdetect/pom.xml                  |  41 +++++++-
 .../src/main/resources/langdetect-183.bin          |   3 -
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sendetect-de/pom.xml            |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sendetect-en/pom.xml            |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sendetect-fr/pom.xml            |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sendetect-it/pom.xml            |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 .../opennlp-models-sendetect-nl/pom.xml            |  75 +++++++++++++++
 .../src/main/resources/model.properties            |  18 ++++
 opennlp-models-sendetect/pom.xml                   |  46 +++++++++
 opennlp-models-test/pom.xml                        |  59 ++++++++++++
 .../src/main/java/org.apache.opennlp/Main.java     | 106 +++++++++++++++++++++
 .../src/main/resources/expected-models.txt         |  21 ++++
 pom.xml                                            |  49 ++++++++++
 20 files changed, 841 insertions(+), 5 deletions(-)

diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
new file mode 100644
index 0000000..c995950
--- /dev/null
+++ b/.github/workflows/maven.yml
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements.  See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License.  You may obtain a copy of the License at
+#
+#      http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Java CI
+
+on: [push, pull_request]  # run for every push and every pull request
+
+jobs:
+  build:
+    runs-on: ubuntu-latest
+    steps:
+      - uses: actions/checkout@v4
+      - uses: actions/cache@v4  # reuse the local Maven repository between runs
+        with:
+          path: ~/.m2/repository
+          key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+          restore-keys: |
+            ${{ runner.os }}-maven-
+      - name: Setup Java
+        uses: actions/setup-java@v4
+        with:
+          distribution: temurin  # 'adopt' is the discontinued AdoptOpenJDK feed
+          java-version: '17'
+      - name: Build with Maven
+        run: mvn -V clean install --no-transfer-progress
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 81ef51f..035c795 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ nbactions.xml
 nb-configuration.xml
 *.DS_Store
 .checkstyle
+*.bin
\ No newline at end of file
diff --git a/opennlp-models-langdetect/pom.xml b/opennlp-models-langdetect/pom.xml
index 518b455..7d4933a 100644
--- a/opennlp-models-langdetect/pom.xml
+++ b/opennlp-models-langdetect/pom.xml
@@ -30,8 +30,45 @@
   </parent>
 
   <artifactId>opennlp-models-langdetect</artifactId>
-  <version>0.1-SNAPSHOT</version>
 
-  <name>Apache OpenNLP Models Lang-Detect</name>
+  <name>Apache OpenNLP Models :: Lang-Detect</name>
 
+  <properties>
+    <dist.base>${asf.dist.base}</dist.base>
+    <model.family>langdetect</model.family>
+    <model.name>langdetect-183.bin</model.name>
+    <model.version>1.8.3</model.version>
+    <model.md5>87be0a1cf60e5d8998e521401a87ca97</model.md5>
+  </properties>
+
+  <build>
+    <resources>
+      <resource>
+        <directory>src/main/resources</directory>
+        <filtering>true</filtering>
+        <includes>
+          <include>**/model.properties</include>
+        </includes>
+      </resource>
+      <resource>
+        <directory>src/main/resources</directory>
+        <includes>
+          <include>**/*.bin</include>
+        </includes>
+      </resource>
+    </resources>
+    <plugins>
+      <plugin>
+        <groupId>com.googlecode.maven-download-plugin</groupId>
+        <artifactId>download-maven-plugin</artifactId>
+        <configuration>
+          <url>${dist.base}/${model.family}/${model.version}/${model.name}</url>
+          </configuration>
+      </plugin>
+      <plugin>
+        <groupId>org.codehaus.mojo</groupId>
+        <artifactId>build-helper-maven-plugin</artifactId>
+      </plugin>
+    </plugins>
+  </build>
 </project>
diff --git a/opennlp-models-langdetect/src/main/resources/langdetect-183.bin b/opennlp-models-langdetect/src/main/resources/langdetect-183.bin
deleted file mode 100644
index 05dc88e..0000000
--- a/opennlp-models-langdetect/src/main/resources/langdetect-183.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2ddf585fac2e02a9dcfb9a4a9cc9417562eaac351be2efb506a2eaa87f19e9d4
-size 10568188
diff --git a/opennlp-models-langdetect/src/main/resources/model.properties b/opennlp-models-langdetect/src/main/resources/model.properties
new file mode 100644
index 0000000..023541a
--- /dev/null
+++ b/opennlp-models-langdetect/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.md5=${model.md5}
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-de/pom.xml b/opennlp-models-sendetect/opennlp-models-sendetect-de/pom.xml
new file mode 100644
index 0000000..e53f035
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-de/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sendetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sendetect-de</artifactId>
+
+    <name>Apache OpenNLP Models :: Sen-Detect :: German</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.md5>20d335035a6958ec34fef6ceec8e7307</model.md5>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-de/src/main/resources/model.properties b/opennlp-models-sendetect/opennlp-models-sendetect-de/src/main/resources/model.properties
new file mode 100644
index 0000000..023541a
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-de/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.md5=${model.md5}
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-en/pom.xml b/opennlp-models-sendetect/opennlp-models-sendetect-en/pom.xml
new file mode 100644
index 0000000..f66bd09
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-en/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sendetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sendetect-en</artifactId>
+
+    <name>Apache OpenNLP Models :: Sen-Detect :: English</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.md5>5965ada99a2ca77beb8632bb47741b7a</model.md5>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-en/src/main/resources/model.properties b/opennlp-models-sendetect/opennlp-models-sendetect-en/src/main/resources/model.properties
new file mode 100644
index 0000000..023541a
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-en/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.md5=${model.md5}
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-fr/pom.xml b/opennlp-models-sendetect/opennlp-models-sendetect-fr/pom.xml
new file mode 100644
index 0000000..dd18ee5
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-fr/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sendetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sendetect-fr</artifactId>
+
+    <name>Apache OpenNLP Models :: Sen-Detect :: French</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-1.0-1.9.3fr-ud-ftb-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.md5>771252c520a0dc238af35911c139374c</model.md5>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-fr/src/main/resources/model.properties b/opennlp-models-sendetect/opennlp-models-sendetect-fr/src/main/resources/model.properties
new file mode 100644
index 0000000..023541a
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-fr/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.md5=${model.md5}
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-it/pom.xml b/opennlp-models-sendetect/opennlp-models-sendetect-it/pom.xml
new file mode 100644
index 0000000..47aa391
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-it/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sendetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sendetect-it</artifactId>
+
+    <name>Apache OpenNLP Models :: Sen-Detect :: Italian</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-it-ud-vit-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.md5>3083dc13ba071c5aca94f81eeed6c097</model.md5>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-it/src/main/resources/model.properties b/opennlp-models-sendetect/opennlp-models-sendetect-it/src/main/resources/model.properties
new file mode 100644
index 0000000..023541a
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-it/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.md5=${model.md5}
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-nl/pom.xml b/opennlp-models-sendetect/opennlp-models-sendetect-nl/pom.xml
new file mode 100644
index 0000000..ffd349e
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-nl/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models-sendetect</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sendetect-nl</artifactId>
+
+    <name>Apache OpenNLP Models :: Sen-Detect :: Dutch</name>
+
+    <properties>
+        <dist.base>${asf.dist.base}</dist.base>
+        <model.family>ud-models-1.0</model.family>
+        <model.name>opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin</model.name>
+        <model.version>1.9.3</model.version>
+        <model.md5>ed160f2cf99b249017d7fc3d3ad8c6b7</model.md5>
+    </properties>
+
+    <build>
+        <resources>
+            <resource>
+                <directory>src/main/resources</directory>
+                <filtering>true</filtering>
+                <includes>
+                    <include>**/model.properties</include>
+                </includes>
+            </resource>
+            <resource>
+                <directory>src/main/resources</directory>
+                <includes>
+                    <include>**/*.bin</include>
+                </includes>
+            </resource>
+        </resources>
+        <plugins>
+            <plugin>
+                <groupId>com.googlecode.maven-download-plugin</groupId>
+                <artifactId>download-maven-plugin</artifactId>
+                <configuration>
+                    <url>${dist.base}/${model.family}/${model.name}</url>
+                </configuration>
+            </plugin>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>build-helper-maven-plugin</artifactId>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-sendetect/opennlp-models-sendetect-nl/src/main/resources/model.properties b/opennlp-models-sendetect/opennlp-models-sendetect-nl/src/main/resources/model.properties
new file mode 100644
index 0000000..023541a
--- /dev/null
+++ b/opennlp-models-sendetect/opennlp-models-sendetect-nl/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.md5=${model.md5}
\ No newline at end of file
diff --git a/opennlp-models-sendetect/pom.xml b/opennlp-models-sendetect/pom.xml
new file mode 100644
index 0000000..61cb86d
--- /dev/null
+++ b/opennlp-models-sendetect/pom.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-sendetect</artifactId>
+
+    <name>Apache OpenNLP Models :: Sen-Detect</name>
+
+    <packaging>pom</packaging>
+
+    <modules>
+        <module>opennlp-models-sendetect-de</module>
+        <module>opennlp-models-sendetect-it</module>
+        <module>opennlp-models-sendetect-en</module>
+        <module>opennlp-models-sendetect-fr</module>
+        <module>opennlp-models-sendetect-nl</module>
+    </modules>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-test/pom.xml b/opennlp-models-test/pom.xml
new file mode 100644
index 0000000..303f128
--- /dev/null
+++ b/opennlp-models-test/pom.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+         xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+         xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd">
+    <modelVersion>4.0.0</modelVersion>
+    <parent>
+        <groupId>org.apache.opennlp</groupId>
+        <artifactId>opennlp-models</artifactId>
+        <version>0.1-SNAPSHOT</version>
+    </parent>
+
+    <artifactId>opennlp-models-test</artifactId>
+    <name>Apache OpenNLP Models :: Tests</name>
+
+    <build>
+        <plugins>
+            <plugin>
+                <groupId>org.codehaus.mojo</groupId>
+                <artifactId>exec-maven-plugin</artifactId>
+                <version>3.3.0</version>
+                <executions>
+                    <execution>
+                        <phase>verify</phase>
+                        <goals>
+                            <goal>java</goal>
+                        </goals>
+                    </execution>
+                </executions>
+                <configuration>
+                    <mainClass>org.apache.opennlp.Main</mainClass>
+                    <arguments>
+                        <argument>${project.basedir}</argument>
+                    </arguments>
+                </configuration>
+            </plugin>
+        </plugins>
+    </build>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-test/src/main/java/org.apache.opennlp/Main.java 
b/opennlp-models-test/src/main/java/org.apache.opennlp/Main.java
new file mode 100644
index 0000000..ed446a6
--- /dev/null
+++ b/opennlp-models-test/src/main/java/org.apache.opennlp/Main.java
@@ -0,0 +1,145 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.charset.StandardCharsets;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.jar.JarFile;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+
+/**
+ * Basic validation of the model JARs produced by the build.
+ * <p>
+ * Collects every regular file below the parent of the given directory whose name matches
+ * {@code opennlp-models.*\.jar}, skipping files inside the given directory itself, and
+ * verifies that each model listed in the classpath resource {@code expected-models.txt}
+ * is an entry of at least one of those JARs.
+ */
+public class Main {
+
+  /**
+   * Runs the validation and fails with an exception on the first problem found.
+   *
+   * @param args Exactly one element: the directory of the test module. Its parent
+   *             directory is scanned for model JARs.
+   * @throws IllegalArgumentException if the argument count is wrong, the directory has no
+   *                                  parent, the number of JARs differs from the number of
+   *                                  expected models, or a model is in none of the JARs.
+   */
+  public static void main(String[] args) {
+    if (args.length != 1) {
+      throw new IllegalArgumentException("This tool expects exactly one argument");
+    }
+    System.err.println("Executing basic model validation checks.");
+
+    // Absolute, normalized form so that a relative argument such as "." still has a parent.
+    final Path testBaseDir = Path.of(args[0]).toAbsolutePath().normalize();
+    final Path projectDir = testBaseDir.getParent();
+    if (projectDir == null) {
+      throw new IllegalArgumentException(
+          "'" + testBaseDir + "' has no parent directory to scan for model JARs!");
+    }
+    final List<String> expectedModels = getExpectedModels();
+
+    final String pattern = "opennlp-models.*\\.jar";
+
+    final List<Path> availableModelJars = getAvailableModelJars(pattern, testBaseDir, projectDir);
+
+    if (expectedModels.size() != availableModelJars.size()) {
+      throw new IllegalArgumentException("Detected a mismatch between " +
+          "expected and available models! " +
+          "Expected: " + expectedModels.size() +
+          "; Actual: " + availableModelJars.size());
+    }
+
+    for (String model : expectedModels) {
+      // Check every expected model; returning on the first hit would skip all remaining ones.
+      final boolean found = availableModelJars.stream()
+          .anyMatch(jar -> isModelInJar(jar, model));
+      if (!found) {
+        throw new IllegalArgumentException(
+            "Expected model '" + model + "' could not be found inside the generated JAR files!");
+      }
+    }
+  }
+
+  /**
+   * Checks whether a JAR file contains an entry with exactly the given name.
+   *
+   * @param jarFilePath   The JAR file to inspect.
+   * @param expectedModel The entry name to look for.
+   * @return {@code true} if such an entry exists, {@code false} otherwise.
+   * @throws RuntimeException if the JAR file cannot be read.
+   */
+  public static boolean isModelInJar(Path jarFilePath, String expectedModel) {
+    try (JarFile jarFile = new JarFile(jarFilePath.toFile())) {
+      return jarFile.stream()
+          .anyMatch(entry -> entry.getName().equals(expectedModel));
+    } catch (IOException e) {
+      throw new RuntimeException("Failed to read the JAR file: " + jarFilePath, e);
+    }
+  }
+
+  /**
+   * Walks {@code projectDir} and collects the regular files whose file name fully matches
+   * {@code pattern}, excluding everything located below {@code testDir}.
+   */
+  private static List<Path> getAvailableModelJars(String pattern, Path testDir, Path projectDir) {
+    final Pattern regexPattern = Pattern.compile(pattern);
+    try (Stream<Path> stream = Files.walk(projectDir)) {
+      return stream
+          .filter(Files::isRegularFile)
+          .filter(path -> !path.startsWith(testDir))
+          .filter(path -> regexPattern.matcher(path.getFileName().toString()).matches())
+          .toList();
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+
+  /**
+   * Reads the expected model names from the classpath resource {@code expected-models.txt}.
+   * Surrounding whitespace is removed; blank lines and lines starting with {@code #} are skipped.
+   */
+  private static List<String> getExpectedModels() {
+    try (InputStream inputStream =
+             Thread.currentThread().getContextClassLoader().getResourceAsStream("expected-models.txt")) {
+      if (inputStream == null) {
+        throw new IllegalArgumentException("Expected model file could not be found!");
+      }
+
+      // Decode explicitly as UTF-8 instead of relying on the platform default charset.
+      try (BufferedReader reader =
+               new BufferedReader(new InputStreamReader(inputStream, StandardCharsets.UTF_8))) {
+        return reader.lines()
+            .map(String::trim)
+            .filter(line -> !line.isEmpty() && !line.startsWith("#"))
+            .toList();
+      }
+    } catch (IOException e) {
+      throw new RuntimeException(e);
+    }
+  }
+}
diff --git a/opennlp-models-test/src/main/resources/expected-models.txt 
b/opennlp-models-test/src/main/resources/expected-models.txt
new file mode 100644
index 0000000..68c678a
--- /dev/null
+++ b/opennlp-models-test/src/main/resources/expected-models.txt
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+langdetect-183.bin
+opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin
+opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin
+opennlp-fr-ud-ftb-sentence-1.0-1.9.3.bin
+opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin
+opennlp-it-ud-vit-sentence-1.0-1.9.3.bin
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index a931648..9799f97 100644
--- a/pom.xml
+++ b/pom.xml
@@ -96,6 +96,14 @@
                <java.version>17</java.version>
                <maven.version>3.3.9</maven.version>
                <enforcer.plugin.version>3.3.0</enforcer.plugin.version>
+
+               <junit.version>5.10.1</junit.version>
+
+               
<asf.dist.base>https://dist.apache.org/repos/dist/release/opennlp/models/</asf.dist.base>
+               
<sf.dist.base>https://opennlp.sourceforge.net/models-1.5/</sf.dist.base>
+
+               <!-- set a fixed value here to enable reproducible builds -->
+               <project.build.outputTimestamp>2024-01-01T00:00:00Z</project.build.outputTimestamp>
        </properties>
 
        <build>
@@ -111,6 +119,45 @@
                                                
<mavenExecutorId>forked-path</mavenExecutorId>
                                        </configuration>
                                </plugin>
+                               <plugin>
+                                       <groupId>org.codehaus.mojo</groupId>
+                                       
<artifactId>build-helper-maven-plugin</artifactId>
+                                       <version>3.6.0</version>
+                                       <executions>
+                                               <execution>
+                                                       <id>add-resource</id>
+                                                       
<phase>generate-resources</phase>
+                                                       <goals>
+                                                               
<goal>add-resource</goal>
+                                                       </goals>
+                                                       <configuration>
+                                                               <resources>
+                                                                       
<resource>
+                                                                               
<directory>${project.build.directory}/models/</directory>
+                                                                       
</resource>
+                                                               </resources>
+                                                       </configuration>
+                                               </execution>
+                                       </executions>
+                               </plugin>
+                               <plugin>
+                                       
<groupId>com.googlecode.maven-download-plugin</groupId>
+                                       
<artifactId>download-maven-plugin</artifactId>
+                                       <version>1.9.0</version>
+                                       <executions>
+                                               <execution>
+                                                       <id>download-model</id>
+                                                       
<phase>generate-resources</phase>
+                                                       <goals>
+                                                               
<goal>wget</goal>
+                                                       </goals>
+                                               </execution>
+                                       </executions>
+                                       <configuration>
+                                               
<outputDirectory>${project.build.directory}/models</outputDirectory>
+                                               <md5>${model.md5}</md5>
+                                       </configuration>
+                               </plugin>
                        </plugins>
                </pluginManagement>
 
@@ -201,6 +248,8 @@
 
        <modules>
                <module>opennlp-models-langdetect</module>
+               <module>opennlp-models-sendetect</module>
+               <module>opennlp-models-test</module>
        </modules>
 
 </project>


Reply via email to