This is an automated email from the ASF dual-hosted git repository.
rzo1 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/opennlp-models.git
The following commit(s) were added to refs/heads/main by this push:
new 6af838c OPENNLP-1557 - Create Model Jars on-the-fly (#1)
6af838c is described below
commit 6af838ce1b41f8ea7f2a62cf380b149c0f6461eb
Author: Richard Zowalla <[email protected]>
AuthorDate: Mon May 27 16:19:19 2024 +0200
OPENNLP-1557 - Create Model Jars on-the-fly (#1)
---
.github/workflows/maven.yml | 37 +++++++
.gitignore | 1 +
README.md | 3 +
opennlp-models-langdetect/pom.xml | 41 +++++++-
.../src/main/resources/langdetect-183.bin | 3 -
.../src/main/resources/model.properties | 18 ++++
.../opennlp-models-sentdetect-de/pom.xml | 75 +++++++++++++++
.../src/main/resources/model.properties | 18 ++++
.../opennlp-models-sentdetect-en/pom.xml | 75 +++++++++++++++
.../src/main/resources/model.properties | 18 ++++
.../opennlp-models-sentdetect-fr/pom.xml | 75 +++++++++++++++
.../src/main/resources/model.properties | 18 ++++
.../opennlp-models-sentdetect-it/pom.xml | 75 +++++++++++++++
.../src/main/resources/model.properties | 18 ++++
.../opennlp-models-sentdetect-nl/pom.xml | 75 +++++++++++++++
.../src/main/resources/model.properties | 18 ++++
opennlp-models-sentdetect/pom.xml | 46 +++++++++
opennlp-models-test/pom.xml | 59 ++++++++++++
.../java/org.apache.opennlp/ModelValidator.java | 106 +++++++++++++++++++++
.../src/main/resources/expected-models.txt | 21 ++++
pom.xml | 57 ++++++++++-
21 files changed, 850 insertions(+), 7 deletions(-)
diff --git a/.github/workflows/maven.yml b/.github/workflows/maven.yml
new file mode 100644
index 0000000..c995950
--- /dev/null
+++ b/.github/workflows/maven.yml
@@ -0,0 +1,37 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+name: Java CI
+
+on: [push, pull_request]
+
+jobs:
+ build:
+ runs-on: ubuntu-latest
+ steps:
+ - uses: actions/checkout@v3
+ - uses: actions/cache@v3
+ with:
+ path: ~/.m2/repository
+ key: ${{ runner.os }}-maven-${{ hashFiles('**/pom.xml') }}
+ restore-keys: |
+ ${{ runner.os }}-maven-
+ - name: Setup Java
+ uses: actions/setup-java@v3
+ with:
+ distribution: adopt
+ java-version: 17
+ - name: Build with Maven
+ run: mvn -V clean install --no-transfer-progress
\ No newline at end of file
diff --git a/.gitignore b/.gitignore
index 81ef51f..035c795 100644
--- a/.gitignore
+++ b/.gitignore
@@ -8,3 +8,4 @@ nbactions.xml
nb-configuration.xml
*.DS_Store
.checkstyle
+*.bin
\ No newline at end of file
diff --git a/README.md b/README.md
index 5f57255..6cc4748 100644
--- a/README.md
+++ b/README.md
@@ -72,6 +72,9 @@ compile group: "org.apache.opennlp", name:
"opennlp-models-langdetect", version:
For more details please check our
[documentation](http://opennlp.apache.org/docs/)
+## Adding a new Model
+
+Ensure to add a new model to the `expected-models.txt` file located in
`opennlp-models-test`.
## Contributing
diff --git a/opennlp-models-langdetect/pom.xml
b/opennlp-models-langdetect/pom.xml
index 518b455..54e55f6 100644
--- a/opennlp-models-langdetect/pom.xml
+++ b/opennlp-models-langdetect/pom.xml
@@ -30,8 +30,45 @@
</parent>
<artifactId>opennlp-models-langdetect</artifactId>
- <version>0.1-SNAPSHOT</version>
- <name>Apache OpenNLP Models Lang-Detect</name>
+ <name>Apache OpenNLP Models :: Lang-Detect</name>
+ <properties>
+ <dist.base>${asf.dist.base}</dist.base>
+ <model.family>langdetect</model.family>
+ <model.name>langdetect-183.bin</model.name>
+ <model.version>1.8.3</model.version>
+
<model.sha256>2ddf585fac2e02a9dcfb9a4a9cc9417562eaac351be2efb506a2eaa87f19e9d4</model.sha256>
+ </properties>
+
+ <build>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ <filtering>true</filtering>
+ <includes>
+ <include>**/model.properties</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>src/main/resources</directory>
+ <includes>
+ <include>**/*.bin</include>
+ </includes>
+ </resource>
+ </resources>
+ <plugins>
+ <plugin>
+ <groupId>com.googlecode.maven-download-plugin</groupId>
+ <artifactId>download-maven-plugin</artifactId>
+ <configuration>
+
<url>${dist.base}/${model.family}/${model.version}/${model.name}</url>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
</project>
diff --git a/opennlp-models-langdetect/src/main/resources/langdetect-183.bin
b/opennlp-models-langdetect/src/main/resources/langdetect-183.bin
deleted file mode 100644
index 05dc88e..0000000
--- a/opennlp-models-langdetect/src/main/resources/langdetect-183.bin
+++ /dev/null
@@ -1,3 +0,0 @@
-version https://git-lfs.github.com/spec/v1
-oid sha256:2ddf585fac2e02a9dcfb9a4a9cc9417562eaac351be2efb506a2eaa87f19e9d4
-size 10568188
diff --git a/opennlp-models-langdetect/src/main/resources/model.properties
b/opennlp-models-langdetect/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++ b/opennlp-models-langdetect/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-de/pom.xml
b/opennlp-models-sentdetect/opennlp-models-sentdetect-de/pom.xml
new file mode 100644
index 0000000..e1108d8
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-de/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-models-sentdetect</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>opennlp-models-sentdetect-de</artifactId>
+
+ <name>Apache OpenNLP Models :: Sent-Detect :: German</name>
+
+ <properties>
+ <dist.base>${asf.dist.base}</dist.base>
+ <model.family>ud-models-1.0</model.family>
+ <model.name>opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin</model.name>
+ <model.version>1.9.3</model.version>
+
<model.sha256>2d4da68109269aab96d128004fe0b88c8a0d226c0ead2b40afc0d10c6d5dc182</model.sha256>
+ </properties>
+
+ <build>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ <filtering>true</filtering>
+ <includes>
+ <include>**/model.properties</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>src/main/resources</directory>
+ <includes>
+ <include>**/*.bin</include>
+ </includes>
+ </resource>
+ </resources>
+ <plugins>
+ <plugin>
+ <groupId>com.googlecode.maven-download-plugin</groupId>
+ <artifactId>download-maven-plugin</artifactId>
+ <configuration>
+ <url>${dist.base}/${model.family}/${model.name}</url>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
diff --git
a/opennlp-models-sentdetect/opennlp-models-sentdetect-de/src/main/resources/model.properties
b/opennlp-models-sentdetect/opennlp-models-sentdetect-de/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++
b/opennlp-models-sentdetect/opennlp-models-sentdetect-de/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-en/pom.xml
b/opennlp-models-sentdetect/opennlp-models-sentdetect-en/pom.xml
new file mode 100644
index 0000000..9cff087
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-en/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-models-sentdetect</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>opennlp-models-sentdetect-en</artifactId>
+
+ <name>Apache OpenNLP Models :: Sent-Detect :: English</name>
+
+ <properties>
+ <dist.base>${asf.dist.base}</dist.base>
+ <model.family>ud-models-1.0</model.family>
+ <model.name>opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin</model.name>
+ <model.version>1.9.3</model.version>
+
<model.sha256>8dddd7e582cedef4a79a83fd340cefcdafe58419bd20520fec90492ea4bffd11</model.sha256>
+ </properties>
+
+ <build>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ <filtering>true</filtering>
+ <includes>
+ <include>**/model.properties</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>src/main/resources</directory>
+ <includes>
+ <include>**/*.bin</include>
+ </includes>
+ </resource>
+ </resources>
+ <plugins>
+ <plugin>
+ <groupId>com.googlecode.maven-download-plugin</groupId>
+ <artifactId>download-maven-plugin</artifactId>
+ <configuration>
+ <url>${dist.base}/${model.family}/${model.name}</url>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
diff --git
a/opennlp-models-sentdetect/opennlp-models-sentdetect-en/src/main/resources/model.properties
b/opennlp-models-sentdetect/opennlp-models-sentdetect-en/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++
b/opennlp-models-sentdetect/opennlp-models-sentdetect-en/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/pom.xml
b/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/pom.xml
new file mode 100644
index 0000000..9e15d9d
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-models-sentdetect</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>opennlp-models-sentdetect-fr</artifactId>
+
+ <name>Apache OpenNLP Models :: Sent-Detect :: French</name>
+
+ <properties>
+ <dist.base>${asf.dist.base}</dist.base>
+ <model.family>ud-models-1.0</model.family>
+
<model.name>opennlp-1.0-1.9.3fr-ud-ftb-sentence-1.0-1.9.3.bin</model.name>
+ <model.version>1.9.3</model.version>
+
<model.sha256>94e57992d4b671909ccb3c5d8e6cb81c84ac96c1c6a19b8b4e7b0197a41cc870</model.sha256>
+ </properties>
+
+ <build>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ <filtering>true</filtering>
+ <includes>
+ <include>**/model.properties</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>src/main/resources</directory>
+ <includes>
+ <include>**/*.bin</include>
+ </includes>
+ </resource>
+ </resources>
+ <plugins>
+ <plugin>
+ <groupId>com.googlecode.maven-download-plugin</groupId>
+ <artifactId>download-maven-plugin</artifactId>
+ <configuration>
+ <url>${dist.base}/${model.family}/${model.name}</url>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
diff --git
a/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/src/main/resources/model.properties
b/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++
b/opennlp-models-sentdetect/opennlp-models-sentdetect-fr/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-it/pom.xml
b/opennlp-models-sentdetect/opennlp-models-sentdetect-it/pom.xml
new file mode 100644
index 0000000..f5db512
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-it/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-models-sentdetect</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>opennlp-models-sentdetect-it</artifactId>
+
+ <name>Apache OpenNLP Models :: Sent-Detect :: Italian</name>
+
+ <properties>
+ <dist.base>${asf.dist.base}</dist.base>
+ <model.family>ud-models-1.0</model.family>
+ <model.name>opennlp-it-ud-vit-sentence-1.0-1.9.3.bin</model.name>
+ <model.version>1.9.3</model.version>
+
<model.sha256>1f9c15b5bb6be611ba2cc7e0c50edce56416977ad8c0c4e53379a0c80606d3ad</model.sha256>
+ </properties>
+
+ <build>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ <filtering>true</filtering>
+ <includes>
+ <include>**/model.properties</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>src/main/resources</directory>
+ <includes>
+ <include>**/*.bin</include>
+ </includes>
+ </resource>
+ </resources>
+ <plugins>
+ <plugin>
+ <groupId>com.googlecode.maven-download-plugin</groupId>
+ <artifactId>download-maven-plugin</artifactId>
+ <configuration>
+ <url>${dist.base}/${model.family}/${model.name}</url>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
diff --git
a/opennlp-models-sentdetect/opennlp-models-sentdetect-it/src/main/resources/model.properties
b/opennlp-models-sentdetect/opennlp-models-sentdetect-it/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++
b/opennlp-models-sentdetect/opennlp-models-sentdetect-it/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/pom.xml
b/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/pom.xml
new file mode 100644
index 0000000..3760149
--- /dev/null
+++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/pom.xml
@@ -0,0 +1,75 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-models-sentdetect</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>opennlp-models-sentdetect-nl</artifactId>
+
+ <name>Apache OpenNLP Models :: Sent-Detect :: Dutch</name>
+
+ <properties>
+ <dist.base>${asf.dist.base}</dist.base>
+ <model.family>ud-models-1.0</model.family>
+ <model.name>opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin</model.name>
+ <model.version>1.9.3</model.version>
+
<model.sha256>4e4d4ce182c0dc6f12f406077a967807828a523191fbb9199219e5d5969edfb7</model.sha256>
+ </properties>
+
+ <build>
+ <resources>
+ <resource>
+ <directory>src/main/resources</directory>
+ <filtering>true</filtering>
+ <includes>
+ <include>**/model.properties</include>
+ </includes>
+ </resource>
+ <resource>
+ <directory>src/main/resources</directory>
+ <includes>
+ <include>**/*.bin</include>
+ </includes>
+ </resource>
+ </resources>
+ <plugins>
+ <plugin>
+ <groupId>com.googlecode.maven-download-plugin</groupId>
+ <artifactId>download-maven-plugin</artifactId>
+ <configuration>
+ <url>${dist.base}/${model.family}/${model.name}</url>
+ </configuration>
+ </plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>build-helper-maven-plugin</artifactId>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
diff --git
a/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/src/main/resources/model.properties
b/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/src/main/resources/model.properties
new file mode 100644
index 0000000..4089d05
--- /dev/null
+++
b/opennlp-models-sentdetect/opennlp-models-sentdetect-nl/src/main/resources/model.properties
@@ -0,0 +1,18 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+model.name=${model.name}
+model.version=${model.version}
+model.sha256=${model.sha256}
\ No newline at end of file
diff --git a/opennlp-models-sentdetect/pom.xml
b/opennlp-models-sentdetect/pom.xml
new file mode 100644
index 0000000..c445dbd
--- /dev/null
+++ b/opennlp-models-sentdetect/pom.xml
@@ -0,0 +1,46 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-models</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>opennlp-models-sentdetect</artifactId>
+
+ <name>Apache OpenNLP Models :: Sent-Detect</name>
+
+ <packaging>pom</packaging>
+
+ <modules>
+ <module>opennlp-models-sentdetect-de</module>
+ <module>opennlp-models-sentdetect-it</module>
+ <module>opennlp-models-sentdetect-en</module>
+ <module>opennlp-models-sentdetect-fr</module>
+ <module>opennlp-models-sentdetect-nl</module>
+ </modules>
+
+</project>
\ No newline at end of file
diff --git a/opennlp-models-test/pom.xml b/opennlp-models-test/pom.xml
new file mode 100644
index 0000000..43ac0ef
--- /dev/null
+++ b/opennlp-models-test/pom.xml
@@ -0,0 +1,59 @@
+<?xml version="1.0" encoding="UTF-8"?>
+
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+ http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+<project xmlns="http://maven.apache.org/POM/4.0.0"
+ xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance"
+ xsi:schemaLocation="http://maven.apache.org/POM/4.0.0
http://maven.apache.org/xsd/maven-4.0.0.xsd">
+ <modelVersion>4.0.0</modelVersion>
+ <parent>
+ <groupId>org.apache.opennlp</groupId>
+ <artifactId>opennlp-models</artifactId>
+ <version>0.1-SNAPSHOT</version>
+ </parent>
+
+ <artifactId>opennlp-models-test</artifactId>
+ <name>Apache OpenNLP Models :: Tests</name>
+
+ <build>
+ <plugins>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+ <artifactId>exec-maven-plugin</artifactId>
+ <version>${exec.plugin.version}</version>
+ <executions>
+ <execution>
+ <phase>verify</phase>
+ <goals>
+ <goal>java</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+ <mainClass>org.apache.opennlp.ModelValidator</mainClass>
+ <arguments>
+ <argument>${project.basedir}</argument>
+ </arguments>
+ </configuration>
+ </plugin>
+ </plugins>
+ </build>
+
+</project>
\ No newline at end of file
diff --git
a/opennlp-models-test/src/main/java/org.apache.opennlp/ModelValidator.java
b/opennlp-models-test/src/main/java/org.apache.opennlp/ModelValidator.java
new file mode 100644
index 0000000..82aa82e
--- /dev/null
+++ b/opennlp-models-test/src/main/java/org.apache.opennlp/ModelValidator.java
@@ -0,0 +1,106 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to you under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+package org.apache.opennlp;
+
+import java.io.BufferedReader;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.InputStreamReader;
+import java.nio.file.Files;
+import java.nio.file.Path;
+import java.util.List;
+import java.util.jar.JarFile;
+import java.util.regex.Pattern;
+import java.util.stream.Stream;
+
+
+public class ModelValidator {
+
+ public static void main(String[] args) {
+ if (args.length != 1) {
+ throw new IllegalArgumentException("This tool expects at least one
argument");
+ }
+ System.err.println("Executing basic model validation checks.");
+
+ final Path testBaseDir = Path.of(args[0]);
+ final Path projectDir = testBaseDir.getParent();
+ final List<String> expectedModels = getExpectedModels();
+
+ final String pattern = "opennlp-models.*\\.jar";
+
+ final List<Path> availableModelJars = getAvailableModelJars(pattern,
testBaseDir, projectDir);
+
+ if (expectedModels.size() != availableModelJars.size()) {
+ throw new IllegalArgumentException("Detected a mismatch between " +
+ "expected and available models! " +
+ "Expected: " + expectedModels.size() +
+ "; Actual: " + availableModelJars.size());
+ }
+
+ for (String model : expectedModels) {
+ boolean found;
+ for (Path availableJar : availableModelJars) {
+ found = isModelInJar(availableJar, model);
+ if (found) {
+ return;
+ }
+ }
+ throw new IllegalArgumentException(
+ "Expected model '" + model + "' could not be found inside the
generated JAR files!");
+ }
+
+
+ }
+
+ public static boolean isModelInJar(Path jarFilePath, String expectedModel) {
+ try (JarFile jarFile = new JarFile(jarFilePath.toFile())) {
+ return jarFile.stream()
+ .anyMatch(entry -> entry.getName().equals(expectedModel));
+ } catch (IOException e) {
+ throw new RuntimeException("Failed to read the JAR file: " +
jarFilePath, e);
+ }
+ }
+
+ private static List<Path> getAvailableModelJars(String pattern, Path
testDir, Path projectDir) {
+ final Pattern regexPattern = Pattern.compile(pattern);
+ try (Stream<Path> stream = Files.walk(projectDir)) {
+ return stream
+ .filter(Files::isRegularFile)
+ .filter(path -> !path.startsWith(testDir))
+ .filter(path ->
regexPattern.matcher(path.getFileName().toString()).matches())
+ .toList();
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+
+ private static List<String> getExpectedModels() {
+ try (InputStream inputStream =
Thread.currentThread().getContextClassLoader().getResourceAsStream("expected-models.txt"))
{
+ if (inputStream == null) {
+ throw new IllegalArgumentException("Expected model file could not be
found!");
+ }
+
+ try (BufferedReader reader = new BufferedReader(new
InputStreamReader(inputStream))) {
+ return reader.lines()
+ .filter(line -> !line.startsWith("#") && !line.trim().isEmpty())
+ .toList();
+ }
+ } catch (IOException e) {
+ throw new RuntimeException(e);
+ }
+ }
+}
diff --git a/opennlp-models-test/src/main/resources/expected-models.txt
b/opennlp-models-test/src/main/resources/expected-models.txt
new file mode 100644
index 0000000..68c678a
--- /dev/null
+++ b/opennlp-models-test/src/main/resources/expected-models.txt
@@ -0,0 +1,21 @@
+# Licensed to the Apache Software Foundation (ASF) under one or more
+# contributor license agreements. See the NOTICE file distributed with
+# this work for additional information regarding copyright ownership.
+# The ASF licenses this file to You under the Apache License, Version 2.0
+# (the "License"); you may not use this file except in compliance with
+# the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+
+langdetect-183.bin
+opennlp-de-ud-gsd-sentence-1.0-1.9.3.bin
+opennlp-en-ud-ewt-sentence-1.0-1.9.3.bin
+opennlp-1.0-1.9.3fr-ud-ftb-sentence-1.0-1.9.3.bin
+opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin
+opennlp-nl-ud-alpino-sentence-1.0-1.9.3.bin
\ No newline at end of file
diff --git a/pom.xml b/pom.xml
index a931648..a5221d8 100644
--- a/pom.xml
+++ b/pom.xml
@@ -94,8 +94,20 @@
<properties>
<!-- Build Properties -->
<java.version>17</java.version>
- <maven.version>3.3.9</maven.version>
+ <maven.version>3.6.3</maven.version>
+
+
<asf.dist.base>https://dist.apache.org/repos/dist/release/opennlp/models/</asf.dist.base>
+
<sf.dist.base>https://opennlp.sourceforge.net/models-1.5/</sf.dist.base>
+
+ <!-- set a fixed value here to enable reproducable builds -->
+
<project.build.outputTimestamp>2024-01-01T00:00:00Z</project.build.outputTimestamp>
+
+ <!-- maven plugin versions -->
<enforcer.plugin.version>3.3.0</enforcer.plugin.version>
+ <download.plugin.version>1.9.0</download.plugin.version>
+ <compiler.plugin.version>3.10.1</compiler.plugin.version>
+ <build-helper.plugin.version>3.6.0</build-helper.plugin.version>
+ <exec.plugin.version>3.3.0</exec.plugin.version>
</properties>
<build>
@@ -111,6 +123,45 @@
<mavenExecutorId>forked-path</mavenExecutorId>
</configuration>
</plugin>
+ <plugin>
+ <groupId>org.codehaus.mojo</groupId>
+
<artifactId>build-helper-maven-plugin</artifactId>
+
<version>${build-helper.plugin.version}</version>
+ <executions>
+ <execution>
+ <id>add-resource</id>
+
<phase>generate-resources</phase>
+ <goals>
+
<goal>add-resource</goal>
+ </goals>
+ <configuration>
+ <resources>
+
<resource>
+
<directory>${project.build.directory}/models/</directory>
+
</resource>
+ </resources>
+ </configuration>
+ </execution>
+ </executions>
+ </plugin>
+ <plugin>
+
<groupId>com.googlecode.maven-download-plugin</groupId>
+
<artifactId>download-maven-plugin</artifactId>
+
<version>${download.plugin.version}</version>
+ <executions>
+ <execution>
+ <id>download-model</id>
+
<phase>generate-resources</phase>
+ <goals>
+
<goal>wget</goal>
+ </goals>
+ </execution>
+ </executions>
+ <configuration>
+
<outputDirectory>${project.build.directory}/models</outputDirectory>
+ <sha256>${model.sha256}</sha256>
+ </configuration>
+ </plugin>
</plugins>
</pluginManagement>
@@ -118,7 +169,7 @@
<plugin>
<groupId>org.apache.maven.plugins</groupId>
<artifactId>maven-compiler-plugin</artifactId>
- <version>3.10.1</version>
+ <version>${compiler.plugin.version}</version>
<configuration>
<release>${java.version}</release>
<compilerArgument>-Xlint</compilerArgument>
@@ -201,6 +252,8 @@
<modules>
<module>opennlp-models-langdetect</module>
+ <module>opennlp-models-sentdetect</module>
+ <module>opennlp-models-test</module>
</modules>
</project>