This is an automated email from the ASF dual-hosted git repository. rzo1 pushed a commit to branch OPENNLP-1627 in repository https://gitbox.apache.org/repos/asf/opennlp-models.git
commit 38e1cee2f8ffcdaa4aab4064024df1e67dd1e517 Author: Richard Zowalla <[email protected]> AuthorDate: Tue Oct 15 09:34:44 2024 +0200 OPENNLP-1627 - Add Bulgarian models --- opennlp-models-pos/opennlp-models-pos-bg/pom.xml | 73 +++++++++++++++++++++ .../src/main/resources/model.properties | 24 ++----- opennlp-models-pos/pom.xml | 1 + .../opennlp-models-sentdetect-bg/pom.xml | 74 ++++++++++++++++++++++ .../src/main/resources/model.properties | 24 ++----- opennlp-models-sentdetect/pom.xml | 1 + opennlp-models-test/pom.xml | 15 +++++ .../src/main/resources/expected-models.txt | 5 +- .../opennlp-models-tokenizer-bg/pom.xml | 74 ++++++++++++++++++++++ .../src/main/resources/model.properties | 24 ++----- opennlp-models-tokenizer/pom.xml | 1 + 11 files changed, 255 insertions(+), 61 deletions(-) diff --git a/opennlp-models-pos/opennlp-models-pos-bg/pom.xml b/opennlp-models-pos/opennlp-models-pos-bg/pom.xml new file mode 100644 index 0000000..032a586 --- /dev/null +++ b/opennlp-models-pos/opennlp-models-pos-bg/pom.xml @@ -0,0 +1,73 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-models-pos</artifactId> + <version>1.1.0-SNAPSHOT</version> + </parent> + + <name>Apache OpenNLP Models :: Part-Of-Speech :: Bulgarian</name> + <artifactId>opennlp-models-pos-bg</artifactId> + + <properties> + <dist.base>${asf.dist.base}</dist.base> + <model.family>ud-models-1.1</model.family> + <model.name>opennlp-bg-ud-btb-pos-1.1-2.4.0.bin</model.name> + <model.version>2.4.0</model.version> + <model.sha256>b5bf61fd6a646c81c299b4a48fc2b6bfe37c2563cbe91767e547cbcd660ead38</model.sha256> + <model.language>bg</model.language> + </properties> + + <build> + <resources> + <resource> + <directory>src/main/resources</directory> + <filtering>true</filtering> + <includes> + <include>**/model.properties</include> + </includes> + </resource> + <resource> + <directory>src/main/resources</directory> + <includes> + <include>**/*.bin</include> + </includes> + </resource> + </resources> + <plugins> + <plugin> + <groupId>com.googlecode.maven-download-plugin</groupId> + <artifactId>download-maven-plugin</artifactId> + <configuration> + <url>${dist.base}/${model.family}/${model.name}</url> + </configuration> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + </plugin> + </plugins> + </build> + +</project> \ No newline at end of file diff --git a/opennlp-models-test/src/main/resources/expected-models.txt b/opennlp-models-pos/opennlp-models-pos-bg/src/main/resources/model.properties similarity index 53% copy from opennlp-models-test/src/main/resources/expected-models.txt copy to opennlp-models-pos/opennlp-models-pos-bg/src/main/resources/model.properties index b851b75..2be8681 100644 --- a/opennlp-models-test/src/main/resources/expected-models.txt +++ b/opennlp-models-pos/opennlp-models-pos-bg/src/main/resources/model.properties @@ -13,23 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Language Detection -langdetect-183.bin -# Sentence Detection -opennlp-de-ud-gsd-sentence-1.1-2.4.0.bin -opennlp-en-ud-ewt-sentence-1.1-2.4.0.bin -opennlp-fr-ud-gsd-sentence-1.1-2.4.0.bin -opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin -opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin -# POS -opennlp-de-ud-gsd-pos-1.1-2.4.0.bin -opennlp-de-ud-gsd-pos-1.1-2.4.0.bin -opennlp-fr-ud-gsd-pos-1.1-2.4.0.bin -opennlp-it-ud-vit-pos-1.1-2.4.0.bin -opennlp-nl-ud-alpino-pos-1.1-2.4.0.bin -# Tokens -opennlp-de-ud-gsd-tokens-1.1-2.4.0.bin -opennlp-en-ud-ewt-tokens-1.1-2.4.0.bin -opennlp-fr-ud-gsd-tokens-1.1-2.4.0.bin -opennlp-it-ud-vit-tokens-1.1-2.4.0.bin -opennlp-nl-ud-alpino-tokens-1.1-2.4.0.bin \ No newline at end of file +model.name=${model.name} +model.version=${model.version} +model.sha256=${model.sha256} +model.language=${model.language} \ No newline at end of file diff --git a/opennlp-models-pos/pom.xml b/opennlp-models-pos/pom.xml index 10cebaf..adb586e 100644 --- a/opennlp-models-pos/pom.xml +++ b/opennlp-models-pos/pom.xml @@ -32,6 +32,7 @@ under the License. <packaging>pom</packaging> <modules> + <module>opennlp-models-pos-bg</module> <module>opennlp-models-pos-de</module> <module>opennlp-models-pos-en</module> <module>opennlp-models-pos-fr</module> diff --git a/opennlp-models-sentdetect/opennlp-models-sentdetect-bg/pom.xml b/opennlp-models-sentdetect/opennlp-models-sentdetect-bg/pom.xml new file mode 100644 index 0000000..17b7273 --- /dev/null +++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-bg/pom.xml @@ -0,0 +1,74 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-models-sentdetect</artifactId> + <version>1.1.0-SNAPSHOT</version> + </parent> + + <artifactId>opennlp-models-sentdetect-bg</artifactId> + + <name>Apache OpenNLP Models :: Sent-Detect :: Bulgarian</name> + + <properties> + <dist.base>${asf.dist.base}</dist.base> + <model.family>ud-models-1.1</model.family> + <model.name>opennlp-bg-ud-btb-sentence-1.1-2.4.0.bin</model.name> + <model.version>2.4.0</model.version> + <model.sha256>c093162b8e7e5e7842d33c2ad5ac11c24952a14687646224962795670dee3a71</model.sha256> + <model.language>bg</model.language> + </properties> + + <build> + <resources> + <resource> + <directory>src/main/resources</directory> + <filtering>true</filtering> + <includes> + <include>**/model.properties</include> + </includes> + </resource> + <resource> + <directory>src/main/resources</directory> + <includes> + <include>**/*.bin</include> + </includes> + </resource> + </resources> + <plugins> + <plugin> + <groupId>com.googlecode.maven-download-plugin</groupId> + <artifactId>download-maven-plugin</artifactId> + <configuration> + <url>${dist.base}/${model.family}/${model.name}</url> + </configuration> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + </plugin> + </plugins> + </build> + +</project> \ No newline at end of file diff --git a/opennlp-models-test/src/main/resources/expected-models.txt b/opennlp-models-sentdetect/opennlp-models-sentdetect-bg/src/main/resources/model.properties similarity index 53% copy from opennlp-models-test/src/main/resources/expected-models.txt copy to opennlp-models-sentdetect/opennlp-models-sentdetect-bg/src/main/resources/model.properties index b851b75..2be8681 100644 --- a/opennlp-models-test/src/main/resources/expected-models.txt +++ b/opennlp-models-sentdetect/opennlp-models-sentdetect-bg/src/main/resources/model.properties @@ -13,23 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Language Detection -langdetect-183.bin -# Sentence Detection -opennlp-de-ud-gsd-sentence-1.1-2.4.0.bin -opennlp-en-ud-ewt-sentence-1.1-2.4.0.bin -opennlp-fr-ud-gsd-sentence-1.1-2.4.0.bin -opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin -opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin -# POS -opennlp-de-ud-gsd-pos-1.1-2.4.0.bin -opennlp-de-ud-gsd-pos-1.1-2.4.0.bin -opennlp-fr-ud-gsd-pos-1.1-2.4.0.bin -opennlp-it-ud-vit-pos-1.1-2.4.0.bin -opennlp-nl-ud-alpino-pos-1.1-2.4.0.bin -# Tokens -opennlp-de-ud-gsd-tokens-1.1-2.4.0.bin -opennlp-en-ud-ewt-tokens-1.1-2.4.0.bin -opennlp-fr-ud-gsd-tokens-1.1-2.4.0.bin -opennlp-it-ud-vit-tokens-1.1-2.4.0.bin -opennlp-nl-ud-alpino-tokens-1.1-2.4.0.bin \ No newline at end of file +model.name=${model.name} +model.version=${model.version} +model.sha256=${model.sha256} +model.language=${model.language} \ No newline at end of file diff --git a/opennlp-models-sentdetect/pom.xml b/opennlp-models-sentdetect/pom.xml index 36254db..1f6bd5d 100644 --- a/opennlp-models-sentdetect/pom.xml +++ b/opennlp-models-sentdetect/pom.xml @@ -34,6 +34,7 @@ under the License. <packaging>pom</packaging> <modules> + <module>opennlp-models-sentdetect-bg</module> <module>opennlp-models-sentdetect-de</module> <module>opennlp-models-sentdetect-it</module> <module>opennlp-models-sentdetect-en</module> diff --git a/opennlp-models-test/pom.xml b/opennlp-models-test/pom.xml index 9ba157c..c85117e 100644 --- a/opennlp-models-test/pom.xml +++ b/opennlp-models-test/pom.xml @@ -32,6 +32,11 @@ under the License. <dependencies> <!-- These dependencies are added here to influence Maven build order. Test module needs to build last! --> + <dependency> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-models-sentdetect-bg</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>org.apache.opennlp</groupId> <artifactId>opennlp-models-sentdetect-de</artifactId> @@ -57,6 +62,11 @@ under the License. <artifactId>opennlp-models-sentdetect-fr</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-models-tokenizer-bg</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>org.apache.opennlp</groupId> <artifactId>opennlp-models-tokenizer-de</artifactId> @@ -82,6 +92,11 @@ under the License. <artifactId>opennlp-models-tokenizer-fr</artifactId> <version>${project.version}</version> </dependency> + <dependency> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-models-pos-bg</artifactId> + <version>${project.version}</version> + </dependency> <dependency> <groupId>org.apache.opennlp</groupId> <artifactId>opennlp-models-pos-de</artifactId> diff --git a/opennlp-models-test/src/main/resources/expected-models.txt b/opennlp-models-test/src/main/resources/expected-models.txt index b851b75..228e4ac 100644 --- a/opennlp-models-test/src/main/resources/expected-models.txt +++ b/opennlp-models-test/src/main/resources/expected-models.txt @@ -16,18 +16,21 @@ # Language Detection langdetect-183.bin # Sentence Detection +opennlp-bg-ud-btb-sentence-1.1-2.4.0.bin opennlp-de-ud-gsd-sentence-1.1-2.4.0.bin opennlp-en-ud-ewt-sentence-1.1-2.4.0.bin opennlp-fr-ud-gsd-sentence-1.1-2.4.0.bin -opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin +opennlp-it-ud-vit-sentence-1.1-2.4.0.bin opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin # POS +opennlp-bg-ud-btb-pos-1.1-2.4.0.bin opennlp-de-ud-gsd-pos-1.1-2.4.0.bin opennlp-de-ud-gsd-pos-1.1-2.4.0.bin opennlp-fr-ud-gsd-pos-1.1-2.4.0.bin opennlp-it-ud-vit-pos-1.1-2.4.0.bin opennlp-nl-ud-alpino-pos-1.1-2.4.0.bin # Tokens +opennlp-bg-ud-btb-tokens-1.1-2.4.0.bin opennlp-de-ud-gsd-tokens-1.1-2.4.0.bin opennlp-en-ud-ewt-tokens-1.1-2.4.0.bin opennlp-fr-ud-gsd-tokens-1.1-2.4.0.bin diff --git a/opennlp-models-tokenizer/opennlp-models-tokenizer-bg/pom.xml b/opennlp-models-tokenizer/opennlp-models-tokenizer-bg/pom.xml new file mode 100644 index 0000000..5e3d3ed --- /dev/null +++ b/opennlp-models-tokenizer/opennlp-models-tokenizer-bg/pom.xml @@ -0,0 +1,74 @@ +<?xml version="1.0" encoding="UTF-8"?> + +<!-- +Licensed to the Apache Software Foundation (ASF) under one +or more contributor license agreements. See the NOTICE file +distributed with this work for additional information +regarding copyright ownership. The ASF licenses this file +to you under the Apache License, Version 2.0 (the +"License"); you may not use this file except in compliance +with the License. You may obtain a copy of the License at + + http://www.apache.org/licenses/LICENSE-2.0 + +Unless required by applicable law or agreed to in writing, +software distributed under the License is distributed on an +"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +KIND, either express or implied. See the License for the +specific language governing permissions and limitations +under the License. +--> + +<project xmlns="http://maven.apache.org/POM/4.0.0" xmlns:xsi="http://www.w3.org/2001/XMLSchema-instance" xsi:schemaLocation="http://maven.apache.org/POM/4.0.0 http://maven.apache.org/xsd/maven-4.0.0.xsd"> + <modelVersion>4.0.0</modelVersion> + <parent> + <groupId>org.apache.opennlp</groupId> + <artifactId>opennlp-models-tokenizer</artifactId> + <version>1.1.0-SNAPSHOT</version> + </parent> + + <artifactId>opennlp-models-tokenizer-bg</artifactId> + + <name>Apache OpenNLP Models :: Tokenizer :: Bulgarian</name> + + <properties> + <dist.base>${asf.dist.base}</dist.base> + <model.family>ud-models-1.1</model.family> + <model.name>opennlp-bg-ud-btb-tokens-1.1-2.4.0.bin</model.name> + <model.version>2.4.0</model.version> + <model.sha256>2f4f539eaf966cda174be9b464f75c413835b393e42112790a2ee4214cf08eaa</model.sha256> + <model.language>bg</model.language> + </properties> + + <build> + <resources> + <resource> + <directory>src/main/resources</directory> + <filtering>true</filtering> + <includes> + <include>**/model.properties</include> + </includes> + </resource> + <resource> + <directory>src/main/resources</directory> + <includes> + <include>**/*.bin</include> + </includes> + </resource> + </resources> + <plugins> + <plugin> + <groupId>com.googlecode.maven-download-plugin</groupId> + <artifactId>download-maven-plugin</artifactId> + <configuration> + <url>${dist.base}/${model.family}/${model.name}</url> + </configuration> + </plugin> + <plugin> + <groupId>org.codehaus.mojo</groupId> + <artifactId>build-helper-maven-plugin</artifactId> + </plugin> + </plugins> + </build> + +</project> \ No newline at end of file diff --git a/opennlp-models-test/src/main/resources/expected-models.txt b/opennlp-models-tokenizer/opennlp-models-tokenizer-bg/src/main/resources/model.properties similarity index 53% copy from opennlp-models-test/src/main/resources/expected-models.txt copy to opennlp-models-tokenizer/opennlp-models-tokenizer-bg/src/main/resources/model.properties index b851b75..2be8681 100644 --- a/opennlp-models-test/src/main/resources/expected-models.txt +++ b/opennlp-models-tokenizer/opennlp-models-tokenizer-bg/src/main/resources/model.properties @@ -13,23 +13,7 @@ # See the License for the specific language governing permissions and # limitations under the License. -# Language Detection -langdetect-183.bin -# Sentence Detection -opennlp-de-ud-gsd-sentence-1.1-2.4.0.bin -opennlp-en-ud-ewt-sentence-1.1-2.4.0.bin -opennlp-fr-ud-gsd-sentence-1.1-2.4.0.bin -opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin -opennlp-nl-ud-alpino-sentence-1.1-2.4.0.bin -# POS -opennlp-de-ud-gsd-pos-1.1-2.4.0.bin -opennlp-de-ud-gsd-pos-1.1-2.4.0.bin -opennlp-fr-ud-gsd-pos-1.1-2.4.0.bin -opennlp-it-ud-vit-pos-1.1-2.4.0.bin -opennlp-nl-ud-alpino-pos-1.1-2.4.0.bin -# Tokens -opennlp-de-ud-gsd-tokens-1.1-2.4.0.bin -opennlp-en-ud-ewt-tokens-1.1-2.4.0.bin -opennlp-fr-ud-gsd-tokens-1.1-2.4.0.bin -opennlp-it-ud-vit-tokens-1.1-2.4.0.bin -opennlp-nl-ud-alpino-tokens-1.1-2.4.0.bin \ No newline at end of file +model.name=${model.name} +model.version=${model.version} +model.sha256=${model.sha256} +model.language=${model.language} \ No newline at end of file diff --git a/opennlp-models-tokenizer/pom.xml b/opennlp-models-tokenizer/pom.xml index d4eeda7..e2c65e7 100644 --- a/opennlp-models-tokenizer/pom.xml +++ b/opennlp-models-tokenizer/pom.xml @@ -34,6 +34,7 @@ under the License. <packaging>pom</packaging> <modules> + <module>opennlp-models-tokenizer-bg</module> <module>opennlp-models-tokenizer-de</module> <module>opennlp-models-tokenizer-en</module> <module>opennlp-models-tokenizer-fr</module>
