This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-483 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 0eb60473a26d298775cd6ebe50d93e31e2ca6a2e Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Wed Mar 2 15:43:13 2022 +0300 WIP. --- .../examples/lightswitch/LightSwitchRuModel.scala | 23 +++--- .../parser/semantic/NCRuSemanticEntityParser.scala | 2 +- .../lightswitch/LightSwitchGroovyModel.groovy | 5 +- .../examples/lightswitch/LightSwitchJavaModel.java | 11 +-- .../examples/lightswitch/LightSwitchKotlinModel.kt | 6 +- .../lightswitch/LightSwitchScalaModel.scala | 12 +-- .../apache/nlpcraft/examples/time/TimeModel.java | 16 ++-- .../apache/nlpcraft/NCModelPipelineBuilder.java | 42 +++++++++++ ...icStemmer.java => NCModelPipelineLanguage.java} | 15 ++-- .../nlpcraft/internal/util/NCResourceReader.java | 64 ---------------- ...urceReaderImpl.scala => NCResourceReader.scala} | 55 ++++++-------- .../apache/nlpcraft/nlp/NCENDefaultPipeline.java | 85 ---------------------- .../nlpcraft/nlp/NCENSemanticEntityParser.java | 69 ------------------ .../parser/semantic/NCEnSemanticEntityParser.java | 56 ++++++++++++++ .../token/enricher}/NCBracketsTokenEnricher.java | 4 +- .../token/enricher}/NCDictionaryTokenEnricher.java | 4 +- .../token/enricher}/NCQuotesTokenEnricher.java | 4 +- .../token/enricher}/NCStopWordsTokenEnricher.java | 4 +- .../N\320\241SwearWordsTokenEnricher.java" | 4 +- .../impl/NCBracketsTokenEnricherImpl.scala | 2 +- .../impl/NCDictionaryTokenEnricherImpl.scala | 2 +- .../impl/NCLemmaPosTokenEnricherImpl.scala | 2 +- .../enricher}/impl/NCQuotesTokenEnricherImpl.scala | 2 +- .../token/enricher}/impl/NCStopWordGenerator.scala | 2 +- .../impl/NCStopWordsTokenEnricherImpl.scala | 9 ++- .../impl/NCSwearWordsTokenEnricherImpl.scala | 2 +- .../parser/opennlp/NCENOpenNLPTokenParser.java} | 20 ++--- .../entity/parser/nlp/NCNLPEntityParser.java | 4 +- .../parser/nlp/impl/NCNLPEntityParserImpl.scala | 4 +- .../parser/opennlp/NCOpenNLPEntityParser.java | 4 +- .../opennlp/impl/NCOpenNLPEntityParserImpl.scala | 2 +- .../entity/parser/semantic/NCSemanticElement.java | 2 +- .../parser/semantic/NCSemanticEntityParser.java | 4 +- .../entity/parser/semantic/NCSemanticStemmer.java | 2 +- .../semantic/impl/NCSemanticEntityParserImpl.scala | 11 ++- .../semantic/impl/NCSemanticSourceReader.scala | 6 +- .../parser/semantic/impl/NCSemanticSynonym.scala | 4 +- .../impl/NCSemanticSynonymsProcessor.scala | 6 +- .../enricher/opennlp}/NCLemmaPosTokenEnricher.java | 5 +- .../token/parser/opennlp/NCOpenNLPTokenParser.java | 4 +- .../opennlp/impl/NCOpenNLPTokenParserImpl.scala | 2 +- .../internal/impl/NCModelCallbacksSpec.scala | 7 +- .../nlpcraft/internal/impl/NCModelClientSpec.scala | 10 +-- .../internal/impl/NCModelPingPongSpec.scala | 9 +-- .../internal/impl/NCModelPipelineManagerSpec.scala | 7 +- .../nlpcraft/internal/util/NCResourceSpec.scala | 10 +-- .../nlpcraft/nlp/NCENDefaultPipelineSpec.scala | 10 ++- .../opennlp/NCEnOpenNlpTokenParserBenchmark.java | 6 +- .../entity/parser/nlp/NCNLPEntityParserSpec.scala | 3 +- .../parser/opennlp/NCOpenNLPEntityParserSpec.scala | 4 +- .../semantic/NCSemanticEntityParserJsonSpec.scala | 13 ++-- .../semantic/NCSemanticEntityParserSpec.scala | 17 ++--- .../semantic/NCSemanticEntityParserYamlSpec.scala | 11 +-- .../enricher/en/NCBracketsTokenEnricherSpec.scala | 1 + .../en/NCDictionaryTokenEnricherSpec.scala | 8 +- .../enricher/en/NCQuotesTokenEnricherSpec.scala | 9 +-- .../enricher/en/NCStopWordsEnricherSpec.scala | 8 +- .../en/NCSwearWordsTokenEnricherSpec.scala | 3 +- .../enricher/en/impl/NCStopWordsImplSpec.scala | 2 +- .../parser/opennlp/NCOpenNLPTokenParserSpec.scala | 9 +-- .../apache/nlpcraft/nlp/util/NCTestPipeline.scala | 2 +- .../nlpcraft/nlp/util/opennlp/NCTestConfig.scala | 2 +- .../nlp/util/opennlp/NCTestConfigJava.java | 9 +-- 63 files changed, 277 insertions(+), 465 deletions(-) diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala index 1317473..708d99f 100644 --- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala +++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala @@ -21,11 +21,10 @@ import org.apache.nlpcraft.* import org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.semantic.NCRuSemanticEntityParser import org.apache.nlpcraft.examples.lightswitch.nlp.token.enricher.{NCRuLemmaPosTokenEnricher, NCRuStopWordsTokenEnricher} import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser -import org.apache.nlpcraft.nlp.entity.parser.nlp.NCNLPEntityParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer -import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser +import org.apache.nlpcraft.nlp.en.token.enricher.NCStopWordsTokenEnricher +import org.apache.nlpcraft.nlp.mult.entity.parser.nlp.NCNLPEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser +import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser import java.util import scala.jdk.CollectionConverters.* @@ -44,13 +43,13 @@ import scala.jdk.CollectionConverters.* */ class LightSwitchRuModel extends NCModelAdapter( new NCModelConfig("nlpcraft.lightswitch.ru.ex", "LightSwitch Example Model RU", "1.0"), - new NCModelPipeline: - override val getTokenParser: NCTokenParser = new NCRuTokenParser() - override val getTokenEnrichers: util.List[NCTokenEnricher] = Seq( - new NCRuLemmaPosTokenEnricher(), - new NCRuStopWordsTokenEnricher() - ).asJava - override val getEntityParsers: util.List[NCEntityParser] = Seq(new NCRuSemanticEntityParser("lightswitch_model_ru.yaml")).asJava + new NCModelPipelineBuilder( + new NCRuTokenParser(), + new NCRuSemanticEntityParser("lightswitch_model_ru.yaml") + ). + withTokenEnricher(new NCRuLemmaPosTokenEnricher()). + withTokenEnricher(new NCRuStopWordsTokenEnricher()). + build() ): /** * Intent and its on-match callback. diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala index a0cdd9a..d50bdf6 100644 --- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala +++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala @@ -19,7 +19,7 @@ package org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.semantic import opennlp.tools.stemmer.snowball.SnowballStemmer import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.* +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.* /** * diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy index 9ce91cb..90dbb4e 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy @@ -18,8 +18,7 @@ package org.apache.nlpcraft.examples.lightswitch import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.NCENDefaultPipeline -import org.apache.nlpcraft.nlp.NCENSemanticEntityParser +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.*; /** * This example provides very simple implementation for NLI-powered light switch. @@ -37,7 +36,7 @@ class LightSwitchGroovyModel extends NCModelAdapter { LightSwitchGroovyModel() { super( new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0"), - new NCENDefaultPipeline(new NCENSemanticEntityParser("lightswitch_model.yaml")) + new NCModelPipelineBuilder(NCModelPipelineLanguage.EN, new NCEnSemanticEntityParser("lightswitch_model.yaml")).build() ) } diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java index b557111..3050bed 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java @@ -18,14 +18,7 @@ package org.apache.nlpcraft.examples.lightswitch; import org.apache.nlpcraft.*; -import org.apache.nlpcraft.internal.util.NCResourceReader; -import org.apache.nlpcraft.nlp.NCENDefaultPipeline; -import org.apache.nlpcraft.nlp.NCENSemanticEntityParser; -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser; -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer; -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser; -import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher; - +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.*; import java.util.List; import java.util.stream.Collectors; @@ -45,7 +38,7 @@ public class LightSwitchJavaModel extends NCModelAdapter { public LightSwitchJavaModel() { super( new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0"), - new NCENDefaultPipeline(new NCENSemanticEntityParser("lightswitch_model.yaml")) + new NCModelPipelineBuilder(NCModelPipelineLanguage.EN, new NCEnSemanticEntityParser("lightswitch_model.yaml")).build() ); } diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt index 63ed597..10d41ea 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt @@ -19,11 +19,11 @@ package org.apache.nlpcraft.examples.lightswitch import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.NCENDefaultPipeline -import org.apache.nlpcraft.nlp.NCENSemanticEntityParser +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser import java.util.* import java.util.stream.Collectors + /** * This example provides very simple implementation for NLI-powered light switch. * You can say something like this: @@ -38,7 +38,7 @@ import java.util.stream.Collectors */ class LightSwitchKotlinModel : NCModelAdapter( NCModelConfig("nlpcraft.lightswitch.kotlin.ex", "LightSwitch Example Model", "1.0"), - NCENDefaultPipeline(NCENSemanticEntityParser("lightswitch_model.yaml")) + NCModelPipelineBuilder(NCModelPipelineLanguage.EN, NCEnSemanticEntityParser("lightswitch_model.yaml")).build() ) { /** * Intent and its on-match callback. diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala index 6167d34..96ce7e1 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala @@ -19,12 +19,12 @@ package org.apache.nlpcraft.examples.lightswitch import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCResourceReader -import org.apache.nlpcraft.nlp.entity.parser.nlp.NCNLPEntityParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer -import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.* +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser +import org.apache.nlpcraft.nlp.en.token.enricher.NCStopWordsTokenEnricher +import org.apache.nlpcraft.nlp.mult.entity.parser.nlp.NCNLPEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser +import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser /** * This example provides very simple implementation for NLI-powered light switch. @@ -41,7 +41,7 @@ import org.apache.nlpcraft.nlp.* class LightSwitchScalaModel extends NCModelAdapter( new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0"), - new NCENDefaultPipeline(new NCENSemanticEntityParser("lightswitch_model.yaml")) + new NCModelPipelineBuilder(NCModelPipelineLanguage.EN, new NCEnSemanticEntityParser("lightswitch_model.yaml")).build() ): /** * Intent and its on-match callback. diff --git a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java index 8f6af61..f477901 100644 --- a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java +++ b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java @@ -28,7 +28,8 @@ import org.apache.nlpcraft.NCIntentSample; import org.apache.nlpcraft.NCIntentTerm; import org.apache.nlpcraft.NCModelAdapter; import org.apache.nlpcraft.NCModelConfig; -import org.apache.nlpcraft.NCModelPipeline; +import org.apache.nlpcraft.NCModelPipelineBuilder; +import org.apache.nlpcraft.NCModelPipelineLanguage; import org.apache.nlpcraft.NCRejection; import org.apache.nlpcraft.NCResult; import org.apache.nlpcraft.NCResultType; @@ -37,10 +38,7 @@ import org.apache.nlpcraft.examples.time.utils.cities.City; import org.apache.nlpcraft.examples.time.utils.cities.CityData; import org.apache.nlpcraft.examples.time.utils.keycdn.GeoData; import org.apache.nlpcraft.examples.time.utils.keycdn.GeoManager; -import org.apache.nlpcraft.internal.util.NCResourceReader; -import org.apache.nlpcraft.nlp.NCENDefaultPipeline; -import org.apache.nlpcraft.nlp.NCENSemanticEntityParser; -import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNLPEntityParser; +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser; import java.time.ZoneId; import java.time.ZonedDateTime; @@ -79,10 +77,10 @@ public class TimeModel extends NCModelAdapter { public TimeModel() { super( new NCModelConfig("nlpcraft.time.ex", "Time Example Model", "1.0"), - new NCENDefaultPipeline( - new NCENSemanticEntityParser("time_model.yaml"), - new NCOpenNLPEntityParser(new NCResourceReader().getPath("opennlp/en-ner-location.bin")) - ) + new NCModelPipelineBuilder( + NCModelPipelineLanguage.EN, + new NCEnSemanticEntityParser("time_model.yaml") + ).build() ); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java index 1d4d977..699751f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java @@ -17,12 +17,22 @@ package org.apache.nlpcraft; +import org.apache.nlpcraft.internal.util.NCResourceReader; +import org.apache.nlpcraft.nlp.en.token.enricher.NCBracketsTokenEnricher; +import org.apache.nlpcraft.nlp.en.token.enricher.NCDictionaryTokenEnricher; +import org.apache.nlpcraft.nlp.en.token.enricher.NCEnLemmaPosTokenEnricher; +import org.apache.nlpcraft.nlp.en.token.enricher.NCQuotesTokenEnricher; +import org.apache.nlpcraft.nlp.en.token.enricher.NCStopWordsTokenEnricher; +import org.apache.nlpcraft.nlp.en.token.enricher.NСSwearWordsTokenEnricher; +import org.apache.nlpcraft.nlp.en.token.parser.opennlp.NCENOpenNLPTokenParser; + import java.util.*; /** * */ public class NCModelPipelineBuilder { + private final NCTokenParser tokParser; private final List<NCTokenEnricher> tokEnrichers = new ArrayList<>(); private final List<NCEntityEnricher> entEnrichers = new ArrayList<>(); @@ -56,6 +66,38 @@ public class NCModelPipelineBuilder { this(tokParser, Arrays.asList(entParsers)); } + /** + * + * @param lang + * @param entParsers + */ + public NCModelPipelineBuilder(NCModelPipelineLanguage lang, List<NCEntityParser> entParsers) { + Objects.requireNonNull(lang, "Language cannot be null."); + Objects.requireNonNull(entParsers, "Entity parsers cannot be null."); + if (entParsers.isEmpty()) + throw new IllegalArgumentException("At least one entity parser must be defined."); + + tokParser = new NCENOpenNLPTokenParser(); + + tokEnrichers.add(new NCEnLemmaPosTokenEnricher()); + tokEnrichers.add(new NCStopWordsTokenEnricher()); + tokEnrichers.add(new NСSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt"))); + tokEnrichers.add(new NCQuotesTokenEnricher()); + tokEnrichers.add(new NCDictionaryTokenEnricher()); + tokEnrichers.add(new NCBracketsTokenEnricher()); + + this.entParsers.addAll(entParsers); + } + + /** + * + * @param lang + * @param entParsers + */ + public NCModelPipelineBuilder(NCModelPipelineLanguage lang, NCEntityParser... entParsers) { + this(lang, Arrays.asList(entParsers)); + } + /** * @param tokEnrichers diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticStemmer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineLanguage.java similarity index 76% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticStemmer.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineLanguage.java index 279e4f4..4fab6fe 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticStemmer.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineLanguage.java @@ -6,7 +6,7 @@ * (the "License"); you may not use this file except in compliance with * the License. You may obtain a copy of the License at * - * https://www.apache.org/licenses/LICENSE-2.0 + * http://www.apache.org/licenses/LICENSE-2.0 * * Unless required by applicable law or agreed to in writing, software * distributed under the License is distributed on an "AS IS" BASIS, @@ -15,16 +15,11 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.semantic; +package org.apache.nlpcraft; /** - * + * */ -public interface NCSemanticStemmer { - /** - * - * @param txt - * @return - */ - String stem(String txt); +public enum NCModelPipelineLanguage { + EN } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.java deleted file mode 100644 index 8c31db5..0000000 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.java +++ /dev/null @@ -1,64 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nlpcraft.internal.util; - -import java.io.File; - -/** - * TODO: move it into right folder. - */ -public class NCResourceReader { - /** - * - */ - private NCResourceReaderImpl impl; - - /** - * - */ - public NCResourceReader() { - impl = NCResourceReaderImpl.apply(); - } - - /** - * - * @param dir Folder to save downloaded data. - */ - public NCResourceReader(String dir) { - impl = NCResourceReaderImpl.apply(dir); - } - - /** - * Gets if exists or download. - * - * @param path - * @return - */ - public File get(String path) { - return impl.get(path); - } - - /** - * - * @param path - * @return - */ - public String getPath(String path) { - return impl.get(path).getAbsolutePath(); - } -} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReaderImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.scala similarity index 85% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReaderImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.scala index 2363d1a..c5bbe25 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReaderImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/util/NCResourceReader.scala @@ -26,12 +26,14 @@ import scala.io.Source import com.typesafe.scalalogging.LazyLogging import org.apache.commons.io.IOUtils import org.apache.commons.codec.digest.DigestUtils + import java.nio.file.Files +import scala.collection.immutable.Map /** - * + * TODO: move it NCUtils? */ -object NCResourceReaderImpl extends LazyLogging: +object NCResourceReader extends LazyLogging: private final val DFLT_DIR = new File(System.getProperty("user.home"), ".nlpcraft/extcfg").getAbsolutePath private final val BASE_URL = "https://github.com/apache/incubator-nlpcraft/raw/external_config/external" private final val MD5_FILE_URL = s"$BASE_URL/md5.txt" @@ -81,28 +83,6 @@ object NCResourceReaderImpl extends LazyLogging: /** * - * @param dir - * @return - */ - def apply(dir: String): NCResourceReaderImpl = new NCResourceReaderImpl(mkDir(dir)) - - /** - * - * @return - */ - def apply(): NCResourceReaderImpl = new NCResourceReaderImpl(mkDir(null)) - -import NCResourceReaderImpl.* - -/** - * - * @param dir - */ -class NCResourceReaderImpl(dir: File) extends LazyLogging: - private val md5 = readMd5(MD5_FILE_URL) - - /** - * * @param f * @return */ @@ -111,9 +91,10 @@ class NCResourceReaderImpl(dir: File) extends LazyLogging: /** * * @param f + * @param md5 * @return */ - private def getMd5(f: File): String = + private def getMd5(f: File, md5: Map[String, String]): String = val path = f.getAbsolutePath val nameLen = f.getName.length @@ -126,10 +107,11 @@ class NCResourceReaderImpl(dir: File) extends LazyLogging: /** * * @param f + * @param md5 * @return */ - private def isValid(f: File): Boolean = - val v1 = getMd5(f) + private def isValid(f: File, md5: Map[String, String]): Boolean = + val v1 = getMd5(f, md5) val v2 = try Using.resource(Files.newInputStream(f.toPath)) { in => DigestUtils.md5Hex(in) } @@ -141,9 +123,10 @@ class NCResourceReaderImpl(dir: File) extends LazyLogging: * * @param path * @param outFile + * @param md5 * @return */ - private def download(path: String, outFile: String): File = + private def download(path: String, outFile: String, md5: Map[String, String]): File = mkDir(new File(outFile).getParent) val url = s"$BASE_URL/$path" @@ -154,7 +137,7 @@ class NCResourceReaderImpl(dir: File) extends LazyLogging: logger.info(s"One-time download for external config [url='$url', file='$outFile']") val f = new File(outFile) - if !isValid(f) then throw new NCException(s"Invalid downloaded file [url='$url'") + if !isValid(f, md5) then throw new NCException(s"Invalid downloaded file [url='$url'") f } catch case e: IOException => throw new NCException(s"Failed to download external config [url='$url', file='$outFile']", e) @@ -165,18 +148,26 @@ class NCResourceReaderImpl(dir: File) extends LazyLogging: * @return */ def get(path: String): File = + val md5 = readMd5(MD5_FILE_URL) var f = new File(path) def process(f: File): File = - if isValid(f) then + if isValid(f, md5) then logger.info(s"File found: ${f.getAbsolutePath}") f else delete(f) - download(path, f.getAbsolutePath) + download(path, f.getAbsolutePath, md5) if isExists(f) then process(f) else f = new File(DFLT_DIR, path) - if isExists(f) then process(f) else download(path, f.getAbsolutePath) \ No newline at end of file + if isExists(f) then process(f) else download(path, f.getAbsolutePath, md5) + + /** + * + * @param path + * @return + */ + def getPath(path: String): String = get(path).getAbsolutePath diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENDefaultPipeline.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENDefaultPipeline.java deleted file mode 100644 index 837a80f..0000000 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENDefaultPipeline.java +++ /dev/null @@ -1,85 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nlpcraft.nlp; - -import org.apache.nlpcraft.NCEntityParser; -import org.apache.nlpcraft.NCModelPipeline; -import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.NCTokenParser; -import org.apache.nlpcraft.internal.util.NCResourceReader; -import org.apache.nlpcraft.nlp.token.enricher.en.*; -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser; - -import java.util.Arrays; -import java.util.Collections; -import java.util.List; - -/** - * Default EN implementation based on Open Nlp token parser, and set of built token enrichers including stopword enricher. - * Also at least one entity parser must be defined. - */ -public class NCENDefaultPipeline implements NCModelPipeline { - private static final NCResourceReader reader = new NCResourceReader(); - - private final NCTokenParser tokParser = new NCOpenNLPTokenParser(reader.getPath("opennlp/en-token.bin")); - - private List<NCTokenEnricher> tokenEnrichers = Arrays.asList( - new NCLemmaPosTokenEnricher( - reader.getPath("opennlp/en-pos-maxent.bin"), - reader.getPath("opennlp/en-lemmatizer.dict") - ), - new NCStopWordsTokenEnricher(), - new NСSwearWordsTokenEnricher(reader.getPath("badfilter/swear_words.txt")), - new NCQuotesTokenEnricher(), - new NCDictionaryTokenEnricher(), - new NCBracketsTokenEnricher() - ); - - private final List<NCEntityParser> entParsers; - - /** - * - * @param entParsers - */ - public NCENDefaultPipeline(List<NCEntityParser> entParsers) { - this.entParsers = entParsers; - } - - /** - * - * @param parser - */ - public NCENDefaultPipeline(NCEntityParser... parsers) { - this.entParsers = Arrays.asList(parsers); - } - - @Override - public NCTokenParser getTokenParser() { - return tokParser; - } - - @Override - public List<NCEntityParser> getEntityParsers() { - return entParsers; - } - - @Override - public List<NCTokenEnricher> getTokenEnrichers() { - return tokenEnrichers; - } -} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENSemanticEntityParser.java deleted file mode 100644 index 08362a0..0000000 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENSemanticEntityParser.java +++ /dev/null @@ -1,69 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nlpcraft.nlp; - -import org.apache.nlpcraft.NCTokenParser; -import org.apache.nlpcraft.internal.util.NCResourceReader; -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticElement; -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser; -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer; -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer; -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser; - -import java.util.List; -import java.util.Map; - -/** - * Wrapper on {@link NCSemanticEntityParser} which uses Open Nlp token parser (same as {@link NCENDefaultPipeline}) and built EN stemmer. - */ -public class NCENSemanticEntityParser extends NCSemanticEntityParser { - private static final NCResourceReader reader = new NCResourceReader(); - - private static NCSemanticStemmer mkStemmer() { - return new NCEnSemanticPorterStemmer(); - } - - private static NCOpenNLPTokenParser mkParser() { - return new NCOpenNLPTokenParser(reader.getPath("opennlp/en-token.bin")); - } - - /** - * - * @param elms - */ - public NCENSemanticEntityParser(List<NCSemanticElement> elms) { - super(mkStemmer(), mkParser(), elms); - } - - /** - * - * @param macros - * @param elms - */ - public NCENSemanticEntityParser(Map<String, String> macros, List<NCSemanticElement> elms) { - super(mkStemmer(), mkParser(), macros, elms); - } - - /** - * - * @param src - */ - public NCENSemanticEntityParser(String src) { - super(mkStemmer(), mkParser(), src); - } -} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/entity/parser/semantic/NCEnSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/entity/parser/semantic/NCEnSemanticEntityParser.java new file mode 100644 index 0000000..e1dd5b0 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/entity/parser/semantic/NCEnSemanticEntityParser.java @@ -0,0 +1,56 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.nlp.en.entity.parser.semantic; + +import opennlp.tools.stemmer.PorterStemmer; +import org.apache.nlpcraft.NCTokenParser; +import org.apache.nlpcraft.nlp.en.token.parser.opennlp.NCENOpenNLPTokenParser; +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticElement; +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser; +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticStemmer; + +import java.util.List; +import java.util.Map; + +/** + * + */ +public class NCEnSemanticEntityParser extends NCSemanticEntityParser { + private static final NCSemanticStemmer porterStemmer = new NCSemanticStemmer() { + private final PorterStemmer stemmerImpl = new PorterStemmer(); + + @Override + public synchronized String stem(String s) { + return stemmerImpl.stem(s.toLowerCase()); + } + }; + + private static final NCTokenParser opennlpParser = new NCENOpenNLPTokenParser(); + + public NCEnSemanticEntityParser(List<NCSemanticElement> elms) { + super(porterStemmer, opennlpParser, elms); + } + + public NCEnSemanticEntityParser(Map<String, String> macros, List<NCSemanticElement> elms) { + super(porterStemmer, opennlpParser, macros, elms); + } + + public NCEnSemanticEntityParser(String src) { + super(porterStemmer, opennlpParser, src); + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCBracketsTokenEnricher.java similarity index 93% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCBracketsTokenEnricher.java index d458591..cc1ed3a 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCBracketsTokenEnricher.java @@ -15,13 +15,13 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en; +package org.apache.nlpcraft.nlp.en.token.enricher; import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCBracketsTokenEnricherImpl; +import org.apache.nlpcraft.nlp.en.token.enricher.impl.NCBracketsTokenEnricherImpl; import java.util.List; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCDictionaryTokenEnricher.java similarity index 93% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCDictionaryTokenEnricher.java index cf76943..e788924 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCDictionaryTokenEnricher.java @@ -15,13 +15,13 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en; +package org.apache.nlpcraft.nlp.en.token.enricher; import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCDictionaryTokenEnricherImpl; +import org.apache.nlpcraft.nlp.en.token.enricher.impl.NCDictionaryTokenEnricherImpl; import java.util.List; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCQuotesTokenEnricher.java similarity index 93% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCQuotesTokenEnricher.java index e5a6c5b..0de565d 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCQuotesTokenEnricher.java @@ -15,13 +15,13 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en; +package org.apache.nlpcraft.nlp.en.token.enricher; import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCQuotesTokenEnricherImpl; +import org.apache.nlpcraft.nlp.en.token.enricher.impl.NCQuotesTokenEnricherImpl; import java.util.List; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCStopWordsTokenEnricher.java similarity index 94% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCStopWordsTokenEnricher.java index c91ed82..5e1dd34 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCStopWordsTokenEnricher.java @@ -15,13 +15,13 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en; +package org.apache.nlpcraft.nlp.en.token.enricher; import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCStopWordsTokenEnricherImpl; +import org.apache.nlpcraft.nlp.en.token.enricher.impl.NCStopWordsTokenEnricherImpl; import java.util.List; import java.util.Set; diff --git "a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java" "b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/N\320\241SwearWordsTokenEnricher.java" similarity index 94% rename from "nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java" rename to "nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/N\320\241SwearWordsTokenEnricher.java" index 70c4286..462f024 100644 --- "a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/N\320\241SwearWordsTokenEnricher.java" +++ "b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/N\320\241SwearWordsTokenEnricher.java" @@ -15,13 +15,13 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en; +package org.apache.nlpcraft.nlp.en.token.enricher; import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCSwearWordsTokenEnricherImpl; +import org.apache.nlpcraft.nlp.en.token.enricher.impl.NCSwearWordsTokenEnricherImpl; import java.util.List; import java.util.Objects; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCBracketsTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCBracketsTokenEnricherImpl.scala similarity index 97% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCBracketsTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCBracketsTokenEnricherImpl.scala index cd5f3bf..ace6847 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCBracketsTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCBracketsTokenEnricherImpl.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en.impl +package org.apache.nlpcraft.nlp.en.token.enricher.impl import com.typesafe.scalalogging.LazyLogging import org.apache.nlpcraft.* diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCDictionaryTokenEnricherImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCDictionaryTokenEnricherImpl.scala index 41feb75..c842ace 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCDictionaryTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCDictionaryTokenEnricherImpl.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en.impl +package org.apache.nlpcraft.nlp.en.token.enricher.impl import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCUtils diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCLemmaPosTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCLemmaPosTokenEnricherImpl.scala similarity index 98% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCLemmaPosTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCLemmaPosTokenEnricherImpl.scala index 16a1d64..9f378f0 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCLemmaPosTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCLemmaPosTokenEnricherImpl.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en.impl +package org.apache.nlpcraft.nlp.en.token.enricher.impl import com.typesafe.scalalogging.LazyLogging import opennlp.tools.lemmatizer.DictionaryLemmatizer diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCQuotesTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCQuotesTokenEnricherImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCQuotesTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCQuotesTokenEnricherImpl.scala index d804a18..04359af 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCQuotesTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCQuotesTokenEnricherImpl.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en.impl +package org.apache.nlpcraft.nlp.en.token.enricher.impl import com.typesafe.scalalogging.LazyLogging import org.apache.nlpcraft.* diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordGenerator.scala similarity index 99% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordGenerator.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordGenerator.scala index ff85727..5d68a05 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordGenerator.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordGenerator.scala @@ -1,4 +1,4 @@ -package org.apache.nlpcraft.nlp.token.enricher.en.impl +package org.apache.nlpcraft.nlp.en.token.enricher.impl import opennlp.tools.stemmer.PorterStemmer import org.apache.nlpcraft.internal.util.NCUtils diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordsTokenEnricherImpl.scala similarity index 98% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordsTokenEnricherImpl.scala index 6460290..7b31018 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCStopWordsTokenEnricherImpl.scala @@ -15,13 +15,13 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en.impl +package org.apache.nlpcraft.nlp.en.token.enricher.impl import com.typesafe.scalalogging.LazyLogging import opennlp.tools.stemmer.PorterStemmer import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCUtils -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticStemmer import java.io.* import java.util @@ -113,7 +113,8 @@ object NCStopWordsTokenEnricherImpl: * @param tokens Tokens. * @param maxLen Maximum number of tokens in the sequence. */ - private[impl] def tokenMixWithStopWords(tokens: Seq[NCToken], maxLen: Int = Integer.MAX_VALUE): Seq[Seq[NCToken]] = + // TODO: private[impl] + def tokenMixWithStopWords(tokens: Seq[NCToken], maxLen: Int = Integer.MAX_VALUE): Seq[Seq[NCToken]] = /** * Gets all combinations for sequence of mandatory tokens with stop-words and without. * @@ -164,7 +165,7 @@ object NCStopWordsTokenEnricherImpl: private def tokenMix(toks: Seq[NCToken], maxLen: Int = Integer.MAX_VALUE): Seq[Seq[NCToken]] = (for (n <- toks.length until 0 by -1 if n <= maxLen) yield toks.sliding(n)).flatten -import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCStopWordsTokenEnricherImpl.* +import NCStopWordsTokenEnricherImpl.* /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsTokenEnricherImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCSwearWordsTokenEnricherImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsTokenEnricherImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCSwearWordsTokenEnricherImpl.scala index 9bb2ea1..bfb97fa 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCSwearWordsTokenEnricherImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/impl/NCSwearWordsTokenEnricherImpl.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en.impl +package org.apache.nlpcraft.nlp.en.token.enricher.impl import com.typesafe.scalalogging.LazyLogging import opennlp.tools.stemmer.PorterStemmer diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/en/NCEnSemanticPorterStemmer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/parser/opennlp/NCENOpenNLPTokenParser.java similarity index 65% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/en/NCEnSemanticPorterStemmer.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/parser/opennlp/NCENOpenNLPTokenParser.java index 811882f..040ac90 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/en/NCEnSemanticPorterStemmer.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/parser/opennlp/NCENOpenNLPTokenParser.java @@ -15,20 +15,16 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en; +package org.apache.nlpcraft.nlp.en.token.parser.opennlp; -import opennlp.tools.stemmer.PorterStemmer; -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer; +import org.apache.nlpcraft.internal.util.NCResourceReader; +import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser; -/** - * +/* + * */ -public class NCEnSemanticPorterStemmer implements NCSemanticStemmer { - /** */ - private final PorterStemmer stemmer = new PorterStemmer(); - - @Override - public synchronized String stem(String s) { - return stemmer.stem(s.toLowerCase()); +public class NCENOpenNLPTokenParser extends NCOpenNLPTokenParser { + public NCENOpenNLPTokenParser() { + super(NCResourceReader.getPath("opennlp/en-token.bin")); } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNLPEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/nlp/NCNLPEntityParser.java similarity index 92% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNLPEntityParser.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/nlp/NCNLPEntityParser.java index d36951d..1876cf8 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNLPEntityParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/nlp/NCNLPEntityParser.java @@ -15,14 +15,14 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.nlp; +package org.apache.nlpcraft.nlp.mult.entity.parser.nlp; import org.apache.nlpcraft.NCEntity; import org.apache.nlpcraft.NCEntityParser; import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; -import org.apache.nlpcraft.nlp.entity.parser.nlp.impl.NCNLPEntityParserImpl; +import org.apache.nlpcraft.nlp.mult.entity.parser.nlp.impl.NCNLPEntityParserImpl; import java.util.List; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNLPEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/nlp/impl/NCNLPEntityParserImpl.scala similarity index 92% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNLPEntityParserImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/nlp/impl/NCNLPEntityParserImpl.scala index db0941f..d3fc1e6 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNLPEntityParserImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/nlp/impl/NCNLPEntityParserImpl.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.nlp.impl +package org.apache.nlpcraft.nlp.mult.entity.parser.nlp.impl import org.apache.nlpcraft.* @@ -29,7 +29,7 @@ import java.util.stream.Collectors object NCNLPEntityParserImpl: private def id = "nlp:token" -import org.apache.nlpcraft.nlp.entity.parser.nlp.impl.NCNLPEntityParserImpl.* +import NCNLPEntityParserImpl.* /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNLPEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/opennlp/NCOpenNLPEntityParser.java similarity index 93% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNLPEntityParser.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/opennlp/NCOpenNLPEntityParser.java index ab97c30..4de886c 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNLPEntityParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/opennlp/NCOpenNLPEntityParser.java @@ -15,10 +15,10 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.opennlp; +package org.apache.nlpcraft.nlp.mult.entity.parser.opennlp; import org.apache.nlpcraft.*; -import org.apache.nlpcraft.nlp.entity.parser.opennlp.impl.NCOpenNLPEntityParserImpl; +import org.apache.nlpcraft.nlp.mult.entity.parser.opennlp.impl.NCOpenNLPEntityParserImpl; import java.util.List; import java.util.Objects; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNLPEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/opennlp/impl/NCOpenNLPEntityParserImpl.scala similarity index 97% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNLPEntityParserImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/opennlp/impl/NCOpenNLPEntityParserImpl.scala index de458fb..c154e45 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNLPEntityParserImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/opennlp/impl/NCOpenNLPEntityParserImpl.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.opennlp.impl +package org.apache.nlpcraft.nlp.mult.entity.parser.opennlp.impl import com.typesafe.scalalogging.LazyLogging import opennlp.tools.namefind.* diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/NCSemanticElement.java similarity index 95% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticElement.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/NCSemanticElement.java index 6e052b0..fa30d79 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticElement.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/NCSemanticElement.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.semantic; +package org.apache.nlpcraft.nlp.mult.entity.parser.semantic; import java.util.*; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/NCSemanticEntityParser.java similarity index 95% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/NCSemanticEntityParser.java index 880ec0e..99e6141 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/NCSemanticEntityParser.java @@ -15,10 +15,10 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.semantic; +package org.apache.nlpcraft.nlp.mult.entity.parser.semantic; import org.apache.nlpcraft.*; -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticEntityParserImpl; +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.impl.NCSemanticEntityParserImpl; import java.util.Collections; import java.util.List; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticStemmer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/NCSemanticStemmer.java similarity index 93% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticStemmer.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/NCSemanticStemmer.java index 279e4f4..0b62707 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticStemmer.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/NCSemanticStemmer.java @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.semantic; +package org.apache.nlpcraft.nlp.mult.entity.parser.semantic; /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala index 71ea590..0eaca66 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala @@ -15,16 +15,15 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.semantic.impl - +package org.apache.nlpcraft.nlp.mult.entity.parser.semantic.impl import com.typesafe.scalalogging.LazyLogging import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.makro.NCMacroParser import org.apache.nlpcraft.internal.util.NCUtils -import org.apache.nlpcraft.nlp.entity.parser.semantic.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticChunkKind.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticSourceType.* +import NCSemanticChunkKind.* +import NCSemanticSourceType.* +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.* import java.io.* import java.util @@ -140,7 +139,7 @@ object NCSemanticEntityParserImpl: ) }) -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticEntityParserImpl.* +import NCSemanticEntityParserImpl.* /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticSourceReader.scala similarity index 94% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticSourceReader.scala index d8981b1..e588638 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSourceReader.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticSourceReader.scala @@ -14,15 +14,15 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.semantic.impl +package org.apache.nlpcraft.nlp.mult.entity.parser.semantic.impl import com.fasterxml.jackson.core.JsonParser import com.fasterxml.jackson.databind.* import com.fasterxml.jackson.dataformat.yaml.* import com.fasterxml.jackson.module.scala.DefaultScalaModule import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticSourceType.* +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.* +import NCSemanticSourceType.* import java.io.InputStream import java.util diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticSynonym.scala similarity index 93% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonym.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticSynonym.scala index 7057507..71cd15c 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonym.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticSynonym.scala @@ -14,10 +14,10 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.semantic.impl +package org.apache.nlpcraft.nlp.mult.entity.parser.semantic.impl import org.apache.nlpcraft.NCToken -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticChunkKind.* +import NCSemanticChunkKind.* import java.util.regex.Pattern diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala similarity index 97% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala index 40d1c3b..2f87aab 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala @@ -14,7 +14,7 @@ * See the License for the specific language governing permissions and * limitations under the License. */ -package org.apache.nlpcraft.nlp.entity.parser.semantic.impl +package org.apache.nlpcraft.nlp.mult.entity.parser.semantic.impl import com.fasterxml.jackson.databind.* import com.fasterxml.jackson.dataformat.yaml.* @@ -23,8 +23,8 @@ import com.typesafe.scalalogging.LazyLogging import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.makro.NCMacroParser import org.apache.nlpcraft.internal.util.NCUtils -import org.apache.nlpcraft.nlp.entity.parser.semantic.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticChunkKind.* +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.* +import NCSemanticChunkKind.* import java.io.InputStream import java.util diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLemmaPosTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/NCLemmaPosTokenEnricher.java similarity index 93% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLemmaPosTokenEnricher.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/NCLemmaPosTokenEnricher.java index 81e3a73..13a72cc 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCLemmaPosTokenEnricher.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/enricher/opennlp/NCLemmaPosTokenEnricher.java @@ -15,16 +15,15 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.enricher.en; +package org.apache.nlpcraft.nlp.mult.token.enricher.opennlp; import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenEnricher; -import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCLemmaPosTokenEnricherImpl; +import org.apache.nlpcraft.nlp.en.token.enricher.impl.NCLemmaPosTokenEnricherImpl; import java.util.List; -import java.util.Set; /** * TODO: enriches with <code>lemma</code> and <code>pos</code> properties. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/parser/opennlp/NCOpenNLPTokenParser.java similarity index 92% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParser.java rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/parser/opennlp/NCOpenNLPTokenParser.java index 54bee08..a77e650 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/parser/opennlp/NCOpenNLPTokenParser.java @@ -15,12 +15,12 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.parser.opennlp; +package org.apache.nlpcraft.nlp.mult.token.parser.opennlp; import org.apache.nlpcraft.NCException; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenParser; -import org.apache.nlpcraft.nlp.token.parser.opennlp.impl.NCOpenNLPTokenParserImpl; +import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.impl.NCOpenNLPTokenParserImpl; import java.util.List; import java.util.Objects; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNLPTokenParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/parser/opennlp/impl/NCOpenNLPTokenParserImpl.scala similarity index 96% rename from nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNLPTokenParserImpl.scala rename to nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/parser/opennlp/impl/NCOpenNLPTokenParserImpl.scala index 51755ce..e9506ed 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/impl/NCOpenNLPTokenParserImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/token/parser/opennlp/impl/NCOpenNLPTokenParserImpl.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.nlp.token.parser.opennlp.impl +package org.apache.nlpcraft.nlp.mult.token.parser.opennlp.impl import com.typesafe.scalalogging.LazyLogging import opennlp.tools.tokenize.* diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelCallbacksSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelCallbacksSpec.scala index 184c2d7..1f2d1f1 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelCallbacksSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelCallbacksSpec.scala @@ -19,8 +19,9 @@ package org.apache.nlpcraft.internal.impl import org.apache.nlpcraft.* import org.apache.nlpcraft.NCResultType.* +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser import org.apache.nlpcraft.nlp.entity.parser.semantic.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser import org.apache.nlpcraft.nlp.util.NCTestModelAdapter import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.* @@ -64,9 +65,7 @@ class NCModelCallbacksSpec: override def onError(ctx: NCContext, e: Throwable): NCResult = getOrElse(ErrorNotNull, RESULT_ERROR, null) MDL.getPipeline.getEntityParsers.add( - new NCSemanticEntityParser( - new NCEnSemanticPorterStemmer, EN_PIPELINE.getTokenParser, Seq(NCSemanticTestElement("x")).asJava - ) + new NCEnSemanticEntityParser(Seq(NCSemanticTestElement("x")).asJava) ) /** diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala index 0d685e7..19a0ec9 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala @@ -18,8 +18,8 @@ package org.apache.nlpcraft.internal.impl import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser import org.apache.nlpcraft.nlp.util.NCTestModelAdapter import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.Test @@ -30,11 +30,7 @@ import scala.util.Using class NCModelClientSpec: private def test0(mdl: NCTestModelAdapter): Unit = mdl.getPipeline.getEntityParsers.add( - new NCSemanticEntityParser( - new NCEnSemanticPorterStemmer, - EN_PIPELINE.getTokenParser, - "models/lightswitch_model.yaml" - ) + new NCEnSemanticEntityParser("models/lightswitch_model.yaml") ) Using.resource(new NCModelClient(mdl)) { client => diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala index 3e045c3..910fb61 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala @@ -19,12 +19,13 @@ package org.apache.nlpcraft.internal.impl import org.apache.nlpcraft.* import org.apache.nlpcraft.nlp.entity.parser.semantic.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer import org.apache.nlpcraft.NCResultType.* +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser import org.apache.nlpcraft.nlp.util.NCTestModelAdapter import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.* import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticTestElement as STE +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser import scala.jdk.CollectionConverters.* import scala.util.Using @@ -64,11 +65,7 @@ class NCModelPingPongSpec: R(ASK_RESULT, s"Some request by: ${other.mkText()}") MDL.getPipeline.getEntityParsers.add( - new NCSemanticEntityParser( - new NCEnSemanticPorterStemmer, - EN_PIPELINE.getTokenParser, - Seq(STE("command"), STE("confirm"), STE("other")).asJava - ) + new NCEnSemanticEntityParser(Seq(STE("command"), STE("confirm"), STE("other")).asJava) ) @BeforeEach diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala index 76371c0..1357ef4 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala @@ -18,9 +18,10 @@ package org.apache.nlpcraft.internal.impl import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.nlp.impl.NCNLPEntityParserImpl +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser import org.apache.nlpcraft.nlp.entity.parser.semantic.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer +import org.apache.nlpcraft.nlp.mult.entity.parser.nlp.impl.NCNLPEntityParserImpl +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.* @@ -44,7 +45,7 @@ class NCModelPipelineManagerSpec: def test(txt: String, variantCnt: Int, elements: NCSemanticElement*): Unit = val pipeline = EN_PIPELINE.clone() - val parser = new NCSemanticEntityParser(new NCEnSemanticPorterStemmer, pipeline.getTokenParser, elements.asJava) + val parser = new NCEnSemanticEntityParser(elements.asJava) pipeline.getEntityParsers.clear() pipeline.getEntityParsers.add(parser) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/util/NCResourceSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/util/NCResourceSpec.scala index be15240..9d50975 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/util/NCResourceSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/util/NCResourceSpec.scala @@ -25,18 +25,16 @@ import org.junit.jupiter.api.Test class NCResourceSpec: @Test def test(): Unit = - val res = new NCResourceReader() - // Get and delete. - var f = res.get("opennlp/en-lemmatizer.dict") + var f = NCResourceReader.get("opennlp/en-lemmatizer.dict") require(f.delete()) println(s"Deleted: ${f.getAbsolutePath}") // Download. - f = res.get("opennlp/en-lemmatizer.dict") + f = NCResourceReader.get("opennlp/en-lemmatizer.dict") // From cache. - f = res.get("opennlp/en-lemmatizer.dict") + f = NCResourceReader.get("opennlp/en-lemmatizer.dict") // By absolute path. - f = res.get(f.getAbsolutePath) + f = NCResourceReader.get(f.getAbsolutePath) require(f.exists()) \ No newline at end of file diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala index a8a6d1b..586ace2 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala @@ -18,8 +18,8 @@ package org.apache.nlpcraft.nlp import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.* +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser import org.apache.nlpcraft.nlp.util.NCTestModelAdapter import org.junit.jupiter.api.Test @@ -53,7 +53,11 @@ class NCENDefaultPipelineSpec: def test(): Unit = val cfg = new NCModelConfig("test.id", "Test model", "1.0") // Default EN pipeline with default EN semantic parser. - val pipeline = new NCENDefaultPipeline(new NCENSemanticEntityParser("models/lightswitch_model.yaml")) + + val pipeline = new NCModelPipelineBuilder( + NCModelPipelineLanguage.EN, + new NCEnSemanticEntityParser("models/lightswitch_model.yaml") + ).build() Using.resource(new NCModelClient(mkModel(cfg, pipeline))) { client => println(client.ask("Please, put the light out in the upstairs bedroom.", null, "userId").getBody) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java index 69fe200..f3725de 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java @@ -19,7 +19,7 @@ package org.apache.nlpcraft.nlp.benchmark.token.parser.opennlp; import org.apache.nlpcraft.NCRequest; import org.apache.nlpcraft.internal.util.NCResourceReader; -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser; +import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser; import org.apache.nlpcraft.nlp.util.NCTestRequest; import org.openjdk.jmh.annotations.Benchmark; import org.openjdk.jmh.annotations.BenchmarkMode; @@ -59,9 +59,7 @@ public class NCEnOpenNlpTokenParserBenchmark { @Setup public void setUp() { - NCResourceReader reader = new NCResourceReader(); - - parser = new NCOpenNLPTokenParser(reader.getPath("opennlp/en-token.bin")); + parser = new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")); } @Benchmark diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNLPEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNLPEntityParserSpec.scala index 864209e..d56bf06 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNLPEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNLPEntityParserSpec.scala @@ -19,7 +19,8 @@ package org.apache.nlpcraft.nlp.entity.parser.nlp import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCUtils -import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNLPEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.nlp.NCNLPEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.opennlp.NCOpenNLPEntityParser import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.* diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNLPEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNLPEntityParserSpec.scala index 1aeaaad..a22f029 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNLPEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNLPEntityParserSpec.scala @@ -19,6 +19,7 @@ package org.apache.nlpcraft.nlp.entity.parser.opennlp import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.{NCResourceReader, NCUtils} +import org.apache.nlpcraft.nlp.mult.entity.parser.opennlp.NCOpenNLPEntityParser import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.* @@ -33,7 +34,6 @@ import scala.jdk.OptionConverters.RichOptional * */ class NCOpenNLPEntityParserSpec: - private val reader = new NCResourceReader() private val parser = new NCOpenNLPEntityParser( Seq( "opennlp/en-ner-location.bin", @@ -42,7 +42,7 @@ class NCOpenNLPEntityParserSpec: "opennlp/en-ner-organization.bin", "opennlp/en-ner-date.bin", "opennlp/en-ner-percentage.bin" - ).map(reader.getPath).asJava + ).map(NCResourceReader.getPath).asJava ) /** diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala index 9fc1b0b..c4a618e 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala @@ -19,9 +19,10 @@ package org.apache.nlpcraft.nlp.entity.parser.semantic import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCUtils -import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNLPEntityParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.opennlp.NCOpenNLPEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser +import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.* @@ -36,11 +37,7 @@ import scala.jdk.OptionConverters.RichOptional * */ class NCSemanticEntityParserJsonSpec: - private val parser = new NCSemanticEntityParser( - new NCEnSemanticPorterStemmer(), - EN_PIPELINE.getTokenParser, - "models/alarm_model.json" - ) + private val parser = new NCEnSemanticEntityParser("models/alarm_model.json") /** * diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala index b403cc3..e9588a0 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala @@ -19,8 +19,10 @@ package org.apache.nlpcraft.nlp.entity.parser.semantic import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.{NCResourceReader, NCUtils} -import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNLPEntityParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser +import org.apache.nlpcraft.nlp.en.token.enricher.{NCEnLemmaPosTokenEnricher, NCStopWordsTokenEnricher} +import org.apache.nlpcraft.nlp.mult.entity.parser.opennlp.NCOpenNLPEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.{NCSemanticElement, NCSemanticEntityParser} import org.apache.nlpcraft.nlp.token.enricher.en.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* @@ -64,9 +66,7 @@ object NCSemanticTestElement: */ class NCSemanticEntityParserSpec: private val parser = - new NCSemanticEntityParser( - new NCEnSemanticPorterStemmer, - EN_PIPELINE.getTokenParser, + new NCEnSemanticEntityParser( Seq( // Standard. NCSemanticTestElement("t1", synonyms = Set("t1")), @@ -88,12 +88,7 @@ class NCSemanticEntityParserSpec: private val stopWordsEnricher = new NCStopWordsTokenEnricher() - private val reader = new NCResourceReader() - - private val lemmaPosEnricher = new NCLemmaPosTokenEnricher( - reader.getPath("opennlp/en-pos-maxent.bin"), - reader.getPath("opennlp/en-lemmatizer.dict") - ) + private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher() /** * diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala index 797c3db..2a48f45 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala @@ -19,8 +19,9 @@ package org.apache.nlpcraft.nlp.entity.parser.semantic import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCUtils -import org.apache.nlpcraft.nlp.entity.parser.opennlp.NCOpenNLPEntityParser -import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer +import org.apache.nlpcraft.nlp.en.entity.parser.semantic.NCEnSemanticEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.opennlp.NCOpenNLPEntityParser +import org.apache.nlpcraft.nlp.mult.entity.parser.semantic.NCSemanticEntityParser import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.* @@ -34,11 +35,7 @@ import scala.jdk.OptionConverters.RichOptional * */ class NCSemanticEntityParserYamlSpec: - private val parser = new NCSemanticEntityParser( - new NCEnSemanticPorterStemmer, - EN_PIPELINE.getTokenParser, - "models/lightswitch_model.yaml" - ) + private val parser = new NCEnSemanticEntityParser("models/lightswitch_model.yaml") /** * diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala index 9402faf..e621782 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCBracketsTokenEnricherSpec.scala @@ -18,6 +18,7 @@ package org.apache.nlpcraft.nlp.token.enricher.en import org.apache.nlpcraft.* +import org.apache.nlpcraft.nlp.en.token.enricher.NCBracketsTokenEnricher import org.apache.nlpcraft.nlp.token.enricher.en.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala index 0bf56b6..c961a2e 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCDictionaryTokenEnricherSpec.scala @@ -18,6 +18,7 @@ package org.apache.nlpcraft.nlp.token.enricher.en import org.apache.nlpcraft.internal.util.NCResourceReader +import org.apache.nlpcraft.nlp.en.token.enricher.{NCDictionaryTokenEnricher, NCEnLemmaPosTokenEnricher} import org.apache.nlpcraft.nlp.token.enricher.en.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* @@ -31,12 +32,7 @@ import scala.jdk.CollectionConverters.* class NCDictionaryTokenEnricherSpec: private val dictEnricher = new NCDictionaryTokenEnricher() - private val reader = new NCResourceReader() - - private val lemmaPosEnricher = new NCLemmaPosTokenEnricher( - reader.getPath("opennlp/en-pos-maxent.bin"), - reader.getPath("opennlp/en-lemmatizer.dict") - ) + private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher() @Test def test(): Unit = diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala index 2071b06..376f0b0 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCQuotesTokenEnricherSpec.scala @@ -19,6 +19,7 @@ package org.apache.nlpcraft.nlp.token.enricher.en import org.apache.nlpcraft.NCToken import org.apache.nlpcraft.internal.util.NCResourceReader +import org.apache.nlpcraft.nlp.en.token.enricher.{NCEnLemmaPosTokenEnricher, NCQuotesTokenEnricher} import org.apache.nlpcraft.nlp.token.enricher.en.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* @@ -30,13 +31,7 @@ import scala.jdk.CollectionConverters.* * */ class NCQuotesTokenEnricherSpec: - private val reader = new NCResourceReader() - - private val lemmaPosEnricher = new NCLemmaPosTokenEnricher( - reader.getPath("opennlp/en-pos-maxent.bin"), - reader.getPath("opennlp/en-lemmatizer.dict") - ) - + private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher private val quoteEnricher = new NCQuotesTokenEnricher /** diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala index e8cd1c9..492593d 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCStopWordsEnricherSpec.scala @@ -19,6 +19,7 @@ package org.apache.nlpcraft.nlp.token.enricher.en import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCResourceReader +import org.apache.nlpcraft.nlp.en.token.enricher.* import org.apache.nlpcraft.nlp.token.enricher.en.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* @@ -31,12 +32,7 @@ import scala.jdk.CollectionConverters.* * */ class NCStopWordsEnricherSpec: - private val reader = new NCResourceReader() - - private val lemmaPosEnricher = new NCLemmaPosTokenEnricher( - reader.getPath("opennlp/en-pos-maxent.bin"), - reader.getPath("opennlp/en-lemmatizer.dict") - ) + private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher /** * diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala index c30b5b4..12b8ebc 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/NCSwearWordsTokenEnricherSpec.scala @@ -18,6 +18,7 @@ package org.apache.nlpcraft.nlp.token.enricher.en import org.apache.nlpcraft.internal.util.NCResourceReader +import org.apache.nlpcraft.nlp.en.token.enricher.NСSwearWordsTokenEnricher import org.apache.nlpcraft.nlp.token.enricher.en.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* @@ -30,7 +31,7 @@ import scala.jdk.CollectionConverters.* * */ class NCSwearWordsTokenEnricherSpec: - private val enricher = new NСSwearWordsTokenEnricher(new NCResourceReader().getPath("badfilter/swear_words.txt")) + private val enricher = new NСSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")) @Test def test(): Unit = diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala index 144f08a..4787634 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImplSpec.scala @@ -18,8 +18,8 @@ package org.apache.nlpcraft.nlp.token.enricher.en.impl import org.apache.nlpcraft.* +import org.apache.nlpcraft.nlp.en.token.enricher.impl.NCStopWordsTokenEnricherImpl import org.apache.nlpcraft.nlp.token.enricher.en.* -import org.apache.nlpcraft.nlp.token.enricher.en.impl.NCStopWordsTokenEnricherImpl import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* import org.junit.jupiter.api.* diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala index dc56f8e..57ecd4e 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNLPTokenParserSpec.scala @@ -20,6 +20,7 @@ package org.apache.nlpcraft.nlp.token.parser.opennlp import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.ascii.NCAsciiTable import org.apache.nlpcraft.internal.util.NCResourceReader +import org.apache.nlpcraft.nlp.en.token.enricher.{NCEnLemmaPosTokenEnricher, NCStopWordsTokenEnricher} import org.apache.nlpcraft.nlp.token.enricher.en.* import org.apache.nlpcraft.nlp.util.* import org.apache.nlpcraft.nlp.util.opennlp.* @@ -32,13 +33,7 @@ import scala.jdk.CollectionConverters.* * */ class NCOpenNLPTokenParserSpec: - private val reader = new NCResourceReader() - - private val lemmaPosEnricher = new NCLemmaPosTokenEnricher( - reader.getPath("opennlp/en-pos-maxent.bin"), - reader.getPath("opennlp/en-lemmatizer.dict") - ) - + private val lemmaPosEnricher = new NCEnLemmaPosTokenEnricher private val stopEnricher = new NCStopWordsTokenEnricher(null, null) private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword") diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala index bc2c4f1..2ba0cd8 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala @@ -18,7 +18,7 @@ package org.apache.nlpcraft.nlp.util import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser +import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.util.NCTestPipeline.* import java.util.{Optional, ArrayList as JList} diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/opennlp/NCTestConfig.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/opennlp/NCTestConfig.scala index 7ae1fc6..505ee5f 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/opennlp/NCTestConfig.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/opennlp/NCTestConfig.scala @@ -18,7 +18,7 @@ package org.apache.nlpcraft.nlp.util.opennlp import org.apache.nlpcraft.NCModelConfig -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser +import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.util.* /** diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/opennlp/NCTestConfigJava.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/opennlp/NCTestConfigJava.java index 25774b6..e49f5f7 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/opennlp/NCTestConfigJava.java +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/opennlp/NCTestConfigJava.java @@ -19,7 +19,7 @@ package org.apache.nlpcraft.nlp.util.opennlp; import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.internal.util.NCResourceReader; -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser; +import org.apache.nlpcraft.nlp.mult.token.parser.opennlp.NCOpenNLPTokenParser; import org.apache.nlpcraft.nlp.util.NCTestPipeline; /** @@ -32,12 +32,7 @@ public class NCTestConfigJava { public static final NCModelConfig CFG = new NCModelConfig("testId", "test", "1.0", "Test description", "Test origin"); /** */ - private static NCResourceReader reader = new NCResourceReader(); - - /** - * - */ public static final NCTestPipeline EN_PIPELINE = new NCTestPipeline( - new NCOpenNLPTokenParser(reader.getPath("opennlp/en-token.bin")) + new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")) ); }