This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push: new ebd6a60 Model builder refactoring. ebd6a60 is described below commit ebd6a60724a20e26d633a01fedaaed2fb8a612e8 Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Wed Mar 16 22:30:39 2022 +0300 Model builder refactoring. --- .../apache/nlpcraft/examples/time/EchoModel.java | 2 +- .../lightswitch/LightSwitchGroovyModel.groovy | 2 +- .../examples/lightswitch/LightSwitchJavaModel.java | 2 +- .../examples/lightswitch/LightSwitchKotlinModel.kt | 2 +- .../lightswitch/LightSwitchScalaModel.scala | 2 +- .../apache/nlpcraft/examples/time/TimeModel.java | 2 +- .../apache/nlpcraft/NCModelPipelineBuilder.java | 107 ++++++++++----------- .../apache/nlpcraft/nlp/NCEntityEnricherSpec.scala | 4 +- .../nlpcraft/nlp/NCEntityValidatorSpec.scala | 4 +- .../apache/nlpcraft/nlp/NCTokenEnricherSpec.scala | 4 +- .../apache/nlpcraft/nlp/NCTokenValidatorSpec.scala | 4 +- .../apache/nlpcraft/nlp/NCVariantFilterSpec.scala | 4 +- 12 files changed, 74 insertions(+), 65 deletions(-) diff --git a/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java b/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java index a514717..9715f70 100644 --- a/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java +++ b/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java @@ -39,7 +39,7 @@ public class EchoModel extends NCModelAdapter { public EchoModel() { super( new NCModelConfig("nlpcraft.echo.ex", "Echo Example Model", "1.0"), - new NCModelPipelineBuilder().withLanguage("EN").build() + new NCModelPipelineBuilder().build() ); } diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy index 27d3999..722d2c1 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy @@ -35,7 +35,7 @@ class LightSwitchGroovyModel extends NCModelAdapter { LightSwitchGroovyModel() { super( new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0"), - new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "lightswitch_model.yaml").build() + new NCModelPipelineBuilder().withSemantic("EN", "lightswitch_model.yaml").build() ) } diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java index b3058b8..eb050a8 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java @@ -38,7 +38,7 @@ public class LightSwitchJavaModel extends NCModelAdapter { public LightSwitchJavaModel() { super( new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0"), - new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "lightswitch_model.yaml").build() + new NCModelPipelineBuilder().withSemantic("EN", "lightswitch_model.yaml").build() ); } diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt index 72a4e58..0082944 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt @@ -37,7 +37,7 @@ import java.util.stream.Collectors */ class LightSwitchKotlinModel : NCModelAdapter( NCModelConfig("nlpcraft.lightswitch.kotlin.ex", "LightSwitch Example Model", "1.0"), - NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "lightswitch_model.yaml").build() + NCModelPipelineBuilder().withSemantic("EN", "lightswitch_model.yaml").build() ) { /** * Intent and its on-match callback. diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala index 451dc4d..0852975 100644 --- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala +++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala @@ -40,7 +40,7 @@ import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser class LightSwitchScalaModel extends NCModelAdapter( new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0"), - new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "lightswitch_model.yaml").build() + new NCModelPipelineBuilder().withSemantic("EN", "lightswitch_model.yaml").build() ): /** * Intent and its on-match callback. diff --git a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java index 365f503..127c464 100644 --- a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java +++ b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java @@ -75,7 +75,7 @@ public class TimeModel extends NCModelAdapter { public TimeModel() { super( new NCModelConfig("nlpcraft.time.ex", "Time Example Model", "1.0"), - new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "time_model.yaml").build() + new NCModelPipelineBuilder().withSemantic("EN", "time_model.yaml").build() ); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java index b8c08fa..1fd8500 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java @@ -49,6 +49,30 @@ public class NCModelPipelineBuilder { private Optional<NCVariantFilter> varFilter = Optional.empty(); /** + * + * @return + */ + private static NCSemanticStemmer mkEnStemmer() { + return new NCSemanticStemmer() { + private final PorterStemmer ps = new PorterStemmer(); + + @Override + public synchronized String stem(String txt) { + return ps.stem(txt.toLowerCase()); // TODO: + } + }; + } + + /** + * + * @return + */ + private NCOpenNLPTokenParser mkEnOpenNlpTokenParser() { + return new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")); + } + + + /** * @param tokEnrichers * @return This instance for call chaining. */ @@ -196,31 +220,21 @@ public class NCModelPipelineBuilder { return this; } - public NCModelPipelineBuilder withLanguage(String lang) { - Objects.requireNonNull(lang, "Language cannot be null."); - - switch (lang.toUpperCase()) { - case "EN": - tokParser = new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")); - - tokEnrichers.add(new NCOpenNLPLemmaPosTokenEnricher( - NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), - NCResourceReader.getPath("opennlp/en-lemmatizer.dict") - )); - tokEnrichers.add(new NCEnStopWordsTokenEnricher()); - tokEnrichers.add(new NСEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt"))); - tokEnrichers.add(new NCEnQuotesTokenEnricher()); - tokEnrichers.add(new NCEnDictionaryTokenEnricher()); - tokEnrichers.add(new NCEnBracketsTokenEnricher()); - - this.entParsers.addAll(entParsers); - - break; - default: - throw new IllegalArgumentException("Unsupported language: " + lang); - } - - return this; + /** + * + */ + private void setEnComponents() { + tokParser = mkEnOpenNlpTokenParser(); + + tokEnrichers.add(new NCOpenNLPLemmaPosTokenEnricher( + NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), + NCResourceReader.getPath("opennlp/en-lemmatizer.dict") + )); + tokEnrichers.add(new NCEnStopWordsTokenEnricher()); + tokEnrichers.add(new NСEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt"))); + tokEnrichers.add(new NCEnQuotesTokenEnricher()); + tokEnrichers.add(new NCEnDictionaryTokenEnricher()); + tokEnrichers.add(new NCEnBracketsTokenEnricher()); } /** @@ -231,23 +245,15 @@ public class NCModelPipelineBuilder { * @return */ public NCModelPipelineBuilder withSemantic(String lang, Map<String, String> macros, List<NCSemanticElement> elms) { + Objects.requireNonNull(lang, "Language cannot be null."); + Objects.requireNonNull(elms, "Model elements cannot be null."); + if (elms.isEmpty()) throw new IllegalArgumentException("Model elements cannot be empty."); + switch (lang.toUpperCase()) { case "EN": - this.entParsers.add( - new NCSemanticEntityParser( - new NCSemanticStemmer() { - private final PorterStemmer ps = new PorterStemmer(); - - @Override - public synchronized String stem(String txt) { - return ps.stem(txt.toLowerCase()); // TODO: - } - }, - new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")), - macros, - elms - ) - ); + setEnComponents(); + + this.entParsers.add(new NCSemanticEntityParser(mkEnStemmer(), mkEnOpenNlpTokenParser(), macros, elms)); break; @@ -275,22 +281,14 @@ public class NCModelPipelineBuilder { * @return */ public NCModelPipelineBuilder withSemantic(String lang, String src) { + Objects.requireNonNull(lang, "Language cannot be null."); + Objects.requireNonNull(src, "Model source cannot be null."); + switch (lang.toUpperCase()) { case "EN": - this.entParsers.add( - new NCSemanticEntityParser( - new NCSemanticStemmer() { - private final PorterStemmer ps = new PorterStemmer(); - - @Override - public synchronized String stem(String txt) { - return ps.stem(txt.toLowerCase()); // TODO: - } - }, - new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")), - src - ) - ); + setEnComponents(); + + this.entParsers.add(new NCSemanticEntityParser(mkEnStemmer(), mkEnOpenNlpTokenParser(), src)); break; @@ -301,6 +299,7 @@ public class NCModelPipelineBuilder { return this; } + /** * @return */ diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala index f5414e4..8f05dde 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala @@ -18,7 +18,9 @@ package org.apache.nlpcraft.nlp import org.apache.nlpcraft.* +import org.apache.nlpcraft.internal.util.NCResourceReader import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser +import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.util.NCTestUtils import org.junit.jupiter.api.Test @@ -38,7 +40,7 @@ class NCEntityEnricherSpec: private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder(). - withLanguage("EN"). + withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))). // For intents matching, we have to add at least one entity parser. withEntityParser(new NCNLPEntityParser) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala index de6070f..0b40526 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala @@ -18,6 +18,8 @@ package org.apache.nlpcraft.nlp import org.apache.nlpcraft.* +import org.apache.nlpcraft.internal.util.NCResourceReader +import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.util.NCTestUtils import org.junit.jupiter.api.Test @@ -34,7 +36,7 @@ class NCEntityValidatorSpec: NCTestUtils.askSomething(mdl, ok) - private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder().withLanguage("EN") + private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder().withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))) private def mkPipeline(apply: NCModelPipelineBuilder => NCModelPipelineBuilder): NCModelPipeline = apply(mkBuilder()).build() @Test diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala index 485369d..802742c 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala @@ -18,7 +18,9 @@ package org.apache.nlpcraft.nlp import org.apache.nlpcraft.* +import org.apache.nlpcraft.internal.util.NCResourceReader import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser +import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.util.NCTestUtils import org.junit.jupiter.api.Test @@ -38,7 +40,7 @@ class NCTokenEnricherSpec: private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder(). - withLanguage("EN"). + withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))). // For intents matching, we have to add at least one entity parser. withEntityParser(new NCNLPEntityParser) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala index e7b0a4a..e9c938c 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala @@ -18,6 +18,8 @@ package org.apache.nlpcraft.nlp import org.apache.nlpcraft.* +import org.apache.nlpcraft.internal.util.NCResourceReader +import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.util.NCTestUtils import org.junit.jupiter.api.Test @@ -34,7 +36,7 @@ class NCTokenValidatorSpec: NCTestUtils.askSomething(mdl, ok) - private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder().withLanguage("EN") + private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder().withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))) private def mkPipeline(apply: NCModelPipelineBuilder => NCModelPipelineBuilder): NCModelPipeline = apply(mkBuilder()).build() @Test diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala index a9274be..d5b7142 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala @@ -18,7 +18,9 @@ package org.apache.nlpcraft.nlp import org.apache.nlpcraft.* +import org.apache.nlpcraft.internal.util.NCResourceReader import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser +import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.util.NCTestUtils import org.junit.jupiter.api.Test @@ -39,7 +41,7 @@ class NCVariantFilterSpec: private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder(). - withLanguage("EN"). + withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))). // For intents matching, we have to add at least one entity parser. withEntityParser(new NCNLPEntityParser)