This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master by this push:
     new ebd6a60  Model builder refactoring.
ebd6a60 is described below

commit ebd6a60724a20e26d633a01fedaaed2fb8a612e8
Author: Sergey Kamov <skhdlem...@gmail.com>
AuthorDate: Wed Mar 16 22:30:39 2022 +0300

    Model builder refactoring.
---
 .../apache/nlpcraft/examples/time/EchoModel.java   |   2 +-
 .../lightswitch/LightSwitchGroovyModel.groovy      |   2 +-
 .../examples/lightswitch/LightSwitchJavaModel.java |   2 +-
 .../examples/lightswitch/LightSwitchKotlinModel.kt |   2 +-
 .../lightswitch/LightSwitchScalaModel.scala        |   2 +-
 .../apache/nlpcraft/examples/time/TimeModel.java   |   2 +-
 .../apache/nlpcraft/NCModelPipelineBuilder.java    | 107 ++++++++++-----------
 .../apache/nlpcraft/nlp/NCEntityEnricherSpec.scala |   4 +-
 .../nlpcraft/nlp/NCEntityValidatorSpec.scala       |   4 +-
 .../apache/nlpcraft/nlp/NCTokenEnricherSpec.scala  |   4 +-
 .../apache/nlpcraft/nlp/NCTokenValidatorSpec.scala |   4 +-
 .../apache/nlpcraft/nlp/NCVariantFilterSpec.scala  |   4 +-
 12 files changed, 74 insertions(+), 65 deletions(-)

diff --git a/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java b/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java
index a514717..9715f70 100644
--- a/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java
+++ b/nlpcraft-examples/echo/src/main/java/org/apache/nlpcraft/examples/time/EchoModel.java
@@ -39,7 +39,7 @@ public class EchoModel extends NCModelAdapter {
     public EchoModel() {
         super(
             new NCModelConfig("nlpcraft.echo.ex", "Echo Example Model", "1.0"),
-            new NCModelPipelineBuilder().withLanguage("EN").build()
+            new NCModelPipelineBuilder().build()
         );
     }
 
diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
index 27d3999..722d2c1 100644
--- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
+++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchGroovyModel.groovy
@@ -35,7 +35,7 @@ class LightSwitchGroovyModel extends NCModelAdapter {
     LightSwitchGroovyModel() {
         super(
             new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0"),
-            new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "lightswitch_model.yaml").build()
+            new NCModelPipelineBuilder().withSemantic("EN", "lightswitch_model.yaml").build()
         )
     }
 
diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
index b3058b8..eb050a8 100644
--- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
+++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchJavaModel.java
@@ -38,7 +38,7 @@ public class LightSwitchJavaModel extends NCModelAdapter {
     public LightSwitchJavaModel() {
         super(
             new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0"),
-            new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "lightswitch_model.yaml").build()
+            new NCModelPipelineBuilder().withSemantic("EN", "lightswitch_model.yaml").build()
         );
     }
 
diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
index 72a4e58..0082944 100644
--- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
+++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchKotlinModel.kt
@@ -37,7 +37,7 @@ import java.util.stream.Collectors
  */
 class LightSwitchKotlinModel : NCModelAdapter(
     NCModelConfig("nlpcraft.lightswitch.kotlin.ex", "LightSwitch Example Model", "1.0"),
-    NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "lightswitch_model.yaml").build()
+    NCModelPipelineBuilder().withSemantic("EN", "lightswitch_model.yaml").build()
 ) {
     /**
      * Intent and its on-match callback.
diff --git a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
index 451dc4d..0852975 100644
--- a/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
+++ b/nlpcraft-examples/lightswitch/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchScalaModel.scala
@@ -40,7 +40,7 @@ import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
 
 class LightSwitchScalaModel extends NCModelAdapter(
     new NCModelConfig("nlpcraft.lightswitch.java.ex", "LightSwitch Example Model", "1.0"),
-    new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "lightswitch_model.yaml").build()
+    new NCModelPipelineBuilder().withSemantic("EN", "lightswitch_model.yaml").build()
 ):
     /**
       * Intent and its on-match callback.
diff --git a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
index 365f503..127c464 100644
--- a/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
+++ b/nlpcraft-examples/time/src/main/java/org/apache/nlpcraft/examples/time/TimeModel.java
@@ -75,7 +75,7 @@ public class TimeModel extends NCModelAdapter {
     public TimeModel() {
         super(
             new NCModelConfig("nlpcraft.time.ex", "Time Example Model", "1.0"),
-            new NCModelPipelineBuilder().withLanguage("EN").withSemantic("EN", "time_model.yaml").build()
+            new NCModelPipelineBuilder().withSemantic("EN", "time_model.yaml").build()
         );
     }
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
index b8c08fa..1fd8500 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
@@ -49,6 +49,30 @@ public class NCModelPipelineBuilder {
     private Optional<NCVariantFilter> varFilter = Optional.empty();
 
     /**
+     *
+     * @return
+     */
+    private static NCSemanticStemmer mkEnStemmer() {
+        return new NCSemanticStemmer() {
+            private final PorterStemmer ps = new PorterStemmer();
+
+            @Override
+            public synchronized String stem(String txt) {
+                return ps.stem(txt.toLowerCase()); // TODO:
+            }
+        };
+    }
+
+    /**
+     *
+     * @return
+     */
+    private NCOpenNLPTokenParser mkEnOpenNlpTokenParser() {
+        return new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"));
+    }
+
+
+    /**
      * @param tokEnrichers
      * @return This instance for call chaining.
      */
@@ -196,31 +220,21 @@ public class NCModelPipelineBuilder {
         return this;
     }
 
-    public NCModelPipelineBuilder withLanguage(String lang) {
-        Objects.requireNonNull(lang, "Language cannot be null.");
-
-        switch (lang.toUpperCase()) {
-            case "EN":
-                tokParser = new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"));
-
-                tokEnrichers.add(new NCOpenNLPLemmaPosTokenEnricher(
-                    NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
-                    NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
-                ));
-                tokEnrichers.add(new NCEnStopWordsTokenEnricher());
-                tokEnrichers.add(new NСEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
-                tokEnrichers.add(new NCEnQuotesTokenEnricher());
-                tokEnrichers.add(new NCEnDictionaryTokenEnricher());
-                tokEnrichers.add(new NCEnBracketsTokenEnricher());
-
-                this.entParsers.addAll(entParsers);
-
-                break;
-            default:
-                throw new IllegalArgumentException("Unsupported language: " + lang);
-        }
-
-        return this;
+    /**
+     *
+     */
+    private void setEnComponents() {
+        tokParser = mkEnOpenNlpTokenParser();
+
+        tokEnrichers.add(new NCOpenNLPLemmaPosTokenEnricher(
+            NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
+            NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
+        ));
+        tokEnrichers.add(new NCEnStopWordsTokenEnricher());
+        tokEnrichers.add(new NСEnSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
+        tokEnrichers.add(new NCEnQuotesTokenEnricher());
+        tokEnrichers.add(new NCEnDictionaryTokenEnricher());
+        tokEnrichers.add(new NCEnBracketsTokenEnricher());
     }
 
     /**
@@ -231,23 +245,15 @@ public class NCModelPipelineBuilder {
      * @return
      */
     public NCModelPipelineBuilder withSemantic(String lang, Map<String, String> macros, List<NCSemanticElement> elms) {
+        Objects.requireNonNull(lang, "Language cannot be null.");
+        Objects.requireNonNull(elms, "Model elements cannot be null.");
+        if (elms.isEmpty()) throw new IllegalArgumentException("Model elements cannot be empty.");
+
         switch (lang.toUpperCase()) {
             case "EN":
-                this.entParsers.add(
-                    new NCSemanticEntityParser(
-                        new NCSemanticStemmer() {
-                            private final PorterStemmer ps = new PorterStemmer();
-
-                            @Override
-                            public synchronized String stem(String txt) {
-                                return ps.stem(txt.toLowerCase()); // TODO:
-                            }
-                        },
-                        new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")),
-                        macros,
-                        elms
-                    )
-                );
+                setEnComponents();
+
+                this.entParsers.add(new NCSemanticEntityParser(mkEnStemmer(), mkEnOpenNlpTokenParser(), macros, elms));
 
                 break;
 
@@ -275,22 +281,14 @@ public class NCModelPipelineBuilder {
      * @return
      */
     public NCModelPipelineBuilder withSemantic(String lang, String src) {
+        Objects.requireNonNull(lang, "Language cannot be null.");
+        Objects.requireNonNull(src, "Model source cannot be null.");
+
         switch (lang.toUpperCase()) {
             case "EN":
-                this.entParsers.add(
-                    new NCSemanticEntityParser(
-                        new NCSemanticStemmer() {
-                            private final PorterStemmer ps = new PorterStemmer();
-
-                            @Override
-                            public synchronized String stem(String txt) {
-                                return ps.stem(txt.toLowerCase()); // TODO:
-                            }
-                        },
-                        new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")),
-                        src
-                    )
-                );
+                setEnComponents();
+
+                this.entParsers.add(new NCSemanticEntityParser(mkEnStemmer(), mkEnOpenNlpTokenParser(), src));
 
                 break;
 
@@ -301,6 +299,7 @@ public class NCModelPipelineBuilder {
         return this;
     }
 
+
     /**
      * @return
      */
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala
index f5414e4..8f05dde 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityEnricherSpec.scala
@@ -18,7 +18,9 @@
 package org.apache.nlpcraft.nlp
 
 import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
 import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
 import org.apache.nlpcraft.nlp.util.NCTestUtils
 import org.junit.jupiter.api.Test
 
@@ -38,7 +40,7 @@ class NCEntityEnricherSpec:
 
     private def mkBuilder(): NCModelPipelineBuilder =
         new NCModelPipelineBuilder().
-            withLanguage("EN").
+            withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))).
             //  For intents matching, we have to add at least one entity parser.
             withEntityParser(new NCNLPEntityParser)
 
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala
index de6070f..0b40526 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCEntityValidatorSpec.scala
@@ -18,6 +18,8 @@
 package org.apache.nlpcraft.nlp
 
 import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
 import org.apache.nlpcraft.nlp.util.NCTestUtils
 import org.junit.jupiter.api.Test
 
@@ -34,7 +36,7 @@ class NCEntityValidatorSpec:
 
         NCTestUtils.askSomething(mdl, ok)
 
-    private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder().withLanguage("EN")
+    private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder().withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")))
     private def mkPipeline(apply: NCModelPipelineBuilder => NCModelPipelineBuilder): NCModelPipeline = apply(mkBuilder()).build()
 
     @Test
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
index 485369d..802742c 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenEnricherSpec.scala
@@ -18,7 +18,9 @@
 package org.apache.nlpcraft.nlp
 
 import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
 import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
 import org.apache.nlpcraft.nlp.util.NCTestUtils
 import org.junit.jupiter.api.Test
 
@@ -38,7 +40,7 @@ class NCTokenEnricherSpec:
 
     private def mkBuilder(): NCModelPipelineBuilder =
         new NCModelPipelineBuilder().
-            withLanguage("EN").
+            withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))).
             //  For intents matching, we have to add at least one entity parser.
             withEntityParser(new NCNLPEntityParser)
 
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala
index e7b0a4a..e9c938c 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCTokenValidatorSpec.scala
@@ -18,6 +18,8 @@
 package org.apache.nlpcraft.nlp
 
 import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
 import org.apache.nlpcraft.nlp.util.NCTestUtils
 import org.junit.jupiter.api.Test
 
@@ -34,7 +36,7 @@ class NCTokenValidatorSpec:
 
         NCTestUtils.askSomething(mdl, ok)
 
-    private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder().withLanguage("EN")
+    private def mkBuilder(): NCModelPipelineBuilder = new NCModelPipelineBuilder().withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")))
     private def mkPipeline(apply: NCModelPipelineBuilder => NCModelPipelineBuilder): NCModelPipeline = apply(mkBuilder()).build()
 
     @Test
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
index a9274be..d5b7142 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala
@@ -18,7 +18,9 @@
 package org.apache.nlpcraft.nlp
 
 import org.apache.nlpcraft.*
+import org.apache.nlpcraft.internal.util.NCResourceReader
 import org.apache.nlpcraft.nlp.entity.parser.NCNLPEntityParser
+import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
 import org.apache.nlpcraft.nlp.util.NCTestUtils
 import org.junit.jupiter.api.Test
 
@@ -39,7 +41,7 @@ class NCVariantFilterSpec:
 
     private def mkBuilder(): NCModelPipelineBuilder =
         new NCModelPipelineBuilder().
-            withLanguage("EN").
+            withTokenParser(new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))).
             //  For intents matching, we have to add at least one entity parser.
             withEntityParser(new NCNLPEntityParser)
 

Reply via email to