[incubator-nlpcraft] branch master updated: Tests infrastructure improved.

sergeykamov Thu, 17 Mar 2022 01:26:26 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/master by this push:
     new c28f449  Tests infrastructure improved.
c28f449 is described below

commit c28f44965492ae2d7dd37c7b78ed727ea6f3b09e
Author: Sergey Kamov <skhdlem...@gmail.com>
AuthorDate: Thu Mar 17 11:20:23 2022 +0300

    Tests infrastructure improved.
---
 .../parser/stanford/NCStanfordNLPTokenParser.java  |  3 ++-
 .../stanford/NCStanfordNLPEntityParserSpec.scala   |  2 +-
 .../stanford/NCStanfordNLPTokenParserSpec.scala    |  3 +--
 .../nlpcraft/nlp/util/stanford/NCTestConfig.scala  |  5 +----
 .../nlpcraft/internal/impl/NCModelClientSpec.scala |  4 +---
 .../internal/impl/NCModelPingPongSpec.scala        |  4 +---
 .../internal/impl/NCModelPipelineManagerSpec.scala |  6 ++---
 .../internal/impl/scan/NCTestModelJava.java        |  2 +-
 .../nlp/entity/parser/NCNLPEntityParserSpec.scala  |  2 +-
 .../entity/parser/NCOpenNLPEntityParserSpec.scala  | 26 +++++++++++++---------
 .../semantic/NCSemanticEntityParserJsonSpec.scala  |  8 ++-----
 .../semantic/NCSemanticEntityParserLemmaSpec.scala | 14 ++++--------
 .../semantic/NCSemanticEntityParserSpec.scala      | 24 ++++++++------------
 .../semantic/NCSemanticEntityParserYamlSpec.scala  |  8 ++-----
 .../enricher/NCBracketsTokenEnricherSpec.scala     |  2 +-
 .../enricher/NCDictionaryTokenEnricherSpec.scala   | 10 ++-------
 .../token/enricher/NCQuotesTokenEnricherSpec.scala |  9 ++------
 .../token/enricher/NCStopWordsEnricherSpec.scala   |  9 ++------
 .../enricher/NCSwearWordsTokenEnricherSpec.scala   |  2 +-
 .../token/enricher/impl/NCStopWordsImplSpec.scala  |  2 +-
 .../token/parser/NCOpenNLPTokenParserSpec.scala    | 13 +++--------
 .../apache/nlpcraft/nlp/util/NCTestConfig.scala    | 12 ++++------
 .../apache/nlpcraft/nlp/util/NCTestConfigJava.java | 16 +++++++------
 .../nlpcraft/nlp/util/NCTestModelAdapter.scala     |  4 ++--
 .../org/apache/nlpcraft/nlp/util/NCTestUtils.scala |  4 ++--
 25 files changed, 72 insertions(+), 122 deletions(-)

diff --git 
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java
 
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java
index 747ac3e..78cd92e 100644
--- 
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java
+++ 
b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java
@@ -26,7 +26,8 @@ import java.util.List;
 import java.util.Objects;
 
 /**
- *
+ * TODO:
+ * Note that this tokenizer also adds 2 properties to the parsed token: lemma 
and pos
  */
 public class NCStanfordNLPTokenParser implements NCTokenParser {
     private final NCStanfordNLPTokenParserImpl impl;
diff --git 
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
 
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
index 49caefa..13eeabd 100644
--- 
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
+++ 
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala
@@ -34,7 +34,7 @@ class NCStanfordNLPEntityParserSpec:
     def test(): Unit =
         val txt = "Los Angeles, 23 August, 23 and sergeyka...@apache.org, 
tomorrow"
 
-        val toks = EN_STANFORD_PIPELINE.getTokenParser.tokenize(txt)
+        val toks = TOK_STANFORD_PARSER.tokenize(txt)
         NCTestUtils.printTokens(toks.asScala.toSeq)
 
         val res = parser.parse(NCTestRequest(txt), CFG, toks)
diff --git 
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala
 
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala
index 204e0c5..157d7d2 100644
--- 
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala
+++ 
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala
@@ -31,8 +31,7 @@ import scala.jdk.CollectionConverters.*
 class NCStanfordNLPTokenParserSpec:
     @Test
     def test(): Unit =
-        val toks =
-            EN_STANFORD_PIPELINE.getTokenParser.tokenize("I had a lunch with 
brand names 'AAA'").asScala.toSeq
+        val toks = TOK_STANFORD_PARSER.tokenize("I had a lunch with brand 
names 'AAA'").asScala.toSeq
 
         require(toks.sizeIs > 1)
         NCTestUtils.printTokens(toks)
diff --git 
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala
 
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala
index 4e5f644..ea80fdc 100644
--- 
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala
+++ 
b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala
@@ -17,7 +17,4 @@ final val STANFORD =
     props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner")
     new StanfordCoreNLP(props)
 
-/**
-  *
-  */
-final val EN_STANFORD_PIPELINE = NCTestPipeline(new 
NCStanfordNLPTokenParser(STANFORD))
+final val TOK_STANFORD_PARSER = new NCStanfordNLPTokenParser(STANFORD)
\ No newline at end of file
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala
index eb3df69..5123c84 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala
@@ -28,9 +28,7 @@ import scala.util.Using
 
 class NCModelClientSpec:
     private def test0(mdl: NCTestModelAdapter): Unit =
-        mdl.getPipeline.getEntityParsers.add(
-            NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml")
-        )
+        
mdl.getPipeline.getEntityParsers.add(NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml"))
 
         Using.resource(new NCModelClient(mdl)) { client =>
             val res = client.ask("Lights on at second floor kitchen", null, 
"userId")
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala
index 2650993..31e4cf5 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala
@@ -59,9 +59,7 @@ class NCModelPingPongSpec:
             def onOther(im: NCIntentMatch, @NCIntentTerm("other") other: 
NCEntity): NCResult =
                 R(ASK_RESULT, s"Some request by: ${other.mkText()}")
 
-    MDL.getPipeline.getEntityParsers.add(
-        NCTestUtils.mkENSemanticParser(Seq(STE("command"), STE("confirm"), 
STE("other")).asJava)
-    )
+    
MDL.getPipeline.getEntityParsers.add(NCTestUtils.mkENSemanticParser(Seq(STE("command"),
 STE("confirm"), STE("other")).asJava))
 
     @BeforeEach
     def setUp(): Unit = client = new NCModelClient(MDL)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala
index 86ff1b5..bb78d4d 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala
@@ -41,11 +41,9 @@ class NCModelPipelineManagerSpec:
     @Test
     def test(): Unit =
         def test(txt: String, variantCnt: Int, elements: NCSemanticElement*): 
Unit =
-            val pipeline = EN_PIPELINE.clone()
+            val pipeline = mkEnPipeline
 
-            val parser = NCTestUtils.mkENSemanticParser(elements.asJava)
-            pipeline.getEntityParsers.clear()
-            pipeline.getEntityParsers.add(parser)
+            
pipeline.getEntityParsers.add(NCTestUtils.mkENSemanticParser(elements.asJava))
 
             val res = new NCModelPipelineManager(CFG, pipeline).prepare(txt, 
null, "userId")
 
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java
index 42dc72f..2289709 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java
@@ -40,7 +40,7 @@ public class NCTestModelJava {
      */
     public static NCModel mkModel() {
         return
-            new NCModelAdapter(NCTestConfigJava.CFG, 
NCTestConfigJava.EN_PIPELINE) {
+            new NCModelAdapter(NCTestConfigJava.CFG, 
NCTestConfigJava.mkEnPipeline()) {
                 @NCIntent(
                     "intent=locInt term(single)~{# == 'id1'} term(list)~{# == 
'id2'}[0,10] term(opt)~{# == 'id3'}?"
                 )
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala
index 4ace4ac..b652b9b 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala
@@ -41,7 +41,7 @@ class NCNLPEntityParserSpec:
     @Test
     def test(): Unit =
         val req = NCTestRequest("I had the lunch")
-        val toks = EN_PIPELINE.getTokenParser.tokenize(req.txt)
+        val toks = EN_TOK_PARSER.tokenize(req.txt)
         val entities = parser.parse(req, CFG, toks).asScala.toSeq
 
         NCTestUtils.printEntities(req.txt, entities)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala
index 1ab256c..cc15bca 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala
@@ -33,16 +33,20 @@ import scala.jdk.OptionConverters.RichOptional
   *
   */
 class NCOpenNLPEntityParserSpec:
-    private val parser = new NCOpenNLPEntityParser(
-        Seq(
-            "opennlp/en-ner-location.bin",
-            "opennlp/en-ner-money.bin",
-            "opennlp/en-ner-person.bin",
-            "opennlp/en-ner-organization.bin",
-            "opennlp/en-ner-date.bin",
-            "opennlp/en-ner-percentage.bin"
-        ).map(NCResourceReader.getPath).asJava
-    )
+    private val parser =
+        val list = new java.util.concurrent.CopyOnWriteArrayList[String]()
+
+        NCUtils.execPar(
+            Seq(
+                "opennlp/en-ner-location.bin",
+                "opennlp/en-ner-money.bin",
+                "opennlp/en-ner-person.bin",
+                "opennlp/en-ner-organization.bin",
+                "opennlp/en-ner-date.bin",
+                "opennlp/en-ner-percentage.bin"
+            ).map(p => () => 
list.add(NCResourceReader.getPath(p)))*)(ExecutionContext.Implicits.global)
+
+        new NCOpenNLPEntityParser(list)
 
     /**
       *
@@ -51,7 +55,7 @@ class NCOpenNLPEntityParserSpec:
       */
     private def check(txt: String, expected: String): Unit =
         val req = NCTestRequest(txt)
-        val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+        val toks = EN_TOK_PARSER.tokenize(txt)
         val ents = parser.parse(req, CFG, toks).asScala.toSeq
 
         NCTestUtils.printEntities(txt, ents)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
index e588f4a..b11bcf5 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala
@@ -34,7 +34,7 @@ import scala.jdk.OptionConverters.RichOptional
   *
   */
 class NCSemanticEntityParserJsonSpec:
-    private val parser = 
NCTestUtils.mkENSemanticParser("models/alarm_model.json")
+    private val semParser = 
NCTestUtils.mkENSemanticParser("models/alarm_model.json")
 
     /**
       * 
@@ -44,11 +44,7 @@ class NCSemanticEntityParserJsonSpec:
       */
     private def check(txt: String, id: String, elemData: Option[Map[String, 
Any]] = None): Unit =
         val req = NCTestRequest(txt)
-        val ents = parser.parse(
-            req,
-            CFG,
-            EN_PIPELINE.getTokenParser.tokenize(req.txt)
-        ).asScala.toSeq
+        val ents = semParser.parse(req, CFG, 
EN_TOK_PARSER.tokenize(req.txt)).asScala.toSeq
 
         NCTestUtils.printEntities(txt, ents)
 
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
index d3e15b2..73f0fd8 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala
@@ -35,12 +35,6 @@ import scala.jdk.CollectionConverters.*
   *
   */
 class NCSemanticEntityParserLemmaSpec:
-    private val lemmaTokEnricher = new NCOpenNLPLemmaPosTokenEnricher(
-        NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
-        NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
-    )
-    private val swTokEnricher = new NCEnStopWordsTokenEnricher
-    private val tokParser = new 
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))
     private val lemmaStemmer =
         new NCSemanticStemmer():
             override def stem(txt: String): String = if wrapped(txt) then 
unwrap(txt) else UUID.randomUUID().toString
@@ -64,15 +58,15 @@ class NCSemanticEntityParserLemmaSpec:
         val mgr = new NCModelPipelineManager(
             CFG,
             new NCModelPipelineBuilder().
-                withTokenParser(tokParser).
-                withTokenEnricher(lemmaTokEnricher).
-                withTokenEnricher(swTokEnricher).
+                withTokenParser(EN_TOK_PARSER).
+                withTokenEnricher(EN_TOK_LEMMA_POS_ENRICHER).
+                withTokenEnricher(EN_TOK_STOP_ENRICHER).
                 // 1. Wraps lemmas.
                 withTokenEnricher((req: NCRequest, cfg: NCModelConfig, toks: 
JList[NCToken]) =>
                     toks.forEach(t => t.put("lemma", 
wrap(t.get[String]("lemma"))))
                 ).
                 // 2. Semantic parser with fixed stemmer which stems only 
lemmas.
-                withEntityParser(new NCSemanticEntityParser(lemmaStemmer, 
tokParser, elems.asJava)).
+                withEntityParser(new NCSemanticEntityParser(lemmaStemmer, 
EN_TOK_PARSER, elems.asJava)).
                 build()
         )
 
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
index 4c97e1f..d298673 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala
@@ -35,7 +35,7 @@ import scala.jdk.OptionConverters.RichOptional
   */
 class NCSemanticEntityParserSpec:
     import NCSemanticTestElement as E
-    private val parser =
+    private val semParser =
         NCTestUtils.mkENSemanticParser(
             Seq(
                 // Standard.
@@ -55,12 +55,6 @@ class NCSemanticEntityParserSpec:
             ).asJava
         )
 
-    private val stopWordsEnricher = new NCEnStopWordsTokenEnricher()
-    private val lemmaPosEnricher = new NCOpenNLPLemmaPosTokenEnricher(
-        NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
-        NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
-    )
-
     /**
       *
       * @param txt
@@ -70,14 +64,14 @@ class NCSemanticEntityParserSpec:
       */
     private def check(txt: String, id: String, value: Option[String] = None, 
elemData: Option[Map[String, Any]] = None): Unit =
         val req = NCTestRequest(txt)
-        val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+        val toks = EN_TOK_PARSER.tokenize(txt)
 
-        lemmaPosEnricher.enrich(req, CFG, toks)
-        stopWordsEnricher.enrich(req, CFG, toks)
+        EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks)
+        EN_TOK_STOP_ENRICHER.enrich(req, CFG, toks)
 
         NCTestUtils.printTokens(toks.asScala.toSeq)
 
-        val ents = parser.parse(req, CFG, toks).asScala.toSeq
+        val ents = semParser.parse(req, CFG, toks).asScala.toSeq
 
         NCTestUtils.printEntities(txt, ents)
         require(ents.sizeIs == 1)
@@ -99,14 +93,14 @@ class NCSemanticEntityParserSpec:
       */
     private def checkMultiple(txt: String, ids: String*): Unit =
         val req = NCTestRequest(txt)
-        val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+        val toks = EN_TOK_PARSER.tokenize(txt)
 
-        lemmaPosEnricher.enrich(req, CFG, toks)
-        stopWordsEnricher.enrich(req, CFG, toks)
+        EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks)
+        EN_TOK_STOP_ENRICHER.enrich(req, CFG, toks)
 
         NCTestUtils.printTokens(toks.asScala.toSeq)
 
-        val ents = parser.parse(req, CFG, toks).asScala.toSeq
+        val ents = semParser.parse(req, CFG, toks).asScala.toSeq
 
         NCTestUtils.printEntities(txt, ents)
         require(ents.sizeIs == ids.size)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
index 9a1df76..3a4702c 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala
@@ -32,7 +32,7 @@ import scala.jdk.OptionConverters.RichOptional
   *
   */
 class NCSemanticEntityParserYamlSpec:
-    private val parser = 
NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml")
+    private val semParser = 
NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml")
 
     /**
       * 
@@ -41,11 +41,7 @@ class NCSemanticEntityParserYamlSpec:
       */
     private def check(txt: String, id: String): Unit =
         val req = NCTestRequest(txt)
-        val ents = parser.parse(
-            req,
-            CFG,
-            EN_PIPELINE.getTokenParser.tokenize(req.txt)
-        ).asScala.toSeq
+        val ents = semParser.parse(req, CFG, 
EN_TOK_PARSER.tokenize(req.txt)).asScala.toSeq
 
         NCTestUtils.printEntities(txt, ents)
 
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala
index e74c656..9b07783 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala
@@ -36,7 +36,7 @@ class NCBracketsTokenEnricherSpec:
       * @param brackets
       */
     private def check(txt: String, brackets: Set[Integer]): Unit =
-        val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+        val toks = EN_TOK_PARSER.tokenize(txt)
         enricher.enrich(NCTestRequest(txt), CFG, toks)
         val seq = toks.asScala.toSeq
         
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala
index 244690b..43c52e7 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala
@@ -30,23 +30,17 @@ import scala.jdk.CollectionConverters.*
 class NCDictionaryTokenEnricherSpec:
     private val dictEnricher = new NCEnDictionaryTokenEnricher()
 
-    private val lemmaPosEnricher =
-        new NCOpenNLPLemmaPosTokenEnricher(
-            NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
-            NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
-        )
-
     @Test
     def test(): Unit =
         val txt = "milk XYZ"
-        val toks = EN_PIPELINE.getTokenParser.tokenize(txt).asScala.toSeq
+        val toks = EN_TOK_PARSER.tokenize(txt).asScala.toSeq
 
         require(toks.head.getOpt[Boolean]("dict:en").isEmpty)
         require(toks.last.getOpt[Boolean]("dict:en").isEmpty)
 
         val req = NCTestRequest(txt)
 
-        lemmaPosEnricher.enrich(req, CFG, toks.asJava)
+        EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks.asJava)
         dictEnricher.enrich(req, CFG, toks.asJava)
         NCTestUtils.printTokens(toks)
 
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala
index 700bfa7..0f6dbe6 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala
@@ -29,11 +29,6 @@ import scala.jdk.CollectionConverters.*
   *
   */
 class NCQuotesTokenEnricherSpec:
-    private val lemmaPosEnricher = 
-        new NCOpenNLPLemmaPosTokenEnricher(
-            NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), 
-            NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
-        )
     private val quoteEnricher = new NCEnQuotesTokenEnricher
 
     /**
@@ -42,11 +37,11 @@ class NCQuotesTokenEnricherSpec:
       * @param quotes
       */
     private def check(txt: String, quotes: Set[Integer]): Unit =
-        val toks = EN_PIPELINE.getTokenParser.tokenize(txt)
+        val toks = EN_TOK_PARSER.tokenize(txt)
         val toksSeq = toks.asScala.toSeq
 
         val req = NCTestRequest(txt)
-        lemmaPosEnricher.enrich(req, CFG, toks)
+        EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks)
         quoteEnricher.enrich(req, CFG, toks)
         
         NCTestUtils.printTokens(toksSeq)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala
index 517e30a..8bdd11a 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala
@@ -30,11 +30,6 @@ import scala.jdk.CollectionConverters.*
   *
   */
 class NCStopWordsEnricherSpec:
-    private val lemmaPosEnricher = new NCOpenNLPLemmaPosTokenEnricher(
-        NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), 
-        NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
-    )
-
     /**
       *
       * @param stopEnricher
@@ -42,7 +37,7 @@ class NCStopWordsEnricherSpec:
       * @param boolVals
       */
     private def test(stopEnricher: NCEnStopWordsTokenEnricher, txt: String, 
boolVals: Boolean*): Unit =
-        val toksList = EN_PIPELINE.getTokenParser.tokenize(txt)
+        val toksList = EN_TOK_PARSER.tokenize(txt)
         require(toksList.size == boolVals.size)
         val toks = toksList.asScala.toSeq
 
@@ -50,7 +45,7 @@ class NCStopWordsEnricherSpec:
 
         val req = NCTestRequest(txt)
 
-        lemmaPosEnricher.enrich(req, CFG, toksList)
+        EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toksList)
         stopEnricher.enrich(req, CFG, toksList)
 
         NCTestUtils.printTokens(toks)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala
index 3493dcc..35ada43 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala
@@ -33,7 +33,7 @@ class NCSwearWordsTokenEnricherSpec:
 
     @Test
     def test(): Unit =
-        val toks = EN_PIPELINE.getTokenParser.tokenize("english 
ass").asScala.toSeq
+        val toks = EN_TOK_PARSER.tokenize("english ass").asScala.toSeq
 
         require(toks.head.getOpt[Boolean]("swear:en").isEmpty)
         require(toks.last.getOpt[Boolean]("swear:en").isEmpty)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala
index 7bdf7c4..50b7dfb 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala
@@ -37,7 +37,7 @@ class NCStopWordsImplSpec:
       * @param expected
       */
     private def test0(words: Seq[W], expected: Seq[String]): Unit =
-        val toksList = 
EN_PIPELINE.getTokenParser.tokenize(words.map(_.text).mkString(" "))
+        val toksList = EN_TOK_PARSER.tokenize(words.map(_.text).mkString(" "))
         require(toksList.size == words.size)
         val toks = toksList.asScala
 
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala
index d022350..9470db7 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala
@@ -31,22 +31,15 @@ import scala.jdk.CollectionConverters.*
   *
   */
 class NCOpenNLPTokenParserSpec:
-    private val lemmaPosEnricher =
-        new NCOpenNLPLemmaPosTokenEnricher(
-            NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
-            NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
-        )
-    private val stopEnricher = new NCEnStopWordsTokenEnricher(null, null)
-
     private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword")
 
     private def test(txt: String, validate: Seq[NCToken] => _): Unit =
-        val toksList = EN_PIPELINE.getTokenParser.tokenize(txt)
+        val toksList = EN_TOK_PARSER.tokenize(txt)
 
         val req = NCTestRequest(txt)
 
-        lemmaPosEnricher.enrich(req, CFG, toksList)
-        stopEnricher.enrich(req, CFG, toksList)
+        EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toksList)
+        EN_TOK_STOP_ENRICHER.enrich(req, CFG, toksList)
 
         val toks = toksList.asScala.toSeq
         assert(toks.nonEmpty)
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
index bb0dfdf..bd919b3 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala
@@ -21,12 +21,8 @@ import org.apache.nlpcraft.NCModelConfig
 import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser
 import org.apache.nlpcraft.nlp.util.*
 
-/**
-  *
-  */
 final val CFG = NCTestConfigJava.CFG
-
-/**
-  *
-  */
-final val EN_PIPELINE = NCTestConfigJava.EN_PIPELINE
+final val EN_TOK_PARSER = NCTestConfigJava.EN_TOK_PARSER
+final val EN_TOK_STOP_ENRICHER = NCTestConfigJava.EN_TOK_STOP_ENRICHER
+final val EN_TOK_LEMMA_POS_ENRICHER = 
NCTestConfigJava.EN_TOK_LEMMA_POS_ENRICHER
+final def mkEnPipeline = NCTestConfigJava.mkEnPipeline()
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java
index c75545d..bc9f79a 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java
@@ -20,18 +20,20 @@ package org.apache.nlpcraft.nlp.util;
 import org.apache.nlpcraft.NCModelConfig;
 import org.apache.nlpcraft.internal.util.NCResourceReader;
 import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser;
+import org.apache.nlpcraft.nlp.token.enricher.*;
 
 /**
  *
  */
 public class NCTestConfigJava {
-    /**
-     *
-     */
     public static final NCModelConfig CFG = new NCModelConfig("testId", 
"test", "1.0", "Test description", "Test origin");
-
-    /** */
-    public static final NCTestPipeline EN_PIPELINE = new NCTestPipeline(
-        new 
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"))
+    public static final NCOpenNLPTokenParser EN_TOK_PARSER = new 
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin"));
+    public static final NCEnStopWordsTokenEnricher EN_TOK_STOP_ENRICHER = new 
NCEnStopWordsTokenEnricher();
+    public static final NCOpenNLPLemmaPosTokenEnricher 
EN_TOK_LEMMA_POS_ENRICHER = new NCOpenNLPLemmaPosTokenEnricher(
+        NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
+        NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
     );
+    public static final NCTestPipeline mkEnPipeline() {
+        return new NCTestPipeline(EN_TOK_PARSER);
+    }
 }
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala
index f088130..8393aef 100644
--- 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala
@@ -24,5 +24,5 @@ import org.apache.nlpcraft.nlp.util.*
   *
   */
 abstract class NCTestModelAdapter extends NCModel:
-    override def getConfig: NCModelConfig = CFG
-    override def getPipeline: NCModelPipeline = EN_PIPELINE
\ No newline at end of file
+    override val getConfig: NCModelConfig = CFG
+    override val getPipeline: NCModelPipeline = mkEnPipeline
\ No newline at end of file
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
index cf5aed4..f8ada63 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala
@@ -134,7 +134,7 @@ object NCTestUtils:
             new NCSemanticStemmer():
                 override def stem(txt: String): String = 
s.stem(txt.toLowerCase)
             ,
-            new 
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")),
+            EN_TOK_PARSER,
             macros,
             elms
         )
@@ -151,6 +151,6 @@ object NCTestUtils:
             new NCSemanticStemmer():
                 override def stem(txt: String): String = 
s.stem(txt.toLowerCase)
             ,
-            new 
NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")),
+            EN_TOK_PARSER,
             src
         )

[incubator-nlpcraft] branch master updated: Tests infrastructure improved.

Reply via email to