This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push: new c28f449 Tests infrastructure improved. c28f449 is described below commit c28f44965492ae2d7dd37c7b78ed727ea6f3b09e Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Thu Mar 17 11:20:23 2022 +0300 Tests infrastructure improved. --- .../parser/stanford/NCStanfordNLPTokenParser.java | 3 ++- .../stanford/NCStanfordNLPEntityParserSpec.scala | 2 +- .../stanford/NCStanfordNLPTokenParserSpec.scala | 3 +-- .../nlpcraft/nlp/util/stanford/NCTestConfig.scala | 5 +---- .../nlpcraft/internal/impl/NCModelClientSpec.scala | 4 +--- .../internal/impl/NCModelPingPongSpec.scala | 4 +--- .../internal/impl/NCModelPipelineManagerSpec.scala | 6 ++--- .../internal/impl/scan/NCTestModelJava.java | 2 +- .../nlp/entity/parser/NCNLPEntityParserSpec.scala | 2 +- .../entity/parser/NCOpenNLPEntityParserSpec.scala | 26 +++++++++++++--------- .../semantic/NCSemanticEntityParserJsonSpec.scala | 8 ++----- .../semantic/NCSemanticEntityParserLemmaSpec.scala | 14 ++++-------- .../semantic/NCSemanticEntityParserSpec.scala | 24 ++++++++------------ .../semantic/NCSemanticEntityParserYamlSpec.scala | 8 ++----- .../enricher/NCBracketsTokenEnricherSpec.scala | 2 +- .../enricher/NCDictionaryTokenEnricherSpec.scala | 10 ++------- .../token/enricher/NCQuotesTokenEnricherSpec.scala | 9 ++------ .../token/enricher/NCStopWordsEnricherSpec.scala | 9 ++------ .../enricher/NCSwearWordsTokenEnricherSpec.scala | 2 +- .../token/enricher/impl/NCStopWordsImplSpec.scala | 2 +- .../token/parser/NCOpenNLPTokenParserSpec.scala | 13 +++-------- .../apache/nlpcraft/nlp/util/NCTestConfig.scala | 12 ++++------ .../apache/nlpcraft/nlp/util/NCTestConfigJava.java | 16 +++++++------ .../nlpcraft/nlp/util/NCTestModelAdapter.scala | 4 ++-- .../org/apache/nlpcraft/nlp/util/NCTestUtils.scala | 4 ++-- 25 files changed, 72 insertions(+), 122 deletions(-) diff --git 
a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java index 747ac3e..78cd92e 100644 --- a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java +++ b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParser.java @@ -26,7 +26,8 @@ import java.util.List; import java.util.Objects; /** - * + * TODO: + * Note that this tokenizer also adds 2 properties into parsed token: lemma and pos */ public class NCStanfordNLPTokenParser implements NCTokenParser { private final NCStanfordNLPTokenParserImpl impl; diff --git a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala index 49caefa..13eeabd 100644 --- a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala +++ b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/stanford/NCStanfordNLPEntityParserSpec.scala @@ -34,7 +34,7 @@ class NCStanfordNLPEntityParserSpec: def test(): Unit = val txt = "Los Angeles, 23 August, 23 and sergeyka...@apache.org, tomorrow" - val toks = EN_STANFORD_PIPELINE.getTokenParser.tokenize(txt) + val toks = TOK_STANFORD_PARSER.tokenize(txt) NCTestUtils.printTokens(toks.asScala.toSeq) val res = parser.parse(NCTestRequest(txt), CFG, toks) diff --git a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala index 204e0c5..157d7d2 100644 --- 
a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala +++ b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordNLPTokenParserSpec.scala @@ -31,8 +31,7 @@ import scala.jdk.CollectionConverters.* class NCStanfordNLPTokenParserSpec: @Test def test(): Unit = - val toks = - EN_STANFORD_PIPELINE.getTokenParser.tokenize("I had a lunch with brand names 'AAA'").asScala.toSeq + val toks = TOK_STANFORD_PARSER.tokenize("I had a lunch with brand names 'AAA'").asScala.toSeq require(toks.sizeIs > 1) NCTestUtils.printTokens(toks) diff --git a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala index 4e5f644..ea80fdc 100644 --- a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala +++ b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/util/stanford/NCTestConfig.scala @@ -17,7 +17,4 @@ final val STANFORD = props.setProperty("annotators", "tokenize, ssplit, pos, lemma, ner") new StanfordCoreNLP(props) -/** - * - */ -final val EN_STANFORD_PIPELINE = NCTestPipeline(new NCStanfordNLPTokenParser(STANFORD)) +final val TOK_STANFORD_PARSER = new NCStanfordNLPTokenParser(STANFORD) \ No newline at end of file diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala index eb3df69..5123c84 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelClientSpec.scala @@ -28,9 +28,7 @@ import scala.util.Using class NCModelClientSpec: private def test0(mdl: NCTestModelAdapter): Unit = - mdl.getPipeline.getEntityParsers.add( - NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml") - ) + 
mdl.getPipeline.getEntityParsers.add(NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml")) Using.resource(new NCModelClient(mdl)) { client => val res = client.ask("Lights on at second floor kitchen", null, "userId") diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala index 2650993..31e4cf5 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPingPongSpec.scala @@ -59,9 +59,7 @@ class NCModelPingPongSpec: def onOther(im: NCIntentMatch, @NCIntentTerm("other") other: NCEntity): NCResult = R(ASK_RESULT, s"Some request by: ${other.mkText()}") - MDL.getPipeline.getEntityParsers.add( - NCTestUtils.mkENSemanticParser(Seq(STE("command"), STE("confirm"), STE("other")).asJava) - ) + MDL.getPipeline.getEntityParsers.add(NCTestUtils.mkENSemanticParser(Seq(STE("command"), STE("confirm"), STE("other")).asJava)) @BeforeEach def setUp(): Unit = client = new NCModelClient(MDL) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala index 86ff1b5..bb78d4d 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManagerSpec.scala @@ -41,11 +41,9 @@ class NCModelPipelineManagerSpec: @Test def test(): Unit = def test(txt: String, variantCnt: Int, elements: NCSemanticElement*): Unit = - val pipeline = EN_PIPELINE.clone() + val pipeline = mkEnPipeline - val parser = NCTestUtils.mkENSemanticParser(elements.asJava) - pipeline.getEntityParsers.clear() - pipeline.getEntityParsers.add(parser) + pipeline.getEntityParsers.add(NCTestUtils.mkENSemanticParser(elements.asJava)) val res 
= new NCModelPipelineManager(CFG, pipeline).prepare(txt, null, "userId") diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java index 42dc72f..2289709 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/impl/scan/NCTestModelJava.java @@ -40,7 +40,7 @@ public class NCTestModelJava { */ public static NCModel mkModel() { return - new NCModelAdapter(NCTestConfigJava.CFG, NCTestConfigJava.EN_PIPELINE) { + new NCModelAdapter(NCTestConfigJava.CFG, NCTestConfigJava.mkEnPipeline()) { @NCIntent( "intent=locInt term(single)~{# == 'id1'} term(list)~{# == 'id2'}[0,10] term(opt)~{# == 'id3'}?" ) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala index 4ace4ac..b652b9b 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCNLPEntityParserSpec.scala @@ -41,7 +41,7 @@ class NCNLPEntityParserSpec: @Test def test(): Unit = val req = NCTestRequest("I had the lunch") - val toks = EN_PIPELINE.getTokenParser.tokenize(req.txt) + val toks = EN_TOK_PARSER.tokenize(req.txt) val entities = parser.parse(req, CFG, toks).asScala.toSeq NCTestUtils.printEntities(req.txt, entities) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala index 1ab256c..cc15bca 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/NCOpenNLPEntityParserSpec.scala @@ 
-33,16 +33,20 @@ import scala.jdk.OptionConverters.RichOptional * */ class NCOpenNLPEntityParserSpec: - private val parser = new NCOpenNLPEntityParser( - Seq( - "opennlp/en-ner-location.bin", - "opennlp/en-ner-money.bin", - "opennlp/en-ner-person.bin", - "opennlp/en-ner-organization.bin", - "opennlp/en-ner-date.bin", - "opennlp/en-ner-percentage.bin" - ).map(NCResourceReader.getPath).asJava - ) + private val parser = + val list = new java.util.concurrent.CopyOnWriteArrayList[String]() + + NCUtils.execPar( + Seq( + "opennlp/en-ner-location.bin", + "opennlp/en-ner-money.bin", + "opennlp/en-ner-person.bin", + "opennlp/en-ner-organization.bin", + "opennlp/en-ner-date.bin", + "opennlp/en-ner-percentage.bin" + ).map(p => () => list.add(NCResourceReader.getPath(p)))*)(ExecutionContext.Implicits.global) + + new NCOpenNLPEntityParser(list) /** * @@ -51,7 +55,7 @@ class NCOpenNLPEntityParserSpec: */ private def check(txt: String, expected: String): Unit = val req = NCTestRequest(txt) - val toks = EN_PIPELINE.getTokenParser.tokenize(txt) + val toks = EN_TOK_PARSER.tokenize(txt) val ents = parser.parse(req, CFG, toks).asScala.toSeq NCTestUtils.printEntities(txt, ents) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala index e588f4a..b11bcf5 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala @@ -34,7 +34,7 @@ import scala.jdk.OptionConverters.RichOptional * */ class NCSemanticEntityParserJsonSpec: - private val parser = NCTestUtils.mkENSemanticParser("models/alarm_model.json") + private val semParser = NCTestUtils.mkENSemanticParser("models/alarm_model.json") /** * @@ -44,11 +44,7 @@ class NCSemanticEntityParserJsonSpec: 
*/ private def check(txt: String, id: String, elemData: Option[Map[String, Any]] = None): Unit = val req = NCTestRequest(txt) - val ents = parser.parse( - req, - CFG, - EN_PIPELINE.getTokenParser.tokenize(req.txt) - ).asScala.toSeq + val ents = semParser.parse(req, CFG, EN_TOK_PARSER.tokenize(req.txt)).asScala.toSeq NCTestUtils.printEntities(txt, ents) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala index d3e15b2..73f0fd8 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserLemmaSpec.scala @@ -35,12 +35,6 @@ import scala.jdk.CollectionConverters.* * */ class NCSemanticEntityParserLemmaSpec: - private val lemmaTokEnricher = new NCOpenNLPLemmaPosTokenEnricher( - NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), - NCResourceReader.getPath("opennlp/en-lemmatizer.dict") - ) - private val swTokEnricher = new NCEnStopWordsTokenEnricher - private val tokParser = new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")) private val lemmaStemmer = new NCSemanticStemmer(): override def stem(txt: String): String = if wrapped(txt) then unwrap(txt) else UUID.randomUUID().toString @@ -64,15 +58,15 @@ class NCSemanticEntityParserLemmaSpec: val mgr = new NCModelPipelineManager( CFG, new NCModelPipelineBuilder(). - withTokenParser(tokParser). - withTokenEnricher(lemmaTokEnricher). - withTokenEnricher(swTokEnricher). + withTokenParser(EN_TOK_PARSER). + withTokenEnricher(EN_TOK_LEMMA_POS_ENRICHER). + withTokenEnricher(EN_TOK_STOP_ENRICHER). // 1. Wraps lemmas. withTokenEnricher((req: NCRequest, cfg: NCModelConfig, toks: JList[NCToken]) => toks.forEach(t => t.put("lemma", wrap(t.get[String]("lemma")))) ). // 2. 
Semantic parser with fixed stemmer which stems only lemmas. - withEntityParser(new NCSemanticEntityParser(lemmaStemmer, tokParser, elems.asJava)). + withEntityParser(new NCSemanticEntityParser(lemmaStemmer, EN_TOK_PARSER, elems.asJava)). build() ) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala index 4c97e1f..d298673 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala @@ -35,7 +35,7 @@ import scala.jdk.OptionConverters.RichOptional */ class NCSemanticEntityParserSpec: import NCSemanticTestElement as E - private val parser = + private val semParser = NCTestUtils.mkENSemanticParser( Seq( // Standard. @@ -55,12 +55,6 @@ class NCSemanticEntityParserSpec: ).asJava ) - private val stopWordsEnricher = new NCEnStopWordsTokenEnricher() - private val lemmaPosEnricher = new NCOpenNLPLemmaPosTokenEnricher( - NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), - NCResourceReader.getPath("opennlp/en-lemmatizer.dict") - ) - /** * * @param txt @@ -70,14 +64,14 @@ class NCSemanticEntityParserSpec: */ private def check(txt: String, id: String, value: Option[String] = None, elemData: Option[Map[String, Any]] = None): Unit = val req = NCTestRequest(txt) - val toks = EN_PIPELINE.getTokenParser.tokenize(txt) + val toks = EN_TOK_PARSER.tokenize(txt) - lemmaPosEnricher.enrich(req, CFG, toks) - stopWordsEnricher.enrich(req, CFG, toks) + EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks) + EN_TOK_STOP_ENRICHER.enrich(req, CFG, toks) NCTestUtils.printTokens(toks.asScala.toSeq) - val ents = parser.parse(req, CFG, toks).asScala.toSeq + val ents = semParser.parse(req, CFG, toks).asScala.toSeq NCTestUtils.printEntities(txt, ents) require(ents.sizeIs 
== 1) @@ -99,14 +93,14 @@ class NCSemanticEntityParserSpec: */ private def checkMultiple(txt: String, ids: String*): Unit = val req = NCTestRequest(txt) - val toks = EN_PIPELINE.getTokenParser.tokenize(txt) + val toks = EN_TOK_PARSER.tokenize(txt) - lemmaPosEnricher.enrich(req, CFG, toks) - stopWordsEnricher.enrich(req, CFG, toks) + EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks) + EN_TOK_STOP_ENRICHER.enrich(req, CFG, toks) NCTestUtils.printTokens(toks.asScala.toSeq) - val ents = parser.parse(req, CFG, toks).asScala.toSeq + val ents = semParser.parse(req, CFG, toks).asScala.toSeq NCTestUtils.printEntities(txt, ents) require(ents.sizeIs == ids.size) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala index 9a1df76..3a4702c 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserYamlSpec.scala @@ -32,7 +32,7 @@ import scala.jdk.OptionConverters.RichOptional * */ class NCSemanticEntityParserYamlSpec: - private val parser = NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml") + private val semParser = NCTestUtils.mkENSemanticParser("models/lightswitch_model.yaml") /** * @@ -41,11 +41,7 @@ class NCSemanticEntityParserYamlSpec: */ private def check(txt: String, id: String): Unit = val req = NCTestRequest(txt) - val ents = parser.parse( - req, - CFG, - EN_PIPELINE.getTokenParser.tokenize(req.txt) - ).asScala.toSeq + val ents = semParser.parse(req, CFG, EN_TOK_PARSER.tokenize(req.txt)).asScala.toSeq NCTestUtils.printEntities(txt, ents) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala index e74c656..9b07783 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCBracketsTokenEnricherSpec.scala @@ -36,7 +36,7 @@ class NCBracketsTokenEnricherSpec: * @param brackets */ private def check(txt: String, brackets: Set[Integer]): Unit = - val toks = EN_PIPELINE.getTokenParser.tokenize(txt) + val toks = EN_TOK_PARSER.tokenize(txt) enricher.enrich(NCTestRequest(txt), CFG, toks) val seq = toks.asScala.toSeq diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala index 244690b..43c52e7 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCDictionaryTokenEnricherSpec.scala @@ -30,23 +30,17 @@ import scala.jdk.CollectionConverters.* class NCDictionaryTokenEnricherSpec: private val dictEnricher = new NCEnDictionaryTokenEnricher() - private val lemmaPosEnricher = - new NCOpenNLPLemmaPosTokenEnricher( - NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), - NCResourceReader.getPath("opennlp/en-lemmatizer.dict") - ) - @Test def test(): Unit = val txt = "milk XYZ" - val toks = EN_PIPELINE.getTokenParser.tokenize(txt).asScala.toSeq + val toks = EN_TOK_PARSER.tokenize(txt).asScala.toSeq require(toks.head.getOpt[Boolean]("dict:en").isEmpty) require(toks.last.getOpt[Boolean]("dict:en").isEmpty) val req = NCTestRequest(txt) - lemmaPosEnricher.enrich(req, CFG, toks.asJava) + EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks.asJava) dictEnricher.enrich(req, CFG, toks.asJava) NCTestUtils.printTokens(toks) diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala index 700bfa7..0f6dbe6 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCQuotesTokenEnricherSpec.scala @@ -29,11 +29,6 @@ import scala.jdk.CollectionConverters.* * */ class NCQuotesTokenEnricherSpec: - private val lemmaPosEnricher = - new NCOpenNLPLemmaPosTokenEnricher( - NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), - NCResourceReader.getPath("opennlp/en-lemmatizer.dict") - ) private val quoteEnricher = new NCEnQuotesTokenEnricher /** @@ -42,11 +37,11 @@ class NCQuotesTokenEnricherSpec: * @param quotes */ private def check(txt: String, quotes: Set[Integer]): Unit = - val toks = EN_PIPELINE.getTokenParser.tokenize(txt) + val toks = EN_TOK_PARSER.tokenize(txt) val toksSeq = toks.asScala.toSeq val req = NCTestRequest(txt) - lemmaPosEnricher.enrich(req, CFG, toks) + EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toks) quoteEnricher.enrich(req, CFG, toks) NCTestUtils.printTokens(toksSeq) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala index 517e30a..8bdd11a 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCStopWordsEnricherSpec.scala @@ -30,11 +30,6 @@ import scala.jdk.CollectionConverters.* * */ class NCStopWordsEnricherSpec: - private val lemmaPosEnricher = new NCOpenNLPLemmaPosTokenEnricher( - NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), - NCResourceReader.getPath("opennlp/en-lemmatizer.dict") - ) - /** * * @param stopEnricher @@ -42,7 +37,7 @@ class 
NCStopWordsEnricherSpec: * @param boolVals */ private def test(stopEnricher: NCEnStopWordsTokenEnricher, txt: String, boolVals: Boolean*): Unit = - val toksList = EN_PIPELINE.getTokenParser.tokenize(txt) + val toksList = EN_TOK_PARSER.tokenize(txt) require(toksList.size == boolVals.size) val toks = toksList.asScala.toSeq @@ -50,7 +45,7 @@ class NCStopWordsEnricherSpec: val req = NCTestRequest(txt) - lemmaPosEnricher.enrich(req, CFG, toksList) + EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toksList) stopEnricher.enrich(req, CFG, toksList) NCTestUtils.printTokens(toks) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala index 3493dcc..35ada43 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/NCSwearWordsTokenEnricherSpec.scala @@ -33,7 +33,7 @@ class NCSwearWordsTokenEnricherSpec: @Test def test(): Unit = - val toks = EN_PIPELINE.getTokenParser.tokenize("english ass").asScala.toSeq + val toks = EN_TOK_PARSER.tokenize("english ass").asScala.toSeq require(toks.head.getOpt[Boolean]("swear:en").isEmpty) require(toks.last.getOpt[Boolean]("swear:en").isEmpty) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala index 7bdf7c4..50b7dfb 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/enricher/impl/NCStopWordsImplSpec.scala @@ -37,7 +37,7 @@ class NCStopWordsImplSpec: * @param expected */ private def test0(words: Seq[W], expected: Seq[String]): Unit = - val toksList = 
EN_PIPELINE.getTokenParser.tokenize(words.map(_.text).mkString(" ")) + val toksList = EN_TOK_PARSER.tokenize(words.map(_.text).mkString(" ")) require(toksList.size == words.size) val toks = toksList.asScala diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala index d022350..9470db7 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/NCOpenNLPTokenParserSpec.scala @@ -31,22 +31,15 @@ import scala.jdk.CollectionConverters.* * */ class NCOpenNLPTokenParserSpec: - private val lemmaPosEnricher = - new NCOpenNLPLemmaPosTokenEnricher( - NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), - NCResourceReader.getPath("opennlp/en-lemmatizer.dict") - ) - private val stopEnricher = new NCEnStopWordsTokenEnricher(null, null) - private def isStopWord(t: NCToken): Boolean = t.get[Boolean]("stopword") private def test(txt: String, validate: Seq[NCToken] => _): Unit = - val toksList = EN_PIPELINE.getTokenParser.tokenize(txt) + val toksList = EN_TOK_PARSER.tokenize(txt) val req = NCTestRequest(txt) - lemmaPosEnricher.enrich(req, CFG, toksList) - stopEnricher.enrich(req, CFG, toksList) + EN_TOK_LEMMA_POS_ENRICHER.enrich(req, CFG, toksList) + EN_TOK_STOP_ENRICHER.enrich(req, CFG, toksList) val toks = toksList.asScala.toSeq assert(toks.nonEmpty) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala index bb0dfdf..bd919b3 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfig.scala @@ -21,12 +21,8 @@ import org.apache.nlpcraft.NCModelConfig import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser import 
org.apache.nlpcraft.nlp.util.* -/** - * - */ final val CFG = NCTestConfigJava.CFG - -/** - * - */ -final val EN_PIPELINE = NCTestConfigJava.EN_PIPELINE +final val EN_TOK_PARSER = NCTestConfigJava.EN_TOK_PARSER +final val EN_TOK_STOP_ENRICHER = NCTestConfigJava.EN_TOK_STOP_ENRICHER +final val EN_TOK_LEMMA_POS_ENRICHER = NCTestConfigJava.EN_TOK_LEMMA_POS_ENRICHER +final def mkEnPipeline = NCTestConfigJava.mkEnPipeline() diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java index c75545d..bc9f79a 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestConfigJava.java @@ -20,18 +20,20 @@ package org.apache.nlpcraft.nlp.util; import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.internal.util.NCResourceReader; import org.apache.nlpcraft.nlp.token.parser.NCOpenNLPTokenParser; +import org.apache.nlpcraft.nlp.token.enricher.*; /** * */ public class NCTestConfigJava { - /** - * - */ public static final NCModelConfig CFG = new NCModelConfig("testId", "test", "1.0", "Test description", "Test origin"); - - /** */ - public static final NCTestPipeline EN_PIPELINE = new NCTestPipeline( - new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")) + public static final NCOpenNLPTokenParser EN_TOK_PARSER = new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")); + public static final NCEnStopWordsTokenEnricher EN_TOK_STOP_ENRICHER = new NCEnStopWordsTokenEnricher(); + public static final NCOpenNLPLemmaPosTokenEnricher EN_TOK_LEMMA_POS_ENRICHER = new NCOpenNLPLemmaPosTokenEnricher( + NCResourceReader.getPath("opennlp/en-pos-maxent.bin"), + NCResourceReader.getPath("opennlp/en-lemmatizer.dict") ); + public static final NCTestPipeline mkEnPipeline() { + return new NCTestPipeline(EN_TOK_PARSER); + } } diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala index f088130..8393aef 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestModelAdapter.scala @@ -24,5 +24,5 @@ import org.apache.nlpcraft.nlp.util.* * */ abstract class NCTestModelAdapter extends NCModel: - override def getConfig: NCModelConfig = CFG - override def getPipeline: NCModelPipeline = EN_PIPELINE \ No newline at end of file + override val getConfig: NCModelConfig = CFG + override val getPipeline: NCModelPipeline = mkEnPipeline \ No newline at end of file diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala index cf5aed4..f8ada63 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestUtils.scala @@ -134,7 +134,7 @@ object NCTestUtils: new NCSemanticStemmer(): override def stem(txt: String): String = s.stem(txt.toLowerCase) , - new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")), + EN_TOK_PARSER, macros, elms ) @@ -151,6 +151,6 @@ object NCTestUtils: new NCSemanticStemmer(): override def stem(txt: String): String = s.stem(txt.toLowerCase) , - new NCOpenNLPTokenParser(NCResourceReader.getPath("opennlp/en-token.bin")), + EN_TOK_PARSER, src )