This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-472 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push: new c8bb1b2 WIP. c8bb1b2 is described below commit c8bb1b25a74d398274ab302c08ebc90d827f9dd6 Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Wed Jan 12 11:22:02 2022 +0300 WIP. --- .../nlp/token/parser/stanford/NCStanfordTokenParser.java | 2 -- .../token/parser/stanford/NCStanfordTokenParserSpec.scala | 4 ++-- nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java | 2 -- nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.java | 2 -- .../src/main/scala/org/apache/nlpcraft/NCModelClient.java | 1 - .../org/apache/nlpcraft/internal/NCPipelineProcessor.scala | 6 +++--- .../org/apache/nlpcraft/internal/ascii/NCAsciiTable.scala | 2 +- .../parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala | 4 ++-- .../nlp/token/enricher/en/impl/NCStopWordsImpl.scala | 12 ++++++------ .../apache/nlpcraft/internal/NCPipelineProcessorSpec.scala | 2 +- .../nlp/entity/parser/nlp/NCNlpEntityParserSpec.scala | 2 +- .../entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala | 2 +- .../parser/semantic/NCSemanticEntityParserJsonSpec.scala | 2 +- .../entity/parser/semantic/NCSemanticEntityParserSpec.scala | 10 +++++----- .../nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala | 8 ++++---- 15 files changed, 27 insertions(+), 34 deletions(-) diff --git a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java index 0b9ebf5..6896c8b 100644 --- a/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java +++ b/nlpcraft-stanford/src/main/java/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParser.java @@ -18,8 +18,6 @@ package org.apache.nlpcraft.nlp.token.parser.stanford; import edu.stanford.nlp.pipeline.StanfordCoreNLP; -import org.apache.nlpcraft.NCException; -import org.apache.nlpcraft.NCModelConfig; import org.apache.nlpcraft.NCToken; import org.apache.nlpcraft.NCTokenParser; import org.apache.nlpcraft.nlp.token.parser.stanford.impl.*; diff --git a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala index 24fd34f..77ad649 100644 --- a/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala +++ b/nlpcraft-stanford/src/test/scala/org/apache/nlpcraft/nlp/token/parser/stanford/NCStanfordTokenParserSpec.scala @@ -34,9 +34,9 @@ class NCStanfordTokenParserSpec: val toks = NCStanfordTestConfig.EN_PIPELINE.getTokenParser.tokenize("I had a lunch with brand names 'AAA'").asScala.toSeq - require(toks.length > 1) + require(toks.sizeIs > 1) NCTestUtils.printTokens(toks) val words = toks.map(_.getText) - require(toks.map(_.getPos).distinct.size > 1) + require(toks.map(_.getPos).distinct.sizeIs > 1) require(toks.map(_.getLemma).zip(words).exists {_ != _}) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java index 0b2bd14..ca746bb 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCEntity.java @@ -17,8 +17,6 @@ package org.apache.nlpcraft; -import org.apache.nlpcraft.internal.util.NCUtils; - import java.util.List; /** diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.java index cd82e99..56990e5 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModel.java @@ -17,8 +17,6 @@ package org.apache.nlpcraft; -import java.util.List; - /** * */ diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java index 79c41cf..dbfa8f7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java @@ -20,7 +20,6 @@ package org.apache.nlpcraft; import org.apache.nlpcraft.internal.NCModelClientImpl; import java.util.Map; -import java.util.List; import java.util.concurrent.*; /** diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCPipelineProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCPipelineProcessor.scala index a523487..494e024 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCPipelineProcessor.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCPipelineProcessor.scala @@ -18,8 +18,8 @@ package org.apache.nlpcraft.internal import com.typesafe.scalalogging.LazyLogging -import org.apache.nlpcraft.{NCTokenEnricher, *} import org.apache.nlpcraft.internal.util.NCUtils +import org.apache.nlpcraft.* import java.util import java.util.concurrent.* @@ -38,7 +38,7 @@ object NCPipelineProcessor { case class VariantsHolder(request: NCRequest, variants: Seq[NCVariant], checkCancel: () => Unit) } -import NCPipelineProcessor._ +import org.apache.nlpcraft.internal.NCPipelineProcessor.* /** * @@ -130,7 +130,7 @@ class NCPipelineProcessor(mdl: NCModel) extends LazyLogging : // Looks at each token. map(t => t.getIndex -> entities.filter(_.getTokens.contains(t))). // Collects all overlapped entities. - map { case (_, ents) => if (ents.size > 1) ents.toSet else Set.empty }.filter(_.nonEmpty).toSeq + map { case (_, ents) => if (ents.sizeIs > 1) ents.toSet else Set.empty }.filter(_.nonEmpty).toSeq val dels = NCSentenceHelper.findCombinations(overEntities.map(_.asJava).asJava, pool).asScala.map(_.asScala) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/ascii/NCAsciiTable.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/ascii/NCAsciiTable.scala index 4a28a42..c510651 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/ascii/NCAsciiTable.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/ascii/NCAsciiTable.scala @@ -559,7 +559,7 @@ class NCAsciiTable: * * @param header */ - def print(header: String): Unit = println(mkLogString(Some(header))) + def print(header: String): Unit = println(mkLogString(Option(header))) /** * Renders this table to log as trace. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala index 1fa9f28..eef48cc 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/impl/NCOpenNlpEntityParserImpl.scala @@ -71,7 +71,7 @@ class NCOpenNlpEntityParserImpl(resources: JList[String]) extends NCEntityParser val toks = toksList.asScala val txtArr = toks.map(_.getText).toArray - finders.flatMap(find(_, txtArr)).flatMap(h => + finders.flatMap(find(_, txtArr)).flatMap(h => { def calcIndex(getHolderIndex: Holder => Int): Int = toks.find(_.getIndex == getHolderIndex(h)) match case Some(t) => t.getIndex @@ -89,4 +89,4 @@ class NCOpenNlpEntityParserImpl(resources: JList[String]) extends NCEntityParser override val getRequestId: String = req.getRequestId override val getId: String = s"opennlp:${h.name}" ) - ).asJava \ No newline at end of file + }).asJava \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala index ec87e3f..e1915f0 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/enricher/en/impl/NCStopWordsImpl.scala @@ -206,7 +206,7 @@ class NCStopWordsImpl(addStopsSet: JSet[String], exclStopsSet: JSet[String]) ext def matches(toks: Seq[NCToken]): Boolean = val posOpt = toks.size match case 0 => throw new AssertionError(s"Unexpected empty tokens.") - case 1 => Some(toks.head.getPos) + case 1 => Option(toks.head.getPos) case _ => None // Hash access. @@ -237,9 +237,9 @@ class NCStopWordsImpl(addStopsSet: JSet[String], exclStopsSet: JSet[String]) ext def permutations(toks: Seq[NCToken]): Seq[Seq[NCToken]] = def multiple(seq: Seq[Seq[Option[NCToken]]], t: NCToken): Seq[Seq[Option[NCToken]]] = if seq.isEmpty then - if isStopWord(t) then IndexedSeq(IndexedSeq(Some(t)), IndexedSeq(None)) else IndexedSeq(IndexedSeq(Some(t))) + if isStopWord(t) then IndexedSeq(IndexedSeq(Option(t)), IndexedSeq(None)) else IndexedSeq(IndexedSeq(Option(t))) else - (for (subSeq <- seq) yield subSeq :+ Some(t)) ++ (if isStopWord(t) then for (subSeq <- seq) yield subSeq :+ None else Seq.empty) + (for (subSeq <- seq) yield subSeq :+ Option(t)) ++ (if isStopWord(t) then for (subSeq <- seq) yield subSeq :+ None else Seq.empty) var res: Seq[Seq[Option[NCToken]]] = Seq.empty for (t <- toks) res = multiple(res, t) @@ -295,9 +295,9 @@ class NCStopWordsImpl(addStopsSet: JSet[String], exclStopsSet: JSet[String]) ext val dups = addStems.intersect(exclStems) if dups.nonEmpty then - throw new NCException(s"Duplicate stems detected between additional and excluded stopwords [" + - s"dups=${dups.mkString(",")}" + // TODO: error texts. - s"]") + throw new NCException( + s"Duplicate stems detected between additional and excluded stopwords [dups=${dups.mkString(",")}]" + ) percents = PERCENTS.map(stem) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineProcessorSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineProcessorSpec.scala index 177e079..762bcb9 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineProcessorSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/internal/NCPipelineProcessorSpec.scala @@ -53,7 +53,7 @@ class NCPipelineProcessorSpec: println(s"Variant: $idx") NCTestUtils.printEntities(txt, v.getEntities.asScala.toSeq) - require(res.variants.size == variantCnt) + require(res.variants.sizeIs == variantCnt) test("t1 t2", 4, NCSemanticTestElement("t1", "t2"), NCSemanticTestElement("t2", "t1")) test("t1 t2", 2, NCSemanticTestElement("t1", "t2"), NCSemanticTestElement("t2")) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParserSpec.scala index b14e90d..ea5caf9 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParserSpec.scala @@ -43,7 +43,7 @@ class NCNlpEntityParserSpec: NCTestUtils.printEntities(req.txt, entities) - require(entities.size == toks.size()) + require(entities.sizeIs == toks.size()) entities.zipWithIndex.foreach { (ent, idx) => require(ent.getTokens.size() == 1) require(ent.getTokens.get(0) == toks.get(idx)) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala index d9f0035..0f37bb6 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/opennlp/NCOpenNlpEntityParserSpec.scala @@ -51,7 +51,7 @@ class NCOpenNlpEntityParserSpec: NCTestUtils.printEntities(txt, ents) - require(ents.size == 1) + require(ents.sizeIs == 1) require(ents.exists(_.getOpt(s"opennlp:$expected:probability").isPresent)) @Test diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala index bffef2c..560ba45 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserJsonSpec.scala @@ -64,5 +64,5 @@ class NCSemanticEntityParserJsonSpec: "Ping me in 3 minutes tomorrow", "x:alarm", // File contains these data for element. - elemData = Some(Map("testKey" -> "testValue")) + elemData = Option(Map("testKey" -> "testValue")) ) \ No newline at end of file diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala index 93490eb..28fa49a 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/NCSemanticEntityParserSpec.scala @@ -104,7 +104,7 @@ class NCSemanticEntityParserSpec: val ents = parser.parse(req, NCTestConfig.CFG, toks).asScala.toSeq NCTestUtils.printEntities(txt, ents) - require(ents.size == 1) + require(ents.sizeIs == 1) val e = ents.head require(e.getId == id) @@ -132,7 +132,7 @@ class NCSemanticEntityParserSpec: val ents = parser.parse(req, NCTestConfig.CFG, toks).asScala.toSeq NCTestUtils.printEntities(txt, ents) - require(ents.size == ids.size) + require(ents.sizeIs == ids.size) ents.map(_.getId).sorted.zip(ids.sorted).foreach { case (eId, id) => require(eId == id) } @Test @@ -143,9 +143,9 @@ class NCSemanticEntityParserSpec: check("the t2", "t2") check("t3 t3", "t3") check("t3 the t3", "t3") // With stopword inside. - check("value4", "t4", value = Some("value4")) - check("value the 5", "t5", value = Some("value5")) // With stopword inside. - check("t6", "t6", elemData = Some(Map("testKey" -> "testValue"))) + check("value4", "t4", value = Option("value4")) + check("value the 5", "t5", value = Option("value5")) // With stopword inside. + check("t6", "t6", elemData = Option(Map("testKey" -> "testValue"))) check("the x abc x abe", "t7") // `x abc` should be matched, `x abe` shouldn't. checkMultiple("t1 the x abc the x the abc", "t1", "t7", "t7") \ No newline at end of file diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala index 1b6b5ee..3a96e66 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/NCOpenNlpTokenParserSpec.scala @@ -49,14 +49,14 @@ class NCOpenNlpTokenParserSpec: test( "Test requests!", toks => - require(toks.length == 3); + require(toks.sizeIs == 3); require(!isStopWord(toks.head)); require(isStopWord(toks.last)) ) test( "Test requests !", toks => - require(toks.length == 3); + require(toks.sizeIs == 3); require(!isStopWord(toks.head)); require(isStopWord(toks.last)) ) @@ -67,7 +67,7 @@ class NCOpenNlpTokenParserSpec: // So we use spaces around quotes to simplify test. "a ` a a ` a", toks => - require(toks.length == 6); + require(toks.sizeIs == 6); require(isStopWord(toks.head)); require(isStopWord(toks.last)); require(toks.drop(1).reverse.drop(1).forall(p => !isStopWord(p))) @@ -77,7 +77,7 @@ class NCOpenNlpTokenParserSpec: // Third and fourth are not because brackets. "a ( a a ) a", toks => - require(toks.length == 6); + require(toks.sizeIs == 6); require(isStopWord(toks.head)); require(isStopWord(toks.last)); require(toks.drop(1).reverse.drop(1).forall(p => !isStopWord(p)))