This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-287 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 7eebb407a412ab152f07935782c56d003e40f214 Author: Sergey Kamov <[email protected]> AuthorDate: Sat Apr 3 10:27:38 2021 +0300 WIP. --- .../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 34 ++++++++++++++-------- .../probe/mgrs/deploy/NCDeployManager.scala | 16 ++++++---- .../nlpcraft/probe/mgrs/model/NCModelManager.scala | 27 ++++++++--------- .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 16 +++++----- 4 files changed, 56 insertions(+), 37 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala index 92e9ece..d09418a 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala @@ -33,12 +33,16 @@ import scala.collection.mutable * In this case chunks contain value name. * @param isDirect Direct or permuted synonym flag. * @param value Optional value name if this is a value synonym. + * @param sparse Flag. + * @param perm Flag. */ class NCProbeSynonym( val isElementId: Boolean, val isValueName: Boolean, val isDirect: Boolean, - val value: String = null + val value: String = null, + val sparse: Boolean, + val perm: Boolean ) extends mutable.ArrayBuffer[NCProbeSynonymChunk] with Ordered[NCProbeSynonym] { require((isElementId && !isValueName && value == null) || !isElementId) require((isValueName && value != null) || !isValueName) @@ -144,7 +148,7 @@ class NCProbeSynonym( var ok = true val buf = mutable.ArrayBuffer.empty[NCNlpSentenceToken] - if (isDirect) { + if (!perm) { var lastIdx = 0 val tokIdxs = sen.zipWithIndex.toMap @@ -175,13 +179,10 @@ class NCProbeSynonym( */ private def isMatch(tow: NCDslContent, chunk: NCProbeSynonymChunk, req: NCRequest): Boolean = { def get0[T](fromToken: NCToken ⇒ T, fromWord: NCNlpSentenceToken ⇒ T): T = - if (tow.isLeft) fromToken(tow.left.get) - else fromWord(tow.right.get) + if (tow.isLeft) fromToken(tow.left.get) else fromWord(tow.right.get) chunk.kind match { - case TEXT ⇒ - chunk.wordStem == get0(_.stem, _.stem) - + case TEXT ⇒ chunk.wordStem == get0(_.stem, _.stem) case REGEX ⇒ val r = chunk.regex @@ -223,7 +224,7 @@ class NCProbeSynonym( var ok = true val buf = mutable.ArrayBuffer.empty[NCDslContent] - if (isDirect) { + if (!perm) { var lastIdx = 0 val tokIdxs = sen.zipWithIndex.toMap @@ -243,7 +244,7 @@ class NCProbeSynonym( convertResult(ok, buf) } - collectMatches(sen ,trySparseMatch0) + collectMatches(sen, trySparseMatch0) } override def toString(): String = mkString(" ") @@ -339,10 +340,19 @@ object NCProbeSynonym { * @param isDirect * @param value * @param chunks - * @return + * @param sparse + * @param perm */ - def apply(isElementId: Boolean, isValueName: Boolean, isDirect: Boolean, value: String, chunks: Seq[NCProbeSynonymChunk]): NCProbeSynonym = { - var syn = new NCProbeSynonym(isElementId, isValueName, isDirect, value) + def apply( + isElementId: Boolean, + isValueName: Boolean, + isDirect: Boolean, + value: String, + chunks: Seq[NCProbeSynonymChunk], + sparse: Boolean, + perm: Boolean + ): NCProbeSynonym = { + var syn = new NCProbeSynonym(isElementId, isValueName, isDirect, value, sparse, perm) syn ++= chunks diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala index 2889241..a9f4d4a 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala @@ -218,16 +218,20 @@ object NCDeployManager extends NCService with DecorateAsScala { s"]" ) + val sparse = elm.isSparse.orElse(mdl.isSparse) + val perm = elm.isPermutateSynonyms.orElse(mdl.isPermutateSynonyms) + def addSynonym( isElementId: Boolean, isValueName: Boolean, value: String, - chunks: Seq[NCProbeSynonymChunk]): Unit = { + chunks: Seq[NCProbeSynonymChunk] + ): Unit = { def add(chunks: Seq[NCProbeSynonymChunk], isDirect: Boolean): Unit = { val holder = SynonymHolder( elmId = elmId, - sparse = elm.isSparse.orElse(mdl.isSparse), - syn = NCProbeSynonym(isElementId, isValueName, isDirect, value, chunks) + sparse = sparse, + syn = NCProbeSynonym(isElementId, isValueName, isDirect, value, chunks, sparse, perm) ) if (syns.add(holder)) { @@ -261,8 +265,10 @@ object NCDeployManager extends NCService with DecorateAsScala { } if ( - elm.isPermutateSynonyms.orElse(mdl.isPermutateSynonyms) && - !isElementId && chunks.forall(_.wordStem != null) + perm && + !sparse && + !isElementId && + chunks.forall(_.wordStem != null) ) simplePermute(chunks).map(p ⇒ p.map(_.wordStem) → p).toMap.values.foreach(p ⇒ add(p, p == chunks)) else diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala index ddcc286..cdfdf89 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala @@ -58,23 +58,24 @@ object NCModelManager extends NCService with DecorateAsScala { data.values.foreach(w ⇒ { val mdl = w.model - val synCnt = - w.sparseSynonyms.map(_._2.size).sum + - w.sparseSynonymsDsl.map(_._2.size).sum + - w.nonSparseSynonyms.flatMap(_._2.map(_._2.count)).sum + - w.nonSparseSynonymsDsl.map(_._2.size).sum - + val synCnt = w.nonSparseSynonyms.flatMap(_._2.map(_._2.count)).sum + val synDslCnt = w.nonSparseSynonymsDsl.map(_._2.size).sum + val synSparseCnt = w.sparseSynonyms.map(_._2.size).sum + val synSparseDslCnt = w.sparseSynonymsDsl.map(_._2.size).sum val elmCnt = w.elements.keySet.size val intentCnt = w.intents.size tbl += Seq( - s"Name: ${bo(c(mdl.getName))}", - s"ID: ${bo(mdl.getId)}", - s"Version: ${mdl.getVersion}", - s"Origin: ${mdl.getOrigin}", - s"Elements: $elmCnt" + (if (elmCnt == 0) s" ${r("(!)")}" else ""), - s"Synonyms: $synCnt" + (if (synCnt == 0) s" ${r("(!)")}" else ""), - s"Intents: $intentCnt" + (if (intentCnt == 0) s" ${r("(!)")}" else "") + s"Name: ${bo(c(mdl.getName))}", + s"ID: ${bo(mdl.getId)}", + s"Version: ${mdl.getVersion}", + s"Origin: ${mdl.getOrigin}", + s"Elements: $elmCnt" + (if (elmCnt == 0) s" ${r("(!)")}" else ""), + s"Synonyms: $synCnt" + (if (synCnt == 0) s" ${r("(!)")}" else ""), + s"Synonyms(DSL): $synDslCnt" + (if (synDslCnt == 0) s" ${r("(!)")}" else ""), + s"Synonyms(Sparse): $synSparseCnt" + (if (synSparseCnt == 0) s" ${r("(!)")}" else ""), + s"Synonyms(Sparse, DSL): $synSparseDslCnt" + (if (synSparseDslCnt == 0) s" ${r("(!)")}" else ""), + s"Intents: $intentCnt" + (if (intentCnt == 0) s" ${r("(!)")}" else "") ) }) } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala index ed80630..22a6a5b 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala @@ -19,7 +19,7 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model import io.opencensus.trace.Span import org.apache.nlpcraft.common._ -import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, _} +import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken => NlpToken, _} import org.apache.nlpcraft.model._ import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, TEXT} @@ -289,7 +289,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { if (!matches.exists(m ⇒ m.element.getId == elm.getId && tokensSet.subsetOf(m.tokensSet))) { matches += ElementMatch(elm, toks, syn, parts) - true } else @@ -357,8 +356,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { ) { _ ⇒ // 1. Simple, sparse. - for ((elemId, syns) ← mdl.sparseSynonyms; syn ← syns) - syn.trySparseMatch(ns).foreach(toks ⇒ addMatch(mdl.elements(elemId), toks, syn, Seq.empty)) + if (!ns.exists(_.isUser)) + for ((elemId, syns) ← mdl.sparseSynonyms; syn ← syns) + syn.trySparseMatch(ns).foreach(toks ⇒ addMatch(mdl.elements(elemId), toks, syn, Seq.empty)) // 2. DSL, sparse. for ((elemId, syns) ← mdl.sparseSynonymsDsl; syn ← syns) { @@ -423,9 +423,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { toks: Seq[NlpToken], syn: NCProbeSynonym, parts: Seq[TokenData] - ): Unit = - if (addMatch(elm, toks, syn, parts)) - found = true + ): Unit = { + addMatch(elm, toks, syn, parts) + + found = true + } // 3. Simple, not sparse. // Optimization - plain synonyms can be used only on first iteration
