This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-287 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit de6df644338de14f622b4ca3819cd53f2e00c39c Author: Sergey Kamov <[email protected]> AuthorDate: Wed Apr 14 14:53:13 2021 +0300 WIP. --- .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 93 ++++++++++------------ 1 file changed, 44 insertions(+), 49 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala index 8ca93f2..f01619c 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala @@ -128,14 +128,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { case class ComplexHolder(complexesWords: Seq[Complex], complexes: Seq[ComplexSeq]) - object State extends Enumeration { - type State = Value - - val SIMPLE, IDL_FIRST, IDL_NEXT = Value - } - - import State._ - /** * * @param parent Optional parent span. @@ -481,19 +473,25 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { * @param mdl * @param ns * @param combosToks - * @param state + * @param simpleEnabled + * @param idlEnabled * @param req + * @param ch + * @param span */ private def execute( mdl: NCProbeModel, ns: NCNlpSentence, combosToks: Seq[Seq[NlpToken]], - state: State, req: NCRequest, - h: ⇒ ComplexHolder, span: Span + simpleEnabled: Boolean, + idlEnabled: Boolean, + req: NCRequest, + ch: ⇒ ComplexHolder, + span: Span ): Unit = startScopedSpan("execute", span, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { _ ⇒ if (DEEP_DEBUG) - println(s"Execution started [state=$state]") + println(s"Execution started [simpleEnabled=$simpleEnabled, idlEnabled=$idlEnabled]") val contCache = mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]] ++ @@ -519,20 +517,19 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { if (DEEP_DEBUG) println( s"${if (added) "Added" else "Skipped"} element [" + - s"id=${elm.getId}, " + - s"type=$typ, " + - s"text='${res.map(_.origText).mkString(" ")}', " + - s"indexes=${resIdxs.mkString("[", ",", "]")}, " + - s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " + - s"continuous=$continuous, " + - s"synonym=$s" + - s"]" + s"id=${elm.getId}, " + + s"type=$typ, " + + s"text='${res.map(_.origText).mkString(" ")}', " + + s"indexes=${resIdxs.mkString("[", ",", "]")}, " + + s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " + + s"continuous=$continuous, " + + s"synonym=$s" + + s"]" ) } for (toks ← combosToks) { val tokIdxs = toks.map(_.index) - lazy val idlCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks, idlCache.toSet) lazy val tokStems = toks.map(_.stem).mkString(" ") // Attempt to match each element. @@ -544,17 +541,10 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { !alreadyMarked(toks, tokIdxs, continuous = true, elemId) // Checks whole tokens slice. ) { // 1. SIMPLE. - val simpleEnabled = - state match { - case SIMPLE ⇒ !mdl.hasIdlSynonyms(elemId) - case IDL_FIRST ⇒ mdl.hasIdlSynonyms(elemId) - case _ ⇒ false - } - - var found = false + if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(elemId) else !mdl.hasIdlSynonyms(elemId))) { + // 1.1 Continuous. + var found = false - // 1.1 Continuous. - if (simpleEnabled) fastAccess(mdl.continuousSynonyms, elemId, toks.length) match { case Some(h) ⇒ def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit = @@ -584,28 +574,28 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { case None ⇒ // No-op. } - // 1.2 Sparse. - if (simpleEnabled) + // 1.2 Sparse. for (s ← get(mdl.sparseSynonyms, elemId)) s.sparseMatch(toks) match { case Some(res) ⇒ add("simple sparse", elm, res, tokIdxs, s) case None ⇒ // No-op. } + } // 2. IDL. - if (state != SIMPLE && mdl.hasIdlSynonyms) { - // 2.1 Sparse. - if (mdl.hasIdlSynonyms) - for (s ← get(mdl.idlSynonyms, elemId); comb ← idlCombs) - s.idlMatch(comb.map(_.data), req) match { - case Some(res) ⇒ - val typ = if (s.sparse) "IDL sparse" else "IDL continuous" - - add(typ, elm, toTokens(res, ns), tokIdxs, s, toParts(res, s)) - - idlCache += comb - case None ⇒ // No-op. - } + if (idlEnabled) { + val idlCombs = mkComplexCombinations(ch, toks, idlCache.toSet) + + for (s ← get(mdl.idlSynonyms, elemId); comb ← idlCombs) + s.idlMatch(comb.map(_.data), req) match { + case Some(res) ⇒ + val typ = if (s.sparse) "IDL sparse" else "IDL continuous" + + add(typ, elm, toTokens(res, ns), tokIdxs, s, toParts(res, s)) + + idlCache += comb + case None ⇒ // No-op. + } } } } @@ -617,16 +607,21 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { span ⇒ val req = NCRequestImpl(senMeta, ns.srvReqId) - val combosToks = combos(ns) + val combToks = combos(ns) lazy val h = mkComplexes(mdl, ns) - execute(mdl, ns, combosToks, if (ns.firstProbePhase) SIMPLE else IDL_NEXT, req, h, parent) + val idlEnabled = mdl.hasIdlSynonyms if (ns.firstProbePhase) { ns.firstProbePhase = false - execute(mdl, ns, combosToks, IDL_FIRST, req, h, parent) + execute(mdl, ns, combToks, simpleEnabled = true, idlEnabled = false, req, h, parent) + execute(mdl, ns, combToks, simpleEnabled = true, idlEnabled, req, h, parent) + } + else { + if (idlEnabled) + execute(mdl, ns, combToks, simpleEnabled = false, idlEnabled, req, h, parent) } processParsers(mdl, ns, span, req)
