This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-287 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 315b1fae49e691ded1efea2a98a1c9b1d8e28aab Author: Sergey Kamov <[email protected]> AuthorDate: Tue Apr 13 13:24:07 2021 +0300 WIP. --- .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala | 1 + .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 258 +++++++++++---------- 2 files changed, 134 insertions(+), 125 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala index 1618421..1c21cb9 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala @@ -50,4 +50,5 @@ case class NCProbeModel( ) { def hasDslSynonyms(elemId: String): Boolean = dslSynonyms.contains(elemId) def hasDslSynonyms: Boolean = dslSynonyms.nonEmpty + def hasSparseSynonyms: Boolean = sparseSynonyms.nonEmpty } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala index adf1358..f2aa542 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala @@ -475,155 +475,163 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { }).seq } - @throws[NCE] - override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { - require(isStarted) - - val mdlId = mdl.model.getId - val srvReqId = ns.srvReqId - - startScopedSpan("enrich", parent, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { span ⇒ - val req = NCRequestImpl(senMeta, srvReqId) - lazy val h = mkComplexes(mdl, ns) - - startScopedSpan("synsProc", span, "srvReqId" → srvReqId, "mdlId" → mdlId, "txt" → ns.text) { _ ⇒ - var state = if (ns.firstProbePhase) SIMPLE else DSL_NEXT + /** + * + * @param mdl + * @param ns + * @param combosToks + * @param state + * @param req + */ + private def execute( + mdl: NCProbeModel, + ns: NCNlpSentence, + combosToks: Seq[Seq[NlpToken]], + state: State, req: NCRequest, + h: ⇒ ComplexHolder, span: Span + ): Unit = + startScopedSpan("execute", span, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { _ ⇒ + if (DEEP_DEBUG) + println(s"Execution started [state=$state]") - ns.firstProbePhase = false + val contCache = + mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]] ++ + mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]]) + lazy val dslCache = mutable.HashSet.empty[Seq[Complex]] - val combosToks = combos(ns) + var found = false - def go(): Unit = { - val contCache = mutable.HashMap.empty[String, mutable.ArrayBuffer[Seq[Int]]] ++ mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]]) - lazy val dslCache = mutable.HashSet.empty[Seq[Complex]] + def add(typ: String, elm: NCElement, res: Seq[NlpToken], allToksIdxs: Seq[Int], s: Synonym, parts: Seq[TokType] = Seq.empty): Unit = { + found = true + val resIdxs = res.map(_.index) - var found = false + val continuous = U.isContinuous(resIdxs.sorted) - def add(typ: String, elm: NCElement, res: Seq[NlpToken], allToksIdxs: Seq[Int], s: Synonym, parts: Seq[TokType] = Seq.empty): Unit = { - found = true - val resIdxs = res.map(_.index) + if (continuous && resIdxs == allToksIdxs) + contCache(elm.getId) += allToksIdxs - val continuous = U.isContinuous(resIdxs.sorted) + val added = !alreadyMarked(res, allToksIdxs, continuous, elm.getId) - if (continuous && resIdxs == allToksIdxs) - contCache(elm.getId) += allToksIdxs + if (added) { + val direct = s.isDirect && U.isIncreased(resIdxs) - val added = !alreadyMarked(res, allToksIdxs, continuous, elm.getId) + mark(ns, elm, res, direct, syn = Some(s), metaOpt = None, parts, allToksIdxs, continuous) + } - if (added) { - val direct = s.isDirect && U.isIncreased(resIdxs) + if (DEEP_DEBUG) + println( + s"${if (added) "Added" else "Skipped"} element [" + + s"id=${elm.getId}, " + + s"type=$typ, " + + s"text='${res.map(_.origText).mkString(" ")}', " + + s"indexes=${resIdxs.mkString("[", ",", "]")}, " + + s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " + + s"continuous=$continuous, " + + s"synonym=$s" + + s"]" + ) + } - mark(ns, elm, res, direct, syn = Some(s), metaOpt = None, parts, allToksIdxs, continuous) + for (toks ← combosToks) { + val tokIdxs = toks.map(_.index) + lazy val dslCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks, dslCache.toSet) + lazy val tokStems = toks.map(_.stem).mkString(" ") + + // Attempt to match each element. + for ( + elm ← mdl.elements.values; + elemId = elm.getId + if + !contCache(elemId).exists(_.containsSlice(tokIdxs)) && + // Checks whole tokens slice. + !alreadyMarked(toks, tokIdxs, continuous = true, elemId) + ) { + // 1. SIMPLE. + found = false + + val simpleEnabled: Boolean = + state match { + case SIMPLE ⇒ !mdl.hasDslSynonyms(elemId) + case DSL_FIRST ⇒ mdl.hasDslSynonyms(elemId) + case _ ⇒ false } - if (DEEP_DEBUG) - println( - s"${if (added) "Added" else "Skipped"} element [" + - s"id=${elm.getId}, " + - s"type=$typ, " + - s"text='${res.map(_.origText).mkString(" ")}', " + - s"indexes=${resIdxs.mkString("[", ",", "]")}, " + - s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " + - s"continuous=$continuous, " + - s"synonym=$s" + - s"]" - ) - } + // 1.1 Continuous. + if (simpleEnabled && !found) + fastAccess(mdl.continuousSynonyms, elemId, toks.length) match { + case Some(h) ⇒ + def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit = + syns.get(tokStems) match { + case Some(s) ⇒ add("simple continuous", elm, toks, tokIdxs, s) + case None ⇒ notFound() + } - for (toks ← combosToks) { - val tokIdxs = toks.map(_.index) - lazy val dslCombs: Seq[Seq[Complex]] = mkComplexCombinations(h, toks, dslCache.toSet) - lazy val tokStems = toks.map(_.stem).mkString(" ") - - // Attempt to match each element. - for ( - elm ← mdl.elements.values; - elemId = elm.getId - if - !contCache(elemId).exists(_.containsSlice(tokIdxs)) && - // Checks whole tokens slice. - !alreadyMarked(toks, tokIdxs, continuous = true, elemId) - ) { - // 1. SIMPLE. - found = false - - val simpleEnabled: Boolean = - state match { - case SIMPLE ⇒ !mdl.hasDslSynonyms(elemId) - case DSL_FIRST ⇒ mdl.hasDslSynonyms(elemId) - case _ ⇒ false - } + def tryScan(syns: Seq[Synonym]): Unit = + for (s ← syns if !found) + if (s.isMatch(toks)) + add("simple continuous scan", elm, toks, tokIdxs, s) - // 1.1 Direct. - if (simpleEnabled && !found) - fastAccess(mdl.continuousSynonyms, elemId, toks.length) match { - case Some(h) ⇒ - def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit = - syns.get(tokStems) match { - case Some(s) ⇒ add("direct simple", elm, toks, tokIdxs, s) - case None ⇒ notFound() - } - - def tryScan(syns: Seq[Synonym]): Unit = - for (s ← syns if !found) - if (s.isMatch(toks)) - add("scan simple", elm, toks, tokIdxs, s) - - tryMap( - h.txtDirectSynonyms, - () ⇒ { - tryScan(h.notTxtDirectSynonyms) - - if (!found) - tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms)) - } - ) - case None ⇒ // No-op. - } + tryMap( + h.txtDirectSynonyms, + () ⇒ { + tryScan(h.notTxtDirectSynonyms) - // 1.2 Sparse. - if (simpleEnabled && !found) - for (s ← get(mdl.sparseSynonyms, elemId) if !found) - s.trySparseMatch(toks) match { - case Some(res) ⇒ add("sparse simple", elm, res, tokIdxs, s) - case None ⇒ // No-op. + if (!found) + tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms)) } + ) + case None ⇒ // No-op. + } - // 2. DSL. - if (state != SIMPLE && mdl.dslSynonyms.nonEmpty) { - found = false - - // 2.1 Sparse. - if (mdl.hasDslSynonyms) - for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found) - s.trySparseMatch(comb.map(_.data), req) match { - case Some(res) ⇒ - add("sparse DSL", elm, toTokens(res, ns), tokIdxs, s, toParts(res, s)) - dslCache += comb - case None ⇒ // No-op. - } - // 2.2 Direct. - else - for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found) - if (s.isMatch(comb.map(_.data), req)) { - add("direct DSL", elm, toks, tokIdxs, s, toPartsComplex(comb, s)) - dslCache += comb - } + // 1.2 Sparse. + if (simpleEnabled && !found) + for (s ← get(mdl.sparseSynonyms, elemId) if !found) + s.trySparseMatch(toks) match { + case Some(res) ⇒ add("simple sparse", elm, res, tokIdxs, s) + case None ⇒ // No-op. } - } + + // 2. DSL. + if (state != SIMPLE && mdl.dslSynonyms.nonEmpty) { + found = false + + // 2.1 Sparse. + if (mdl.hasSparseSynonyms) + for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found) + s.trySparseMatch(comb.map(_.data), req) match { + case Some(res) ⇒ + add("DSL sparse", elm, toTokens(res, ns), tokIdxs, s, toParts(res, s)) + dslCache += comb + case None ⇒ // No-op. + } + // 2.2 Continuous. + else + for (s ← get(mdl.dslSynonyms, elemId); comb ← dslCombs if !found) + if (s.isMatch(comb.map(_.data), req)) { + add("DSL continuous", elm, toks, tokIdxs, s, toPartsComplex(comb, s)) + dslCache += comb + } } } + } + } - if (DEEP_DEBUG) - println(s"Execution started with state: $state.") + @throws[NCE] + override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { + require(isStarted) - go() + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { span ⇒ + val req = NCRequestImpl(senMeta, ns.srvReqId) + val combosToks = combos(ns) - if (state == SIMPLE) { - state = DSL_FIRST + lazy val h = mkComplexes(mdl, ns) - go() - } + execute(mdl, ns, combosToks, if (ns.firstProbePhase) SIMPLE else DSL_NEXT, req, h, parent) + + if (ns.firstProbePhase) { + ns.firstProbePhase = false + + execute(mdl, ns, combosToks, DSL_FIRST, req, h, parent) } processParsers(mdl, ns, span, req)
