This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git

commit 0387d2e157b0b6d189d9e439bdbc8c30c484c9c1
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Apr 15 12:33:07 2021 +0300

    WIP.
---
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |   7 +-
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       |  19 +++-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 126 +++++++++++++--------
 3 files changed, 100 insertions(+), 52 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index 2670fb7..03c5cb3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -48,7 +48,10 @@ case class NCProbeModel(
     elements: Map[String /*Element ID*/ , NCElement],
     samples: Set[(String, Seq[Seq[String]])]
 ) {
+    lazy val hasIdlSynonyms: Boolean = idlSynonyms.nonEmpty
+    lazy val hasNoIdlSynonyms: Boolean = continuousSynonyms.nonEmpty || sparseSynonyms.nonEmpty
+    lazy val hasSparseSynonyms: Boolean = sparseSynonyms.nonEmpty || idlSynonyms.exists(_._2.exists(_.sparse))
+    lazy val hasContinuousSynonyms: Boolean = continuousSynonyms.nonEmpty || idlSynonyms.exists(_._2.exists(!_.sparse))
+
     def hasIdlSynonyms(elemId: String): Boolean = idlSynonyms.contains(elemId)
-    def hasIdlSynonyms: Boolean = idlSynonyms.nonEmpty
-    def hasNoIdlSynonyms: Boolean = continuousSynonyms.nonEmpty || sparseSynonyms.nonEmpty
 }
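
A side note outside the patch itself: the new lazy flags let callers skip whole matching phases when a model has no synonyms of a given kind. Below is a minimal sketch of that gating idea, assuming a flat synonym list in place of NCProbeModel's per-element, per-length maps (Syn, Mdl and FlagsDemo are made-up names, not the project's API):

    // Hypothetical stand-in for the probe model; only the "has*" gating logic is mirrored.
    case class Syn(sparse: Boolean, idl: Boolean)

    case class Mdl(syns: Seq[Syn]) {
        lazy val hasIdlSynonyms: Boolean        = syns.exists(_.idl)
        lazy val hasSparseSynonyms: Boolean     = syns.exists(_.sparse)
        lazy val hasContinuousSynonyms: Boolean = syns.exists(!_.sparse)
    }

    object FlagsDemo extends App {
        val mdl = Mdl(Seq(Syn(sparse = false, idl = true)))

        // An enricher can bypass an entire phase when no synonyms of that kind exist.
        if (mdl.hasContinuousSynonyms) println("run continuous matching")
        if (mdl.hasSparseSynonyms) println("run sparse matching")
    }
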
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index b944ccc..d5361f8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -186,6 +186,23 @@ class NCProbeSynonym(
 
     /**
       *
+      * @param tows
+      * @param req
+      * @return
+      */
+    def isMatch(tows: Seq[NCIdlContent], req: NCRequest): Boolean = {
+        require(tows != null)
+
+        if (tows.length == length && tows.count(_.isLeft) >= idlChunks)
+            tows.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall { case (tow, chunk) ⇒ isMatch(tow, chunk, req) }
+        else
+            false
+    }
+
+
+
+    /**
+      *
       * @param toks
       */
     def sparseMatch(toks: NCNlpSentenceTokenBuffer): Option[Seq[NCNlpSentenceToken]] = {
@@ -200,7 +217,7 @@ class NCProbeSynonym(
       * @param tows
       * @param req
       */
-    def idlMatch(tows: Seq[NCIdlContent], req: NCRequest): Option[Seq[NCIdlContent]] = {
+    def sparseMatch(tows: Seq[NCIdlContent], req: NCRequest): Option[Seq[NCIdlContent]] = {
         require(tows != null)
         require(req != null)
         require(hasIdl)
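
In short, idlMatch is renamed to sparseMatch, and the new isMatch overload serves the continuous (non-sparse) IDL case, starting with a cheap length/count precheck before per-chunk matching. A minimal sketch of that precheck, assuming a plain Either stands in for NCIdlContent and that synonymLength and idlChunks are illustrative parameters rather than the class's actual fields:

    object PrecheckDemo extends App {
        // Stand-in for the project's NCIdlContent (an Either of two content kinds).
        type Content = Either[String, Int]

        // A continuous IDL synonym can only match a candidate of exactly its own length
        // carrying enough entries on the counted side, mirroring the
        // tows.count(_.isLeft) >= idlChunks test in the patch above.
        def precheck(tows: Seq[Content], synonymLength: Int, idlChunks: Int): Boolean =
            tows.length == synonymLength && tows.count(_.isLeft) >= idlChunks

        println(precheck(Seq(Left("price"), Right(1)), synonymLength = 2, idlChunks = 1)) // true
        println(precheck(Seq(Right(1), Right(2)), synonymLength = 2, idlChunks = 1))      // false
    }
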
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 353687b..120d8d0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -402,7 +402,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
       * @param h
       * @param toks
       */
-    private def mkComplexCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache: Set[Seq[Complex]]): Seq[Seq[Complex]] = {
+    private def mkCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache: Set[Seq[Complex]]): Seq[Seq[Complex]] = {
         val idxs = toks.flatMap(_.wordIndexes).toSet
 
         h.complexes.par.
@@ -423,9 +423,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
     }
 
     private def add(
+        dbgType: String,
         ns: NCNlpSentence,
-        contCache: mutable.Map[String, ArrayBuffer[Seq[Int]]],
-        typ: String,
+        contCache: Cache,
         elm: NCElement,
         res: Seq[NlpToken],
         allToksIdxs: Seq[Int],
@@ -449,7 +449,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             logger.trace(
                 s"${if (ok) "Added" else "Skipped"} element [" +
                     s"id=${elm.getId}, " +
-                    s"type=$typ, " +
+                    s"type=$dbgType, " +
                     s"text='${res.map(_.origText).mkString(" ")}', " +
                     s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
                     s"allTokensIndexes=${allToksIdxs.mkString("[", ",", "]")}, " +
@@ -462,85 +462,110 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
     override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
         require(isStarted)
 
-        startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { span ⇒
+        startScopedSpan(
+            "enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text
+        ) { span ⇒
             val req = NCRequestImpl(senMeta, ns.srvReqId)
             val combToks = combos(ns)
             lazy val ch = mkComplexes(mdl, ns)
 
             def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
-                startScopedSpan("execute", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { _ ⇒
+                startScopedSpan(
+                    "execute", span, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text
+                ) { _ ⇒
                     if (DEEP_DEBUG)
                         logger.trace(s"Execution started [simpleEnabled=$simpleEnabled, idlEnabled=$idlEnabled]")
 
-                    val contCache = mutable.HashMap.empty ++ mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]])
+                    val contCache = mutable.HashMap.empty ++
+                        mdl.elements.keys.map(k ⇒ k → mutable.ArrayBuffer.empty[Seq[Int]])
                     lazy val idlCache = mutable.HashSet.empty[Seq[Complex]]
 
                     for (
                         toks ← combToks;
-                        tokIdxs = toks.map(_.index);
-                        elm ← mdl.elements.values;
-                        elemId = elm.getId
+                        idxs = toks.map(_.index);
+                        e ← mdl.elements.values;
+                        eId = e.getId
                         if
-                            !contCache(elemId).exists(_.containsSlice(tokIdxs)) &&
-                            !alreadyMarked(ns, elemId, toks, tokIdxs)
+                            !contCache(eId).exists(_.containsSlice(idxs)) &&
+                            !alreadyMarked(ns, eId, toks, idxs)
                     ) {
                         // 1. SIMPLE.
-                        if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(elemId) else !mdl.hasIdlSynonyms(elemId))) {
+                        if (simpleEnabled && (if (idlEnabled) mdl.hasIdlSynonyms(eId) else !mdl.hasIdlSynonyms(eId))) {
                             lazy val tokStems = toks.map(_.stem).mkString(" ")
 
                             // 1.1 Continuous.
                             var found = false
 
-                            fastAccess(mdl.continuousSynonyms, elemId, toks.length) match {
-                                case Some(h) ⇒
-                                    def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
-                                        syns.get(tokStems) match {
-                                            case Some(s) ⇒
-                                                found = true
-                                                add(ns, contCache,"simple continuous", elm, toks, tokIdxs, s)
-                                            case None ⇒ notFound()
-                                        }
-
-                                    def tryScan(syns: Seq[Synonym]): Unit =
-                                        for (s ← syns if !found)
-                                            if (s.isMatch(toks)) {
-                                                found = true
-                                                add(ns, contCache, "simple continuous scan", elm, toks, tokIdxs, s)
+                            if (mdl.hasContinuousSynonyms)
+                                fastAccess(mdl.continuousSynonyms, eId, toks.length) match {
+                                    case Some(h) ⇒
+                                        def tryMap(syns: Map[String, Synonym], notFound: () ⇒ Unit): Unit =
+                                            syns.get(tokStems) match {
+                                                case Some(s) ⇒
+                                                    found = true
+                                                    add("simple continuous", ns, contCache, e, toks, idxs, s)
+                                                case None ⇒ notFound()
                                             }
 
-                                    tryMap(
-                                        h.txtDirectSynonyms,
-                                        () ⇒ {
-                                            tryScan(h.notTxtDirectSynonyms)
+                                        def tryScan(syns: Seq[Synonym]): Unit =
+                                            for (s ← syns if !found)
+                                                if (s.isMatch(toks)) {
+                                                    found = true
+                                                    add("simple continuous scan", ns, contCache, e, toks, idxs, s)
+                                                }
 
-                                            if (!found)
-                                                tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
-                                        }
-                                    )
-                                case None ⇒ // No-op.
-                            }
+                                        tryMap(
+                                            h.txtDirectSynonyms,
+                                            () ⇒ {
+                                                tryScan(h.notTxtDirectSynonyms)
+
+                                                if (!found)
+                                                    tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
+                                            }
+                                        )
+                                    case None ⇒ // No-op.
+                                }
 
                             // 1.2 Sparse.
-                            if (!found)
-                                for (s ← get(mdl.sparseSynonyms, elemId))
+                            if (!found && mdl.hasSparseSynonyms)
+                                for (s ← get(mdl.sparseSynonyms, eId))
                                     s.sparseMatch(toks) match {
-                                        case Some(res) ⇒ add(ns, contCache, "simple sparse", elm, res, tokIdxs, s)
+                                        case Some(res) ⇒ add("simple sparse", ns, contCache, e, res, idxs, s)
                                         case None ⇒ // No-op.
                                     }
                         }
 
                         // 2. IDL.
-                        if (idlEnabled)
-                            for (s ← get(mdl.idlSynonyms, elemId); comb ← mkComplexCombinations(ch, toks, idlCache.toSet))
-                                s.idlMatch(comb.map(_.data), req) match {
-                                    case Some(res) ⇒
-                                        val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
+                        if (idlEnabled) {
+                            if (mdl.hasSparseSynonyms)
+                                for (s ← get(mdl.idlSynonyms, eId); comb ← mkCombinations(ch, toks, idlCache.toSet))
+                                    s.sparseMatch(comb.map(_.data), req) match {
+                                        case Some(res) ⇒
+                                            val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
+
+                                            add(typ, ns, contCache, e, toTokens(res, ns), idxs, s, toParts(res, s))
 
-                                        add(ns, contCache, typ, elm, toTokens(res, ns), tokIdxs, s, toParts(res, s))
+                                            idlCache += comb
+                                        case None ⇒ // No-op.
+                                    }
+                            else {
+                                var found = false
+
+                                for (
+                                    s ← get(mdl.idlSynonyms, eId);
+                                    comb ← mkCombinations(ch, toks, idlCache.toSet);
+                                    data = comb.map(_.data)
+                                    if !found
+                                )
+                                    if (s.isMatch(data, req)) {
+                                        add("IDL continuous", ns, contCache, e, toks, idxs, s, toParts(data, s))
 
                                         idlCache += comb
-                                    case None ⇒ // No-op.
+
+                                        found = true
                                 }
+                            }
+                        }
                     }
                 }
 
@@ -559,6 +584,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
     }
 
     /**
+      * TODO: simplify, add tests, check model properties (sparse etc) for optimization.
       *
       * @param elemId
       * @param toks
@@ -577,7 +603,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                     ||
                 (
                     n.tokenIndexes == toksIdxsSorted ||
-                    n.tokenIndexes.containsSlice(toksIdxsSorted) && U.isContinuous(toksIdxsSorted) && U.isContinuous(n.tokenIndexes)
+                        n.tokenIndexes.containsSlice(toksIdxsSorted) &&
+                        U.isContinuous(toksIdxsSorted) &&
+                        U.isContinuous(n.tokenIndexes)
                 )
             )
         ))
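
The alreadyMarked condition in the last hunk is only re-indented, not changed: in the branch shown, a candidate span counts as covered when an existing note has exactly the same token indexes, or contains them as a slice while both index runs are continuous. A minimal sketch of that check, assuming simplified stand-ins for U.isContinuous and the note structure (CoverageDemo, covered and the sample data are made up):

    object CoverageDemo extends App {
        // Simplified stand-in for U.isContinuous: indexes form a gap-free ascending run.
        def isContinuous(idxs: Seq[Int]): Boolean =
            idxs.zip(idxs.drop(1)).forall { case (a, b) => b == a + 1 }

        // Same shape as the condition in alreadyMarked above.
        def covered(noteIdxs: Seq[Int], candIdxs: Seq[Int]): Boolean =
            noteIdxs == candIdxs ||
                (noteIdxs.containsSlice(candIdxs) && isContinuous(candIdxs) && isContinuous(noteIdxs))

        println(covered(Seq(1, 2, 3), Seq(2, 3))) // true: continuous slice of a continuous note
        println(covered(Seq(1, 3), Seq(3)))       // false: the existing note has a gap
    }
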
