[incubator-nlpcraft] branch NLPCRAFT-287 updated: WIP.

sergeykamov Sat, 03 Apr 2021 12:13:58 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-287 by this push:
     new 7ee5235  WIP.
7ee5235 is described below

commit 7ee5235ec8c15d38980aad2b0c7dac88cdd6bc7e
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Apr 3 22:13:02 2021 +0300

    WIP.
---
 .../org/apache/nlpcraft/common/util/NCUtils.scala  |  12 ++
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       | 166 ++++++++-------------
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  21 +--
 3 files changed, 83 insertions(+), 116 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index fc8bcf8..141e813 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -2110,4 +2110,16 @@ object NCUtils extends LazyLogging {
       * @return
       */
     def getYamlMapper: ObjectMapper = YAML
+
+    /**
+      *
+      * @param list
+      * @tparam T
+      * @return
+      */
+    def permute[T](list: List[List[T]]): List[List[T]] =
+        list match {
+            case Nil ⇒ List(Nil)
+            case head :: tail ⇒ for (h ← head; t ← permute(tail)) yield h :: t
+        }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index d09418a..4dd1c61 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -17,10 +17,11 @@
 
 package org.apache.nlpcraft.probe.mgrs
 
+import org.apache.nlpcraft.common.U
 import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, NCNlpSentenceTokenBuffer}
 import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
 import org.apache.nlpcraft.model.intent.NCIdlContext
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind._
 
 import scala.collection.mutable
@@ -85,90 +86,44 @@ class NCProbeSynonym(
 
     /**
       *
-      * @param toks
+      * @param sen
       * @return
       */
-    def isMatch(toks: NCNlpSentenceTokenBuffer): Boolean = {
-        require(toks != null)
+    private def trySparseMatch0[T](sen: Seq[T], isMatch: (T, NCProbeSynonymChunk) ⇒ Boolean, getIndex: T ⇒ Int): List[List[T]] = {
+        require(sen != null)
+        require(sen.nonEmpty)
+        require(this.size > 1)
 
-        if (toks.length == length) {
-            if (isTextOnly)
-                toks.stemsHash == stemsHash && toks.stems == stems
-            else
-                toks.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall { case (tok, chunk) ⇒ isMatch(tok, chunk) }
-        }
-        else
-            false
-    }
+        lazy val buf = mutable.ArrayBuffer.empty[List[T]]
+        lazy val flattenBuf = mutable.ArrayBuffer.empty[T]
 
-    /**
-      *
-      * @param ok
-      * @param buf
-      * @tparam T
-      */
-    private def convertResult[T](ok: Boolean, buf: mutable.ArrayBuffer[T]): Option[Seq[T]] =
-        if (ok) {
-            require(buf.nonEmpty)
-
-            Some(buf)
-        }
-        else
-            None
+        var ok = true
 
-    private def collectMatches[T](seq: Seq[T], tryMatch: Seq[T] ⇒ Option[Seq[T]]): Seq[Seq[T]] = {
-        val buf = mutable.ArrayBuffer.empty[Seq[T]]
+        for (chunk ← this if ok) {
+            val res = sen.filter(tok ⇒ !flattenBuf.contains(tok) && isMatch(tok, chunk))
 
-        var ok = true
-        var arg = seq
-
-        while (ok) {
-            tryMatch(arg) match {
-                case Some(ts) ⇒
-                    buf += ts
-                    arg = arg.filter(t ⇒ !ts.contains(t))
-                case None ⇒ ok = false
+            if (res.nonEmpty) {
+                buf += res.toList
+                flattenBuf ++= res
             }
+            else
+                ok = false
         }
 
-        buf
-    }
-
-    /**
-      *
-      * @param sen
-      * @return
-      */
-    def trySparseMatch(sen: NCNlpSentenceTokenBuffer): Seq[Seq[NCNlpSentenceToken]] = {
-        require(sen != null)
-        require(sen.nonEmpty)
-        require(this.size > 1)
+        if (ok) {
+            var variants = U.permute(buf.toList)
 
-        def trySparseMatch0(sen: Seq[NCNlpSentenceToken]): Option[Seq[NCNlpSentenceToken]] = {
-            var ok = true
-            val buf = mutable.ArrayBuffer.empty[NCNlpSentenceToken]
+            def isOrdered(list: List[T]): Boolean =
+                list.tail.zipWithIndex.forall { case (t, idx) ⇒ getIndex(t) > getIndex(list(idx)) }
 
-            if (!perm) {
-                var lastIdx = 0
-                val tokIdxs = sen.zipWithIndex.toMap
+            if (!perm)
+                variants = variants.filter(isOrdered)
 
-                for (chunk ← this if ok)
-                    sen.drop(lastIdx).find(tok ⇒ isMatch(tok, chunk)) match {
-                        case Some(tok) ⇒ buf += tok; lastIdx = tokIdxs(tok) + 1
-                        case None ⇒ ok = false
-                    }
-            }
-            else
-                for (chunk ← this if ok)
-                    sen.find(tok ⇒ !buf.contains(tok) && isMatch(tok, chunk)) match {
-                        case Some(tok) ⇒ buf += tok
-                        case None ⇒ ok = false
-                    }
+            variants
 
-            convertResult(ok, buf)
         }
-
-        collectMatches(sen, trySparseMatch0)
+        else
+            List.empty
     }
 
     /**
@@ -197,6 +152,36 @@ class NCProbeSynonym(
 
     /**
       *
+      * @param toks
+      * @return
+      */
+    def isMatch(toks: NCNlpSentenceTokenBuffer): Boolean = {
+        require(toks != null)
+
+        if (toks.length == length) {
+            if (isTextOnly)
+                toks.stemsHash == stemsHash && toks.stems == stems
+            else
+                toks.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall { case (tok, chunk) ⇒ isMatch(tok, chunk) }
+        }
+        else
+            false
+    }
+
+    /**
+      *
+      * @param sen
+      * @return
+      */
+    def trySparseMatch(sen: NCNlpSentenceTokenBuffer): List[List[NCNlpSentenceToken]] =
+        trySparseMatch0(
+            sen,
+            isMatch,
+            (t: NCNlpSentenceToken) ⇒ t.startCharIndex
+        )
+
+    /**
+      *
       * @param tows
       * @param req
       * @return
@@ -215,37 +200,12 @@ class NCProbeSynonym(
       * @param sen
       * @param req
       */
-    def trySparseMatch(sen: Seq[NCDslContent], req: NCRequest): Seq[Seq[NCDslContent]] = {
-        require(sen != null)
-        require(sen.nonEmpty)
-        require(this.size > 1)
-
-        def trySparseMatch0(sen: Seq[NCDslContent]): Option[Seq[NCDslContent]] = {
-            var ok = true
-            val buf = mutable.ArrayBuffer.empty[NCDslContent]
-
-            if (!perm) {
-                var lastIdx = 0
-                val tokIdxs = sen.zipWithIndex.toMap
-
-                for (chunk ← this if ok)
-                    sen.drop(lastIdx).find(tow ⇒ isMatch(tow, chunk, req)) match {
-                        case Some(t) ⇒ buf += t; lastIdx = tokIdxs(t) + 1
-                        case None ⇒ ok = false
-                    }
-            }
-            else
-                for (chunk ← this if ok)
-                    sen.find(tow ⇒ !buf.contains(tow) && isMatch(tow, chunk, req)) match {
-                        case Some(tow) ⇒ buf += tow
-                        case None ⇒ ok = false
-                    }
-
-            convertResult(ok, buf)
-        }
-
-        collectMatches(sen, trySparseMatch0)
-    }
+    def trySparseMatch(sen: Seq[NCDslContent], req: NCRequest): List[List[NCDslContent]] =
+        trySparseMatch0(
+            sen,
+            (t: NCDslContent, chunk: NCProbeSynonymChunk) ⇒ isMatch(t, chunk, req),
+            (t: NCDslContent) ⇒ if (t.isLeft) t.left.get.getStartCharIndex else t.right.get.startCharIndex
+        )
 
     override def toString(): String = mkString(" ")
     
@@ -358,4 +318,4 @@ object NCProbeSynonym {
         
         syn
     }
-}
+}
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 22a6a5b..9b98dc2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -279,9 +279,12 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
         startScopedSpan("enrich", parent,
             "srvReqId" → ns.srvReqId,
             "mdlId" → mdl.model.getId,
-            "txt" → ns.text) { span ⇒
+            "txt" → ns.text
+        ) { span ⇒
             val cache = mutable.HashSet.empty[Seq[Int]]
             val req = NCRequestImpl(senMeta, ns.srvReqId)
+            val tokIdxs = ns.map(t ⇒ t → t.wordIndexes).toMap
+            val senHasUserTokens = ns.exists(_.isUser)
             val matches = mutable.ArrayBuffer.empty[ElementMatch]
 
             def addMatch(elm: NCElement, toks: Seq[NlpToken], syn: NCProbeSynonym, parts: Seq[TokenData]): Boolean = {
@@ -347,7 +350,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                             
}).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct
                     ).seq
 
-            val tokIdxs = ns.map(t ⇒ t → t.wordIndexes).toMap
 
             startScopedSpan("synsProc", span,
                 "srvReqId" → ns.srvReqId,
@@ -356,7 +358,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
             ) {
                 _ ⇒
                 // 1. Simple, sparse.
-                if (!ns.exists(_.isUser))
+                if (!senHasUserTokens)
                     for ((elemId, syns) ← mdl.sparseSynonyms; syn ← syns)
                         syn.trySparseMatch(ns).foreach(toks ⇒ addMatch(mdl.elements(elemId), toks, syn, Seq.empty))
 
@@ -431,7 +433,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
 
                             // 3. Simple, not sparse.
                             // Optimization - plain synonyms can be used only 
on first iteration
-                            if (mdl.nonSparseSynonyms.nonEmpty && !ns.exists(_.isUser))
+                            if (mdl.nonSparseSynonyms.nonEmpty && !senHasUserTokens)
                                 fastAccess(mdl.nonSparseSynonyms, elm.getId, 
toks.length) match {
                                     case Some(h) ⇒
                                         def tryMap(synsMap: Map[String, 
NCProbeSynonym], notFound: () ⇒ Unit): Unit =
@@ -464,7 +466,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                     case None ⇒ // No-op.
                                 }
 
-                            if (mdl.nonSparseSynonymsDsl.nonEmpty) {
+                            if (mdl.nonSparseSynonymsDsl.nonEmpty)
                                 // 4. DSL, non sparse.
                                 for (
                                     (len, seq) ← dslCombs;
@@ -474,7 +476,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                 )
                                     if (syn.isMatch(data, req))
                                         setFound(elm, toks, syn, 
getParts(comb, syn))
-                            }
                         }
                     }
                 }
@@ -492,14 +493,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                     flatMap(m ⇒ m.tokens.map(_ → m)).
                     groupBy { case (t, m) ⇒ (m.element.getId, m.length, t) }.
                     flatMap { case (_, seq) ⇒
-                        def perm[T](list: List[List[T]]): List[List[T]] =
-                            list match {
-                                case Nil ⇒ List(Nil)
-                                case head :: tail ⇒ for (h ← head; t ← perm(tail)) yield h :: t
-                            }
-
                         // Optimization by sparsity sum for each tokens set 
for one element found with same tokens count.
-                        perm(
+                        U.permute(
                             seq.groupBy { case (tok, _) ⇒ tok }.
                                 map { case (_, seq) ⇒ seq.map { case (_, m) ⇒ 
m }.toList }.toList
                         ).minBy(_.map(_.sparsity).sum)

[incubator-nlpcraft] branch NLPCRAFT-287 updated: WIP.

Reply via email to