[incubator-nlpcraft] 04/05: WIP.

sergeykamov Tue, 06 Apr 2021 02:15:18 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


commit 66593155e7f4fbf96aa694afa6a0675096ce3166
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Apr 6 12:14:45 2021 +0300

    WIP.
---
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       |  3 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 61 +++++++++++-----------
 .../probe/mgrs/sentence/NCSentenceManager.scala    | 13 +----
 3 files changed, 34 insertions(+), 43 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 95c526f..c54b347 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -114,11 +114,12 @@ class NCProbeSynonym(
                     if (!perm && res.nonEmpty && getIndex(head) <= 
getIndex(res.last))
                         state = -1
                     else {
-                        res += head
                         all ++= seq
 
                         if (all.size > res.size)
                             state = -1
+                        else
+                            res += head
                     }
                 }
                 else
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index f9acd95..30f5084 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,14 +19,14 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
 
 import io.opencensus.trace.Span
 import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken => NlpToken, _}
 import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
 import 
org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, 
TEXT}
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
 import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym, 
NCProbeVariants}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym ⇒ Synonym, 
NCProbeVariants}
 
 import java.io.Serializable
 import java.util
@@ -39,8 +39,9 @@ import scala.collection.{Map, Seq, mutable}
   * Model elements enricher.
   */
 object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
-    type TokenData = (NCToken, NCSynonymChunkKind)
-    
+    type TokType = (NCToken, NCSynonymChunkKind)
+    type Cache = mutable.Map[String, ArrayBuffer[Seq[Int]]]
+
     object Complex {
         def apply(t: NCToken): Complex =
             Complex(
@@ -128,8 +129,8 @@ object NCModelEnricher extends NCProbeEnricher with 
DecorateAsScala {
     case class ElementMatch(
         element: NCElement,
         tokens: Seq[NlpToken],
-        synonym: NCProbeSynonym,
-        parts: Seq[TokenData]
+        synonym: Synonym,
+        parts: Seq[TokType]
     ) extends Ordered[ElementMatch] {
         // Tokens sparsity.
         lazy val sparsity = U.calcSparsity(tokens.map(_.index))
@@ -197,9 +198,9 @@ object NCModelEnricher extends NCProbeEnricher with 
DecorateAsScala {
         elem: NCElement,
         toks: Seq[NlpToken],
         direct: Boolean,
-        syn: Option[NCProbeSynonym],
+        syn: Option[Synonym],
         metaOpt: Option[Map[String, Object]],
-        parts: Seq[TokenData]
+        parts: Seq[TokType]
     ): Unit = {
         val params = mutable.ArrayBuffer.empty[(String, AnyRef)]
 
@@ -279,7 +280,7 @@ object NCModelEnricher extends NCProbeEnricher with 
DecorateAsScala {
       * @param comb
       * @param syn
       */
-    private def getPartsComplex(comb: Seq[Complex], syn: NCProbeSynonym): 
Seq[TokenData] =
+    private def getPartsComplex(comb: Seq[Complex], syn: Synonym): 
Seq[TokType] =
         comb.zip(syn.map(_.kind)).flatMap {
             case (complex, kind) ⇒ if (complex.isToken) Some(complex.token → 
kind)
             else None
@@ -290,19 +291,18 @@ object NCModelEnricher extends NCProbeEnricher with 
DecorateAsScala {
       * @param comb
       * @param syn
       */
-    private def getPartsContent(comb: Seq[NCDslContent], syn: NCProbeSynonym): 
Seq[TokenData] =
+    private def toParts(comb: Seq[NCDslContent], syn: Synonym): Seq[TokType] =
         comb.zip(syn.map(_.kind)).flatMap {
-            case (complex, kind) ⇒ if (complex.isLeft) Some(complex.left.get → 
kind)
-            else None
+            case (complex, kind) ⇒ if (complex.isLeft) Some(complex.left.get → 
kind) else None
         }
 
-    private def mkCache(): mutable.Map[String, ArrayBuffer[Seq[Int]]] =
+    private def mkCache(): Cache =
         mutable.HashMap.empty[
             String,
             mutable.ArrayBuffer[Seq[Int]]
         ].withDefault(_ ⇒ mutable.ArrayBuffer.empty[Seq[Int]])
 
-    private def convert(tows: Seq[NCDslContent], ns: NCNlpSentence): 
Seq[NlpToken] =
+    private def toNlpTokens(tows: Seq[NCDslContent], ns: NCNlpSentence): 
Seq[NlpToken] =
         (
             tows.filter(_.isRight).map(_.right.get) ++
                 tows.filter(_.isLeft).map(_.left.get).
@@ -325,7 +325,7 @@ object NCModelEnricher extends NCProbeEnricher with 
DecorateAsScala {
             val cacheSparse = mkCache()
             val cacheNotSparse = mkCache()
 
-            def addMatch(elm: NCElement, toks: Seq[NlpToken], syn: 
NCProbeSynonym, parts: Seq[TokenData]): Unit = {
+            def addMatch(elm: NCElement, toks: Seq[NlpToken], syn: Synonym, 
parts: Seq[TokType]): Unit = {
                 val toksSet = toks.toSet
 
                 // TODO:
@@ -419,35 +419,36 @@ object NCModelEnricher extends NCProbeEnricher with 
DecorateAsScala {
                     lazy val tokStems = toks.map(_.stem).mkString(" ")
 
                     // Attempt to match each element.
-                    for (elm ← mdl.elements.values) {
-                        val elemId = elm.getId
-                        val sparseEnabled = 
!cacheSparse(elemId).exists(_.containsSlice(indexes))
-                        val notSparseEnabled = 
!cacheNotSparse(elemId).exists(_.containsSlice(indexes))
+                    for (
+                        elm ← mdl.elements.values;
+                        elemId = elm.getId;
+                        sparseEnabled = 
!cacheSparse(elemId).exists(_.containsSlice(indexes));
+                        notSparseEnabled = 
!cacheNotSparse(elemId).exists(_.containsSlice(indexes))
+
+                        if !alreadyMarked(toks, elm.getId) && (sparseEnabled 
|| notSparseEnabled)
+                    ) {
                         var found = false
 
-                        def addSparse(res: Seq[NlpToken], syn: NCProbeSynonym, 
parts: Seq[TokenData]): Unit = {
-                            addMatch(elm, res, syn, parts)
-                            cacheSparse(elemId) += indexes
+                        def add(cache: Cache, res: Seq[NlpToken], s: Synonym, 
parts: Seq[TokType]): Unit = {
+                            addMatch(elm, res, s, parts)
+                            cache(elemId) += indexes
                             found = true
                         }
 
-                        def addNotSparse(syn: NCProbeSynonym, parts: 
Seq[TokenData]): Unit = {
-                            addMatch(elm, toks, syn, parts)
-                            cacheNotSparse(elemId) += indexes
-                            found = true
-                        }
+                        def addSparse(res: Seq[NlpToken], s: Synonym, parts: 
Seq[TokType]): Unit = add(cacheSparse, res, s, parts)
+                        def addNotSparse(s: Synonym, parts: Seq[TokType]): 
Unit = add(cacheNotSparse, toks,  s, parts)
 
                         // 1. Simple, not sparse.
                         if (firstPhase && notSparseEnabled && !found)
                             fastAccess(mdl.nonSparseSynonyms, elemId, 
toks.length) match {
                                 case Some(h) ⇒
-                                    def tryMap(synsMap: Map[String, 
NCProbeSynonym], notFound: () ⇒ Unit): Unit =
+                                    def tryMap(synsMap: Map[String, Synonym], 
notFound: () ⇒ Unit): Unit =
                                         synsMap.get(tokStems) match {
                                             case Some(syn) ⇒ addNotSparse(syn, 
Seq.empty)
                                             case None ⇒ notFound()
                                         }
 
-                                    def tryScan(synsSeq: Seq[NCProbeSynonym]): 
Unit =
+                                    def tryScan(synsSeq: Seq[Synonym]): Unit =
                                         for (syn ← synsSeq if !found)
                                             if (syn.isMatch(toks))
                                                 addNotSparse(syn, Seq.empty)
@@ -495,7 +496,7 @@ object NCModelEnricher extends NCProbeEnricher with 
DecorateAsScala {
                                 comb ← seq if !found
                             ) {
                                 syn.trySparseMatch(comb.map(_.data), req) 
match {
-                                    case Some(towsRes) ⇒ 
addSparse(convert(towsRes, ns), syn, getPartsContent(towsRes, syn))
+                                    case Some(towsRes) ⇒ 
addSparse(toNlpTokens(towsRes, ns), syn, toParts(towsRes, syn))
                                     case None ⇒ // No-op.
                                 }
                             }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index a938f59..fb676d0 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -37,8 +37,6 @@ import scala.language.implicitConversions
 object NCSentenceManager extends NCService {
     @volatile private var pool: java.util.concurrent.ForkJoinPool = _
 
-    private val cache = U.mkLRUMap[Seq[Set[NCNlpSentenceNote]], 
util.List[util.List[NCNlpSentenceNote]]]("sentence-combinations-cache", 500)
-
     case class PartKey(id: String, start: Int, end: Int) {
         require(start <= end)
 
@@ -686,17 +684,8 @@ object NCSentenceManager extends NCService {
                         map { case (_, seq) ⇒ seq.map { case (_, note) ⇒ note 
}.toSet }.
                         toSeq.sortBy(-_.size)
 
-
-                var combs: JList[JList[NCNlpSentenceNote]] = 
cache.get(toksByIdx)
-
-                if (combs == null) {
-                    combs = 
NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, pool)
-
-                    cache.put(toksByIdx, combs)
-                }
-
                 val seqSens =
-                    combs.asScala.map(_.asScala).
+                    
NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava, 
pool).asScala.map(_.asScala).
                         par.
                         flatMap(delComb ⇒ {
                             val nsClone = sen.clone()

[incubator-nlpcraft] 04/05: WIP.

Reply via email to