[incubator-nlpcraft] branch NLPCRAFT-443-1 updated: WIP.

sergeykamov Wed, 22 Sep 2021 06:46:52 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-443-1 by this push:
     new 72dc24f  WIP.
72dc24f is described below

commit 72dc24fc345f28d2bc73401c6881bcae0bcb34b2
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Sep 22 16:46:41 2021 +0300

    WIP.
---
 .../org/apache/nlpcraft/probe/NCProbeBoot.scala    |   3 +-
 .../nlpcraft/probe/mgrs/NCProbeSynonym.scala       | 205 -------------
 .../nlpcraft/probe/mgrs/NCProbeVariants.scala      |  31 +-
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  |   5 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  10 +-
 .../probe/mgrs/sentence/NCSentenceManager.scala    |  46 +--
 .../probe/mgrs/sentence/NCSynonymsManager.scala    | 333 +++++++++++++++++++++
 7 files changed, 348 insertions(+), 285 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
index ecf7a18..4df9f53 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
@@ -49,7 +49,7 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort.NCSortEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.validate.NCValidateManager
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager, NCSynonymsManager}
 
 import java.io._
 import java.util.concurrent.CompletableFuture
@@ -527,6 +527,7 @@ private [probe] object NCProbeBoot extends LazyLogging with NCOpenCensusTrace {
             startedMgrs += NCConnectionManager.start(span)
             startedMgrs += NCDialogFlowManager.start(span)
             startedMgrs += NCSentenceManager.start(span)
+            startedMgrs += NCSynonymsManager.start(span)
         }
     }
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index e324857..7eefd97 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -17,13 +17,9 @@
 
 package org.apache.nlpcraft.probe.mgrs
 
-import org.apache.nlpcraft.common.U
 import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
 import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
-import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.{NCIdlContent, saveIdl}
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind._
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
 
 import scala.collection.mutable
 
@@ -56,199 +52,6 @@ class NCProbeSynonym(
     lazy val isValueSynonym: Boolean = value != null
     lazy val stems: String = map(_.wordStem).mkString(" ")
 
-    /**
-      *
-      * @param kind
-      * @return
-      */
-    private def getSort(kind: NCSynonymChunkKind): Int =
-        kind match {
-            case TEXT => 0
-            case IDL => 1
-            case REGEX => 2
-            case _ => throw new AssertionError(s"Unexpected kind: $kind")
-        }
-
-    /**
-      *
-      * @param tok
-      * @param chunk
-      */
-    private def isMatch(tok: NCNlpSentenceToken, chunk: NCProbeSynonymChunk): Boolean =
-        chunk.kind match {
-            case TEXT => chunk.wordStem == tok.stem
-            case REGEX =>
-                val regex = chunk.regex
-
-                regex.matcher(tok.origText).matches() || regex.matcher(tok.normText).matches()
-            case IDL => throw new AssertionError()
-            case _ => throw new AssertionError()
-        }
-
-    /**
-      *
-      * @param toks
-      * @param isMatch
-      * @param getIndex
-      * @param shouldBeNeighbors
-      * @tparam T
-      * @return
-      */
-    private def sparseMatch0[T](
-        toks: Seq[T],
-        isMatch: (T, NCProbeSynonymChunk) => Boolean,
-        getIndex: T => Int,
-        shouldBeNeighbors: Boolean
-    ): Option[Seq[T]] =
-        if (toks.size >= this.size) {
-            lazy val res = mutable.ArrayBuffer.empty[T]
-            lazy val all = mutable.HashSet.empty[T]
-
-            var state = 0
-
-            for (chunk <- this if state != -1) {
-                val seq =
-                    if (state == 0) {
-                        state = 1
-
-                        toks.filter(t => isMatch(t, chunk))
-                    }
-                    else
-                        toks.filter(t => !res.contains(t) && isMatch(t, chunk))
-
-                if (seq.nonEmpty) {
-                    val head = seq.head
-
-                    if (!permute && res.nonEmpty && getIndex(head) <= getIndex(res.last))
-                        state = -1
-                    else {
-                        all ++= seq
-
-                        if (all.size > this.size)
-                            state = -1
-                        else
-                            res += head
-                    }
-                }
-                else
-                    state = -1
-            }
-
-            if (state != -1 && all.size == res.size && (!shouldBeNeighbors || U.isIncreased(res.map(getIndex).toSeq.sorted)))
-                Some(res.toSeq)
-            else
-                None
-        }
-        else
-            None
-
-    /**
-      *
-      * @param tow
-      * @param chunk
-      * @param req
-      * @param variantsToks
-      */
-    private def isMatch(
-        tow: NCIdlContent, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
-    ): Boolean = {
-        def get0[T](fromToken: NCToken => T, fromWord: NCNlpSentenceToken => T): T =
-            if (tow.isLeft) fromToken(tow.swap.toOption.get) else fromWord(tow.toOption.get)
-
-        chunk.kind match {
-            case TEXT => chunk.wordStem == get0(_.stem, _.stem)
-
-            case REGEX =>
-                val r = chunk.regex
-
-                r.matcher(get0(_.origText, _.origText)).matches() || r.matcher(get0(_.normText, _.normText)).matches()
-
-            case IDL =>
-                val ok =
-                    variantsToks.exists(variantToks =>
-                        get0(t =>
-                            chunk.idlPred.apply(
-                                t,
-                                NCIdlContext(req = req, toks = variantToks)
-                            ).value.asInstanceOf[Boolean], _ => false
-                        )
-                    )
-
-                if (ok)
-                    saveIdl(req, tow.swap.toOption.get, chunk.idlPred)
-
-                ok
-
-            case _ => throw new AssertionError()
-        }
-    }
-
-    /**
-      *
-      * @param toks
-      */
-    def isMatch(toks: Seq[NCNlpSentenceToken]): Boolean = {
-        require(toks != null)
-        require(!sparse && !hasIdl)
-
-        if (toks.length == length) {
-            if (isTextOnly)
-                toks.zip(this).forall(p => p._1.stem == p._2.wordStem)
-            else
-                toks.zip(this).sortBy(p => getSort(p._2.kind)).forall { case (tok, chunk) => isMatch(tok, chunk) }
-        }
-        else
-            false
-    }
-
-    /**
-      *
-      * @param tows
-      * @param req
-      * @param variantsToks
-      * @return
-      */
-    def isMatch(tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Boolean= {
-        require(tows != null)
-
-        if (tows.length == length && tows.count(_.isLeft) >= idlChunks)
-            tows.zip(this).sortBy(p => getSort(p._2.kind)).forall {
-                case (tow, chunk) => isMatch(tow, chunk, req, variantsToks)
-            }
-        else
-            false
-    }
-
-    /**
-      *
-      * @param toks
-      */
-    def sparseMatch(toks: Seq[NCNlpSentenceToken]): Option[Seq[NCNlpSentenceToken]] = {
-        require(toks != null)
-        require(sparse && !hasIdl)
-
-        sparseMatch0(toks, isMatch, (t: NCNlpSentenceToken) => t.startCharIndex, shouldBeNeighbors = false)
-    }
-
-    /**
-      *
-      * @param tows
-      * @param req
-      * @param variantsToks
-      */
-    def sparseMatch(tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Option[Seq[NCIdlContent]] = {
-        require(tows != null)
-        require(req != null)
-        require(hasIdl)
-
-        sparseMatch0(
-            tows,
-            (t: NCIdlContent, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
-            (t: NCIdlContent) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex else t.toOption.get.startCharIndex,
-            shouldBeNeighbors = !sparse
-        )
-    }
-
     override def toString(): String = mkString(" ")
 
     // Orders synonyms from least to most significant.
@@ -366,12 +169,4 @@ object NCProbeSynonym {
 
         syn
     }
-
-    /**
-      *
-      * @param req
-      * @param tok
-      * @param idlPred
-      */
-    def saveIdl(req: NCRequest, tok: NCToken, idlPred: NCIdlFunction): Unit = NCSentenceManager.saveIdl(req, tok, idlPred)
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
index 39f6969..e876065 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
@@ -22,8 +22,7 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence => NlpSentence, NCNlpSenten
 import org.apache.nlpcraft.common.{NCE, TOK_META_ALIASES_KEY}
 import org.apache.nlpcraft.model.NCVariant
 import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCTokenLogger, NCVariantImpl}
-import org.apache.nlpcraft.model.intent.NCIdlContext
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSynonymsManager
 
 import java.io.{Serializable => JSerializable}
 import java.util
@@ -269,33 +268,7 @@ object NCProbeVariants {
                 for ((tok, tokNlp) <- toks.zip(nlpSen) if tokNlp.isUser)
                     process(tok, tokNlp)
 
-                if (ok) {
-                    NCSentenceManager.getIdlData(srvReqId) match {
-                        case Some((req, toksData)) =>
-                            ok =
-                                toks.forall(t =>
-                                    toksData.get((t, t.getId)) match {
-                                        case Some(f) =>
-                                            val x =
-                                            f.apply(
-                                                t,
-                                                NCIdlContext(req = req, toks = toks.toSeq)
-                                            ).value.asInstanceOf[Boolean]
-
-
-                                            if (!x)
-                                                println("x="+x + ", t=" + t  + ", toks=" + toks)
-                                            x
-
-
-                                        case None => true
-                                    }
-                                )
-
-                        case None =>  // No-op.
-
-                    }
-                }
+                ok = ok  && NCSynonymsManager.isStillValid(srvReqId, toks.toSeq)
 
                 if (ok) Some(new NCVariantImpl(toks.asJava)) else None
             })
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 9af0c61..64049ac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -43,7 +43,7 @@ import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl._
 import org.apache.nlpcraft.probe.mgrs.nlp.validate._
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager, NCSynonymsManager}
 import org.apache.nlpcraft.probe.mgrs.{NCProbeMessage, NCProbeVariants}
 
 import java.io.Serializable
@@ -554,7 +554,8 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
 
         var senVars = NCProbeVariants.convert(srvReqId, mdl, sensSeq, lastPhase = true)
 
-        NCSentenceManager.clearCache(srvReqId)
+        NCSentenceManager.clearRequestData(srvReqId)
+        NCSynonymsManager.clearRequestData(srvReqId)
 
         // Sentence variants can be filtered by model.
         val fltSenVars: Seq[(NCVariant, Int)] =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 7a11806..03c5b5d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -27,7 +27,7 @@ import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.NCSynonymChunkKind
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager, NCSynonymsManager}
 import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
 
 import java.io.Serializable
@@ -584,7 +584,7 @@ object NCModelEnricher extends NCProbeEnricher {
 
                                         def tryScan(syns: Seq[Synonym]): Unit =
                                             for (s <- syns if !found)
-                                                if (s.isMatch(toks)) {
+                                                if (NCSynonymsManager.isMatch(s, toks)) {
                                                     found = true
                                                     add("simple continuous scan", ns, contCache, eId, greedy, toksExt, idxs, s)
                                                 }
@@ -604,7 +604,7 @@ object NCModelEnricher extends NCProbeEnricher {
                             // 1.2 Sparse.
                             if (!found && mdl.hasSparseSynonyms)
                                 for (s <- get(mdl.sparseSynonyms, eId))
-                                    s.sparseMatch(toks) match {
+                                    NCSynonymsManager.sparseMatch(s, toks) match {
                                         case Some(res) =>
                                             add("simple sparse", ns, contCache, eId, greedy, getSparsedTokens(res, toks), idxs, s)
                                         case None => // No-op.
@@ -627,7 +627,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                     if !found;
                                     data = comb.map(_.data)
                                 )
-                                    if (s.isMatch(data, req, variantsToks)) {
+                                    if (NCSynonymsManager.isMatch(s, data, req, variantsToks)) {
                                         val parts = toParts(mdl, ns.srvReqId, data, s)
 
                                         add("IDL continuous", ns, contCache, eId, greedy, toksExt, idxs, s, parts)
@@ -643,7 +643,7 @@ object NCModelEnricher extends NCProbeEnricher {
                                     s <- allSyns;
                                     comb <- allCombs
                                 )
-                                    s.sparseMatch(comb.map(_.data), req, variantsToks) match {
+                                    NCSynonymsManager.sparseMatch(s, comb.map(_.data), req, variantsToks) match {
                                         case Some(res) =>
                                             val typ = if (s.sparse) "IDL sparse" else "IDL continuous"
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index b0a077a..2e280ac 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -22,9 +22,9 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence.NoteLink
 import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
 import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken}
 import org.apache.nlpcraft.common.{NCE, NCService, U, _}
-import org.apache.nlpcraft.model.intent.NCIdlFunction
-import org.apache.nlpcraft.model.{NCModel, NCRequest, NCToken}
+import org.apache.nlpcraft.model.NCModel
 import org.apache.nlpcraft.probe.mgrs.NCTokenPartKey
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSynonymsManager.{idlCache, reqCache}
 
 import java.io.{Serializable => JSerializable}
 import java.util
@@ -44,14 +44,6 @@ object NCSentenceManager extends NCService {
     type CacheValue = Seq[Seq[NCNlpSentenceNote]]
     private val combCache = mutable.HashMap.empty[String, mutable.HashMap[CacheKey, CacheValue]]
 
-    type IdlCacheKey = (NCToken, String)
-    private val reqCache = mutable.HashMap.empty[String, NCRequest]
-    private val idlCache = mutable.HashMap.empty[String, mutable.HashMap[IdlCacheKey, NCIdlFunction]]
-
-    /**
-      *
-      * @param notes
-      */
     def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
         val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
 
@@ -822,37 +814,5 @@ object NCSentenceManager extends NCService {
       *
       * @param srvReqId
       */
-    def clearCache(srvReqId: String): Unit = {
-        combCache -= srvReqId
-        reqCache -= srvReqId
-        idlCache -= srvReqId
-    }
-
-    def saveIdl(req: NCRequest, tok: NCToken, idlPred: NCIdlFunction): Unit = {
-        val srvReqId = req.getServerRequestId
-
-        reqCache += srvReqId -> req
-
-        val idlCacheReq: mutable.Map[IdlCacheKey, NCIdlFunction] =
-            idlCache.get(srvReqId) match {
-                case Some(m) => m
-                case None =>
-                    val m  = mutable.HashMap.empty[IdlCacheKey, NCIdlFunction]
-
-                    idlCache += srvReqId -> m
-
-                    m
-            }
-
-        idlCacheReq += (tok, tok.getId) -> idlPred
-    }
-
-    def getIdlData(srvReqId: String) : Option[(NCRequest, Map[IdlCacheKey, NCIdlFunction])] = {
-        val reqData = reqCache.get(srvReqId)
-        val idlData = idlCache.get(srvReqId)
-
-        require(reqData.isDefined && idlData.isDefined || reqData.isEmpty && idlData.isEmpty)
-
-        if (reqData.isDefined) Some((reqData.get, idlData.get.toMap)) else None
-    }
+    def clearRequestData(srvReqId: String): Unit = combCache -= srvReqId
 }
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala
new file mode 100644
index 0000000..e6258ec
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.sentence
+
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
+import org.apache.nlpcraft.common.{NCService, U}
+import org.apache.nlpcraft.model._
+import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{IDL, NCSynonymChunkKind, REGEX, TEXT}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeSynonymChunk, NCProbeSynonym => Synonym}
+
+import scala.collection.mutable
+
+/**
+  *
+  */
+object NCSynonymsManager extends NCService {
+    type IdlCacheKey = (NCToken, String)
+
+    private val reqCache = mutable.HashMap.empty[String, NCRequest]
+    private val idlCache = mutable.HashMap.empty[String, mutable.HashMap[IdlCacheKey, NCIdlFunction]]
+
+    override def start(parent: Span): NCService = {
+        ackStarting()
+
+        ackStarted()
+    }
+
+    override def stop(parent: Span): Unit = {
+        ackStopping()
+
+        ackStopped()
+    }
+
+    /**
+      *
+      * @param tok
+      * @param chunk
+      */
+    private def isMatch(tok: NCNlpSentenceToken, chunk: NCProbeSynonymChunk): Boolean =
+        chunk.kind match {
+            case TEXT => chunk.wordStem == tok.stem
+            case REGEX =>
+                val regex = chunk.regex
+
+                regex.matcher(tok.origText).matches() || regex.matcher(tok.normText).matches()
+            case IDL => throw new AssertionError()
+            case _ => throw new AssertionError()
+        }
+
+    /**
+      *
+      * @param kind
+      */
+    private def getSort(kind: NCSynonymChunkKind): Int =
+        kind match {
+            case TEXT => 0
+            case IDL => 1
+            case REGEX => 2
+            case _ => throw new AssertionError(s"Unexpected kind: $kind")
+        }
+
+    /**
+      *
+      * @param s
+      * @param toks
+      * @param isMatch
+      * @param getIndex
+      * @param shouldBeNeighbors
+      * @tparam T
+      */
+    private def sparseMatch0[T](
+        s: Synonym,
+        toks: Seq[T],
+        isMatch: (T, NCProbeSynonymChunk) => Boolean,
+        getIndex: T => Int,
+        shouldBeNeighbors: Boolean
+    ): Option[Seq[T]] =
+        if (toks.size >= s.size) {
+            lazy val res = mutable.ArrayBuffer.empty[T]
+            lazy val all = mutable.HashSet.empty[T]
+
+            var state = 0
+
+            for (chunk <- s if state != -1) {
+                val seq =
+                    if (state == 0) {
+                        state = 1
+
+                        toks.filter(t => isMatch(t, chunk))
+                    }
+                    else
+                        toks.filter(t => !res.contains(t) && isMatch(t, chunk))
+
+                if (seq.nonEmpty) {
+                    val head = seq.head
+
+                    if (!s.permute && res.nonEmpty && getIndex(head) <= getIndex(res.last))
+                        state = -1
+                    else {
+                        all ++= seq
+
+                        if (all.size > s.size)
+                            state = -1
+                        else
+                            res += head
+                    }
+                }
+                else
+                    state = -1
+            }
+
+            if (state != -1 && all.size == res.size && (!shouldBeNeighbors || U.isIncreased(res.map(getIndex).toSeq.sorted)))
+                Some(res.toSeq)
+            else
+                None
+        }
+        else
+            None
+
+    /**
+      *
+      * @param req
+      * @param tok
+      * @param idlPred
+      */
+    private def savePredicate(req: NCRequest, tok: NCToken, idlPred: NCIdlFunction): Unit = {
+        val srvReqId = req.getServerRequestId
+
+        reqCache += srvReqId -> req
+
+        val idlCacheReq: mutable.Map[IdlCacheKey, NCIdlFunction] =
+            idlCache.get(srvReqId) match {
+                case Some(m) => m
+                case None =>
+                    val m  = mutable.HashMap.empty[IdlCacheKey, NCIdlFunction]
+
+                    idlCache += srvReqId -> m
+
+                    m
+            }
+
+        idlCacheReq += (tok, tok.getId) -> idlPred
+    }
+
+    /**
+      *
+      * @param tow
+      * @param chunk
+      * @param req
+      * @param variantsToks
+      */
+    private def isMatch(
+        tow: NCIdlContent, chunk: NCProbeSynonymChunk, req: NCRequest, variantsToks: Seq[Seq[NCToken]]
+    ): Boolean = {
+        def get0[T](fromToken: NCToken => T, fromWord: NCNlpSentenceToken => T): T =
+            if (tow.isLeft) fromToken(tow.swap.toOption.get) else fromWord(tow.toOption.get)
+
+        chunk.kind match {
+            case TEXT => chunk.wordStem == get0(_.stem, _.stem)
+
+            case REGEX =>
+                val r = chunk.regex
+
+                r.matcher(get0(_.origText, _.origText)).matches() || r.matcher(get0(_.normText, _.normText)).matches()
+
+            case IDL =>
+                val ok =
+                    variantsToks.exists(variantToks =>
+                        get0(t =>
+                            chunk.idlPred.apply(
+                                t,
+                                NCIdlContext(req = req, toks = variantToks)
+                            ).value.asInstanceOf[Boolean], _ => false
+                        )
+                    )
+
+                if (ok)
+                    savePredicate(req, tow.swap.toOption.get, chunk.idlPred)
+
+                ok
+
+            case _ => throw new AssertionError()
+        }
+    }
+
+    /**
+      *
+      * @param s
+      * @param toks
+      */
+    def isMatch(s: Synonym, toks: Seq[NCNlpSentenceToken]): Boolean = {
+        require(toks != null)
+        require(!s.sparse && !s.hasIdl)
+
+        if (toks.length == s.length) {
+            if (s.isTextOnly)
+                toks.zip(s).forall(p => p._1.stem == p._2.wordStem)
+            else
+                toks.zip(s).sortBy(p => getSort(p._2.kind)).forall { case (tok, chunk) => isMatch(tok, chunk) }
+        }
+        else
+            false
+    }
+
+    /**
+      *
+      * @param s
+      * @param tows
+      * @param req
+      * @param variantsToks
+      */
+    def isMatch(s: Synonym, tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Boolean= {
+        require(tows != null)
+
+        if (tows.length == s.length && tows.count(_.isLeft) >= s.idlChunks)
+            tows.zip(s).sortBy(p => getSort(p._2.kind)).forall {
+                case (tow, chunk) => isMatch(tow, chunk, req, variantsToks)
+            }
+        else
+            false
+    }
+
+    /**
+      *
+      * @param s
+      * @param toks
+      */
+    def sparseMatch(s: Synonym, toks: Seq[NCNlpSentenceToken]): Option[Seq[NCNlpSentenceToken]] = {
+        require(toks != null)
+        require(s.sparse && !s.hasIdl)
+
+        sparseMatch0(s, toks, isMatch, (t: NCNlpSentenceToken) => t.startCharIndex, shouldBeNeighbors = false)
+    }
+
+    /**
+      *
+      * @param s
+      * @param tows
+      * @param req
+      * @param variantsToks
+      */
+    def sparseMatch(s: Synonym, tows: Seq[NCIdlContent], req: NCRequest, variantsToks: Seq[Seq[NCToken]]): Option[Seq[NCIdlContent]] = {
+        require(tows != null)
+        require(req != null)
+        require(s.hasIdl)
+
+        sparseMatch0(
+            s,
+            tows,
+            (t: NCIdlContent, chunk: NCProbeSynonymChunk) => isMatch(t, chunk, req, variantsToks),
+            (t: NCIdlContent) => if (t.isLeft) t.swap.toOption.get.getStartCharIndex else t.toOption.get.startCharIndex,
+            shouldBeNeighbors = !s.sparse
+        )
+    }
+
+    /**
+      *
+      * @param srvReqId
+      */
+    def getPredicate(srvReqId: String) : Option[(NCRequest, Map[IdlCacheKey, NCIdlFunction])] = {
+        val reqData = reqCache.get(srvReqId)
+        val idlData = idlCache.get(srvReqId)
+
+        require(reqData.isDefined && idlData.isDefined || reqData.isEmpty && idlData.isEmpty)
+
+        if (reqData.isDefined) Some((reqData.get, idlData.get.toMap)) else None
+    }
+
+    /**
+      *
+      * @param srvReqId
+      * @param toks
+      * @return
+      */
+    def isStillValid(srvReqId: String, toks: Seq[NCToken]): Boolean = {
+        val reqData = reqCache.get(srvReqId)
+        val idlData = idlCache.get(srvReqId)
+
+        require(reqData.isDefined && idlData.isDefined || reqData.isEmpty && idlData.isEmpty)
+
+        if (reqData.isDefined) {
+            val req = reqData.get
+            val idl = idlData.get.toMap
+
+            toks.forall(t =>
+                idl.get((t, t.getId)) match {
+                    case Some(f) =>
+                        val x =
+                            f.apply(
+                                t, NCIdlContext(req = req, toks = toks)
+                            ).value.asInstanceOf[Boolean]
+
+
+                        if (!x)
+                            println("x="+x + ", t=" + t  + ", toks=" + toks)
+
+                        x
+
+                    case None => true
+                }
+            )
+        }
+        else
+            true
+    }
+
+    /**
+      *
+      * @param srvReqId
+      */
+    def clearRequestData(srvReqId: String): Unit = {
+        reqCache -= srvReqId
+        idlCache -= srvReqId
+    }
+}

[incubator-nlpcraft] branch NLPCRAFT-443-1 updated: WIP.

Reply via email to