This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443-1 by this push:
new 72dc24f WIP.
72dc24f is described below
commit 72dc24fc345f28d2bc73401c6881bcae0bcb34b2
Author: Sergey Kamov <[email protected]>
AuthorDate: Wed Sep 22 16:46:41 2021 +0300
WIP.
---
.../org/apache/nlpcraft/probe/NCProbeBoot.scala | 3 +-
.../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 205 -------------
.../nlpcraft/probe/mgrs/NCProbeVariants.scala | 31 +-
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 5 +-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 10 +-
.../probe/mgrs/sentence/NCSentenceManager.scala | 46 +--
.../probe/mgrs/sentence/NCSynonymsManager.scala | 333 +++++++++++++++++++++
7 files changed, 348 insertions(+), 285 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
index ecf7a18..4df9f53 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala
@@ -49,7 +49,7 @@ import
org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort.NCSortEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
import
org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.validate.NCValidateManager
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager,
NCSynonymsManager}
import java.io._
import java.util.concurrent.CompletableFuture
@@ -527,6 +527,7 @@ private [probe] object NCProbeBoot extends LazyLogging with
NCOpenCensusTrace {
startedMgrs += NCConnectionManager.start(span)
startedMgrs += NCDialogFlowManager.start(span)
startedMgrs += NCSentenceManager.start(span)
+ startedMgrs += NCSynonymsManager.start(span)
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index e324857..7eefd97 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -17,13 +17,9 @@
package org.apache.nlpcraft.probe.mgrs
-import org.apache.nlpcraft.common.U
import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
-import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.{NCIdlContent, saveIdl}
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind._
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
import scala.collection.mutable
@@ -56,199 +52,6 @@ class NCProbeSynonym(
lazy val isValueSynonym: Boolean = value != null
lazy val stems: String = map(_.wordStem).mkString(" ")
- /**
- *
- * @param kind
- * @return
- */
- private def getSort(kind: NCSynonymChunkKind): Int =
- kind match {
- case TEXT => 0
- case IDL => 1
- case REGEX => 2
- case _ => throw new AssertionError(s"Unexpected kind: $kind")
- }
-
- /**
- *
- * @param tok
- * @param chunk
- */
- private def isMatch(tok: NCNlpSentenceToken, chunk: NCProbeSynonymChunk):
Boolean =
- chunk.kind match {
- case TEXT => chunk.wordStem == tok.stem
- case REGEX =>
- val regex = chunk.regex
-
- regex.matcher(tok.origText).matches() ||
regex.matcher(tok.normText).matches()
- case IDL => throw new AssertionError()
- case _ => throw new AssertionError()
- }
-
- /**
- *
- * @param toks
- * @param isMatch
- * @param getIndex
- * @param shouldBeNeighbors
- * @tparam T
- * @return
- */
- private def sparseMatch0[T](
- toks: Seq[T],
- isMatch: (T, NCProbeSynonymChunk) => Boolean,
- getIndex: T => Int,
- shouldBeNeighbors: Boolean
- ): Option[Seq[T]] =
- if (toks.size >= this.size) {
- lazy val res = mutable.ArrayBuffer.empty[T]
- lazy val all = mutable.HashSet.empty[T]
-
- var state = 0
-
- for (chunk <- this if state != -1) {
- val seq =
- if (state == 0) {
- state = 1
-
- toks.filter(t => isMatch(t, chunk))
- }
- else
- toks.filter(t => !res.contains(t) && isMatch(t, chunk))
-
- if (seq.nonEmpty) {
- val head = seq.head
-
- if (!permute && res.nonEmpty && getIndex(head) <=
getIndex(res.last))
- state = -1
- else {
- all ++= seq
-
- if (all.size > this.size)
- state = -1
- else
- res += head
- }
- }
- else
- state = -1
- }
-
- if (state != -1 && all.size == res.size && (!shouldBeNeighbors ||
U.isIncreased(res.map(getIndex).toSeq.sorted)))
- Some(res.toSeq)
- else
- None
- }
- else
- None
-
- /**
- *
- * @param tow
- * @param chunk
- * @param req
- * @param variantsToks
- */
- private def isMatch(
- tow: NCIdlContent, chunk: NCProbeSynonymChunk, req: NCRequest,
variantsToks: Seq[Seq[NCToken]]
- ): Boolean = {
- def get0[T](fromToken: NCToken => T, fromWord: NCNlpSentenceToken =>
T): T =
- if (tow.isLeft) fromToken(tow.swap.toOption.get) else
fromWord(tow.toOption.get)
-
- chunk.kind match {
- case TEXT => chunk.wordStem == get0(_.stem, _.stem)
-
- case REGEX =>
- val r = chunk.regex
-
- r.matcher(get0(_.origText, _.origText)).matches() ||
r.matcher(get0(_.normText, _.normText)).matches()
-
- case IDL =>
- val ok =
- variantsToks.exists(variantToks =>
- get0(t =>
- chunk.idlPred.apply(
- t,
- NCIdlContext(req = req, toks = variantToks)
- ).value.asInstanceOf[Boolean], _ => false
- )
- )
-
- if (ok)
- saveIdl(req, tow.swap.toOption.get, chunk.idlPred)
-
- ok
-
- case _ => throw new AssertionError()
- }
- }
-
- /**
- *
- * @param toks
- */
- def isMatch(toks: Seq[NCNlpSentenceToken]): Boolean = {
- require(toks != null)
- require(!sparse && !hasIdl)
-
- if (toks.length == length) {
- if (isTextOnly)
- toks.zip(this).forall(p => p._1.stem == p._2.wordStem)
- else
- toks.zip(this).sortBy(p => getSort(p._2.kind)).forall { case
(tok, chunk) => isMatch(tok, chunk) }
- }
- else
- false
- }
-
- /**
- *
- * @param tows
- * @param req
- * @param variantsToks
- * @return
- */
- def isMatch(tows: Seq[NCIdlContent], req: NCRequest, variantsToks:
Seq[Seq[NCToken]]): Boolean= {
- require(tows != null)
-
- if (tows.length == length && tows.count(_.isLeft) >= idlChunks)
- tows.zip(this).sortBy(p => getSort(p._2.kind)).forall {
- case (tow, chunk) => isMatch(tow, chunk, req, variantsToks)
- }
- else
- false
- }
-
- /**
- *
- * @param toks
- */
- def sparseMatch(toks: Seq[NCNlpSentenceToken]):
Option[Seq[NCNlpSentenceToken]] = {
- require(toks != null)
- require(sparse && !hasIdl)
-
- sparseMatch0(toks, isMatch, (t: NCNlpSentenceToken) =>
t.startCharIndex, shouldBeNeighbors = false)
- }
-
- /**
- *
- * @param tows
- * @param req
- * @param variantsToks
- */
- def sparseMatch(tows: Seq[NCIdlContent], req: NCRequest, variantsToks:
Seq[Seq[NCToken]]): Option[Seq[NCIdlContent]] = {
- require(tows != null)
- require(req != null)
- require(hasIdl)
-
- sparseMatch0(
- tows,
- (t: NCIdlContent, chunk: NCProbeSynonymChunk) => isMatch(t, chunk,
req, variantsToks),
- (t: NCIdlContent) => if (t.isLeft)
t.swap.toOption.get.getStartCharIndex else t.toOption.get.startCharIndex,
- shouldBeNeighbors = !sparse
- )
- }
-
override def toString(): String = mkString(" ")
// Orders synonyms from least to most significant.
@@ -366,12 +169,4 @@ object NCProbeSynonym {
syn
}
-
- /**
- *
- * @param req
- * @param tok
- * @param idlPred
- */
- def saveIdl(req: NCRequest, tok: NCToken, idlPred: NCIdlFunction): Unit =
NCSentenceManager.saveIdl(req, tok, idlPred)
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
index 39f6969..e876065 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala
@@ -22,8 +22,7 @@ import org.apache.nlpcraft.common.nlp.{NCNlpSentence =>
NlpSentence, NCNlpSenten
import org.apache.nlpcraft.common.{NCE, TOK_META_ALIASES_KEY}
import org.apache.nlpcraft.model.NCVariant
import org.apache.nlpcraft.model.impl.{NCTokenImpl, NCTokenLogger,
NCVariantImpl}
-import org.apache.nlpcraft.model.intent.NCIdlContext
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSynonymsManager
import java.io.{Serializable => JSerializable}
import java.util
@@ -269,33 +268,7 @@ object NCProbeVariants {
for ((tok, tokNlp) <- toks.zip(nlpSen) if tokNlp.isUser)
process(tok, tokNlp)
- if (ok) {
- NCSentenceManager.getIdlData(srvReqId) match {
- case Some((req, toksData)) =>
- ok =
- toks.forall(t =>
- toksData.get((t, t.getId)) match {
- case Some(f) =>
- val x =
- f.apply(
- t,
- NCIdlContext(req = req, toks =
toks.toSeq)
- ).value.asInstanceOf[Boolean]
-
-
- if (!x)
- println("x="+x + ", t=" + t +
", toks=" + toks)
- x
-
-
- case None => true
- }
- )
-
- case None => // No-op.
-
- }
- }
+ ok = ok && NCSynonymsManager.isStillValid(srvReqId,
toks.toSeq)
if (ok) Some(new NCVariantImpl(toks.asJava)) else None
})
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 9af0c61..64049ac 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -43,7 +43,7 @@ import
org.apache.nlpcraft.probe.mgrs.nlp.enrichers.stopword.NCStopWordEnricher
import
org.apache.nlpcraft.probe.mgrs.nlp.enrichers.suspicious.NCSuspiciousNounsEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.impl._
import org.apache.nlpcraft.probe.mgrs.nlp.validate._
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager,
NCSynonymsManager}
import org.apache.nlpcraft.probe.mgrs.{NCProbeMessage, NCProbeVariants}
import java.io.Serializable
@@ -554,7 +554,8 @@ object NCProbeEnrichmentManager extends NCService with
NCOpenCensusModelStats {
var senVars = NCProbeVariants.convert(srvReqId, mdl, sensSeq,
lastPhase = true)
- NCSentenceManager.clearCache(srvReqId)
+ NCSentenceManager.clearRequestData(srvReqId)
+ NCSynonymsManager.clearRequestData(srvReqId)
// Sentence variants can be filtered by model.
val fltSenVars: Seq[(NCVariant, Int)] =
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 7a11806..03c5b5d 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -27,7 +27,7 @@ import
org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
import
org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.NCSynonymChunkKind
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
-import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
+import org.apache.nlpcraft.probe.mgrs.sentence.{NCSentenceManager,
NCSynonymsManager}
import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants,
NCTokenPartKey, NCProbeSynonym => Synonym}
import java.io.Serializable
@@ -584,7 +584,7 @@ object NCModelEnricher extends NCProbeEnricher {
def tryScan(syns: Seq[Synonym]): Unit =
for (s <- syns if !found)
- if (s.isMatch(toks)) {
+ if
(NCSynonymsManager.isMatch(s, toks)) {
found = true
add("simple continuous
scan", ns, contCache, eId, greedy, toksExt, idxs, s)
}
@@ -604,7 +604,7 @@ object NCModelEnricher extends NCProbeEnricher {
// 1.2 Sparse.
if (!found && mdl.hasSparseSynonyms)
for (s <- get(mdl.sparseSynonyms, eId))
- s.sparseMatch(toks) match {
+ NCSynonymsManager.sparseMatch(s, toks)
match {
case Some(res) =>
add("simple sparse", ns,
contCache, eId, greedy, getSparsedTokens(res, toks), idxs, s)
case None => // No-op.
@@ -627,7 +627,7 @@ object NCModelEnricher extends NCProbeEnricher {
if !found;
data = comb.map(_.data)
)
- if (s.isMatch(data, req, variantsToks)) {
+ if (NCSynonymsManager.isMatch(s, data,
req, variantsToks)) {
val parts = toParts(mdl, ns.srvReqId,
data, s)
add("IDL continuous", ns, contCache,
eId, greedy, toksExt, idxs, s, parts)
@@ -643,7 +643,7 @@ object NCModelEnricher extends NCProbeEnricher {
s <- allSyns;
comb <- allCombs
)
- s.sparseMatch(comb.map(_.data), req,
variantsToks) match {
+ NCSynonymsManager.sparseMatch(s,
comb.map(_.data), req, variantsToks) match {
case Some(res) =>
val typ = if (s.sparse) "IDL
sparse" else "IDL continuous"
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index b0a077a..2e280ac 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -22,9 +22,9 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence.NoteLink
import org.apache.nlpcraft.common.nlp.pos.NCPennTreebank
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote,
NCNlpSentenceToken}
import org.apache.nlpcraft.common.{NCE, NCService, U, _}
-import org.apache.nlpcraft.model.intent.NCIdlFunction
-import org.apache.nlpcraft.model.{NCModel, NCRequest, NCToken}
+import org.apache.nlpcraft.model.NCModel
import org.apache.nlpcraft.probe.mgrs.NCTokenPartKey
+import org.apache.nlpcraft.probe.mgrs.sentence.NCSynonymsManager.{idlCache,
reqCache}
import java.io.{Serializable => JSerializable}
import java.util
@@ -44,14 +44,6 @@ object NCSentenceManager extends NCService {
type CacheValue = Seq[Seq[NCNlpSentenceNote]]
private val combCache = mutable.HashMap.empty[String,
mutable.HashMap[CacheKey, CacheValue]]
- type IdlCacheKey = (NCToken, String)
- private val reqCache = mutable.HashMap.empty[String, NCRequest]
- private val idlCache = mutable.HashMap.empty[String,
mutable.HashMap[IdlCacheKey, NCIdlFunction]]
-
- /**
- *
- * @param notes
- */
def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
@@ -822,37 +814,5 @@ object NCSentenceManager extends NCService {
*
* @param srvReqId
*/
- def clearCache(srvReqId: String): Unit = {
- combCache -= srvReqId
- reqCache -= srvReqId
- idlCache -= srvReqId
- }
-
- def saveIdl(req: NCRequest, tok: NCToken, idlPred: NCIdlFunction): Unit = {
- val srvReqId = req.getServerRequestId
-
- reqCache += srvReqId -> req
-
- val idlCacheReq: mutable.Map[IdlCacheKey, NCIdlFunction] =
- idlCache.get(srvReqId) match {
- case Some(m) => m
- case None =>
- val m = mutable.HashMap.empty[IdlCacheKey, NCIdlFunction]
-
- idlCache += srvReqId -> m
-
- m
- }
-
- idlCacheReq += (tok, tok.getId) -> idlPred
- }
-
- def getIdlData(srvReqId: String) : Option[(NCRequest, Map[IdlCacheKey,
NCIdlFunction])] = {
- val reqData = reqCache.get(srvReqId)
- val idlData = idlCache.get(srvReqId)
-
- require(reqData.isDefined && idlData.isDefined || reqData.isEmpty &&
idlData.isEmpty)
-
- if (reqData.isDefined) Some((reqData.get, idlData.get.toMap)) else None
- }
+ def clearRequestData(srvReqId: String): Unit = combCache -= srvReqId // Drops the combCache entry stored under this request ID, if any.
}
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala
new file mode 100644
index 0000000..e6258ec
--- /dev/null
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSynonymsManager.scala
@@ -0,0 +1,333 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.sentence
+
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
+import org.apache.nlpcraft.common.{NCService, U}
+import org.apache.nlpcraft.model._
+import org.apache.nlpcraft.model.intent.{NCIdlContext, NCIdlFunction}
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{IDL,
NCSynonymChunkKind, REGEX, TEXT}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeSynonymChunk, NCProbeSynonym =>
Synonym}
+
+import scala.collection.mutable
+
+/**
+ * Synonym matching service; also keeps, per server request ID, the request and the IDL predicates that matched tokens (see `savePredicate`).
+ */
+object NCSynonymsManager extends NCService {
+ type IdlCacheKey = (NCToken, String) // Built as `(tok, tok.getId)` in `savePredicate`.
+
+ private val reqCache = mutable.HashMap.empty[String, NCRequest] // Server request ID -> request.
+ private val idlCache = mutable.HashMap.empty[String,
mutable.HashMap[IdlCacheKey, NCIdlFunction]] // NOTE(review): both caches are plain mutable maps in a singleton with no synchronization here - confirm they are never touched concurrently.
+
+ override def start(parent: Span): NCService = { // No initialization work is done here; `parent` is not used.
+ ackStarting()
+
+ ackStarted() // Last expression, so its result is what `start` returns.
+ }
+
+ override def stop(parent: Span): Unit = { // `parent` is not used. NOTE(review): reqCache/idlCache are not cleared here - confirm that is intended.
+ ackStopping()
+
+ ackStopped()
+ }
+
+ /**
+ * Tests a single NLP sentence token against a TEXT or REGEX synonym chunk.
+ * @param tok Token to test; its stem, original text and normalized text are read.
+ * @param chunk Chunk to test against; an IDL chunk or any other kind throws `AssertionError`.
+ */
+ private def isMatch(tok: NCNlpSentenceToken, chunk: NCProbeSynonymChunk):
Boolean =
+ chunk.kind match {
+ case TEXT => chunk.wordStem == tok.stem
+ case REGEX => // Passing `matches()` on either the original or the normalized text is enough.
+ val regex = chunk.regex
+
+ regex.matcher(tok.origText).matches() ||
regex.matcher(tok.normText).matches()
+ case IDL => throw new AssertionError() // IDL chunks are handled by the `NCIdlContent` overload of `isMatch`.
+ case _ => throw new AssertionError()
+ }
+
+ /**
+ * Sort rank of a chunk kind (TEXT = 0, IDL = 1, REGEX = 2); the public `isMatch` methods sort (element, chunk) pairs by it before checking them.
+ * @param kind Chunk kind; anything other than TEXT, IDL or REGEX throws `AssertionError`.
+ */
+ private def getSort(kind: NCSynonymChunkKind): Int =
+ kind match {
+ case TEXT => 0
+ case IDL => 1
+ case REGEX => 2
+ case _ => throw new AssertionError(s"Unexpected kind: $kind")
+ }
+
+ /**
+ * Generic sparse matcher: walks the chunks of `s` in order, picks for each the first not-yet-picked element of `toks` that matches; yields the picks in chunk order, or `None` on failure.
+ * @param s Synonym whose chunks are iterated; `s.size` bounds both the input size check and the number of distinct matching elements.
+ * @param toks Candidate elements; if there are fewer of them than `s.size` the result is `None`.
+ * @param isMatch Predicate deciding whether an element matches a chunk.
+ * @param getIndex Position of an element; used for the ordering check (skipped when `s.permute`) and for the neighbors check.
+ * @param shouldBeNeighbors When `true`, the picks must also pass `U.isIncreased` over their sorted indexes (NOTE(review): `U.isIncreased` is not visible here - confirm its exact meaning).
+ * @tparam T Element type.
+ */
+ private def sparseMatch0[T](
+ s: Synonym,
+ toks: Seq[T],
+ isMatch: (T, NCProbeSynonymChunk) => Boolean,
+ getIndex: T => Int,
+ shouldBeNeighbors: Boolean
+ ): Option[Seq[T]] =
+ if (toks.size >= s.size) {
+ lazy val res = mutable.ArrayBuffer.empty[T] // One picked element per chunk processed so far.
+ lazy val all = mutable.HashSet.empty[T] // Every element that matched some chunk, picked or not.
+
+ var state = 0 // 0: first chunk not processed yet, 1: in progress, -1: failed.
+
+ for (chunk <- s if state != -1) {
+ val seq =
+ if (state == 0) {
+ state = 1
+
+ toks.filter(t => isMatch(t, chunk))
+ }
+ else
+ toks.filter(t => !res.contains(t) && isMatch(t, chunk))
+
+ if (seq.nonEmpty) {
+ val head = seq.head
+
+ if (!s.permute && res.nonEmpty && getIndex(head) <=
getIndex(res.last))
+ state = -1 // Without `permute`, each pick must have a greater index than the previous pick.
+ else {
+ all ++= seq
+
+ if (all.size > s.size)
+ state = -1 // More distinct matching elements than the synonym has chunks.
+ else
+ res += head
+ }
+ }
+ else
+ state = -1 // No remaining element matches this chunk.
+ }
+
+ if (state != -1 && all.size == res.size && (!shouldBeNeighbors ||
U.isIncreased(res.map(getIndex).toSeq.sorted))) // `all.size == res.size`: no matching element was left unpicked.
+ Some(res.toSeq)
+ else
+ None
+ }
+ else
+ None
+
+ /**
+ * Records, under `req.getServerRequestId`, the request itself (reqCache) and the predicate keyed by `(tok, tok.getId)` (idlCache).
+ * @param req Request; supplies the server request ID and is stored in `reqCache`.
+ * @param tok Token the predicate matched; forms the cache key together with its ID.
+ * @param idlPred Predicate to store; replaces any predicate already stored under the same key.
+ */
+ private def savePredicate(req: NCRequest, tok: NCToken, idlPred:
NCIdlFunction): Unit = {
+ val srvReqId = req.getServerRequestId
+
+ reqCache += srvReqId -> req
+
+ val idlCacheReq: mutable.Map[IdlCacheKey, NCIdlFunction] =
+ idlCache.get(srvReqId) match {
+ case Some(m) => m
+ case None => // First predicate for this request: create and register its map.
+ val m = mutable.HashMap.empty[IdlCacheKey, NCIdlFunction]
+
+ idlCache += srvReqId -> m
+
+ m
+ }
+
+ idlCacheReq += (tok, tok.getId) -> idlPred
+ }
+
+ /**
+ * Tests one IDL content element (Left = token, Right = NLP word, as unpacked by `get0`) against a TEXT, REGEX or IDL chunk; a successful IDL match is recorded via `savePredicate`.
+ * @param tow Token-or-word element to test.
+ * @param chunk Chunk to test against; any kind other than TEXT, REGEX or IDL throws `AssertionError`.
+ * @param req Request passed into the IDL context and used when saving a matched predicate.
+ * @param variantsToks Token sets tried in turn as IDL context; an IDL chunk matches if its predicate holds for at least one of them.
+ */
+ private def isMatch(
+ tow: NCIdlContent, chunk: NCProbeSynonymChunk, req: NCRequest,
variantsToks: Seq[Seq[NCToken]]
+ ): Boolean = {
+ def get0[T](fromToken: NCToken => T, fromWord: NCNlpSentenceToken =>
T): T =
+ if (tow.isLeft) fromToken(tow.swap.toOption.get) else
fromWord(tow.toOption.get)
+
+ chunk.kind match {
+ case TEXT => chunk.wordStem == get0(_.stem, _.stem)
+
+ case REGEX =>
+ val r = chunk.regex
+
+ r.matcher(get0(_.origText, _.origText)).matches() ||
r.matcher(get0(_.normText, _.normText)).matches()
+
+ case IDL => // A word (Right) never matches an IDL chunk: the `fromWord` branch below is constant `false`.
+ val ok =
+ variantsToks.exists(variantToks =>
+ get0(t =>
+ chunk.idlPred.apply(
+ t,
+ NCIdlContext(req = req, toks = variantToks)
+ ).value.asInstanceOf[Boolean], _ => false
+ )
+ )
+
+ if (ok)
+ savePredicate(req, tow.swap.toOption.get, chunk.idlPred) // `ok` implies `tow` is Left, so the unwrap cannot fail.
+
+ ok
+
+ case _ => throw new AssertionError()
+ }
+ }
+
+ /**
+ * Continuous match of a non-sparse, non-IDL synonym: `toks` must have exactly `s.length` elements and each must match the chunk at the same position.
+ * @param s Synonym to match; must be neither sparse nor IDL-based (enforced by `require`).
+ * @param toks Tokens to match; must not be `null`.
+ */
+ def isMatch(s: Synonym, toks: Seq[NCNlpSentenceToken]): Boolean = {
+ require(toks != null)
+ require(!s.sparse && !s.hasIdl)
+
+ if (toks.length == s.length) {
+ if (s.isTextOnly) // Text-only synonyms compare stems directly.
+ toks.zip(s).forall(p => p._1.stem == p._2.wordStem)
+ else // Pairs are ordered by `getSort` rank, so lower-ranked chunk kinds are checked first.
+ toks.zip(s).sortBy(p => getSort(p._2.kind)).forall { case
(tok, chunk) => isMatch(tok, chunk) }
+ }
+ else
+ false
+ }
+
+ /**
+ * Continuous match over token-or-word elements: needs exactly `s.length` elements, at least `s.idlChunks` of them tokens (Left), each matching the chunk at its position.
+ * @param s Synonym to match.
+ * @param tows Token-or-word elements to match; must not be `null`.
+ * @param req Request handed to the per-chunk check.
+ * @param variantsToks Token sets handed to the per-chunk check as IDL contexts.
+ */
+ def isMatch(s: Synonym, tows: Seq[NCIdlContent], req: NCRequest,
variantsToks: Seq[Seq[NCToken]]): Boolean= {
+ require(tows != null)
+
+ if (tows.length == s.length && tows.count(_.isLeft) >= s.idlChunks)
+ tows.zip(s).sortBy(p => getSort(p._2.kind)).forall { // Ordered by `getSort` rank: TEXT, then IDL, then REGEX.
+ case (tow, chunk) => isMatch(tow, chunk, req, variantsToks)
+ }
+ else
+ false
+ }
+
+ /**
+ * Sparse match of a non-IDL synonym; delegates to `sparseMatch0` using the token start character index as position and no neighbors check.
+ * @param s Synonym to match; must be sparse and not IDL-based (enforced by `require`).
+ * @param toks Tokens to search; must not be `null`.
+ */
+ def sparseMatch(s: Synonym, toks: Seq[NCNlpSentenceToken]):
Option[Seq[NCNlpSentenceToken]] = {
+ require(toks != null)
+ require(s.sparse && !s.hasIdl)
+
+ sparseMatch0(s, toks, isMatch, (t: NCNlpSentenceToken) =>
t.startCharIndex, shouldBeNeighbors = false)
+ }
+
+ /**
+ * Match of an IDL-based synonym through `sparseMatch0`; for a non-sparse synonym the neighbors check is switched on.
+ * @param s Synonym to match; must have IDL chunks (enforced by `require`).
+ * @param tows Token-or-word elements to search; must not be `null`.
+ * @param req Request handed to the per-chunk check; must not be `null`.
+ * @param variantsToks Token sets handed to the per-chunk check as IDL contexts.
+ */
+ def sparseMatch(s: Synonym, tows: Seq[NCIdlContent], req: NCRequest,
variantsToks: Seq[Seq[NCToken]]): Option[Seq[NCIdlContent]] = {
+ require(tows != null)
+ require(req != null)
+ require(s.hasIdl)
+
+ sparseMatch0(
+ s,
+ tows,
+ (t: NCIdlContent, chunk: NCProbeSynonymChunk) => isMatch(t, chunk,
req, variantsToks),
+ (t: NCIdlContent) => if (t.isLeft)
t.swap.toOption.get.getStartCharIndex else t.toOption.get.startCharIndex, // Position is the start character index of the token (Left) or word (Right).
+ shouldBeNeighbors = !s.sparse
+ )
+ }
+
+ /**
+ * Returns the cached request and an immutable copy of its saved predicates, or `None` if nothing is cached for the ID. NOTE(review): repeats the lookup done in `isStillValid` and has no caller in the visible diff - confirm it is needed.
+ * @param srvReqId Server request ID used as the key into both caches.
+ */
+ def getPredicate(srvReqId: String) : Option[(NCRequest, Map[IdlCacheKey,
NCIdlFunction])] = {
+ val reqData = reqCache.get(srvReqId)
+ val idlData = idlCache.get(srvReqId)
+
+ require(reqData.isDefined && idlData.isDefined || reqData.isEmpty &&
idlData.isEmpty) // Both caches must agree: an entry exists in both or in neither.
+
+ if (reqData.isDefined) Some((reqData.get, idlData.get.toMap)) else None
+ }
+
+ /**
+ * Re-evaluates the IDL predicates saved for this request, using `toks` as the IDL context tokens.
+ * @param srvReqId Server request ID used as the key into both caches.
+ * @param toks Tokens to validate; each is looked up by `(t, t.getId)` and tokens without a saved predicate pass.
+ * @return `true` if nothing is cached for the request, or if every token with a saved predicate still satisfies it.
+ */
+ def isStillValid(srvReqId: String, toks: Seq[NCToken]): Boolean = {
+ val reqData = reqCache.get(srvReqId)
+ val idlData = idlCache.get(srvReqId)
+
+ require(reqData.isDefined && idlData.isDefined || reqData.isEmpty &&
idlData.isEmpty) // Both caches must agree: an entry exists in both or in neither.
+
+ if (reqData.isDefined) {
+ val req = reqData.get
+ val idl = idlData.get.toMap
+
+ toks.forall(t =>
+ idl.get((t, t.getId)) match {
+ case Some(f) =>
+ val x =
+ f.apply(
+ t, NCIdlContext(req = req, toks = toks)
+ ).value.asInstanceOf[Boolean]
+
+
+ if (!x)
+ println("x="+x + ", t=" + t + ", toks=" + toks) // NOTE(review): debug output to stdout left in this WIP commit - replace with logging or remove.
+
+ x
+
+ case None => true
+ }
+ )
+ }
+ else
+ true
+ }
+
+ /**
+ * Removes whatever `reqCache` and `idlCache` hold for the given request; a no-op for an ID that is not cached.
+ * @param srvReqId Server request ID whose cached request and predicates are dropped.
+ */
+ def clearRequestData(srvReqId: String): Unit = {
+ reqCache -= srvReqId
+ idlCache -= srvReqId
+ }
+}