This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-287 by this push:
new d8ec592 WIP.
d8ec592 is described below
commit d8ec5925fea0f5e83d0a40ba4b396ed0689311c0
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Apr 2 17:02:23 2021 +0300
WIP.
---
.../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 98 ++++++++++++++--------
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 41 +++------
.../nlpcraft/model/jiggle/NCJiggleSpec.scala | 90 ++++++++++----------
3 files changed, 118 insertions(+), 111 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index cddf6a2..92e9ece 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -112,37 +112,59 @@ class NCProbeSynonym(
else
None
+ private def collectMatches[T](seq: Seq[T], tryMatch: Seq[T] ⇒
Option[Seq[T]]): Seq[Seq[T]] = {
+ val buf = mutable.ArrayBuffer.empty[Seq[T]]
+
+ var ok = true
+ var arg = seq
+
+ while (ok) {
+ tryMatch(arg) match {
+ case Some(ts) ⇒
+ buf += ts
+ arg = arg.filter(t ⇒ !ts.contains(t))
+ case None ⇒ ok = false
+ }
+ }
+
+ buf
+ }
+
/**
*
* @param sen
* @return
*/
- def trySparseMatch(sen: NCNlpSentenceTokenBuffer):
Option[Seq[NCNlpSentenceToken]] = {
+ def trySparseMatch(sen: NCNlpSentenceTokenBuffer):
Seq[Seq[NCNlpSentenceToken]] = {
require(sen != null)
require(sen.nonEmpty)
require(this.size > 1)
- var ok = true
- val buf = mutable.ArrayBuffer.empty[NCNlpSentenceToken]
+ def trySparseMatch0(sen: Seq[NCNlpSentenceToken]):
Option[Seq[NCNlpSentenceToken]] = {
+ var ok = true
+ val buf = mutable.ArrayBuffer.empty[NCNlpSentenceToken]
- if (isDirect) {
- var lastIdx = 0
- val tokIdxs = sen.zipWithIndex.toMap
+ if (isDirect) {
+ var lastIdx = 0
+ val tokIdxs = sen.zipWithIndex.toMap
- for (chunk ← this if ok)
- sen.drop(lastIdx).find(tok ⇒ isMatch(tok, chunk)) match {
- case Some(tok) ⇒ buf += tok; lastIdx = tokIdxs(tok) + 1
- case None ⇒ ok = false
- }
+ for (chunk ← this if ok)
+ sen.drop(lastIdx).find(tok ⇒ isMatch(tok, chunk)) match {
+ case Some(tok) ⇒ buf += tok; lastIdx = tokIdxs(tok) + 1
+ case None ⇒ ok = false
+ }
+ }
+ else
+ for (chunk ← this if ok)
+ sen.find(tok ⇒ !buf.contains(tok) && isMatch(tok, chunk))
match {
+ case Some(tok) ⇒ buf += tok
+ case None ⇒ ok = false
+ }
+
+ convertResult(ok, buf)
}
- else
- for (chunk ← this if ok)
- sen.find(tok ⇒ !buf.contains(tok) && isMatch(tok, chunk))
match {
- case Some(tok) ⇒ buf += tok
- case None ⇒ ok = false
- }
- convertResult(ok, buf)
+ collectMatches(sen, trySparseMatch0)
}
/**
@@ -192,32 +214,36 @@ class NCProbeSynonym(
* @param sen
* @param req
*/
- def trySparseMatch(sen: Seq[NCDslContent], req: NCRequest):
Option[Seq[NCDslContent]] = {
+ def trySparseMatch(sen: Seq[NCDslContent], req: NCRequest):
Seq[Seq[NCDslContent]] = {
require(sen != null)
require(sen.nonEmpty)
require(this.size > 1)
- var ok = true
- val buf = mutable.ArrayBuffer.empty[NCDslContent]
+ def trySparseMatch0(sen: Seq[NCDslContent]): Option[Seq[NCDslContent]]
= {
+ var ok = true
+ val buf = mutable.ArrayBuffer.empty[NCDslContent]
+
+ if (isDirect) {
+ var lastIdx = 0
+ val tokIdxs = sen.zipWithIndex.toMap
- if (isDirect) {
- var lastIdx = 0
- val tokIdxs = sen.zipWithIndex.toMap
+ for (chunk ← this if ok)
+ sen.drop(lastIdx).find(tow ⇒ isMatch(tow, chunk, req))
match {
+ case Some(t) ⇒ buf += t; lastIdx = tokIdxs(t) + 1
+ case None ⇒ ok = false
+ }
+ }
+ else
+ for (chunk ← this if ok)
+ sen.find(tow ⇒ !buf.contains(tow) && isMatch(tow, chunk,
req)) match {
+ case Some(tow) ⇒ buf += tow
+ case None ⇒ ok = false
+ }
- for (chunk ← this if ok)
- sen.drop(lastIdx).find(tow ⇒ isMatch(tow, chunk, req)) match {
- case Some(t) ⇒ buf += t; lastIdx = tokIdxs(t) + 1
- case None ⇒ ok = false
- }
+ convertResult(ok, buf)
}
- else
- for (chunk ← this if ok)
- sen.find(tow ⇒ !buf.contains(tow) && isMatch(tow, chunk, req))
match {
- case Some(tow) ⇒ buf += tow
- case None ⇒ ok = false
- }
- convertResult(ok, buf)
+ collectMatches(sen, trySparseMatch0)
}
override def toString(): String = mkString(" ")
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 1e4c756..7df8f02 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -365,46 +365,27 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
"txt" → ns.text) {
_ ⇒
// 1. Simple, sparse.
- for ((elemId, syns) ← mdl.sparseSynonyms) {
- syns.toStream.flatMap(
- syn ⇒ syn.trySparseMatch(ns) match {
- case Some(toks) ⇒ Some(syn, toks)
- case None ⇒ None
- }
- ).headOption match {
- case Some((syn, toks)) ⇒
addMatch(mdl.elements(elemId), toks, syn, Seq.empty)
- case None ⇒ // No-op.
- }
- }
-
- // 2. DSL, sparse.
- for ((elemId, syns) ← mdl.sparseSynonymsDsl) {
- var found = false
+ for ((elemId, syns) ← mdl.sparseSynonyms; syn ← syns)
+ syn.trySparseMatch(ns).foreach(toks ⇒
addMatch(mdl.elements(elemId), toks, syn, Seq.empty))
- for (complex ← complexes if !found) {
+ // 2. DSL, sparse.
+ for ((elemId, syns) ← mdl.sparseSynonymsDsl; syn ← syns) {
+ for (complex ← complexes) {
val comb = complex.tokensComplexes
- syns.toStream.flatMap(
- syn ⇒ syn.trySparseMatch(comb.map(_.data), req)
match {
- case Some(tows) ⇒ Some(syn, tows)
- case None ⇒ None
- }
- ).headOption match {
- case Some((syn, tows)) ⇒
- val toks =
- tows.filter(_.isRight).map(_.right.get) ++
+ syn.trySparseMatch(comb.map(_.data), req).foreach(tows
⇒ {
+ val toks =
+ tows.filter(_.isRight).map(_.right.get) ++
tows.filter(_.isLeft).map(_.left.get).flatMap(w ⇒
ns.filter(
t ⇒ t.startCharIndex >=
w.getStartCharIndex &&
- t.endCharIndex <= w.getEndCharIndex
+ t.endCharIndex <=
w.getEndCharIndex
)
)
- addMatch(mdl.elements(elemId),
toks.sortBy(_.startCharIndex), syn, getParts(comb, syn))
+ addMatch(mdl.elements(elemId),
toks.sortBy(_.startCharIndex), syn, getParts(comb, syn))
- found = true
- case None ⇒ // No-op.
- }
+ })
}
}
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
index c9e1b5d..9c60c2d 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
@@ -35,51 +35,51 @@ class NJiggleModel extends NCAbstractTokensModel {
// TODO:
-// override def onContext(ctx: NCContext): NCResult = {
-// val variants = ctx.getVariants.asScala
-//
-// def checkOneVariant(sparsity: Int): Unit = {
-// require(variants.size == 1)
-//
-// val toks = variants.head.asScala.filter(_.getId == "xyz")
-//
-// require(toks.size == 3)
-//
-// checkSparsity(sparsity, toks)
-// }
-//
-// def checkSparsity(sparsity: Int, toks: mutable.Buffer[NCToken]):
Unit =
-//
require(toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int]
== sparsity))
-//
-// def checkExists(sparsity: Int): Unit = {
-// require(
-// variants.exists(v ⇒ {
-// val toks = v.asScala.filter(_.getId == "xyz")
-//
-// toks.size match {
-// case 3 ⇒
-// checkSparsity(sparsity, toks)
-//
-// true
-// case _ ⇒ false
-// }
-// })
-// )
-// }
-//
-// ctx.getRequest.getNormalizedText match {
-// case "x y z x y z x y z" ⇒ checkOneVariant(0)
-// case "x y z test x y z test x y z test" ⇒ checkOneVariant(0)
-// case "x test y z x test y z x y test z" ⇒ checkOneVariant(1)
-// case "x z y x z y x z y" ⇒ checkExists(0)
-// case "x z y test x z y test x z y test" ⇒ checkExists(0)
-// case "x test z y x test z y x test z y" ⇒ checkExists(1)
-//
-// case _ ⇒ throw new AssertionError(s"Unexpected request:
${ctx.getRequest.getNormalizedText}")
-// }
-//
-// NCResult.text("OK")
-// }
+ override def onContext(ctx: NCContext): NCResult = {
+ val variants = ctx.getVariants.asScala
+
+ def checkOneVariant(sparsity: Int): Unit = {
+ require(variants.size == 1)
+
+ val toks = variants.head.asScala.filter(_.getId == "xyz")
+
+ require(toks.size == 3)
+
+ checkSparsity(sparsity, toks)
+ }
+
+ def checkSparsity(sparsity: Int, toks: mutable.Buffer[NCToken]): Unit =
+
require(toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int]
== sparsity))
+
+ def checkExists(sparsity: Int): Unit = {
+ require(
+ variants.exists(v ⇒ {
+ val toks = v.asScala.filter(_.getId == "xyz")
+
+ toks.size match {
+ case 3 ⇒
+ checkSparsity(sparsity, toks)
+
+ true
+ case _ ⇒ false
+ }
+ })
+ )
+ }
+
+ ctx.getRequest.getNormalizedText match {
+ case "x y z x y z x y z" ⇒ checkOneVariant(0)
+ case "x y z test x y z test x y z test" ⇒ checkOneVariant(0)
+ case "x test y z x test y z x y test z" ⇒ checkOneVariant(1)
+ case "x z y x z y x z y" ⇒ checkExists(0)
+ case "x z y test x z y test x z y test" ⇒ checkExists(0)
+ case "x test z y x test z y x test z y" ⇒ checkExists(1)
+
+ case _ ⇒ throw new AssertionError(s"Unexpected request:
${ctx.getRequest.getNormalizedText}")
+ }
+
+ NCResult.text("OK")
+ }
}
@NCTestEnvironment(model = classOf[NJiggleModel], startClient = true)