This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-287 by this push:
new d8ec592 WIP.
d8ec592 is described below
commit d8ec5925fea0f5e83d0a40ba4b396ed0689311c0
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Apr 2 17:02:23 2021 +0300
WIP.
---
.../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 98 ++++++++++++++--------
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 41 +++------
.../nlpcraft/model/jiggle/NCJiggleSpec.scala | 90 ++++++++++----------
3 files changed, 118 insertions(+), 111 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index cddf6a2..92e9ece 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -112,37 +112,59 @@ class NCProbeSynonym(
else
None
+ private def collectMatches[T](seq: Seq[T], tryMatch: Seq[T] ⇒
Option[Seq[T]]): Seq[Seq[T]] = {
+ val buf = mutable.ArrayBuffer.empty[Seq[T]]
+
+ var ok = true
+ var arg = seq
+
+ while (ok) {
+ tryMatch(arg) match {
+ case Some(ts) ⇒
+ buf += ts
+ arg = arg.filter(t ⇒ !ts.contains(t))
+ case None ⇒ ok = false
+ }
+ }
+
+ buf
+ }
+
/**
*
* @param sen
* @return
*/
- def trySparseMatch(sen: NCNlpSentenceTokenBuffer):
Option[Seq[NCNlpSentenceToken]] = {
+ def trySparseMatch(sen: NCNlpSentenceTokenBuffer):
Seq[Seq[NCNlpSentenceToken]] = {
require(sen != null)
require(sen.nonEmpty)
require(this.size > 1)
- var ok = true
- val buf = mutable.ArrayBuffer.empty[NCNlpSentenceToken]
+ def trySparseMatch0(sen: Seq[NCNlpSentenceToken]):
Option[Seq[NCNlpSentenceToken]] = {
+ var ok = true
+ val buf = mutable.ArrayBuffer.empty[NCNlpSentenceToken]
- if (isDirect) {
- var lastIdx = 0
- val tokIdxs = sen.zipWithIndex.toMap
+ if (isDirect) {
+ var lastIdx = 0
+ val tokIdxs = sen.zipWithIndex.toMap
- for (chunk ← this if ok)
- sen.drop(lastIdx).find(tok ⇒ isMatch(tok, chunk)) match {
- case Some(tok) ⇒ buf += tok; lastIdx = tokIdxs(tok) + 1
- case None ⇒ ok = false
- }
+ for (chunk ← this if ok)
+ sen.drop(lastIdx).find(tok ⇒ isMatch(tok, chunk)) match {
+ case Some(tok) ⇒ buf += tok; lastIdx = tokIdxs(tok) + 1
+ case None ⇒ ok = false
+ }
+ }
+ else
+ for (chunk ← this if ok)
+ sen.find(tok ⇒ !buf.contains(tok) && isMatch(tok, chunk))
match {
+ case Some(tok) ⇒ buf += tok
+ case None ⇒ ok = false
+ }
+
+ convertResult(ok, buf)
}
- else
- for (chunk ← this if ok)
- sen.find(tok ⇒ !buf.contains(tok) && isMatch(tok, chunk))
match {
- case Some(tok) ⇒ buf += tok
- case None ⇒ ok = false
- }
- convertResult(ok, buf)
+ collectMatches(sen, trySparseMatch0)
}
/**
@@ -192,32 +214,36 @@ class NCProbeSynonym(
* @param sen
* @param req
*/
- def trySparseMatch(sen: Seq[NCDslContent], req: NCRequest):
Option[Seq[NCDslContent]] = {
+ def trySparseMatch(sen: Seq[NCDslContent], req: NCRequest):
Seq[Seq[NCDslContent]] = {
require(sen != null)
require(sen.nonEmpty)
require(this.size > 1)
- var ok = true
- val buf = mutable.ArrayBuffer.empty[NCDslContent]
+ def trySparseMatch0(sen: Seq[NCDslContent]): Option[Seq[NCDslContent]]
= {
+ var ok = true
+ val buf = mutable.ArrayBuffer.empty[NCDslContent]
+
+ if (isDirect) {
+ var lastIdx = 0
+ val tokIdxs = sen.zipWithIndex.toMap
- if (isDirect) {
- var lastIdx = 0
- val tokIdxs = sen.zipWithIndex.toMap
+ for (chunk ← this if ok)
+ sen.drop(lastIdx).find(tow ⇒ isMatch(tow, chunk, req))
match {
+ case Some(t) ⇒ buf += t; lastIdx = tokIdxs(t) + 1
+ case None ⇒ ok = false
+ }
+ }
+ else
+ for (chunk ← this if ok)
+ sen.find(tow ⇒ !buf.contains(tow) && isMatch(tow, chunk,
req)) match {
+ case Some(tow) ⇒ buf += tow
+ case None ⇒ ok = false
+ }
- for (chunk ← this if ok)
- sen.drop(lastIdx).find(tow ⇒ isMatch(tow, chunk, req)) match {
- case Some(t) ⇒ buf += t; lastIdx = tokIdxs(t) + 1
- case None ⇒ ok = false
- }
+ convertResult(ok, buf)
}
- else
- for (chunk ← this if ok)
- sen.find(tow ⇒ !buf.contains(tow) && isMatch(tow, chunk, req))
match {
- case Some(tow) ⇒ buf += tow
- case None ⇒ ok = false
- }
- convertResult(ok, buf)
+ collectMatches(sen, trySparseMatch0)
}
override def toString(): String = mkString(" ")
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 1e4c756..7df8f02 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -365,46 +365,27 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
"txt" → ns.text) {
_ ⇒
// 1. Simple, sparse.
- for ((elemId, syns) ← mdl.sparseSynonyms) {
- syns.toStream.flatMap(
- syn ⇒ syn.trySparseMatch(ns) match {
- case Some(toks) ⇒ Some(syn, toks)
- case None ⇒ None
- }
- ).headOption match {
- case Some((syn, toks)) ⇒
addMatch(mdl.elements(elemId), toks, syn, Seq.empty)
- case None ⇒ // No-op.
- }
- }
-
- // 2. DSL, sparse.
- for ((elemId, syns) ← mdl.sparseSynonymsDsl) {
- var found = false
+ for ((elemId, syns) ← mdl.sparseSynonyms; syn ← syns)
+ syn.trySparseMatch(ns).foreach(toks ⇒
addMatch(mdl.elements(elemId), toks, syn, Seq.empty))
- for (complex ← complexes if !found) {
+ // 2. DSL, sparse.
+ for ((elemId, syns) ← mdl.sparseSynonymsDsl; syn ← syns) {
+ for (complex ← complexes) {
val comb = complex.tokensComplexes
- syns.toStream.flatMap(
- syn ⇒ syn.trySparseMatch(comb.map(_.data), req)
match {
- case Some(tows) ⇒ Some(syn, tows)
- case None ⇒ None
- }
- ).headOption match {
- case Some((syn, tows)) ⇒
- val toks =
- tows.filter(_.isRight).map(_.right.get) ++
+ syn.trySparseMatch(comb.map(_.data), req).foreach(tows
⇒ {
+ val toks =
+ tows.filter(_.isRight).map(_.right.get) ++
tows.filter(_.isLeft).map(_.left.get).flatMap(w ⇒
ns.filter(
t ⇒ t.startCharIndex >=
w.getStartCharIndex &&
- t.endCharIndex <= w.getEndCharIndex
+ t.endCharIndex <=
w.getEndCharIndex
)
)
- addMatch(mdl.elements(elemId),
toks.sortBy(_.startCharIndex), syn, getParts(comb, syn))
+ addMatch(mdl.elements(elemId),
toks.sortBy(_.startCharIndex), syn, getParts(comb, syn))
- found = true
- case None ⇒ // No-op.
- }
+ })
}
}
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
index c9e1b5d..9c60c2d 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
@@ -35,51 +35,51 @@ class NJiggleModel extends NCAbstractTokensModel {
// TODO:
-// override def onContext(ctx: NCContext): NCResult = {
-// val variants = ctx.getVariants.asScala
-//
-// def checkOneVariant(sparsity: Int): Unit = {
-// require(variants.size == 1)
-//
-// val toks = variants.head.asScala.filter(_.getId == "xyz")
-//
-// require(toks.size == 3)
-//
-// checkSparsity(sparsity, toks)
-// }
-//
-// def checkSparsity(sparsity: Int, toks: mutable.Buffer[NCToken]):
Unit =
-//
require(toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int]
== sparsity))
-//
-// def checkExists(sparsity: Int): Unit = {
-// require(
-// variants.exists(v ⇒ {
-// val toks = v.asScala.filter(_.getId == "xyz")
-//
-// toks.size match {
-// case 3 ⇒
-// checkSparsity(sparsity, toks)
-//
-// true
-// case _ ⇒ false
-// }
-// })
-// )
-// }
-//
-// ctx.getRequest.getNormalizedText match {
-// case "x y z x y z x y z" ⇒ checkOneVariant(0)
-// case "x y z test x y z test x y z test" ⇒ checkOneVariant(0)
-// case "x test y z x test y z x y test z" ⇒ checkOneVariant(1)
-// case "x z y x z y x z y" ⇒ checkExists(0)
-// case "x z y test x z y test x z y test" ⇒ checkExists(0)
-// case "x test z y x test z y x test z y" ⇒ checkExists(1)
-//
-// case _ ⇒ throw new AssertionError(s"Unexpected request:
${ctx.getRequest.getNormalizedText}")
-// }
-//
-// NCResult.text("OK")
-// }
+ override def onContext(ctx: NCContext): NCResult = {
+ val variants = ctx.getVariants.asScala
+
+ def checkOneVariant(sparsity: Int): Unit = {
+ require(variants.size == 1)
+
+ val toks = variants.head.asScala.filter(_.getId == "xyz")
+
+ require(toks.size == 3)
+
+ checkSparsity(sparsity, toks)
+ }
+
+ def checkSparsity(sparsity: Int, toks: mutable.Buffer[NCToken]): Unit =
+
require(toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int]
== sparsity))
+
+ def checkExists(sparsity: Int): Unit = {
+ require(
+ variants.exists(v ⇒ {
+ val toks = v.asScala.filter(_.getId == "xyz")
+
+ toks.size match {
+ case 3 ⇒
+ checkSparsity(sparsity, toks)
+
+ true
+ case _ ⇒ false
+ }
+ })
+ )
+ }
+
+ ctx.getRequest.getNormalizedText match {
+ case "x y z x y z x y z" ⇒ checkOneVariant(0)
+ case "x y z test x y z test x y z test" ⇒ checkOneVariant(0)
+ case "x test y z x test y z x y test z" ⇒ checkOneVariant(1)
+ case "x z y x z y x z y" ⇒ checkExists(0)
+ case "x z y test x z y test x z y test" ⇒ checkExists(0)
+ case "x test z y x test z y x test z y" ⇒ checkExists(1)
+
+ case _ ⇒ throw new AssertionError(s"Unexpected request:
${ctx.getRequest.getNormalizedText}")
+ }
+
+ NCResult.text("OK")
+ }
}
@NCTestEnvironment(model = classOf[NJiggleModel], startClient = true)