This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/master by this push:
     new e67355f  Direct synonyms priorities issue.
e67355f is described below

commit e67355f6329f6e94b27034fc79e781be662a9624
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Feb 23 17:08:08 2021 +0300

    Direct synonyms priorities issue.
---
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 31 ++++++-
 .../nlpcraft/model/jiggle/NCJiggleSpec.scala       | 99 ++++++++++++++++++++++
 2 files changed, 128 insertions(+), 2 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 28fcd21..bea4eaa 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -431,10 +431,37 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 "totalJiggledPerms" → permCnt
             )
 
-            val matchCnt = matches.size
+            // Scans by elements that are found with same tokens length.
+            // Inside, for each token we drop all non-optimized combinations.
+            // Example:
+            // 1. element's synonym - 'a b', jiggle factor 4 (default), isPermuteSynonyms 'true' (default)
+            // 2. Request 'a b a b',
+            // Initially found 0-1, 1-2, 2-3, 0-3.
+            // 0-3 will be deleted because for 0 and 3 tokens best variants found for same element with same tokens length.
+            val matchesNorm =
+                matches.
+                flatMap(m ⇒ m.tokens.map(_ → m)).
+                groupBy { case (t, m) ⇒ (m.element.getId, m.length, t) }.
+                flatMap { case (_, seq) ⇒
+                    def perm[T](list: List[List[T]]): List[List[T]] =
+                        list match {
+                            case Nil ⇒ List(Nil)
+                            case head :: tail ⇒ for (h ← head; t ← perm(tail)) yield h :: t
+                        }
+
+                    // Optimization by sparsity sum for each tokens set for one element found with same tokens count.
+                    perm(
+                        seq.groupBy { case (tok, _) ⇒ tok }.
+                        map { case (_, seq) ⇒ seq.map { case (_, m) ⇒ m} .toList }.toList
+                    ).minBy(_.map(_.sparsity).sum)
+                }.
+                toSeq.
+                distinct
+
+            val matchCnt = matchesNorm.size
 
             // Add notes for all remaining (non-intersecting) matches.
-            for ((m, idx) ← matches.zipWithIndex) {
+            for ((m, idx) ← matchesNorm.zipWithIndex) {
                 if (DEEP_DEBUG)
                     logger.trace(
                         s"Model '${mdl.model.getId}' element found (${idx + 1} of $matchCnt) [" +
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
new file mode 100644
index 0000000..24ca12c
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.jiggle
+
+import org.apache.nlpcraft.model.`abstract`.NCAbstractTokensModel
+import org.apache.nlpcraft.model.{NCContext, NCElement, NCResult, NCToken}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.collection.JavaConverters._
+import scala.collection.mutable
+
+class NJiggleModel1 extends NCAbstractTokensModel {
+    override def getElements: util.Set[NCElement] = Set(NCTestElement("xyz", "x y z"))
+
+    // Default values.
+    override def isPermutateSynonyms: Boolean = true
+    override def getJiggleFactor: Int = 4
+
+    override def onContext(ctx: NCContext): NCResult = {
+        val variants = ctx.getVariants.asScala
+
+        def checkOneVariant(sparsity: Int): Unit = {
+            require(variants.size == 1)
+
+            val toks = variants.head.asScala.filter(_.getId == "xyz")
+
+            require(toks.size == 3)
+
+            checkSparsity(sparsity, toks)
+        }
+
+        def checkSparsity(sparsity: Int, toks: mutable.Buffer[NCToken]): Unit =
+            require(toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int] == sparsity))
+
+        def checkExists(sparsity: Int): Unit = {
+            require(
+                variants.exists(v ⇒ {
+                    val toks = v.asScala.filter(_.getId == "xyz")
+
+                    toks.size match {
+                        case 3 ⇒
+                            checkSparsity(sparsity, toks)
+
+                            true
+                        case _ ⇒ false
+                    }
+                })
+            )
+        }
+
+        ctx.getRequest.getNormalizedText match {
+            case "x y z x y z x y z" ⇒ checkOneVariant(0)
+            case "x y z test x y z test x y z test" ⇒ checkOneVariant(0)
+            case "x test y z x test y z x y test z" ⇒ checkOneVariant(1)
+            case "x z y x z y x z y" ⇒ checkExists(0)
+            case "x z y test x z y test x z y test" ⇒ checkExists(0)
+            case "x test z y x test z y x test z y" ⇒ checkExists(1)
+
+            case _ ⇒ throw new AssertionError(s"Unexpected request: ${ctx.getRequest.getNormalizedText}")
+        }
+
+        NCResult.text("OK")
+    }
+
+}
+
+@NCTestEnvironment(model = classOf[NJiggleModel1], startClient = true)
+class NCJiggleSpec1 extends NCTestContext {
+    @Test
+    def test(): Unit = {
+        checkResult("x y z x y z x y z", "OK")
+        checkResult("x y z test x y z test x y z test", "OK")
+        checkResult("x test y z x test y z x y test z", "OK")
+
+        // We don't check for sparsity > 1 because logic of synonyms permutation (neighbors only).
+        // Tests will not be clear.
+
+        checkResult("x z y x z y x z y", "OK")
+        checkResult("x z y test x z y test x z y test", "OK")
+        checkResult("x test z y x test z y x test z y", "OK")
+    }
+}
\ No newline at end of file

Reply via email to