This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 6be2e30 Minor performance improvements.
6be2e30 is described below
commit 6be2e30f41c253296f62de0f6b657bfb41a88d47
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Feb 20 16:58:49 2021 +0300
Minor performance improvements.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 158 ++++++++++++++-------
.../model/NCEnricherNestedModelSpec3.scala | 61 ++++++++
2 files changed, 165 insertions(+), 54 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index c479308..23eeff6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -34,6 +34,54 @@ import scala.language.implicitConversions
object NCNlpSentence extends LazyLogging {
implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] =
x.tokens
+ private case class NoteLink(note: String, indexes: Seq[Int])
+ private case class PartKey(id: String, start: Int, end: Int) {
+ private def in(i: Int): Boolean = i >= start && i <= end
+ def intersect(id: String, start: Int, end: Int): Boolean = id ==
this.id && (in(start) || in(end))
+ }
+
+ private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
+ val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
+
+ for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" || n.noteType
== "nlpcraft:references"))
+ noteLinks += NoteLink(n("note").asInstanceOf[String],
n("indexes").asInstanceOf[JList[Int]].asScala)
+
+ for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
+ def add(noteName: String, idxsName: String): Unit = {
+ val names = n(noteName).asInstanceOf[JList[String]]
+ val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]]
+
+ require(names.size() == idxsSeq.size())
+
+ noteLinks ++=
+ (for ((name, idxs) ←
names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
+ yield NoteLink(name, idxs)
+ )
+ }
+
+ if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
+ if (n.contains("bynotes")) add("bynotes", "byindexes")
+ }
+
+ noteLinks
+ }
+
+ private def getPartKeys(notes: NCNlpSentenceNote*): Seq[PartKey] =
+ notes.
+ filter(_.isUser).
+ flatMap(n ⇒ {
+ val optList: Option[JList[util.HashMap[String,
JSerializable]]] = n.dataOpt("parts")
+
+ optList
+ }).flatMap(_.asScala).
+ map(map ⇒
+ PartKey(
+ map.get("id").asInstanceOf[String],
+ map.get("startcharindex").asInstanceOf[Int],
+ map.get("endcharindex").asInstanceOf[Int]
+ )
+ ).distinct
+
/**
*
* @param ns
@@ -509,6 +557,20 @@ class NCNlpSentence(
private def calcHash(): Int =
Seq(srvReqId, text, enabledBuiltInToks,
tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
+ private def addDeleted(sen: NCNlpSentence, dels:
Iterable[NCNlpSentenceNote]): Unit =
+ sen.deletedNotes ++= dels.map(n ⇒ {
+ val savedDelNote = n.clone()
+ val savedDelToks = n.tokenIndexes.map(idx ⇒ this(idx).clone())
+
+ val mainNotes = savedDelToks.flatten.filter(n ⇒ n.noteType !=
"nlpcraft:nlp" && n != savedDelNote)
+
+            // Deleted note's tokens should contain only nlp data and deleted
notes.
+ for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
+ savedDelTok.remove(mainNote)
+
+ savedDelNote → savedDelToks
+ })
+
// Deep copy.
override def clone(): NCNlpSentence =
new NCNlpSentence(
@@ -559,45 +621,9 @@ class NCNlpSentence(
if (!mdl.getAbstractTokens.isEmpty) {
val notes = ns.flatten
- case class Key(id: String, start: Int, end: Int) {
- private def in(i: Int): Boolean = i >= start && i <= end
- def intersect(id: String, start: Int, end: Int): Boolean = id
== this.id && (in(start) || in(end))
- }
-
- val keys: Seq[Key] =
- notes.filter(_.isUser).flatMap(n ⇒ {
- val optList: Option[JList[util.HashMap[String,
JSerializable]]] = n.dataOpt("parts")
-
- optList
- }).flatMap(_.asScala).map(map ⇒ Key(
- map.get("id").asInstanceOf[String],
- map.get("startcharindex").asInstanceOf[Int],
- map.get("endcharindex").asInstanceOf[Int])
- ).distinct
-
- case class NoteLink(note: String, indexes: Seq[Int])
-
- val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
-
- for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" ||
n.noteType == "nlpcraft:references"))
- noteLinks += NoteLink(n("note").asInstanceOf[String],
n("indexes").asInstanceOf[JList[Int]].asScala)
- for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
- def add(noteName: String, idxsName: String): Unit = {
- val names = n(noteName).asInstanceOf[JList[String]]
- val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]]
-
- require(names.size() == idxsSeq.size())
-
- noteLinks ++=
- (for ((name, idxs) ←
names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
- yield NoteLink(name, idxs)
- )
- }
-
- if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
- if (n.contains("bynotes")) add("bynotes", "byindexes")
- }
+ val keys = getPartKeys(notes :_*)
+ val noteLinks = getLinks(notes)
notes.filter(n ⇒ {
val noteToks = ns.tokens.filter(_.contains(n))
@@ -657,11 +683,42 @@ class NCNlpSentence(
redundant.foreach(this.removeNote)
- val delCombs: Seq[NCNlpSentenceNote] =
+ var delCombs: Seq[NCNlpSentenceNote] =
getNotNlpNotes(this).
flatMap(note ⇒ getNotNlpNotes(this.slice(note.tokenFrom,
note.tokenTo + 1)).filter(_ != note)).
distinct
+ // Optimization. Deletes all wholly swallowed notes.
+ val links = getLinks(this.flatten)
+
+ val swallowed =
+ delCombs.
+ filter(n ⇒ !links.contains(NoteLink(n.noteType,
n.tokenIndexes))).
+ filter(getPartKeys(_).isEmpty).
+ flatMap(n ⇒ {
+ val owners =
+ delCombs.
+ filter(_ != n).
+ flatMap(n1 ⇒
+ if (getPartKeys(n1).contains(
+ PartKey(
+ n.noteType,
+ this(n.tokenFrom).startCharIndex,
+ this(n.tokenTo).endCharIndex)
+ )
+ )
+ Some(n1)
+ else
+ None
+ )
+
+ if (owners.exists(_.wordIndexes == n.wordIndexes)) Some(n)
else None
+ })
+
+ delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
+ addDeleted(this, swallowed)
+ swallowed.foreach(this.removeNote)
+
val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
groupBy { case (idx, _) ⇒ idx }.
@@ -678,7 +735,13 @@ class NCNlpSentence(
(minDelSize to delCombs.size).
flatMap(i ⇒
delCombs.combinations(i).
- filter(delComb ⇒
!toksByIdx.exists(_.count(note ⇒ !delComb.contains(note)) > 1))
+ filter(delComb ⇒
+ !toksByIdx.exists(
+ rec ⇒
+ rec.size - delCombs.size <= 1 &&
+ rec.count(note ⇒
!delComb.contains(note)) > 1
+ )
+ )
).
sortBy(_.size).
map(_.toSet).
@@ -688,20 +751,7 @@ class NCNlpSentence(
val nsClone = this.clone()
// Saves deleted notes for sentence and their
tokens.
- nsClone.deletedNotes ++= delComb.map(n ⇒ {
- val savedDelNote = n.clone()
- val savedDelToks = n.tokenIndexes.map(idx
⇒ nsClone(idx).clone())
-
- val mainNotes =
- savedDelToks.flatten.filter(n ⇒
n.noteType != "nlpcraft:nlp" && n != savedDelNote)
-
- // Deleted note's tokens should contains
only nlp data and deleted notes.
- for (savedDelTok ← savedDelToks; mainNote
← mainNotes)
- savedDelTok.remove(mainNote)
-
- savedDelNote → savedDelToks
- })
-
+ addDeleted(nsClone, delComb)
delComb.foreach(nsClone.removeNote)
// Has overlapped notes for some tokens.
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
new file mode 100644
index 0000000..e1fedca
--- /dev/null
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
+
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch,
NCModelAdapter, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.collection.JavaConverters._
+
+/**
+ * Nested Elements test model.
+ */
+class NCNestedTestModel3 extends NCModelAdapter(
+ "nlpcraft.nested3.test.mdl", "Nested Data Test Model", "1.0"
+) {
+ override def getElements: util.Set[NCElement] =
+ Set(
+ NCTestElement("e1", "//[a-zA-Z0-9]+//"),
+ NCTestElement("e2", "^^(id == 'e1')^^"),
+ )
+
+ override def getAbstractTokens: util.Set[String] = Set("e1").asJava
+ override def getEnabledBuiltInTokens: util.Set[String] =
Set.empty[String].asJava
+
+ @NCIntent("intent=onE2 term(t1)={id == 'e2'}[12, 100]")
+ def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+}
+
+/**
+ * Nested elements model enricher test.
+ */
+@NCTestEnvironment(model = classOf[NCNestedTestModel3], startClient = true)
+class NCEnricherNestedModelSpec3 extends NCTestContext {
+ @Test
+ def test(): Unit = {
+ println("Started")
+
+ val t = System.currentTimeMillis()
+
+ checkIntent("a " * 12, "onE2")
+
+ println(s"Passed: ${System.currentTimeMillis() - t}")
+ }
+}