This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new 6be2e30 Minor performance improvements.
6be2e30 is described below
commit 6be2e30f41c253296f62de0f6b657bfb41a88d47
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Feb 20 16:58:49 2021 +0300
Minor performance improvements.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 158 ++++++++++++++-------
.../model/NCEnricherNestedModelSpec3.scala | 61 ++++++++
2 files changed, 165 insertions(+), 54 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index c479308..23eeff6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -34,6 +34,54 @@ import scala.language.implicitConversions
object NCNlpSentence extends LazyLogging {
implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] =
x.tokens
+ private case class NoteLink(note: String, indexes: Seq[Int])
+ private case class PartKey(id: String, start: Int, end: Int) {
+ private def in(i: Int): Boolean = i >= start && i <= end
+ def intersect(id: String, start: Int, end: Int): Boolean = id ==
this.id && (in(start) || in(end))
+ }
+
+ private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
+ val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
+
+ for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" || n.noteType
== "nlpcraft:references"))
+ noteLinks += NoteLink(n("note").asInstanceOf[String],
n("indexes").asInstanceOf[JList[Int]].asScala)
+
+ for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
+ def add(noteName: String, idxsName: String): Unit = {
+ val names = n(noteName).asInstanceOf[JList[String]]
+ val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]]
+
+ require(names.size() == idxsSeq.size())
+
+ noteLinks ++=
+ (for ((name, idxs) ←
names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
+ yield NoteLink(name, idxs)
+ )
+ }
+
+ if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
+ if (n.contains("bynotes")) add("bynotes", "byindexes")
+ }
+
+ noteLinks
+ }
+
+ private def getPartKeys(notes: NCNlpSentenceNote*): Seq[PartKey] =
+ notes.
+ filter(_.isUser).
+ flatMap(n ⇒ {
+ val optList: Option[JList[util.HashMap[String,
JSerializable]]] = n.dataOpt("parts")
+
+ optList
+ }).flatMap(_.asScala).
+ map(map ⇒
+ PartKey(
+ map.get("id").asInstanceOf[String],
+ map.get("startcharindex").asInstanceOf[Int],
+ map.get("endcharindex").asInstanceOf[Int]
+ )
+ ).distinct
+
/**
*
* @param ns
@@ -509,6 +557,20 @@ class NCNlpSentence(
private def calcHash(): Int =
Seq(srvReqId, text, enabledBuiltInToks,
tokens).map(_.hashCode()).foldLeft(0)((a, b) ⇒ 31 * a + b)
+ private def addDeleted(sen: NCNlpSentence, dels:
Iterable[NCNlpSentenceNote]): Unit =
+ sen.deletedNotes ++= dels.map(n ⇒ {
+ val savedDelNote = n.clone()
+ val savedDelToks = n.tokenIndexes.map(idx ⇒ this(idx).clone())
+
+ val mainNotes = savedDelToks.flatten.filter(n ⇒ n.noteType !=
"nlpcraft:nlp" && n != savedDelNote)
+
+            // Deleted note's tokens should contain only nlp data and deleted
notes.
+ for (savedDelTok ← savedDelToks; mainNote ← mainNotes)
+ savedDelTok.remove(mainNote)
+
+ savedDelNote → savedDelToks
+ })
+
// Deep copy.
override def clone(): NCNlpSentence =
new NCNlpSentence(
@@ -559,45 +621,9 @@ class NCNlpSentence(
if (!mdl.getAbstractTokens.isEmpty) {
val notes = ns.flatten
- case class Key(id: String, start: Int, end: Int) {
- private def in(i: Int): Boolean = i >= start && i <= end
- def intersect(id: String, start: Int, end: Int): Boolean = id
== this.id && (in(start) || in(end))
- }
-
- val keys: Seq[Key] =
- notes.filter(_.isUser).flatMap(n ⇒ {
- val optList: Option[JList[util.HashMap[String,
JSerializable]]] = n.dataOpt("parts")
-
- optList
- }).flatMap(_.asScala).map(map ⇒ Key(
- map.get("id").asInstanceOf[String],
- map.get("startcharindex").asInstanceOf[Int],
- map.get("endcharindex").asInstanceOf[Int])
- ).distinct
-
- case class NoteLink(note: String, indexes: Seq[Int])
-
- val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
-
- for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" ||
n.noteType == "nlpcraft:references"))
- noteLinks += NoteLink(n("note").asInstanceOf[String],
n("indexes").asInstanceOf[JList[Int]].asScala)
- for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
- def add(noteName: String, idxsName: String): Unit = {
- val names = n(noteName).asInstanceOf[JList[String]]
- val idxsSeq = n(idxsName).asInstanceOf[JList[JList[Int]]]
-
- require(names.size() == idxsSeq.size())
-
- noteLinks ++=
- (for ((name, idxs) ←
names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
- yield NoteLink(name, idxs)
- )
- }
-
- if (n.contains("subjnotes")) add("subjnotes", "subjindexes")
- if (n.contains("bynotes")) add("bynotes", "byindexes")
- }
+ val keys = getPartKeys(notes :_*)
+ val noteLinks = getLinks(notes)
notes.filter(n ⇒ {
val noteToks = ns.tokens.filter(_.contains(n))
@@ -657,11 +683,42 @@ class NCNlpSentence(
redundant.foreach(this.removeNote)
- val delCombs: Seq[NCNlpSentenceNote] =
+ var delCombs: Seq[NCNlpSentenceNote] =
getNotNlpNotes(this).
flatMap(note ⇒ getNotNlpNotes(this.slice(note.tokenFrom,
note.tokenTo + 1)).filter(_ != note)).
distinct
+ // Optimization. Deletes all wholly swallowed notes.
+ val links = getLinks(this.flatten)
+
+ val swallowed =
+ delCombs.
+ filter(n ⇒ !links.contains(NoteLink(n.noteType,
n.tokenIndexes))).
+ filter(getPartKeys(_).isEmpty).
+ flatMap(n ⇒ {
+ val owners =
+ delCombs.
+ filter(_ != n).
+ flatMap(n1 ⇒
+ if (getPartKeys(n1).contains(
+ PartKey(
+ n.noteType,
+ this(n.tokenFrom).startCharIndex,
+ this(n.tokenTo).endCharIndex)
+ )
+ )
+ Some(n1)
+ else
+ None
+ )
+
+ if (owners.exists(_.wordIndexes == n.wordIndexes)) Some(n)
else None
+ })
+
+ delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
+ addDeleted(this, swallowed)
+ swallowed.foreach(this.removeNote)
+
val toksByIdx: Seq[Seq[NCNlpSentenceNote]] =
delCombs.flatMap(note ⇒ note.wordIndexes.map(_ → note)).
groupBy { case (idx, _) ⇒ idx }.
@@ -678,7 +735,13 @@ class NCNlpSentence(
(minDelSize to delCombs.size).
flatMap(i ⇒
delCombs.combinations(i).
- filter(delComb ⇒
!toksByIdx.exists(_.count(note ⇒ !delComb.contains(note)) > 1))
+ filter(delComb ⇒
+ !toksByIdx.exists(
+ rec ⇒
+ rec.size - delCombs.size <= 1 &&
+ rec.count(note ⇒
!delComb.contains(note)) > 1
+ )
+ )
).
sortBy(_.size).
map(_.toSet).
@@ -688,20 +751,7 @@ class NCNlpSentence(
val nsClone = this.clone()
// Saves deleted notes for sentence and their
tokens.
- nsClone.deletedNotes ++= delComb.map(n ⇒ {
- val savedDelNote = n.clone()
- val savedDelToks = n.tokenIndexes.map(idx
⇒ nsClone(idx).clone())
-
- val mainNotes =
- savedDelToks.flatten.filter(n ⇒
n.noteType != "nlpcraft:nlp" && n != savedDelNote)
-
- // Deleted note's tokens should contains
only nlp data and deleted notes.
- for (savedDelTok ← savedDelToks; mainNote
← mainNotes)
- savedDelTok.remove(mainNote)
-
- savedDelNote → savedDelToks
- })
-
+ addDeleted(nsClone, delComb)
delComb.foreach(nsClone.removeNote)
// Has overlapped notes for some tokens.
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
new file mode 100644
index 0000000..e1fedca
--- /dev/null
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec3.scala
@@ -0,0 +1,61 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
+
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentMatch,
NCModelAdapter, NCResult}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.collection.JavaConverters._
+
+/**
+ * Nested Elements test model.
+ */
+class NCNestedTestModel3 extends NCModelAdapter(
+ "nlpcraft.nested3.test.mdl", "Nested Data Test Model", "1.0"
+) {
+ override def getElements: util.Set[NCElement] =
+ Set(
+ NCTestElement("e1", "//[a-zA-Z0-9]+//"),
+ NCTestElement("e2", "^^(id == 'e1')^^"),
+ )
+
+ override def getAbstractTokens: util.Set[String] = Set("e1").asJava
+ override def getEnabledBuiltInTokens: util.Set[String] =
Set.empty[String].asJava
+
+ @NCIntent("intent=onE2 term(t1)={id == 'e2'}[12, 100]")
+ def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+}
+
+/**
+ * Nested elements model enricher test.
+ */
+@NCTestEnvironment(model = classOf[NCNestedTestModel3], startClient = true)
+class NCEnricherNestedModelSpec3 extends NCTestContext {
+ @Test
+ def test(): Unit = {
+ println("Started")
+
+ val t = System.currentTimeMillis()
+
+ checkIntent("a " * 12, "onE2")
+
+ println(s"Passed: ${System.currentTimeMillis() - t}")
+ }
+}