This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-246
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-246 by this push:
new 92c4048 WIP.
92c4048 is described below
commit 92c4048e8acc553e9b5a576d1cb7fb9ecf309952
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Feb 25 13:16:04 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 70 +++++++++-------------
.../model/NCEnricherNestedModelSpec2.scala | 4 +-
2 files changed, 30 insertions(+), 44 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 0573fe9..42ff9af 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -35,16 +35,29 @@ object NCNlpSentence extends LazyLogging {
implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] =
x.tokens
private case class NoteLink(note: String, indexes: Seq[Int])
- private case class PartKey(id: String, start: Int, end: Int) {
+
+ case class PartKey(id: String, start: Int, end: Int) {
+ require(start <= end)
+
private def in(i: Int): Boolean = i >= start && i <= end
        def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
}
+ object PartKey {
+ def apply(m: util.HashMap[String, JSerializable]): PartKey = {
+ def get[T](name: String): T = m.get(name).asInstanceOf[T]
+
+ PartKey(get("id"), get("startcharindex"), get("endcharindex"))
+ }
+
+ def apply(t: NCNlpSentenceNote, sen: NCNlpSentence): PartKey =
+            PartKey(t.noteType, sen(t.tokenFrom).startCharIndex, sen(t.tokenTo).endCharIndex)
+ }
private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
        for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references"))
-            noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala)
+            noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala.sorted)
for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
def add(noteName: String, idxsName: String): Unit = {
@@ -55,7 +68,7 @@ object NCNlpSentence extends LazyLogging {
noteLinks ++=
                    (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
- yield NoteLink(name, idxs)
+ yield NoteLink(name, idxs.sorted)
)
}
@@ -73,14 +86,7 @@ object NCNlpSentence extends LazyLogging {
                val optList: Option[JList[util.HashMap[String, JSerializable]]] = n.dataOpt("parts")
optList
- }).flatMap(_.asScala).
- map(map ⇒
- PartKey(
- map.get("id").asInstanceOf[String],
- map.get("startcharindex").asInstanceOf[Int],
- map.get("endcharindex").asInstanceOf[Int]
- )
- ).distinct
+ }).flatMap(_.asScala).map(m ⇒ PartKey(m)).distinct
/**
*
@@ -632,7 +638,7 @@ class NCNlpSentence(
mdl.getAbstractTokens.contains(n.noteType) &&
!keys.exists(_.intersect(n.noteType,
noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
- !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes))
+                !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
}).foreach(ns.removeNote)
}
@@ -687,7 +693,7 @@ class NCNlpSentence(
var delCombs: Seq[NCNlpSentenceNote] =
getNotNlpNotes(this).
-                flatMap(note ⇒ getNotNlpNotes(this.slice(note.tokenFrom, note.tokenTo + 1)).filter(_ != note)).
+                flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ this(i))).filter(_ != note)).
distinct
// Optimization. Deletes all wholly swallowed notes.
@@ -695,38 +701,18 @@ class NCNlpSentence(
val swallowed =
delCombs.
-                filter(n ⇒ !links.contains(NoteLink(n.noteType, n.tokenIndexes))).
+ // There aren't links on it.
+                filter(n ⇒ !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
+ // It doesn't have links.
filter(getPartKeys(_).isEmpty).
- flatMap(n ⇒ {
- val wIdxs = n.wordIndexes.toSet
-
- val owners =
- delCombs.
- filter(_ != n).
- flatMap(n1 ⇒
- if (getPartKeys(n1).contains(
- PartKey(
- n.noteType,
- this(n.tokenFrom).startCharIndex,
- this(n.tokenTo).endCharIndex)
- )
- )
- Some(n1)
- else
- None
- )
+ flatMap(note ⇒ {
+ val noteWordsIdxs = note.wordIndexes.toSet
+ val key = PartKey(note, this)
+ val delCombOthers =
+                        delCombs.filter(_ != note).flatMap(n ⇒ if (getPartKeys(n).contains(key)) Some(n) else None)
- if (owners.exists(
- o ⇒ {
- val oWIdxs = o.wordIndexes.toSet
-
- wIdxs == oWIdxs || wIdxs.subsetOf(oWIdxs)
- })
- )
- Some(n)
- else
- None
+                    if (delCombOthers.exists(o ⇒ noteWordsIdxs == o.wordIndexes.toSet)) Some(note) else None
})
delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
index 5897136..957730a 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
@@ -47,8 +47,8 @@ class NCNestedTestModel21 extends NCModelAdapter("nlpcraft.nested2.test.mdl", "N
class NCEnricherNestedModelSpec21 extends NCTestContext {
@Test
def test(): Unit = {
-// checkIntent("word", "onE1")
-// checkIntent("10 word", "onE1")
+ checkIntent("word", "onE1")
+ checkIntent("10 word", "onE1")
checkIntent("11 12 word", "onNumAndE1")
}
}