This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new 3a2beda WIP.
3a2beda is described below
commit 3a2bedac6c9d2bdf45cb13ae37499b43e3c4cc2f
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Sep 25 13:06:59 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 2 +-
.../nlpcraft/common/nlp/NCNlpSentenceToken.scala | 24 +++++++++++-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 2 +-
.../probe/mgrs/sentence/NCSentenceManager.scala | 45 ++++++++++++++--------
4 files changed, 55 insertions(+), 18 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 588706a..9d9f4e3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -59,7 +59,7 @@ class NCNlpSentence(
@transient
private var hash: java.lang.Integer = _
- private def calcHash(): Int = tokens.hashCode()
+ private def calcHash(): Int = U.mkJavaHash(tokens)
// Deep copy.
override def clone(): NCNlpSentence =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
index fa9cbe6..1c66da1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
@@ -17,6 +17,7 @@
package org.apache.nlpcraft.common.nlp
+import org.apache.nlpcraft.common.U
import org.apache.nlpcraft.common.nlp.pos._
import java.util.{List => JList}
@@ -56,6 +57,22 @@ case class NCNlpSentenceToken(
def isSwearWord: Boolean = getNlpValue[Boolean]("swear")
def isEnglish: Boolean = getNlpValue[Boolean]("english")
+ @transient
+ private var hash: java.lang.Integer = _
+
+ //noinspection HashCodeUsesVar
+ override def hashCode(): Int = {
+ if (hash == null)
+ hash = U.mkJavaHash(index, notes, stopsReasons)
+
+ hash
+ }
+
+ override def equals(obj: Any): Boolean = obj match {
+ case x: NCNlpSentenceToken => x.index == index && x.notes == notes && x.stopsReasons == stopsReasons
+ case _ => false
+ }
+
/**
*
* @param noteType Note type.
@@ -80,7 +97,11 @@ case class NCNlpSentenceToken(
*
* @param note Note.
*/
- def remove(note: NCNlpSentenceNote): Unit = notes.remove(note)
+ def remove(note: NCNlpSentenceNote): Unit = {
+ notes.remove(note)
+
+ hash = null
+ }
/**
* Tests whether or not this token contains note.
@@ -162,6 +183,7 @@ case class NCNlpSentenceToken(
* @param note Element.
*/
def add(note: NCNlpSentenceNote): Unit = {
+ hash = null
val added = notes.add(note)
if (added && note.isNlp)
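The NCNlpSentenceToken hunk above memoizes hashCode() and clears the cached value whenever a note is added or removed, so the hash stays consistent with equals() while repeated hashing of unchanged tokens stays cheap. A minimal standalone sketch of that cache-and-invalidate pattern (the CachedNotes class below is illustrative only and uses java.util.Objects.hash in place of nlpcraft's U.mkJavaHash):

import scala.collection.mutable

// Illustrative only: cache hashCode() over mutable state and drop the
// cache on every mutation, mirroring the NCNlpSentenceToken change above.
final class CachedNotes(val index: Int) {
    private val notes = mutable.HashSet.empty[String]

    @transient
    private var hash: java.lang.Integer = _

    //noinspection HashCodeUsesVar
    override def hashCode(): Int = {
        if (hash == null)
            hash = java.util.Objects.hash(Int.box(index), notes)

        hash
    }

    override def equals(obj: Any): Boolean = obj match {
        case x: CachedNotes => x.index == index && x.notes == notes
        case _ => false
    }

    def add(note: String): Unit = {
        hash = null // Invalidate before the note set changes.
        notes += note
    }

    def remove(note: String): Unit = {
        notes -= note
        hash = null // Invalidate after removal, as in the patch.
    }
}

As in the patch, only add/remove reset the cache; mutating a stored element in place would leave a stale hash.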
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 0197f2e..a6aba57 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -677,7 +677,7 @@ object NCModelEnricher extends NCProbeEnricher {
private def normalize(ns: Sentence): Unit = {
val usrNotes = ns.flatten.filter(_.isUser).distinct
val links = NCSentenceManager.getLinks(usrNotes)
- val parts = NCSentenceManager.getPartKeys(usrNotes: _*)
+ val parts = NCSentenceManager.getPartKeys(usrNotes)
val usrNotesIdxs = usrNotes.
filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 42b5583..4354d59 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -72,16 +72,31 @@ object NCSentenceManager extends NCService {
/**
*
+ * @param n
+ */
+ private def getParts(n: NCNlpSentenceNote): Option[Seq[NCTokenPartKey]] = {
+ val res: Option[JList[NCTokenPartKey]] = n.dataOpt("parts")
+
+ res match {
+ case Some(v) => Some(v.asScala)
+ case None => None
+ }
+ }
+
+ /**
+ *
* @param notes
*/
- def getPartKeys(notes: NCNlpSentenceNote*): Seq[NCTokenPartKey] =
- notes.
- filter(_.isUser).
- flatMap(n => {
- val optList: Option[JList[NCTokenPartKey]] = n.dataOpt("parts")
+ def getPartKeys(notes: Seq[NCNlpSentenceNote]): Seq[NCTokenPartKey] =
+ notes.filter(_.isUser).flatMap(getParts).flatten.distinct
- optList
- }).flatMap(_.asScala).distinct
+ /**
+ *
+ * @param note
+ * @return
+ */
+ def getPartKeys(note: NCNlpSentenceNote): Seq[NCTokenPartKey] =
+ if (note.isUser) getParts(note).getOrElse(Seq.empty) else Seq.empty
/**
*
@@ -275,9 +290,9 @@ object NCSentenceManager extends NCService {
private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]): Unit = {
// Replaces other notes indexes.
for (t <- userNoteTypes :+ "nlpcraft:nlp"; note <- ns.getNotes(t)) {
- val toks = ns.filter(_.contains(note)).sortBy(_.index)
+ val toks = ns.filter(_.contains(note))
- val newNote = note.clone(toks.map(_.index).toSeq, toks.flatMap(_.wordIndexes).toSeq.sorted)
+ val newNote = note.clone(toks.map(_.index), toks.flatMap(_.wordIndexes).toSeq.sorted)
toks.foreach(t => {
t.remove(note)
@@ -544,17 +559,17 @@ object NCSentenceManager extends NCService {
*/
private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit =
if (!mdl.getAbstractTokens.isEmpty) {
- val notes = ns.flatten
+ val notes = ns.flatten.distinct
- val keys = getPartKeys(notes: _*)
+ val keys = getPartKeys(notes)
val noteLinks = getLinks(notes)
notes.filter(n => {
- val noteToks = ns.tokens.filter(_.contains(n))
+ lazy val noteToks = ns.tokens.filter(t => t.index >= n.tokenFrom && t.index <= n.tokenTo)
mdl.getAbstractTokens.contains(n.noteType) &&
- !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
- !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
+ !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
+ !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
}).foreach(ns.removeNote)
}
@@ -645,7 +660,7 @@ object NCSentenceManager extends NCService {
// There aren't links on it.
filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
// It doesn't have links.
- filter(getPartKeys(_).isEmpty).
+ filter(n => getPartKeys(n).isEmpty).
flatMap(note => {
val noteWordsIdxs = note.wordIndexesSet
val key = NCTokenPartKey(note, sen)
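Taken together, the NCSentenceManager changes replace the varargs getPartKeys(notes: _*) call sites with a Seq-based overload plus a small getParts helper that unwraps the optional Java list stored under the "parts" key. A self-contained sketch of that conversion, assuming Scala 2.13 and with PartKey and Note as illustrative stand-ins for NCTokenPartKey and NCNlpSentenceNote:

import java.util.{List => JList}
import scala.jdk.CollectionConverters._

object PartKeysSketch {
    // Illustrative stand-ins; the real classes live in the nlpcraft code base.
    final case class PartKey(id: String)
    final case class Note(isUser: Boolean, data: Map[String, AnyRef]) {
        def dataOpt[T](key: String): Option[T] = data.get(key).map(_.asInstanceOf[T])
    }

    // Mirrors the new getParts helper: unwrap Option[JList[...]] into Option[Seq[...]].
    private def getParts(n: Note): Option[Seq[PartKey]] =
        n.dataOpt[JList[PartKey]]("parts").map(_.asScala.toSeq)

    // Mirrors the Seq-based getPartKeys overload.
    def getPartKeys(notes: Seq[Note]): Seq[PartKey] =
        notes.filter(_.isUser).flatMap(getParts).flatten.distinct

    def main(args: Array[String]): Unit = {
        val parts: JList[PartKey] = java.util.Arrays.asList(PartKey("a"), PartKey("b"))
        val notes = Seq(Note(isUser = true, Map("parts" -> parts)), Note(isUser = false, Map.empty))
        println(getPartKeys(notes)) // List(PartKey(a), PartKey(b))
    }
}

The sketch only shows the shape of the refactoring; the committed code works against the nlpcraft note data map and the converters already imported in NCSentenceManager.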