[incubator-nlpcraft] branch NLPCRAFT-443 updated: WIP.

sergeykamov Sun, 19 Sep 2021 07:27:03 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
     new 614dfe5  WIP.
614dfe5 is described below

commit 614dfe59281f2e7e71a1abd572dd25f2a2075015
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Sep 19 17:26:54 2021 +0300

    WIP.
---
 .../org/apache/nlpcraft/common/nlp/NCNlpSentence.scala    |  4 ++--
 .../apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala    | 15 +++++++--------
 .../probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala  |  4 ++--
 .../nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala  | 11 ++++++-----
 4 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 478d930..0f0b462 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -140,8 +140,8 @@ class NCNlpSentence(
 
             // One possible difference - stopwords indexes.
             def wordsEqualOrSimilar0(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
-                val set1 = n1.wordIndexes.toSet
-                val set2 = n2.wordIndexes.toSet
+                val set1 = n1.wordIndexesSet
+                val set2 = n2.wordIndexesSet
 
                 set1 == set2 || set1.subsetOf(set2) && set2.diff(set1).forall(stopIdxs.contains)
             }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index 1574787..63ae6ca 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -33,6 +33,10 @@ import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava}
 class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends JSerializable with NCAsciiLike {
     import NCNlpSentenceNote._
 
+    private lazy val dataWithoutIndexes: Map[String, JSerializable] = this.filter(p => !SKIP_CLONE.contains(p._1))
+    private lazy val skipNlp: Map[String, JSerializable] = dataWithoutIndexes.filter { case (key, _) => key != "noteType" }
+
+
     @transient
     private lazy val hash = values.hashCode()
 
@@ -42,6 +46,7 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
     lazy val tokenTo: Int = values("tokMaxIndex").asInstanceOf[Int] // Last index.
     lazy val tokenIndexes: Seq[Int] = values("tokWordIndexes").asInstanceOf[JList[Int]].asScala.toSeq // Includes 1st and last indices too.
     lazy val wordIndexes: Seq[Int] = values("wordIndexes").asInstanceOf[JList[Int]].asScala.toSeq // Includes 1st and last indices too.
+    lazy val wordIndexesSet: Set[Int] = wordIndexes.toSet
     lazy val sparsity: Int = values("sparsity").asInstanceOf[Int]
     lazy val isDirect: Boolean = values("direct").asInstanceOf[Boolean]
     lazy val isUser: Boolean = {
@@ -89,7 +94,7 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
         this.noteType == n.noteType &&
         this.wordIndexes.size == n.wordIndexes.size &&
         this.wordIndexes.zip(n.wordIndexes).map(p => p._1 - p._2).distinct.size == 1 &&
-        this.filter(p => !SKIP_CLONE.contains(p._1)) == n.filter(p => !SKIP_CLONE.contains(p._1))
+        this.dataWithoutIndexes == n.dataWithoutIndexes
 
     /**
       *
@@ -102,12 +107,6 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
       *
       * @return
       */
-    def skipNlp(): Map[String, JSerializable] =
-        values.filter { case (key, _) => !SKIP_CLONE.contains(key) && key != "noteType" }
-
-    /**
-      *
-      */
     def asMetadata(): Map[String, JSerializable] =
         if (isUser)
             values.get("meta") match {
@@ -117,7 +116,7 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
         else {
             val md = mutable.Map.empty[String, JSerializable]
 
-            val m = if (noteType != "nlpcraft:nlp") skipNlp() else values
+            val m = if (noteType != "nlpcraft:nlp") skipNlp else values
 
             m.foreach { case (name, value) => md += (name.toLowerCase() -> value)}
 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index e5f9ee2..9706c4c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -28,7 +28,7 @@ import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.NCSynonymChunkKind
 import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
 import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
 import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel,  NCProbeVariants, NCTokenPartKey, NCProbeSynonym => Synonym}
 
 import java.io.Serializable
 import java.util.{List => JList}
@@ -680,7 +680,7 @@ object NCModelEnricher extends NCProbeEnricher {
                     candidateIdx != idx &&
                         candidate.noteType == n.noteType &&
                         candidate.dataOpt("parts") == n.dataOpt("parts") &&
-                        candidate.wordIndexes.toSet.subsetOf(n.wordIndexes.toSet) &&
+                        candidate.wordIndexesSet.subsetOf(n.wordIndexesSet) &&
                         n.wordIndexes.filter(n => !candidate.wordIndexes.contains(n)).
                             forall(wordIdx => ns.tokens.exists(t => t.wordIndexes.contains(wordIdx) && t.isStopWord))
                 } match {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 0c0288d..ee8b719 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -652,7 +652,7 @@ object NCSentenceManager extends NCService {
                 // It doesn't have links.
                 filter(getPartKeys(_).isEmpty).
                 flatMap(note => {
-                    val noteWordsIdxs = note.wordIndexes.toSet
+                    val noteWordsIdxs = note.wordIndexesSet
                     val key = NCTokenPartKey(note, sen)
 
                     val delCombOthers =
@@ -660,7 +660,7 @@ object NCSentenceManager extends NCService {
 
                     if (
                         delCombOthers.nonEmpty &&
-                        !delCombOthers.exists(o => noteWordsIdxs.subsetOf(o.wordIndexes.toSet))
+                        !delCombOthers.exists(o => noteWordsIdxs.subsetOf(o.wordIndexesSet))
                     )
                         Some(note)
                     else
@@ -777,10 +777,11 @@ object NCSentenceManager extends NCService {
 
         sensWithNotesIdxs = sensWithNotes.zipWithIndex
 
-        sens = sensWithNotesIdxs.filter { case ((s1, notNlpNotes1), idx1) =>
-            !sensWithNotesIdxs.exists { case ((s2, notNlpNotes2), idx2) =>
+        sens = sensWithNotesIdxs.filter { case ((_, notNlpNotes1), idx1) =>
+            !sensWithNotesIdxs.exists { case ((_, notNlpNotes2), idx2) =>
                 idx1 != idx2 && {
-                    notNlpNotes2.size > notNlpNotes1.size && notNlpNotes1.forall(t1 => notNlpNotes2.exists(_.equalsWithoutIndexes(t1)))
+                    notNlpNotes2.size > notNlpNotes1.size &&
+                    notNlpNotes1.forall(t1 => notNlpNotes2.exists(_.equalsWithoutIndexes(t1)))
                 }
             }
         }.map { case ((sen, _), _) => sen }

[incubator-nlpcraft] branch NLPCRAFT-443 updated: WIP.

Reply via email to