This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new 614dfe5 WIP.
614dfe5 is described below
commit 614dfe59281f2e7e71a1abd572dd25f2a2075015
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Sep 19 17:26:54 2021 +0300
WIP.
---
.../org/apache/nlpcraft/common/nlp/NCNlpSentence.scala | 4 ++--
.../apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala | 15 +++++++--------
.../probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala | 4 ++--
.../nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala | 11 ++++++-----
4 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 478d930..0f0b462 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -140,8 +140,8 @@ class NCNlpSentence(
// One possible difference - stopwords indexes.
def wordsEqualOrSimilar0(n1: NCNlpSentenceNote, n2:
NCNlpSentenceNote): Boolean = {
- val set1 = n1.wordIndexes.toSet
- val set2 = n2.wordIndexes.toSet
+ val set1 = n1.wordIndexesSet
+ val set2 = n2.wordIndexesSet
set1 == set2 || set1.subsetOf(set2) &&
set2.diff(set1).forall(stopIdxs.contains)
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index 1574787..63ae6ca 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -33,6 +33,10 @@ import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava}
class NCNlpSentenceNote(private val values: Map[String, JSerializable])
extends JSerializable with NCAsciiLike {
import NCNlpSentenceNote._
+ private lazy val dataWithoutIndexes: Map[String, JSerializable] =
this.filter(p => !SKIP_CLONE.contains(p._1))
+ private lazy val skipNlp: Map[String, JSerializable] =
dataWithoutIndexes.filter { case (key, _) => key != "noteType" }
+
+
@transient
private lazy val hash = values.hashCode()
@@ -42,6 +46,7 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
lazy val tokenTo: Int = values("tokMaxIndex").asInstanceOf[Int] // Last
index.
lazy val tokenIndexes: Seq[Int] =
values("tokWordIndexes").asInstanceOf[JList[Int]].asScala.toSeq // Includes 1st
and last indices too.
lazy val wordIndexes: Seq[Int] =
values("wordIndexes").asInstanceOf[JList[Int]].asScala.toSeq // Includes 1st
and last indices too.
+ lazy val wordIndexesSet: Set[Int] = wordIndexes.toSet
lazy val sparsity: Int = values("sparsity").asInstanceOf[Int]
lazy val isDirect: Boolean = values("direct").asInstanceOf[Boolean]
lazy val isUser: Boolean = {
@@ -89,7 +94,7 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
this.noteType == n.noteType &&
this.wordIndexes.size == n.wordIndexes.size &&
this.wordIndexes.zip(n.wordIndexes).map(p => p._1 -
p._2).distinct.size == 1 &&
- this.filter(p => !SKIP_CLONE.contains(p._1)) == n.filter(p =>
!SKIP_CLONE.contains(p._1))
+ this.dataWithoutIndexes == n.dataWithoutIndexes
/**
*
@@ -102,12 +107,6 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
*
* @return
*/
- def skipNlp(): Map[String, JSerializable] =
- values.filter { case (key, _) => !SKIP_CLONE.contains(key) && key !=
"noteType" }
-
- /**
- *
- */
def asMetadata(): Map[String, JSerializable] =
if (isUser)
values.get("meta") match {
@@ -117,7 +116,7 @@ class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends
else {
val md = mutable.Map.empty[String, JSerializable]
- val m = if (noteType != "nlpcraft:nlp") skipNlp() else values
+ val m = if (noteType != "nlpcraft:nlp") skipNlp else values
m.foreach { case (name, value) => md += (name.toLowerCase() ->
value)}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index e5f9ee2..9706c4c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -28,7 +28,7 @@ import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.NCSynonymChunkKind
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants,
NCTokenPartKey, NCProbeSynonym => Synonym}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants,
NCTokenPartKey, NCProbeSynonym => Synonym}
import java.io.Serializable
import java.util.{List => JList}
@@ -680,7 +680,7 @@ object NCModelEnricher extends NCProbeEnricher {
candidateIdx != idx &&
candidate.noteType == n.noteType &&
candidate.dataOpt("parts") == n.dataOpt("parts") &&
-
candidate.wordIndexes.toSet.subsetOf(n.wordIndexes.toSet) &&
+ candidate.wordIndexesSet.subsetOf(n.wordIndexesSet) &&
n.wordIndexes.filter(n =>
!candidate.wordIndexes.contains(n)).
forall(wordIdx => ns.tokens.exists(t =>
t.wordIndexes.contains(wordIdx) && t.isStopWord))
} match {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 0c0288d..ee8b719 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -652,7 +652,7 @@ object NCSentenceManager extends NCService {
// It doesn't have links.
filter(getPartKeys(_).isEmpty).
flatMap(note => {
- val noteWordsIdxs = note.wordIndexes.toSet
+ val noteWordsIdxs = note.wordIndexesSet
val key = NCTokenPartKey(note, sen)
val delCombOthers =
@@ -660,7 +660,7 @@ object NCSentenceManager extends NCService {
if (
delCombOthers.nonEmpty &&
- !delCombOthers.exists(o =>
noteWordsIdxs.subsetOf(o.wordIndexes.toSet))
+ !delCombOthers.exists(o =>
noteWordsIdxs.subsetOf(o.wordIndexesSet))
)
Some(note)
else
@@ -777,10 +777,11 @@ object NCSentenceManager extends NCService {
sensWithNotesIdxs = sensWithNotes.zipWithIndex
- sens = sensWithNotesIdxs.filter { case ((s1, notNlpNotes1), idx1) =>
- !sensWithNotesIdxs.exists { case ((s2, notNlpNotes2), idx2) =>
+ sens = sensWithNotesIdxs.filter { case ((_, notNlpNotes1), idx1) =>
+ !sensWithNotesIdxs.exists { case ((_, notNlpNotes2), idx2) =>
idx1 != idx2 && {
- notNlpNotes2.size > notNlpNotes1.size &&
notNlpNotes1.forall(t1 => notNlpNotes2.exists(_.equalsWithoutIndexes(t1)))
+ notNlpNotes2.size > notNlpNotes1.size &&
+ notNlpNotes1.forall(t1 =>
notNlpNotes2.exists(_.equalsWithoutIndexes(t1)))
}
}
}.map { case ((sen, _), _) => sen }