This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new 3a2beda WIP.
3a2beda is described below
commit 3a2bedac6c9d2bdf45cb13ae37499b43e3c4cc2f
Author: Sergey Kamov <[email protected]>
AuthorDate: Sat Sep 25 13:06:59 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 2 +-
.../nlpcraft/common/nlp/NCNlpSentenceToken.scala | 24 +++++++++++-
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 2 +-
.../probe/mgrs/sentence/NCSentenceManager.scala | 45 ++++++++++++++--------
4 files changed, 55 insertions(+), 18 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 588706a..9d9f4e3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -59,7 +59,7 @@ class NCNlpSentence(
@transient
private var hash: java.lang.Integer = _
- private def calcHash(): Int = tokens.hashCode()
+ private def calcHash(): Int = U.mkJavaHash(tokens)
// Deep copy.
override def clone(): NCNlpSentence =
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
index fa9cbe6..1c66da1 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceToken.scala
@@ -17,6 +17,7 @@
package org.apache.nlpcraft.common.nlp
+import org.apache.nlpcraft.common.U
import org.apache.nlpcraft.common.nlp.pos._
import java.util.{List => JList}
@@ -56,6 +57,22 @@ case class NCNlpSentenceToken(
def isSwearWord: Boolean = getNlpValue[Boolean]("swear")
def isEnglish: Boolean = getNlpValue[Boolean]("english")
+ @transient
+ private var hash: java.lang.Integer = _
+
+ //noinspection HashCodeUsesVar
+ override def hashCode(): Int = {
+ if (hash == null)
+ hash = U.mkJavaHash(index, notes, stopsReasons)
+
+ hash
+ }
+
+ override def equals(obj: Any): Boolean = obj match {
+ case x: NCNlpSentenceToken => x.index == index && x.notes == notes && x.stopsReasons == stopsReasons
+ case _ => false
+ }
+
/**
*
* @param noteType Note type.
@@ -80,7 +97,11 @@ case class NCNlpSentenceToken(
*
* @param note Note.
*/
- def remove(note: NCNlpSentenceNote): Unit = notes.remove(note)
+ def remove(note: NCNlpSentenceNote): Unit = {
+ notes.remove(note)
+
+ hash = null
+ }
/**
* Tests whether or not this token contains note.
@@ -162,6 +183,7 @@ case class NCNlpSentenceToken(
* @param note Element.
*/
def add(note: NCNlpSentenceNote): Unit = {
+ hash = null
val added = notes.add(note)
if (added && note.isNlp)
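The NCNlpSentenceToken hunk above memoizes hashCode() and clears the cached value whenever a note is added or removed, so the hash stays consistent with equals() while repeated hashing of unchanged tokens stays cheap. A minimal standalone sketch of that cache-and-invalidate pattern (the CachedNotes class below is illustrative only and uses java.util.Objects.hash in place of nlpcraft's U.mkJavaHash):

import scala.collection.mutable

// Illustrative only: cache hashCode() over mutable state and drop the
// cache on every mutation, mirroring the NCNlpSentenceToken change above.
final class CachedNotes(val index: Int) {
    private val notes = mutable.HashSet.empty[String]

    @transient
    private var hash: java.lang.Integer = _

    //noinspection HashCodeUsesVar
    override def hashCode(): Int = {
        if (hash == null)
            hash = java.util.Objects.hash(Int.box(index), notes)

        hash
    }

    override def equals(obj: Any): Boolean = obj match {
        case x: CachedNotes => x.index == index && x.notes == notes
        case _ => false
    }

    def add(note: String): Unit = {
        hash = null // Invalidate before the note set changes.
        notes += note
    }

    def remove(note: String): Unit = {
        notes -= note
        hash = null // Invalidate after removal, as in the patch.
    }
}

As in the patch, only add/remove reset the cache; mutating a stored element in place would leave a stale hash.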
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 0197f2e..a6aba57 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -677,7 +677,7 @@ object NCModelEnricher extends NCProbeEnricher {
private def normalize(ns: Sentence): Unit = {
val usrNotes = ns.flatten.filter(_.isUser).distinct
val links = NCSentenceManager.getLinks(usrNotes)
- val parts = NCSentenceManager.getPartKeys(usrNotes: _*)
+ val parts = NCSentenceManager.getPartKeys(usrNotes)
val usrNotesIdxs = usrNotes.
filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index 42b5583..4354d59 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -72,16 +72,31 @@ object NCSentenceManager extends NCService {
/**
*
+ * @param n
+ */
+ private def getParts(n: NCNlpSentenceNote): Option[Seq[NCTokenPartKey]] = {
+ val res: Option[JList[NCTokenPartKey]] = n.dataOpt("parts")
+
+ res match {
+ case Some(v) => Some(v.asScala)
+ case None => None
+ }
+ }
+
+ /**
+ *
* @param notes
*/
- def getPartKeys(notes: NCNlpSentenceNote*): Seq[NCTokenPartKey] =
- notes.
- filter(_.isUser).
- flatMap(n => {
- val optList: Option[JList[NCTokenPartKey]] = n.dataOpt("parts")
+ def getPartKeys(notes: Seq[NCNlpSentenceNote]): Seq[NCTokenPartKey] =
+ notes.filter(_.isUser).flatMap(getParts).flatten.distinct
- optList
- }).flatMap(_.asScala).distinct
+ /**
+ *
+ * @param note
+ * @return
+ */
+ def getPartKeys(note: NCNlpSentenceNote): Seq[NCTokenPartKey] =
+ if (note.isUser) getParts(note).getOrElse(Seq.empty) else Seq.empty
/**
*
@@ -275,9 +290,9 @@ object NCSentenceManager extends NCService {
private def fixIndexes(ns: NCNlpSentence, userNoteTypes: Seq[String]): Unit = {
// Replaces other notes indexes.
for (t <- userNoteTypes :+ "nlpcraft:nlp"; note <- ns.getNotes(t)) {
- val toks = ns.filter(_.contains(note)).sortBy(_.index)
+ val toks = ns.filter(_.contains(note))
- val newNote = note.clone(toks.map(_.index).toSeq, toks.flatMap(_.wordIndexes).toSeq.sorted)
+ val newNote = note.clone(toks.map(_.index), toks.flatMap(_.wordIndexes).toSeq.sorted)
toks.foreach(t => {
t.remove(note)
@@ -544,17 +559,17 @@ object NCSentenceManager extends NCService {
*/
private def dropAbstract(mdl: NCModel, ns: NCNlpSentence): Unit =
if (!mdl.getAbstractTokens.isEmpty) {
- val notes = ns.flatten
+ val notes = ns.flatten.distinct
- val keys = getPartKeys(notes: _*)
+ val keys = getPartKeys(notes)
val noteLinks = getLinks(notes)
notes.filter(n => {
- val noteToks = ns.tokens.filter(_.contains(n))
+ lazy val noteToks = ns.tokens.filter(t => t.index >= n.tokenFrom && t.index <= n.tokenTo)
mdl.getAbstractTokens.contains(n.noteType) &&
- !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
- !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
+ !keys.exists(_.intersect(n.noteType, noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
+ !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
}).foreach(ns.removeNote)
}
@@ -645,7 +660,7 @@ object NCSentenceManager extends NCService {
// There aren't links on it.
filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
// It doesn't have links.
- filter(getPartKeys(_).isEmpty).
+ filter(n => getPartKeys(n).isEmpty).
flatMap(note => {
val noteWordsIdxs = note.wordIndexesSet
val key = NCTokenPartKey(note, sen)
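Taken together, the NCSentenceManager changes replace the varargs getPartKeys(notes: _*) call sites with a Seq-based overload plus a small getParts helper that unwraps the optional Java list stored under the "parts" key. A self-contained sketch of that conversion, assuming Scala 2.13 and with PartKey and Note as illustrative stand-ins for NCTokenPartKey and NCNlpSentenceNote:

import java.util.{List => JList}
import scala.jdk.CollectionConverters._

object PartKeysSketch {
    // Illustrative stand-ins; the real classes live in the nlpcraft code base.
    final case class PartKey(id: String)
    final case class Note(isUser: Boolean, data: Map[String, AnyRef]) {
        def dataOpt[T](key: String): Option[T] = data.get(key).map(_.asInstanceOf[T])
    }

    // Mirrors the new getParts helper: unwrap Option[JList[...]] into Option[Seq[...]].
    private def getParts(n: Note): Option[Seq[PartKey]] =
        n.dataOpt[JList[PartKey]]("parts").map(_.asScala.toSeq)

    // Mirrors the Seq-based getPartKeys overload.
    def getPartKeys(notes: Seq[Note]): Seq[PartKey] =
        notes.filter(_.isUser).flatMap(getParts).flatten.distinct

    def main(args: Array[String]): Unit = {
        val parts: JList[PartKey] = java.util.Arrays.asList(PartKey("a"), PartKey("b"))
        val notes = Seq(Note(isUser = true, Map("parts" -> parts)), Note(isUser = false, Map.empty))
        println(getPartKeys(notes)) // List(PartKey(a), PartKey(b))
    }
}

The sketch only shows the shape of the refactoring; the committed code works against the nlpcraft note data map and the converters already imported in NCSentenceManager.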