This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-246
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-246 by this push:
new 92c4048 WIP.
92c4048 is described below
commit 92c4048e8acc553e9b5a576d1cb7fb9ecf309952
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Feb 25 13:16:04 2021 +0300
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 70 +++++++++-------------
.../model/NCEnricherNestedModelSpec2.scala | 4 +-
2 files changed, 30 insertions(+), 44 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 0573fe9..42ff9af 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -35,16 +35,29 @@ object NCNlpSentence extends LazyLogging {
implicit def toTokens(x: NCNlpSentence): ArrayBuffer[NCNlpSentenceToken] =
x.tokens
private case class NoteLink(note: String, indexes: Seq[Int])
- private case class PartKey(id: String, start: Int, end: Int) {
+
+ case class PartKey(id: String, start: Int, end: Int) {
+ require(start <= end)
+
private def in(i: Int): Boolean = i >= start && i <= end
        def intersect(id: String, start: Int, end: Int): Boolean = id == this.id && (in(start) || in(end))
}
+ object PartKey {
+ def apply(m: util.HashMap[String, JSerializable]): PartKey = {
+ def get[T](name: String): T = m.get(name).asInstanceOf[T]
+
+ PartKey(get("id"), get("startcharindex"), get("endcharindex"))
+ }
+
+ def apply(t: NCNlpSentenceNote, sen: NCNlpSentence): PartKey =
+            PartKey(t.noteType, sen(t.tokenFrom).startCharIndex, sen(t.tokenTo).endCharIndex)
+ }
private def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = {
val noteLinks = mutable.ArrayBuffer.empty[NoteLink]
        for (n ← notes.filter(n ⇒ n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references"))
-            noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala)
+            noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala.sorted)
for (n ← notes.filter(_.noteType == "nlpcraft:sort")) {
def add(noteName: String, idxsName: String): Unit = {
@@ -55,7 +68,7 @@ object NCNlpSentence extends LazyLogging {
noteLinks ++=
                    (for ((name, idxs) ← names.asScala.zip(idxsSeq.asScala.map(_.asScala)))
- yield NoteLink(name, idxs)
+ yield NoteLink(name, idxs.sorted)
)
}
@@ -73,14 +86,7 @@ object NCNlpSentence extends LazyLogging {
                val optList: Option[JList[util.HashMap[String, JSerializable]]] = n.dataOpt("parts")
optList
- }).flatMap(_.asScala).
- map(map ⇒
- PartKey(
- map.get("id").asInstanceOf[String],
- map.get("startcharindex").asInstanceOf[Int],
- map.get("endcharindex").asInstanceOf[Int]
- )
- ).distinct
+ }).flatMap(_.asScala).map(m ⇒ PartKey(m)).distinct
/**
*
@@ -632,7 +638,7 @@ class NCNlpSentence(
mdl.getAbstractTokens.contains(n.noteType) &&
!keys.exists(_.intersect(n.noteType,
noteToks.head.startCharIndex, noteToks.last.startCharIndex)) &&
- !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes))
+                !noteLinks.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))
}).foreach(ns.removeNote)
}
@@ -687,7 +693,7 @@ class NCNlpSentence(
var delCombs: Seq[NCNlpSentenceNote] =
getNotNlpNotes(this).
-                flatMap(note ⇒ getNotNlpNotes(this.slice(note.tokenFrom, note.tokenTo + 1)).filter(_ != note)).
+                flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ this(i))).filter(_ != note)).
distinct
// Optimization. Deletes all wholly swallowed notes.
@@ -695,38 +701,18 @@ class NCNlpSentence(
val swallowed =
delCombs.
-                filter(n ⇒ !links.contains(NoteLink(n.noteType, n.tokenIndexes))).
+ // There aren't links on it.
+                filter(n ⇒ !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
+ // It doesn't have links.
filter(getPartKeys(_).isEmpty).
- flatMap(n ⇒ {
- val wIdxs = n.wordIndexes.toSet
-
- val owners =
- delCombs.
- filter(_ != n).
- flatMap(n1 ⇒
- if (getPartKeys(n1).contains(
- PartKey(
- n.noteType,
- this(n.tokenFrom).startCharIndex,
- this(n.tokenTo).endCharIndex)
- )
- )
- Some(n1)
- else
- None
- )
+ flatMap(note ⇒ {
+ val noteWordsIdxs = note.wordIndexes.toSet
+ val key = PartKey(note, this)
+ val delCombOthers =
+                        delCombs.filter(_ != note).flatMap(n ⇒ if (getPartKeys(n).contains(key)) Some(n) else None)
- if (owners.exists(
- o ⇒ {
- val oWIdxs = o.wordIndexes.toSet
-
- wIdxs == oWIdxs || wIdxs.subsetOf(oWIdxs)
- })
- )
- Some(n)
- else
- None
+                    if (delCombOthers.exists(o ⇒ noteWordsIdxs == o.wordIndexes.toSet)) Some(note) else None
})
delCombs = delCombs.filter(p ⇒ !swallowed.contains(p))
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
index 5897136..957730a 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
@@ -47,8 +47,8 @@ class NCNestedTestModel21 extends NCModelAdapter("nlpcraft.nested2.test.mdl", "N
class NCEnricherNestedModelSpec21 extends NCTestContext {
@Test
def test(): Unit = {
-// checkIntent("word", "onE1")
-// checkIntent("10 word", "onE1")
+ checkIntent("word", "onE1")
+ checkIntent("10 word", "onE1")
checkIntent("11 12 word", "onNumAndE1")
}
}