This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-30 by this push:
new eb1bc2d WIP.
eb1bc2d is described below
commit eb1bc2dae9090efd9c2d42331601e15b2509c78d
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Apr 30 14:32:31 2020 +0300
WIP.
---
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 2 +-
.../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 2 +-
.../enrichers/relation/NCRelationEnricher.scala | 2 +-
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 2 +-
.../nlp/enrichers/utils/NCEnricherProcessor.scala | 41 ++++++++++------------
5 files changed, 23 insertions(+), 26 deletions(-)
diff --git
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index bdfebb5..53a54ac 100644
---
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -397,7 +397,7 @@ object NCProbeEnrichmentManager extends NCService with
NCOpenCensusModelStats {
val diff = notes2.filter(n ⇒ !notes1.contains(n))
val diffRedundant = diff.flatMap(n2 ⇒
- notes1.find(n1 ⇒
NCEnricherProcessor.sameForSentence(n1, n2, nlpSen)) match {
+ notes1.find(n1 ⇒
NCEnricherProcessor.equalOrSimilar(n1, n2, nlpSen)) match {
case Some(similar) ⇒ Some(n2 → similar)
case None ⇒ None
}
diff --git
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index 861de12..3e554cc 100644
---
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -216,7 +216,7 @@ object NCLimitEnricher extends NCProbeEnricher {
val note =
NCNlpSentenceNote(m.matched.map(_.index), TOK_ID, params: _*)
- if (!notes.exists(n ⇒
NCEnricherProcessor.sameForSentence(note, n, ns))) {
+ if (!notes.exists(n ⇒
NCEnricherProcessor.equalOrSimilar(note, n, ns))) {
notes += note
m.matched.foreach(_.add(note))
diff --git
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
index fbd9be9..4404647 100644
---
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
+++
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala
@@ -144,7 +144,7 @@ object NCRelationEnricher extends NCProbeEnricher {
"note" → refNote
)
- if (!notes.exists(n ⇒
NCEnricherProcessor.sameForSentence(note, n, ns))) {
+ if (!notes.exists(n ⇒
NCEnricherProcessor.equalOrSimilar(note, n, ns))) {
notes += note
m.matched.filter(_ !=
m.matchedHead).foreach(_.addStopReason(note))
diff --git
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index ed445bc..e46d821 100644
---
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -433,7 +433,7 @@ object NCSortEnricher extends NCProbeEnricher {
def mkNote(params: ArrayBuffer[(String, Any)]): Unit =
{
val note = NCNlpSentenceNote(m.main.map(_.index),
TOK_ID, params: _*)
- if (!notes.exists(n ⇒
NCEnricherProcessor.sameForSentence(note, n, ns))) {
+ if (!notes.exists(n ⇒
NCEnricherProcessor.equalOrSimilar(note, n, ns))) {
notes += note
m.main.foreach(_.add(note))
diff --git
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
index 8aedf49..28eba8b 100644
---
a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
+++
b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
@@ -736,58 +736,55 @@ object NCEnricherProcessor extends NCService with
LazyLogging {
* @param sen
* @return
*/
- def sameForSentence(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote, sen:
NCNlpSentence): Boolean = {
+ def equalOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote, sen:
NCNlpSentence): Boolean = {
require(n1.noteType == n2.noteType)
val stopIdxs = sen.filter(_.isStopWord).map(_.index)
// One possible difference - stopwords indexes.
- def wordsEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote):
Boolean = {
+ def wordsEqualOrSimilar0(n1: NCNlpSentenceNote, n2:
NCNlpSentenceNote): Boolean = {
val set1 = n1.wordIndexes.toSet
val set2 = n2.wordIndexes.toSet
set1 == set2 || set1.subsetOf(set2) &&
set2.diff(set1).forall(stopIdxs.contains)
}
- def tokensEqualOrSimilar(set1: Set[NCNlpSentenceToken], set2:
Set[NCNlpSentenceToken]): Boolean =
+ def wordsEqualOrSimilar(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote):
Boolean =
+ wordsEqualOrSimilar0(n1, n2) || wordsEqualOrSimilar0(n2, n1)
+
+ def tokensEqualOrSimilar0(set1: Set[NCNlpSentenceToken], set2:
Set[NCNlpSentenceToken]): Boolean =
set1 == set2 || set1.subsetOf(set2) &&
set2.diff(set1).forall(_.isStopWord)
- def extractList(n: NCNlpSentenceNote, refIdxName: String):
Set[NCNlpSentenceToken] =
+ def tokensEqualOrSimilar(set1: Set[NCNlpSentenceToken], set2:
Set[NCNlpSentenceToken]): Boolean =
+ tokensEqualOrSimilar0(set1, set2) || tokensEqualOrSimilar0(set2,
set1)
+
+ def getList(n: NCNlpSentenceNote, refIdxName: String):
Set[NCNlpSentenceToken] =
n.getOrElse(refIdxName,
Collections.emptyList).asInstanceOf[java.util.List[Int]].asScala.
map(sen(_)).toSet
- def extractListList(n: NCNlpSentenceNote, refIdxName: String):
Set[NCNlpSentenceToken] =
+ def getListList(n: NCNlpSentenceNote, refIdxName: String):
Set[NCNlpSentenceToken] =
n.getOrElse(refIdxName,
Collections.emptyList).asInstanceOf[java.util.List[java.util.List[Int]]].asScala.
flatMap(_.asScala.map(sen(_))).toSet
- def referencesEqualOrNearly(n1: NCNlpSentenceNote, n2:
NCNlpSentenceNote): Boolean = {
+ def referencesEqualOrSimilar0(n1: NCNlpSentenceNote, n2:
NCNlpSentenceNote): Boolean = {
require(n1.noteType == n2.noteType)
n1.noteType match {
case "nlpcraft:sort" ⇒
- val refs11 = extractListList(n1, "subjindexes")
- val refs12 = extractListList(n2, "subjindexes")
-
- val refs21 = extractListList(n1, "byindexes")
- val refs22 = extractListList(n2, "byindexes")
-
- (tokensEqualOrSimilar(refs11, refs12) ||
tokensEqualOrSimilar(refs12, refs11)) &&
- (tokensEqualOrSimilar(refs21, refs22) ||
tokensEqualOrSimilar(refs22, refs21))
-
+ tokensEqualOrSimilar(getListList(n1, "subjindexes"),
getListList(n2, "subjindexes")) &&
+ tokensEqualOrSimilar(getListList(n1, "byindexes"),
getListList(n2, "byindexes"))
case "nlpcraft:limit" | "nlpcraft:reference" ⇒
- val refs1 = extractList(n1, "indexes")
- val refs2 = extractList(n2, "indexes")
-
- tokensEqualOrSimilar(refs1, refs2) ||
tokensEqualOrSimilar(refs2, refs1)
+ tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2,
"indexes"))
case _ ⇒ true
}
}
+ def referencesEqualOrSimilar(n1: NCNlpSentenceNote, n2:
NCNlpSentenceNote): Boolean =
+ referencesEqualOrSimilar0(n1, n2) || referencesEqualOrSimilar0(n2,
n1)
+
def getUniqueKey0(n: NCNlpSentenceNote): Seq[Any] = getKey(n,
withIndexes = false, withReferences = false)
- getUniqueKey0(n1) == getUniqueKey0(n2) &&
- (wordsEqualOrSimilar(n2, n1) || wordsEqualOrSimilar(n1, n2)) &&
- (referencesEqualOrNearly(n2, n1) || referencesEqualOrNearly(n1,
n2))
+ getUniqueKey0(n1) == getUniqueKey0(n2) && wordsEqualOrSimilar(n1, n2)
&& referencesEqualOrSimilar(n2, n1) // word spans: n1 vs n2; a self-comparison would always pass
}
}