This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-30 by this push:
new b8ab1b6 WIP.
b8ab1b6 is described below
commit b8ab1b6f376f2bbef0c26badf1a1a3cff52cf7bb
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Apr 30 10:47:47 2020 +0300
WIP.
---
.../nlp/enrichers/utils/NCEnricherProcessor.scala | 42 +++++++++++++---------
1 file changed, 26 insertions(+), 16 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
index 6bec7cd..8aedf49 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
@@ -752,26 +752,37 @@ object NCEnricherProcessor extends NCService with LazyLogging {
def tokensEqualOrSimilar(set1: Set[NCNlpSentenceToken], set2: Set[NCNlpSentenceToken]): Boolean =
set1 == set2 || set1.subsetOf(set2) && set2.diff(set1).forall(_.isStopWord)
- val refIdxNames =
- n1.noteType match {
- case "nlpcraft:sort" ⇒ Seq("subjindexes", "byindexes")
- case "nlpcraft:limit" ⇒ Seq("indexes")
- case "nlpcraft:reference" ⇒ Seq("indexes")
-
- case _ ⇒ Seq.empty
- }
+ def extractList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
+ n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[java.util.List[Int]].asScala.
+ map(sen(_)).toSet
- def extract(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
+ def extractListList(n: NCNlpSentenceNote, refIdxName: String): Set[NCNlpSentenceToken] =
n.getOrElse(refIdxName, Collections.emptyList).asInstanceOf[java.util.List[java.util.List[Int]]].asScala.
flatMap(_.asScala.map(sen(_))).toSet
- def referencesEqualOrNearly(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean =
- refIdxNames.isEmpty || refIdxNames.forall(refIdxName ⇒ {
- val refs1 = extract(n1, refIdxName)
- val refs2 = extract(n2, refIdxName)
+ def referencesEqualOrNearly(n1: NCNlpSentenceNote, n2: NCNlpSentenceNote): Boolean = {
+ require(n1.noteType == n2.noteType)
- tokensEqualOrSimilar(refs1, refs2) || tokensEqualOrSimilar(refs2, refs1)
- })
+ n1.noteType match {
+ case "nlpcraft:sort" ⇒
+ val refs11 = extractListList(n1, "subjindexes")
+ val refs12 = extractListList(n2, "subjindexes")
+
+ val refs21 = extractListList(n1, "byindexes")
+ val refs22 = extractListList(n2, "byindexes")
+
+ (tokensEqualOrSimilar(refs11, refs12) || tokensEqualOrSimilar(refs12, refs11)) &&
+ (tokensEqualOrSimilar(refs21, refs22) || tokensEqualOrSimilar(refs22, refs21))
+
+ case "nlpcraft:limit" | "nlpcraft:reference" ⇒
+ val refs1 = extractList(n1, "indexes")
+ val refs2 = extractList(n2, "indexes")
+
+ tokensEqualOrSimilar(refs1, refs2) || tokensEqualOrSimilar(refs2, refs1)
+
+ case _ ⇒ true
+ }
+ }
def getUniqueKey0(n: NCNlpSentenceNote): Seq[Any] = getKey(n, withIndexes = false, withReferences = false)
@@ -779,5 +790,4 @@ object NCEnricherProcessor extends NCService with LazyLogging {
(wordsEqualOrSimilar(n2, n1) || wordsEqualOrSimilar(n1, n2)) &&
(referencesEqualOrNearly(n2, n1) || referencesEqualOrNearly(n1, n2))
}
-
}