This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-30 by this push:
     new b8ab1b6  WIP.
b8ab1b6 is described below

commit b8ab1b6f376f2bbef0c26badf1a1a3cff52cf7bb
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Apr 30 10:47:47 2020 +0300

    WIP.
---
 .../nlp/enrichers/utils/NCEnricherProcessor.scala  | 42 +++++++++++++---------
 1 file changed, 26 insertions(+), 16 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
index 6bec7cd..8aedf49 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/utils/NCEnricherProcessor.scala
@@ -752,26 +752,37 @@ object NCEnricherProcessor extends NCService with LazyLogging {
         def tokensEqualOrSimilar(set1: Set[NCNlpSentenceToken], set2: 
Set[NCNlpSentenceToken]): Boolean =
             set1 == set2 || set1.subsetOf(set2) && 
set2.diff(set1).forall(_.isStopWord)
 
-        val refIdxNames =
-            n1.noteType match {
-                case "nlpcraft:sort" ⇒ Seq("subjindexes", "byindexes")
-                case "nlpcraft:limit" ⇒ Seq("indexes")
-                case "nlpcraft:reference" ⇒ Seq("indexes")
-
-                case _ ⇒ Seq.empty
-            }
+        def extractList(n: NCNlpSentenceNote, refIdxName: String): 
Set[NCNlpSentenceToken] =
+            n.getOrElse(refIdxName, 
Collections.emptyList).asInstanceOf[java.util.List[Int]].asScala.
+                map(sen(_)).toSet
 
-        def extract(n: NCNlpSentenceNote, refIdxName: String): 
Set[NCNlpSentenceToken] =
+        def extractListList(n: NCNlpSentenceNote, refIdxName: String): 
Set[NCNlpSentenceToken] =
             n.getOrElse(refIdxName, 
Collections.emptyList).asInstanceOf[java.util.List[java.util.List[Int]]].asScala.
                 flatMap(_.asScala.map(sen(_))).toSet
 
-        def referencesEqualOrNearly(n1: NCNlpSentenceNote, n2: 
NCNlpSentenceNote): Boolean =
-            refIdxNames.isEmpty || refIdxNames.forall(refIdxName ⇒ {
-                val refs1 = extract(n1, refIdxName)
-                val refs2 = extract(n2, refIdxName)
+        def referencesEqualOrNearly(n1: NCNlpSentenceNote, n2: 
NCNlpSentenceNote): Boolean = {
+            require(n1.noteType == n2.noteType)
 
-                tokensEqualOrSimilar(refs1, refs2) || 
tokensEqualOrSimilar(refs2, refs1)
-            })
+            n1.noteType match {
+                case "nlpcraft:sort" ⇒
+                    val refs11 = extractListList(n1, "subjindexes")
+                    val refs12 = extractListList(n2, "subjindexes")
+
+                    val refs21 = extractListList(n1, "byindexes")
+                    val refs22 = extractListList(n2, "byindexes")
+
+                    (tokensEqualOrSimilar(refs11, refs12) || 
tokensEqualOrSimilar(refs12, refs11)) &&
+                    (tokensEqualOrSimilar(refs21, refs22) || 
tokensEqualOrSimilar(refs22, refs21))
+
+                case "nlpcraft:limit" | "nlpcraft:reference" ⇒
+                    val refs1 = extractList(n1, "indexes")
+                    val refs2 = extractList(n2, "indexes")
+
+                    tokensEqualOrSimilar(refs1, refs2) || 
tokensEqualOrSimilar(refs2, refs1)
+
+                case _ ⇒ true
+            }
+        }
 
         def getUniqueKey0(n: NCNlpSentenceNote): Seq[Any] = getKey(n, 
withIndexes = false, withReferences = false)
 
@@ -779,5 +790,4 @@ object NCEnricherProcessor extends NCService with LazyLogging {
             (wordsEqualOrSimilar(n2, n1) || wordsEqualOrSimilar(n1, n2)) &&
             (referencesEqualOrNearly(n2, n1) || referencesEqualOrNearly(n1, 
n2))
     }
-
 }

Reply via email to