This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new 86a8fed WIP.
86a8fed is described below
commit 86a8fed8092db33ed20290019169c090ccbf596a
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Sep 19 14:24:15 2021 +0300
WIP.
---
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 59 ++++++++++------------
.../nlp/enrichers/NCServerEnrichmentManager.scala | 12 ++---
2 files changed, 33 insertions(+), 38 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 37ca1f4..e5f9ee2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -283,21 +283,6 @@ object NCModelEnricher extends NCProbeEnricher {
}
}
- private def combosTokens1(toks: Seq[NlpToken]): Seq[(Seq[NlpToken], Seq[NlpToken])] =
- combos(toks).flatMap(combo => {
- val stops = combo.filter(_.isStopWord)
-
- val stops4Delete = Range.inclusive(1, stops.size).flatMap(stops.combinations)
-
- (Seq(combo) ++ stops4Delete.map(del => combo.filter(t => !del.contains(t)))).map(_ -> combo)
- }).
- toMap.
- filter(_._1.nonEmpty).
- groupBy(_._1).
- map(p => p._1 -> p._2.values.minBy(p => (-p.size, p.head.index))).
- sortBy(p => (-p._2.size, -p._1.size, -p._2.head.index, -p._1.head.index))
-
-
/**
*
* 1. Prepares combination of tokens (sliding).
@@ -314,6 +299,13 @@ object NCModelEnricher extends NCProbeEnricher {
*/
private def combosTokens(toks: Seq[NlpToken]): Seq[(Seq[NlpToken], Seq[NlpToken])] =
combos(toks).flatMap(combo => {
+ // TODO: delete after finish task.
+// val stops = combo.filter(_.isStopWord)
+//
+// val stops4Delete = Range.inclusive(1, stops.size).flatMap(stops.combinations)
+//
+// (Seq(combo) ++ stops4Delete.map(del => combo.filter(t => !del.contains(t)))).map(_ -> combo)
+
val stops = combo.filter(s => s.isStopWord && s != combo.head && s != combo.last)
val slides = mutable.ArrayBuffer.empty[mutable.ArrayBuffer[NlpToken]]
@@ -328,7 +320,7 @@ object NCModelEnricher extends NCProbeEnricher {
var stops4Delete: Seq[Seq[NlpToken]] =
if (bigSlides.nonEmpty) {
- val allBig = bigSlides.flatMap(p => p)
+ val allBig = bigSlides.flatten
val stops4AllCombs = stops.filter(p => !allBig.contains(p))
if (stops4AllCombs.nonEmpty)
@@ -336,10 +328,10 @@ object NCModelEnricher extends NCProbeEnricher {
seq1 <- Range.inclusive(0, stops4AllCombs.size).flatMap(stops4AllCombs.combinations);
seq2 <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations)
)
- yield seq1 ++ seq2.flatMap(p => p)
+ yield seq1 ++ seq2.flatten
else
for (seq <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations))
- yield seq.flatMap(p => p)
+ yield seq.toSeq.flatten
}
else
Range.inclusive(1, stops.size).flatMap(stops.combinations)
@@ -517,8 +509,8 @@ object NCModelEnricher extends NCProbeEnricher {
*/
private def getSparsedTokens(matched: Seq[NlpToken], toks2Match: Seq[NlpToken]): Seq[NlpToken] = {
require(matched.nonEmpty)
- // Matched tokens should be already sorted.
+ // Matched tokens should be already sorted.
val stopsInside = toks2Match.filter(t =>
t.isStopWord && !matched.contains(matched) && t.index > matched.head.index && t.index < matched.last.index
)
@@ -677,25 +669,28 @@ object NCModelEnricher extends NCProbeEnricher {
val links = NCSentenceManager.getLinks(usrNotes)
val parts = NCSentenceManager.getPartKeys(usrNotes: _*)
- usrNotes.
+ val usrNotesIdxs = usrNotes.
filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
filter(n => !parts.contains(NCTokenPartKey(n, ns))).
- foreach(n =>
- usrNotes.find(candidate =>
- candidate != n &&
- candidate.noteType == n.noteType &&
- candidate.dataOpt("parts") == n.dataOpt("parts") &&
- candidate.wordIndexes.toSet.subsetOf(n.wordIndexes.toSet) &&
- n.wordIndexes.filter(n => !candidate.wordIndexes.contains(n)).
- forall(wordIdx => ns.tokens.exists(t => t.wordIndexes.contains(wordIdx) && t.isStopWord))
- ) match {
+ zipWithIndex
+
+ usrNotesIdxs.
+ foreach { case (n, idx) =>
+ usrNotesIdxs.find { case (candidate, candidateIdx) =>
+ candidateIdx != idx &&
+ candidate.noteType == n.noteType &&
+ candidate.dataOpt("parts") == n.dataOpt("parts") &&
+ candidate.wordIndexes.toSet.subsetOf(n.wordIndexes.toSet) &&
+ n.wordIndexes.filter(n => !candidate.wordIndexes.contains(n)).
+ forall(wordIdx => ns.tokens.exists(t => t.wordIndexes.contains(wordIdx) && t.isStopWord))
+ } match {
case Some(better) =>
ns.removeNote(n)
- // TODO: trace.
- logger.info(s"Element removed: $n, better: $better")
+
+ logger.trace(s"Element removed: $n, better: $better")
case None => // No-op.
}
- )
+ }
}
// TODO: simplify, add tests, check model properties (sparse etc) for optimization.
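
Note: the removed combosTokens1 helper (and the commented-out block kept under the TODO above) builds, for each token combination, every variant obtained by dropping a subset of its stop words, keyed back to the original combination. Below is a standalone sketch of that idea, using a simplified Tok type in place of NlpToken (illustration only, not the project code):

    object StopWordVariantsSketch {
        // Simplified stand-in for NlpToken: just a word and a stop-word flag.
        final case class Tok(text: String, isStopWord: Boolean)

        def variants(combo: Seq[Tok]): Seq[(Seq[Tok], Seq[Tok])] = {
            val stops = combo.filter(_.isStopWord)

            // All non-empty subsets of the stop words found in this combination.
            val stops4Delete = Range.inclusive(1, stops.size).flatMap(stops.combinations)

            // The original combination plus each stop-word-reduced variant, each mapped to the original.
            (Seq(combo) ++ stops4Delete.map(del => combo.filter(t => !del.contains(t)))).map(_ -> combo)
        }

        def main(args: Array[String]): Unit = {
            val combo = Seq(Tok("turn", false), Tok("the", true), Tok("light", false))

            // Prints "turn the light" and "turn light", both keyed to the original combination.
            variants(combo).foreach { case (v, orig) =>
                println(v.map(_.text).mkString(" ") + "  <-  " + orig.map(_.text).mkString(" "))
            }
        }
    }
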
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index b64999d..03b749f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -153,12 +153,12 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
catching(wrapIE) {
cache(normTxt) match {
case Some(h) =>
-// if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
-// prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
-//
-// h.sentence
-// }
-// else
+ if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
+ prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
+
+ h.sentence
+ }
+ else
process(srvReqId, normTxt, enabledBuiltInToks, span)
case None =>
process(srvReqId, normTxt, enabledBuiltInToks, span)
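
Note: the second hunk restores the cache check so that a cached sentence is reused only when it was enriched with the same set of enabled built-in tokens; otherwise the text is re-processed. Below is a standalone sketch of that pattern, with simplified stand-in types and a plain in-memory map instead of the Ignite-backed cache (illustration only, not the project code):

    object EnrichCacheSketch {
        import scala.collection.mutable

        // Simplified holder: the enriched sentence plus the built-in token set it was built with.
        final case class Holder(sentence: String, enabledBuiltInTokens: Set[String])

        private val cache = mutable.HashMap.empty[String, Holder]

        private def process(normTxt: String, enabled: Set[String]): String = {
            val sent = s"enriched($normTxt)" // Placeholder for the real enrichment pipeline.
            cache += normTxt -> Holder(sent, enabled)
            sent
        }

        def enrich(normTxt: String, enabled: Set[String]): String =
            cache.get(normTxt) match {
                case Some(h) =>
                    if (h.enabledBuiltInTokens == enabled)
                        h.sentence                // Cache hit with the same token configuration.
                    else
                        process(normTxt, enabled) // Same text, different configuration: redo.
                case None =>
                    process(normTxt, enabled)
            }

        def main(args: Array[String]): Unit = {
            println(enrich("ping", Set("nlpcraft:num")))  // Miss: processed.
            println(enrich("ping", Set("nlpcraft:num")))  // Hit: reused.
            println(enrich("ping", Set("nlpcraft:date"))) // Hit, but different config: re-processed.
        }
    }
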