This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-287
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-287 by this push:
new bee2c6f WIP.
bee2c6f is described below
commit bee2c6f163e09d93fcb5e97836291181174b222d
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Apr 2 18:58:19 2021 +0300
WIP.
---
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 60 ++++++++++------------
1 file changed, 26 insertions(+), 34 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 7df8f02..ed80630 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,7 +19,7 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken, _}
import org.apache.nlpcraft.model._
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, TEXT}
@@ -32,13 +32,14 @@ import java.io.Serializable
import java.util
import scala.collection.JavaConverters._
import scala.collection.convert.DecorateAsScala
-import scala.collection.mutable.ArrayBuffer
import scala.collection.{Map, Seq, mutable}
/**
* Model elements enricher.
*/
object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
+ type TokenData = (NCToken, NCSynonymChunkKind)
+
object Complex {
def apply(t: NCToken): Complex =
Complex(
@@ -53,7 +54,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
maxIndex = t.wordIndexes.last
)
- def apply(t: NCNlpSentenceToken): Complex =
+ def apply(t: NlpToken): Complex =
Complex(
data = Right(t),
isToken = false,
@@ -72,7 +73,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
isToken: Boolean,
isWord: Boolean,
token: NCToken,
- word: NCNlpSentenceToken,
+ word: NlpToken,
origText: String,
wordIndexes: Set[Int],
minIndex: Int,
@@ -125,9 +126,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
// Found-by-synonym model element.
case class ElementMatch(
element: NCElement,
- tokens: Seq[NCNlpSentenceToken],
+ tokens: Seq[NlpToken],
synonym: NCProbeSynonym,
- parts: Seq[(NCToken, NCSynonymChunkKind)]
+ parts: Seq[TokenData]
) extends Ordered[ElementMatch] {
// Tokens sparsity.
lazy val sparsity: Int = tokens.zipWithIndex.tail.map {
@@ -136,7 +137,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
// Number of tokens.
lazy val length: Int = tokens.size
- lazy val tokensSet: Set[NCNlpSentenceToken] = tokens.toSet
+ lazy val tokensSet: Set[NlpToken] = tokens.toSet
override def compare(that: ElementMatch): Int = {
// Check synonym first, then length and then sparsity.
@@ -193,11 +194,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
private def mark(
ns: NCNlpSentence,
elem: NCElement,
- toks: Seq[NCNlpSentenceToken],
+ toks: Seq[NlpToken],
direct: Boolean,
syn: Option[NCProbeSynonym],
metaOpt: Option[Map[String, Object]],
- parts: Seq[(NCToken, NCSynonymChunkKind)]
+ parts: Seq[TokenData]
): Unit = {
val params = mutable.ArrayBuffer.empty[(String, AnyRef)]
@@ -269,7 +270,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param toks
* @param elemId
*/
- private def alreadyMarked(toks: Seq[NCNlpSentenceToken], elemId: String): Boolean = toks.forall(_.isTypeOf(elemId))
+ private def alreadyMarked(toks: Seq[NlpToken], elemId: String): Boolean = toks.forall(_.isTypeOf(elemId))
@throws[NCE]
override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = {
@@ -281,17 +282,12 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
"txt" → ns.text) { span ⇒
val cache = mutable.HashSet.empty[Seq[Int]]
val req = NCRequestImpl(senMeta, ns.srvReqId)
- val matches = ArrayBuffer.empty[ElementMatch]
+ val matches = mutable.ArrayBuffer.empty[ElementMatch]
- def addMatch(
- elm: NCElement,
- toks: Seq[NCNlpSentenceToken],
- syn: NCProbeSynonym,
- parts: Seq[(NCToken, NCSynonymChunkKind)]
- ): Boolean = {
- val toksSet = toks.toSet
+ def addMatch(elm: NCElement, toks: Seq[NlpToken], syn: NCProbeSynonym, parts: Seq[TokenData]): Boolean = {
+ val tokensSet = toks.toSet
- if (!matches.exists(m ⇒ m.element.getId == elm.getId && toksSet.subsetOf(m.tokensSet))) {
+ if (!matches.exists(m ⇒ m.element.getId == elm.getId && tokensSet.subsetOf(m.tokensSet))) {
matches += ElementMatch(elm, toks, syn, parts)
true
@@ -300,7 +296,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
false
}
- def getParts(comb: Seq[Complex], syn: NCProbeSynonym): Seq[(NCToken, NCSynonymChunkKind)] =
+ def getParts(comb: Seq[Complex], syn: NCProbeSynonym): Seq[TokenData] =
comb.zip(syn.map(_.kind)).flatMap {
case (complex, kind) ⇒ if (complex.isToken) Some(complex.token → kind) else None
@@ -322,16 +318,11 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
* @param toks
* @return
*/
- def tokString(toks: Seq[NCNlpSentenceToken]): String = toks.map(t ⇒ (t.origText, t.index)).mkString(" ")
+ def tokString(toks: Seq[NlpToken]): String = toks.map(t ⇒ (t.origText, t.index)).mkString(" ")
lazy val complexesWords = ns.map(Complex(_))
lazy val complexes: Seq[ComplexSeq] =
- NCProbeVariants.
- convert(
- ns.srvReqId,
- mdl,
- NCSentenceManager.collapse(mdl.model, ns.clone())
- ).
+ NCProbeVariants.convert(ns.srvReqId, mdl, NCSentenceManager.collapse(mdl.model, ns.clone())).
map(_.asScala).
par.
flatMap(sen ⇒
@@ -362,13 +353,14 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
startScopedSpan("synsProc", span,
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
- "txt" → ns.text) {
+ "txt" → ns.text
+ ) {
_ ⇒
// 1. Simple, sparse.
for ((elemId, syns) ← mdl.sparseSynonyms; syn ← syns)
syn.trySparseMatch(ns).foreach(toks ⇒ addMatch(mdl.elements(elemId), toks, syn, Seq.empty))
- // 2. DSL, sparse.
+ // 2. DSL, sparse.
for ((elemId, syns) ← mdl.sparseSynonymsDsl; syn ← syns) {
for (complex ← complexes) {
val comb = complex.tokensComplexes
@@ -378,13 +370,13 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
tows.filter(_.isRight).map(_.right.get) ++
tows.filter(_.isLeft).map(_.left.get).flatMap(w ⇒
ns.filter(
- t ⇒ t.startCharIndex >= w.getStartCharIndex &&
+ t ⇒
+ t.startCharIndex >= w.getStartCharIndex &&
t.endCharIndex <= w.getEndCharIndex
)
)
addMatch(mdl.elements(elemId), toks.sortBy(_.startCharIndex), syn, getParts(comb, syn))
-
})
}
}
@@ -428,9 +420,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
def setFound(
elm: NCElement,
- toks: Seq[NCNlpSentenceToken],
+ toks: Seq[NlpToken],
syn: NCProbeSynonym,
- parts: Seq[(NCToken, NCSynonymChunkKind)]
+ parts: Seq[TokenData]
): Unit =
if (addMatch(elm, toks, syn, parts))
found = true
@@ -544,7 +536,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
"srvReqId" → ns.srvReqId,
"mdlId" → mdl.model.getId,
"txt" → ns.text) { _ ⇒
- def to(t: NCNlpSentenceToken): NCCustomWord =
+ def to(t: NlpToken): NCCustomWord =
new NCCustomWord {
override def getNormalizedText: String = t.normText
override def getOriginalText: String = t.origText
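
For context on the WIP above: the change is mostly mechanical cleanup. It aliases NCNlpSentenceToken to NlpToken at the import site and collapses the repeated (NCToken, NCSynonymChunkKind) tuple into the new TokenData alias used by mark, addMatch, getParts and setFound. Below is a minimal standalone sketch of the same pattern; Token and ChunkKind are hypothetical stand-ins, not the NLPCraft API.

    // Minimal sketch of the type-alias refactoring used in this commit.
    // Token and ChunkKind stand in for NCToken / NCSynonymChunkKind.
    object TokenDataSketch {
        case class Token(id: String)
        sealed trait ChunkKind
        case object TEXT extends ChunkKind

        // One named alias replaces the (Token, ChunkKind) tuple that was
        // previously spelled out in every signature.
        type TokenData = (Token, ChunkKind)

        // A signature that used to read Seq[(Token, ChunkKind)] now reads
        // Seq[TokenData]; changing the pair's shape later touches only the
        // alias definition.
        def getParts(toks: Seq[Token]): Seq[TokenData] = toks.map(_ → TEXT)

        def main(args: Array[String]): Unit =
            println(getParts(Seq(Token("a"), Token("b"))))
    }

The same motive drives the import rename (import ...nlp.{NCNlpSentenceToken ⇒ NlpToken, _}): the long type name appears in many signatures, so a local alias shortens them without changing behavior.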