This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-108
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-108 by this push:
     new 127ff7b  WIP.
127ff7b is described below

commit 127ff7b08c73b1ea71ad2ac22b3c68e147e4fa61
Author: Aaron Radzinski <[email protected]>
AuthorDate: Tue Sep 15 22:40:06 2020 -0700

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 13 +++----
 .../apache/nlpcraft/model/impl/NCTokenLogger.scala | 30 ++++++++++-----
 .../model/tools/test/NCTestClientBuilder.java      |  4 +-
 .../probe/mgrs/conversation/NCConversation.scala   | 26 +++++++------
 .../probe/mgrs/deploy/NCDeployManager.scala        | 12 +++---
 .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala  | 43 +++++++++++-----------
 .../nlp/enrichers/NCServerEnrichmentManager.scala  | 28 +++++++-------
 .../nlpcraft/common/ascii/NCAsciiTableSpec.scala   |  2 +-
 8 files changed, 85 insertions(+), 73 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 62f91c8..15af813 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -62,7 +62,7 @@ object NCNlpSentence {
           * - compare is relation function linked to date element.
           * - x an y defined as 2 elements: date and num.
           * So, variants 'x (as num) and x (as date)'  and 'x (as date) and x 
(as num)'
-          * should't be excluded, but invalid relation should be deleted for 
these combinations.
+          * should not be excluded, but invalid relation should be deleted for 
these combinations.
           */
         types.size match {
             case 0 ⇒ throw new AssertionError(s"Unexpected empty types 
[notesType=$notesType]")
@@ -445,14 +445,12 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence._
   *
   * @param srvReqId Server request ID.
   * @param text Normalized text.
-  * @param weight Weight.
   * @param enabledBuiltInToks Enabled built-in tokens.
   * @param tokens Initial buffer.
   */
 class NCNlpSentence(
     val srvReqId: String,
     val text: String,
-    val weight: Double,
     val enabledBuiltInToks: Set[String],
     override val tokens: ArrayBuffer[NCNlpSentenceToken] = new 
ArrayBuffer[NCNlpSentenceToken](32)
 ) extends NCNlpSentenceTokenBuffer(tokens) with java.io.Serializable {
@@ -464,7 +462,7 @@ class NCNlpSentence(
 
     // Deep copy.
     override def clone(): NCNlpSentence =
-        new NCNlpSentence(srvReqId, text, weight, enabledBuiltInToks, 
tokens.map(_.clone()))
+        new NCNlpSentence(srvReqId, text, enabledBuiltInToks, 
tokens.map(_.clone()))
 
     /**
       * Utility method that gets set of notes for given note type collected 
from
@@ -705,9 +703,10 @@ class NCNlpSentence(
     override def equals(obj: Any): Boolean = obj match {
         case x: NCNlpSentence ⇒
             tokens == x.tokens &&
-                srvReqId == x.srvReqId &&
-                text == x.text &&
-                enabledBuiltInToks == x.enabledBuiltInToks
+            srvReqId == x.srvReqId &&
+            text == x.text &&
+            enabledBuiltInToks == x.enabledBuiltInToks
+
         case _ ⇒ false
     }
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index 4908bfc..fc47ff5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -26,6 +26,7 @@ import org.apache.nlpcraft.common.ascii._
 import org.apache.nlpcraft.common.nlp._
 import org.apache.nlpcraft.model.NCToken
 import org.apache.nlpcraft.model.impl.NCTokenPimp._
+import org.apache.nlpcraft.common.ansi.NCAnsiColor._
 
 import scala.collection.JavaConverters._
 import scala.collection._
@@ -66,6 +67,7 @@ object NCTokenLogger extends LazyLogging {
                 "pos",
                 "quoted",
                 "stopWord",
+                "freeword",
                 "dict",
                 "wordIndexes",
                 "direct",
@@ -212,11 +214,10 @@ object NCTokenLogger extends LazyLogging {
                     s"type=$t, indexes=[${mkIndexes("indexes")}], note=$note"
 
                 case "nlpcraft:sort" ⇒
-                    var s =
-                        mkStringOpt("subjnotes") match {
-                            case Some(subjnotes) ⇒ s"subjnotes=$subjnotes, 
subjindexes=${mkIndexes("subjindexes")}"
-                            case None ⇒ ""
-                        }
+                    var s = mkStringOpt("subjnotes") match {
+                        case Some(subjnotes) ⇒ s"subjnotes=$subjnotes, 
subjindexes=${mkIndexes("subjindexes")}"
+                        case None ⇒ ""
+                    }
 
                     mkStringOpt("bynotes") match {
                         case Some(bynotes) ⇒
@@ -375,8 +376,7 @@ object NCTokenLogger extends LazyLogging {
     def prepareTable(toks: Seq[NCToken]): NCAsciiTable = {
         val allFree = toks.forall(_.isFreeWord)
 
-        val headers =
-            mutable.ArrayBuffer.empty[String] ++
+        val headers = mutable.ArrayBuffer.empty[String] ++
             Seq(
                 "idx",
                 "origtext",
@@ -384,6 +384,7 @@ object NCTokenLogger extends LazyLogging {
                 "pos",
                 "quoted",
                 "stopword",
+                "freeword",
                 "wordindexes",
                 "direct",
                 "sparsity"
@@ -426,14 +427,23 @@ object NCTokenLogger extends LazyLogging {
                 (d * 1000).intValue / 1000.0
             }
 
+            val origTxtStr =
+                if (tok.isStopWord)
+                    s"$ansiRedFg${tok.origText}$ansiReset"
+                else if (tok.isFreeWord)
+                    s"$ansiYellowFg${tok.origText}$ansiReset"
+                else
+                    tok.origText
+
             val row =
                 Seq(
                     tok.index,
-                    tok.origText,
+                    origTxtStr,
                     tok.lemma,
                     tok.pos,
                     tok.isQuoted,
-                    tok.isStopWord,
+                    if (tok.isStopWord) s"${ansiRedFg}true$ansiReset" else 
"false",
+                    if (tok.isFreeWord) s"${ansiYellowFg}true$ansiReset" else 
"false",
                     s"[${tok.wordIndexes.mkString(",")}]",
                     tok.isDirect,
                     tok.sparsity
@@ -615,7 +625,7 @@ object NCTokenLogger extends LazyLogging {
                     row
                     ++
                     // Token data.
-                    Seq(if (tok.getId == "nlpcraft:nlp") "" else 
s"<<${tok.getId}>> $v") :_*
+                    Seq(if (tok.getId == "nlpcraft:nlp") "" else 
s"<<$ansiBlueFg${tok.getId}$ansiReset>> $v") :_*
                 )
             }
         })
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestClientBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestClientBuilder.java
index 1db6886..81b78e8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestClientBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestClientBuilder.java
@@ -921,7 +921,7 @@ public class NCTestClientBuilder {
          * @throws NCTestClientException Thrown in case of test client errors.
          */
         private NCRequestResultJson restAskSync(String txt) throws 
IOException, NCTestClientException {
-            log.info("'ask/sync' request '{}' sent for data model ID: {}", 
txt, mdlId);
+            log.info("'ask/sync' request '{}' sent for data model: {}", txt, 
mdlId);
 
             return
                 gson.fromJson(
@@ -942,7 +942,7 @@ public class NCTestClientBuilder {
          * @throws NCTestClientException Thrown in case of test client errors.
          */
         private String restAsk(String txt) throws IOException, 
NCTestClientException {
-            log.info("'ask' request '{}' sent for data model ID: {}", txt, 
mdlId);
+            log.info("'ask' request '{}' sent for data model: {}", txt, mdlId);
 
             Map<String, Object> m = gson.fromJson(post(
                 "ask",
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conversation/NCConversation.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conversation/NCConversation.scala
index a7d97cf..1f8f51b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conversation/NCConversation.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conversation/NCConversation.scala
@@ -248,17 +248,21 @@ case class NCConversation(
     private def ack(): Unit = {
         require(Thread.holdsLock(stm))
 
-        val tbl = NCAsciiTable("Token ID", "Groups", "Text", "Value", "From 
request")
-
-        ctx.asScala.foreach(tok ⇒ tbl += (
-            tok.getId,
-            tok.getGroups,
-            tok.normText,
-            tok.getValue,
-            tok.getServerRequestId
-        ))
-
-        logger.info(s"Conversation tokens [mdlId=$mdlId, 
usrId=$usrId]:\n${tbl.toString()}")
+        if (ctx.isEmpty)
+            logger.info(s"Conversation context is empty for [mdlId=$mdlId, 
usrId=$usrId]")
+        else {
+            val tbl = NCAsciiTable("Token ID", "Groups", "Text", "Value", 
"From request")
+
+            ctx.asScala.foreach(tok ⇒ tbl += (
+                tok.getId,
+                tok.getGroups.asScala.mkString(", "),
+                tok.normText,
+                tok.getValue,
+                tok.getServerRequestId
+            ))
+
+            logger.info(s"Conversation tokens [mdlId=$mdlId, 
usrId=$usrId]:\n${tbl.toString()}")
+        }
     }
 
     /**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index fe91b8f..71cea85 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -415,7 +415,7 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
                 s"dups=${idAliasDups.mkString(", ")}" +
             "]")
 
-        val dupSyns = mutable.Buffer.empty[(String, Seq[String], String)]
+        val dupSyns = mutable.Buffer.empty[(Seq[String], String)]
 
         // Check for synonym dups across all elements.
         for (
@@ -423,19 +423,17 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
                 syns.groupBy(p ⇒ (p.syn.mkString(" "), p.syn.isDirect)) if 
holders.size > 1 && isDirect
         ) {
             dupSyns.append((
-                mdlId,
                 holders.map(p ⇒ s"id=${p.elmId}${if (p.syn.value == null) "" 
else s", value=${p.syn.value}"}").toSeq,
                 syn
             ))
         }
 
         if (dupSyns.nonEmpty) {
-            val tbl = NCAsciiTable("Model ID", "Elements", "Dup Synonym")
+            val tbl = NCAsciiTable("Elements", "Dup Synonym")
 
-            dupSyns.sortBy(_._1).foreach(row ⇒ tbl += (
+            dupSyns.foreach(row ⇒ tbl += (
                 row._1,
-                row._2,
-                row._3
+                row._2
             ))
 
             logger.warn(s"Dup synonyms in '$mdlId' model:\n${tbl.toString}")
@@ -478,7 +476,7 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
             )
         }
         else
-            logger.warn(s"Model has no intents [mdlId=$mdlId]")
+            logger.warn(s"Model has no intent: $mdlId")
 
         NCProbeModel(
             model = mdl,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 5e944d1..24f98d5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -26,6 +26,7 @@ import java.util.function.Predicate
 import io.opencensus.trace.{Span, Status}
 import org.apache.nlpcraft.common.NCErrorCodes._
 import org.apache.nlpcraft.common._
+import org.apache.nlpcraft.common.ascii.NCAsciiTable
 import org.apache.nlpcraft.common.config.NCConfigurable
 import org.apache.nlpcraft.common.debug.NCLogHolder
 import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote}
@@ -213,18 +214,19 @@ object NCProbeEnrichmentManager extends NCService with 
NCOpenCensusModelStats {
     ): Unit = {
         require(nlpSens.nonEmpty)
 
-        logger.info(
-            s"New sentences received [" +
-                s"txt='${nlpSens.head.text}', " +
-                s"count=${nlpSens.size}, " +
-                s"usrId=$usrId, " +
-                s"mdlId=$mdlId, " +
-                s"srvReqId=$srvReqId" +
-            s"]"
+        var start = System.currentTimeMillis()
+
+        val tbl = NCAsciiTable("Text", "Model ID", "User ID", "Server Request 
ID")
+
+        tbl += (
+            nlpSens.map(_.text),
+            mdlId,
+            usrId,
+            srvReqId
         )
+
+        logger.info(s"New sentence received:\n$tbl")
         
-        var start = System.currentTimeMillis()
-    
         /**
           *
           * @param code Pre or post checker error code.
@@ -313,9 +315,9 @@ object NCProbeEnrichmentManager extends NCService with 
NCOpenCensusModelStats {
             NCConnectionManager.send(msg, span)
             
             if (errMsg.isEmpty)
-                logger.info(s"OK response $msgName sent [srvReqId=$srvReqId, 
type=${resType.getOrElse("")}]")
+                logger.info(s"OK result sent back to server 
[srvReqId=$srvReqId, type=${resType.getOrElse("")}]")
             else
-                logger.info(s"REJECT response $msgName sent 
[srvReqId=$srvReqId, response=${errMsg.get}]")
+                logger.info(s"REJECT response sent back to server 
[srvReqId=$srvReqId, response=${errMsg.get}]")
         }
 
         val mdl = NCModelManager.getModel(mdlId, span)
@@ -336,7 +338,7 @@ object NCProbeEnrichmentManager extends NCService with 
NCOpenCensusModelStats {
                         if (errData.isEmpty)
                             errData = Some((errMsg, errCode))
 
-                        logger.error(s"Pre-enrichment validation 
[text=${nlpSen.text}, weight=${nlpSen.weight}, error=$errMsg]")
+                        logger.error(s"Pre-enrichment validation error 
[text=${nlpSen.text}, error=$errMsg]")
 
                         None
                 }
@@ -374,13 +376,12 @@ object NCProbeEnrichmentManager extends NCService with 
NCOpenCensusModelStats {
                 else
                     None
 
-            val loopEnrichers =
-                Seq(
-                    Some(Holder(NCModelEnricher, () ⇒ 
nlpSen.flatten.filter(_.isUser))),
-                    get("nlpcraft:sort", NCSortEnricher),
-                    get("nlpcraft:limit", NCLimitEnricher),
-                    get("nlpcraft:relation", NCRelationEnricher)
-                ).flatten
+            val loopEnrichers = Seq(
+                Some(Holder(NCModelEnricher, () ⇒ 
nlpSen.flatten.filter(_.isUser))),
+                get("nlpcraft:sort", NCSortEnricher),
+                get("nlpcraft:limit", NCLimitEnricher),
+                get("nlpcraft:relation", NCRelationEnricher)
+            ).flatten
 
             var step = 0
             var continue = true
@@ -479,7 +480,7 @@ object NCProbeEnrichmentManager extends NCService with 
NCOpenCensusModelStats {
             case e: NCValidateException ⇒
                 val (errMsg, errCode) = getError(e.code)
 
-                logger.error(s"Post-enrichment validation: $errMsg ")
+                logger.error(s"Post-enrichment validation error: $errMsg")
 
                 respond(
                     None,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 0fd6bb1..ebad1ee 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -43,7 +43,7 @@ import scala.util.control.Exception.catching
   */
 object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
     private object Config extends NCConfigurable {
-        def supportNlpCraft: Boolean = 
getStringList("nlpcraft.server.tokenProviders").contains("nlpcraft")
+        def isBuiltInEnrichers: Boolean = 
getStringList("nlpcraft.server.tokenProviders").contains("nlpcraft")
     }
 
     private final val CUSTOM_PREFIXES = Set("google:", "opennlp:", 
"stanford:", "spacy:")
@@ -53,7 +53,7 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
 
     // NOTE: this cache is independent from datasource.
     @volatile private var cache: IgniteCache[String, Holder] = _
-    
+
     private val HEADERS: Map[String, (Int, Seq[String])] =
         Seq(
             "nlpcraft:nlp" → Seq("origText", "index", "pos", "lemma", "stem", 
"bracketed", "quoted", "stopWord", "ne", "nne"),
@@ -97,7 +97,7 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
         enabledBuiltInToks: Set[String],
         parent: Span = null): NCNlpSentence =
         startScopedSpan("process", parent, "srvReqId" → srvReqId, "txt" → 
normTxt) { span ⇒
-            val s = new NCNlpSentence(srvReqId, normTxt, 1, enabledBuiltInToks)
+            val s = new NCNlpSentence(srvReqId, normTxt, enabledBuiltInToks)
 
             // Server-side enrichment pipeline.
             // NOTE: order of enrichers is IMPORTANT.
@@ -105,7 +105,7 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
             NCQuoteEnricher.enrich(s, span)
             NCStopWordEnricher.enrich(s, span)
 
-            if (Config.supportNlpCraft) {
+            if (Config.isBuiltInEnrichers) {
                 if (enabledBuiltInToks.contains("nlpcraft:date"))
                     NCDateEnricher.enrich(s, span)
 
@@ -141,7 +141,7 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
         enabledBuiltInToks: Set[String],
         parent: Span = null): NCNlpSentence = {
         startScopedSpan("enrichPipeline", parent, "srvReqId" → srvReqId, "txt" 
→ txt) { span ⇒
-            val normTxt = NCPreProcessManager.normalize(txt, true, span)
+            val normTxt = NCPreProcessManager.normalize(txt, spellCheck = 
true, span)
 
             if (normTxt != txt)
                 logger.info(s"Sentence normalized to: $normTxt")
@@ -164,7 +164,7 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
             }
         }
     }
-    
+
     /**
       *
       * @param s NLP sentence to ASCII print.
@@ -195,17 +195,17 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
                     )
                 )
         }
-        
+
         val headers = s.flatten.flatMap(mkNoteHeaders).distinct.sortBy(hdr ⇒ {
             val x = HEADERS.
                 find(p ⇒ isType(hdr.noteType, p._1)).
                 getOrElse(throw new NCE(s"Header not found for: 
${hdr.noteType}"))._2
-            
+
             (x._1 * 100) + x._2.indexOf(hdr.noteName)
         })
 
         val tbl = NCAsciiTable(headers.map(_.header): _*)
-        
+
         def mkNoteValue(tok: NCNlpSentenceToken, hdr: Header): Seq[String] =
             
tok.getNotes(hdr.noteType).filter(_.contains(hdr.noteName)).map(_(hdr.noteName).toString()).toSeq
 
@@ -243,12 +243,12 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
         catching(wrapIE) {
             cache = ignite.cache[String, Holder]("sentence-cache")
         }
-        
+
         NCBaseNlpEnricher.start(span)
         NCStopWordEnricher.start(span)
         NCQuoteEnricher.start(span)
 
-        if (Config.supportNlpCraft) {
+        if (Config.isBuiltInEnrichers) {
             // These component can be started independently.
             U.executeParallel(
                 () ⇒ NCDateEnricher.start(span),
@@ -259,16 +259,16 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
         }
 
         ners = NCNlpServerManager.getNers
-        supportedProviders = ners.keySet ++ (if (Config.supportNlpCraft) 
Set("nlpcraft") else Set.empty)
+        supportedProviders = ners.keySet ++ (if (Config.isBuiltInEnrichers) 
Set("nlpcraft") else Set.empty)
 
         super.start()
     }
-    
+
     /**
       * Stops this manager.
       */
     override def stop(parent: Span = null): Unit = startScopedSpan("stop", 
parent) { span ⇒
-        if (Config.supportNlpCraft) {
+        if (Config.isBuiltInEnrichers) {
             NCCoordinatesEnricher.stop(span)
             NCGeoEnricher.stop(span)
             NCNumericEnricher.stop(span)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/ascii/NCAsciiTableSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/ascii/NCAsciiTableSpec.scala
index 2c26ecb..6153aac 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/ascii/NCAsciiTableSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/ascii/NCAsciiTableSpec.scala
@@ -73,7 +73,7 @@ class NCAsciiTableSpec {
 
     @Test
     def testWithVeryBigTable() {
-        val NUM = 10000
+        val NUM = 100
 
         val start = System.currentTimeMillis()
 

Reply via email to