This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-108
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-108 by this push:
new 127ff7b WIP.
127ff7b is described below
commit 127ff7b08c73b1ea71ad2ac22b3c68e147e4fa61
Author: Aaron Radzinski <[email protected]>
AuthorDate: Tue Sep 15 22:40:06 2020 -0700
WIP.
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 13 +++----
.../apache/nlpcraft/model/impl/NCTokenLogger.scala | 30 ++++++++++-----
.../model/tools/test/NCTestClientBuilder.java | 4 +-
.../probe/mgrs/conversation/NCConversation.scala | 26 +++++++------
.../probe/mgrs/deploy/NCDeployManager.scala | 12 +++---
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 43 +++++++++++-----------
.../nlp/enrichers/NCServerEnrichmentManager.scala | 28 +++++++-------
.../nlpcraft/common/ascii/NCAsciiTableSpec.scala | 2 +-
8 files changed, 85 insertions(+), 73 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 62f91c8..15af813 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -62,7 +62,7 @@ object NCNlpSentence {
* - compare is relation function linked to date element.
* - x an y defined as 2 elements: date and num.
* So, variants 'x (as num) and x (as date)' and 'x (as date) and x (as num)'
- * should't be excluded, but invalid relation should be deleted for these combinations.
+ * should not be excluded, but invalid relation should be deleted for these combinations.
*/
types.size match {
case 0 ⇒ throw new AssertionError(s"Unexpected empty types [notesType=$notesType]")
@@ -445,14 +445,12 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence._
*
* @param srvReqId Server request ID.
* @param text Normalized text.
- * @param weight Weight.
* @param enabledBuiltInToks Enabled built-in tokens.
* @param tokens Initial buffer.
*/
class NCNlpSentence(
val srvReqId: String,
val text: String,
- val weight: Double,
val enabledBuiltInToks: Set[String],
override val tokens: ArrayBuffer[NCNlpSentenceToken] = new ArrayBuffer[NCNlpSentenceToken](32)
) extends NCNlpSentenceTokenBuffer(tokens) with java.io.Serializable {
@@ -464,7 +462,7 @@ class NCNlpSentence(
// Deep copy.
override def clone(): NCNlpSentence =
- new NCNlpSentence(srvReqId, text, weight, enabledBuiltInToks, tokens.map(_.clone()))
+ new NCNlpSentence(srvReqId, text, enabledBuiltInToks, tokens.map(_.clone()))
/**
* Utility method that gets set of notes for given note type collected from
@@ -705,9 +703,10 @@ class NCNlpSentence(
override def equals(obj: Any): Boolean = obj match {
case x: NCNlpSentence ⇒
tokens == x.tokens &&
- srvReqId == x.srvReqId &&
- text == x.text &&
- enabledBuiltInToks == x.enabledBuiltInToks
+ srvReqId == x.srvReqId &&
+ text == x.text &&
+ enabledBuiltInToks == x.enabledBuiltInToks
+
case _ ⇒ false
}
}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index 4908bfc..fc47ff5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -26,6 +26,7 @@ import org.apache.nlpcraft.common.ascii._
import org.apache.nlpcraft.common.nlp._
import org.apache.nlpcraft.model.NCToken
import org.apache.nlpcraft.model.impl.NCTokenPimp._
+import org.apache.nlpcraft.common.ansi.NCAnsiColor._
import scala.collection.JavaConverters._
import scala.collection._
@@ -66,6 +67,7 @@ object NCTokenLogger extends LazyLogging {
"pos",
"quoted",
"stopWord",
+ "freeword",
"dict",
"wordIndexes",
"direct",
@@ -212,11 +214,10 @@ object NCTokenLogger extends LazyLogging {
s"type=$t, indexes=[${mkIndexes("indexes")}], note=$note"
case "nlpcraft:sort" ⇒
- var s =
- mkStringOpt("subjnotes") match {
- case Some(subjnotes) ⇒ s"subjnotes=$subjnotes, subjindexes=${mkIndexes("subjindexes")}"
- case None ⇒ ""
- }
+ var s = mkStringOpt("subjnotes") match {
+ case Some(subjnotes) ⇒ s"subjnotes=$subjnotes, subjindexes=${mkIndexes("subjindexes")}"
+ case None ⇒ ""
+ }
mkStringOpt("bynotes") match {
case Some(bynotes) ⇒
@@ -375,8 +376,7 @@ object NCTokenLogger extends LazyLogging {
def prepareTable(toks: Seq[NCToken]): NCAsciiTable = {
val allFree = toks.forall(_.isFreeWord)
- val headers =
- mutable.ArrayBuffer.empty[String] ++
+ val headers = mutable.ArrayBuffer.empty[String] ++
Seq(
"idx",
"origtext",
@@ -384,6 +384,7 @@ object NCTokenLogger extends LazyLogging {
"pos",
"quoted",
"stopword",
+ "freeword",
"wordindexes",
"direct",
"sparsity"
@@ -426,14 +427,23 @@ object NCTokenLogger extends LazyLogging {
(d * 1000).intValue / 1000.0
}
+ val origTxtStr =
+ if (tok.isStopWord)
+ s"$ansiRedFg${tok.origText}$ansiReset"
+ else if (tok.isFreeWord)
+ s"$ansiYellowFg${tok.origText}$ansiReset"
+ else
+ tok.origText
+
val row =
Seq(
tok.index,
- tok.origText,
+ origTxtStr,
tok.lemma,
tok.pos,
tok.isQuoted,
- tok.isStopWord,
+ if (tok.isStopWord) s"${ansiRedFg}true$ansiReset" else "false",
+ if (tok.isFreeWord) s"${ansiYellowFg}true$ansiReset" else "false",
s"[${tok.wordIndexes.mkString(",")}]",
tok.isDirect,
tok.sparsity
@@ -615,7 +625,7 @@ object NCTokenLogger extends LazyLogging {
row
++
// Token data.
- Seq(if (tok.getId == "nlpcraft:nlp") "" else s"<<${tok.getId}>> $v") :_*
+ Seq(if (tok.getId == "nlpcraft:nlp") "" else s"<<$ansiBlueFg${tok.getId}$ansiReset>> $v") :_*
)
}
})
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestClientBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestClientBuilder.java
index 1db6886..81b78e8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestClientBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/test/NCTestClientBuilder.java
@@ -921,7 +921,7 @@ public class NCTestClientBuilder {
* @throws NCTestClientException Thrown in case of test client errors.
*/
private NCRequestResultJson restAskSync(String txt) throws IOException, NCTestClientException {
- log.info("'ask/sync' request '{}' sent for data model ID: {}", txt, mdlId);
+ log.info("'ask/sync' request '{}' sent for data model: {}", txt, mdlId);
return
gson.fromJson(
@@ -942,7 +942,7 @@ public class NCTestClientBuilder {
* @throws NCTestClientException Thrown in case of test client errors.
*/
private String restAsk(String txt) throws IOException, NCTestClientException {
- log.info("'ask' request '{}' sent for data model ID: {}", txt, mdlId);
+ log.info("'ask' request '{}' sent for data model: {}", txt, mdlId);
Map<String, Object> m = gson.fromJson(post(
"ask",
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conversation/NCConversation.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conversation/NCConversation.scala
index a7d97cf..1f8f51b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conversation/NCConversation.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conversation/NCConversation.scala
@@ -248,17 +248,21 @@ case class NCConversation(
private def ack(): Unit = {
require(Thread.holdsLock(stm))
- val tbl = NCAsciiTable("Token ID", "Groups", "Text", "Value", "From request")
-
- ctx.asScala.foreach(tok ⇒ tbl += (
- tok.getId,
- tok.getGroups,
- tok.normText,
- tok.getValue,
- tok.getServerRequestId
- ))
-
- logger.info(s"Conversation tokens [mdlId=$mdlId,
usrId=$usrId]:\n${tbl.toString()}")
+ if (ctx.isEmpty)
+ logger.info(s"Conversation context is empty for [mdlId=$mdlId,
usrId=$usrId]")
+ else {
+ val tbl = NCAsciiTable("Token ID", "Groups", "Text", "Value", "From request")
+
+ ctx.asScala.foreach(tok ⇒ tbl += (
+ tok.getId,
+ tok.getGroups.asScala.mkString(", "),
+ tok.normText,
+ tok.getValue,
+ tok.getServerRequestId
+ ))
+
+ logger.info(s"Conversation tokens [mdlId=$mdlId,
usrId=$usrId]:\n${tbl.toString()}")
+ }
}
/**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index fe91b8f..71cea85 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -415,7 +415,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
s"dups=${idAliasDups.mkString(", ")}" +
"]")
- val dupSyns = mutable.Buffer.empty[(String, Seq[String], String)]
+ val dupSyns = mutable.Buffer.empty[(Seq[String], String)]
// Check for synonym dups across all elements.
for (
@@ -423,19 +423,17 @@ object NCDeployManager extends NCService with DecorateAsScala {
syns.groupBy(p ⇒ (p.syn.mkString(" "), p.syn.isDirect)) if holders.size > 1 && isDirect
) {
dupSyns.append((
- mdlId,
holders.map(p ⇒ s"id=${p.elmId}${if (p.syn.value == null) "" else s", value=${p.syn.value}"}").toSeq,
syn
))
}
if (dupSyns.nonEmpty) {
- val tbl = NCAsciiTable("Model ID", "Elements", "Dup Synonym")
+ val tbl = NCAsciiTable("Elements", "Dup Synonym")
- dupSyns.sortBy(_._1).foreach(row ⇒ tbl += (
+ dupSyns.foreach(row ⇒ tbl += (
row._1,
- row._2,
- row._3
+ row._2
))
logger.warn(s"Dup synonyms in '$mdlId' model:\n${tbl.toString}")
@@ -478,7 +476,7 @@ object NCDeployManager extends NCService with DecorateAsScala {
)
}
else
- logger.warn(s"Model has no intents [mdlId=$mdlId]")
+ logger.warn(s"Model has no intent: $mdlId")
NCProbeModel(
model = mdl,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 5e944d1..24f98d5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -26,6 +26,7 @@ import java.util.function.Predicate
import io.opencensus.trace.{Span, Status}
import org.apache.nlpcraft.common.NCErrorCodes._
import org.apache.nlpcraft.common._
+import org.apache.nlpcraft.common.ascii.NCAsciiTable
import org.apache.nlpcraft.common.config.NCConfigurable
import org.apache.nlpcraft.common.debug.NCLogHolder
import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote}
@@ -213,18 +214,19 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
): Unit = {
require(nlpSens.nonEmpty)
- logger.info(
- s"New sentences received [" +
- s"txt='${nlpSens.head.text}', " +
- s"count=${nlpSens.size}, " +
- s"usrId=$usrId, " +
- s"mdlId=$mdlId, " +
- s"srvReqId=$srvReqId" +
- s"]"
+ var start = System.currentTimeMillis()
+
+ val tbl = NCAsciiTable("Text", "Model ID", "User ID", "Server Request ID")
+
+ tbl += (
+ nlpSens.map(_.text),
+ mdlId,
+ usrId,
+ srvReqId
)
+
+ logger.info(s"New sentence received:\n$tbl")
- var start = System.currentTimeMillis()
-
/**
*
* @param code Pre or post checker error code.
@@ -313,9 +315,9 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
NCConnectionManager.send(msg, span)
if (errMsg.isEmpty)
- logger.info(s"OK response $msgName sent [srvReqId=$srvReqId,
type=${resType.getOrElse("")}]")
+ logger.info(s"OK result sent back to server
[srvReqId=$srvReqId, type=${resType.getOrElse("")}]")
else
- logger.info(s"REJECT response $msgName sent
[srvReqId=$srvReqId, response=${errMsg.get}]")
+ logger.info(s"REJECT response sent back to server
[srvReqId=$srvReqId, response=${errMsg.get}]")
}
val mdl = NCModelManager.getModel(mdlId, span)
@@ -336,7 +338,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
if (errData.isEmpty)
errData = Some((errMsg, errCode))
- logger.error(s"Pre-enrichment validation
[text=${nlpSen.text}, weight=${nlpSen.weight}, error=$errMsg]")
+ logger.error(s"Pre-enrichment validation error
[text=${nlpSen.text}, error=$errMsg]")
None
}
@@ -374,13 +376,12 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
else
None
- val loopEnrichers =
- Seq(
- Some(Holder(NCModelEnricher, () ⇒ nlpSen.flatten.filter(_.isUser))),
- get("nlpcraft:sort", NCSortEnricher),
- get("nlpcraft:limit", NCLimitEnricher),
- get("nlpcraft:relation", NCRelationEnricher)
- ).flatten
+ val loopEnrichers = Seq(
+ Some(Holder(NCModelEnricher, () ⇒ nlpSen.flatten.filter(_.isUser))),
+ get("nlpcraft:sort", NCSortEnricher),
+ get("nlpcraft:limit", NCLimitEnricher),
+ get("nlpcraft:relation", NCRelationEnricher)
+ ).flatten
var step = 0
var continue = true
@@ -479,7 +480,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats {
case e: NCValidateException ⇒
val (errMsg, errCode) = getError(e.code)
- logger.error(s"Post-enrichment validation: $errMsg ")
+ logger.error(s"Post-enrichment validation error: $errMsg")
respond(
None,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 0fd6bb1..ebad1ee 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -43,7 +43,7 @@ import scala.util.control.Exception.catching
*/
object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
private object Config extends NCConfigurable {
- def supportNlpCraft: Boolean = getStringList("nlpcraft.server.tokenProviders").contains("nlpcraft")
+ def isBuiltInEnrichers: Boolean = getStringList("nlpcraft.server.tokenProviders").contains("nlpcraft")
}
private final val CUSTOM_PREFIXES = Set("google:", "opennlp:", "stanford:", "spacy:")
@@ -53,7 +53,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
// NOTE: this cache is independent from datasource.
@volatile private var cache: IgniteCache[String, Holder] = _
-
+
private val HEADERS: Map[String, (Int, Seq[String])] =
Seq(
"nlpcraft:nlp" → Seq("origText", "index", "pos", "lemma", "stem",
"bracketed", "quoted", "stopWord", "ne", "nne"),
@@ -97,7 +97,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
enabledBuiltInToks: Set[String],
parent: Span = null): NCNlpSentence =
startScopedSpan("process", parent, "srvReqId" → srvReqId, "txt" →
normTxt) { span ⇒
- val s = new NCNlpSentence(srvReqId, normTxt, 1, enabledBuiltInToks)
+ val s = new NCNlpSentence(srvReqId, normTxt, enabledBuiltInToks)
// Server-side enrichment pipeline.
// NOTE: order of enrichers is IMPORTANT.
@@ -105,7 +105,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
NCQuoteEnricher.enrich(s, span)
NCStopWordEnricher.enrich(s, span)
- if (Config.supportNlpCraft) {
+ if (Config.isBuiltInEnrichers) {
if (enabledBuiltInToks.contains("nlpcraft:date"))
NCDateEnricher.enrich(s, span)
@@ -141,7 +141,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
enabledBuiltInToks: Set[String],
parent: Span = null): NCNlpSentence = {
startScopedSpan("enrichPipeline", parent, "srvReqId" → srvReqId, "txt"
→ txt) { span ⇒
- val normTxt = NCPreProcessManager.normalize(txt, true, span)
+ val normTxt = NCPreProcessManager.normalize(txt, spellCheck = true, span)
if (normTxt != txt)
logger.info(s"Sentence normalized to: $normTxt")
@@ -164,7 +164,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
}
}
}
-
+
/**
*
* @param s NLP sentence to ASCII print.
@@ -195,17 +195,17 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
)
)
}
-
+
val headers = s.flatten.flatMap(mkNoteHeaders).distinct.sortBy(hdr ⇒ {
val x = HEADERS.
find(p ⇒ isType(hdr.noteType, p._1)).
getOrElse(throw new NCE(s"Header not found for: ${hdr.noteType}"))._2
-
+
(x._1 * 100) + x._2.indexOf(hdr.noteName)
})
val tbl = NCAsciiTable(headers.map(_.header): _*)
-
+
def mkNoteValue(tok: NCNlpSentenceToken, hdr: Header): Seq[String] =
tok.getNotes(hdr.noteType).filter(_.contains(hdr.noteName)).map(_(hdr.noteName).toString()).toSeq
@@ -243,12 +243,12 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
catching(wrapIE) {
cache = ignite.cache[String, Holder]("sentence-cache")
}
-
+
NCBaseNlpEnricher.start(span)
NCStopWordEnricher.start(span)
NCQuoteEnricher.start(span)
- if (Config.supportNlpCraft) {
+ if (Config.isBuiltInEnrichers) {
// These component can be started independently.
U.executeParallel(
() ⇒ NCDateEnricher.start(span),
@@ -259,16 +259,16 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
}
ners = NCNlpServerManager.getNers
- supportedProviders = ners.keySet ++ (if (Config.supportNlpCraft) Set("nlpcraft") else Set.empty)
+ supportedProviders = ners.keySet ++ (if (Config.isBuiltInEnrichers) Set("nlpcraft") else Set.empty)
super.start()
}
-
+
/**
* Stops this manager.
*/
override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { span ⇒
- if (Config.supportNlpCraft) {
+ if (Config.isBuiltInEnrichers) {
NCCoordinatesEnricher.stop(span)
NCGeoEnricher.stop(span)
NCNumericEnricher.stop(span)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/ascii/NCAsciiTableSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/ascii/NCAsciiTableSpec.scala
index 2c26ecb..6153aac 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/ascii/NCAsciiTableSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/ascii/NCAsciiTableSpec.scala
@@ -73,7 +73,7 @@ class NCAsciiTableSpec {
@Test
def testWithVeryBigTable() {
- val NUM = 10000
+ val NUM = 100
val start = System.currentTimeMillis()