This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-384
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-384 by this push:
new 33ce608 WIP.
33ce608 is described below
commit 33ce6086986810817d3031a88203b8be04b640d5
Author: Nikita Ivanov <[email protected]>
AuthorDate: Mon Sep 13 19:41:10 2021 -0700
WIP.
---
.../cargps/src/main/resources/cargps_model.yaml | 3 +++
.../main/resources/samples/cargps_cancel_samples.txt | 10 +++++-----
nlpcraft/src/main/resources/stopwords/stop_words.txt | 4 ++--
.../scala/org/apache/nlpcraft/common/NCService.scala | 2 +-
.../apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala | 1 -
.../org/apache/nlpcraft/model/impl/NCTokenLogger.scala | 17 ++++++++++++-----
.../nlp/enrichers/NCServerEnrichmentManager.scala | 18 ++++++++++++++----
7 files changed, 37 insertions(+), 18 deletions(-)
diff --git a/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
index 0833ea0..cd5fb4e 100644
--- a/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
+++ b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
@@ -35,6 +35,9 @@ macros:
- name: "<WAYPOINT>"
macro: "{waypoint|location|point|stopover|stop over|way station|stop|checkpoint|stop point} {point|station|_}"
+excludedStopWords:
+ - howdy
+
abstractTokens:
- x:addr:kind
- x:addr:num
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
index c6b132d..0e4cb3d 100644
--- a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
@@ -19,10 +19,10 @@
# Set of samples (corpus) for automatic unit and regression testing.
#
-Hey truck - stop the navigation!
+#Hey truck - stop the navigation!
Howdy, car, please cancel the routing now.
-Hi car - stop the route.
-Hi car - stop the navigation...
+#Hi car - stop the route.
+#Hi car - stop the navigation...
Howdy truck - quit navigating.
-Hi car - finish off the driving.
-Hi car - cancel the journey.
\ No newline at end of file
+#Hi car - finish off the driving.
+#Hi car - cancel the journey.
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/stopwords/stop_words.txt b/nlpcraft/src/main/resources/stopwords/stop_words.txt
index 3629397..dfaa83d 100644
--- a/nlpcraft/src/main/resources/stopwords/stop_words.txt
+++ b/nlpcraft/src/main/resources/stopwords/stop_words.txt
@@ -23,13 +23,13 @@
# - Words with wildcard, symbol `*` (processed as lemma)
#
# Words and POSes can me marked as excluded (symbol `~` before word)
-# Word can be marked as case sensitive (symbol `@` before word)
+# Word can be marked as case-sensitive (symbol `@` before word)
#
# Restrictions:
# - POSes list cannot be defined for multiple words.
# - Only one wildcard can be defined in the word.
# - Wildcard cannot be applied to chunks of words.
-# - Only one case sensitive flag can be defined in the word.
+# - Only one case-sensitive flag can be defined in the word.
#
# Examples:
# ========
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala
index 3b8e292..786e1ea 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala
@@ -110,7 +110,7 @@ abstract class NCService extends LazyLogging with NCOpenCensusTrace {
* @return
*/
private def padDur(ms: Long): String =
- StringUtils.leftPad(s"${U.now() - ms}ms", 6)
+ StringUtils.leftPad(s"${U.now() - ms}ms", 7)
/**
* Acks started service. Should be called at the end of the `start()` method.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index c356550..7e306f4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -29,7 +29,6 @@ import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava}
/**
* Sentence token note is a typed map of KV pairs.
- *
*/
class NCNlpSentenceNote(private val values: Map[String, JSerializable])
extends JSerializable with NCAsciiLike {
import NCNlpSentenceNote._
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index 2bbc72a..b3005ce 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -378,6 +378,13 @@ object NCTokenLogger extends LazyLogging {
def prepareTable(toks: Seq[NCToken]): NCAsciiTable = {
val allFree = toks.forall(_.isFreeWord)
+ /**
+ *
+ * @param s
+ * @return
+ */
+ def cc(s: String): String = s"${ansi256Fg(183)}$s$ansiReset"
+
val headers = mutable.ArrayBuffer.empty[String] ++
Seq(
"idx",
@@ -385,15 +392,15 @@ object NCTokenLogger extends LazyLogging {
"lemma",
"pos",
"quoted",
- "stopword",
- "freeword",
+ r("stopword"),
+ y("freeword"),
"wordindexes",
"direct",
"sparsity"
)
if (!allFree)
- headers += "token data"
+ headers += cc("token data")
val tbl = NCAsciiTable(headers)
@@ -628,11 +635,11 @@ object NCTokenLogger extends LazyLogging {
if (tok.getId == "nlpcraft:nlp")
row.map(_.toString)
else
- row.map(s => s"${ansi256Fg(183)}${s.toString}${ansiReset}")
+ row.map(s => cc(s.toString))
)
++
// Token data.
- Seq(if (tok.getId == "nlpcraft:nlp") "" else s"<<${ansi256Fg(183)}${tok.getId}$ansiReset>> $v") :_*
+ Seq(if (tok.getId == "nlpcraft:nlp") "" else s"<<${cc(tok.getId)}>> $v") :_*
)
}
})
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 636b263..03b749f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -206,7 +206,10 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
(x._1 * 100) + x._2.indexOf(hdr.noteName)
})
- val tbl = NCAsciiTable(headers.map(_.header))
+ val tbl = NCAsciiTable(headers.map(hdr => {
+ val s = hdr.header
+ if (s == "nlp:stopWord") s"${r(s)}" else s
+ }))
/**
*
@@ -214,15 +217,22 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
* @param hdr
* @return
*/
- def mkNoteValue(tok: NCNlpSentenceToken, hdr: Header): Seq[String] =
+ def mkNoteValue(tok: NCNlpSentenceToken, hdr: Header): Seq[String] = {
+ val isStopWord = tok.isStopWord
+
tok
.getNotes(hdr.noteType)
.filter(_.contains(hdr.noteName))
- .map(_(hdr.noteName).toString())
+ .map(note => {
+ val s = note(hdr.noteName).toString()
+ if (isStopWord) s"${r(s)}" else s
+ })
.toSeq
+ }
- for (tok <- s)
+ for (tok <- s) {
tbl += (headers.map(mkNoteValue(tok, _)): _*)
+ }
tbl
}