[incubator-nlpcraft] branch NLPCRAFT-384 updated: WIP.

aradzinski Mon, 13 Sep 2021 19:41:21 -0700

This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-384
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-384 by this push:
     new 33ce608  WIP.
33ce608 is described below

commit 33ce6086986810817d3031a88203b8be04b640d5
Author: Nikita Ivanov <[email protected]>
AuthorDate: Mon Sep 13 19:41:10 2021 -0700

    WIP.
---
 .../cargps/src/main/resources/cargps_model.yaml        |  3 +++
 .../main/resources/samples/cargps_cancel_samples.txt   | 10 +++++-----
 nlpcraft/src/main/resources/stopwords/stop_words.txt   |  4 ++--
 .../scala/org/apache/nlpcraft/common/NCService.scala   |  2 +-
 .../apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala |  1 -
 .../org/apache/nlpcraft/model/impl/NCTokenLogger.scala | 17 ++++++++++++-----
 .../nlp/enrichers/NCServerEnrichmentManager.scala      | 18 ++++++++++++++----
 7 files changed, 37 insertions(+), 18 deletions(-)

diff --git a/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
index 0833ea0..cd5fb4e 100644
--- a/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
+++ b/nlpcraft-examples/cargps/src/main/resources/cargps_model.yaml
@@ -35,6 +35,9 @@ macros:
   - name: "<WAYPOINT>"
     macro: "{waypoint|location|point|stopover|stop over|way station|stop|checkpoint|stop point} {point|station|_}"
 
+excludedStopWords:
+  - howdy
+
 abstractTokens:
   - x:addr:kind
   - x:addr:num
diff --git a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
index c6b132d..0e4cb3d 100644
--- a/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
+++ b/nlpcraft-examples/cargps/src/main/resources/samples/cargps_cancel_samples.txt
@@ -19,10 +19,10 @@
 # Set of samples (corpus) for automatic unit and regression testing.
 #
 
-Hey truck - stop the navigation!
+#Hey truck - stop the navigation!
 Howdy, car, please cancel the routing now.
-Hi car - stop the route.
-Hi car - stop the navigation...
+#Hi car - stop the route.
+#Hi car - stop the navigation...
 Howdy truck - quit navigating.
-Hi car - finish off the driving.
-Hi car - cancel the journey.
\ No newline at end of file
+#Hi car - finish off the driving.
+#Hi car - cancel the journey.
\ No newline at end of file
diff --git a/nlpcraft/src/main/resources/stopwords/stop_words.txt b/nlpcraft/src/main/resources/stopwords/stop_words.txt
index 3629397..dfaa83d 100644
--- a/nlpcraft/src/main/resources/stopwords/stop_words.txt
+++ b/nlpcraft/src/main/resources/stopwords/stop_words.txt
@@ -23,13 +23,13 @@
 # - Words with wildcard, symbol `*` (processed as lemma)
 #
 # Words and POSes can me marked as excluded (symbol `~` before word)
-# Word can be marked as case sensitive (symbol `@` before word)
+# Word can be marked as case-sensitive (symbol `@` before word)
 #
 # Restrictions:
 # - POSes list cannot be defined for multiple words.
 # - Only one wildcard can be defined in the word.
 # - Wildcard cannot be applied to chunks of words.
-# - Only one case sensitive flag can be defined in the word.
+# - Only one case-sensitive flag can be defined in the word.
 #
 # Examples:
 # ========
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala
index 3b8e292..786e1ea 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/NCService.scala
@@ -110,7 +110,7 @@ abstract class NCService extends LazyLogging with NCOpenCensusTrace {
      * @return
      */
     private def padDur(ms: Long): String =
-        StringUtils.leftPad(s"${U.now() - ms}ms", 6)
+        StringUtils.leftPad(s"${U.now() - ms}ms", 7)
 
     /**
      * Acks started service. Should be called at the end of the `start()` method.
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index c356550..7e306f4 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -29,7 +29,6 @@ import scala.jdk.CollectionConverters.{CollectionHasAsScala, SeqHasAsJava}
 
 /**
   * Sentence token note is a typed map of KV pairs.
-  *
   */
 class NCNlpSentenceNote(private val values: Map[String, JSerializable]) extends JSerializable with NCAsciiLike {
     import NCNlpSentenceNote._
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index 2bbc72a..b3005ce 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -378,6 +378,13 @@ object NCTokenLogger extends LazyLogging {
     def prepareTable(toks: Seq[NCToken]): NCAsciiTable = {
         val allFree = toks.forall(_.isFreeWord)
 
+        /**
+          *
+          * @param s
+          * @return
+          */
+        def cc(s: String): String = s"${ansi256Fg(183)}$s$ansiReset"
+
         val headers = mutable.ArrayBuffer.empty[String] ++
             Seq(
                 "idx",
@@ -385,15 +392,15 @@ object NCTokenLogger extends LazyLogging {
                 "lemma",
                 "pos",
                 "quoted",
-                "stopword",
-                "freeword",
+                r("stopword"),
+                y("freeword"),
                 "wordindexes",
                 "direct",
                 "sparsity"
             )
 
         if (!allFree)
-            headers += "token data"
+            headers += cc("token data")
 
         val tbl = NCAsciiTable(headers)
 
@@ -628,11 +635,11 @@ object NCTokenLogger extends LazyLogging {
                         if (tok.getId == "nlpcraft:nlp")
                             row.map(_.toString)
                         else
-                            row.map(s => s"${ansi256Fg(183)}${s.toString}${ansiReset}")
+                            row.map(s => cc(s.toString))
                     )
                     ++
                     // Token data.
-                    Seq(if (tok.getId == "nlpcraft:nlp") "" else s"<<${ansi256Fg(183)}${tok.getId}$ansiReset>> $v") :_*
+                    Seq(if (tok.getId == "nlpcraft:nlp") "" else s"<<${cc(tok.getId)}>> $v") :_*
                 )
             }
         })
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index 636b263..03b749f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -206,7 +206,10 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
             (x._1 * 100) + x._2.indexOf(hdr.noteName)
         })
 
-        val tbl = NCAsciiTable(headers.map(_.header))
+        val tbl = NCAsciiTable(headers.map(hdr => {
+            val s = hdr.header
+            if (s == "nlp:stopWord") s"${r(s)}" else s
+        }))
 
         /**
          *
@@ -214,15 +217,22 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
          * @param hdr
          * @return
          */
-        def mkNoteValue(tok: NCNlpSentenceToken, hdr: Header): Seq[String] =
+        def mkNoteValue(tok: NCNlpSentenceToken, hdr: Header): Seq[String] = {
+            val isStopWord = tok.isStopWord
+
             tok
                 .getNotes(hdr.noteType)
                 .filter(_.contains(hdr.noteName))
-                .map(_(hdr.noteName).toString())
+                .map(note => {
+                    val s = note(hdr.noteName).toString()
+                    if (isStopWord) s"${r(s)}" else s
+                })
                 .toSeq
+        }
 
-        for (tok <- s)
+        for (tok <- s) {
             tbl += (headers.map(mkNoteValue(tok, _)): _*)
+        }
 
         tbl
     }

[incubator-nlpcraft] branch NLPCRAFT-384 updated: WIP.

Reply via email to