[incubator-nlpcraft] 03/03: WIP.

sergeykamov Thu, 17 Jun 2021 11:19:39 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


commit 692532bd6e02383c7df3ba8cf73d00057995f78d
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Jun 17 21:19:19 2021 +0300

    WIP.
---
 .../probe/mgrs/conn/NCConnectionManager.scala      |  16 ++-
 .../nlpcraft/server/mdo/NCProbeModelMdo.scala      |   6 +-
 .../nlp/enrichers/NCServerEnrichmentManager.scala  |  20 ++-
 .../enrichers/ctxword/ContextWordEnricher.scala    |  51 -------
 .../enrichers/ctxword/NCContextWordEnricher.scala  | 148 +++++++++++++++++++++
 .../nlpcraft/server/probe/NCProbeManager.scala     |  16 ++-
 .../nlpcraft/server/rest/NCBasicRestApi.scala      |   2 +-
 .../server/sugsyn/NCSuggestSynonymManager.scala    | 122 ++++++++++++++---
 .../server/sugsyn/NCSuggestionElement.scala        |  26 ++++
 .../nlpcraft/server/sugsyn/NCWordSuggestion.scala  |  25 ++++
 .../nlpcraft/model/ctxword/NCContextWordSpec.scala | 102 ++++++++++++++
 11 files changed, 444 insertions(+), 90 deletions(-)

diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index c911342..c712ed7 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -34,7 +34,7 @@ import java.util
 import java.util.concurrent.CountDownLatch
 import java.util.{Collections, Properties, TimeZone}
 import scala.collection.mutable
-import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, 
SeqHasAsJava, SetHasAsScala}
+import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, 
SetHasAsJava, SetHasAsScala}
 
 /**
   * Probe down/up link connection manager.
@@ -221,18 +221,22 @@ object NCConnectionManager extends NCService {
                                 values,
                                 samples
                             ): (
-                                java.util.Map[String, java.util.Map[String, 
java.util.List[String]]],
-                                java.util.Map[String, java.util.List[String]]
+                                java.util.Map[String, java.util.Map[String, 
java.util.Set[String]]],
+                                java.util.Set[String]
                             ) =
                                 if (ctxWordElems.isEmpty)
-                                    (Collections.emptyMap(), 
Collections.emptyMap())
+                                    (Collections.emptyMap(), 
Collections.emptySet())
                                 else {
                                     (
                                         ctxWordElems.map(e =>
                                             e.getId ->
-                                                e.getValues.asScala.map(p => 
p.getName -> p.getSynonyms).toMap.asJava
+                                                e.getValues.asScala.map(p => 
p.getName -> {
+                                                    val set: util.Set[String] 
= new util.HashSet(p.getSynonyms)
+
+                                                    set
+                                                }).toMap.asJava
                                         ).toMap.asJava,
-                                        wrapper.samples.map(p => p._1 -> 
p._2.flatMap(p => p).asJava).toMap.asJava
+                                        wrapper.samples.flatMap(_._2.flatMap(p 
=> p)).asJava
                                     )
                                 }
 
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
index ad80245..2d0bf58 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala
@@ -22,8 +22,10 @@ import org.apache.nlpcraft.server.mdo.impl._
 
 @NCMdoEntity(sql = false)
 case class NCModelMLConfigMdo(
-    @NCMdoField values: Map[String /*Element ID*/, Map[/*Value*/String, 
/*Synonym*/Seq[String]]],
-    @NCMdoField samples: Map[String /*Element ID*/, Seq[String]/*Samples*/]
+    @NCMdoField probeId: String,
+    @NCMdoField modelId: String,
+    @NCMdoField values: Map[String /*Element ID*/, Map[/*Value*/String, 
/*Synonym*/Set[String]]],
+    @NCMdoField samples: Set[String]
 )
 /**
   * Probe model MDO.
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index e420676..097a3ca 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -30,6 +30,7 @@ import org.apache.nlpcraft.server.mdo.NCModelMLConfigMdo
 import org.apache.nlpcraft.server.nlp.core.{NCNlpNerEnricher, 
NCNlpServerManager}
 import org.apache.nlpcraft.server.nlp.enrichers.basenlp.NCBaseNlpEnricher
 import 
org.apache.nlpcraft.server.nlp.enrichers.coordinate.NCCoordinatesEnricher
+import org.apache.nlpcraft.server.nlp.enrichers.ctxword.NCContextWordEnricher
 import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateEnricher
 import org.apache.nlpcraft.server.nlp.enrichers.geo.NCGeoEnricher
 import org.apache.nlpcraft.server.nlp.enrichers.numeric.NCNumericEnricher
@@ -125,6 +126,8 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
                     NCCoordinatesEnricher.enrich(s, span)
             }
 
+            NCContextWordEnricher.enrich(s, span)
+
             ner(s, enabledBuiltInToks)
 
             prepareAsciiTable(s).info(logger, Some(s"Sentence enriched: 
'$normTxt'"))
@@ -160,12 +163,13 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
             catching(wrapIE) {
                 cache(normTxt) match {
                     case Some(h) =>
-                        if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
-                            prepareAsciiTable(h.sentence).info(logger, 
Some(s"Sentence enriched (from cache): '$normTxt'"))
-
-                            h.sentence
-                        }
-                        else
+                        // TODO: remove
+//                        if (h.enabledBuiltInTokens == 
normEnabledBuiltInToks) {
+//                            prepareAsciiTable(h.sentence).info(logger, 
Some(s"Sentence enriched (from cache): '$normTxt'"))
+//
+//                            h.sentence
+//                        }
+//                        else
                             process(srvReqId, normTxt, enabledBuiltInToks, 
mlConf, span)
                     case None =>
                         process(srvReqId, normTxt, enabledBuiltInToks, mlConf, 
span)
@@ -280,7 +284,8 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
                 () => NCDateEnricher.start(span),
                 () => NCNumericEnricher.start(span),
                 () => NCGeoEnricher.start(span),
-                () => NCCoordinatesEnricher.start(span)
+                () => NCCoordinatesEnricher.start(span),
+                () => NCContextWordEnricher.start(span)
             )
         }
 
@@ -298,6 +303,7 @@ object NCServerEnrichmentManager extends NCService with 
NCIgniteInstance {
         ackStopping()
 
         if (Config.isBuiltInEnrichers) {
+            NCContextWordEnricher.stop(span)
             NCCoordinatesEnricher.stop(span)
             NCGeoEnricher.stop(span)
             NCNumericEnricher.stop(span)
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/ContextWordEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/ContextWordEnricher.scala
deleted file mode 100644
index c2dd843..0000000
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/ContextWordEnricher.scala
+++ /dev/null
@@ -1,51 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one or more
- * contributor license agreements.  See the NOTICE file distributed with
- * this work for additional information regarding copyright ownership.
- * The ASF licenses this file to You under the Apache License, Version 2.0
- * (the "License"); you may not use this file except in compliance with
- * the License.  You may obtain a copy of the License at
- *
- *      https://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing, software
- * distributed under the License is distributed on an "AS IS" BASIS,
- * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
- * See the License for the specific language governing permissions and
- * limitations under the License.
- */
-
-package org.apache.nlpcraft.server.nlp.enrichers.ctxword
-
-import io.opencensus.trace.Span
-import org.apache.nlpcraft.common.NCService
-import org.apache.nlpcraft.common.nlp.NCNlpSentence
-import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
-
-/**
-  * ContextWord enricher.
-  */
-object ContextWordEnricher extends NCServerEnricher {
-    override def start(parent: Span = null): NCService = 
startScopedSpan("start", parent) { _ =>
-        ackStarting()
-        ackStarted()
-    }
-
-    override def stop(parent: Span = null): Unit = startScopedSpan("stop", 
parent) { _ =>
-        ackStopping()
-        ackStopped()
-    }
-
-    override def enrich(ns: NCNlpSentence, parent: Span): Unit = {
-        ns.mlConfig match {
-            case Some(cfg) =>
-                val nouns = ns.tokens.filter(_.pos.startsWith("N"))
-
-                if (nouns.nonEmpty) {
-                    nouns
-                }
-
-            case None => // No-op.
-        }
-    }
-}
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
new file mode 100644
index 0000000..4d61f56
--- /dev/null
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala
@@ -0,0 +1,148 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.server.nlp.enrichers.ctxword
+
+import io.opencensus.trace.Span
+import org.apache.nlpcraft.common.nlp.NCNlpSentence
+import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer
+import org.apache.nlpcraft.common.{NCE, NCService}
+import org.apache.nlpcraft.server.mdo.NCModelMLConfigMdo
+import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher
+import org.apache.nlpcraft.server.sugsyn.{NCSuggestSynonymManager, 
NCSuggestionElement, NCWordSuggestion}
+
+import scala.collection.mutable
+import scala.concurrent.Await
+import scala.concurrent.duration.Duration
+
+/**
+  * ContextWord enricher.
+  */
+object NCContextWordEnricher extends NCServerEnricher {
+    case class Key(probeId: String, modelId: String)
+    case class Word(word: String, stem: String)
+
+    object Word {
+        def apply(word: String) = new Word(word, NCNlpPorterStemmer.stem(word))
+    }
+
+    @volatile private var samples: mutable.HashMap[Key, Map[/** Element ID */ 
String, Map[/** Stem */ String, /** Confidence */ Double]]] = _
+    @volatile private var words: mutable.HashMap[Key, Map[/** Element ID */ 
String, Map[/** Stem */ String, /** Confidence */ Double]]] = _
+
+    override def start(parent: Span = null): NCService = 
startScopedSpan("start", parent) { _ =>
+        ackStarting()
+
+        samples = mutable.HashMap.empty
+        words = mutable.HashMap.empty
+
+        ackStarted()
+    }
+
+    override def stop(parent: Span = null): Unit = startScopedSpan("stop", 
parent) { _ =>
+        ackStopping()
+
+        words = null
+        samples = null
+
+        ackStopped()
+    }
+
+    @throws[NCE]
+    private def askSamples(cfg: NCModelMLConfigMdo): Map[String, Map[String, 
Double]] = {
+        println("cfg=" + cfg)
+
+        def parseSample(elemId: String, sample: String, synsStem: Map[String, 
String]): Seq[NCSuggestionElement] = {
+            val pairs = sample.split(" 
").map(_.strip()).filter(_.nonEmpty).zipWithIndex
+
+            println("sample=" + sample)
+            println("pairs=" + pairs)
+
+            pairs.flatMap { case (sampleWord, idx) =>
+                val sampleWordStem: String = 
NCNlpPorterStemmer.stem(sampleWord)
+
+                synsStem.
+                    filter(p => p._2.contains(sampleWordStem)).
+                    map { case (_, synWord) =>
+                        NCSuggestionElement(
+                            elemId,
+                            pairs.map { case (w, i) => if (i != idx) w else 
synWord}.mkString(" "),
+                            Seq(idx)
+                        )
+                    }
+            }
+        }
+
+        case class Record(sentence: NCSuggestionElement, elementName: String)
+
+        val recs: Map[String, Seq[Record]] =
+            (for (
+                (elemId, map) <- cfg.values;
+                (elemName, syns) <- map;
+                synsStem = syns.map(p => NCNlpPorterStemmer.stem(p) -> 
p).toMap;
+                sample <- cfg.samples;
+                sugg <- parseSample(elemId, sample, synsStem)
+            )
+                yield (elemId, Record(sugg, elemName))).groupBy(_._1).map(p => 
p._1 -> p._2.values.toSeq)
+
+        println("recs=" + recs)
+        println("recs.size=" + recs.size)
+
+        // TODO:
+        val res: Map[String, Seq[NCWordSuggestion]] =
+            if (recs.nonEmpty)
+                
Await.result(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2.map(_.sentence)).toSeq),
 Duration.Inf)
+            else
+                Map.empty
+
+        // TODO: elemName
+        res.map { case (elemId, suggs) =>
+            elemId -> suggs.map(p => NCNlpPorterStemmer.stem(p.word) -> 
p.score).toMap
+        }
+    }
+
+    override def enrich(ns: NCNlpSentence, parent: Span): Unit = {
+        ns.mlConfig match {
+            case Some(cfg) =>
+                val key = Key(cfg.probeId, cfg.modelId)
+
+                val ex =
+                    samples.synchronized { samples.get(key) } match {
+                        case Some(data) => data
+                        case None =>
+                            val data = askSamples(cfg)
+
+                            samples.synchronized { samples += key -> data }
+
+                            data
+                    }
+
+                println("ex="+ex)
+
+                val ws: Map[String, Map[String, Double]] = 
words.getOrElse(key, Map.empty)
+
+                val nouns = ns.tokens.filter(_.pos.startsWith("N"))
+
+                for (n <- nouns; (elemId, stems) <- ex if 
stems.contains(n.stem))
+                    println("EX FOUND elemId=" + elemId + ", n=" + n + ", 
stem=" + stems.toSeq.sortBy(-_._2))
+
+                for (n <- nouns; (elemId, stems) <- ws if 
stems.contains(n.stem))
+                    println("WS FOUND elemId=" + elemId + ", stem=" + stems)
+
+            case None => // No-op.
+        }
+    }
+}
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
index 67acba8..5e11883 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala
@@ -31,7 +31,7 @@ import org.apache.nlpcraft.common.version.NCVersion
 import org.apache.nlpcraft.common.{NCService, _}
 import org.apache.nlpcraft.probe.mgrs.NCProbeMessage
 import org.apache.nlpcraft.server.company.NCCompanyManager
-import org.apache.nlpcraft.server.mdo.{NCCompanyMdo, NCModelMLConfigMdo, 
NCProbeMdo, NCProbeModelMdo, NCUserMdo}
+import org.apache.nlpcraft.server.mdo._
 import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnrichmentManager
 import org.apache.nlpcraft.server.proclog.NCProcessLogManager
 import org.apache.nlpcraft.server.query.NCQueryManager
@@ -45,7 +45,7 @@ import java.util.Collections
 import java.util.concurrent.ConcurrentHashMap
 import scala.collection.mutable
 import scala.concurrent.{ExecutionContext, Future, Promise}
-import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsScala, 
SetHasAsScala}
+import scala.jdk.CollectionConverters.{MapHasAsScala, SetHasAsScala}
 import scala.util.{Failure, Success}
 
 /**
@@ -614,8 +614,8 @@ object NCProbeManager extends NCService {
                             String,
                             String,
                             java.util.Set[String],
-                            java.util.Map[String, java.util.Map[String, 
java.util.List[String]]],
-                            java.util.Map[String, java.util.List[String]]
+                            java.util.Map[String, java.util.Map[String, 
java.util.Set[String]]],
+                            java.util.Set[String]
                         )]]("PROBE_MODELS").
                         map {
                             case (
@@ -630,7 +630,7 @@ object NCProbeManager extends NCService {
                                 require(mdlName != null)
                                 require(mdlVer != null)
                                 require(enabledBuiltInToks != null)
-                                require(values.isEmpty ^ samples.isEmpty)
+                                require(values.isEmpty && samples.isEmpty || 
!values.isEmpty && !samples.isEmpty)
 
                                 NCProbeModelMdo(
                                     id = mdlId,
@@ -641,8 +641,10 @@ object NCProbeManager extends NCService {
                                         if (!values.isEmpty)
                                             Some(
                                                 NCModelMLConfigMdo(
-                                                    values = 
values.asScala.map(p => p._1 -> p._2.asScala.map(p => p._1 -> 
p._2.asScala.toSeq).toMap).toMap,
-                                                    samples = 
samples.asScala.map(p => p._1 -> p._2.asScala.toSeq).toMap
+                                                    probeId = probeId,
+                                                    modelId = mdlId,
+                                                    values = 
values.asScala.map(p => p._1 -> p._2.asScala.map(p => p._1 -> 
p._2.asScala.toSet).toMap).toMap,
+                                                    samples = 
samples.asScala.toSet
                                                 )
                                             )
                                         else
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
index 45ab892..741b697 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
@@ -807,7 +807,7 @@ class NCBasicRestApi extends NCRestApi with LazyLogging 
with NCOpenCensusTrace w
 
                 checkModelId(req.mdlId, admUsr.companyId)
 
-                val fut = NCSuggestSynonymManager.suggest(req.mdlId, 
req.minScore, span)
+                val fut = NCSuggestSynonymManager.suggestModel(req.mdlId, 
req.minScore, span)
 
                 successWithJs(
                     fut.collect {
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
index 02366ba..15334b3 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala
@@ -24,7 +24,7 @@ import org.apache.http.HttpResponse
 import org.apache.http.client.ResponseHandler
 import org.apache.http.client.methods.HttpPost
 import org.apache.http.entity.StringEntity
-import org.apache.http.impl.client.HttpClients
+import org.apache.http.impl.client.{CloseableHttpClient, HttpClients}
 import org.apache.http.util.EntityUtils
 import org.apache.nlpcraft.common._
 import org.apache.nlpcraft.common.config.NCConfigurable
@@ -55,7 +55,7 @@ object NCSuggestSynonymManager extends NCService {
     private final val MIN_CNT_MODEL = 20
 
     private final val GSON = new Gson
-    private final val TYPE_RESP = new 
TypeToken[util.List[util.List[Suggestion]]]() {}.getType
+    private final val TYPE_RESP = new 
TypeToken[util.List[util.List[NCWordSuggestion]]]() {}.getType
     private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
 
     private implicit final val ec: ExecutionContext = 
NCThreadPoolManager.getSystemContext
@@ -64,7 +64,7 @@ object NCSuggestSynonymManager extends NCService {
         val urlOpt: Option[String] = 
getStringOpt("nlpcraft.server.ctxword.url")
     }
 
-    private final val HANDLER: ResponseHandler[Seq[Seq[Suggestion]]] =
+    private final val HANDLER: ResponseHandler[Seq[Seq[NCWordSuggestion]]] =
         (resp: HttpResponse) => {
             val code = resp.getStatusLine.getStatusCode
             val e = resp.getEntity
@@ -76,7 +76,7 @@ object NCSuggestSynonymManager extends NCService {
 
             code match {
                 case 200 =>
-                    val data: util.List[util.List[Suggestion]] = 
GSON.fromJson(js, TYPE_RESP)
+                    val data: util.List[util.List[NCWordSuggestion]] = 
GSON.fromJson(js, TYPE_RESP)
 
                     data.asScala.map(p => if (p.isEmpty) Seq.empty else 
p.asScala.tail.toSeq).toSeq
 
@@ -90,7 +90,7 @@ object NCSuggestSynonymManager extends NCService {
             }
         }
 
-    case class Suggestion(word: String, score: Double)
+
     case class RequestData(sentence: String, ex: String, elmId: String, index: 
Int)
     case class RestRequestSentence(text: String, indexes: util.List[Int])
     case class RestRequest(sentences: util.List[RestRequestSentence], limit: 
Int, minScore: Double)
@@ -111,6 +111,19 @@ object NCSuggestSynonymManager extends NCService {
     private def toStem(s: String): String = 
split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
     private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
 
+    @throws[NCE]
+    private def mkUrl = s"${Config.urlOpt.getOrElse(throw new NCE("Context 
word server is not configured."))}/suggestions"
+
+    private def request(cli: CloseableHttpClient, post: HttpPost): 
Seq[Seq[NCWordSuggestion]] = {
+        val resps: Seq[Seq[NCWordSuggestion]] =
+            try
+                cli.execute(post, HANDLER)
+            finally
+                post.releaseConnection()
+
+        resps
+    }
+
     /**
      *
      * @param seq1
@@ -131,14 +144,14 @@ object NCSuggestSynonymManager extends NCService {
     }
 
     /**
-     *
+     * TODO: refactor async call (waiting should be dropped.)
      * @param mdlId
      * @param minScoreOpt
      * @param parent
      * @return
      */
-    def suggest(mdlId: String, minScoreOpt: Option[Double], parent: Span = 
null): Future[NCSuggestSynonymResult] =
-        startScopedSpan("inspect", parent, "mdlId" -> mdlId) { _ =>
+    def suggestModel(mdlId: String, minScoreOpt: Option[Double], parent: Span 
= null): Future[NCSuggestSynonymResult] =
+        startScopedSpan("suggest", parent, "mdlId" -> mdlId) { _ =>
             val now = U.now()
 
             val promise = Promise[NCSuggestSynonymResult]()
@@ -178,7 +191,7 @@ object NCSuggestSynonymManager extends NCService {
                         if (mdlExs.isEmpty)
                             onError(s"Missed intents samples for: `$mdlId``")
                         else {
-                            val url = s"${Config.urlOpt.getOrElse(throw new 
NCE("Context word server is not configured."))}/suggestions"
+                            val url = mkUrl
 
                             val allSamplesCnt = mdlExs.map { case (_, samples) 
=> samples.size }.sum
 
@@ -281,9 +294,9 @@ object NCSuggestSynonymManager extends NCService {
                             if (allReqsCnt == 0)
                                 onError(s"Suggestions cannot be generated for 
model: '$mdlId'")
                             else {
-                                val allSgsts = new ConcurrentHashMap[String, 
util.List[Suggestion]]()
+                                val allSgsts = new ConcurrentHashMap[String, 
util.List[NCWordSuggestion]]()
                                 val cdl = new CountDownLatch(1)
-                                val debugs = 
mutable.HashMap.empty[RequestData, Seq[Suggestion]]
+                                val debugs = 
mutable.HashMap.empty[RequestData, Seq[NCWordSuggestion]]
                                 val cnt = new AtomicInteger(0)
 
                                 val cli = HttpClients.createDefault
@@ -308,10 +321,7 @@ object NCSuggestSynonymManager extends NCService {
                                                 )
                                             )
 
-                                            val resps: Seq[Seq[Suggestion]] = 
try
-                                                cli.execute(post, HANDLER)
-                                            finally
-                                                post.releaseConnection()
+                                            val resps = request(cli, post)
 
                                             require(batch.size == resps.size, 
s"Batch: ${batch.size}, responses: ${resps.size}")
 
@@ -322,7 +332,7 @@ object NCSuggestSynonymManager extends NCService {
                                             logger.debug(s"Executed: $i 
requests...")
 
                                             allSgsts.
-                                                computeIfAbsent(elemId, (_: 
String) => new CopyOnWriteArrayList[Suggestion]()).
+                                                computeIfAbsent(elemId, (_: 
String) => new CopyOnWriteArrayList[NCWordSuggestion]()).
                                                 addAll(resps.flatten.asJava)
 
                                             if (i == allReqsCnt)
@@ -441,6 +451,86 @@ object NCSuggestSynonymManager extends NCService {
         }
 
     /**
+      *
+      * @param sens
+      * @param minScoreOpt
+      * @param parent
+      * @return
+      */
+    def suggestWords(sens: Seq[NCSuggestionElement], minScoreOpt: 
Option[Double] = None, parent: Span = null):
+        Future[Map[String, Seq[NCWordSuggestion]]] =
+        startScopedSpan("suggest", parent) { _ =>
+            val promise = Promise[Map[String, Seq[NCWordSuggestion]]]()
+
+            case class Result(elementId: String, suggestions 
:Seq[NCWordSuggestion])
+
+            val data = new CopyOnWriteArrayList[Result]()
+            val cli = HttpClients.createDefault
+            val batches = sens.sliding(BATCH_SIZE, 
BATCH_SIZE).map(_.toSeq).toSeq
+            val cnt = new AtomicInteger(0)
+
+            for (batch <- batches)
+                U.asFuture(
+                    _ => {
+                        val post = new HttpPost(mkUrl)
+
+                        post.setHeader("Content-Type", "application/json")
+                        post.setEntity(
+                            new StringEntity(
+                                GSON.toJson(
+                                    RestRequest(
+                                        sentences = batch.map(p => 
RestRequestSentence(p.sample, p.indexes.asJava)).asJava,
+                                        minScore = 0,
+                                        limit = MAX_LIMIT
+                                    )
+                                ),
+                                "UTF-8"
+                            )
+                        )
+
+                        val resps = request(cli, post)
+
+                        require(batch.size == resps.size, s"Batch: 
${batch.size}, responses: ${resps.size}")
+
+                        data.addAll(batch.zip(resps).map { case (req, resp) => 
Result(req.elementId, resp) }.asJava )
+
+                        if (cnt.incrementAndGet() == batches.size) {
+                            val min = minScoreOpt.getOrElse(DFLT_MIN_SCORE)
+
+                            val map = data.asScala.groupBy(_.elementId).map(p 
=>
+                                p._1 ->
+                                p._2.
+                                    map(_.suggestions.map(p => 
(toStem(p.word), p.score))).
+                                    map(_.groupBy(_._1)).
+                                    flatMap(p =>
+                                        p.map(p => p._1 ->
+                                            p._1 -> {
+                                            val scores = p._2.map(_._2)
+
+                                            scores.sum / scores.size
+                                        }
+                                    ).
+                                        filter(_._2 >= min).
+                                        map(p => NCWordSuggestion(p._1._2, 
p._2)).toSeq
+                                ).toSeq)
+
+                            promise.success(map)
+                        }
+                        ()
+                    },
+                    (e: Throwable) => {
+                        U.prettyError(logger, "Unexpected error:", e)
+
+                        promise.failure(e)
+
+                    },
+                    (_: Unit) => ()
+                )
+
+            promise.future
+        }
+
+    /**
      *
      * @param parent Optional parent span.
      * @return
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestionElement.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestionElement.scala
new file mode 100644
index 0000000..3634a5a
--- /dev/null
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestionElement.scala
@@ -0,0 +1,26 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.server.sugsyn
+
+/**
+  * Element/sample record used by the suggestion code. NOTE(review): exact role is not visible here, confirm.
+  * @param elementId Element ID (presumably the model element the sample belongs to; confirm).
+  * @param sample Sample text.
+  * @param indexes Integer indexes (presumably word positions inside `sample`; confirm with the caller).
+  */
+case class NCSuggestionElement(elementId: String, sample: String, indexes: 
Seq[Int])
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCWordSuggestion.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCWordSuggestion.scala
new file mode 100644
index 0000000..a09b2ca
--- /dev/null
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCWordSuggestion.scala
@@ -0,0 +1,25 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.server.sugsyn
+
+/**
+  * Suggested word paired with its score.
+  * @param word Suggested word (the aggregation code earlier in this patch passes a stemmed form).
+  * @param score Suggestion score (that same code passes the average of the scores collected per stem).
+  */
+case class NCWordSuggestion(word: String, score: Double)
\ No newline at end of file
diff --git 
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
new file mode 100644
index 0000000..4eab17d
--- /dev/null
+++ 
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -0,0 +1,102 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.model.ctxword
+
+import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentSample, 
NCIntentTerm, NCModel, NCResult, NCToken, NCValue}
+import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment}
+import org.junit.jupiter.api.Assertions.{assertFalse, assertTrue}
+import org.junit.jupiter.api.Test
+
+import java.util
+import scala.jdk.CollectionConverters.{SeqHasAsJava, SetHasAsJava}
+
+/**
+  * Test model whose elements are all declared with context word support enabled.
+  */
+class NCContextWordSpecModel extends NCModel {
+    /** Element value; its synonym list starts with the value name itself. */
+    case class Value(name: String, syns: String*) extends NCValue {
+        override def getName: String = name
+        override def getSynonyms: util.List[String] = (name :: syns.toList).asJava
+    }
+
+    /** Model element that always reports context word support. */
+    case class Elem(id: String, values: NCValue*) extends NCElement {
+        override def getId: String = id
+        override def getValues: util.List[NCValue] = values.asJava
+        override def isContextWordSupport: Boolean = true
+    }
+
+    override def getId: String = getClass.getSimpleName
+    override def getName: String = getClass.getSimpleName
+    override def getVersion: String = "1.0.0"
+
+    // Explicit element type avoids a separate upcasting pass before the Java conversion.
+    override def getElements: util.Set[NCElement] =
+        Set[NCElement](
+            Elem("class:carBrand", Value("BMW")),
+            Elem("class:animal", Value("fox"), Value("cat", "tomcat")),
+            Elem("class:weather", Value("temperature"), Value("rain"), Value("sun"))
+        ).asJava
+
+    /** Intent callback: prints the tokens matched for each term and returns a text result. */
+    @NCIntentSample(
+        Array(
+            "I like drive my new BMW",
+            "BMW has the best engine",
+            "Luxury cars like Mercedes and BMW  are prime targets",
+            "BMW will install side air bags up front",
+            "A wild cat is very dangerous",
+            "A fox eats hens",
+            "The fox was already in your chicken house",
+            "What is the local temperature",
+            "This is the first day of heavy rain"
+        )
+    )
+    @NCIntent(
+        "intent=classification " +
+        "term(carBrands)~{tok_id() == 'class:carBrand'}* " +
+        "term(animals)~{tok_id() == 'class:animal'}* " +
+        "term(weathers)~{tok_id() == 'class:weather'}* "
+    )
+    def onMatch(
+        @NCIntentTerm("carBrands") carBrands: List[NCToken],
+        @NCIntentTerm("animals") animals: List[NCToken],
+        @NCIntentTerm("weathers") weathers: List[NCToken]
+    ): NCResult = {
+        println(s"carBrands=$carBrands")
+        println(s"animals=$animals")
+        println(s"weathers=$weathers")
+
+        NCResult.text("ok")
+    }
+}
+
+/**
+  * Context word test: sends a single request through the test client.
+  * @see NCContextWordSpecModel
+  */
+@NCTestEnvironment(model = classOf[NCContextWordSpecModel], startClient = true)
+class NCContextWordSpec extends NCTestContext {
+    @Test
+    @throws[Exception]
+    private[ctxword] def test(): Unit = {
+        // The response is not asserted here; the call only has to complete without throwing.
+        getClient.ask("I want have a dog, fox, Mercedes, reno, winter, Porsche")
+    }
+}

[incubator-nlpcraft] 03/03: WIP.

Reply via email to