This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-70_NEW in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 692532bd6e02383c7df3ba8cf73d00057995f78d Author: Sergey Kamov <[email protected]> AuthorDate: Thu Jun 17 21:19:19 2021 +0300 WIP. --- .../probe/mgrs/conn/NCConnectionManager.scala | 16 ++- .../nlpcraft/server/mdo/NCProbeModelMdo.scala | 6 +- .../nlp/enrichers/NCServerEnrichmentManager.scala | 20 ++- .../enrichers/ctxword/ContextWordEnricher.scala | 51 ------- .../enrichers/ctxword/NCContextWordEnricher.scala | 148 +++++++++++++++++++++ .../nlpcraft/server/probe/NCProbeManager.scala | 16 ++- .../nlpcraft/server/rest/NCBasicRestApi.scala | 2 +- .../server/sugsyn/NCSuggestSynonymManager.scala | 122 ++++++++++++++--- .../server/sugsyn/NCSuggestionElement.scala | 26 ++++ .../nlpcraft/server/sugsyn/NCWordSuggestion.scala | 25 ++++ .../nlpcraft/model/ctxword/NCContextWordSpec.scala | 102 ++++++++++++++ 11 files changed, 444 insertions(+), 90 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala index c911342..c712ed7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala @@ -34,7 +34,7 @@ import java.util import java.util.concurrent.CountDownLatch import java.util.{Collections, Properties, TimeZone} import scala.collection.mutable -import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, SeqHasAsJava, SetHasAsScala} +import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsJava, SetHasAsJava, SetHasAsScala} /** * Probe down/up link connection manager. @@ -221,18 +221,22 @@ object NCConnectionManager extends NCService { values, samples ): ( - java.util.Map[String, java.util.Map[String, java.util.List[String]]], - java.util.Map[String, java.util.List[String]] + java.util.Map[String, java.util.Map[String, java.util.Set[String]]], + java.util.Set[String] ) = if (ctxWordElems.isEmpty) - (Collections.emptyMap(), Collections.emptyMap()) + (Collections.emptyMap(), Collections.emptySet()) else { ( ctxWordElems.map(e => e.getId -> - e.getValues.asScala.map(p => p.getName -> p.getSynonyms).toMap.asJava + e.getValues.asScala.map(p => p.getName -> { + val set: util.Set[String] = new util.HashSet(p.getSynonyms) + + set + }).toMap.asJava ).toMap.asJava, - wrapper.samples.map(p => p._1 -> p._2.flatMap(p => p).asJava).toMap.asJava + wrapper.samples.flatMap(_._2.flatMap(p => p)).asJava ) } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala index ad80245..2d0bf58 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/mdo/NCProbeModelMdo.scala @@ -22,8 +22,10 @@ import org.apache.nlpcraft.server.mdo.impl._ @NCMdoEntity(sql = false) case class NCModelMLConfigMdo( - @NCMdoField values: Map[String /*Element ID*/, Map[/*Value*/String, /*Synonym*/Seq[String]]], - @NCMdoField samples: Map[String /*Element ID*/, Seq[String]/*Samples*/] + @NCMdoField probeId: String, + @NCMdoField modelId: String, + @NCMdoField values: Map[String /*Element ID*/, Map[/*Value*/String, /*Synonym*/Set[String]]], + @NCMdoField samples: Set[String] ) /** * Probe model MDO. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala index e420676..097a3ca 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala @@ -30,6 +30,7 @@ import org.apache.nlpcraft.server.mdo.NCModelMLConfigMdo import org.apache.nlpcraft.server.nlp.core.{NCNlpNerEnricher, NCNlpServerManager} import org.apache.nlpcraft.server.nlp.enrichers.basenlp.NCBaseNlpEnricher import org.apache.nlpcraft.server.nlp.enrichers.coordinate.NCCoordinatesEnricher +import org.apache.nlpcraft.server.nlp.enrichers.ctxword.NCContextWordEnricher import org.apache.nlpcraft.server.nlp.enrichers.date.NCDateEnricher import org.apache.nlpcraft.server.nlp.enrichers.geo.NCGeoEnricher import org.apache.nlpcraft.server.nlp.enrichers.numeric.NCNumericEnricher @@ -125,6 +126,8 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance { NCCoordinatesEnricher.enrich(s, span) } + NCContextWordEnricher.enrich(s, span) + ner(s, enabledBuiltInToks) prepareAsciiTable(s).info(logger, Some(s"Sentence enriched: '$normTxt'")) @@ -160,12 +163,13 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance { catching(wrapIE) { cache(normTxt) match { case Some(h) => - if (h.enabledBuiltInTokens == normEnabledBuiltInToks) { - prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'")) - - h.sentence - } - else + // TODO: remove +// if (h.enabledBuiltInTokens == normEnabledBuiltInToks) { +// prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'")) +// +// h.sentence +// } +// else process(srvReqId, normTxt, enabledBuiltInToks, mlConf, span) case None => process(srvReqId, normTxt, enabledBuiltInToks, mlConf, span) @@ -280,7 +284,8 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance { () => NCDateEnricher.start(span), () => NCNumericEnricher.start(span), () => NCGeoEnricher.start(span), - () => NCCoordinatesEnricher.start(span) + () => NCCoordinatesEnricher.start(span), + () => NCContextWordEnricher.start(span) ) } @@ -298,6 +303,7 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance { ackStopping() if (Config.isBuiltInEnrichers) { + NCContextWordEnricher.stop(span) NCCoordinatesEnricher.stop(span) NCGeoEnricher.stop(span) NCNumericEnricher.stop(span) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/ContextWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/ContextWordEnricher.scala deleted file mode 100644 index c2dd843..0000000 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/ContextWordEnricher.scala +++ /dev/null @@ -1,51 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nlpcraft.server.nlp.enrichers.ctxword - -import io.opencensus.trace.Span -import org.apache.nlpcraft.common.NCService -import org.apache.nlpcraft.common.nlp.NCNlpSentence -import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher - -/** - * ContextWord enricher. - */ -object ContextWordEnricher extends NCServerEnricher { - override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ => - ackStarting() - ackStarted() - } - - override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ => - ackStopping() - ackStopped() - } - - override def enrich(ns: NCNlpSentence, parent: Span): Unit = { - ns.mlConfig match { - case Some(cfg) => - val nouns = ns.tokens.filter(_.pos.startsWith("N")) - - if (nouns.nonEmpty) { - nouns - } - - case None => // No-op. - } - } -} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala new file mode 100644 index 0000000..4d61f56 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordEnricher.scala @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.server.nlp.enrichers.ctxword + +import io.opencensus.trace.Span +import org.apache.nlpcraft.common.nlp.NCNlpSentence +import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer +import org.apache.nlpcraft.common.{NCE, NCService} +import org.apache.nlpcraft.server.mdo.NCModelMLConfigMdo +import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnricher +import org.apache.nlpcraft.server.sugsyn.{NCSuggestSynonymManager, NCSuggestionElement, NCWordSuggestion} + +import scala.collection.mutable +import scala.concurrent.Await +import scala.concurrent.duration.Duration + +/** + * ContextWord enricher. + */ +object NCContextWordEnricher extends NCServerEnricher { + case class Key(probeId: String, modelId: String) + case class Word(word: String, stem: String) + + object Word { + def apply(word: String) = new Word(word, NCNlpPorterStemmer.stem(word)) + } + + @volatile private var samples: mutable.HashMap[Key, Map[/** Element ID */ String, Map[/** Stem */ String, /** Confidence */ Double]]] = _ + @volatile private var words: mutable.HashMap[Key, Map[/** Element ID */ String, Map[/** Stem */ String, /** Confidence */ Double]]] = _ + + override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ => + ackStarting() + + samples = mutable.HashMap.empty + words = mutable.HashMap.empty + + ackStarted() + } + + override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ => + ackStopping() + + words = null + samples = null + + ackStopped() + } + + @throws[NCE] + private def askSamples(cfg: NCModelMLConfigMdo): Map[String, Map[String, Double]] = { + println("cfg=" + cfg) + + def parseSample(elemId: String, sample: String, synsStem: Map[String, String]): Seq[NCSuggestionElement] = { + val pairs = sample.split(" ").map(_.strip()).filter(_.nonEmpty).zipWithIndex + + println("sample=" + sample) + println("pairs=" + pairs) + + pairs.flatMap { case (sampleWord, idx) => + val sampleWordStem: String = NCNlpPorterStemmer.stem(sampleWord) + + synsStem. + filter(p => p._2.contains(sampleWordStem)). + map { case (_, synWord) => + NCSuggestionElement( + elemId, + pairs.map { case (w, i) => if (i != idx) w else synWord}.mkString(" "), + Seq(idx) + ) + } + } + } + + case class Record(sentence: NCSuggestionElement, elementName: String) + + val recs: Map[String, Seq[Record]] = + (for ( + (elemId, map) <- cfg.values; + (elemName, syns) <- map; + synsStem = syns.map(p => NCNlpPorterStemmer.stem(p) -> p).toMap; + sample <- cfg.samples; + sugg <- parseSample(elemId, sample, synsStem) + ) + yield (elemId, Record(sugg, elemName))).groupBy(_._1).map(p => p._1 -> p._2.values.toSeq) + + println("recs=" + recs) + println("recs.size=" + recs.size) + + // TODO: + val res: Map[String, Seq[NCWordSuggestion]] = + if (recs.nonEmpty) + Await.result(NCSuggestSynonymManager.suggestWords(recs.flatMap(_._2.map(_.sentence)).toSeq), Duration.Inf) + else + Map.empty + + // TODO: elemName + res.map { case (elemId, suggs) => + elemId -> suggs.map(p => NCNlpPorterStemmer.stem(p.word) -> p.score).toMap + } + } + + override def enrich(ns: NCNlpSentence, parent: Span): Unit = { + ns.mlConfig match { + case Some(cfg) => + val key = Key(cfg.probeId, cfg.modelId) + + val ex = + samples.synchronized { samples.get(key) } match { + case Some(data) => data + case None => + val data = askSamples(cfg) + + samples.synchronized { samples += key -> data } + + data + } + + println("ex="+ex) + + val ws: Map[String, Map[String, Double]] = words.getOrElse(key, Map.empty) + + val nouns = ns.tokens.filter(_.pos.startsWith("N")) + + for (n <- nouns; (elemId, stems) <- ex if stems.contains(n.stem)) + println("EX FOUND elemId=" + elemId + ", n=" + n + ", stem=" + stems.toSeq.sortBy(-_._2)) + + for (n <- nouns; (elemId, stems) <- ws if stems.contains(n.stem)) + println("WS FOUND elemId=" + elemId + ", stem=" + stems) + + case None => // No-op. + } + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala index 67acba8..5e11883 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/probe/NCProbeManager.scala @@ -31,7 +31,7 @@ import org.apache.nlpcraft.common.version.NCVersion import org.apache.nlpcraft.common.{NCService, _} import org.apache.nlpcraft.probe.mgrs.NCProbeMessage import org.apache.nlpcraft.server.company.NCCompanyManager -import org.apache.nlpcraft.server.mdo.{NCCompanyMdo, NCModelMLConfigMdo, NCProbeMdo, NCProbeModelMdo, NCUserMdo} +import org.apache.nlpcraft.server.mdo._ import org.apache.nlpcraft.server.nlp.enrichers.NCServerEnrichmentManager import org.apache.nlpcraft.server.proclog.NCProcessLogManager import org.apache.nlpcraft.server.query.NCQueryManager @@ -45,7 +45,7 @@ import java.util.Collections import java.util.concurrent.ConcurrentHashMap import scala.collection.mutable import scala.concurrent.{ExecutionContext, Future, Promise} -import scala.jdk.CollectionConverters.{ListHasAsScala, MapHasAsScala, SetHasAsScala} +import scala.jdk.CollectionConverters.{MapHasAsScala, SetHasAsScala} import scala.util.{Failure, Success} /** @@ -614,8 +614,8 @@ object NCProbeManager extends NCService { String, String, java.util.Set[String], - java.util.Map[String, java.util.Map[String, java.util.List[String]]], - java.util.Map[String, java.util.List[String]] + java.util.Map[String, java.util.Map[String, java.util.Set[String]]], + java.util.Set[String] )]]("PROBE_MODELS"). map { case ( @@ -630,7 +630,7 @@ object NCProbeManager extends NCService { require(mdlName != null) require(mdlVer != null) require(enabledBuiltInToks != null) - require(values.isEmpty ^ samples.isEmpty) + require(values.isEmpty && samples.isEmpty || !values.isEmpty && !samples.isEmpty) NCProbeModelMdo( id = mdlId, @@ -641,8 +641,10 @@ object NCProbeManager extends NCService { if (!values.isEmpty) Some( NCModelMLConfigMdo( - values = values.asScala.map(p => p._1 -> p._2.asScala.map(p => p._1 -> p._2.asScala.toSeq).toMap).toMap, - samples = samples.asScala.map(p => p._1 -> p._2.asScala.toSeq).toMap + probeId = probeId, + modelId = mdlId, + values = values.asScala.map(p => p._1 -> p._2.asScala.map(p => p._1 -> p._2.asScala.toSet).toMap).toMap, + samples = samples.asScala.toSet ) ) else diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala index 45ab892..741b697 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala @@ -807,7 +807,7 @@ class NCBasicRestApi extends NCRestApi with LazyLogging with NCOpenCensusTrace w checkModelId(req.mdlId, admUsr.companyId) - val fut = NCSuggestSynonymManager.suggest(req.mdlId, req.minScore, span) + val fut = NCSuggestSynonymManager.suggestModel(req.mdlId, req.minScore, span) successWithJs( fut.collect { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala index 02366ba..15334b3 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala @@ -24,7 +24,7 @@ import org.apache.http.HttpResponse import org.apache.http.client.ResponseHandler import org.apache.http.client.methods.HttpPost import org.apache.http.entity.StringEntity -import org.apache.http.impl.client.HttpClients +import org.apache.http.impl.client.{CloseableHttpClient, HttpClients} import org.apache.http.util.EntityUtils import org.apache.nlpcraft.common._ import org.apache.nlpcraft.common.config.NCConfigurable @@ -55,7 +55,7 @@ object NCSuggestSynonymManager extends NCService { private final val MIN_CNT_MODEL = 20 private final val GSON = new Gson - private final val TYPE_RESP = new TypeToken[util.List[util.List[Suggestion]]]() {}.getType + private final val TYPE_RESP = new TypeToken[util.List[util.List[NCWordSuggestion]]]() {}.getType private final val SEPARATORS = Seq('?', ',', '.', '-', '!') private implicit final val ec: ExecutionContext = NCThreadPoolManager.getSystemContext @@ -64,7 +64,7 @@ object NCSuggestSynonymManager extends NCService { val urlOpt: Option[String] = getStringOpt("nlpcraft.server.ctxword.url") } - private final val HANDLER: ResponseHandler[Seq[Seq[Suggestion]]] = + private final val HANDLER: ResponseHandler[Seq[Seq[NCWordSuggestion]]] = (resp: HttpResponse) => { val code = resp.getStatusLine.getStatusCode val e = resp.getEntity @@ -76,7 +76,7 @@ object NCSuggestSynonymManager extends NCService { code match { case 200 => - val data: util.List[util.List[Suggestion]] = GSON.fromJson(js, TYPE_RESP) + val data: util.List[util.List[NCWordSuggestion]] = GSON.fromJson(js, TYPE_RESP) data.asScala.map(p => if (p.isEmpty) Seq.empty else p.asScala.tail.toSeq).toSeq @@ -90,7 +90,7 @@ object NCSuggestSynonymManager extends NCService { } } - case class Suggestion(word: String, score: Double) + case class RequestData(sentence: String, ex: String, elmId: String, index: Int) case class RestRequestSentence(text: String, indexes: util.List[Int]) case class RestRequest(sentences: util.List[RestRequestSentence], limit: Int, minScore: Double) @@ -111,6 +111,19 @@ object NCSuggestSynonymManager extends NCService { private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ") private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s) + @throws[NCE] + private def mkUrl = s"${Config.urlOpt.getOrElse(throw new NCE("Context word server is not configured."))}/suggestions" + + private def request(cli: CloseableHttpClient, post: HttpPost): Seq[Seq[NCWordSuggestion]] = { + val resps: Seq[Seq[NCWordSuggestion]] = + try + cli.execute(post, HANDLER) + finally + post.releaseConnection() + + resps + } + /** * * @param seq1 @@ -131,14 +144,14 @@ object NCSuggestSynonymManager extends NCService { } /** - * + * TODO: refactor async call (waiting should be dropped.) * @param mdlId * @param minScoreOpt * @param parent * @return */ - def suggest(mdlId: String, minScoreOpt: Option[Double], parent: Span = null): Future[NCSuggestSynonymResult] = - startScopedSpan("inspect", parent, "mdlId" -> mdlId) { _ => + def suggestModel(mdlId: String, minScoreOpt: Option[Double], parent: Span = null): Future[NCSuggestSynonymResult] = + startScopedSpan("suggest", parent, "mdlId" -> mdlId) { _ => val now = U.now() val promise = Promise[NCSuggestSynonymResult]() @@ -178,7 +191,7 @@ object NCSuggestSynonymManager extends NCService { if (mdlExs.isEmpty) onError(s"Missed intents samples for: `$mdlId``") else { - val url = s"${Config.urlOpt.getOrElse(throw new NCE("Context word server is not configured."))}/suggestions" + val url = mkUrl val allSamplesCnt = mdlExs.map { case (_, samples) => samples.size }.sum @@ -281,9 +294,9 @@ object NCSuggestSynonymManager extends NCService { if (allReqsCnt == 0) onError(s"Suggestions cannot be generated for model: '$mdlId'") else { - val allSgsts = new ConcurrentHashMap[String, util.List[Suggestion]]() + val allSgsts = new ConcurrentHashMap[String, util.List[NCWordSuggestion]]() val cdl = new CountDownLatch(1) - val debugs = mutable.HashMap.empty[RequestData, Seq[Suggestion]] + val debugs = mutable.HashMap.empty[RequestData, Seq[NCWordSuggestion]] val cnt = new AtomicInteger(0) val cli = HttpClients.createDefault @@ -308,10 +321,7 @@ object NCSuggestSynonymManager extends NCService { ) ) - val resps: Seq[Seq[Suggestion]] = try - cli.execute(post, HANDLER) - finally - post.releaseConnection() + val resps = request(cli, post) require(batch.size == resps.size, s"Batch: ${batch.size}, responses: ${resps.size}") @@ -322,7 +332,7 @@ object NCSuggestSynonymManager extends NCService { logger.debug(s"Executed: $i requests...") allSgsts. - computeIfAbsent(elemId, (_: String) => new CopyOnWriteArrayList[Suggestion]()). + computeIfAbsent(elemId, (_: String) => new CopyOnWriteArrayList[NCWordSuggestion]()). addAll(resps.flatten.asJava) if (i == allReqsCnt) @@ -441,6 +451,86 @@ object NCSuggestSynonymManager extends NCService { } /** + * + * @param sens + * @param minScoreOpt + * @param parent + * @return + */ + def suggestWords(sens: Seq[NCSuggestionElement], minScoreOpt: Option[Double] = None, parent: Span = null): + Future[Map[String, Seq[NCWordSuggestion]]] = + startScopedSpan("suggest", parent) { _ => + val promise = Promise[Map[String, Seq[NCWordSuggestion]]]() + + case class Result(elementId: String, suggestions :Seq[NCWordSuggestion]) + + val data = new CopyOnWriteArrayList[Result]() + val cli = HttpClients.createDefault + val batches = sens.sliding(BATCH_SIZE, BATCH_SIZE).map(_.toSeq).toSeq + val cnt = new AtomicInteger(0) + + for (batch <- batches) + U.asFuture( + _ => { + val post = new HttpPost(mkUrl) + + post.setHeader("Content-Type", "application/json") + post.setEntity( + new StringEntity( + GSON.toJson( + RestRequest( + sentences = batch.map(p => RestRequestSentence(p.sample, p.indexes.asJava)).asJava, + minScore = 0, + limit = MAX_LIMIT + ) + ), + "UTF-8" + ) + ) + + val resps = request(cli, post) + + require(batch.size == resps.size, s"Batch: ${batch.size}, responses: ${resps.size}") + + data.addAll(batch.zip(resps).map { case (req, resp) => Result(req.elementId, resp) }.asJava ) + + if (cnt.incrementAndGet() == batches.size) { + val min = minScoreOpt.getOrElse(DFLT_MIN_SCORE) + + val map = data.asScala.groupBy(_.elementId).map(p => + p._1 -> + p._2. + map(_.suggestions.map(p => (toStem(p.word), p.score))). + map(_.groupBy(_._1)). + flatMap(p => + p.map(p => p._1 -> + p._1 -> { + val scores = p._2.map(_._2) + + scores.sum / scores.size + } + ). + filter(_._2 >= min). + map(p => NCWordSuggestion(p._1._2, p._2)).toSeq + ).toSeq) + + promise.success(map) + } + () + }, + (e: Throwable) => { + U.prettyError(logger, "Unexpected error:", e) + + promise.failure(e) + + }, + (_: Unit) => () + ) + + promise.future + } + + /** * * @param parent Optional parent span. * @return diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestionElement.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestionElement.scala new file mode 100644 index 0000000..3634a5a --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestionElement.scala @@ -0,0 +1,26 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.server.sugsyn + +/** + * + * @param elementId + * @param sample + * @param indexes + */ +case class NCSuggestionElement(elementId: String, sample: String, indexes: Seq[Int]) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCWordSuggestion.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCWordSuggestion.scala new file mode 100644 index 0000000..a09b2ca --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCWordSuggestion.scala @@ -0,0 +1,25 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.server.sugsyn + +/** + * + * @param word + * @param score + */ +case class NCWordSuggestion(word: String, score: Double) \ No newline at end of file diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala new file mode 100644 index 0000000..4eab17d --- /dev/null +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala @@ -0,0 +1,102 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.model.ctxword + +import org.apache.nlpcraft.model.{NCElement, NCIntent, NCIntentSample, NCIntentTerm, NCModel, NCResult, NCToken, NCValue} +import org.apache.nlpcraft.{NCTestContext, NCTestElement, NCTestEnvironment} +import org.junit.jupiter.api.Assertions.{assertFalse, assertTrue} +import org.junit.jupiter.api.Test + +import java.util +import scala.jdk.CollectionConverters.{SeqHasAsJava, SetHasAsJava} + +/** + * Test model. + */ +class NCContextWordSpecModel extends NCModel { + case class Value(name: String, syns: String*) extends NCValue { + override def getName: String = name + override def getSynonyms: util.List[String] = (Seq(name) ++ syns).asJava + } + + case class Elem(id: String, values: NCValue*) extends NCElement { + override def getId: String = id + override def getValues: util.List[NCValue] = values.asJava + override def isContextWordSupport: Boolean = true + } + + override def getId: String = this.getClass.getSimpleName + override def getName: String = this.getClass.getSimpleName + override def getVersion: String = "1.0.0" + + override def getElements: util.Set[NCElement] = + Set( + Elem("class:carBrand", Value("BMW")), + Elem("class:animal", Value("fox"), Value("cat", "tomcat")), + Elem("class:weather", Value("temperature"), Value("rain"), Value("sun")) + ).map(p => { + val e: NCElement = p + + e + }).asJava + + @NCIntentSample( + Array( + "I like drive my new BMW", + "BMW has the best engine", + "Luxury cars like Mercedes and BMW are prime targets", + "BMW will install side air bags up front", + "A wild cat is very dangerous", + "A fox eats hens", + "The fox was already in your chicken house", + "What is the local temperature", + "This is the first day of heavy rain" + ) + ) + @NCIntent( + "intent=classification " + + "term(carBrands)~{tok_id() == 'class:carBrand'}* " + + "term(animals)~{tok_id() == 'class:animal'}* " + + "term(weathers)~{tok_id() == 'class:weather'}* " + ) + def onMatch( + @NCIntentTerm("carBrands") carBrands: List[NCToken], + @NCIntentTerm("animals") animals: List[NCToken], + @NCIntentTerm("weathers") weathers: List[NCToken] + ): NCResult = { + println("carBrands=" + carBrands) + println("animals=" + animals) + println("weathers=" + weathers) + + NCResult.text("ok") + } +} + +/** + * @see NCConversationSpecModel + */ +@NCTestEnvironment(model = classOf[NCContextWordSpecModel], startClient = true) +class NCContextWordSpec extends NCTestContext { + @Test + @throws[Exception] + private[ctxword] def test(): Unit = { + val cli = getClient + + cli.ask("I want have a dog, fox, Mercedes, reno, winter, Porsche") + } +}
