This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-41 by this push:
new 890395b WIP.
890395b is described below
commit 890395bbfbfa34966845bad15fec4fe804518177
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Aug 16 11:27:11 2020 +0300
WIP.
---
.../server/suggestion/NCSuggestionsManager.scala | 36 +++++++++++++++++-----
1 file changed, 28 insertions(+), 8 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
index 0533f4e..24bc2f1 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
@@ -44,10 +44,14 @@ import scala.collection._
* TODO:
*/
object NCSuggestionsManager extends NCService {
+ // For context word server requests.
private final val DFLT_LIMIT: Int = 20
- private final val MAX_LIMIT: Int = 1000
+ private final val MAX_LIMIT: Int = 10000
private final val DFLT_MIN_SCORE: Double = 0
- private final val MIN_REQUIRED_CNT = 5
+
+ // For warnings.
+ private final val MIN_CNT_INTENT = 5
+ private final val MIN_CNT_MODEL = 20
private object Config extends NCConfigurable {
val urlOpt: Option[String] =
getStringOpt("nlpcraft.server.ctxword.url")
@@ -133,11 +137,24 @@ object NCSuggestionsManager extends NCService {
require(mdl.intentsSamples != null, "Samples cannot be null")
require(mdl.elementsSynonyms != null, "Element synonyms cannot be
null")
require(mdl.macros != null, "Macros cannot be null")
- require(mdl.intentsSamples.forall(_._2.nonEmpty), "Samples cannot
be empty")
+ require(mdl.intentsSamples.forall { case (_, samples) ⇒
samples.nonEmpty}, "Samples cannot be empty")
- mdl.intentsSamples.
- filter { case (_, samples) ⇒ samples.size < MIN_REQUIRED_CNT }.
- foreach { case (intentId, _) ⇒ logger.warn(s"Intent has not
enough samples: $intentId") }
+ if (mdl.intentsSamples.map { case (_, samples) ⇒ samples.size
}.sum < MIN_CNT_MODEL)
+ logger.warn(
+ s"Model: '$mdl' has too small synonyms count. " +
+ "Try to increase their count to improve synonyms
suggestions quality."
+ )
+ else {
+ val ids =
+ mdl.intentsSamples.
+ filter { case (_, samples) ⇒ samples.size <
MIN_CNT_INTENT }.
+ map { case (intentId, _) ⇒ intentId }
+
+ if (ids.nonEmpty)
+ logger.warn(s"Models '$mdl' has intents:
[${ids.mkString(", ")}] with too small synonyms count." +
+ "Try to increase their count to improve synonyms
suggestions quality."
+ )
+ }
val parser = new NCMacroParser()
@@ -146,7 +163,7 @@ object NCSuggestionsManager extends NCService {
val examples =
mdl.
intentsSamples.
- flatMap(_._2).
+ flatMap { case (_, samples) ⇒ samples }.
map(ex ⇒ SEPARATORS.foldLeft(ex)((s, ch) ⇒
s.replaceAll(s"\\$ch", s" $ch "))).
map(ex ⇒ {
val seq = ex.split(" ")
@@ -221,7 +238,9 @@ object NCSuggestionsManager extends NCService {
RestRequest(
sentences = batch.map(p ⇒
RestRequestSentence(p.sentence, Seq(p.index).asJava)).asJava,
min_score =
minScore.getOrElse(DFLT_MIN_SCORE),
- limit = (if (minScore.isDefined)
MAX_LIMIT else DFLT_LIMIT) + 1
+ // If minScore is defined, a large
limit is used, so in effect only the minimal score
+ // restricts the results. Otherwise,
the default limit is used.
+ limit = if (minScore.isDefined)
MAX_LIMIT else DFLT_LIMIT
)
),
"UTF-8"
@@ -271,6 +290,7 @@ object NCSuggestionsManager extends NCService {
elemSuggs.
map(sugg ⇒ (sugg, toStem(sugg.word))).
groupBy { case (_, stem) ⇒ stem }.
+ // Drops groups whose stem is already contained in allSynsStems.
filter { case (stem, _) ⇒ !allSynsStems.contains(stem)
}.
map { case (_, group) ⇒
val seq = group.map { case (sugg, _) ⇒ sugg
}.sortBy(-_.score)