This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-41 by this push:
new 707bfa7 WIP.
707bfa7 is described below
commit 707bfa759ddf5409a0943437eb045c0e3d6f4efb
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Aug 25 12:47:31 2020 +0300
WIP.
---
.../nlpcraft/server/model/NCEnhanceManager.scala | 73 +++++++++++++++-------
1 file changed, 50 insertions(+), 23 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
index 77495c7..94fe1d0 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/model/NCEnhanceManager.scala
@@ -35,6 +35,7 @@ import org.apache.nlpcraft.common.makro.NCMacroParser
import org.apache.nlpcraft.common.nlp.core.NCNlpPorterStemmer
import org.apache.nlpcraft.common.util.NCUtils
import org.apache.nlpcraft.common.{NCE, NCService}
+import org.apache.nlpcraft.server.mdo.NCProbeModelMdo
import org.apache.nlpcraft.server.model.NCEnhanceType._
import org.apache.nlpcraft.server.probe.NCProbeManager
@@ -42,7 +43,7 @@ import scala.collection.JavaConverters._
import scala.collection._
/**
- * TODO:
+ * TODO: check all texts
*/
object NCEnhanceManager extends NCService {
// 1. SUGGEST_SYNONYMS
@@ -122,6 +123,11 @@ object NCEnhanceManager extends NCService {
private def toStem(s: String): String =
split(s).map(NCNlpPorterStemmer.stem).mkString(" ")
private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s)
+ /**
+ *
+ * @param seq1
+ * @param seq2
+ */
private def getAllSlices(seq1: Seq[String], seq2: Seq[String]): Seq[Int] =
{
val seq = mutable.Buffer.empty[Int]
@@ -147,13 +153,25 @@ object NCEnhanceManager extends NCService {
/**
*
* @param seq
- * @return
*/
private def norm(seq: Seq[String]): Option[Seq[String]] = if (seq.isEmpty)
None else Some(seq)
/**
- * @param mdlId Model ID.
- * @param parent Parent.
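+ * Creates a macro parser and registers every macro of the given model with it.
+ * @param mdl Model whose macros are added to the parser.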
+ */
+ private def prepareParser(mdl: NCProbeModelMdo): NCMacroParser = {
+ val parser = new NCMacroParser()
+
+ mdl.macros.foreach { case (name, str) ⇒ parser.addMacro(name, str) }
+
+ parser
+ }
+
+ /**
+ *
+ * @param mdlId Model ID.
+ * @param parent Optional parent span.
*/
@throws[NCE]
private def suggestSynonyms(mdlId: String, parent: Span = null): Response =
@@ -171,7 +189,6 @@ object NCEnhanceManager extends NCService {
val warns = mutable.ArrayBuffer.empty[String]
if (allSamplesCnt < SUGGEST_SYNONYMS_MIN_CNT_MODEL)
- // TODO: text
warns +=
s"Model: '$mdlId' has too small intents samples count:
$allSamplesCnt. " +
s"Potentially is can be not enough for suggestions service
high quality work. " +
@@ -185,15 +202,12 @@ object NCEnhanceManager extends NCService {
if (ids.nonEmpty)
warns +=
- // TODO: text
s"Models '$mdlId' has intents: [${ids.mkString(", ")}]
with too small intents samples count." +
s"Potentially it can be not enough for suggestions
service high quality work. " +
s"Try to increase their count at least to
$SUGGEST_SYNONYMS_MIN_CNT_INTENT."
}
- val parser = new NCMacroParser()
-
- mdl.macros.foreach { case (name, str) ⇒ parser.addMacro(name, str)
}
+ val parser = prepareParser(mdl)
// Note that we don't use system tokenizer, because
ContextWordServer doesn't have this tokenizer.
// We just split examples words with spaces. Also we divide
SEPARATORS as separated words.
@@ -397,33 +411,40 @@ object NCEnhanceManager extends NCService {
)
}
+ /**
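+ * Reports a warning for every model macro that no element synonym contains.
+ * @param mdlId Model ID, used to look up the model via NCProbeManager.
+ * @param parent Optional parent span.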
+ */
private def validateMacros(mdlId: String, parent: Span = null): Response =
startScopedSpan("validateMacros", parent, "modelId" → mdlId) { _ ⇒
val mdl = NCProbeManager.getModel(mdlId)
val syns = mdl.elementsSynonyms.values.flatten
- // TODO: is it valid?
- val warns = mdl.macros.keys.
- flatMap(m ⇒ if (syns.exists(_.contains(m))) None else
Some(s"Macro is not used: $m")).
- toSeq
-
- Response(warnings = norm(warns))
+ Response(warnings =
+ norm(
+ mdl.macros.keys.
+ // TODO: is this a valid check (plain substring match)?
+ flatMap(m ⇒ if (syns.exists(_.contains(m))) None else
Some(s"Macro is not used: $m")).
+ toSeq
+ )
+ )
}
-
+ /**
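+ * Checks the macro-expanded synonyms of each model element and collects warnings (e.g. none or too many).
+ * @param mdlId Model ID, used to look up the model via NCProbeManager.
+ * @param parent Optional parent span.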
+ */
private def validateSynonyms(mdlId: String, parent: Span = null): Response
=
startScopedSpan("validateSynonyms", parent, "modelId" → mdlId) { _ ⇒
val warns = mutable.ArrayBuffer.empty[String]
val mdl = NCProbeManager.getModel(mdlId)
- val parser = new NCMacroParser()
-
- mdl.macros.foreach { case (name, str) ⇒ parser.addMacro(name, str)
}
-
+ val parser = prepareParser(mdl)
- val mdlSyns: Map[String, Seq[String]] =
- mdl.elementsSynonyms.map { case (elemId, syns) ⇒ elemId →
syns.flatMap(parser.expand) }
+ val mdlSyns = mdl.elementsSynonyms.map { case (elemId, syns) ⇒
elemId → syns.flatMap(parser.expand) }
mdlSyns.foreach { case (elemId, syns) ⇒
val size = syns.size
@@ -431,7 +452,7 @@ object NCEnhanceManager extends NCService {
if (size == 0)
warns += s"Element: '$elemId' doesn't have synonyms"
else if (size > VALIDATION_SYNONYMS_MANY_SYNS)
- warns += s"Element: '$elemId' have too many synonyms:
$size"
+ warns += s"Element: '$elemId' has too many synonyms: $size"
val others = mdlSyns.filter { case (othId, _) ⇒ othId !=
elemId}
@@ -444,6 +465,12 @@ object NCEnhanceManager extends NCService {
Response(warnings = norm(warns))
}
+ /**
+ *
+ * @param mdlId Model ID.
+ * @param types Enhancement types.
+ * @param parent Optional parent span.
+ */
@throws[NCE]
def enhance(mdlId: String, types: Seq[NCEnhanceType], parent: Span =
null): Seq[NCEnhanceResponse] =
startScopedSpan("enhance", parent, "modelId" → mdlId) { _ ⇒