This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-41
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-41 by this push:
     new 55cba61  WIP.
55cba61 is described below

commit 55cba61d448fff7a7a37cfd8df8a538abfa8519f
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Aug 16 12:28:07 2020 +0300

    WIP.
---
 .../scala/org/apache/nlpcraft/model/NCElement.java |   4 +-
 .../org/apache/nlpcraft/model/NCModelView.java     |   4 +-
 .../apache/nlpcraft/model/impl/NCTokenImpl.scala   |   3 +-
 .../probe/mgrs/conn/NCConnectionManager.scala      |   3 +
 .../probe/mgrs/deploy/NCDeployManager.scala        |  19 +++
 .../nlpcraft/probe/mgrs/deploy/NCModelHolder.scala |   5 +-
 .../nlpcraft/probe/mgrs/model/NCModelManager.scala |   4 +-
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 148 ++++++++++-----------
 .../nlpcraft/server/rest/NCBasicRestApi.scala      |   2 +-
 .../server/suggestion/NCSuggestionsManager.scala   |  18 ++-
 10 files changed, 118 insertions(+), 92 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index d120941..39de184 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -149,10 +149,10 @@ public interface NCElement extends NCMetadata, 
Serializable {
      *     ]
      * </pre>
      *
-     * @return Element's metadata or {@code null} if none provided. Default 
implementation return {@code null}.
+     * @return Element's metadata or an empty collection if none provided. 
Default implementation returns an empty collection. TODO:
      */
     default Map<String, Object> getMetadata() {
-        return null;
+        return Collections.emptyMap();
     }
 
     /**
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index ce3dcdd..a0dcf3c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -669,7 +669,7 @@ public interface NCModelView extends NCMetadata {
      * }
      * </pre>
      *
-     * @return Optional user defined model metadata.
+     * @return Optional user defined model metadata. TODO: cannot be null
      */
     default Map<String, Object> getMetadata() {
         return DFLT_METADATA;
@@ -808,7 +808,7 @@ public interface NCModelView extends NCMetadata {
      * }
      * </pre>
      *
-     * @return Custom user parsers for model elements or {@code null} if not 
used (default).
+     * @return Custom user parsers for model elements or an empty list if not 
used (default). Cannot be {@code null}.
      */
     default List<NCCustomParser> getParsers() {
         return Collections.emptyList();
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 629d1b7..52d1d8d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -139,8 +139,7 @@ private[nlpcraft] object NCTokenImpl {
                 // Special synthetic meta data element.
                 md.put("nlpcraft:nlp:freeword", false)
 
-                if (elm.getMetadata != null)
-                    elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k, 
v.asInstanceOf[java.io.Serializable]) }
+                elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k, 
v.asInstanceOf[java.io.Serializable]) }
 
                 new NCTokenImpl(
                     mdl.model,
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
index 838bccb..906c9f2 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/conn/NCConnectionManager.scala
@@ -233,6 +233,9 @@ object NCConnectionManager extends NCService {
                         NCModelManager.getAllModels().map(m ⇒ {
                             val mdl = m.model
 
+                            require(m.intentsSamples != null)
+                            // Model already validated.
+
                             // util.HashSet created to avoid scala collections 
serialization error.
                             // Seems to be a Scala bug.
                             (
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index f6d5b21..8bda420 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -80,6 +80,15 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
       */
     @throws[NCE]
     private def wrap(mdl: NCModel): NCModelHolder = {
+        checkCollection("additionalStopWords", mdl.getAdditionalStopWords)
+        checkCollection("elements", mdl.getElements)
+        checkCollection("enabledBuiltInTokens", mdl.getEnabledBuiltInTokens)
+        checkCollection("excludedStopWords", mdl.getExcludedStopWords)
+        checkCollection("parsers", mdl.getParsers)
+        checkCollection("suspiciousWords", mdl.getSuspiciousWords)
+        checkCollection("macros", mdl.getMacros)
+        checkCollection("metadata", mdl.getMetadata)
+
         // Scan for intent annotations in the model class.
         val intents = NCIntentScanner.scan(mdl)
 
@@ -107,6 +116,16 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
 
     /**
       *
+      * @param name
+      * @param col
+      */
+    @throws[NCE]
+    private def checkCollection(name: String, col: Any): Unit =
+        if (col == null)
+            throw new NCE(s"Collection '$name' can be empty but cannot be 
null")
+
+    /**
+      *
       * @param clsName Factory class name.
       */
     @throws[NCE]
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelHolder.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelHolder.scala
index 1a94481..4786571 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelHolder.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCModelHolder.scala
@@ -24,4 +24,7 @@ import org.apache.nlpcraft.model.NCModel
   * @param model
   * @param intentSamples
   */
-case class NCModelHolder(model: NCModel, intentSamples: Map[String, 
Seq[String]])
+case class NCModelHolder(model: NCModel, intentSamples: Map[String, 
Seq[String]]) {
+    require(model != null)
+    require(intentSamples != null)
+}
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index 9f67045..56940d5 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -67,11 +67,9 @@ object NCModelManager extends NCService with DecorateAsScala 
{
         checkModelConfig(h.model)
 
         val parser = new NCMacroParser
-        val macros = h.model.getMacros
 
         // Initialize macro parser.
-        if (macros != null)
-            macros.asScala.foreach(t ⇒ parser.addMacro(t._1, t._2))
+        h.model.getMacros.asScala.foreach(t ⇒ parser.addMacro(t._1, t._2))
 
         models += h.model.getId → verifyAndDecorate(h, parser)
 
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 6845150..9ae77f9 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -297,8 +297,7 @@ object NCModelEnricher extends NCProbeEnricher with 
DecorateAsScala {
       */
     private def alreadyMarked(toks: Seq[NCNlpSentenceToken], elemId: String): 
Boolean = toks.forall(_.isTypeOf(elemId))
 
-    def isComplex(mdl: NCModelDecorator): Boolean =
-        mdl.synonymsDsl.nonEmpty || (mdl.model.getParsers != null && 
!mdl.model.getParsers.isEmpty)
+    def isComplex(mdl: NCModelDecorator): Boolean = mdl.synonymsDsl.nonEmpty 
|| !mdl.model.getParsers.isEmpty
 
     @throws[NCE]
     override def enrich(mdl: NCModelDecorator, ns: NCNlpSentence, senMeta: 
Map[String, Serializable], parent: Span = null): Unit =
@@ -432,81 +431,80 @@ object NCModelEnricher extends NCProbeEnricher with 
DecorateAsScala {
 
             val parsers = mdl.model.getParsers
 
-            if (parsers != null)
-                for (parser ← parsers.asScala) {
-                    parser.onInit()
-                    
-                    startScopedSpan("customParser", span,
-                        "srvReqId" → ns.srvReqId,
-                        "modelId" → mdl.model.getId,
-                        "txt" → ns.text) { _ ⇒
-                        def to(t: NCNlpSentenceToken): NCCustomWord =
-                            new NCCustomWord {
-                                override def getNormalizedText: String = 
t.normText
-                                override def getOriginalText: String = 
t.origText
-                                override def getStartCharIndex: Int = 
t.startCharIndex
-                                override def getEndCharIndex: Int = 
t.endCharIndex
-                                override def getPos: String = t.pos
-                                override def getPosDescription: String = 
t.posDesc
-                                override def getLemma: String = t.lemma
-                                override def getStem: String = t.stem
-                                override def isStopWord: Boolean = t.isStopWord
-                                override def isBracketed: Boolean = 
t.isBracketed
-                                override def isQuoted: Boolean = t.isQuoted
-                                override def isKnownWord: Boolean = 
t.isKnownWord
-                                override def isSwearWord: Boolean = 
t.isSwearWord
-                                override def isEnglish: Boolean = t.isEnglish
+            for (parser ← parsers.asScala) {
+                parser.onInit()
+
+                startScopedSpan("customParser", span,
+                    "srvReqId" → ns.srvReqId,
+                    "modelId" → mdl.model.getId,
+                    "txt" → ns.text) { _ ⇒
+                    def to(t: NCNlpSentenceToken): NCCustomWord =
+                        new NCCustomWord {
+                            override def getNormalizedText: String = t.normText
+                            override def getOriginalText: String = t.origText
+                            override def getStartCharIndex: Int = 
t.startCharIndex
+                            override def getEndCharIndex: Int = t.endCharIndex
+                            override def getPos: String = t.pos
+                            override def getPosDescription: String = t.posDesc
+                            override def getLemma: String = t.lemma
+                            override def getStem: String = t.stem
+                            override def isStopWord: Boolean = t.isStopWord
+                            override def isBracketed: Boolean = t.isBracketed
+                            override def isQuoted: Boolean = t.isQuoted
+                            override def isKnownWord: Boolean = t.isKnownWord
+                            override def isSwearWord: Boolean = t.isSwearWord
+                            override def isEnglish: Boolean = t.isEnglish
+                        }
+
+                    val res = parser.parse(
+                        NCRequestImpl(senMeta, ns.srvReqId),
+                        mdl.model,
+                        ns.map(to).asJava,
+                        ns.flatten.distinct.filter(!_.isNlp).map(n ⇒ {
+                            val noteId = n.noteType
+                            val words = ns.filter(t ⇒ t.index >= n.tokenFrom 
&& t.index <= n.tokenTo).map(to).asJava
+                            val md = n.asMetadata()
+
+                            new NCCustomElement() {
+                                override def getElementId: String = noteId
+                                override def getWords: util.List[NCCustomWord] 
= words
+                                override def getMetadata: util.Map[String, 
AnyRef] =
+                                    md.map(p ⇒ p._1 → 
p._2.asInstanceOf[AnyRef]).asJava
                             }
-        
-                        val res = parser.parse(
-                            NCRequestImpl(senMeta, ns.srvReqId),
-                            mdl.model,
-                            ns.map(to).asJava,
-                            ns.flatten.distinct.filter(!_.isNlp).map(n ⇒ {
-                                val noteId = n.noteType
-                                val words = ns.filter(t ⇒ t.index >= 
n.tokenFrom && t.index <= n.tokenTo).map(to).asJava
-                                val md = n.asMetadata()
-                
-                                new NCCustomElement() {
-                                    override def getElementId: String = noteId
-                                    override def getWords: 
util.List[NCCustomWord] = words
-                                    override def getMetadata: util.Map[String, 
AnyRef] =
-                                        md.map(p ⇒ p._1 → 
p._2.asInstanceOf[AnyRef]).asJava
-                                }
-                            }).asJava
-                        )
-        
-                        if (res != null)
-                            res.asScala.foreach(e ⇒ {
-                                val elemId = e.getElementId
-                                val words = e.getWords
-                
-                                if (elemId == null)
-                                    throw new NCE(s"Custom model parser cannot 
return 'null' element ID.")
-                
-                                if (words == null || words.isEmpty)
-                                    throw new NCE(s"Custom model parser cannot 
return empty custom tokens [elementId=$elemId]")
-                
-                                val matchedToks = words.asScala.map(w ⇒
-                                    ns.find(t ⇒
-                                        t.startCharIndex == 
w.getStartCharIndex && t.endCharIndex == w.getEndCharIndex
-                                    ).getOrElse(throw new 
AssertionError(s"Custom model parser returned an invalid custom token: $w"))
+                        }).asJava
+                    )
+
+                    if (res != null)
+                        res.asScala.foreach(e ⇒ {
+                            val elemId = e.getElementId
+                            val words = e.getWords
+
+                            if (elemId == null)
+                                throw new NCE(s"Custom model parser cannot 
return 'null' element ID.")
+
+                            if (words == null || words.isEmpty)
+                                throw new NCE(s"Custom model parser cannot 
return empty custom tokens [elementId=$elemId]")
+
+                            val matchedToks = words.asScala.map(w ⇒
+                                ns.find(t ⇒
+                                    t.startCharIndex == w.getStartCharIndex && 
t.endCharIndex == w.getEndCharIndex
+                                ).getOrElse(throw new AssertionError(s"Custom 
model parser returned an invalid custom token: $w"))
+                            )
+
+                            if (!alreadyMarked(matchedToks, elemId))
+                                mark(
+                                    ns,
+                                    elem = mdl.elements.getOrElse(elemId, 
throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
+                                    toks = matchedToks,
+                                    direct = true,
+                                    syn = None,
+                                    metaOpt = Some(e.getMetadata.asScala),
+                                    parts = Seq.empty
                                 )
-                
-                                if (!alreadyMarked(matchedToks, elemId))
-                                    mark(
-                                        ns,
-                                        elem = mdl.elements.getOrElse(elemId, 
throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
-                                        toks = matchedToks,
-                                        direct = true,
-                                        syn = None,
-                                        metaOpt = Some(e.getMetadata.asScala),
-                                        parts = Seq.empty
-                                    )
-                            })
-                    }
-                    
-                    parser.onDiscard()
+                        })
                 }
+
+                parser.onDiscard()
+            }
         }
 }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
index 41c5d58..0c6a34c 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/rest/NCBasicRestApi.scala
@@ -661,7 +661,7 @@ class NCBasicRestApi extends NCRestApi with LazyLogging 
with NCOpenCensusTrace w
                 val admin = authenticateAsAdmin(req.acsTok)
 
                 if (!NCProbeManager.getAllProbes(admin.companyId, 
span).exists(_.models.exists(_.id == req.mdlId)))
-                    throw new NCE(s"Probe not found: ${req.mdlId}")
+                    throw new NCE(s"Probe not found for model: ${req.mdlId}")
 
                 val res: Map[String, Seq[Suggestion]] =
                     NCSuggestionsManager.suggest(req.mdlId, req.minScore, 
span).
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
index 24bc2f1..b966937 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/suggestion/NCSuggestionsManager.scala
@@ -139,20 +139,26 @@ object NCSuggestionsManager extends NCService {
             require(mdl.macros != null, "Macros cannot be null")
             require(mdl.intentsSamples.forall { case (_, samples) ⇒ 
samples.nonEmpty}, "Samples cannot be empty")
 
-            if (mdl.intentsSamples.map { case (_, samples) ⇒ samples.size 
}.sum < MIN_CNT_MODEL)
+            val allSamplesCnt = mdl.intentsSamples.map { case (_, samples) ⇒ 
samples.size }.sum
+
+            if (allSamplesCnt < MIN_CNT_MODEL) {
+                // TODO: text
                 logger.warn(
-                    s"Model: '$mdl' has too small synonyms count. " +
-                    "Try to increase their count to improve synonyms 
suggestions quality."
+                    s"Model: '$mdlId' has too small intents samples count: 
$allSamplesCnt. " +
+                    s"Potentially is can be not enough for suggestions service 
high quality work. " +
+                    s"Try to increase their count at least to $MIN_CNT_MODEL."
                 )
-            else {
+            } else {
                 val ids =
                     mdl.intentsSamples.
                         filter { case (_, samples) ⇒ samples.size < 
MIN_CNT_INTENT }.
                         map { case (intentId, _) ⇒ intentId }
 
                 if (ids.nonEmpty)
-                    logger.warn(s"Models '$mdl' has intents: 
[${ids.mkString(", ")}] with too small synonyms count." +
-                        "Try to increase their count to improve synonyms 
suggestions quality."
+                    // TODO: text
+                    logger.warn(s"Models '$mdlId' has intents: 
[${ids.mkString(", ")}] with too small intents samples count." +
+                        s"Potentially is can be not enough for suggestions 
service high quality work. " +
+                        s"Try to increase their count at least to 
$MIN_CNT_INTENT."
                     )
             }
 

Reply via email to