This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-206
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-206 by this push:
     new 11d05e8  WIP.
11d05e8 is described below

commit 11d05e80933a458a7e468830fafba1e6171d414c
Author: Aaron Radzinski <[email protected]>
AuthorDate: Fri Mar 5 17:55:27 2021 -0800

    WIP.
---
 .../apache/nlpcraft/model/NCModelFileAdapter.java  |   4 +-
 .../model/intent/impl/NCIntentDslCompiler.scala    |  20 +--
 .../apache/nlpcraft/probe/mgrs/NCProbeModel.scala  |   2 +-
 .../probe/mgrs/deploy/NCDeployManager.scala        | 148 +++++++++------------
 4 files changed, 80 insertions(+), 94 deletions(-)

diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index b9ad726..ac32ef8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -370,9 +370,9 @@ abstract public class NCModelFileAdapter extends 
NCModelAdapter {
     }
 
     /**
-     * Gets list of intents declared in JSON/YML model definition, if any.
+     * Gets list of intents and/or fragments declared in JSON/YML model 
definition, if any.
      *
-     * @return List of intents declared in JSON/YML model definition, 
potentially empty.
+     * @return List of intents and/or fragments declared in JSON/YML model 
definition, potentially empty.
      */
     public Set<String> getIntents() {
         return intents;
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentDslCompiler.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentDslCompiler.scala
index 9aae4c8..8d4dd62 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentDslCompiler.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/impl/NCIntentDslCompiler.scala
@@ -470,16 +470,18 @@ object NCIntentDslCompiler extends LazyLogging {
     ): Set[NCDslIntent] = antlr4(U.readFile(filePath.toFile).mkString("\n"), 
mdlId, filePath.getFileName.toString)
     
     /**
-      * Compiles inline (supplied) fragments and/or intents. Note that 
fragments are accumulated in a static
-      * map keyed by model ID. Only intents are returned, if any.
-      *
-      * @param dsl DSL to compile.
-      * @param mdlId ID of the model DSL belongs to.
-      * @return
-      */
+     * Compiles inline (supplied) fragments and/or intents. Note that 
fragments are accumulated in a static
+     * map keyed by model ID. Only intents are returned, if any.
+     *
+     * @param dsl DSL to compile.
+     * @param mdlId ID of the model DSL belongs to.
+     * @param srcName Optional source name.
+     * @return
+     */
     @throws[NCE]
     def compile(
         dsl: String,
-        mdlId: String
-    ): Set[NCDslIntent] = antlr4(dsl, mdlId, "<inline>")
+        mdlId: String,
+        srcName: String = "<inline>"
+    ): Set[NCDslIntent] = antlr4(dsl, mdlId, srcName)
 }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index acc2021..f52d8d2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -45,5 +45,5 @@ case class NCProbeModel(
     exclStopWordsStems: Set[String],
     suspWordsStems: Set[String],
     elements: Map[String /*Element ID*/ , NCElement],
-    samples: Map[String, Seq[Seq[String]]]
+    samples: Set[(String, Seq[Seq[String]])]
 )
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 4f281d4..e926439 100644
--- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -43,7 +43,7 @@ import resource.managed
 import scala.collection.JavaConverters._
 import scala.compat.java8.OptionConverters._
 import scala.collection.convert.DecorateAsScala
-import scala.collection.{Map, Seq, Set, mutable}
+import scala.collection.mutable
 import scala.collection.mutable.{ArrayBuffer, ListBuffer}
 import scala.util.control.Exception._
 
@@ -79,6 +79,7 @@ object NCDeployManager extends NCService with DecorateAsScala 
{
     
     type Callback = Function[NCIntentMatch, NCResult]
     type Intent = (NCDslIntent, Callback)
+    type Sample = (String/* Intent ID */, Seq[Seq[String]] /* List of list of 
input samples for that intent. */)
     
     private final val SEPARATORS = Seq('?', ',', '.', '-', '!')
 
@@ -483,7 +484,7 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
 
         if (intents.nonEmpty) {
             // Check the uniqueness of intent IDs.
-            U.getDups(intents.keys.toSeq.map(_.id)) match {
+            U.getDups(intents.map(_._1).toSeq.map(_.id)) match {
                 case ids if ids.nonEmpty ⇒
                     throw new NCE(s"Duplicate intent IDs found [" +
                         s"mdlId=$mdlId, " +
@@ -502,12 +503,12 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
         NCProbeModel(
             model = mdl,
             solver = solver,
-            intents = intents.keySet.toSeq,
+            intents = intents.map(_._1).toSeq,
             synonyms = mkFastAccessMap(filter(syns, dsl = false), 
NCProbeSynonymsWrapper(_)),
             synonymsDsl = mkFastAccessMap(filter(syns, dsl = true), 
_.sorted.reverse),
-            addStopWordsStems = addStopWords.toSet,
-            exclStopWordsStems = exclStopWords.toSet,
-            suspWordsStems = suspWords.toSet,
+            addStopWordsStems = addStopWords,
+            exclStopWordsStems = exclStopWords,
+            suspWordsStems = suspWords,
             elements = mdl.getElements.asScala.map(elm ⇒ (elm.getId, 
elm)).toMap,
             samples = scanSamples(mdl)
         )
@@ -1152,7 +1153,7 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
                 s"callback=${method2Str(mtd)}" +
             s"]")
 
-        val terms = intent.terms.toSeq
+        val terms = intent.terms
 
         // Checks correctness of term IDs.
         // Note we don't restrict them to be duplicated.
@@ -1469,8 +1470,10 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
         val intents = mutable.Buffer.empty[Intent]
         
         for (m ← getAllMethods(mdl)) {
+            val mStr = method2Str(m)
+
             // Process inline intent declarations by @NCIntent annotation.
-            for (ann ← m.getAnnotationsByType(CLS_INTENT); intent ← 
NCIntentDslCompiler.compile(ann.value(), mdl.getId))
+            for (ann ← m.getAnnotationsByType(CLS_INTENT); intent ← 
NCIntentDslCompiler.compile(ann.value(), mdl.getId, mStr))
                 intents += (intent → prepareCallback(m, mdl, intent))
     
             // Process intent references from @NCIntentRef annotation.
@@ -1482,13 +1485,14 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
                         val compiledIntents = adapter
                             .getIntents
                             .asScala
-                            .flatMap(NCIntentDslCompiler.compile(_, mdl.getId))
+                            .flatMap(NCIntentDslCompiler.compile(_, mdl.getId, 
mStr))
             
                         U.getDups(compiledIntents.toSeq.map(_.id)) match {
                             case ids if ids.nonEmpty ⇒
                                 throw new NCE(s"Duplicate intent IDs found [" +
                                     s"mdlId=$mdlId, " +
                                     s"origin=${adapter.getOrigin}, " +
+                                    s"callback=$mStr, " +
                                     s"ids=${ids.mkString(",")}" +
                                 s"]")
                 
@@ -1502,14 +1506,14 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
                                     s"@IntentRef($refId) references unknown 
intent ID [" +
                                         s"mdlId=$mdlId, " +
                                         s"refId=$refId, " +
-                                        s"callback=${method2Str(m)}" +
+                                        s"callback=$mStr" +
                                     s"]")
                         }
         
                     case _ ⇒
                         throw new NCE(s"@IntentRef annotation can only be used 
for models extending 'NCModelFileAdapter' class [" +
                             s"mdlId=$mdlId, " +
-                            s"callback=${method2Str(m)}" +
+                            s"callback=$mStr" +
                         s"]")
                 }
             }
@@ -1524,95 +1528,75 @@ object NCDeployManager extends NCService with 
DecorateAsScala {
       * @param mdl Model to scan.
       */
     @throws[NCE]
-    private def scanSamples(mdl: NCModel): Map[String, Seq[Seq[String]]] = {
-        var annFound = false
+    private def scanSamples(mdl: NCModel): Set[Sample] = {
         val mdlId = mdl.getId
 
-        val samples = getAllMethods(mdl).flatMap(mtd ⇒ {
-            def mkMethodName: String = 
s"$C${mtd.getDeclaringClass.getName}#${mtd.getName}(...)$RST"
-
-            val smpAnns = mtd.getAnnotationsByType(CLS_SAMPLE)
-            val intAnn = mtd.getAnnotation(CLS_INTENT)
-            val refAnn = mtd.getAnnotation(CLS_INTENT_REF)
+        val samples = mutable.Buffer.empty[Sample]
 
-            if (smpAnns.nonEmpty || intAnn != null || refAnn != null) {
-                annFound = true
-
-                def mkIntentId(): String =
-                    if (intAnn != null)
-                        NCIntentDslCompiler.compile(intAnn.value(), mdlId).id
-                    else if (refAnn != null)
-                        refAnn.value().trim
-                    else
-                        throw new AssertionError()
+        for (m ← getAllMethods(mdl)) {
+            val mStr = method2Str(m)
 
-                if (smpAnns.nonEmpty) {
-                    if (intAnn == null && refAnn == null) {
-                        logger.warn(s"`@${CLS_SAMPLE.getSimpleName} annotation 
without corresponding @NCIntent or @NCIntentRef annotations: $mkMethodName")
+            val smpAnns = m.getAnnotationsByType(CLS_SAMPLE)
+            val intAnns = m.getAnnotationsByType(CLS_INTENT)
+            val refAnns = m.getAnnotationsByType(CLS_INTENT_REF)
 
-                        None
-                    }
-                    else {
-                        val samples = smpAnns.toSeq.map(_.value().toSeq)
+            if (smpAnns.nonEmpty) {
+                if (intAnns.isEmpty && refAnns.isEmpty)
+                    throw new NCE(s"@IntentSample annotation without 
corresponding @NCIntent or @NCIntentRef annotations: $mStr")
+                else {
+                    val seqSeq = smpAnns.toSeq.map(_.value().toSeq)
 
-                        if (samples.exists(_.isEmpty)) {
-                            logger.warn(s"@${CLS_SAMPLE.getSimpleName} 
annotation is empty: $mkMethodName")
+                    if (seqSeq.exists(_.isEmpty))
+                        logger.warn(s"@IntentSample annotation is empty: 
$mStr")
+                    if (U.containsDups(seqSeq.flatten.toList))
+                        logger.warn(s"@IntentSample annotation has duplicates: 
$mStr")
 
-                            None
-                        }
-                        else if (U.containsDups(samples.flatten.toList)) {
-                            logger.warn(s"@${CLS_SAMPLE.getSimpleName} 
annotation has duplicates: $mkMethodName")
+                    val distinct = seqSeq.map(_.distinct).distinct
 
-                            // Samples is list of list. Duplicates cannot be 
inside one list,
-                            // but possible between different lists.
-                            Some(mkIntentId() → 
samples.map(_.distinct).distinct)
-                        }
-                        else
-                            Some(mkIntentId() → samples)
-                    }
-                }
-                else {
-                    logger.warn(s"@${CLS_SAMPLE.getSimpleName} annotation is 
missing for: $mkMethodName")
-
-                    None
+                    for (ann ← intAnns)
+                        samples += (NCIntentDslCompiler.compile(ann.value(), 
mdlId, mStr) → distinct)
+                    for (ann ← refAnns)
+                        samples += (ann.value() → distinct)
                 }
             }
-            else
-                None
-        }).toMap
+            else if (intAnns.nonEmpty || refAnns.nonEmpty)
+                logger.warn(s"@IntentSample annotation is missing for: $mStr")
+        }
 
-        val parser = new NCMacroParser
+        if (samples.nonEmpty) {
+            val parser = new NCMacroParser
 
-        mdl.getMacros.asScala.foreach { case (name, str) ⇒ 
parser.addMacro(name, str) }
+            mdl.getMacros.asScala.foreach { case (name, str) ⇒ 
parser.addMacro(name, str) }
 
-        val allSyns: Set[Seq[String]] =
-            mdl.getElements.
-                asScala.
-                flatMap(_.getSynonyms.asScala.flatMap(parser.expand)).
-                map(NCNlpPorterStemmer.stem).map(_.split(" ").toSeq).
-                toSet
+            val allSyns: Set[Seq[String]] =
+                mdl.getElements.
+                    asScala.
+                    flatMap(_.getSynonyms.asScala.flatMap(parser.expand)).
+                    map(NCNlpPorterStemmer.stem).map(_.split(" ").toSeq).
+                    toSet
 
-        case class Case(modelId: String, sample: String)
+            case class Case(modelId: String, sample: String)
 
-        val processed = mutable.HashSet.empty[Case]
+            val processed = mutable.HashSet.empty[Case]
 
-        samples.
-            flatMap { case (_, samples) ⇒ samples.flatten.map(_.toLowerCase) }.
-            map(s ⇒ s → SEPARATORS.foldLeft(s)((s, ch) ⇒ 
s.replaceAll(s"\\$ch", s" $ch "))).
-            foreach {
-                case (s, sNorm) ⇒
-                    if (processed.add(Case(mdlId, s))) {
-                        val seq: Seq[String] = sNorm.split(" 
").map(NCNlpPorterStemmer.stem)
+            samples.
+                flatMap { case (_, samples) ⇒ 
samples.flatten.map(_.toLowerCase) }.
+                map(s ⇒ s → SEPARATORS.foldLeft(s)((s, ch) ⇒ 
s.replaceAll(s"\\$ch", s" $ch "))).
+                foreach {
+                    case (s, sNorm) ⇒
+                        if (processed.add(Case(mdlId, s))) {
+                            val seq: Seq[String] = sNorm.split(" 
").map(NCNlpPorterStemmer.stem)
 
-                        if (!allSyns.exists(_.intersect(seq).nonEmpty))
-                            logger.warn(s"@IntentSample sample doesn't contain 
any direct synonyms [" +
-                                s"mdlId=$mdlId, " +
-                                s"sample='$s'" +
-                            s"]")
-                    }
+                            if (!allSyns.exists(_.intersect(seq).nonEmpty))
+                                logger.warn(s"@IntentSample sample doesn't 
contain any direct synonyms [" +
+                                    s"mdlId=$mdlId, " +
+                                    s"sample='$s'" +
+                                s"]")
+                        }
 
-            }
+                }
+        }
 
-        samples
+        samples.toSet
     }
 }

Reply via email to