This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-50-1 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 3da6911ff7b2dc1c7002a14934320586fa418611 Author: Sergey Kamov <[email protected]> AuthorDate: Thu Sep 30 15:11:36 2021 +0300 Functions enricher. --- .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 2 + .../nlpcraft/common/nlp/NCNlpSentenceNote.scala | 1 + .../org/apache/nlpcraft/model/NCModelView.java | 6 +- .../apache/nlpcraft/model/impl/NCTokenLogger.scala | 16 ++- .../model/intent/solver/NCIntentSolver.scala | 15 +++ .../nlpcraft/model/tools/cmdline/NCCliBase.scala | 2 +- .../org/apache/nlpcraft/probe/NCProbeBoot.scala | 2 + .../nlpcraft/probe/mgrs/NCProbeVariants.scala | 4 +- .../nlpcraft/probe/mgrs/NCTokenPartKey.scala | 5 + .../probe/mgrs/deploy/NCDeployManager.scala | 4 +- .../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 3 + .../enrichers/function/NCFunctionEnricher.scala | 114 +++++++++++++++++++++ .../probe/mgrs/sentence/NCSentenceManager.scala | 6 +- .../mgrs/nlp/enrichers/NCEnrichersTestBeans.scala | 28 +++++ .../function/NCEnricherFunctionSpec.scala | 72 +++++++++++++ 15 files changed, 271 insertions(+), 9 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala index 9d9f4e3..cb3f09a 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala @@ -186,6 +186,8 @@ class NCNlpSentence( tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes")) case "nlpcraft:reference" => tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes")) + case "nlpcraft:function" => + tokensEqualOrSimilar(getList(n1, "indexes"), getList(n2, "indexes")) case _ => true } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala index fbf4f01..9d36817 100644 --- 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala @@ -280,6 +280,7 @@ object NCNlpSentenceNote { case "nlpcraft:relation" => Seq("type", "note") ++ addRefs("indexes") case "nlpcraft:sort" => Seq("asc", "subjnotes", "bynotes") ++ addRefs("subjindexes", "byindexes") case "nlpcraft:limit" => Seq("limit", "note") ++ addRefs("indexes", "asc") // Asc flag has sense only with references for limit. + case "nlpcraft:function" => Seq("type", "note") ++ addRefs("indexes") case "nlpcraft:coordinate" => Seq("latitude", "longitude") case "nlpcraft:num" => Seq("from", "to", "unit", "unitType") case x if x.startsWith("google:") => Seq("meta", "mentionsBeginOffsets", "mentionsContents", "mentionsTypes") diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java index 157a3e2..ac96b6a 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java @@ -296,6 +296,7 @@ public interface NCModelView extends NCMetadata { * <li><code>nlpcraft:relation</code></li> * <li><code>nlpcraft:sort</code></li> * <li><code>nlpcraft:limit</code></li> + * <li><code>nlpcraft:function</code></li> * </ul> */ Set<String> DFLT_ENABLED_BUILTIN_TOKENS = @@ -312,7 +313,8 @@ public interface NCModelView extends NCMetadata { "nlpcraft:coordinate", "nlpcraft:relation", "nlpcraft:sort", - "nlpcraft:limit" + "nlpcraft:limit", + "nlpcraft:function" ) ); @@ -1046,6 +1048,7 @@ public interface NCModelView extends NCMetadata { * <li><code>nlpcraft:relation</code></li> * <li><code>nlpcraft:sort</code></li> * <li><code>nlpcraft:limit</code></li> + * <li><code>nlpcraft:function</code></li> // TODO: * </ul> * Note that this method can return an empty list if the data model doesn't need any built-in tokens * for its logic. 
See {@link NCToken} for the list of all supported built-in tokens. @@ -1216,6 +1219,7 @@ public interface NCModelView extends NCMetadata { * <li><code>nlpcraft:limit</code></li> * <li><code>nlpcraft:sort</code></li> * <li><code>nlpcraft:relation</code></li> + * <li><code>nlpcraft:function</code></li> // TODO: * </ul> * Note that entity cannot be restricted to itself (entity ID cannot appear as key as well as a * part of the value's set). diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala index b3005ce..72105ca 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala @@ -53,6 +53,7 @@ object NCTokenLogger extends LazyLogging { "nlpcraft:relation", "nlpcraft:sort", "nlpcraft:limit", + "nlpcraft:function", "nlpcraft:coordinate" ) @@ -85,7 +86,8 @@ object NCTokenLogger extends LazyLogging { "nlpcraft:date" -> Seq("from", "to", "periods"), "nlpcraft:relation" -> Seq("type", "indexes", "note"), "nlpcraft:sort" -> Seq("asc", "subjnotes", "subjindexes", "bynotes", "byindexes"), - "nlpcraft:limit" -> Seq("limit", "indexes", "asc", "note") + "nlpcraft:limit" -> Seq("limit", "indexes", "asc", "note"), + "nlpcraft:function" -> Seq("type", "indexes", "note") ).map(p => p._1 -> p._2.zipWithIndex.map(p => p._1 -> p._2).toMap) private def format(l: Long): String = new SimpleDateFormat("yyyy/MM/dd").format(new java.util.Date(l)) @@ -244,6 +246,12 @@ object NCTokenLogger extends LazyLogging { s + case "nlpcraft:function" => + val t = mkString("type") + val note = mkString("note") + + s"type=$t, indexes=[${mkIndexes("indexes")}], note=$note" + case "nlpcraft:coordinate" => s"${getValue("latitude")} and ${getValue("longitude")}" case "nlpcraft:num" => @@ -522,6 +530,12 @@ object NCTokenLogger extends LazyLogging { s + case "nlpcraft:function" => + val t = 
mkString("type") + val note = mkString("note") + + s"type=$t, indexes=[${getIndexes("indexes")}], note=$note" + case "nlpcraft:num" => def mkValue(name: String, fractionalField: String): String = { val d: Double = get(name) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala index 573ac4c..421261a 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/intent/solver/NCIntentSolver.scala @@ -298,6 +298,21 @@ class NCIntentSolver(intents: List[(NCIdlIntent/*Intent*/, NCIntentMatch => NCRe convTokToFix.getMetadata.put(s"nlpcraft:limit:indexes", Collections.singletonList(newRef.getIndex)) } + case "nlpcraft:function" => + val refId = convTokToFix.meta[String]("nlpcraft:function:note") + val refIdxs = convTokToFix.meta[JList[Int]]("nlpcraft:function:indexes").asScala + + require(refIdxs.size == 1) + + val refIdx = refIdxs.head + + if (!vrntNotConvToks.exists(isReference(_, refId, refIdx))) { + val newRef = getNewReferences(refId, Seq(refIdx), _.size == 1).head + + convTokToFix.getMetadata.put(s"nlpcraft:function:note", newRef.getId) + convTokToFix.getMetadata.put(s"nlpcraft:function:indexes", Collections.singletonList(newRef.getIndex)) + } + case "nlpcraft:relation" => val refId = convTokToFix.meta[String]("nlpcraft:relation:note") val refIdxs = convTokToFix.meta[JList[Int]]("nlpcraft:relation:indexes").asScala.sorted diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliBase.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliBase.scala index 381a663..378a27f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliBase.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliBase.scala @@ -65,7 +65,7 @@ class NCCliBase extends App { // | 
MAKE SURE TO UPDATE THIS VAR WHEN NUMBER OF SERVICES IS CHANGED. | // +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^+ final val NUM_SRV_SERVICES = 31 /*services*/ + 1 /*progress start*/ - final val NUM_PRB_SERVICES = 24 /*services*/ + 1 /*progress start*/ + final val NUM_PRB_SERVICES = 25 /*services*/ + 1 /*progress start*/ // +^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^^+ // | MAKE SURE TO UPDATE THIS VAR WHEN NUMBER OF SERVICES IS CHANGED. | // +==================================================================+ diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala index 561860f..dd811fc 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala @@ -42,6 +42,7 @@ import org.apache.nlpcraft.probe.mgrs.lifecycle.NCLifecycleManager import org.apache.nlpcraft.probe.mgrs.model.NCModelManager import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnrichmentManager import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.dictionary.NCDictionaryEnricher +import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.function.NCFunctionEnricher import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.limit.NCLimitEnricher import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model.NCModelEnricher import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.relation.NCRelationEnricher @@ -518,6 +519,7 @@ private [probe] object NCProbeBoot extends LazyLogging with NCOpenCensusTrace { startedMgrs += NCStopWordEnricher.start(span) startedMgrs += NCModelEnricher.start(span) startedMgrs += NCLimitEnricher.start(span) + startedMgrs += NCFunctionEnricher.start(span) startedMgrs += NCSortEnricher.start(span) startedMgrs += NCRelationEnricher.start(span) startedMgrs += NCSuspiciousNounsEnricher.start(span) diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala index 2b91128..ddcaa19 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeVariants.scala @@ -52,7 +52,7 @@ object NCProbeVariants { ) t.getId match { - case "nlpcraft:relation" | "nlpcraft:limit" => meta += "nlpcraft:relation:indexes" -> IDXS + case "nlpcraft:relation" | "nlpcraft:limit" | "nlpcraft:function" => meta += s"${t.getId}:indexes" -> IDXS case "nlpcraft:sort" => meta += "nlpcraft:sort:subjindexes" -> IDXS2; meta += "nlpcraft:sort:byindexes" -> IDXS2 case _ => // No-op. } @@ -95,7 +95,7 @@ object NCProbeVariants { val ps = mkNlpNoteParams() delNote.noteType match { - case "nlpcraft:relation" | "nlpcraft:limit" => ps += "indexes" -> IDXS + case "nlpcraft:relation" | "nlpcraft:limit" | "nlpcraft:function" => ps += "indexes" -> IDXS case "nlpcraft:sort" => ps += "subjindexes" -> IDXS2; ps += "byindexes" -> IDXS2 case _ => // No-op. 
} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCTokenPartKey.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCTokenPartKey.scala index c89cae1..6fb9ca7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCTokenPartKey.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCTokenPartKey.scala @@ -56,6 +56,11 @@ object NCTokenPartKey { "limit" -> part.meta[Double](s"$id:limit"), "note" -> part.meta[String](s"$id:note") ) + case "nlpcraft:function" => + Map( + "type" -> part.meta[String](s"$id:type"), + "note" -> part.meta[String](s"$id:note") + ) case "nlpcraft:sort" => val m = mutable.HashMap.empty[String, Any] diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala index 332dd26..1be7644 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala @@ -1060,11 +1060,11 @@ object NCDeployManager extends NCService { mdl.getElements.asScala.foreach(e => checkMandatoryString(e.getId,"element.id", MODEL_ELEMENT_ID_MAXLEN)) for ((elm, restrs: util.Set[String]) <- mdl.getRestrictedCombinations.asScala) { - if (elm != "nlpcraft:limit" && elm != "nlpcraft:sort" && elm != "nlpcraft:relation") + if (elm != "nlpcraft:limit" && elm != "nlpcraft:sort" && elm != "nlpcraft:relation" && elm != "nlpcraft:function") throw new NCE(s"Unsupported restricting element [" + s"mdlId=$mdlId, " + s"elmId=$elm" + - s"]. Only 'nlpcraft:limit', 'nlpcraft:sort', and 'nlpcraft:relation' are allowed.") + s"]. 
Only 'nlpcraft:limit', 'nlpcraft:sort', 'nlpcraft:function' and 'nlpcraft:relation' are allowed.") if (restrs.contains(elm)) throw new NCE(s"Element cannot be restricted to itself [" + s"mdlId=$mdlId, " + diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala index 10b2cf7..381f2c9 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala @@ -35,6 +35,7 @@ import org.apache.nlpcraft.probe.mgrs.conversation.NCConversationManager import org.apache.nlpcraft.probe.mgrs.dialogflow.NCDialogFlowManager import org.apache.nlpcraft.probe.mgrs.model.NCModelManager import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.dictionary.NCDictionaryEnricher +import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.function.NCFunctionEnricher import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.limit.NCLimitEnricher import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model.NCModelEnricher import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.relation.NCRelationEnricher @@ -433,6 +434,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats { Some(Holder(NCModelEnricher, () => nlpSen.flatten.filter(_.isUser))), get("nlpcraft:sort", NCSortEnricher), get("nlpcraft:limit", NCLimitEnricher), + get("nlpcraft:function", NCFunctionEnricher), get("nlpcraft:relation", NCRelationEnricher) ).flatten @@ -483,6 +485,7 @@ object NCProbeEnrichmentManager extends NCService with NCOpenCensusModelStats { h.enricher match { case NCSortEnricher => same = squeeze("nlpcraft:sort") case NCLimitEnricher => same = squeeze("nlpcraft:limit") + case NCFunctionEnricher => same = squeeze("nlpcraft:function") case NCRelationEnricher => same = squeeze("nlpcraft:relation") case _ => // No-op. 
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCFunctionEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCFunctionEnricher.scala new file mode 100644 index 0000000..d68c2ac --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCFunctionEnricher.scala @@ -0,0 +1,114 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.function + +import io.opencensus.trace.Span +import org.apache.nlpcraft.common.NCService +import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager +import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken} +import org.apache.nlpcraft.probe.mgrs.NCProbeModel +import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher + +import java.util.Collections +import scala.jdk.CollectionConverters.{MapHasAsScala, SetHasAsScala} + +/** + * Probe enricher that attaches `nlpcraft:function` notes to function words (e.g. "max", "sine") followed by exactly one user token. + */ +object NCFunctionEnricher extends NCProbeEnricher { + private final val TOK_ID = "nlpcraft:function" + + private case class SingeFunc(name: String, synonyms: Seq[String]) + + private object SingeFunc { + def apply(name: String, syns:String*): SingeFunc = SingeFunc(name, syns) + } + + private final val FUNC_NUM_SINGLE = + Set( + SingeFunc("sin", "sine"), + SingeFunc("cos", "cosine"), + SingeFunc("tan", "tangent"), + SingeFunc("cot", "cotangent"), + SingeFunc("round"), + SingeFunc("floor"), + SingeFunc("max", "maximum"), + SingeFunc("min", "minimum"), + SingeFunc("avg", "average"), + SingeFunc("sum", "summary") + ) + + @volatile private var funcNumSingleData: Map[String, String] = _ + + override def start(parent: Span = null): NCService = startScopedSpan("start", parent) { _ => + ackStarting() + + funcNumSingleData = + FUNC_NUM_SINGLE.flatMap(p => (p.synonyms :+ p.name).toSet.map(NCNlpCoreManager.stem).map(_ -> p.name).toMap).toMap + + ackStarted() + } + + /** + * Stops this enricher and drops the stem-to-function-name lookup table built by `start`. + * @param parent Optional parent span. + */ + override def stop(parent: Span = null): Unit = startScopedSpan("stop", parent) { _ => + ackStopping() + + funcNumSingleData = null + + ackStopped() + } + + override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span): Unit = { + require(isStarted) + + val restricted = + mdl.model.getRestrictedCombinations.asScala.getOrElse(TOK_ID, java.util.Collections.emptySet()). 
+ asScala + + startScopedSpan( + "enrich", parent, "srvReqId" -> ns.srvReqId, "mdlId" -> mdl.model.getId, "txt" -> ns.text + ) { _ => + val buf = collection.mutable.ArrayBuffer.empty[Seq[NCNlpSentenceToken]] // NOTE(review): nothing is ever appended to `buf`, so the `containsSlice` guard below is always true - confirm intent. + + for (toks <- ns.tokenMixWithStopWords() if toks.size > 1 && !buf.exists(_.containsSlice(toks))) { + funcNumSingleData.get(toks.head.stem) match { + case Some(f) => + val users = toks.tail.filter(_.isUser) + + if (users.size == 1 && toks.tail.forall(t => users.contains(t) || t.isStopWord)) { + for (typ <- users.head.filter(_.isUser).map(_.noteType) if !restricted.contains(typ)) + toks.head.add( + NCNlpSentenceNote( + Seq(toks.head.index), + TOK_ID, + "type" -> f, + "indexes" -> Collections.singletonList(users.head.index), // Must be a java.util.List: consumers read this value as JList[Int]. + "note" -> typ + ) + ) + } + + case None => // No-op. + } + } + } + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala index 50137a2..daf6796 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala @@ -46,7 +46,7 @@ object NCSentenceManager extends NCService { def getLinks(notes: Seq[NCNlpSentenceNote]): Seq[NoteLink] = { val noteLinks = mutable.ArrayBuffer.empty[NoteLink] - for (n <- notes.filter(n => n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references")) + for (n <- notes.filter(n => n.noteType == "nlpcraft:limit" || n.noteType == "nlpcraft:references" || n.noteType == "nlpcraft:function")) noteLinks += NoteLink(n("note").asInstanceOf[String], n("indexes").asInstanceOf[JList[Int]].asScala.toSeq.sorted) for (n <- notes.filter(_.noteType == "nlpcraft:sort")) { @@ -516,6 +516,7 @@ object NCSentenceManager extends NCService { fixNoteIndexes("nlpcraft:relation", "indexes", "note", ns) fixNoteIndexes("nlpcraft:limit", "indexes", "note", ns) + 
fixNoteIndexes("nlpcraft:function", "indexes", "note", ns) fixNoteIndexesList("nlpcraft:sort", "subjindexes", "subjnotes", ns) fixNoteIndexesList("nlpcraft:sort", "byindexes", "bynotes", ns) @@ -527,6 +528,7 @@ object NCSentenceManager extends NCService { val res = fixIndexesReferences("nlpcraft:relation", "indexes", "note", ns, histSeq) && fixIndexesReferences("nlpcraft:limit", "indexes", "note", ns, histSeq) && + fixIndexesReferences("nlpcraft:function", "indexes", "note", ns, histSeq) && fixIndexesReferencesList("nlpcraft:sort", "subjindexes", "subjnotes", ns, histSeq) && fixIndexesReferencesList("nlpcraft:sort", "byindexes", "bynotes", ns, histSeq) @@ -748,7 +750,7 @@ object NCSentenceManager extends NCService { addDeleted(sen, sen, swallowed) swallowed.foreach(sen.removeNote) - var sens = mkVariants( sen, mdl, lastPhase, overlappedNotes) + var sens = mkVariants(sen, mdl, lastPhase, overlappedNotes) sens.par.foreach(sen => sen.foreach(tok => diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala index b4d2f71..5dd797d 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/NCEnrichersTestBeans.scala @@ -248,6 +248,26 @@ case class NCTestRelationToken(text: String, `type`: String, indexes: Seq[Int], s", note=$note>" } +case class NCTestFunctionToken(text: String, `type`: String, indexes: Seq[Int], note: String) extends NCTestToken { + require(text != null) + require(`type` != null) + require(indexes != null) + require(indexes.nonEmpty) + require(note != null) + + override def id: String = "nlpcraft:function" + override def toString: String = + s"$text(function)" + + s"<type=${`type`}" + + s", indexes=[${indexes.mkString(",")}]" + + s", note=$note>" +} + +object NCTestFunctionToken { + 
def apply(text: String, `type`: String, index: Int, note: String):NCTestFunctionToken = + NCTestFunctionToken(text, `type`, Seq(index), note) +} + case class NCTestLimitToken( text: String, limit: Double, @@ -352,7 +372,15 @@ object NCTestToken { indexes = indexes.asScala.toSeq, note = t.meta("nlpcraft:relation:note") ) + case "nlpcraft:function" => + val indexes: JList[Int] = t.meta("nlpcraft:function:indexes") + NCTestFunctionToken( + txt, + `type` = t.meta("nlpcraft:function:type"), + indexes = indexes.asScala.toSeq, + note = t.meta("nlpcraft:function:note") + ) case "nlpcraft:limit" => val indexes: JList[Int] = t.meta("nlpcraft:limit:indexes") val asc: Optional[Boolean] = t.metaOpt("nlpcraft:limit:asc") diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCEnricherFunctionSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCEnricherFunctionSpec.scala new file mode 100644 index 0000000..6cc1929 --- /dev/null +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/function/NCEnricherFunctionSpec.scala @@ -0,0 +1,72 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.function + +import org.apache.nlpcraft.NCTestEnvironment +import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCDefaultTestModel, NCEnricherBaseSpec, NCTestNlpToken => nlp, NCTestFunctionToken => fun, NCTestUserToken => usr} +import org.junit.jupiter.api.Test + +/** + * Function enricher test. + */ +@NCTestEnvironment(model = classOf[NCDefaultTestModel], startClient = true) +class NCEnricherFunctionSpec extends NCEnricherBaseSpec { + /** + * Checks that "max"/"maximum" become function tokens of type "max" referencing the following user token "A", with stop words in between allowed. + * @throws Exception + */ + @Test + def test(): Unit = + runBatch( + _ => checkAll( + "max A test", + Seq( + fun(text = "max", `type` = "max", index = 1, note = "A"), + usr(text = "A", id = "A"), + nlp(text = "test") + ) + ), + _ => checkAll( + "maximum the A, maximum the the A", + Seq( + fun(text = "maximum", `type` = "max", index = 2, note = "A"), + nlp(text = "the", isStop = true), + usr(text = "A", id = "A"), + nlp(text = ",", isStop = true), + fun(text = "maximum", `type` = "max", index = 6, note = "A"), + nlp(text = "the the", isStop = true), + usr(text = "A", id = "A") + ) + ), + _ => checkAll( + "maximum the A, maximum the the A the A", + Seq( + fun(text = "maximum", `type` = "max", index = 2, note = "A"), + nlp(text = "the", isStop = true), + usr(text = "A", id = "A"), + nlp(text = ",", isStop = true), + fun(text = "maximum", `type` = "max", index = 6, note = "A"), + nlp(text = "the the", isStop = true), + usr(text = "A", id = "A"), + nlp(text = "the", isStop = true), + usr(text = "A", id = "A") + ) + ) + + ) +}
