This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push:
new f3e56b3  'jiggleFactor' property deleted. 'permutateSynonyms' property default value changed. 'sparse' property added.
f3e56b3 is described below
commit f3e56b336f6b2bf80ab47ed03a0e3366136994d7
Author: Sergey Kamov <[email protected]>
AuthorDate: Fri Apr 16 09:49:48 2021 +0300
'jiggleFactor' property deleted. 'permutateSynonyms' property default value changed. 'sparse' property added.
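In practical terms, the boolean 'sparse' property replaces the numeric 'jiggleFactor' tuning knob, and synonym permutation is now opt-in. A minimal sketch of the new model-level surface, assuming the NCModelAdapter(id, name, version) convenience constructor and the default methods introduced in the diff below:

    import org.apache.nlpcraft.model.NCModelAdapter

    class MyModel extends NCModelAdapter("my.model.id", "My Model", "1.0") {
        // Replaces the deleted numeric 'jiggleFactor' knob with a boolean.
        override def isSparse: Boolean = true

        // Note: the model-wide default changed from true to false in this commit.
        override def isPermutateSynonyms: Boolean = true
    }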
---
.../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 7 +-
.../nlpcraft/common/nlp/NCNlpSentenceNote.scala | 36 +-
.../nlpcraft/common/pool/NCThreadPoolManager.scala | 2 +-
.../org/apache/nlpcraft/common/util/NCUtils.scala | 65 +-
.../examples/lightswitch/lightswitch_model.yaml | 2 +
.../apache/nlpcraft/examples/sql/sql_model.yaml | 4 +-
.../nlpcraft/examples/sql/sql_model_init.yaml | 4 +-
.../scala/org/apache/nlpcraft/model/NCElement.java | 19 +-
.../apache/nlpcraft/model/NCModelFileAdapter.java | 8 +-
.../org/apache/nlpcraft/model/NCModelView.java | 47 +-
.../apache/nlpcraft/model/impl/NCTokenImpl.scala | 8 +-
.../apache/nlpcraft/model/impl/NCTokenLogger.scala | 10 +-
.../nlpcraft/model/impl/json/NCElementJson.java | 10 +-
.../nlpcraft/model/impl/json/NCModelJson.java | 10 +-
.../model/tools/cmdline/NCCliRestSpec.scala | 2 +-
.../apache/nlpcraft/probe/mgrs/NCProbeModel.scala | 20 +-
.../nlpcraft/probe/mgrs/NCProbeSynonym.scala | 227 ++++--
.../probe/mgrs/deploy/NCDeployManager.scala | 72 +-
.../nlpcraft/probe/mgrs/model/NCModelManager.scala | 28 +-
.../probe/mgrs/nlp/NCProbeEnrichmentManager.scala | 2 +
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 766 ++++++++++-----------
.../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 26 +-
.../probe/mgrs/sentence/NCSentenceManager.scala | 37 +-
.../nlpcraft/examples/sql/NCSqlModelSpec.scala | 7 -
.../org/apache/nlpcraft/model/NCIdlSpec.scala | 2 +-
.../org/apache/nlpcraft/model/NCIdlSpec2.scala | 2 +-
.../model/abstract/NCAbstractTokensModel.scala | 2 +-
.../nlpcraft/model/intent/idl/NCIdlTestSpec.scala | 2 +-
.../intent/idl/compiler/NCIdlCompilerSpec.scala | 8 +-
.../nlpcraft/model/intent/idl/idl_test_model.yaml | 2 +-
.../model/properties/NCTokensPropertiesSpec.scala | 167 ++---
.../NCSparseSpec.scala} | 29 +-
.../nlpcraft/model/synonyms/NCSynonymsSpec.scala | 61 +-
.../model/NCEnricherNestedModelSpec.scala | 56 +-
.../model/NCEnricherNestedModelSpec2.scala | 9 +-
...ec4.scala => NCEnricherNestedModelSpec41.scala} | 26 +-
.../nlp/enrichers/sort/NCEnricherSortSpec.scala | 199 +++++-
37 files changed, 1180 insertions(+), 804 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
index 91ca5a9..ef42781 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala
@@ -39,13 +39,17 @@ import org.apache.nlpcraft.common.nlp.NCNlpSentence._
* @param text Normalized text.
* @param enabledBuiltInToks Enabled built-in tokens.
* @param tokens Initial buffer.
+ * @param firstProbePhase Flag indicating whether this is the first probe processing phase.
* @param deletedNotes Deleted overridden notes with their tokens.
+ * @param initNlpNotes Initial NLP notes.
+ * @param nlpTokens NLP tokens.
*/
class NCNlpSentence(
val srvReqId: String,
val text: String,
val enabledBuiltInToks: Set[String],
override val tokens: mutable.ArrayBuffer[NCNlpSentenceToken] = new
mutable.ArrayBuffer[NCNlpSentenceToken](32),
+ var firstProbePhase: Boolean = true,
private val deletedNotes: mutable.HashMap[NCNlpSentenceNote,
Seq[NCNlpSentenceToken]] = mutable.HashMap.empty,
private var initNlpNotes: Map[NoteKey, NCNlpSentenceNote] = null,
private val nlpTokens: mutable.HashMap[TokenKey, NCNlpSentenceToken] =
mutable.HashMap.empty
@@ -65,7 +69,8 @@ class NCNlpSentence(
tokens = tokens.map(_.clone()),
deletedNotes = deletedNotes.map(p ⇒ p._1.clone() →
p._2.map(_.clone())),
initNlpNotes = initNlpNotes,
- nlpTokens = nlpTokens
+ nlpTokens = nlpTokens,
+ firstProbePhase = firstProbePhase
)
/**
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
index e51ab50..9adbe01 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceNote.scala
@@ -17,18 +17,19 @@
package org.apache.nlpcraft.common.nlp
-import org.apache.nlpcraft.common._
+import org.apache.nlpcraft.common.U
import org.apache.nlpcraft.common.ascii._
import scala.collection.JavaConverters._
import scala.collection.{Seq, Set, mutable}
import scala.language.implicitConversions
+import java.io.{Serializable ⇒ JSerializable}
/**
* Sentence token note is a typed map of KV pairs.
*
*/
-class NCNlpSentenceNote(private val values: Map[String, java.io.Serializable])
extends java.io.Serializable with NCAsciiLike {
+class NCNlpSentenceNote(private val values: Map[String, JSerializable])
extends JSerializable with NCAsciiLike {
import NCNlpSentenceNote._
@transient
@@ -75,7 +76,7 @@ class NCNlpSentenceNote(private val values: Map[String,
java.io.Serializable]) e
)
override def clone(): NCNlpSentenceNote = {
- val m = mutable.Map.empty[String, java.io.Serializable] ++ values
+ val m = mutable.Map.empty[String, JSerializable] ++ values
new NCNlpSentenceNote(m.toMap)
}
@@ -91,20 +92,20 @@ class NCNlpSentenceNote(private val values: Map[String,
java.io.Serializable]) e
*
* @return
*/
- def skipNlp(): Map[String, java.io.Serializable] =
+ def skipNlp(): Map[String, JSerializable] =
values.filter { case (key, _) ⇒ !SKIP_CLONE.contains(key) && key !=
"noteType" }
/**
*
*/
- def asMetadata(): Map[String, java.io.Serializable] =
+ def asMetadata(): Map[String, JSerializable] =
if (isUser)
values.get("meta") match {
- case Some(meta) ⇒ meta.asInstanceOf[Map[String,
java.io.Serializable]]
- case None ⇒ Map.empty[String, java.io.Serializable]
+ case Some(meta) ⇒ meta.asInstanceOf[Map[String, JSerializable]]
+ case None ⇒ Map.empty[String, JSerializable]
}
else {
- val md = mutable.Map.empty[String, java.io.Serializable]
+ val md = mutable.Map.empty[String, JSerializable]
val m = if (noteType != "nlpcraft:nlp") skipNlp() else values
@@ -117,8 +118,8 @@ class NCNlpSentenceNote(private val values: Map[String,
java.io.Serializable]) e
*
* @param kvs
*/
- def clone(kvs : (String, java.io.Serializable)*): NCNlpSentenceNote = {
- val m = mutable.HashMap.empty[String, java.io.Serializable] ++ values
+ def clone(kvs : (String, JSerializable)*): NCNlpSentenceNote = {
+ val m = mutable.HashMap.empty[String, JSerializable] ++ values
kvs.foreach(kv ⇒ m += kv._1 → kv._2)
@@ -174,10 +175,11 @@ class NCNlpSentenceNote(private val values: Map[String,
java.io.Serializable]) e
values.toSeq.sortBy(t ⇒ { // Don't show internal ID.
val typeSort = t._1 match {
case "noteType" ⇒ 0
- case "wordIndexes" ⇒ 1
- case "direct" ⇒ 2
- case "sparsity" ⇒ 3
- case "parts" ⇒ 4
+ case "origText" ⇒ 1
+ case "wordIndexes" ⇒ 2
+ case "direct" ⇒ 3
+ case "sparsity" ⇒ 4
+ case "parts" ⇒ 5
case _ ⇒ 100
}
@@ -205,7 +207,7 @@ object NCNlpSentenceNote {
/**
* To immutable map.
*/
- implicit def values(note: NCNlpSentenceNote): Map[String,
java.io.Serializable] = note.values
+ implicit def values(note: NCNlpSentenceNote): Map[String, JSerializable] =
note.values
/**
* Creates new note with given parameters.
@@ -227,7 +229,7 @@ object NCNlpSentenceNote {
val (sparsity, tokMinIndex, tokMaxIndex, tokWordIndexes, len) =
calc(wordIndexesOpt.getOrElse(indexes))
new NCNlpSentenceNote(
- mutable.HashMap[String, java.io.Serializable]((
+ mutable.HashMap[String, JSerializable]((
params.filter(_._2 != null) :+
("noteType" → typ) :+
("tokMinIndex" → indexes.min) :+
@@ -239,7 +241,7 @@ object NCNlpSentenceNote {
("wordLength" → len) :+
("sparsity" → sparsity) :+
("contiguous" → (sparsity == 0))
- ).map(p ⇒ p._1 → p._2.asInstanceOf[java.io.Serializable]):
_*).toMap
+ ).map(p ⇒ p._1 → p._2.asInstanceOf[JSerializable]): _*).toMap
)
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/pool/NCThreadPoolManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/pool/NCThreadPoolManager.scala
index 58f419d..f53e201 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/pool/NCThreadPoolManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/pool/NCThreadPoolManager.scala
@@ -28,9 +28,9 @@ import scala.concurrent.ExecutionContext
* Common thread pool manager.
*/
object NCThreadPoolManager extends NCService {
+ // TODO: in the future - we may need to open this to user configuration.
/**
* Pools that should NOT default to a system context.
- * TODO: in the future - we may need to open this to user configuration.
*/
private final val NON_SYS_POOLS = Seq(
"probes.communication",
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
index 42023ae..f0c9f2b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala
@@ -878,8 +878,7 @@ object NCUtils extends LazyLogging {
* @param file File.
*/
@throws[NCE]
- def serialize(file: File, obj: Any): Unit =
- serializePath(file.getAbsolutePath, obj)
+ def serialize(file: File, obj: Any): Unit =
serializePath(file.getAbsolutePath, obj)
/**
* Deserializes data from file.
@@ -908,11 +907,10 @@ object NCUtils extends LazyLogging {
*/
@throws[NCE]
def deserialize[T](arr: Array[Byte]): T =
- try {
+ try
manageInput(new ByteArrayInputStream(arr)) acquireAndGet { in ⇒
in.readObject().asInstanceOf[T]
}
- }
catch {
case e: IOException ⇒ throw new NCE(s"Error deserialization data",
e)
}
@@ -930,14 +928,14 @@ object NCUtils extends LazyLogging {
*
* @param in
*/
- private def manageInput(in: InputStream) =
+ private def manageInput(in: InputStream):
ManagedResource[ObjectInputStream] =
managed(new ObjectInputStream(new BufferedInputStream(in)))
/**
*
* @param out
*/
- private def manageOutput(out: OutputStream) =
+ private def manageOutput(out: OutputStream):
ManagedResource[ObjectOutputStream] =
managed(new ObjectOutputStream(new BufferedOutputStream(out)))
/**
@@ -945,8 +943,7 @@ object NCUtils extends LazyLogging {
*
* @param s String value.
*/
- def wrapQuotes(s: String): String =
- s""""$s""""
+ def wrapQuotes(s: String): String = s""""$s""""
/**
* Recursively removes all files and nested directories in a given folder.
@@ -960,7 +957,7 @@ object NCUtils extends LazyLogging {
try
Files.walkFileTree(rootPath, new SimpleFileVisitor[Path] {
- private def delete(path: Path) = {
+ private def delete(path: Path): FileVisitResult = {
Files.delete(path)
FileVisitResult.CONTINUE
@@ -2113,4 +2110,52 @@ object NCUtils extends LazyLogging {
* @return
*/
def getYamlMapper: ObjectMapper = YAML
-}
+
+ /**
+ *
+ * @param list
+ * @tparam T
+ * @return
+ */
+ def permute[T](list: List[List[T]]): List[List[T]] =
+ list match {
+ case Nil ⇒ List(Nil)
+ case head :: tail ⇒ for (h ← head; t ← permute(tail)) yield h :: t
+ }
+
+ /**
+ *
+ * @param idxs
+ * @return
+ */
+ def isContinuous(idxs: Seq[Int]): Boolean = {
+ require(idxs.nonEmpty)
+
+ idxs.size match {
+ case 0 ⇒ throw new AssertionError()
+ case 1 ⇒ true
+ case _ ⇒
+ val list = idxs.view
+
+ list.zip(list.tail).forall { case (x, y) ⇒ x + 1 == y }
+ }
+ }
+
+ /**
+ *
+ * @param idxs
+ * @return
+ */
+ def isIncreased(idxs: Seq[Int]): Boolean = {
+ require(idxs.nonEmpty)
+
+ idxs.size match {
+ case 0 ⇒ throw new AssertionError()
+ case 1 ⇒ true
+ case _ ⇒
+ val list = idxs.view
+
+ !list.zip(list.tail).exists { case (x, y) ⇒ x > y }
+ }
+ }
+}
\ No newline at end of file
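For reference, the three helpers added to NCUtils above are small and pure; the following illustrative calls (values invented for the example) show their behavior:

    import org.apache.nlpcraft.common.util.NCUtils

    // permute: cartesian product across sub-lists, preserving sub-list order.
    NCUtils.permute(List(List(1, 2), List(3)))  // List(List(1, 3), List(2, 3))

    // isContinuous: indexes are strictly consecutive, i.e. no gaps.
    NCUtils.isContinuous(Seq(3, 4, 5))          // true
    NCUtils.isContinuous(Seq(3, 5))             // false

    // isIncreased: indexes never decrease.
    NCUtils.isIncreased(Seq(1, 1, 2))           // true
    NCUtils.isIncreased(Seq(2, 1))              // false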
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/lightswitch/lightswitch_model.yaml
b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/lightswitch/lightswitch_model.yaml
index c7f4fa6..6ad9b7e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/lightswitch/lightswitch_model.yaml
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/lightswitch/lightswitch_model.yaml
@@ -33,6 +33,8 @@ macros:
- name: "<LIGHT>"
macro: "{all|_} {it|them|light|illumination|lamp|lamplight}"
enabledBuiltInTokens: [] # This example doesn't use any built-in tokens.
+permutateSynonyms: true
+sparse: true
elements:
- id: "ls:loc"
description: "Location of lights."
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model.yaml
b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model.yaml
index 856e1ae..0df382a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model.yaml
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model.yaml
@@ -1506,7 +1506,6 @@ maxWords: 50
minTokens: 0
maxTokens: 50
minNonStopwords: 0
-jiggleFactor: 2
maxTotalSynonyms: 2147483647
nonEnglishAllowed: true
notLatinCharsetAllowed: false
@@ -1514,4 +1513,5 @@ swearWordsAllowed: false
noNounsAllowed: true
noUserTokensAllowed: true
dupSynonymsAllowed: true
-permutateSynonyms: true
\ No newline at end of file
+permutateSynonyms: true
+sparse: true
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model_init.yaml
b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model_init.yaml
index c85950c..d8cfc3e 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model_init.yaml
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/sql_model_init.yaml
@@ -1415,7 +1415,6 @@ maxWords: 50
minTokens: 0
maxTokens: 50
minNonStopwords: 0
-jiggleFactor: 2
maxTotalSynonyms: 2147483647
nonEnglishAllowed: true
notLatinCharsetAllowed: false
@@ -1423,4 +1422,5 @@ swearWordsAllowed: false
noNounsAllowed: true
noUserTokensAllowed: true
dupSynonymsAllowed: true
-permutateSynonyms: true
+permutateSynonyms: false
+sparse: false
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
index f24bb1f..c0d9ec6 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java
@@ -358,33 +358,28 @@ public interface NCElement extends NCMetadata,
Serializable {
}
/**
- * Measure of how much sparsity is allowed when user input words are
permutated in attempt to
- * match the multi-word synonyms. Zero means no reordering is allowed. One
means
- * that a word in a synonym can move only one position left or right, and
so on. Empirically
- * the value of {@code 2} proved to be a good default value in most cases.
Note that larger
- * values mean that synonym words can be almost in any random place in the
user input which makes
- * synonym matching practically meaningless. Maximum value is
<code>4</code>.
+ * Whether or not this element allows gaps, i.e. unmatched non-stopword words, in its
multi-word synonyms.
* <p>
- * This property overrides the value from {@link
NCModelView#getJiggleFactor()} ()}.
+ * This property overrides the value from {@link NCModelView#isSparse()}.
* One should use this property if model's value isn't applicable to this
element.
* <p>
* <b>JSON</b>
* <br>
- * If using JSON/YAML model presentation this is set by
<code>jiggleFactor</code>:
+ * If using JSON/YAML model presentation this is set by
<code>sparse</code>:
* <pre class="brush: js, highlight: [4]">
* "elements": [
* {
* "id": "elem",
- * "jiggleFactor": 1,
+ * "sparse": true,
* ...
* }
* ]
* </pre>
*
- * @return Optional word jiggle factor (sparsity measure) overriding
model's one.
- * @see NCModelView#getJiggleFactor()
+ * @return Optional multi-word synonym sparsity property overriding
model's one.
+ * @see NCModelView#isSparse()
*/
- default Optional<Integer> getJiggleFactor() {
+ default Optional<Boolean> isSparse() {
return Optional.empty();
}
}
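A hedged sketch of the per-element override in Scala (the element ID is illustrative and NCElement's remaining members keep their defaults):

    import java.util.Optional
    import org.apache.nlpcraft.model.NCElement

    val sparseElem = new NCElement {
        override def getId: String = "my:elem" // Illustrative ID.

        // Allow gaps for this element even when the model-wide default is false.
        override def isSparse: Optional[java.lang.Boolean] = Optional.of(true)
    }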
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
index c39b5f6..843aa2d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java
@@ -353,8 +353,8 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
}
@Override
- public Optional<Integer> getJiggleFactor() {
- return nvl(js.getJiggleFactor(),
proxy.getJiggleFactor());
+ public Optional<Boolean> isSparse() {
+ return nvl(js.isSparse(), proxy.isSparse());
}
private<T> Optional<T> nvl(T t, T dflt) {
@@ -479,8 +479,8 @@ abstract public class NCModelFileAdapter extends
NCModelAdapter {
}
@Override
- public int getJiggleFactor() {
- return proxy.getJiggleFactor();
+ public boolean isSparse() {
+ return proxy.isSparse();
}
@Override
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
index 8501148..e7ea299 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java
@@ -124,16 +124,6 @@ public interface NCModelView extends NCMetadata {
long MAX_WORDS_MAX = 100L;
/**
- * Min value for {@link #getJiggleFactor()} method.
- */
- long JIGGLE_FACTOR_MIN = 0L;
-
- /**
- * Max value for {@link #getJiggleFactor()} method.
- */
- long JIGGLE_FACTOR_MAX = 4L;
-
- /**
* Min value for {@link #getMaxElementSynonyms()} method.
*/
long MAX_SYN_MIN = 1L;
@@ -169,9 +159,9 @@ public interface NCModelView extends NCMetadata {
int MODEL_VERSION_MAXLEN = 16;
/**
- * Default value for {@link #getJiggleFactor()} method.
+ * Default value for {@link #isSparse()} method.
*/
- int DFLT_JIGGLE_FACTOR = 2;
+ boolean DFLT_IS_SPARSE = false;
/**
* Default value for {@link #getMaxElementSynonyms()} method.
@@ -199,7 +189,7 @@ public interface NCModelView extends NCMetadata {
int DFLT_CONV_DEPTH = 3;
/**
- * Default value for {@link #getJiggleFactor()} method.
+ * Default value for {@link #getMetadata()} method.
*/
Map<String, Object> DFLT_METADATA = new HashMap<>();
@@ -266,7 +256,7 @@ public interface NCModelView extends NCMetadata {
/**
* Default value for {@link #isPermutateSynonyms()} method.
*/
- boolean DFLT_IS_PERMUTATE_SYNONYMS = true;
+ boolean DFLT_IS_PERMUTATE_SYNONYMS = false;
/**
* Default value for {@link #isDupSynonymsAllowed()} method.
@@ -696,6 +686,9 @@ public interface NCModelView extends NCMetadata {
* For example, if permutation is allowed the synonym "a b c" will be
automatically converted into a
* sequence of synonyms of "a b c", "b a c", "a c b".
* <p>
+ * Note that individual model elements can override this property using
{@link NCElement#isPermutateSynonyms()}
+ * method.
+ * <p>
* <b>Default</b>
* <br>
* If not provided by the model the default value {@link
#DFLT_IS_PERMUTATE_SYNONYMS} will be used.
@@ -710,6 +703,7 @@ public interface NCModelView extends NCMetadata {
* </pre>
*
* @return Whether or not to permutate multi-word synonyms.
+ * @see NCElement#isPermutateSynonyms()
*/
default boolean isPermutateSynonyms() {
return DFLT_IS_PERMUTATE_SYNONYMS;
@@ -791,30 +785,29 @@ public interface NCModelView extends NCMetadata {
}
/**
- * Measure of how much sparsity is allowed when user input words are
permutated in attempt to
- * match the multi-word synonyms. Zero means no reordering is allowed. One
means
- * that a word in a synonym can move only one position left or right, and
so on. Empirically
- * the value of {@code 2} proved to be a good default value in most cases.
Note that larger
- * values mean that synonym words can be almost in any random place in the
user input which makes
- * synonym matching practically meaningless. Maximum value is
<code>4</code>.
+ * Whether or not this model's elements allow gaps, i.e. unmatched non-stopword words,
in their multi-word synonyms.
+ * <p>
+ * Note that individual model elements can override this property using
{@link NCElement#isSparse()}
+ * method.
* <p>
* <b>Default</b>
* <br>
- * If not provided by the model the default value {@link
#DFLT_JIGGLE_FACTOR} will be used.
+ * If not provided by the model the default value {@link #DFLT_IS_SPARSE}
will be used.
* <p>
* <b>JSON</b>
* <br>
- * If using JSON/YAML model presentation this is set by
<code>jiggleFactor</code> property:
- * <pre class="brush: js">
+ * If using JSON/YAML model presentation this is set by
<code>sparse</code>:
+ * <pre class="brush: js, highlight: [2]">
* {
- * "jiggleFactor": 2
+ * "sparse": true
* }
* </pre>
*
- * @return Word jiggle factor (sparsity measure).
+ * @return Whether or not sparse matching of multi-word synonyms is enabled.
+ * @see NCElement#isSparse()
*/
- default int getJiggleFactor() {
- return DFLT_JIGGLE_FACTOR;
+ default boolean isSparse() {
+ return DFLT_IS_SPARSE;
}
/**
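The permutation described above is deliberately shallow: only single adjacent swaps of the original chunk order are generated, which is why "a b c" yields exactly "a b c", "b a c" and "a c b". A self-contained sketch of that idea (an illustration, not the deploy manager's actual helper):

    def adjacentSwaps[T](seq: Seq[T]): Seq[Seq[T]] =
        seq +: (0 until seq.size - 1).map(i ⇒
            seq.updated(i, seq(i + 1)).updated(i + 1, seq(i))
        )

    adjacentSwaps(Seq("a", "b", "c"))
    // Seq(Seq(a, b, c), Seq(b, a, c), Seq(a, c, b)) - as in the javadoc above.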
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
index 017ead1..8c5005a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenImpl.scala
@@ -17,7 +17,7 @@
package org.apache.nlpcraft.model.impl
-import java.io.Serializable
+import java.io.{Serializable ⇒ JSerializable}
import java.util.Collections
import org.apache.nlpcraft.common._
@@ -50,7 +50,7 @@ private[nlpcraft] class NCTokenImpl(
endCharIndex: Int,
meta: Map[String, Object],
isAbstractProp: Boolean
-) extends NCToken with Serializable {
+) extends NCToken with JSerializable {
require(mdl != null)
require(srvReqId != null)
require(id != null)
@@ -106,7 +106,7 @@ private[nlpcraft] object NCTokenImpl {
// nlpcraft:nlp and some optional (after collapsing).
require(tok.size <= 2, s"Unexpected token [size=${tok.size},
token=$tok]")
- val md = mutable.HashMap.empty[String, java.io.Serializable]
+ val md = mutable.HashMap.empty[String, JSerializable]
tok.foreach(n ⇒ {
val id = n.noteType.toLowerCase
@@ -142,7 +142,7 @@ private[nlpcraft] object NCTokenImpl {
// Special synthetic meta data element.
md.put("nlpcraft:nlp:freeword", false)
- elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k,
v.asInstanceOf[java.io.Serializable]) }
+ elm.getMetadata.asScala.foreach { case (k, v) ⇒ md.put(k,
v.asInstanceOf[JSerializable]) }
new NCTokenImpl(
mdl.model,
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
index a99a43a..76889ca 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/NCTokenLogger.scala
@@ -336,10 +336,14 @@ object NCTokenLogger extends LazyLogging {
val v = if (sorted.lengthCompare(1) > 0) vals2String(sorted) else
sorted.map(p ⇒ s"${p._2}").mkString(", ")
- if (note.tokenFrom < note.tokenTo)
- s"$v ${s"<${note.tokenFrom} to ${note.tokenTo}>"}"
+ if (note.tokenFrom < note.tokenTo) {
+ if (note.tokenIndexes.tail.zipWithIndex.forall { case (v, i) ⇒ v
== note.tokenIndexes(i) + 1 })
+ s"$v ${s"<${note.tokenFrom} to ${note.tokenTo}>"}"
+ else
+ s"$v ${s"<${note.tokenIndexes.mkString(",")}>"}"
+ }
else
- s"$v"
+ s"${if (v.isEmpty) "<>" else v}"
}
private def mkCells(hs: Seq[NoteMetadata], t: NCNlpSentenceToken):
Seq[String] = {
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
index 7419938..addca45 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java
@@ -35,7 +35,7 @@ public class NCElementJson {
// Can be null.
private Boolean isPermutateSynonyms;
// Can be null.
- private Integer jiggleFactor;
+ private Boolean isSparse;
public String getParentId() {
return parentId;
@@ -91,10 +91,10 @@ public class NCElementJson {
public void setPermutateSynonyms(Boolean permutateSynonyms) {
isPermutateSynonyms = permutateSynonyms;
}
- public Integer getJiggleFactor() {
- return jiggleFactor;
+ public Boolean isSparse() {
+ return isSparse;
}
- public void setJiggleFactor(Integer jiggleFactor) {
- this.jiggleFactor = jiggleFactor;
+ public void setSparse(Boolean sparse) {
+ isSparse = sparse;
}
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
index 3040c60..d2459d3 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java
@@ -54,10 +54,10 @@ public class NCModelJson {
private boolean isSwearWordsAllowed = DFLT_IS_SWEAR_WORDS_ALLOWED;
private boolean isNoNounsAllowed = DFLT_IS_NO_NOUNS_ALLOWED;
private boolean isNoUserTokensAllowed = DFLT_IS_NO_USER_TOKENS_ALLOWED;
- private int jiggleFactor = DFLT_JIGGLE_FACTOR;
private boolean isDupSynonymsAllowed = DFLT_IS_DUP_SYNONYMS_ALLOWED;
private int maxTotalSynonyms = DFLT_MAX_TOTAL_SYNONYMS;
private boolean isPermutateSynonyms = DFLT_IS_PERMUTATE_SYNONYMS;
+ private boolean isSparse = DFLT_IS_SPARSE;
private int maxElementSynonyms = DFLT_MAX_TOTAL_SYNONYMS;
private boolean maxSynonymsThresholdError =
DFLT_MAX_SYNONYMS_THRESHOLD_ERROR;
private long conversationTimeout = DFLT_CONV_TIMEOUT_MS;
@@ -199,11 +199,11 @@ public class NCModelJson {
public void setNoUserTokensAllowed(boolean noUserTokensAllowed) {
isNoUserTokensAllowed = noUserTokensAllowed;
}
- public int getJiggleFactor() {
- return jiggleFactor;
+ public boolean isSparse() {
+ return isSparse;
}
- public void setJiggleFactor(int jiggleFactor) {
- this.jiggleFactor = jiggleFactor;
+ public void setSparse(boolean sparse) {
+ isSparse = sparse;
}
public boolean isDupSynonymsAllowed() {
return isDupSynonymsAllowed;
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliRestSpec.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliRestSpec.scala
index 47dfca1..7a420bc 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliRestSpec.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCliRestSpec.scala
@@ -38,9 +38,9 @@ private [cmdline] case class RestSpecParameter(
optional: Boolean = false // Mandatory by default.
)
+// TODO: this needs to be loaded dynamically from OpenAPI spec.
/**
* NLPCraft REST specification.
- * TODO: this needs to be loaded dynamically from OpenAPI spec.
*/
private [cmdline] object NCCliRestSpec {
//noinspection DuplicatedCode
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
index e436e2b..03c5cb3 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeModel.scala
@@ -28,9 +28,9 @@ import scala.collection.{Map, Seq}
* @param model
* @param solver
* @param intents
- * @param synonyms
- * @param synonymsDsl
- * @param addStopWordsStems
+ * @param continuousSynonyms
+ * @param sparseSynonyms
+ * @param idlSynonyms
* @param exclStopWordsStems
* @param suspWordsStems
* @param elements
@@ -39,11 +39,19 @@ case class NCProbeModel(
model: NCModel,
solver: NCIntentSolver,
intents: Seq[NCIdlIntent],
- synonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ ,
NCProbeSynonymsWrapper]], // Fast access map.
- synonymsDsl: Map[String /*Element ID*/ , Map[Int /*Synonym length*/ ,
Seq[NCProbeSynonym]]], // Fast access map.
+ continuousSynonyms: Map[String /*Element ID*/ , Map[Int /*Synonym length*/
, NCProbeSynonymsWrapper]], // Fast access map.
+ sparseSynonyms: Map[String /*Element ID*/, Seq[NCProbeSynonym]],
+ idlSynonyms: Map[String /*Element ID*/ , Seq[NCProbeSynonym]], // Fast
access map.
addStopWordsStems: Set[String],
exclStopWordsStems: Set[String],
suspWordsStems: Set[String],
elements: Map[String /*Element ID*/ , NCElement],
samples: Set[(String, Seq[Seq[String]])]
-)
+) {
+ lazy val hasIdlSynonyms: Boolean = idlSynonyms.nonEmpty
+ lazy val hasNoIdlSynonyms: Boolean = continuousSynonyms.nonEmpty ||
sparseSynonyms.nonEmpty
+ lazy val hasSparseSynonyms: Boolean = sparseSynonyms.nonEmpty ||
idlSynonyms.exists(_._2.exists(_.sparse))
+ lazy val hasContinuousSynonyms: Boolean = continuousSynonyms.nonEmpty ||
idlSynonyms.exists(_._2.exists(!_.sparse))
+
+ def hasIdlSynonyms(elemId: String): Boolean = idlSynonyms.contains(elemId)
+}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index 1e9095b..d3d1e5a 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -17,13 +17,14 @@
package org.apache.nlpcraft.probe.mgrs
+import org.apache.nlpcraft.common.U
import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken,
NCNlpSentenceTokenBuffer}
import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
import org.apache.nlpcraft.model.intent.NCIdlContext
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
import org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind._
-import scala.collection.mutable.ArrayBuffer
+import scala.collection.mutable
/**
*
@@ -33,19 +34,24 @@ import scala.collection.mutable.ArrayBuffer
* In this case chunks contain value name.
* @param isDirect Direct or permuted synonym flag.
* @param value Optional value name if this is a value synonym.
+ * @param sparse Whether gaps are allowed when matching this multi-word synonym.
+ * @param permute Whether this synonym's words can be matched in permuted order.
*/
class NCProbeSynonym(
val isElementId: Boolean,
val isValueName: Boolean,
val isDirect: Boolean,
- val value: String = null
-) extends ArrayBuffer[NCProbeSynonymChunk] with Ordered[NCProbeSynonym] {
+ val value: String = null,
+ val sparse: Boolean,
+ val permute: Boolean
+) extends mutable.ArrayBuffer[NCProbeSynonymChunk] with
Ordered[NCProbeSynonym] {
require((isElementId && !isValueName && value == null) || !isElementId)
require((isValueName && value != null) || !isValueName)
-
+
lazy val isTextOnly: Boolean = forall(_.kind == TEXT)
lazy val regexChunks: Int = count(_.kind == REGEX)
- lazy val dslChunks: Int = count(_.kind == IDL)
+ lazy val idlChunks: Int = count(_.kind == IDL)
+ lazy val hasIdl: Boolean = idlChunks != 0
lazy val isValueSynonym: Boolean = value != null
lazy val stems: String = map(_.wordStem).mkString(" ")
lazy val stemsHash: Int = stems.hashCode
@@ -65,28 +71,114 @@ class NCProbeSynonym(
/**
*
+ * @param tok
+ * @param chunk
+ */
+ private def isMatch(tok: NCNlpSentenceToken, chunk: NCProbeSynonymChunk):
Boolean =
+ chunk.kind match {
+ case TEXT ⇒ chunk.wordStem == tok.stem
+ case REGEX ⇒
+ val regex = chunk.regex
+
+ regex.matcher(tok.origText).matches() ||
regex.matcher(tok.normText).matches()
+ case IDL ⇒ throw new AssertionError()
+ case _ ⇒ throw new AssertionError()
+ }
+
+ /**
+ *
* @param toks
+ * @param isMatch
+ * @param getIndex
+ * @param shouldBeNeighbors
+ * @tparam T
* @return
*/
+ private def sparseMatch0[T](
+ toks: Seq[T],
+ isMatch: (T, NCProbeSynonymChunk) ⇒ Boolean,
+ getIndex: T ⇒ Int,
+ shouldBeNeighbors: Boolean
+ ): Option[Seq[T]] =
+ if (toks.size >= this.size) {
+ lazy val res = mutable.ArrayBuffer.empty[T]
+ lazy val all = mutable.HashSet.empty[T]
+
+ var state = 0
+
+ for (chunk ← this if state != -1) {
+ val seq =
+ if (state == 0) {
+ state = 1
+
+ toks.filter(t ⇒ isMatch(t, chunk))
+ }
+ else
+ toks.filter(t ⇒ !res.contains(t) && isMatch(t, chunk))
+
+ if (seq.nonEmpty) {
+ val head = seq.head
+
+ if (!permute && res.nonEmpty && getIndex(head) <=
getIndex(res.last))
+ state = -1
+ else {
+ all ++= seq
+
+ if (all.size > this.size)
+ state = -1
+ else
+ res += head
+ }
+ }
+ else
+ state = -1
+ }
+
+ if (state != -1 && all.size == res.size && (!shouldBeNeighbors ||
U.isIncreased(res.map(getIndex).sorted)))
+ Some(res)
+ else
+ None
+ }
+ else
+ None
+
+ /**
+ *
+ * @param tow
+ * @param chunk
+ * @param req
+ */
+ private def isMatch(tow: NCIdlContent, chunk: NCProbeSynonymChunk, req:
NCRequest): Boolean = {
+ def get0[T](fromToken: NCToken ⇒ T, fromWord: NCNlpSentenceToken ⇒ T):
T =
+ if (tow.isLeft) fromToken(tow.left.get) else
fromWord(tow.right.get)
+
+ chunk.kind match {
+ case TEXT ⇒ chunk.wordStem == get0(_.stem, _.stem)
+ case REGEX ⇒
+ val r = chunk.regex
+
+ r.matcher(get0(_.origText, _.origText)).matches() ||
r.matcher(get0(_.normText, _.normText)).matches()
+
+ case IDL ⇒
+ get0(t ⇒ chunk.idlPred.apply(t, NCIdlContext(req =
req)).value.asInstanceOf[Boolean], _ ⇒ false)
+
+ case _ ⇒ throw new AssertionError()
+ }
+ }
+
+ /**
+ *
+ * @param toks
+ */
def isMatch(toks: NCNlpSentenceTokenBuffer): Boolean = {
require(toks != null)
+ require(!sparse && !hasIdl)
if (toks.length == length) {
if (isTextOnly)
toks.stemsHash == stemsHash && toks.stems == stems
else
- toks.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
- case (tok, chunk) ⇒
- chunk.kind match {
- case TEXT ⇒ chunk.wordStem == tok.stem
- case REGEX ⇒
- val regex = chunk.regex
-
- regex.matcher(tok.origText).matches() ||
regex.matcher(tok.normText).matches()
- case IDL ⇒ throw new AssertionError()
- case _ ⇒ throw new AssertionError()
- }
- }
+ toks.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall { case
(tok, chunk) ⇒ isMatch(tok, chunk) }
}
else
false
@@ -98,38 +190,50 @@ class NCProbeSynonym(
* @param req
* @return
*/
- def isMatch(tows: Seq[NCDslContent], req: NCRequest): Boolean = {
+ def isMatch(tows: Seq[NCIdlContent], req: NCRequest): Boolean = {
require(tows != null)
- if (tows.length == length && tows.count(_.isLeft) >= dslChunks)
- tows.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
- case (tow, chunk) ⇒
- def get0[T](fromToken: NCToken ⇒ T, fromWord:
NCNlpSentenceToken ⇒ T): T =
- if (tow.isLeft) fromToken(tow.left.get) else
fromWord(tow.right.get)
-
- chunk.kind match {
- case TEXT ⇒
- chunk.wordStem == get0(_.stem, _.stem)
-
- case REGEX ⇒
- val r = chunk.regex
-
- r.matcher(get0(_.origText, _.origText)).matches()
|| r.matcher(get0(_.normText, _.normText)).matches()
-
- case IDL ⇒
- get0(t ⇒ chunk.idlPred.apply(t, NCIdlContext(req =
req)).value.asInstanceOf[Boolean], _ ⇒ false)
-
- case _ ⇒ throw new AssertionError()
- }
- }
+ if (tows.length == length && tows.count(_.isLeft) >= idlChunks)
+ tows.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall { case (tow,
chunk) ⇒ isMatch(tow, chunk, req) }
else
false
}
+ /**
+ *
+ * @param toks
+ */
+ def sparseMatch(toks: NCNlpSentenceTokenBuffer):
Option[Seq[NCNlpSentenceToken]] = {
+ require(toks != null)
+ require(sparse && !hasIdl)
+
+ sparseMatch0(toks, isMatch, (t: NCNlpSentenceToken) ⇒
t.startCharIndex, shouldBeNeighbors = false)
+ }
+
+ /**
+ *
+ * @param tows
+ * @param req
+ */
+ def sparseMatch(tows: Seq[NCIdlContent], req: NCRequest):
Option[Seq[NCIdlContent]] = {
+ require(tows != null)
+ require(req != null)
+ require(hasIdl)
+
+ sparseMatch0(
+ tows,
+ (t: NCIdlContent, chunk: NCProbeSynonymChunk) ⇒ isMatch(t, chunk,
req),
+ (t: NCIdlContent) ⇒ if (t.isLeft) t.left.get.getStartCharIndex
else t.right.get.startCharIndex,
+ shouldBeNeighbors = !sparse
+ )
+ }
+
override def toString(): String = mkString(" ")
-
+
// Orders synonyms from least to most significant.
override def compare(that: NCProbeSynonym): Int = {
+ require(hasIdl || sparse == that.sparse, s"Invalid comparison
[this=$this, that=$that]")
+
def compareIsValueSynonym(): Int =
isValueSynonym match {
case true if !that.isValueSynonym ⇒ 1
@@ -137,7 +241,7 @@ class NCProbeSynonym(
case _ ⇒ 0
}
-
+
if (that == null)
1
else
@@ -156,15 +260,19 @@ class NCProbeSynonym(
1
else if (!isDirect && that.isDirect)
-1
+ else if (permute && !that.permute)
+ -1
+ else if (!permute && that.permute)
+ 1
else // Both direct or indirect.
isTextOnly match {
case true if !that.isTextOnly ⇒ 1
case false if that.isTextOnly ⇒ -1
case true if that.isTextOnly ⇒
compareIsValueSynonym()
case _ ⇒
- val thisDynCnt = regexChunks + dslChunks
- val thatDynCnt = that.regexChunks +
that.dslChunks
-
+ val thisDynCnt = regexChunks + idlChunks
+ val thatDynCnt = that.regexChunks +
that.idlChunks
+
// Less PoS/regex/IDL chunks means less
uncertainty, i.e. larger weight.
if (thisDynCnt < thatDynCnt)
1
@@ -176,41 +284,41 @@ class NCProbeSynonym(
}
}
}
-
+
override def canEqual(other: Any): Boolean =
other.isInstanceOf[NCProbeSynonym]
-
+
override def equals(other: Any): Boolean = other match {
case that: NCProbeSynonym ⇒
super.equals(that) &&
(that canEqual this) &&
isTextOnly == that.isTextOnly &&
regexChunks == that.regexChunks &&
- dslChunks == that.dslChunks &&
+ idlChunks == that.idlChunks &&
isValueSynonym == that.isValueSynonym &&
isElementId == that.isElementId &&
isValueName == that.isValueName &&
value == that.value
case _ ⇒ false
}
-
+
override def hashCode(): Int = {
val state = Seq(
super.hashCode(),
isTextOnly,
regexChunks,
- dslChunks,
+ idlChunks,
isValueSynonym,
isElementId,
isValueName,
value
)
-
+
state.map(p ⇒ if (p == null) 0 else p.hashCode()).foldLeft(0)((a, b) ⇒
31 * a + b)
}
}
object NCProbeSynonym {
- type NCDslContent = Either[NCToken, NCNlpSentenceToken]
+ type NCIdlContent = Either[NCToken, NCNlpSentenceToken]
/**
*
@@ -219,10 +327,19 @@ object NCProbeSynonym {
* @param isDirect
* @param value
* @param chunks
- * @return
+ * @param sparse
+ * @param permute
*/
- def apply(isElementId: Boolean, isValueName: Boolean, isDirect: Boolean,
value: String, chunks: Seq[NCProbeSynonymChunk]): NCProbeSynonym = {
- var syn = new NCProbeSynonym(isElementId, isValueName, isDirect, value)
+ def apply(
+ isElementId: Boolean,
+ isValueName: Boolean,
+ isDirect: Boolean,
+ value: String,
+ chunks: Seq[NCProbeSynonymChunk],
+ sparse: Boolean,
+ permute: Boolean
+ ): NCProbeSynonym = {
+ var syn = new NCProbeSynonym(isElementId, isValueName, isDirect,
value, sparse, permute)
syn ++= chunks
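The heart of the new sparseMatch0 above is a greedy scan: each chunk claims the first still-unclaimed matching token, and without permutation the claimed positions must advance left to right. A simplified, self-contained sketch of that idea (plain stems stand in for synonym chunks; all names are illustrative):

    def sparseMatch(toks: Seq[(String, Int)], chunks: Seq[String], permute: Boolean): Option[Seq[(String, Int)]] = {
        val res = collection.mutable.ArrayBuffer.empty[(String, Int)]

        for (chunk ← chunks) {
            toks.find(t ⇒ !res.contains(t) && t._1 == chunk) match {
                // Without permutation the match must advance left to right.
                case Some(t) if !permute && res.nonEmpty && t._2 <= res.last._2 ⇒ return None
                case Some(t) ⇒ res += t
                case None ⇒ return None
            }
        }

        Some(res)
    }

    val toks = Seq(("turn", 0), ("the", 1), ("light", 2), ("off", 3))

    sparseMatch(toks, Seq("turn", "off"), permute = false) // Some(...) - the gap is allowed.
    sparseMatch(toks, Seq("off", "turn"), permute = false) // None - out of order.
    sparseMatch(toks, Seq("off", "turn"), permute = true)  // Some(...) - order ignored.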
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
index 687a2d7..b7db8dc 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala
@@ -154,21 +154,6 @@ object NCDeployManager extends NCService with
DecorateAsScala {
s"regex=$ID_REGEX" +
s"]"
)
-
- elm.getJiggleFactor.asScala match {
- case Some(elemJiggleFactor) ⇒
- if (elemJiggleFactor < JIGGLE_FACTOR_MIN ||
elemJiggleFactor > JIGGLE_FACTOR_MAX)
- throw new NCE(
- s"Model element 'jiggleFactor' property is out of
range [" +
- s"mdlId=$mdlId, " +
- s"elm=${elm.getId}, " +
- s"value=$elemJiggleFactor," +
- s"min=$JIGGLE_FACTOR_MIN, " +
- s"max=$JIGGLE_FACTOR_MAX" +
- s"]"
- )
- case None ⇒ // No-op.
- }
}
checkMacros(mdl)
@@ -209,6 +194,10 @@ object NCDeployManager extends NCService with
DecorateAsScala {
val syns = mutable.HashSet.empty[SynonymHolder]
+ def ok(b: Boolean, exp: Boolean): Boolean = if (exp) b else !b
+ def idl(syns: Set[SynonymHolder], idl: Boolean): Set[SynonymHolder] =
syns.filter(s ⇒ ok(s.syn.hasIdl, idl))
+ def sparse(syns: Set[SynonymHolder], sp: Boolean): Set[SynonymHolder]
= syns.filter(s ⇒ ok(s.syn.sparse, sp))
+
var cnt = 0
val maxCnt = mdl.getMaxTotalSynonyms
@@ -228,15 +217,19 @@ object NCDeployManager extends NCService with
DecorateAsScala {
s"]"
)
+ val sparseElem = elm.isSparse.orElse(mdl.isSparse)
+ val permuteElem =
elm.isPermutateSynonyms.orElse(mdl.isPermutateSynonyms)
+
def addSynonym(
isElementId: Boolean,
isValueName: Boolean,
value: String,
- chunks: Seq[NCProbeSynonymChunk]): Unit = {
- def add(chunks: Seq[NCProbeSynonymChunk], isDirect: Boolean):
Unit = {
+ chunks: Seq[NCProbeSynonymChunk]
+ ): Unit = {
+ def add(chunks: Seq[NCProbeSynonymChunk], perm: Boolean,
sparse: Boolean, isDirect: Boolean): Unit = {
val holder = SynonymHolder(
elmId = elmId,
- syn = NCProbeSynonym(isElementId, isValueName,
isDirect, value, chunks)
+ syn = NCProbeSynonym(isElementId, isValueName,
isDirect, value, chunks, sparse, perm)
)
if (syns.add(holder)) {
@@ -269,13 +262,19 @@ object NCDeployManager extends NCService with
DecorateAsScala {
)
}
+ val sp = sparseElem && chunks.size > 1
+
if (
- elm.isPermutateSynonyms.orElse(mdl.isPermutateSynonyms) &&
- !isElementId && chunks.forall(_.wordStem != null)
+ permuteElem &&
+ !sparseElem &&
+ !isElementId &&
+ chunks.forall(_.wordStem != null)
)
- simplePermute(chunks).map(p ⇒ p.map(_.wordStem) →
p).toMap.values.foreach(p ⇒ add(p, p == chunks))
+ simplePermute(chunks).map(p ⇒ p.map(_.wordStem) →
p).toMap.values.foreach(seq ⇒
+ add(seq, isDirect = seq == chunks, perm = true, sparse
= sp)
+ )
else
- add(chunks, isDirect = true)
+ add(chunks, isDirect = true, perm = permuteElem, sparse =
sp)
}
/**
@@ -482,7 +481,7 @@ object NCDeployManager extends NCService with
DecorateAsScala {
// Scan for intent annotations in the model class.
val intents = scanIntents(mdl)
-
+
var solver: NCIntentSolver = null
if (intents.nonEmpty) {
@@ -504,12 +503,18 @@ object NCDeployManager extends NCService with
DecorateAsScala {
else
logger.warn(s"Model has no intent: $mdlId")
+ def toMap(set: Set[SynonymHolder]): Map[String, Seq[NCProbeSynonym]] =
+ set.groupBy(_.elmId).map(p ⇒ p._1 →
p._2.map(_.syn).toSeq.sorted.reverse)
+
+ val simple = idl(syns.toSet, idl = false)
+
NCProbeModel(
model = mdl,
solver = solver,
intents = intents.map(_._1).toSeq,
- synonyms = mkFastAccessMap(filter(syns, dsl = false),
NCProbeSynonymsWrapper(_)),
- synonymsDsl = mkFastAccessMap(filter(syns, dsl = true),
_.sorted.reverse),
+ continuousSynonyms = mkFastAccessMap(sparse(simple, sp = false),
NCProbeSynonymsWrapper(_)),
+ sparseSynonyms = toMap(sparse(simple, sp = true)),
+ idlSynonyms = toMap(idl(syns.toSet, idl = true)),
addStopWordsStems = addStopWords,
exclStopWordsStems = exclStopWords,
suspWordsStems = suspWords,
@@ -903,7 +908,6 @@ object NCDeployManager extends NCService with
DecorateAsScala {
checkNum(mdl.getMinTokens, "minTokens", MIN_TOKENS_MIN, MIN_TOKENS_MAX)
checkNum(mdl.getMaxTokens, "maxTokens", MAX_TOKENS_MIN, MAX_TOKENS_MAX)
checkNum(mdl.getMaxWords, "maxWords", MAX_WORDS_MIN, MAX_WORDS_MAX)
- checkNum(mdl.getJiggleFactor, "jiggleFactor", JIGGLE_FACTOR_MIN,
JIGGLE_FACTOR_MAX)
checkNum(mdl.getMaxElementSynonyms, "maxSynonymsThreshold",
MAX_SYN_MIN, MAX_SYN_MAX)
checkNum(mdl.getConversationDepth, "conversationDepth",
CONV_DEPTH_MIN, CONV_DEPTH_MAX)
@@ -965,18 +969,6 @@ object NCDeployManager extends NCService with
DecorateAsScala {
private def hasWhitespace(s: String): Boolean = s.exists(_.isWhitespace)
/**
- *
- * @param set
- * @param dsl
- */
- private def filter(set: mutable.HashSet[SynonymHolder], dsl: Boolean):
Set[SynonymHolder] =
- set.toSet.filter(s ⇒ {
- val b = s.syn.exists(_.kind == IDL)
-
- if (dsl) b else !b
- })
-
- /**
*
* @param mdl Model.
* @param chunk Synonym chunk.
@@ -1010,8 +1002,8 @@ object NCDeployManager extends NCService with
DecorateAsScala {
}
// IDL-based synonym.
else if (startsAndEnds(IDL_FIX, chunk)) {
- val dsl = stripSuffix(IDL_FIX, chunk)
- val compUnit = NCIdlCompiler.compileSynonym(dsl, mdl,
mdl.getOrigin)
+ val idl = stripSuffix(IDL_FIX, chunk)
+ val compUnit = NCIdlCompiler.compileSynonym(idl, mdl,
mdl.getOrigin)
val x = NCProbeSynonymChunk(alias = compUnit.alias.orNull, kind =
IDL, origText = chunk, idlPred = compUnit.pred)
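Net effect of the reworked addSynonym above (simplified, omitting the element-ID and wordStem guards): continuous synonyms are still expanded into adjacent-swap permutations eagerly at deploy time, while sparse synonyms just carry the flags and are reordered lazily at match time; a single-chunk synonym is never sparse. A condensed sketch of that decision, with names local to this example:

    def deployPlan(sparseElem: Boolean, permuteElem: Boolean, chunks: Int): String = {
        val sparse = sparseElem && chunks > 1

        if (permuteElem && !sparseElem)
            s"expand adjacent-swap permutations now (sparse=$sparse)"
        else
            s"add synonym as-is (permute=$permuteElem, sparse=$sparse)"
    }

    deployPlan(sparseElem = false, permuteElem = true, chunks = 3) // Expand at deploy time.
    deployPlan(sparseElem = true,  permuteElem = true, chunks = 3) // Defer ordering to match time.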
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
index 0ff85af..e2e0833 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/model/NCModelManager.scala
@@ -58,18 +58,26 @@ object NCModelManager extends NCService with
DecorateAsScala {
data.values.foreach(w ⇒ {
val mdl = w.model
- val synCnt = w.synonyms.flatMap(_._2.map(_._2.count)).sum
- val elmCnt = w.elements.keySet.size
- val intentCnt = w.intents.size
+ val contCnt =
w.continuousSynonyms.flatMap(_._2.map(_._2.count)).sum
+ val sparseCnt = w.sparseSynonyms.map(_._2.size).sum
+ val allIdlSyns = w.idlSynonyms.values.flatten
+ val sparseIdlCnt = allIdlSyns.count(_.sparse)
+ val contIdlCnt = allIdlSyns.size - sparseIdlCnt
+
+ def withWarn(i: Int): String = if (i == 0) s"0 ${r("(!)")}"
else i.toString
tbl += Seq(
- s"Name: ${bo(c(mdl.getName))}",
- s"ID: ${bo(mdl.getId)}",
- s"Version: ${mdl.getVersion}",
- s"Origin: ${mdl.getOrigin}",
- s"Elements: $elmCnt" + (if (elmCnt == 0) s" ${r("(!)")}"
else ""),
- s"Synonyms: $synCnt" + (if (synCnt == 0) s" ${r("(!)")}"
else ""),
- s"Intents: $intentCnt" + (if (intentCnt == 0) s"
${r("(!)")}" else "")
+ s"Name: ${bo(c(mdl.getName))}",
+ s"ID: ${bo(mdl.getId)}",
+ s"Version: ${mdl.getVersion}",
+ s"Origin: ${mdl.getOrigin}",
+ s"Elements:
${withWarn(w.elements.keySet.size)}",
+ s"Synonyms:",
+ s" Simple continuous: $contCnt",
+ s" Simple sparse: $sparseCnt",
+ s" IDL continuous: $contIdlCnt",
+ s" IDL sparse: $sparseIdlCnt",
+ s"Intents: ${withWarn(w.intents.size)}"
)
})
}
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
index 4a1466e..aab00ed 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/NCProbeEnrichmentManager.scala
@@ -517,6 +517,8 @@ object NCProbeEnrichmentManager extends NCService with
NCOpenCensusModelStats {
)
})
+ NCSentenceManager.clearCache(srvReqId)
+
// Final validation before execution.
try
sensSeq.foreach(NCValidateManager.postValidate(mdl, _, span))
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 1233c31..1e1b441 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -19,27 +19,30 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
import io.opencensus.trace.Span
import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken,
NCNlpSentenceTokenBuffer, _}
+import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken ⇒ NlpToken,
NCNlpSentenceNote ⇒ NlpNote, NCNlpSentence ⇒ Sentence}
import org.apache.nlpcraft.model._
-import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCDslContent
-import
org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind,
TEXT}
+import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
+import
org.apache.nlpcraft.probe.mgrs.NCProbeSynonymChunkKind.{NCSynonymChunkKind, _}
import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher
import org.apache.nlpcraft.probe.mgrs.nlp.impl.NCRequestImpl
import org.apache.nlpcraft.probe.mgrs.sentence.NCSentenceManager
-import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeSynonym,
NCProbeVariants}
+import org.apache.nlpcraft.probe.mgrs.{NCProbeModel, NCProbeVariants,
NCProbeSynonym ⇒ Synonym}
import java.io.Serializable
import java.util
+import java.util.{List ⇒ JList}
import scala.collection.JavaConverters._
import scala.collection.convert.DecorateAsScala
import scala.collection.mutable.ArrayBuffer
import scala.collection.{Map, Seq, mutable}
-import scala.compat.java8.OptionConverters._
/**
* Model elements enricher.
*/
object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
+ type TokType = (NCToken, NCSynonymChunkKind)
+ type Cache = mutable.Map[String, ArrayBuffer[Seq[Int]]]
+
object Complex {
def apply(t: NCToken): Complex =
Complex(
@@ -54,7 +57,7 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
maxIndex = t.wordIndexes.last
)
- def apply(t: NCNlpSentenceToken): Complex =
+ def apply(t: NlpToken): Complex =
Complex(
data = Right(t),
isToken = false,
@@ -69,11 +72,11 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
}
case class Complex(
- data: NCDslContent,
+ data: NCIdlContent,
isToken: Boolean,
isWord: Boolean,
token: NCToken,
- word: NCNlpSentenceToken,
+ word: NlpToken,
origText: String,
wordIndexes: Set[Int],
minIndex: Int,
@@ -99,7 +102,7 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
override def toString: String = {
val idxs = wordIndexes.mkString(",")
- if (isToken) s"'$origText' (${token.getId}) [$idxs]]" else
s"'$origText' [$idxs]"
+ if (isToken && token.getId != "nlpcraft:nlp") s"'$origText'
(${token.getId}) [$idxs]]" else s"'$origText' [$idxs]"
}
}
@@ -123,46 +126,7 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
override def toString: String = tokensComplexes.mkString(" | ")
}
- // Found-by-synonym model element.
- case class ElementMatch(
- element: NCElement,
- tokens: Seq[NCNlpSentenceToken],
- synonym: NCProbeSynonym,
- parts: Seq[(NCToken, NCSynonymChunkKind)]
- ) extends Ordered[ElementMatch] {
- // Tokens sparsity.
- lazy val sparsity: Int = tokens.zipWithIndex.tail.map {
- case (tok, idx) ⇒ Math.abs(tok.index - tokens(idx - 1).index)
- }.sum - tokens.length + 1
-
- // Number of tokens.
- lazy val length: Int = tokens.size
- private lazy val tokensSet: Set[NCNlpSentenceToken] = tokens.toSet
-
- def isSubSet(toks: Set[NCNlpSentenceToken]): Boolean =
toks.subsetOf(tokensSet)
-
- override def compare(that: ElementMatch): Int = {
- // Check synonym first, then length and then sparsity.
- // Note that less sparsity means more certainty in a match.
-
- if (that == null)
- 1
- else if (synonym < that.synonym)
- -1
- else if (synonym > that.synonym)
- 1
- else if (length < that.length)
- -1
- else if (length > that.length)
- 1
- else if (sparsity < that.sparsity)
- 1
- else if (sparsity > that.sparsity)
- -1
- else
- 0
- }
- }
+ case class ComplexHolder(complexesWords: Seq[Complex], complexes:
Seq[ComplexSeq])
/**
*
@@ -183,67 +147,7 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
ackStopped()
}
- /**
- * Returns an iterator of tokens arrays where each token is jiggled left
and right by given factor.
- * Note that only one token is jiggled at a time.
- *
- * @param ns NLP sentence to jiggle.
- * @param factor Distance of left or right jiggle, i.e. how far can an
individual token move
- * left or right in the sentence.
- */
- private def jiggle(ns: NCNlpSentenceTokenBuffer, factor: Int):
Iterator[NCNlpSentenceTokenBuffer] = {
- require(factor >= 0)
-
- if (ns.isEmpty)
- Iterator.empty
- else if (factor == 0)
- Iterator.apply(ns)
- else
- new Iterator[NCNlpSentenceTokenBuffer] {
- private val min = -factor
- private val max = factor
- private val sz = ns.size
-
- private var i = 0 // Token index.
- private var d = 0 // Jiggle amount [min, max].
- private var isNext = sz > 0
-
- private def calcNext(): Unit = {
- isNext = false
- d += 1
-
- while (i < sz && !isNext) {
- while (d <= max && !isNext) {
- val p = i + d
-
- if (p >= 0 && p < sz) // Valid new position?
- isNext = true
- else
- d += 1
- }
- if (!isNext) {
- d = min
- i += 1
- }
- }
- }
-
- override def hasNext: Boolean = isNext
-
- override def next(): NCNlpSentenceTokenBuffer = {
- require(isNext)
-
- val buf = NCNlpSentenceTokenBuffer(ns)
-
- if (d != 0)
- buf.insert(i + d, buf.remove(i)) // Jiggle.
-
- calcNext()
-
- buf
- }
- }
- }
+ def isComplex(mdl: NCProbeModel): Boolean = mdl.hasIdlSynonyms ||
!mdl.model.getParsers.isEmpty
/**
*
@@ -252,17 +156,17 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
* @param toks
* @param direct
* @param syn
- * @param metaOpt
* @param parts
+ * @param metaOpt
*/
private def mark(
- ns: NCNlpSentence,
+ ns: Sentence,
elem: NCElement,
- toks: Seq[NCNlpSentenceToken],
+ toks: Seq[NlpToken],
direct: Boolean,
- syn: Option[NCProbeSynonym],
- metaOpt: Option[Map[String, Object]],
- parts: Seq[(NCToken, NCSynonymChunkKind)]
+ syn: Option[Synonym] = None,
+ parts: Seq[TokType] = Seq.empty,
+ metaOpt: Option[Map[String, Object]] = None
): Unit = {
val params = mutable.ArrayBuffer.empty[(String, AnyRef)]
@@ -299,7 +203,7 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
val idxs = toks.map(_.index).sorted
- val note = NCNlpSentenceNote(idxs, elem.getId, params: _*)
+ val note = NlpNote(idxs, elem.getId, params: _*)
toks.foreach(_.add(note))
@@ -308,6 +212,90 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
}
/**
+ *
+ * @param mdl
+ * @param ns
+ * @param span
+ * @param req
+ */
+ private def processParsers(mdl: NCProbeModel, ns: Sentence, span: Span,
req: NCRequestImpl): Unit = {
+ for (parser ← mdl.model.getParsers.asScala) {
+ parser.onInit()
+
+ startScopedSpan("customParser", span,
+ "srvReqId" → ns.srvReqId,
+ "mdlId" → mdl.model.getId,
+ "txt" → ns.text
+ ) { _ ⇒
+ def to(t: NlpToken): NCCustomWord =
+ new NCCustomWord {
+ override def getNormalizedText: String = t.normText
+ override def getOriginalText: String = t.origText
+ override def getStartCharIndex: Int = t.startCharIndex
+ override def getEndCharIndex: Int = t.endCharIndex
+ override def getPos: String = t.pos
+ override def getPosDescription: String = t.posDesc
+ override def getLemma: String = t.lemma
+ override def getStem: String = t.stem
+ override def isStopWord: Boolean = t.isStopWord
+ override def isBracketed: Boolean = t.isBracketed
+ override def isQuoted: Boolean = t.isQuoted
+ override def isKnownWord: Boolean = t.isKnownWord
+ override def isSwearWord: Boolean = t.isSwearWord
+ override def isEnglish: Boolean = t.isEnglish
+ }
+
+ val res = parser.parse(
+ req,
+ mdl.model,
+ ns.map(to).asJava,
+ ns.flatten.distinct.filter(!_.isNlp).map(n ⇒ {
+ val noteId = n.noteType
+ val words = ns.filter(t ⇒
n.tokenIndexes.contains(t.index)).map(to).asJava
+ val md = n.asMetadata()
+
+ new NCCustomElement() {
+ override def getElementId: String = noteId
+ override def getWords: JList[NCCustomWord] = words
+ override def getMetadata: JavaMeta = md.map(p ⇒
p._1 → p._2.asInstanceOf[AnyRef]).asJava
+ }
+ }).asJava
+ )
+
+ if (res != null)
+ res.asScala.foreach(e ⇒ {
+ val elemId = e.getElementId
+ val words = e.getWords
+
+ if (elemId == null)
+ throw new NCE(s"Custom model parser cannot return
'null' element ID.")
+
+ if (words == null || words.isEmpty)
+ throw new NCE(s"Custom model parser cannot return
empty custom tokens [elementId=$elemId]")
+
+ val matchedToks = words.asScala.map(w ⇒
+ ns.find(t ⇒
+ t.startCharIndex == w.getStartCharIndex &&
t.endCharIndex == w.getEndCharIndex
+ ).getOrElse(throw new AssertionError(s"Custom
model parser returned an invalid custom token: $w"))
+ )
+
+ // Checks element's tokens.
+ if (!alreadyMarked(ns, elemId, matchedToks,
matchedToks.map(_.index).sorted))
+ mark(
+ ns,
+ elem = mdl.elements.getOrElse(elemId, throw
new NCE(s"Custom model parser returned unknown element ID: $elemId")),
+ toks = matchedToks,
+ direct = true,
+ metaOpt = Some(e.getMetadata.asScala)
+ )
+ })
+ }
+
+ parser.onDiscard()
+ }
+ }
+
+ /**
* Gets all sequential permutations of given tokens.
*
* For example, if buffer contains "a b c d" tokens, then this function
will return the
@@ -329,163 +317,198 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
private def combos[T](toks: Seq[T]): Seq[Seq[T]] =
(for (n ← toks.size until 0 by -1) yield
toks.sliding(n)).flatten.map(p ⇒ p)
+ /**
+ *
+ * @param seq
+ * @param s
+ */
+ private def toParts(seq: Seq[NCIdlContent], s: Synonym): Seq[TokType] =
+ seq.zip(s.map(_.kind)).flatMap {
+ case (complex, kind) ⇒ if (complex.isLeft) Some(complex.left.get →
kind) else None
+ }
+
+ /**
+ *
+ * @param tows
+ * @param ns
+ */
+ private def toTokens(tows: Seq[NCIdlContent], ns: Sentence): Seq[NlpToken]
=
+ (
+ tows.filter(_.isRight).map(_.right.get) ++
+ tows.filter(_.isLeft).map(_.left.get).
+ flatMap(w ⇒ ns.filter(t ⇒
t.wordIndexes.intersect(w.wordIndexes).nonEmpty))
+ ).sortBy(_.startCharIndex)
+
+ /**
+ *
+ * @param m
+ * @param id
+ * @return
+ */
+ private def get(m: Map[String , Seq[Synonym]], id: String): Seq[Synonym] =
m.getOrElse(id, Seq.empty)
/**
+ * Gets synonyms sorted in descending order by their weight (already
prepared),
+ * i.e. first synonym in the sequence is the most important one.
*
+ * @param fastMap {Element ID → {Synonym length → T}}
+ * @param elmId
+ * @param len
+ */
+ private def fastAccess[T](fastMap: Map[String, Map[Int, T]], elmId:
String, len: Int): Option[T] =
+ fastMap.getOrElse(elmId, Map.empty[Int, T]).get(len)
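A small usage sketch of this two-level lookup (hypothetical element ID and synonym strings, assuming fastAccess is in scope):

    val syns: Map[String, Map[Int, Seq[String]]] =
        Map("e1" → Map(2 → Seq("a b", "b a")))

    fastAccess(syns, "e1", 2)   // Some(Seq("a b", "b a"))
    fastAccess(syns, "e1", 3)   // None
    fastAccess(syns, "e2", 2)   // None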
+
+ /**
+ *
+ * @param mdl
+ * @param ns
+ */
+ private def mkComplexes(mdl: NCProbeModel, ns: Sentence): ComplexHolder = {
+ val complexesWords = ns.map(Complex(_))
+
+ val complexes =
+ NCProbeVariants.convert(ns.srvReqId, mdl,
NCSentenceManager.collapse(mdl.model, ns.clone())).
+ map(_.asScala).
+ par.
+ flatMap(sen ⇒
+ // Token splitting.
+ // For example, the sentence "A B C D E" (5 words) is processed as 3 tokens on the first phase after collapsing:
+ // 'A B' (2 words), 'C D' (2 words) and 'E' (1 word).
+ // So the resulting combinations will be:
+ // Token(AB) + Token(CD) + Token(E)
+ // Token(AB) + Word(C) + Word(D) + Token(E)
+ // Word(A) + Word(B) + Token(CD) + Token(E)
+ // Word(A) + Word(B) + Word(C) + Word(D) + Token(E)
+ combos(sen).
+ map(senPartComb ⇒ {
+ sen.flatMap(t ⇒
+ // A single-word token is never split - it stays a token.
+ // A token not kept by this combination is split into its words.
+ if (t.wordIndexes.length == 1 ||
senPartComb.contains(t))
+ Seq(Complex(t))
+ else
+ t.wordIndexes.map(complexesWords)
+ )
+ // Drop combinations without tokens (the IDL part works with tokens).
+
}).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct
+ ).seq
+
+ ComplexHolder(complexesWords, complexes)
+ }
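Since the splitting above is dense, here is a condensed, self-contained sketch of the same enumeration (Tok/Word are simplified stand-ins for the actual Complex classes, not the real types):

    sealed trait C
    case class Tok(txt: String, wordIdxs: Seq[Int]) extends C
    case class Word(idx: Int) extends C

    def combos[T](toks: Seq[T]): Seq[Seq[T]] =
        (for (n ← toks.size until 0 by -1) yield toks.sliding(n)).flatten.map(p ⇒ p)

    // Collapsed sentence "A B C D E" → tokens 'A B', 'C D', 'E'.
    val sen = Seq(Tok("A B", Seq(0, 1)), Tok("C D", Seq(2, 3)), Tok("E", Seq(4)))

    val variants = combos(sen).map(keep ⇒
        sen.flatMap(t ⇒
            // Single-word tokens and tokens kept by this combination stay tokens,
            // all others are expanded into their words.
            if (t.wordIdxs.length == 1 || keep.contains(t)) Seq(t)
            else t.wordIdxs.map(Word)
        )
    ).distinct
    // variants contains, among others:
    //   Tok(A B) :: Tok(C D) :: Tok(E)
    //   Word(0) :: Word(1) :: Tok(C D) :: Tok(E)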
+
+ /**
+ *
+ * @param h
* @param toks
- * @param elemId
*/
- private def alreadyMarked(toks: Seq[NCNlpSentenceToken], elemId: String):
Boolean = toks.forall(_.isTypeOf(elemId))
+ private def mkCombinations(h: ComplexHolder, toks: Seq[NlpToken], cache:
Set[Seq[Complex]]): Seq[Seq[Complex]] = {
+ val idxs = toks.flatMap(_.wordIndexes).toSet
- @throws[NCE]
- override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta:
Map[String, Serializable], parent: Span = null): Unit = {
- require(isStarted)
+ h.complexes.par.
+ flatMap(complexSeq ⇒ {
+ val rec =
complexSeq.tokensComplexes.filter(_.wordIndexes.exists(idxs.contains))
- startScopedSpan("enrich", parent,
- "srvReqId" → ns.srvReqId,
- "mdlId" → mdl.model.getId,
- "txt" → ns.text) { span ⇒
- val elemsFactors =
mdl.elements.values.flatMap(_.getJiggleFactor.asScala).toSeq
- val elemsMaxFactor: Int = if (elemsFactors.nonEmpty)
elemsFactors.max else 0
-
- val maxJiggleFactor = Math.max(mdl.model.getJiggleFactor,
elemsMaxFactor)
-
- val cache = mutable.HashSet.empty[Seq[Int]]
- val matches = ArrayBuffer.empty[ElementMatch]
-
- /**
- * Gets synonyms sorted in descending order by their weight
(already prepared),
- * i.e. first synonym in the sequence is the most important one.
- *
- * @param fastMap {Element ID → {Synonym length → T}}
- * @param elmId
- * @param len
- */
- def fastAccess[T](fastMap: Map[String, Map[Int, T]], elmId:
String, len: Int): Option[T] =
- fastMap.getOrElse(elmId, Map.empty[Int, T]).get(len)
-
- /**
- *
- * @param toks
- * @return
- */
- def tokString(toks: Seq[NCNlpSentenceToken]): String = toks.map(t
⇒ (t.origText, t.index)).mkString(" ")
-
- var permCnt = 0
-
- lazy val complexesWords = ns.map(Complex(_))
- lazy val complexes =
- NCProbeVariants.
- convert(
- ns.srvReqId,
- mdl,
- NCSentenceManager.collapse(mdl.model, ns.clone())
- ).
- map(_.asScala).
- par.
- flatMap(sen ⇒
- // Tokens splitting.
- // For example sentence "A B С D E" (5 words)
processed as 3 tokens on first phase after collapsing
- // 'A B' (2 words), 'C D' (2 words) and 'E' (1 word)
- // So, result combinations will be:
- // Token(AB) + Token(CD) + Token(E)
- // Token(AB) + Word(C) + Word(D) + Token(E)
- // Word(A) + Word(B) + Token(CD) + Token(E)
- // Word(A) + Word(B) + Word(C) + Word(D) + Token(E)
- combos(sen).
- map(senPartComb ⇒ {
- sen.flatMap(t ⇒
- // Single word token is not split as words
- token.
- // Partly (not strict in) token - word.
- if (t.wordIndexes.length == 1 ||
senPartComb.contains(t))
- Seq(Complex(t))
- else
- t.wordIndexes.map(complexesWords)
- )
- // Drops without tokens (IDL part works with
tokens).
-
}).filter(_.exists(_.isToken)).map(ComplexSeq(_)).distinct
- ).seq
-
- val tokIdxs = ns.map(t ⇒ t → t.wordIndexes).toMap
-
- /**
- *
- * @param perm Permutation to process.
- */
- def procPerm(perm: NCNlpSentenceTokenBuffer): Unit = {
- permCnt += 1
-
- for (toks ← combos(perm)) {
- val key = toks.map(_.index).sorted
-
- if (!cache.contains(key)) {
- cache += key
-
- val idxsSeq = toks.flatMap(tokIdxs)
- val idxsSorted = idxsSeq.sorted
- val idxs = idxsSeq.toSet
- val idxMin = idxsSorted.head
- val idxMax = idxsSorted.last
-
- lazy val sorted = idxsSorted.zipWithIndex.toMap
-
- lazy val dslCombs =
- complexes.par.
- flatMap(complexSeq ⇒ {
- val rec =
complexSeq.tokensComplexes.filter(_.isSubsetOf(idxMin, idxMax, idxs))
-
- // Drops without tokens (IDL part works
with tokens).
- if (rec.nonEmpty)
- Some(
- rec ++
-
(complexSeq.wordsIndexes.intersect(idxs) -- rec.flatMap(_.wordIndexes)).
- map(complexesWords)
- )
- else
- None
- }).
- map(_.sortBy(p ⇒
sorted(p.wordIndexes.head))).seq.groupBy(_.length)
+ // Drop combinations without tokens (the IDL part works with tokens).
+ if (rec.nonEmpty) {
+ val data = rec ++
+ (complexSeq.wordsIndexes.intersect(idxs) --
rec.flatMap(_.wordIndexes)).map(h.complexesWords)
- lazy val sparsity = U.calcSparsity(key)
- lazy val tokStems = toks.map(_.stem).mkString(" ")
+ if (!cache.contains(data)) Some(data) else None
+ }
+ else
+ None
+ }).seq
+ }
- // Attempt to match each element.
- for (elm ← mdl.elements.values if !alreadyMarked(toks,
elm.getId)) {
- var found = false
+ private def add(
+ dbgType: String,
+ ns: Sentence,
+ contCache: Cache,
+ elem: NCElement,
+ elemToks: Seq[NlpToken],
+ sliceToksIdxs: Seq[Int],
+ syn: Synonym,
+ parts: Seq[TokType] = Seq.empty)
+ : Unit = {
+ val resIdxs = elemToks.map(_.index)
+ val resIdxsSorted = resIdxs.sorted
+
+ if (resIdxsSorted == sliceToksIdxs && U.isContinuous(resIdxsSorted))
+ contCache(elem.getId) += sliceToksIdxs
+
+ val ok = !alreadyMarked(ns, elem.getId, elemToks, sliceToksIdxs)
+
+ if (ok)
+ mark(ns, elem, elemToks, direct = syn.isDirect &&
U.isIncreased(resIdxs), syn = Some(syn), parts = parts)
+
+ if (DEEP_DEBUG)
+ logger.trace(
+ s"${if (ok) "Added" else "Skipped"} element [" +
+ s"id=${elem.getId}, " +
+ s"type=$dbgType, " +
+ s"text='${elemToks.map(_.origText).mkString(" ")}', " +
+ s"indexes=${resIdxs.mkString("[", ",", "]")}, " +
+ s"allTokensIndexes=${sliceToksIdxs.mkString("[", ",",
"]")}, " +
+ s"synonym=$syn" +
+ s"]"
+ )
+ }
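The U.isContinuous and U.isIncreased helpers used here are not shown in this diff; the following are plausible sketches consistent with their usage (the actual NCUtils implementations may differ):

    // Indexes form an unbroken run: 3, 4, 5 ...
    def isContinuous(sortedIdxs: Seq[Int]): Boolean =
        sortedIdxs.zip(sortedIdxs.drop(1)).forall { case (a, b) ⇒ b - a == 1 }

    // Indexes are strictly increasing, i.e. tokens appear in original order.
    def isIncreased(idxs: Seq[Int]): Boolean =
        idxs.zip(idxs.drop(1)).forall { case (a, b) ⇒ a < b }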
- def addMatch(
- elm: NCElement,
- toks: Seq[NCNlpSentenceToken],
- syn: NCProbeSynonym,
- parts: Seq[(NCToken, NCSynonymChunkKind)]
- ): Unit =
- if (
- (elm.getJiggleFactor.isEmpty ||
elm.getJiggleFactor.get() >= sparsity) &&
- !matches.exists(m ⇒ m.element == elm
&& m.isSubSet(toks.toSet))
- ) {
- found = true
-
- matches += ElementMatch(elm, toks, syn,
parts)
- }
+ @throws[NCE]
+ override def enrich(mdl: NCProbeModel, ns: Sentence, senMeta: Map[String,
Serializable], parent: Span = null): Unit = {
+ require(isStarted)
- // Optimization - plain synonyms can be used only
on first iteration
- if (mdl.synonyms.nonEmpty && !ns.exists(_.isUser))
- fastAccess(mdl.synonyms, elm.getId,
toks.length) match {
- case Some(h) ⇒
- def tryMap(synsMap: Map[String,
NCProbeSynonym], notFound: () ⇒ Unit): Unit =
- synsMap.get(tokStems) match {
- case Some(syn) ⇒
- addMatch(elm, toks, syn,
Seq.empty)
+ startScopedSpan(
+ "enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" →
mdl.model.getId, "txt" → ns.text
+ ) { span ⇒
+ val req = NCRequestImpl(senMeta, ns.srvReqId)
+ val combToks = combos(ns)
+ lazy val ch = mkComplexes(mdl, ns)
+
+ def execute(simpleEnabled: Boolean, idlEnabled: Boolean): Unit =
+ startScopedSpan(
+ "execute", span, "srvReqId" → ns.srvReqId, "mdlId" →
mdl.model.getId, "txt" → ns.text
+ ) { _ ⇒
+ if (DEEP_DEBUG)
+ logger.trace(s"Execution started
[simpleEnabled=$simpleEnabled, idlEnabled=$idlEnabled]")
+
+ val contCache = mutable.HashMap.empty ++
+ mdl.elements.keys.map(k ⇒ k →
mutable.ArrayBuffer.empty[Seq[Int]])
+ lazy val idlCache = mutable.HashSet.empty[Seq[Complex]]
+
+ for (
+ toks ← combToks;
+ idxs = toks.map(_.index);
+ e ← mdl.elements.values;
+ eId = e.getId
+ if
+ !contCache(eId).exists(_.containsSlice(idxs)) &&
+ !alreadyMarked(ns, eId, toks, idxs)
+ ) {
+ // 1. SIMPLE.
+ if (simpleEnabled && (if (idlEnabled)
mdl.hasIdlSynonyms(eId) else !mdl.hasIdlSynonyms(eId))) {
+ lazy val tokStems = toks.map(_.stem).mkString(" ")
+
+ // 1.1 Continuous.
+ var found = false
- if (!found)
- notFound()
+ if (mdl.hasContinuousSynonyms)
+ fastAccess(mdl.continuousSynonyms, eId,
toks.length) match {
+ case Some(h) ⇒
+ def tryMap(syns: Map[String, Synonym],
notFound: () ⇒ Unit): Unit =
+ syns.get(tokStems) match {
+ case Some(s) ⇒
+ found = true
+ add("simple continuous",
ns, contCache, e, toks, idxs, s)
case None ⇒ notFound()
}
- def tryScan(synsSeq:
Seq[NCProbeSynonym]): Unit =
- for (syn ← synsSeq if !found)
- if (syn.isMatch(toks))
- addMatch(elm, toks, syn,
Seq.empty)
+ def tryScan(syns: Seq[Synonym]): Unit =
+ for (s ← syns if !found)
+ if (s.isMatch(toks)) {
+ found = true
+ add("simple continuous
scan", ns, contCache, e, toks, idxs, s)
+ }
tryMap(
h.txtDirectSynonyms,
@@ -493,181 +516,96 @@ object NCModelEnricher extends NCProbeEnricher with
DecorateAsScala {
tryScan(h.notTxtDirectSynonyms)
if (!found)
- tryMap(
- h.txtNotDirectSynonyms,
- () ⇒
tryScan(h.notTxtNotDirectSynonyms)
- )
+
tryMap(h.txtNotDirectSynonyms, () ⇒ tryScan(h.notTxtNotDirectSynonyms))
}
)
case None ⇒ // No-op.
}
- if (mdl.synonymsDsl.nonEmpty) {
- found = false
+ // 1.2 Sparse.
+ if (!found && mdl.hasSparseSynonyms)
+ for (s ← get(mdl.sparseSynonyms, eId))
+ s.sparseMatch(toks) match {
+ case Some(res) ⇒ add("simple sparse",
ns, contCache, e, res, idxs, s)
+ case None ⇒ // No-op.
+ }
+ }
+
+ // 2. IDL.
+ if (idlEnabled) {
+ // 2.1 Continuous.
+ if (!mdl.hasSparseSynonyms) {
+ var found = false
for (
- (len, seq) ← dslCombs;
- syn ← fastAccess(mdl.synonymsDsl,
elm.getId, len).getOrElse(Seq.empty);
- comb ← seq if !found;
+ s ← get(mdl.idlSynonyms, eId);
+ comb ← mkCombinations(ch, toks,
idlCache.toSet);
data = comb.map(_.data)
+ if !found
)
- if (syn.isMatch(data,
NCRequestImpl(senMeta, ns.srvReqId))) {
- val parts =
comb.zip(syn.map(_.kind)).flatMap {
- case (complex, kind) ⇒ if
(complex.isToken) Some(complex.token → kind) else None
- }
+ if (s.isMatch(data, req)) {
+ add("IDL continuous", ns, contCache,
e, toks, idxs, s, toParts(data, s))
- addMatch(elm, toks, syn, parts)
+ idlCache += comb
+
+ found = true
}
}
+ else
+ // 2.2 Sparse.
+ for (s ← get(mdl.idlSynonyms, eId); comb ←
mkCombinations(ch, toks, idlCache.toSet))
+ s.sparseMatch(comb.map(_.data), req) match
{
+ case Some(res) ⇒
+ val typ = if (s.sparse) "IDL
sparse" else "IDL continuous"
+
+ add(typ, ns, contCache, e,
toTokens(res, ns), idxs, s, toParts(res, s))
+
+ idlCache += comb
+ case None ⇒ // No-op.
+ }
}
}
}
- }
-
- startScopedSpan("jiggleProc", span,
- "srvReqId" → ns.srvReqId,
- "mdlId" → mdl.model.getId,
- "txt" → ns.text) { _ ⇒
- // Iterate over depth-limited permutations of the original
sentence with and without stopwords.
- jiggle(ns, maxJiggleFactor).foreach(procPerm)
- jiggle(NCNlpSentenceTokenBuffer(ns.filter(!_.isStopWord)),
maxJiggleFactor).foreach(procPerm)
- }
-
- if (DEEP_DEBUG)
- logger.trace(s"Total jiggled permutations processed: $permCnt")
- addTags(
- span,
- "totalJiggledPerms" → permCnt
- )
+ if (ns.firstProbePhase) {
+ ns.firstProbePhase = false
- // Scans by elements that are found with same tokens length.
- // Inside, for each token we drop all non-optimized combinations.
- // Example:
- // 1. element's synonym - 'a b', jiggle factor 4 (default),
isPermuteSynonyms 'true' (default)
- // 2. Request 'a b a b',
- // Initially found 0-1, 1-2, 2-3, 0-3.
- // 0-3 will be deleted because for 0 and 3 tokens best variants
found for same element with same tokens length.
- val matchesNorm =
- matches.
- flatMap(m ⇒ m.tokens.map(_ → m)).
- groupBy { case (t, m) ⇒ (m.element.getId, m.length, t) }.
- flatMap { case (_, seq) ⇒
- def perm[T](list: List[List[T]]): List[List[T]] =
- list match {
- case Nil ⇒ List(Nil)
- case head :: tail ⇒ for (h ← head; t ←
perm(tail)) yield h :: t
- }
-
- // Optimization by sparsity sum for each tokens set
for one element found with same tokens count.
- perm(
- seq.groupBy { case (tok, _) ⇒ tok }.
- map { case (_, seq) ⇒ seq.map { case (_, m) ⇒
m }.toList }.toList
- ).minBy(_.map(_.sparsity).sum)
- }.
- toSeq.
- distinct
-
- val matchCnt = matchesNorm.size
-
- // Add notes for all remaining (non-intersecting) matches.
- for ((m, idx) ← matchesNorm.zipWithIndex) {
- if (DEEP_DEBUG)
- logger.trace(
- s"Model '${mdl.model.getId}' element found (${idx + 1}
of $matchCnt) [" +
- s"elementId=${m.element.getId}, " +
- s"synonym=${m.synonym}, " +
- s"tokens=${tokString(m.tokens)}" +
- s"]"
- )
-
- val elm = m.element
- val syn = m.synonym
-
- val tokIdxs = m.tokens.map(_.index)
- val direct = syn.isDirect && (tokIdxs == tokIdxs.sorted)
-
- mark(ns, elem = elm, toks = m.tokens, direct = direct, syn =
Some(syn), metaOpt = None, parts = m.parts)
+ if (mdl.hasNoIdlSynonyms)
+ execute(simpleEnabled = true, idlEnabled = false)
+ execute(simpleEnabled = mdl.hasNoIdlSynonyms, idlEnabled =
mdl.hasIdlSynonyms)
}
+ else if (mdl.hasIdlSynonyms)
+ execute(simpleEnabled = false, idlEnabled = true)
- val parsers = mdl.model.getParsers
-
- for (parser ← parsers.asScala) {
- parser.onInit()
-
- startScopedSpan("customParser", span,
- "srvReqId" → ns.srvReqId,
- "mdlId" → mdl.model.getId,
- "txt" → ns.text) { _ ⇒
- def to(t: NCNlpSentenceToken): NCCustomWord =
- new NCCustomWord {
- override def getNormalizedText: String = t.normText
- override def getOriginalText: String = t.origText
- override def getStartCharIndex: Int =
t.startCharIndex
- override def getEndCharIndex: Int = t.endCharIndex
- override def getPos: String = t.pos
- override def getPosDescription: String = t.posDesc
- override def getLemma: String = t.lemma
- override def getStem: String = t.stem
- override def isStopWord: Boolean = t.isStopWord
- override def isBracketed: Boolean = t.isBracketed
- override def isQuoted: Boolean = t.isQuoted
- override def isKnownWord: Boolean = t.isKnownWord
- override def isSwearWord: Boolean = t.isSwearWord
- override def isEnglish: Boolean = t.isEnglish
- }
-
- val res = parser.parse(
- NCRequestImpl(senMeta, ns.srvReqId),
- mdl.model,
- ns.map(to).asJava,
- ns.flatten.distinct.filter(!_.isNlp).map(n ⇒ {
- val noteId = n.noteType
- val words = ns.filter(t ⇒ t.index >= n.tokenFrom
&& t.index <= n.tokenTo).map(to).asJava
- val md = n.asMetadata()
-
- new NCCustomElement() {
- override def getElementId: String = noteId
- override def getWords: util.List[NCCustomWord]
= words
- override def getMetadata: JavaMeta = md.map(p
⇒ p._1 → p._2.asInstanceOf[AnyRef]).asJava
- }
- }).asJava
- )
-
- if (res != null)
- res.asScala.foreach(e ⇒ {
- val elemId = e.getElementId
- val words = e.getWords
-
- if (elemId == null)
- throw new NCE(s"Custom model parser cannot
return 'null' element ID.")
-
- if (words == null || words.isEmpty)
- throw new NCE(s"Custom model parser cannot
return empty custom tokens [elementId=$elemId]")
-
- val matchedToks = words.asScala.map(w ⇒
- ns.find(t ⇒
- t.startCharIndex == w.getStartCharIndex &&
t.endCharIndex == w.getEndCharIndex
- ).getOrElse(throw new AssertionError(s"Custom
model parser returned an invalid custom token: $w"))
- )
-
- if (!alreadyMarked(matchedToks, elemId))
- mark(
- ns,
- elem = mdl.elements.getOrElse(elemId,
throw new NCE(s"Custom model parser returned unknown element ID: $elemId")),
- toks = matchedToks,
- direct = true,
- syn = None,
- metaOpt = Some(e.getMetadata.asScala),
- parts = Seq.empty
- )
- })
- }
-
- parser.onDiscard()
- }
+ processParsers(mdl, ns, span, req)
}
}
- def isComplex(mdl: NCProbeModel): Boolean = mdl.synonymsDsl.nonEmpty ||
!mdl.model.getParsers.isEmpty
+ // TODO: simplify, add tests, check model properties (sparse etc) for
optimization.
+ /**
+ *
+ * @param elemId
+ * @param toks
+ * @param sliceToksIdxsSorted
+ */
+ private def alreadyMarked(ns: Sentence, elemId: String, toks:
Seq[NlpToken], sliceToksIdxsSorted: Seq[Int]): Boolean = {
+ lazy val toksIdxsSorted = toks.map(_.index).sorted
+
+ sliceToksIdxsSorted.map(ns).forall(_.exists(n ⇒ n.noteType == elemId
&& n.isContiguous)) ||
+ toks.exists(_.exists(n ⇒
+ n.noteType == elemId &&
+ (
+ (n.isContiguous &&
+ (sliceToksIdxsSorted.containsSlice(n.tokenIndexes) ||
n.tokenIndexes.containsSlice(toksIdxsSorted))
+ )
+ ||
+ (
+ n.tokenIndexes == toksIdxsSorted ||
+ n.tokenIndexes.containsSlice(toksIdxsSorted) &&
+ U.isContinuous(toksIdxsSorted) &&
+ U.isContinuous(n.tokenIndexes)
+ )
+ )
+ ))
+ }
}
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
index d3853da..4d0b270 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala
@@ -177,7 +177,7 @@ object NCSortEnricher extends NCProbeEnricher {
toks.flatten.
filter(!_.isNlp).
filter(n ⇒ n.tokenIndexes.head >= min && n.tokenIndexes.last <=
max).
- map(n ⇒ NoteData(n.noteType, n.tokenFrom to n.tokenTo)).
+ map(n ⇒ NoteData(n.noteType, n.tokenIndexes)).
sortBy(_.indexes.head).distinct
}
@@ -188,7 +188,7 @@ object NCSortEnricher extends NCProbeEnricher {
*
* @param toksNoteData
*/
- private def split(toks: Seq[NCNlpSentenceToken], toksNoteData:
Seq[NoteData], nullable: Boolean): Seq[Seq[NoteData]] = {
+ private def split(toks: Seq[NCNlpSentenceToken], othersRefs:
Seq[NCNlpSentenceToken], toksNoteData: Seq[NoteData], nullable: Boolean):
Seq[Seq[NoteData]] = {
val res =
if (toksNoteData.nonEmpty) {
val res = mutable.ArrayBuffer.empty[Seq[NoteData]]
@@ -205,8 +205,10 @@ object NCSortEnricher extends NCProbeEnricher {
between.isEmpty || between.forall(p ⇒ p.isStopWord ||
p.stem == stemAnd)
}
- val minIdx = toks.dropWhile(t ⇒ !isUserNotValue(t)).head.index
- val maxIdx = toks.reverse.dropWhile(t ⇒
!isUserNotValue(t)).head.index
+ val toks2 = toks.filter(othersRefs.contains)
+
+ val minIdx = toks2.dropWhile(t ⇒ !isUserNotValue(t)).head.index
+ val maxIdx = toks2.reverse.dropWhile(t ⇒
!isUserNotValue(t)).head.index
require(minIdx <= maxIdx)
@@ -311,12 +313,9 @@ object NCSortEnricher extends NCProbeEnricher {
val others = toks.filter(t ⇒ !all.contains(t))
if (others.nonEmpty) {
- val i1 = others.head.index
- val i2 = others.last.index
+ val idxs = others.map(_.index).toSet
- val othersRefs = others.filter(
- t ⇒ t.exists(n ⇒ isUserNotValue(n) && n.tokenIndexes.head
>= i1 && n.tokenIndexes.last <= i2)
- )
+ val othersRefs = others.filter(t ⇒ t.exists(n ⇒
isUserNotValue(n) && n.tokenIndexes.toSet.subsetOf(idxs)))
if (
othersRefs.nonEmpty &&
@@ -324,8 +323,7 @@ object NCSortEnricher extends NCProbeEnricher {
forall(p ⇒ (p.isStopWord || p.stem == stemAnd) &&
!maskWords.contains(p.stem))
) {
// It removes duplicates (`SORT x x ORDER x x x` converts
to `SORT x ORDER x`)
- val mask = toks.map(getKeyWordType).
- foldLeft("")((x, y) ⇒ if (x.endsWith(y)) x else s"$x
$y").trim
+ val mask = toks.map(getKeyWordType).foldLeft("")((x, y) ⇒
if (x.endsWith(y)) x else s"$x $y").trim
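A quick sanity check of the dedup fold described in the comment above (plain Scala, illustrative input):

    Seq("SORT", "x", "x", "ORDER", "x", "x", "x")
        .foldLeft("")((x, y) ⇒ if (x.endsWith(y)) x else s"$x $y")
        .trim   // "SORT x ORDER x"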
MASKS.get(mask) match {
case Some(typ) ⇒
@@ -349,12 +347,12 @@ object NCSortEnricher extends NCProbeEnricher {
if (data1.nonEmpty || data2.nonEmpty) {
val seq1 =
if (data1.nonEmpty)
- split(part1, data1, nullable = false)
+ split(part1, othersRefs, data1,
nullable = false)
else
- split(part2, data2, nullable = false)
+ split(part2, othersRefs, data2,
nullable = false)
val seq2 =
if (data1.nonEmpty && data2.nonEmpty)
- split(part2, data2, nullable = true)
+ split(part2, othersRefs, data2,
nullable = true)
else
Seq.empty
val asc = orderOpt.flatMap(o ⇒
Some(order(o.synonymIndex)._2))
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
index ad66b8f..f77e035 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/sentence/NCSentenceManager.scala
@@ -37,6 +37,10 @@ import scala.language.implicitConversions
object NCSentenceManager extends NCService {
@volatile private var pool: java.util.concurrent.ForkJoinPool = _
+ type CacheKey = Seq[Set[NCNlpSentenceNote]]
+ type CacheValue = Seq[Seq[NCNlpSentenceNote]]
+ private val combCache = mutable.HashMap.empty[String,
mutable.HashMap[CacheKey, CacheValue]]
+
case class PartKey(id: String, start: Int, end: Int) {
require(start <= end)
@@ -197,7 +201,7 @@ object NCSentenceManager extends NCService {
* @param noteField
* @param ns
*/
- private def fixNoteIndexesList(note: String, idxsField: String, noteField:
String, ns: NCNlpSentence): Unit = {
+ private def fixNoteIndexesList(note: String, idxsField: String, noteField:
String, ns: NCNlpSentence): Unit =
ns.flatMap(_.getNotes(note)).foreach(rel ⇒
rel.dataOpt[JList[JList[Int]]](idxsField) match {
case Some(idxsList) ⇒
@@ -211,7 +215,6 @@ object NCSentenceManager extends NCService {
case None ⇒ // No-op.
}
)
- }
/**
* Copies token.
@@ -686,8 +689,12 @@ object NCSentenceManager extends NCService {
toSeq.sortBy(-_.size)
val seqSens =
-
NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava,
pool).asScala.map(_.asScala).
- par.
+ combCache.
+ getOrElseUpdate(sen.srvReqId,
mutable.HashMap.empty[CacheKey, CacheValue]).
+ getOrElseUpdate(
+ toksByIdx,
+
NCSentenceHelper.findCombinations(toksByIdx.map(_.asJava).asJava,
pool).asScala.map(_.asScala)
+ ).par.
flatMap(delComb ⇒ {
val nsClone = sen.clone()
@@ -734,9 +741,22 @@ object NCSentenceManager extends NCService {
)
)
+ def notNlpNotes(s: NCNlpSentence): Seq[NCNlpSentenceNote] =
s.flatten.filter(!_.isNlp)
+
+ // Drops similar sentences (with the same notes structure), keeping the ones with more notes found.
+ sens =
sens.groupBy(notNlpNotes(_).groupBy(_.noteType).keys.toSeq.sorted.distinct).
+ flatMap(p ⇒ {
+ val m: Map[NCNlpSentence, Int] = p._2.map(p ⇒ p →
notNlpNotes(p).size).toMap
+
+ val max = m.values.max
+
+ m.filter(_._2 == max).keys
+ }).
+ toSeq
+
// Drops similar sentences (with same tokens structure).
// Among similar sentences we prefer one with minimal free words count.
- sens.groupBy(_.flatten.filter(!_.isNlp).map(_.getKey(withIndexes =
false))).
+ sens.groupBy(notNlpNotes(_).map(_.getKey(withIndexes = false))).
map { case (_, seq) ⇒ seq.minBy(_.filter(p ⇒ p.isNlp &&
!p.isStopWord).map(_.wordIndexes.length).sum) }.
toSeq
}
@@ -762,8 +782,13 @@ object NCSentenceManager extends NCService {
* @param mdl
* @param sen
* @param lastPhase
- * @return
*/
def collapse(mdl: NCModel, sen: NCNlpSentence, lastPhase: Boolean =
false): Seq[NCNlpSentence] =
collapseSentence(sen, mdl, lastPhase)
+
+ /**
+ *
+ * @param srvReqId
+ */
+ def clearCache(srvReqId: String): Unit = combCache -= srvReqId
}
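For context, the per-request combination cache introduced above follows this pattern (a simplified sketch only; String stands in for NCNlpSentenceNote):

    import scala.collection.mutable

    type Key = Seq[Set[String]]
    type Value = Seq[Seq[String]]

    val cache = mutable.HashMap.empty[String, mutable.HashMap[Key, Value]]

    // Computes the combinations once per (request, key) pair, then reuses them.
    def getOrCompute(srvReqId: String, key: Key)(compute: ⇒ Value): Value =
        cache.getOrElseUpdate(srvReqId, mutable.HashMap.empty[Key, Value]).
            getOrElseUpdate(key, compute)

    // Presumably invoked once the request completes, so entries do not leak.
    def clearCache(srvReqId: String): Unit = cache -= srvReqId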
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
index 3483bd4..55dd983 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/examples/sql/NCSqlModelSpec.scala
@@ -107,13 +107,6 @@ class NCSqlModelSpec extends NCEnricherBaseSpec {
usr(text = "ship date", id = "col:date"),
),
_ ⇒ checkExists(
- txt = "give me the orders sorted by ship date",
- nlp(text = "give me the", isStop = true),
- usr(text = "orders", id = "tbl:orders"),
- srt(text = "sorted by", typ = BY_ONLY, note = "col:date",
index = 3),
- usr(text = "ship date", id = "col:date"),
- ),
- _ ⇒ checkExists(
txt = "give me the orders sorted by ship date asc",
nlp(text = "give me the", isStop = true),
usr(text = "orders", id = "tbl:orders"),
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIdlSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIdlSpec.scala
index a02424e..c92108b 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIdlSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIdlSpec.scala
@@ -28,7 +28,7 @@ import scala.language.implicitConversions
* IDL test model.
*/
class NCIdlSpecModel extends NCModelAdapter(
- "nlpcraft.intents.dsl.test", "IDL Test Model", "1.0"
+ "nlpcraft.intents.idl.test", "IDL Test Model", "1.0"
) {
private implicit def convert(s: String): NCResult = NCResult.text(s)
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIdlSpec2.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIdlSpec2.scala
index 31b4808..dd29cca 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIdlSpec2.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/NCIdlSpec2.scala
@@ -27,7 +27,7 @@ import scala.language.implicitConversions
* IDL test model.
*/
class NCIdlSpecModel2 extends NCModelAdapter(
- "nlpcraft.intents.dsl.test", "IDL Test Model", "1.0"
+ "nlpcraft.intents.idl.test", "IDL Test Model", "1.0"
) {
override def getElements: util.Set[NCElement] = Set(NCTestElement("a"))
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala
index cae42e0..9216473 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala
@@ -37,5 +37,5 @@ class NCAbstractTokensModel extends NCModelAdapter(
override def getAbstractTokens: util.Set[String] = Set("nlpcraft:num",
"anyWord").asJava
override def isPermutateSynonyms: Boolean = false
- override def getJiggleFactor: Int = 0
+ override def isSparse: Boolean = false
}
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/NCIdlTestSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/NCIdlTestSpec.scala
index 424da4a..75b45e2 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/NCIdlTestSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/NCIdlTestSpec.scala
@@ -40,7 +40,7 @@ class NCIdlTestSpec {
if (NCEmbeddedProbe.start(null,
Collections.singletonList(classOf[NCIdlTestModel].getName))) {
cli = new NCTestClientBuilder().newBuilder.build
- cli.open("nlpcraft.dsl.test")
+ cli.open("nlpcraft.idl.test")
}
}
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
index 0f09a66..6b5da23 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/compiler/NCIdlCompilerSpec.scala
@@ -37,16 +37,16 @@ class NCIdlCompilerSpec {
/**
*
- * @param dsl
+ * @param idl
*/
- private def checkCompileOk(dsl: String): Unit =
+ private def checkCompileOk(idl: String): Unit =
try {
- NCIdlCompiler.compileIntents(dsl, MODEL, MODEL_ID)
+ NCIdlCompiler.compileIntents(idl, MODEL, MODEL_ID)
assert(true)
}
catch {
- case e: Exception ⇒ assert(false, e)
+ case e: Exception ⇒ assert(assertion = false, e)
}
/**
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/idl_test_model.yaml
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/idl_test_model.yaml
index b436110..a9b70f1 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/idl_test_model.yaml
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/idl/idl_test_model.yaml
@@ -15,7 +15,7 @@
# limitations under the License.
#
-id: "nlpcraft.dsl.test"
+id: "nlpcraft.idl.test"
name: "IDL Test Model"
version: "1.0"
description: "IDL test model."
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/properties/NCTokensPropertiesSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/properties/NCTokensPropertiesSpec.scala
index f244901..ff530cb 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/properties/NCTokensPropertiesSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/properties/NCTokensPropertiesSpec.scala
@@ -35,27 +35,28 @@ abstract class NCTokenPropertiesModelAbstract extends
NCModelAdapter(
@NCIntent("intent=onXY term(t)={tok_id() == 'xy'}")
def onXY(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
}
case class NCPropTestElement(
- id: String, synonym: String, permFlag: Option[Boolean] = None,
jiggleFactor: Option[Int] = None
+ id: String, synonym: String, perm: Option[Boolean] = None, sparse:
Option[Boolean] = None
) extends NCElement {
override def getId: String = id
override def getSynonyms: util.List[String] =
util.Collections.singletonList(synonym)
- override def isPermutateSynonyms: Optional[lang.Boolean] =
- permFlag match {
- case Some(v) ⇒ Optional.of(v)
- case None ⇒ super.isPermutateSynonyms
- }
- override def getJiggleFactor: Optional[Integer] =
- jiggleFactor match {
+ private def get(opt: Option[Boolean], getSuper: () ⇒
Optional[lang.Boolean]): Optional[lang.Boolean] =
+ opt match {
case Some(v) ⇒ Optional.of(v)
- case None ⇒ super.getJiggleFactor
+ case None ⇒ getSuper()
}
+
+ override def isPermutateSynonyms: Optional[lang.Boolean] = get(perm, () ⇒
super.isPermutateSynonyms)
+ override def isSparse: Optional[lang.Boolean] = get(sparse, () ⇒
super.isSparse)
}
-// 1. Default model. Default behaviour with default jiggle values and permuted
synonyms.
+// 1. All enabled.
class NCTokenPropertiesModel1() extends NCTokenPropertiesModelAbstract
@NCTestEnvironment(model = classOf[NCTokenPropertiesModel1], startClient =
true)
@@ -76,6 +77,7 @@ class NCTokenPropertiesModel1Spec extends NCTestContext {
// 2. Permutation turned off.
class NCTokenPropertiesModel2 extends NCTokenPropertiesModelAbstract {
override def isPermutateSynonyms: Boolean = false
+ override def isSparse: Boolean = true
}
@NCTestEnvironment(model = classOf[NCTokenPropertiesModel2], startClient =
true)
@@ -100,16 +102,10 @@ class NCTokenPropertiesModel2Spec extends NCTestContext {
}
}
-// 3. Permutation turned off for `ab` but enabled by default for 'xy'
+// 3. Sparse turned off.
class NCTokenPropertiesModel3 extends NCTokenPropertiesModelAbstract {
- override def getElements: util.Set[NCElement] = {
- val set: Set[NCElement] = Set(
- NCPropTestElement("ab", "a b", permFlag = Some(false)),
- NCTestElement("xy", "x y")
- )
-
- set.asJava
- }
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = false
}
@NCTestEnvironment(model = classOf[NCTokenPropertiesModel3], startClient =
true)
@@ -117,26 +113,6 @@ class NCTokenPropertiesModel3Spec extends NCTestContext {
@Test
def test(): Unit = {
checkIntent("a b", "onAB")
- checkIntent("a test test b", "onAB")
- fail("b a")
-
- checkIntent("x y", "onXY")
- checkIntent("x test test y", "onXY")
- checkIntent("y x", "onXY")
- checkIntent("y test test x", "onXY")
- }
-}
-
-// 4. Jiggle factor turned off.
-class NCTokenPropertiesModel4 extends NCTokenPropertiesModelAbstract {
- override def getJiggleFactor: Int = 0
-}
-
-@NCTestEnvironment(model = classOf[NCTokenPropertiesModel4], startClient =
true)
-class NCTokenPropertiesModel4Spec extends NCTestContext {
- @Test
- def test(): Unit = {
- checkIntent("a b", "onAB")
checkIntent("b a", "onAB")
checkIntent("y x", "onXY")
checkIntent("x y", "onXY")
@@ -150,103 +126,82 @@ class NCTokenPropertiesModel4Spec extends NCTestContext {
"b test test a",
"x test test y",
"y test test x"
+
)
}
}
-// 5. Jiggle factor turned off for `ab` but enabled by default for 'xy'
-// Permutation for 'ab' is disabled.
-class NCTokenPropertiesModel5 extends NCTokenPropertiesModelAbstract {
- override def getElements: util.Set[NCElement] = {
- val set: Set[NCElement] = Set(
- NCPropTestElement("ab", "a b", permFlag = Some(false),
jiggleFactor = Some(0)),
- NCTestElement("xy", "x y")
- )
-
- set.asJava
- }
+// 4. Permutation and sparse turned off.
+class NCTokenPropertiesModel4 extends NCTokenPropertiesModelAbstract {
+ override def isPermutateSynonyms: Boolean = false
+ override def isSparse: Boolean = false
}
-@NCTestEnvironment(model = classOf[NCTokenPropertiesModel5], startClient =
true)
-class NCTokenPropertiesModel5Spec extends NCTestContext {
+@NCTestEnvironment(model = classOf[NCTokenPropertiesModel4], startClient =
true)
+class NCTokenPropertiesModel4Spec extends NCTestContext {
@Test
def test(): Unit = {
checkIntent("a b", "onAB")
- fail("b a")
- checkIntent("y x", "onXY")
- checkIntent("x y", "onXY")
-
fail(
+ "b a",
"a test b",
"b test a"
)
- checkIntent("y test x", "onXY")
- checkIntent("x test y", "onXY")
-
- fail(
- "a test test b",
- "b test test a"
- )
- checkIntent("y test test x", "onXY")
- checkIntent("x test test y", "onXY")
}
}
-// 6. Jiggle factor restricted for `ab` but enabled by default for 'xy'.
-// Permutation for 'ab' is disabled.
-class NCTokenPropertiesModel6 extends NCTokenPropertiesModelAbstract {
+// 5. Permutation turned off for `ab` but enabled for 'xy'.
+// Sparse turned on for both of them.
+class NCTokenPropertiesModel5 extends NCTokenPropertiesModelAbstract {
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
+
override def getElements: util.Set[NCElement] = {
val set: Set[NCElement] = Set(
- NCPropTestElement("ab", "a b", permFlag = Some(false),
jiggleFactor = Some(1)),
- NCTestElement("xy", "x y")
+ NCPropTestElement("ab", "a b", perm = Some(false)),
+ NCPropTestElement("xy", "x y", perm = Some(true))
)
set.asJava
}
}
-@NCTestEnvironment(model = classOf[NCTokenPropertiesModel6], startClient =
true)
-class NCTokenPropertiesModel6Spec extends NCTestContext {
+@NCTestEnvironment(model = classOf[NCTokenPropertiesModel5], startClient =
true)
+class NCTokenPropertiesModel5Spec extends NCTestContext {
@Test
def test(): Unit = {
checkIntent("a b", "onAB")
+ checkIntent("a test test b", "onAB")
fail("b a")
- checkIntent("y x", "onXY")
- checkIntent("x y", "onXY")
-
- checkIntent("a test b", "onAB")
- fail("b test a")
- checkIntent("y test x", "onXY")
- checkIntent("x test y", "onXY")
- fail(
- "a test test b",
- "b test test a"
- )
- checkIntent("y test test x", "onXY")
+ checkIntent("x y", "onXY")
checkIntent("x test test y", "onXY")
+ checkIntent("y x", "onXY")
+ checkIntent("y test test x", "onXY")
}
}
-// 7. Jiggle factor turned off for `ab` but enabled by default for 'xy'
-// Permutation for 'ab' - by default.
-class NCTokenPropertiesModel7 extends NCTokenPropertiesModelAbstract {
+// 6. Sparse factor and permutation are turned off for `ab` but enabled for
'xy'.
+class NCTokenPropertiesModel6 extends NCTokenPropertiesModelAbstract {
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
+
override def getElements: util.Set[NCElement] = {
val set: Set[NCElement] = Set(
- NCPropTestElement("ab", "a b", jiggleFactor = Some(0)),
- NCTestElement("xy", "x y")
+ NCPropTestElement("ab", "a b", sparse = Some(false), perm =
Some(false)),
+ NCPropTestElement("xy", "x y")
)
set.asJava
}
}
-@NCTestEnvironment(model = classOf[NCTokenPropertiesModel7], startClient =
true)
-class NCTokenPropertiesModel7Spec extends NCTestContext {
+@NCTestEnvironment(model = classOf[NCTokenPropertiesModel6], startClient =
true)
+class NCTokenPropertiesModel6Spec extends NCTestContext {
@Test
def test(): Unit = {
checkIntent("a b", "onAB")
- checkIntent("b a", "onAB")
+ fail("b a")
checkIntent("y x", "onXY")
checkIntent("x y", "onXY")
@@ -266,41 +221,33 @@ class NCTokenPropertiesModel7Spec extends NCTestContext {
}
}
+// 7. Sparse factor turned off for `ab` but enabled for 'xy'.
+class NCTokenPropertiesModel7 extends NCTokenPropertiesModelAbstract {
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
-// 8. Jiggle factor restricted for `ab` but enabled by default for 'xy'
-// Permutation for 'ab' - by default.
-class NCTokenPropertiesModel8 extends NCTokenPropertiesModelAbstract {
override def getElements: util.Set[NCElement] = {
val set: Set[NCElement] = Set(
- NCPropTestElement("ab", "a b", jiggleFactor = Some(1)),
- NCTestElement("xy", "x y")
+ NCPropTestElement("ab", "a b", perm = Some(true), sparse =
Some(false)),
+ NCPropTestElement("xy", "x y", perm = Some(true), sparse =
Some(true))
)
set.asJava
}
}
-@NCTestEnvironment(model = classOf[NCTokenPropertiesModel8], startClient =
true)
-class NCTokenPropertiesModel8Spec extends NCTestContext {
+@NCTestEnvironment(model = classOf[NCTokenPropertiesModel7], startClient =
true)
+class NCTokenPropertiesModel7Spec extends NCTestContext {
@Test
def test(): Unit = {
checkIntent("a b", "onAB")
checkIntent("b a", "onAB")
+ fail("a test b")
+ fail("b test a")
checkIntent("y x", "onXY")
checkIntent("x y", "onXY")
- checkIntent("a test b", "onAB")
- checkIntent("b test a", "onAB")
- checkIntent("y test x", "onXY")
- checkIntent("x test y", "onXY")
-
- fail(
- "a test test b",
- "b test test a"
- )
checkIntent("y test test x", "onXY")
checkIntent("x test test y", "onXY")
}
}
-
-
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/sparse/NCSparseSpec.scala
similarity index 79%
rename from
nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
rename to
nlpcraft/src/test/scala/org/apache/nlpcraft/model/sparse/NCSparseSpec.scala
index 6f0ed53..8441532 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/sparse/NCSparseSpec.scala
@@ -15,7 +15,7 @@
* limitations under the License.
*/
-package org.apache.nlpcraft.model.jiggle
+package org.apache.nlpcraft.model.sparse
import org.apache.nlpcraft.model.`abstract`.NCAbstractTokensModel
import org.apache.nlpcraft.model.{NCContext, NCElement, NCResult, NCToken}
@@ -26,30 +26,32 @@ import java.util
import scala.collection.JavaConverters._
import scala.collection.mutable
-class NJiggleModel extends NCAbstractTokensModel {
+class NCSparseModel extends NCAbstractTokensModel {
override def getElements: util.Set[NCElement] = Set(NCTestElement("xyz",
"x y z"))
- // Default values.
override def isPermutateSynonyms: Boolean = true
- override def getJiggleFactor: Int = 4
+ override def isSparse: Boolean = true
override def onContext(ctx: NCContext): NCResult = {
val variants = ctx.getVariants.asScala
def checkOneVariant(sparsity: Int): Unit = {
- require(variants.size == 1)
+ require(variants.size == 1, "There should be a single variant.")
val toks = variants.head.asScala.filter(_.getId == "xyz")
- require(toks.size == 3)
+ require(toks.size == 3, "There should be 3 `xyz` tokens.")
checkSparsity(sparsity, toks)
}
def checkSparsity(sparsity: Int, toks: mutable.Buffer[NCToken]): Unit =
-
require(toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int]
== sparsity))
+ require(
+
toks.forall(_.getMetadata.get("nlpcraft:nlp:sparsity").asInstanceOf[Int] ==
sparsity),
+ s"Sparsity of each tokens should be: $sparsity."
+ )
- def checkExists(sparsity: Int): Unit = {
+ def checkExists(sparsity: Int): Unit =
require(
variants.exists(v ⇒ {
val toks = v.asScala.filter(_.getId == "xyz")
@@ -59,11 +61,12 @@ class NJiggleModel extends NCAbstractTokensModel {
checkSparsity(sparsity, toks)
true
- case _ ⇒ false
+ case _ ⇒
+ false
}
- })
+ }),
+ s"Variant with 3 `xyz` tokens should be exists."
)
- }
ctx.getRequest.getNormalizedText match {
case "x y z x y z x y z" ⇒ checkOneVariant(0)
@@ -80,8 +83,8 @@ class NJiggleModel extends NCAbstractTokensModel {
}
}
-@NCTestEnvironment(model = classOf[NJiggleModel], startClient = true)
-class NCJiggleSpec extends NCTestContext {
+@NCTestEnvironment(model = classOf[NCSparseModel], startClient = true)
+class NCSparseSpec extends NCTestContext {
@Test
def test(): Unit = {
checkResult("x y z x y z x y z", "OK")
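The sparsity values asserted above are consistent with a gap-based measure over matched word indexes. One plausible definition, as a sketch only (the actual NCUtils.calcSparsity may differ):

    // Number of words skipped inside the match: 0 means a contiguous match.
    def calcSparsity(idxs: Seq[Int]): Int = {
        val sorted = idxs.sorted
        sorted.zip(sorted.drop(1)).map { case (a, b) ⇒ b - a - 1 }.sum
    }

    calcSparsity(Seq(0, 1, 2))   // 0 - "x y z" matched contiguously.
    calcSparsity(Seq(0, 2, 4))   // 2 - one word skipped after 'x' and one after 'y'.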
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/synonyms/NCSynonymsSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/synonyms/NCSynonymsSpec.scala
index d5154c8..91c8eb6 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/synonyms/NCSynonymsSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/synonyms/NCSynonymsSpec.scala
@@ -23,11 +23,7 @@ import org.junit.jupiter.api.Test
import java.util
-class NCSynonymsSpecModel extends NCModelAdapter("nlpcraft.syns.test.mdl",
"Synonyms Test Model", "1.0") {
- // Default values.
- override def isPermutateSynonyms: Boolean = true
- override def getJiggleFactor: Int = 4
-
+abstract class NCSynonymsSpecModel extends
NCModelAdapter("nlpcraft.syns.test.mdl", "Synonyms Test Model", "1.0") {
override def getElements: util.Set[NCElement] =
Set(
NCTestElement("e1", "A"),
@@ -55,17 +51,58 @@ class NCSynonymsSpecModel extends
NCModelAdapter("nlpcraft.syns.test.mdl", "Syno
def onE4(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
}
-@NCTestEnvironment(model = classOf[NCSynonymsSpecModel], startClient = true)
class NCSynonymsSpec extends NCTestContext {
- @Test
- def test(): Unit = {
+ def body(testNonDir: Boolean): Unit = {
checkIntent("A", "onE1")
-
checkIntent("X Y Z", "onE2") // Text direct.
- checkIntent("Y X Z", "onE2") // Text not direct.
-
+ if (testNonDir)
+ checkIntent("Y X Z", "onE2") // Text not direct.
checkIntent("AA AA AA", "onE3") // Regex.
-
checkIntent("A A A", "onE4") // Nested.
}
}
+
+class NCSynonymsSpecModel1 extends NCSynonymsSpecModel {
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
+}
+
+@NCTestEnvironment(model = classOf[NCSynonymsSpecModel1], startClient = true)
+class NCSynonymsSpec1 extends NCSynonymsSpec {
+ @Test
+ def test(): Unit = body(testNonDir = true)
+}
+
+class NCSynonymsSpecModel2 extends NCSynonymsSpecModel {
+ override def isPermutateSynonyms: Boolean = false
+ override def isSparse: Boolean = true
+}
+
+@NCTestEnvironment(model = classOf[NCSynonymsSpecModel2], startClient = true)
+class NCSynonymsSpec2 extends NCSynonymsSpec {
+ @Test
+ def test(): Unit = body(testNonDir = false)
+}
+
+class NCSynonymsSpecModel3 extends NCSynonymsSpecModel {
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = false
+}
+
+@NCTestEnvironment(model = classOf[NCSynonymsSpecModel3], startClient = true)
+class NCSynonymsSpec3 extends NCSynonymsSpec {
+ @Test
+ def test(): Unit = body(testNonDir = true)
+}
+
+class NCSynonymsSpecModel4 extends NCSynonymsSpecModel {
+ override def isPermutateSynonyms: Boolean = false
+ override def isSparse: Boolean = false
+}
+
+@NCTestEnvironment(model = classOf[NCSynonymsSpecModel4], startClient = true)
+class NCSynonymsSpec4 extends NCSynonymsSpec {
+ @Test
+ def test(): Unit = body(testNonDir = false)
+}
+
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
index 54f42f4..91d037b 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala
@@ -18,7 +18,7 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.model
import org.apache.nlpcraft.model.NCElement
-import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCDefaultTestModel,
NCEnricherBaseSpec, NCTestUserToken ⇒ usr}
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCDefaultTestModel,
NCEnricherBaseSpec, NCTestUserToken ⇒ usr, NCTestNlpToken ⇒ nlp}
import org.apache.nlpcraft.{NCTestElement, NCTestEnvironment}
import org.junit.jupiter.api.Test
@@ -27,7 +27,10 @@ import java.util
/**
* Nested Elements test model.
*/
-class NCNestedTestModel extends NCDefaultTestModel {
+class NCNestedTestModel1 extends NCDefaultTestModel {
+ override def isPermutateSynonyms: Boolean = false
+ override def isSparse: Boolean = false
+
override def getElements: util.Set[NCElement] =
Set(
NCTestElement("x1", "{test|_} ^^{tok_id() == 'nlpcraft:date'}^^"),
@@ -42,8 +45,8 @@ class NCNestedTestModel extends NCDefaultTestModel {
/**
* Nested elements model enricher test.
*/
-@NCTestEnvironment(model = classOf[NCNestedTestModel], startClient = true)
-class NCEnricherNestedModelSpec extends NCEnricherBaseSpec {
+@NCTestEnvironment(model = classOf[NCNestedTestModel1], startClient = true)
+class NCEnricherNestedModelSpec1 extends NCEnricherBaseSpec {
@Test
def test(): Unit =
runBatch(
@@ -62,3 +65,48 @@ class NCEnricherNestedModelSpec extends NCEnricherBaseSpec {
)
)
}
+
+class NCNestedTestModel2 extends NCNestedTestModel1 {
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
+}
+
+/**
+ * Nested elements model enricher test.
+ */
+@NCTestEnvironment(model = classOf[NCNestedTestModel2], startClient = true)
+class NCEnricherNestedModelSpec2 extends NCEnricherNestedModelSpec1 {
+ @Test
+ def test2(): Unit =
+ runBatch(
+ _ ⇒ checkExists(
+ "test tomorrow",
+ usr(text = "test tomorrow", id = "x3")
+ ),
+ _ ⇒ checkExists(
+ "tomorrow test",
+ usr(text = "tomorrow test", id = "x3")
+ ),
+ _ ⇒ checkExists(
+ "test xxx tomorrow",
+ usr(text = "test tomorrow", id = "x3"),
+ nlp(text = "xxx"),
+ ),
+ _ ⇒ checkExists(
+ "y the y",
+ usr(text = "y y", id = "y3"),
+ nlp(text = "the", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "y xxx y",
+ usr(text = "y y", id = "y3"),
+ nlp(text = "xxx")
+ ),
+ _ ⇒ checkExists(
+ "aaa y xxx y",
+ nlp(text = "aaa"),
+ usr(text = "y y", id = "y3"),
+ nlp(text = "xxx")
+ )
+ )
+}
\ No newline at end of file
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
index 1ad05e8..a27a5b4 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala
@@ -27,8 +27,11 @@ import java.util
* Nested Elements test model.
*/
class NCNestedTestModel21 extends NCModelAdapter("nlpcraft.nested2.test.mdl",
"Nested Test Model", "1.0") {
- override def getElements: util.Set[NCElement] =
+ override def getElements: util.Set[NCElement] = {
+ // Note - it defines one simple and one IDL synonym,
+ // but input like `10 word` should be caught by the longer (IDL) variant.
Set(NCTestElement("e1", "{^^{tok_id() == 'nlpcraft:num'}^^|_} word"))
+ }
@NCIntent("intent=onE1 term(t1)={tok_id() == 'e1'}")
def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
@@ -37,7 +40,7 @@ class NCNestedTestModel21 extends
NCModelAdapter("nlpcraft.nested2.test.mdl", "N
def onNumAndE1(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
override def isPermutateSynonyms: Boolean = false
- override def getJiggleFactor: Int = 0
+ override def isSparse: Boolean = false
}
/**
@@ -58,7 +61,7 @@ class NCEnricherNestedModelSpec21 extends NCTestContext {
*/
class NCNestedTestModel22 extends NCNestedTestModel21 {
override def isPermutateSynonyms: Boolean = true
- override def getJiggleFactor: Int = 4
+ override def isSparse: Boolean = true
}
/**
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec41.scala
similarity index 72%
rename from
nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
rename to
nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec41.scala
index 43320e7..9f5d13f 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec4.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec41.scala
@@ -27,7 +27,7 @@ import scala.collection.JavaConverters._
/**
* Nested Elements test model.
*/
-class NCNestedTestModel4 extends NCModelAdapter(
+class NCNestedTestModel41 extends NCModelAdapter(
"nlpcraft.nested4.test.mdl", "Nested Data Test Model", "1.0"
) {
override def getElements: util.Set[NCElement] =
@@ -41,13 +41,31 @@ class NCNestedTestModel4 extends NCModelAdapter(
@NCIntent("intent=onE2 term(t1)={tok_id() == 'e2'}[8, 100]")
def onAB(ctx: NCIntentMatch): NCResult = NCResult.text("OK")
+
+ override def isPermutateSynonyms: Boolean = false
+ override def isSparse: Boolean = false
}
/**
* It shouldn't be too slow.
*/
-@NCTestEnvironment(model = classOf[NCNestedTestModel4], startClient = true)
-class NCEnricherNestedModelSpec4 extends NCTestContext {
+@NCTestEnvironment(model = classOf[NCNestedTestModel41], startClient = true)
+class NCEnricherNestedModelSpec41 extends NCTestContext {
@Test
def test(): Unit = checkIntent("the a " * 11, "onE2")
-}
\ No newline at end of file
+}
+
+class NCNestedTestModel42 extends NCNestedTestModel41 {
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
+}
+
+/**
+ * It shouldn't be too slow.
+ */
+@NCTestEnvironment(model = classOf[NCNestedTestModel42], startClient = true)
+class NCEnricherNestedModelSpec42 extends NCTestContext {
+ @Test
+ def test(): Unit = checkIntent("the a " * 8, "onE2")
+}
+
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
index cc03066..8f24288 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala
@@ -17,15 +17,31 @@
package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.sort
-import org.apache.nlpcraft.NCTestEnvironment
+import org.apache.nlpcraft.{NCTestElement, NCTestEnvironment}
+import org.apache.nlpcraft.model.NCElement
import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.NCTestSortTokenType._
-import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCDefaultTestModel,
NCEnricherBaseSpec, NCTestNlpToken ⇒ nlp, NCTestSortToken ⇒ srt,
NCTestUserToken ⇒ usr}
+import org.apache.nlpcraft.probe.mgrs.nlp.enrichers.{NCDefaultTestModel,
NCEnricherBaseSpec, NCTestNlpToken => nlp, NCTestSortToken => srt,
NCTestUserToken => usr}
import org.junit.jupiter.api.Test
+import java.util
+import scala.collection.JavaConverters._
+
+class NCDefaultSpecTestModel extends NCDefaultTestModel {
+ override def getElements: util.Set[NCElement] = {
+ (
+ super.getElements.asScala ++
+ Set(NCTestElement("wrapperA", "^^{tok_id() == 'A'}^^ ^^{tok_id()
== 'A'}^^ ^^{tok_id() == 'A'}^^"))
+ ).asJava
+ }
+
+ override def isPermutateSynonyms: Boolean = true
+ override def isSparse: Boolean = true
+}
+
/**
* Sort enricher test.
*/
-@NCTestEnvironment(model = classOf[NCDefaultTestModel], startClient = true)
+@NCTestEnvironment(model = classOf[NCDefaultSpecTestModel], startClient = true)
class NCEnricherSortSpec extends NCEnricherBaseSpec {
/**
*
@@ -204,6 +220,183 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec {
nlp(text = ",", isStop = true),
usr(text = "B", id = "B"),
nlp(text = ", asc", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "sort A",
+ srt(text = "sort", typ = SUBJ_ONLY, note = "A", index = 1),
+ usr("A", "A")
+ ),
+ _ ⇒ checkExists(
+ "sort A by A",
+ srt(text = "sort", subjNote = "A", subjIndex = 1, byNote =
"A", byIndex = 3),
+ usr(text = "A", id = "A"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A", id = "A")
+ ),
+ _ ⇒ checkExists(
+ "sort A, C by A, C",
+ srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes =
Seq(1, 3), byNotes = Seq("A", "C"), byIndexes = Seq(5, 7)),
+ usr(text = "A", id = "A"),
+ nlp(text = ",", isStop = true),
+ usr(text = "C", id = "C"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = ",", isStop = true),
+ usr(text = "C", id = "C")
+ ),
+ _ ⇒ checkExists(
+ "sort A C by A C",
+ srt(text = "sort", subjNotes = Seq("A", "C"), subjIndexes =
Seq(1, 2), byNotes = Seq("A", "C"), byIndexes = Seq(4, 5)),
+ usr(text = "A", id = "A"),
+ usr(text = "C", id = "C"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A", id = "A"),
+ usr(text = "C", id = "C")
+ ),
+ _ ⇒ checkExists(
+ "sort A B by A B",
+ srt(text = "sort", subjNotes = Seq("A", "B"), subjIndexes =
Seq(1, 2), byNotes = Seq("A", "B"), byIndexes = Seq(4, 5)),
+ usr(text = "A", id = "A"),
+ usr(text = "B", id = "B"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A", id = "A"),
+ usr(text = "B", id = "B")
+ ),
+ _ ⇒ checkExists(
+ "sort A B by A B",
+ srt(text = "sort", subjNote = "AB", subjIndex = 1, byNote =
"AB", byIndex = 3),
+ usr(text = "A B", id = "AB"),
+ nlp(text = "by", isStop = true),
+ usr(text = "A B", id = "AB")
+ ),
+ _ ⇒ checkExists(
+ "A classify",
+ usr(text = "A", id = "A"),
+ srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 0)
+ ),
+ _ ⇒ checkExists(
+ "the A the classify",
+ nlp(text = "the", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = "the", isStop = true),
+ srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 1)
+ ),
+ _ ⇒ checkExists(
+ "segment A by top down",
+ srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1,
asc = false),
+ usr(text = "A", id = "A"),
+ nlp(text = "by top down", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "segment A in bottom up order",
+ srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1,
asc = true),
+ usr(text = "A", id = "A"),
+ nlp(text = "in bottom up order", isStop = true)
+ ),
+ // `by` is a redundant word here.
+ _ ⇒ checkExists(
+ "segment A by in bottom up order",
+ srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 1),
+ usr(text = "A", id = "A"),
+ nlp(text = "by"),
+ nlp(text = "in"),
+ nlp(text = "bottom"),
+ nlp(text = "up"),
+ nlp(text = "order")
+ ),
+ _ ⇒ checkExists(
+ "the segment the A the in bottom up the order the",
+ nlp(text = "the", isStop = true),
+ srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 3,
asc = true),
+ nlp(text = "the", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = "the in bottom up the order the", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "the segment the A the by bottom up the order the",
+ nlp(text = "the", isStop = true),
+ srt(text = "segment", typ = SUBJ_ONLY, note = "A", index = 3,
asc = true),
+ nlp(text = "the", isStop = true),
+ usr(text = "A", id = "A"),
+ nlp(text = "the by bottom up the order the", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "A classify",
+ usr(text = "A", id = "A"),
+ srt(text = "classify", typ = SUBJ_ONLY, note = "A", index = 0)
+ ),
+ _ ⇒ checkAll(
+ "A B classify",
+ Seq(
+ usr(text = "A B", id = "AB"),
+ srt(text = "classify", typ = SUBJ_ONLY, note = "AB", index
= 0)
+ ),
+ Seq(
+ usr(text = "A", id = "A"),
+ usr(text = "B", id = "B"),
+ srt(text = "classify", subjNotes = Seq("A", "B"),
subjIndexes = Seq(0, 1))
+ ),
+ Seq(
+ usr(text = "A", id = "A"),
+ usr(text = "B", id = "B"),
+ srt(text = "classify", subjNotes = Seq("B"), subjIndexes =
Seq(1))
+ )
+ ),
+ _ ⇒ checkAll(
+ "D classify",
+ Seq(
+ usr(text = "D", id = "D1"),
+ srt(text = "classify", typ = SUBJ_ONLY, note = "D1", index
= 0)
+ ),
+ Seq(
+ usr(text = "D", id = "D2"),
+ srt(text = "classify", typ = SUBJ_ONLY, note = "D2", index
= 0)
+ )
+ ),
+ _ ⇒ checkAll(
+ "sort by A",
+ Seq(
+ srt(text = "sort by", typ = BY_ONLY, note = "A", index =
1),
+ usr(text = "A", id = "A")
+ )
+ ),
+ _ ⇒ checkExists(
+ "organize by A, B top down",
+ srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes =
Seq(1, 3), asc = Some(false)),
+ usr(text = "A", id = "A"),
+ nlp(text = ",", isStop = true),
+ usr(text = "B", id = "B"),
+ nlp(text = "top down", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "organize by A, B from bottom up order",
+ srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes =
Seq(1, 3), asc = Some(true)),
+ usr(text = "A", id = "A"),
+ nlp(text = ",", isStop = true),
+ usr(text = "B", id = "B"),
+ nlp(text = "from bottom up order", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "organize by A, B the descending",
+ srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes =
Seq(1, 3), asc = Some(false)),
+ usr(text = "A", id = "A"),
+ nlp(text = ",", isStop = true),
+ usr(text = "B", id = "B"),
+ nlp(text = "the descending", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "organize by A, B, asc",
+ srt(text = "organize by", byNotes = Seq("A", "B"), byIndexes =
Seq(1, 3), asc = Some(true)),
+ usr(text = "A", id = "A"),
+ nlp(text = ",", isStop = true),
+ usr(text = "B", id = "B"),
+ nlp(text = ", asc", isStop = true)
+ ),
+ _ ⇒ checkExists(
+ "sort A the A the A",
+ srt(text = "sort", typ = SUBJ_ONLY, note = "wrapperA", index =
1),
+ usr("A A A", "wrapperA"),
+ nlp("the the", isStop = true)
)
)
}