This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-287 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 971c896ee1c613437a5232a04aeea3f917ecc368 Author: Sergey Kamov <[email protected]> AuthorDate: Thu Apr 1 13:12:16 2021 +0300 WIP. --- .../scala/org/apache/nlpcraft/model/NCElement.java | 30 +-------- .../apache/nlpcraft/model/NCModelFileAdapter.java | 8 +-- .../org/apache/nlpcraft/model/NCModelView.java | 44 ++----------- .../nlpcraft/model/impl/json/NCElementJson.java | 10 +-- .../nlpcraft/model/impl/json/NCModelJson.java | 10 +-- .../probe/mgrs/deploy/NCDeployManager.scala | 16 ----- .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 76 +--------------------- .../model/abstract/NCAbstractTokensModel.scala | 2 +- .../nlpcraft/model/jiggle/NCJiggleSpec.scala | 2 +- .../model/properties/NCTokensPropertiesSpec.scala | 18 ++--- .../nlpcraft/model/synonyms/NCSynonymsSpec.scala | 2 +- .../model/NCEnricherNestedModelSpec2.scala | 4 +- 12 files changed, 39 insertions(+), 183 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java index f24bb1f..7c88864 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCElement.java @@ -357,34 +357,8 @@ public interface NCElement extends NCMetadata, Serializable { return Optional.empty(); } - /** - * Measure of how much sparsity is allowed when user input words are permutated in attempt to - * match the multi-word synonyms. Zero means no reordering is allowed. One means - * that a word in a synonym can move only one position left or right, and so on. Empirically - * the value of {@code 2} proved to be a good default value in most cases. Note that larger - * values mean that synonym words can be almost in any random place in the user input which makes - * synonym matching practically meaningless. Maximum value is <code>4</code>. - * <p> - * This property overrides the value from {@link NCModelView#getJiggleFactor()} ()}. - * One should use this property if model's value isn't applicable to this element. - * <p> - * <b>JSON</b> - * <br> - * If using JSON/YAML model presentation this is set by <code>jiggleFactor</code>: - * <pre class="brush: js, highlight: [4]"> - * "elements": [ - * { - * "id": "elem", - * "jiggleFactor": 1, - * ... - * } - * ] - * </pre> - * - * @return Optional word jiggle factor (sparsity measure) overriding model's one. - * @see NCModelView#getJiggleFactor() - */ - default Optional<Integer> getJiggleFactor() { + // TODO: + default Optional<Boolean> isSparse() { return Optional.empty(); } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java index c39b5f6..843aa2d 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelFileAdapter.java @@ -353,8 +353,8 @@ abstract public class NCModelFileAdapter extends NCModelAdapter { } @Override - public Optional<Integer> getJiggleFactor() { - return nvl(js.getJiggleFactor(), proxy.getJiggleFactor()); + public Optional<Boolean> isSparse() { + return nvl(js.isSparse(), proxy.isSparse()); } private<T> Optional<T> nvl(T t, T dflt) { @@ -479,8 +479,8 @@ abstract public class NCModelFileAdapter extends NCModelAdapter { } @Override - public int getJiggleFactor() { - return proxy.getJiggleFactor(); + public boolean isSparse() { + return proxy.isSparse(); } @Override diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java index 8f9849a..b0a2ff1 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/NCModelView.java @@ -124,16 +124,6 @@ public interface NCModelView extends NCMetadata { long MAX_WORDS_MAX = 100L; /** - * Min value for {@link #getJiggleFactor()} method. - */ - long JIGGLE_FACTOR_MIN = 0L; - - /** - * Max value for {@link #getJiggleFactor()} method. - */ - long JIGGLE_FACTOR_MAX = 4L; - - /** * Min value for {@link #getMaxElementSynonyms()} method. */ long MAX_SYN_MIN = 1L; @@ -169,9 +159,9 @@ public interface NCModelView extends NCMetadata { int MODEL_VERSION_MAXLEN = 16; /** - * Default value for {@link #getJiggleFactor()} method. + * Default value for {@link #isSparse()} method. */ - int DFLT_JIGGLE_FACTOR = 2; + boolean DFLT_IS_SPARSE = false; /** * Default value for {@link #getMaxElementSynonyms()} method. @@ -199,7 +189,7 @@ public interface NCModelView extends NCMetadata { int DFLT_CONV_DEPTH = 3; /** - * Default value for {@link #getJiggleFactor()} method. + * TODO: */ Map<String, Object> DFLT_METADATA = new HashMap<>(); @@ -266,7 +256,7 @@ public interface NCModelView extends NCMetadata { /** * Default value for {@link #isPermutateSynonyms()} method. */ - boolean DFLT_IS_PERMUTATE_SYNONYMS = true; + boolean DFLT_IS_PERMUTATE_SYNONYMS = false; /** * Default value for {@link #isDupSynonymsAllowed()} method. @@ -791,30 +781,10 @@ public interface NCModelView extends NCMetadata { } /** - * Measure of how much sparsity is allowed when user input words are permutated in attempt to - * match the multi-word synonyms. Zero means no reordering is allowed. One means - * that a word in a synonym can move only one position left or right, and so on. Empirically - * the value of {@code 2} proved to be a good default value in most cases. Note that larger - * values mean that synonym words can be almost in any random place in the user input which makes - * synonym matching practically meaningless. Maximum value is <code>4</code>. - * <p> - * <b>Default</b> - * <br> - * If not provided by the model the default value {@link #DFLT_JIGGLE_FACTOR} will be used. - * <p> - * <b>JSON</b> - * <br> - * If using JSON/YAML model presentation this is set by <code>jiggleFactor</code> property: - * <pre class="brush: js"> - * { - * "jiggleFactor": 2 - * } - * </pre> - * - * @return Word jiggle factor (sparsity measure). + * TODO: */ - default int getJiggleFactor() { - return DFLT_JIGGLE_FACTOR; + default boolean isSparse() { + return DFLT_IS_SPARSE; } /** diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java index 7419938..addca45 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCElementJson.java @@ -35,7 +35,7 @@ public class NCElementJson { // Can be null. private Boolean isPermutateSynonyms; // Can be null. - private Integer jiggleFactor; + private Boolean isSparse; public String getParentId() { return parentId; @@ -91,10 +91,10 @@ public class NCElementJson { public void setPermutateSynonyms(Boolean permutateSynonyms) { isPermutateSynonyms = permutateSynonyms; } - public Integer getJiggleFactor() { - return jiggleFactor; + public Boolean isSparse() { + return isSparse; } - public void setJiggleFactor(Integer jiggleFactor) { - this.jiggleFactor = jiggleFactor; + public void setSparse(Boolean sparse) { + isSparse = sparse; } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java index 3040c60..d2459d3 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/impl/json/NCModelJson.java @@ -54,10 +54,10 @@ public class NCModelJson { private boolean isSwearWordsAllowed = DFLT_IS_SWEAR_WORDS_ALLOWED; private boolean isNoNounsAllowed = DFLT_IS_NO_NOUNS_ALLOWED; private boolean isNoUserTokensAllowed = DFLT_IS_NO_USER_TOKENS_ALLOWED; - private int jiggleFactor = DFLT_JIGGLE_FACTOR; private boolean isDupSynonymsAllowed = DFLT_IS_DUP_SYNONYMS_ALLOWED; private int maxTotalSynonyms = DFLT_MAX_TOTAL_SYNONYMS; private boolean isPermutateSynonyms = DFLT_IS_PERMUTATE_SYNONYMS; + private boolean isSparse = DFLT_IS_SPARSE; private int maxElementSynonyms = DFLT_MAX_TOTAL_SYNONYMS; private boolean maxSynonymsThresholdError = DFLT_MAX_SYNONYMS_THRESHOLD_ERROR; private long conversationTimeout = DFLT_CONV_TIMEOUT_MS; @@ -199,11 +199,11 @@ public class NCModelJson { public void setNoUserTokensAllowed(boolean noUserTokensAllowed) { isNoUserTokensAllowed = noUserTokensAllowed; } - public int getJiggleFactor() { - return jiggleFactor; + public boolean isSparse() { + return isSparse; } - public void setJiggleFactor(int jiggleFactor) { - this.jiggleFactor = jiggleFactor; + public void setSparse(boolean sparse) { + isSparse = sparse; } public boolean isDupSynonymsAllowed() { return isDupSynonymsAllowed; diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala index 687a2d7..46f3ef7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala @@ -154,21 +154,6 @@ object NCDeployManager extends NCService with DecorateAsScala { s"regex=$ID_REGEX" + s"]" ) - - elm.getJiggleFactor.asScala match { - case Some(elemJiggleFactor) ⇒ - if (elemJiggleFactor < JIGGLE_FACTOR_MIN || elemJiggleFactor > JIGGLE_FACTOR_MAX) - throw new NCE( - s"Model element 'jiggleFactor' property is out of range [" + - s"mdlId=$mdlId, " + - s"elm=${elm.getId}, " + - s"value=$elemJiggleFactor," + - s"min=$JIGGLE_FACTOR_MIN, " + - s"max=$JIGGLE_FACTOR_MAX" + - s"]" - ) - case None ⇒ // No-op. - } } checkMacros(mdl) @@ -903,7 +888,6 @@ object NCDeployManager extends NCService with DecorateAsScala { checkNum(mdl.getMinTokens, "minTokens", MIN_TOKENS_MIN, MIN_TOKENS_MAX) checkNum(mdl.getMaxTokens, "maxTokens", MAX_TOKENS_MIN, MAX_TOKENS_MAX) checkNum(mdl.getMaxWords, "maxWords", MAX_WORDS_MIN, MAX_WORDS_MAX) - checkNum(mdl.getJiggleFactor, "jiggleFactor", JIGGLE_FACTOR_MIN, JIGGLE_FACTOR_MAX) checkNum(mdl.getMaxElementSynonyms, "maxSynonymsThreshold", MAX_SYN_MIN, MAX_SYN_MAX) checkNum(mdl.getConversationDepth, "conversationDepth", CONV_DEPTH_MIN, CONV_DEPTH_MAX) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala index 1233c31..ec2caa1 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala @@ -184,68 +184,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { } /** - * Returns an iterator of tokens arrays where each token is jiggled left and right by given factor. - * Note that only one token is jiggled at a time. - * - * @param ns NLP sentence to jiggle. - * @param factor Distance of left or right jiggle, i.e. how far can an individual token move - * left or right in the sentence. - */ - private def jiggle(ns: NCNlpSentenceTokenBuffer, factor: Int): Iterator[NCNlpSentenceTokenBuffer] = { - require(factor >= 0) - - if (ns.isEmpty) - Iterator.empty - else if (factor == 0) - Iterator.apply(ns) - else - new Iterator[NCNlpSentenceTokenBuffer] { - private val min = -factor - private val max = factor - private val sz = ns.size - - private var i = 0 // Token index. - private var d = 0 // Jiggle amount [min, max]. - private var isNext = sz > 0 - - private def calcNext(): Unit = { - isNext = false - d += 1 - - while (i < sz && !isNext) { - while (d <= max && !isNext) { - val p = i + d - - if (p >= 0 && p < sz) // Valid new position? - isNext = true - else - d += 1 - } - if (!isNext) { - d = min - i += 1 - } - } - } - - override def hasNext: Boolean = isNext - - override def next(): NCNlpSentenceTokenBuffer = { - require(isNext) - - val buf = NCNlpSentenceTokenBuffer(ns) - - if (d != 0) - buf.insert(i + d, buf.remove(i)) // Jiggle. - - calcNext() - - buf - } - } - } - - /** * * @param ns * @param elem @@ -345,11 +283,6 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { span ⇒ - val elemsFactors = mdl.elements.values.flatMap(_.getJiggleFactor.asScala).toSeq - val elemsMaxFactor: Int = if (elemsFactors.nonEmpty) elemsFactors.max else 0 - - val maxJiggleFactor = Math.max(mdl.model.getJiggleFactor, elemsMaxFactor) - val cache = mutable.HashSet.empty[Seq[Int]] val matches = ArrayBuffer.empty[ElementMatch] @@ -459,10 +392,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { syn: NCProbeSynonym, parts: Seq[(NCToken, NCSynonymChunkKind)] ): Unit = - if ( - (elm.getJiggleFactor.isEmpty || elm.getJiggleFactor.get() >= sparsity) && - !matches.exists(m ⇒ m.element == elm && m.isSubSet(toks.toSet)) - ) { + if (!matches.exists(m ⇒ m.element == elm && m.isSubSet(toks.toSet))) { found = true matches += ElementMatch(elm, toks, syn, parts) @@ -528,9 +458,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { _ ⇒ - // Iterate over depth-limited permutations of the original sentence with and without stopwords. - jiggle(ns, maxJiggleFactor).foreach(procPerm) - jiggle(NCNlpSentenceTokenBuffer(ns.filter(!_.isStopWord)), maxJiggleFactor).foreach(procPerm) + procPerm(ns) } if (DEEP_DEBUG) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala index cae42e0..9216473 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/abstract/NCAbstractTokensModel.scala @@ -37,5 +37,5 @@ class NCAbstractTokensModel extends NCModelAdapter( override def getAbstractTokens: util.Set[String] = Set("nlpcraft:num", "anyWord").asJava override def isPermutateSynonyms: Boolean = false - override def getJiggleFactor: Int = 0 + override def isSparse: Boolean = false } diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala index 6f0ed53..04900ca 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/jiggle/NCJiggleSpec.scala @@ -31,7 +31,7 @@ class NJiggleModel extends NCAbstractTokensModel { // Default values. override def isPermutateSynonyms: Boolean = true - override def getJiggleFactor: Int = 4 + override def isSparse: Boolean = true override def onContext(ctx: NCContext): NCResult = { val variants = ctx.getVariants.asScala diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/properties/NCTokensPropertiesSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/properties/NCTokensPropertiesSpec.scala index f244901..3efb4a1 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/properties/NCTokensPropertiesSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/properties/NCTokensPropertiesSpec.scala @@ -38,7 +38,7 @@ abstract class NCTokenPropertiesModelAbstract extends NCModelAdapter( } case class NCPropTestElement( - id: String, synonym: String, permFlag: Option[Boolean] = None, jiggleFactor: Option[Int] = None + id: String, synonym: String, permFlag: Option[Boolean] = None, sparse: Option[Boolean] = None ) extends NCElement { override def getId: String = id override def getSynonyms: util.List[String] = util.Collections.singletonList(synonym) @@ -48,10 +48,10 @@ case class NCPropTestElement( case None ⇒ super.isPermutateSynonyms } - override def getJiggleFactor: Optional[Integer] = - jiggleFactor match { + override def isSparse: Optional[lang.Boolean] = + sparse match { case Some(v) ⇒ Optional.of(v) - case None ⇒ super.getJiggleFactor + case None ⇒ super.isSparse } } @@ -129,7 +129,7 @@ class NCTokenPropertiesModel3Spec extends NCTestContext { // 4. Jiggle factor turned off. class NCTokenPropertiesModel4 extends NCTokenPropertiesModelAbstract { - override def getJiggleFactor: Int = 0 + override def isSparse: Boolean = false } @NCTestEnvironment(model = classOf[NCTokenPropertiesModel4], startClient = true) @@ -159,7 +159,7 @@ class NCTokenPropertiesModel4Spec extends NCTestContext { class NCTokenPropertiesModel5 extends NCTokenPropertiesModelAbstract { override def getElements: util.Set[NCElement] = { val set: Set[NCElement] = Set( - NCPropTestElement("ab", "a b", permFlag = Some(false), jiggleFactor = Some(0)), + NCPropTestElement("ab", "a b", permFlag = Some(false), sparse = Some(false)), NCTestElement("xy", "x y") ) @@ -197,7 +197,7 @@ class NCTokenPropertiesModel5Spec extends NCTestContext { class NCTokenPropertiesModel6 extends NCTokenPropertiesModelAbstract { override def getElements: util.Set[NCElement] = { val set: Set[NCElement] = Set( - NCPropTestElement("ab", "a b", permFlag = Some(false), jiggleFactor = Some(1)), + NCPropTestElement("ab", "a b", permFlag = Some(false), sparse = Some(true)), NCTestElement("xy", "x y") ) @@ -233,7 +233,7 @@ class NCTokenPropertiesModel6Spec extends NCTestContext { class NCTokenPropertiesModel7 extends NCTokenPropertiesModelAbstract { override def getElements: util.Set[NCElement] = { val set: Set[NCElement] = Set( - NCPropTestElement("ab", "a b", jiggleFactor = Some(0)), + NCPropTestElement("ab", "a b", sparse = Some(false)), NCTestElement("xy", "x y") ) @@ -272,7 +272,7 @@ class NCTokenPropertiesModel7Spec extends NCTestContext { class NCTokenPropertiesModel8 extends NCTokenPropertiesModelAbstract { override def getElements: util.Set[NCElement] = { val set: Set[NCElement] = Set( - NCPropTestElement("ab", "a b", jiggleFactor = Some(1)), + NCPropTestElement("ab", "a b", sparse = Some(true)), NCTestElement("xy", "x y") ) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/synonyms/NCSynonymsSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/synonyms/NCSynonymsSpec.scala index d5154c8..ada03fa 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/synonyms/NCSynonymsSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/synonyms/NCSynonymsSpec.scala @@ -26,7 +26,7 @@ import java.util class NCSynonymsSpecModel extends NCModelAdapter("nlpcraft.syns.test.mdl", "Synonyms Test Model", "1.0") { // Default values. override def isPermutateSynonyms: Boolean = true - override def getJiggleFactor: Int = 4 + override def isSparse: Boolean = true override def getElements: util.Set[NCElement] = Set( diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala index 1ad05e8..ede9153 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec2.scala @@ -37,7 +37,7 @@ class NCNestedTestModel21 extends NCModelAdapter("nlpcraft.nested2.test.mdl", "N def onNumAndE1(ctx: NCIntentMatch): NCResult = NCResult.text("OK") override def isPermutateSynonyms: Boolean = false - override def getJiggleFactor: Int = 0 + override def isSparse: Boolean = false } /** @@ -58,7 +58,7 @@ class NCEnricherNestedModelSpec21 extends NCTestContext { */ class NCNestedTestModel22 extends NCNestedTestModel21 { override def isPermutateSynonyms: Boolean = true - override def getJiggleFactor: Int = 4 + override def isSparse: Boolean = true } /**
