[incubator-nlpcraft] branch NLPCRAFT-443 updated: WIP.

sergeykamov Sun, 19 Sep 2021 03:17:09 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
     new 62e5abc  WIP.
62e5abc is described below

commit 62e5abc57acfd24fe955a21538a847a88fe062a5
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Sep 19 13:16:54 2021 +0300

    WIP.
---
 .../apache/nlpcraft/common/nlp/NCNlpSentenceTokenBuffer.scala |  9 +--------
 .../scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala | 11 +++++------
 2 files changed, 6 insertions(+), 14 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceTokenBuffer.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceTokenBuffer.scala
index a3d1156..3034a5e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceTokenBuffer.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentenceTokenBuffer.scala
@@ -26,12 +26,6 @@ import scala.language.implicitConversions
   * @param tokens Initial buffer.
   */
 class NCNlpSentenceTokenBuffer(val tokens: ArrayBuffer[NCNlpSentenceToken] = new ArrayBuffer[NCNlpSentenceToken](16)) extends java.io.Serializable {
-    /** Stringified stems. */
-    lazy val stems: String = tokens.map(_.stem).mkString(" ")
-
-    /** Stem-based hashcode. */
-    lazy val stemsHash: Int = stems.hashCode()
-
     type SSOT = IndexedSeq[IndexedSeq[Option[NCNlpSentenceToken]]]
     type SST = IndexedSeq[IndexedSeq[NCNlpSentenceToken]]
 
@@ -113,8 +107,7 @@ class NCNlpSentenceTokenBuffer(val tokens: ArrayBuffer[NCNlpSentenceToken] = new
 
 object NCNlpSentenceTokenBuffer {
     implicit def toTokens(x: NCNlpSentenceTokenBuffer): ArrayBuffer[NCNlpSentenceToken] = x.tokens
-    implicit def toBuf( toks: Iterable[NCNlpSentenceToken]): NCNlpSentenceTokenBuffer = apply(toks)
 
-    def apply(toks: Iterable[NCNlpSentenceToken]): NCNlpSentenceTokenBuffer =
+    def apply(toks: Seq[NCNlpSentenceToken]): NCNlpSentenceTokenBuffer =
         new NCNlpSentenceTokenBuffer(new ArrayBuffer[NCNlpSentenceToken](toks.size) ++ toks)
 }
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
index c370738..809c4e5 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCProbeSynonym.scala
@@ -18,7 +18,7 @@
 package org.apache.nlpcraft.probe.mgrs
 
 import org.apache.nlpcraft.common.U
-import org.apache.nlpcraft.common.nlp.{NCNlpSentenceToken, NCNlpSentenceTokenBuffer}
+import org.apache.nlpcraft.common.nlp.NCNlpSentenceToken
 import org.apache.nlpcraft.model._
 import org.apache.nlpcraft.model.intent.NCIdlContext
 import org.apache.nlpcraft.probe.mgrs.NCProbeSynonym.NCIdlContent
@@ -54,7 +54,6 @@ class NCProbeSynonym(
     lazy val hasIdl: Boolean = idlChunks != 0
     lazy val isValueSynonym: Boolean = value != null
     lazy val stems: String = map(_.wordStem).mkString(" ")
-    lazy val stemsHash: Int = stems.hashCode
 
     /**
       *
@@ -171,13 +170,13 @@ class NCProbeSynonym(
       *
       * @param toks
       */
-    def isMatch(toks: NCNlpSentenceTokenBuffer): Boolean = {
+    def isMatch(toks: Seq[NCNlpSentenceToken]): Boolean = {
         require(toks != null)
         require(!sparse && !hasIdl)
 
         if (toks.length == length) {
             if (isTextOnly)
-                toks.stemsHash == stemsHash && toks.stems == stems
+                toks.zip(this).forall(p => p._1.stem == p._2.wordStem)
             else
                 toks.zip(this).sortBy(p => getSort(p._2.kind)).forall { case (tok, chunk) => isMatch(tok, chunk) }
         }
@@ -204,11 +203,11 @@ class NCProbeSynonym(
       *
       * @param toks
       */
-    def sparseMatch(toks: NCNlpSentenceTokenBuffer): Option[Seq[NCNlpSentenceToken]] = {
+    def sparseMatch(toks: Seq[NCNlpSentenceToken]): Option[Seq[NCNlpSentenceToken]] = {
         require(toks != null)
         require(sparse && !hasIdl)
 
-        sparseMatch0(toks.toSeq, isMatch, (t: NCNlpSentenceToken) => t.startCharIndex, shouldBeNeighbors = false)
+        sparseMatch0(toks, isMatch, (t: NCNlpSentenceToken) => t.startCharIndex, shouldBeNeighbors = false)
     }
 
     /**

[incubator-nlpcraft] branch NLPCRAFT-443 updated: WIP.

Reply via email to