This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-483-1-1
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-483-1-1 by this push:
     new 649904e  WIP.
649904e is described below

commit 649904ed3210525eb09e6f5219d2c56923357041
Author: Sergey Kamov <skhdlem...@gmail.com>
AuthorDate: Fri Mar 11 14:18:08 2022 +0300

    WIP.
---
 .../parser/impl/NCSemanticEntityParserImpl.scala   | 31 ++++++++++++----------
 1 file changed, 17 insertions(+), 14 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/impl/NCSemanticEntityParserImpl.scala
index 9fdad68..7de4cde 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/impl/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/mult/entity/parser/impl/NCSemanticEntityParserImpl.scala
@@ -228,9 +228,11 @@ class NCSemanticEntityParserImpl(
                 Map.empty
 
         val cache = mutable.HashSet.empty[Seq[Int]] // Variants (tokens 
without stopwords) can be repeated.
+
         case class Holder(elemId: String, tokens: Seq[NCToken], value: 
Option[String]):
             private val idxs = tokens.map(_.getIndex).toSet
             def isSuperSet(toks: Seq[NCToken]): Boolean = idxs.size > 
toks.size && toks.map(_.getIndex).toSet.subsetOf(idxs)
+
         val hs = mutable.ArrayBuffer.empty[Holder]
 
         for (piece <- getPieces(toks) if 
!hs.exists(_.isSuperSet(piece.baseTokens));
@@ -253,20 +255,21 @@ class NCSemanticEntityParserImpl(
                                         elems.foreach(elem => 
add(elem.elementId, elem.value))
                                     case None => // No-op.
                         // With regex.
-                        if !found then
-                            for ((elemId, syns) <- 
synsHolder.mixedSynonyms.getOrElse(variant.size, Seq.empty))
-                                for (s <- syns if !found)
-                                    found = s.chunks.zip(variant).
-                                        sortBy { (chunk, _) => if chunk.isText 
then 0 else 1 }.
-                                        forall { (chunk, tok) =>
-                                            if chunk.isText then
-                                                chunk.stem == stems(tok) || 
(stems4Lemms.nonEmpty && chunk.stem == stems4Lemms(tok))
-                                            else
-                                                def match0(txt: String) = 
chunk.regex.matcher(txt).matches()
-                                                match0(tok.getText) || 
match0(tok.getText.toLowerCase)
-                                        }
-
-                                    if found then add(elemId, 
Option.when(s.value != null)(s.value))
+                        for ((elemId, syns) <- 
synsHolder.mixedSynonyms.getOrElse(variant.size, Seq.empty))
+                            found = false
+
+                            for (s <- syns if !found)
+                                found = s.chunks.zip(variant).
+                                    sortBy { (chunk, _) => if chunk.isText 
then 0 else 1 }.
+                                    forall { (chunk, tok) =>
+                                        if chunk.isText then
+                                            chunk.stem == stems(tok) || 
(stems4Lemms.nonEmpty && chunk.stem == stems4Lemms(tok))
+                                        else
+                                            def match0(txt: String) = 
chunk.regex.matcher(txt).matches()
+                                            match0(tok.getText) || 
match0(tok.getText.toLowerCase)
+                                    }
+
+                                if found then add(elemId, Option.when(s.value 
!= null)(s.value))
 
         hs.toSeq.map(h => {
             val e = elemsMap(h.elemId)

Reply via email to