This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-443 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 1ccf5c5cc5a489b2565500d31209e4af19aa8187 Author: Sergey Kamov <[email protected]> AuthorDate: Thu Sep 16 12:10:22 2021 +0300 WIP. --- .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 25 ++++++++++++++++++---- .../model/NCEnricherNestedModelSpec.scala | 3 +-- .../nlp/enrichers/sort/NCEnricherSortSpec.scala | 3 +-- 3 files changed, 23 insertions(+), 8 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala index 6908265..22af412 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala @@ -450,6 +450,22 @@ object NCModelEnricher extends NCProbeEnricher { ) } + /** + * + * @param matched + * @param toks2Match + */ + private def getSparsedTokens(matched: Seq[NlpToken], toks2Match: Seq[NlpToken]): Seq[NlpToken] = { + require(matched.nonEmpty) + + // Matched tokens should be already sorted. + val stopsInside = toks2Match.filter(t => + t.isStopWord && !matched.contains(matched) && t.index > matched.head.index && t.index < matched.last.index + ) + + if (stopsInside.nonEmpty) (matched ++ stopsInside).sortBy(_.index) else matched + } + @throws[NCE] override def enrich(mdl: NCProbeModel, ns: Sentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { require(isStarted) @@ -526,9 +542,10 @@ object NCModelEnricher extends NCProbeEnricher { for (s <- get(mdl.sparseSynonyms, eId)) s.sparseMatch(toks) match { case Some(res) => - println("!!!toks="+toks.map(_.origText)) - println("!!!res="+res.map(_.origText)) - add("simple sparse", ns, contCache, eId, greedy, res, idxs, s) +// println("!!!toks="+toks.map(_.origText)) +// println("!!!res="+res.map(_.origText)) +// println + add("simple sparse", ns, contCache, eId, greedy, getSparsedTokens(res, toks), idxs, s) case None => // No-op. } } @@ -566,7 +583,7 @@ object NCModelEnricher extends NCProbeEnricher { case Some(res) => val typ = if (s.sparse) "IDL sparse" else "IDL continuous" - add(typ, ns, contCache, eId, greedy, toTokens(res, ns), idxs, s, toParts(res, s)) + add(typ, ns, contCache, eId, greedy, getSparsedTokens(toTokens(res, ns), toTokens(comb.map(_.data), ns)), idxs, s, toParts(res, s)) idlCache += comb case None => // No-op. diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala index 4d5d991..8b25e87 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala @@ -94,8 +94,7 @@ class NCEnricherNestedModelSpec2 extends NCEnricherNestedModelSpec1 { ), _ => checkExists( "y the y", - usr(text = "y y", id = "y3"), - nlp(text = "the", isStop = true) + usr(text = "y the y", id = "y3") ), _ => checkExists( "y xxx y", diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala index 228885d..7b8d858 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCEnricherSortSpec.scala @@ -224,8 +224,7 @@ class NCEnricherSortSpec extends NCEnricherBaseSpec { _ => checkExists( "sort A the A the A", srt(text = "sort", typ = SUBJ_ONLY, note = "wrapperA", index = 1), - usr("A A A", "wrapperA"), - nlp("the the", isStop = true) + usr("A the A the A", "wrapperA") ) ) }
