This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-443
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-443 by this push:
new 86a8fed WIP.
86a8fed is described below
commit 86a8fed8092db33ed20290019169c090ccbf596a
Author: Sergey Kamov <[email protected]>
AuthorDate: Sun Sep 19 14:24:15 2021 +0300
WIP.
---
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 59 ++++++++++------------
.../nlp/enrichers/NCServerEnrichmentManager.scala | 12 ++---
2 files changed, 33 insertions(+), 38 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index 37ca1f4..e5f9ee2 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -283,21 +283,6 @@ object NCModelEnricher extends NCProbeEnricher {
}
}
- private def combosTokens1(toks: Seq[NlpToken]): Seq[(Seq[NlpToken], Seq[NlpToken])] =
- combos(toks).flatMap(combo => {
- val stops = combo.filter(_.isStopWord)
-
- val stops4Delete = Range.inclusive(1, stops.size).flatMap(stops.combinations)
-
- (Seq(combo) ++ stops4Delete.map(del => combo.filter(t => !del.contains(t)))).map(_ -> combo)
- }).
- toMap.
- filter(_._1.nonEmpty).
- groupBy(_._1).
- map(p => p._1 -> p._2.values.minBy(p => (-p.size, p.head.index))).
- sortBy(p => (-p._2.size, -p._1.size, -p._2.head.index, -p._1.head.index))
-
-
/**
*
* 1. Prepares combination of tokens (sliding).
@@ -314,6 +299,13 @@ object NCModelEnricher extends NCProbeEnricher {
*/
private def combosTokens(toks: Seq[NlpToken]): Seq[(Seq[NlpToken], Seq[NlpToken])] =
combos(toks).flatMap(combo => {
+ // TODO: delete after finish task.
+// val stops = combo.filter(_.isStopWord)
+//
+// val stops4Delete = Range.inclusive(1, stops.size).flatMap(stops.combinations)
+//
+// (Seq(combo) ++ stops4Delete.map(del => combo.filter(t => !del.contains(t)))).map(_ -> combo)
+
val stops = combo.filter(s => s.isStopWord && s != combo.head && s != combo.last)
val slides = mutable.ArrayBuffer.empty[mutable.ArrayBuffer[NlpToken]]
@@ -328,7 +320,7 @@ object NCModelEnricher extends NCProbeEnricher {
var stops4Delete: Seq[Seq[NlpToken]] =
if (bigSlides.nonEmpty) {
- val allBig = bigSlides.flatMap(p => p)
+ val allBig = bigSlides.flatten
val stops4AllCombs = stops.filter(p => !allBig.contains(p))
if (stops4AllCombs.nonEmpty)
@@ -336,10 +328,10 @@ object NCModelEnricher extends NCProbeEnricher {
seq1 <- Range.inclusive(0, stops4AllCombs.size).flatMap(stops4AllCombs.combinations);
seq2 <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations)
)
- yield seq1 ++ seq2.flatMap(p => p)
+ yield seq1 ++ seq2.flatten
else
for (seq <- Range.inclusive(0, bigSlides.size).flatMap(bigSlides.combinations))
- yield seq.flatMap(p => p)
+ yield seq.toSeq.flatten
}
else
Range.inclusive(1, stops.size).flatMap(stops.combinations)
@@ -517,8 +509,8 @@ object NCModelEnricher extends NCProbeEnricher {
*/
private def getSparsedTokens(matched: Seq[NlpToken], toks2Match: Seq[NlpToken]): Seq[NlpToken] = {
require(matched.nonEmpty)
- // Matched tokens should be already sorted.
+ // Matched tokens should be already sorted.
val stopsInside = toks2Match.filter(t =>
t.isStopWord && !matched.contains(matched) && t.index > matched.head.index && t.index < matched.last.index
)
@@ -677,25 +669,28 @@ object NCModelEnricher extends NCProbeEnricher {
val links = NCSentenceManager.getLinks(usrNotes)
val parts = NCSentenceManager.getPartKeys(usrNotes: _*)
- usrNotes.
+ val usrNotesIdxs = usrNotes.
filter(n => !links.contains(NoteLink(n.noteType, n.tokenIndexes.sorted))).
filter(n => !parts.contains(NCTokenPartKey(n, ns))).
- foreach(n =>
- usrNotes.find(candidate =>
- candidate != n &&
- candidate.noteType == n.noteType &&
- candidate.dataOpt("parts") == n.dataOpt("parts") &&
- candidate.wordIndexes.toSet.subsetOf(n.wordIndexes.toSet) &&
- n.wordIndexes.filter(n => !candidate.wordIndexes.contains(n)).
- forall(wordIdx => ns.tokens.exists(t => t.wordIndexes.contains(wordIdx) && t.isStopWord))
- ) match {
+ zipWithIndex
+
+ usrNotesIdxs.
+ foreach { case (n, idx) =>
+ usrNotesIdxs.find { case (candidate, candidateIdx) =>
+ candidateIdx != idx &&
+ candidate.noteType == n.noteType &&
+ candidate.dataOpt("parts") == n.dataOpt("parts") &&
+ candidate.wordIndexes.toSet.subsetOf(n.wordIndexes.toSet) &&
+ n.wordIndexes.filter(n => !candidate.wordIndexes.contains(n)).
+ forall(wordIdx => ns.tokens.exists(t => t.wordIndexes.contains(wordIdx) && t.isStopWord))
+ } match {
case Some(better) =>
ns.removeNote(n)
- // TODO: trace.
- logger.info(s"Element removed: $n, better: $better")
+
+ logger.trace(s"Element removed: $n, better: $better")
case None => // No-op.
}
- )
+ }
}
// TODO: simplify, add tests, check model properties (sparse etc) for optimization.
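
Note: the removed combosTokens1 helper (and the commented-out block kept under the TODO above) builds, for each token combination, every variant obtained by dropping a subset of its stop words, keyed back to the original combination. Below is a standalone sketch of that idea, using a simplified Tok type in place of NlpToken (illustration only, not the project code):

    object StopWordVariantsSketch {
        // Simplified stand-in for NlpToken: just a word and a stop-word flag.
        final case class Tok(text: String, isStopWord: Boolean)

        def variants(combo: Seq[Tok]): Seq[(Seq[Tok], Seq[Tok])] = {
            val stops = combo.filter(_.isStopWord)

            // All non-empty subsets of the stop words found in this combination.
            val stops4Delete = Range.inclusive(1, stops.size).flatMap(stops.combinations)

            // The original combination plus each stop-word-reduced variant, each mapped to the original.
            (Seq(combo) ++ stops4Delete.map(del => combo.filter(t => !del.contains(t)))).map(_ -> combo)
        }

        def main(args: Array[String]): Unit = {
            val combo = Seq(Tok("turn", false), Tok("the", true), Tok("light", false))

            // Prints "turn the light" and "turn light", both keyed to the original combination.
            variants(combo).foreach { case (v, orig) =>
                println(v.map(_.text).mkString(" ") + "  <-  " + orig.map(_.text).mkString(" "))
            }
        }
    }
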
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
index b64999d..03b749f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/NCServerEnrichmentManager.scala
@@ -153,12 +153,12 @@ object NCServerEnrichmentManager extends NCService with NCIgniteInstance {
catching(wrapIE) {
cache(normTxt) match {
case Some(h) =>
-// if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
-// prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
-//
-// h.sentence
-// }
-// else
+ if (h.enabledBuiltInTokens == normEnabledBuiltInToks) {
+ prepareAsciiTable(h.sentence).info(logger, Some(s"Sentence enriched (from cache): '$normTxt'"))
+
+ h.sentence
+ }
+ else
process(srvReqId, normTxt, enabledBuiltInToks, span)
case None =>
process(srvReqId, normTxt, enabledBuiltInToks, span)
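
Note: the second hunk restores the cache check so that a cached sentence is reused only when it was enriched with the same set of enabled built-in tokens; otherwise the text is re-processed. Below is a standalone sketch of that pattern, with simplified stand-in types and a plain in-memory map instead of the Ignite-backed cache (illustration only, not the project code):

    object EnrichCacheSketch {
        import scala.collection.mutable

        // Simplified holder: the enriched sentence plus the built-in token set it was built with.
        final case class Holder(sentence: String, enabledBuiltInTokens: Set[String])

        private val cache = mutable.HashMap.empty[String, Holder]

        private def process(normTxt: String, enabled: Set[String]): String = {
            val sent = s"enriched($normTxt)" // Placeholder for the real enrichment pipeline.
            cache += normTxt -> Holder(sent, enabled)
            sent
        }

        def enrich(normTxt: String, enabled: Set[String]): String =
            cache.get(normTxt) match {
                case Some(h) =>
                    if (h.enabledBuiltInTokens == enabled)
                        h.sentence                // Cache hit with the same token configuration.
                    else
                        process(normTxt, enabled) // Same text, different configuration: redo.
                case None =>
                    process(normTxt, enabled)
            }

        def main(args: Array[String]): Unit = {
            println(enrich("ping", Set("nlpcraft:num")))  // Miss: processed.
            println(enrich("ping", Set("nlpcraft:num")))  // Hit: reused.
            println(enrich("ping", Set("nlpcraft:date"))) // Hit, but different config: re-processed.
        }
    }
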