This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch 261tmp in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 3b5a380390d346b24391311c7ec771c4ff9f1fac Author: Sergey Kamov <[email protected]> AuthorDate: Sun Mar 7 15:45:09 2021 +0300 WIP. --- .../apache/nlpcraft/common/nlp/NCNlpSentence.scala | 25 +++++++++++++------ .../nlpcraft/common/util/NCComboRecursiveTask.java | 29 ++++++++-------------- .../model/NCEnricherNestedModelSpec.scala | 18 +++++++------- 3 files changed, 37 insertions(+), 35 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala index 9d9cb98..95a98a3 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/NCNlpSentence.scala @@ -626,6 +626,8 @@ object NCNlpSentence extends LazyLogging { flatMap(note ⇒ getNotNlpNotes(note.tokenIndexes.sorted.map(i ⇒ thisSen(i))).filter(_ != note)). distinct + println("delCombs="+delCombs.mkString("\n")) + // Optimization. Deletes all wholly swallowed notes. val links = getLinks(thisSen.flatten) @@ -670,6 +672,7 @@ object NCNlpSentence extends LazyLogging { // } val dict = mutable.HashMap.empty[String, NCNlpSentenceNote] + val dictBack = mutable.HashMap.empty[NCNlpSentenceNote, String] var i = 'A' @@ -677,11 +680,20 @@ object NCNlpSentence extends LazyLogging { toksByIdx.map(seq ⇒ { seq.map( n ⇒ { - val s = s"$i" + val s = + dictBack.get(n) match { + case Some(s) ⇒ s + case None ⇒ { + val s = s"$i" + + i = (i.toInt + 1).toChar - i = (i.toInt + 1).toChar + dict += s → n + dictBack += n → s - dict += s → n + s + } + } s } @@ -695,16 +707,13 @@ object NCNlpSentence extends LazyLogging { val p = new ForkJoinPool() val tmp = NCComboRecursiveTask.findCombinations( - converted.map(_.asJava).asJava, - new Comparator[String]() { - override def compare(n1: String, n2: String): Int = n1.compareTo(n2) - }, + toksByIdx.map(_.asJava).asJava, p ) p.shutdown() - val seq1 = tmp.asScala.map(_.asScala.map(dict)) + val seq1 = tmp.asScala.map(_.asScala) val sens = seq1. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java index 017c10e..834735b 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCComboRecursiveTask.java @@ -19,7 +19,9 @@ package org.apache.nlpcraft.common.util; import java.util.ArrayList; import java.util.Collection; +import java.util.Collections; import java.util.Comparator; +import java.util.HashSet; import java.util.List; import java.util.concurrent.ForkJoinPool; import java.util.concurrent.RecursiveTask; @@ -42,30 +44,17 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> { this.wordCounts = wordCounts; } - public static <T> List<List<T>> findCombinations(List<List<T>> inp, Comparator<T> comparator, ForkJoinPool pool) { - List<List<T>> uniqueInp = inp.stream() - .filter(row -> inp.stream().noneMatch(it -> !it.equals(row) && it.containsAll(row))) - .map(i -> i.stream().distinct().sorted(comparator).collect(toList())) - .collect(toList()); - - - System.out.println("!!!"); - for (List<T> ts : uniqueInp) { - System.out.println("!!!ts="); - System.out.println(ts.stream().map(Object::toString).collect(Collectors.joining("\n"))); - } - System.out.println("!!!"); + public static <T> List<List<T>> findCombinations(List<List<T>> inp, ForkJoinPool pool) { + List<List<T>> uniqueInp = inp; // Build dictionary of unique words. List<T> dict = uniqueInp.stream() .flatMap(Collection::stream) .distinct() - .sorted(comparator) .collect(toList()); - System.out.println("dict="); - System.out.println(dict.stream().map(Object::toString).collect(Collectors.joining("\n"))); - System.out.println(); + System.out.println("inp=" + inp); + System.out.println("dict=" + dict); if (dict.size() > Long.SIZE) { // Note: Power set of 64 words results in 9223372036854775807 combinations. @@ -87,7 +76,11 @@ public class NCComboRecursiveTask extends RecursiveTask<List<Long>> { NCComboRecursiveTask task = new NCComboRecursiveTask(lo, hi, wordBits, wordCounts); - return pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList()); + final List<List<T>> res = pool.invoke(task).stream().map(bits -> bitsToWords(bits, dict)).collect(toList()); + + System.out.println("res=" + res); + + return res; } @Override diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala index 9290d56..658feda 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCEnricherNestedModelSpec.scala @@ -47,15 +47,15 @@ class NCEnricherNestedModelSpec extends NCEnricherBaseSpec { @Test def test(): Unit = runBatch( - _ ⇒ checkExists( - "tomorrow", - usr(text = "tomorrow", id = "x3") - ), - _ ⇒ checkExists( - "tomorrow yesterday", - usr(text = "tomorrow", id = "x3"), - usr(text = "yesterday", id = "x3") - ), +// _ ⇒ checkExists( +// "tomorrow", +// usr(text = "tomorrow", id = "x3") +// ), +// _ ⇒ checkExists( +// "tomorrow yesterday", +// usr(text = "tomorrow", id = "x3"), +// usr(text = "yesterday", id = "x3") +// ), _ ⇒ checkExists( "y y", usr(text = "y y", id = "y3")
