This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit d36a9920e6ef3bcb649ff7ef28b8faa79a0dea2e Author: Sergey Kamov <sergeyka...@apache.org> AuthorDate: Fri Sep 18 09:30:56 2020 +0300 WIP. --- .../server/sugsyn/NCSuggestSynonymManager.scala | 34 +++++++++++++++------- .../nlpcraft/server/rest/NCRestModelSpec.scala | 8 ++--- 2 files changed, 27 insertions(+), 15 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala index 9e4c358..b59f2dc 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala @@ -106,7 +106,7 @@ object NCSuggestSynonymManager extends NCService { s"Unsupported symbols: $word" ) } - case class SuggestionResult(synonym: String, ctxWordSrvScore: Double, sgstCnt: Int) + case class SuggestionResult(synonym: String, factor: Double) private def split(s: String): Seq[String] = s.split(" ").toSeq.map(_.trim).filter(_.nonEmpty) private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ") @@ -191,7 +191,7 @@ object NCSuggestSynonymManager extends NCService { promise.success( NCSuggestSynonymResult( modelId = mdlId, - minScore = minScore, + minScore = 0, durationMs = System.currentTimeMillis() - now, timestamp = now, error = err, @@ -387,13 +387,13 @@ object NCSuggestSynonymManager extends NCService { val seq = group.map { case (sgst, _) ⇒ sgst }.sortBy(-_.score) // Drops repeated. - (seq.head, seq.length) + (seq.head.word, seq.length, seq.map(_.score).sum / seq.size) }. toSeq. - map { case (sgst, cnt) ⇒ (sgst, cnt, sgst.score * cnt / elemSgsts.size) }. + map { case (sgst, cnt, score) ⇒ (sgst, cnt, score * cnt / elemSgsts.size) }. sortBy { case (_, _, sumFactor) ⇒ -sumFactor }. zipWithIndex. - foreach { case ((sgst, cnt, _), _) ⇒ + foreach { case ((word, _, sumFactor), _) ⇒ val seq = res.get(elemId) match { case Some(seq) ⇒ seq @@ -405,19 +405,31 @@ object NCSuggestSynonymManager extends NCService { buf } - seq += SuggestionResult(sgst.word, sgst.score, cnt) + seq += SuggestionResult(word, sumFactor) } } val resJ: util.Map[String, util.List[util.HashMap[String, Any]]] = res.map { case (id, data) ⇒ - id → data.map(d ⇒ { + val factors = data.map(_.factor) + + val min = factors.min + val max = factors.max + var delta = max - min + + if (delta == 0) + delta = max + + def normalize(v: Double): Double = (v - min) / delta + + val norm = data.map(s ⇒ SuggestionResult(s.synonym, normalize(s.factor))). + filter(_.factor >= minScore) + + id → norm.map(d ⇒ { val m = new util.HashMap[String, Any]() - m.put("synonym", d.synonym) - // ContextWord server range is (0, 2) - m.put("ctxWordServerScore", d.ctxWordSrvScore / 2) - m.put("suggestedCount", d.sgstCnt) + m.put("synonym", d.synonym.toLowerCase) + m.put("factor", d.factor) m }).asJava diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala index 2c4ef04..444d38a 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/server/rest/NCRestModelSpec.scala @@ -18,20 +18,20 @@ package org.apache.nlpcraft.server.rest import org.apache.nlpcraft.NCTestEnvironment -import org.apache.nlpcraft.examples.time.TimeModel +import org.apache.nlpcraft.examples.alarm.AlarmModel import org.junit.jupiter.api.Assertions._ import org.junit.jupiter.api.{Disabled, Test} // Enable it and run if context word server started. @Disabled -@NCTestEnvironment(model = classOf[TimeModel], startClient = false) +@NCTestEnvironment(model = classOf[AlarmModel], startClient = false) class NCRestModelSpec extends NCRestSpec { @Test def test(): Unit = { - post("model/sugsyn", "mdlId" → "nlpcraft.time.ex")( + post("model/sugsyn", "mdlId" → "nlpcraft.alarm.ex")( ("$.status", (status: String) ⇒ assertEquals("API_OK", status)) ) - post("model/sugsyn", "mdlId" → "nlpcraft.time.ex", "minScore" → 0.5)( + post("model/sugsyn", "mdlId" → "nlpcraft.alarm.ex", "minScore" → 0.5)( ("$.status", (status: String) ⇒ assertEquals("API_OK", status)) ) }