This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-70_NEW
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-70_NEW by this push:
new dfaf1b8 WIP.
dfaf1b8 is described below
commit dfaf1b85a10bef8af812c19c911df1a70d29f5eb
Author: Sergey Kamov <[email protected]>
AuthorDate: Tue Jul 6 17:12:02 2021 +0300
WIP.
---
.../ctxword/NCContextWordCategoriesEnricher.scala | 12 +++---
.../nlpcraft/model/ctxword/NCContextWordSpec.scala | 50 ++++++++++------------
.../model/ctxword/NCContextWordSpec2.scala | 13 ++----
3 files changed, 33 insertions(+), 42 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
index a594fea..25faa45 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/ctxword/NCContextWordCategoriesEnricher.scala
@@ -101,16 +101,16 @@ object NCContextWordCategoriesEnricher extends
NCServerEnricher {
* @return
*/
def calculate(confs: Seq[Double]): Option[Double] =
- // Drops if there is not enough data.
+ // Drops if there is not enough data.
if (confs.length < 3)
None
else {
- def avg(seq: Seq[Double]): Double = seq.sum / seq.length
-
// Takes 50% of most important (or first 2 at least) and
calculates average value.
val n = Math.max((confs.length * 0.5).intValue(), 2)
- Some(avg(confs.sortBy(-_).take(n)))
+ val maxN = confs.sortBy(-_).take(n)
+
+ Some(maxN.sum / maxN.length)
}
private def calcWeightedGeoMean(vals2Weights: Map[Double, Double]):
Double =
@@ -125,8 +125,8 @@ object NCContextWordCategoriesEnricher extends
NCServerEnricher {
* @return
*/
def calculate(suggConf: Double, corpusConf: Double): Double =
- // Corpus data is more important. 1:4 is empirical factor.
- calcWeightedGeoMean(Map(suggConf -> 1, corpusConf -> 5))
+ // Corpus data is more important. Empirical factors configured.
+ calcWeightedGeoMean(Map(suggConf -> 1, corpusConf -> 3))
}
@volatile private var valuesStems: mutable.HashMap[ModelProbeKey,
ValuesHolder] = _
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
index 1fb072f..f72cf63 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec.scala
@@ -48,7 +48,7 @@ class NCContextWordSpecModel extends NCModel {
override def getName: String = this.getClass.getSimpleName
override def getVersion: String = "1.0.0"
- val MDL_LEVEL: java.lang.Double = 0.7
+ val MDL_LEVEL: java.lang.Double = 0.68
override def getContextWordCategoriesConfig:
Optional[NCContextWordCategoriesConfig] = {
Optional.of(
@@ -96,26 +96,22 @@ class NCContextWordSpecModel extends NCModel {
override def onContext(ctx: NCContext): NCResult = {
val varRes = ArrayBuffer.empty[String]
- val ok =
- ctx.getVariants.asScala.exists(v => {
- val testGroupToks =
v.asScala.toSeq.filter(_.getGroups.contains("testGroup"))
+ require(ctx.getVariants.size() == 1)
- val elemIds = testGroupToks.map(_.getId).distinct.mkString(" ")
- val words = testGroupToks.map(_.getOriginalText).mkString(" ")
+ val v = ctx.getVariants.asScala.head
- val res = s"$elemIds $words"
+ val testGroupToks =
v.asScala.toSeq.filter(_.getGroups.contains("testGroup"))
- varRes += res
+ val elemIds = testGroupToks.map(_.getId).distinct.mkString(" ")
+ val words = testGroupToks.map(_.getOriginalText).mkString(" ")
- NCContextWordSpecModel.expected == s"$elemIds $words"
- })
-
- NCResult.text(
- if (ok)
+ val res =
+ if (NCContextWordSpecModel.expected == s"$elemIds $words")
"OK"
else
s"ERROR: variant '${NCContextWordSpecModel.expected}' not
found. Found: ${varRes.mkString(", ")}"
- )
+
+ NCResult.text(res)
}
}
@@ -124,7 +120,7 @@ class NCContextWordSpecModel extends NCModel {
*/
@NCTestEnvironment(model = classOf[NCContextWordSpecModel], startClient = true)
class NCContextWordSpec extends NCTestContext {
- private def check(txt: String, elemId: String, words: String*): Unit = {
+ private def checkSingleVariant(txt: String, elemId: String, words:
String*): Unit = {
NCContextWordSpecModel.expected = s"$elemId ${words.mkString(" ")}"
val res = getClient.ask(txt).getResult.get()
@@ -134,17 +130,17 @@ class NCContextWordSpec extends NCTestContext {
@Test
private[ctxword] def test(): Unit = {
-// check("I want to have dogs and foxes", "class:animal", "dogs",
"foxes")
-// check("I bought dog's meat", "class:animal", "dog")
-// check("I bought meat dog's", "class:animal", "dog")
-//
-// check("I want to have a dog and fox", "class:animal", "dog", "fox")
-// check("I fed your fish", "class:animal", "fish")
-//
-// check("I like to drive my Porsche and Volkswagen", "class:cars",
"Porsche", "Volkswagen")
- check("Peugeot added motorcycles to its range in 1901", "class:cars",
"Peugeot")
-
-// check("The frost is possible today", "class:weather", "frost")
-// check("There's a very strong wind from the east now",
"class:weather", "wind")
+ checkSingleVariant("I want to have dogs and foxes", "class:animal",
"dogs", "foxes")
+ checkSingleVariant("I bought dog's meat", "class:animal", "dog")
+ checkSingleVariant("I bought meat dog's", "class:animal", "dog")
+
+ checkSingleVariant("I want to have a dog and fox", "class:animal",
"dog", "fox")
+ checkSingleVariant("I fed your fish", "class:animal", "fish")
+
+ checkSingleVariant("I like to drive my Porsche and Volkswagen",
"class:cars", "Porsche", "Volkswagen")
+ checkSingleVariant("Peugeot added motorcycles to its range year ago",
"class:cars", "Peugeot")
+
+ checkSingleVariant("The frost is possible today", "class:weather",
"frost")
+ checkSingleVariant("There's a very strong wind from the east now",
"class:weather", "wind")
}
}
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
index 9a6577f..8b5ac6e 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/ctxword/NCContextWordSpec2.scala
@@ -34,13 +34,9 @@ class NCContextWordSpecModel2 extends NCContextWordSpecModel
{
*/
@NCTestEnvironment(model = classOf[NCContextWordSpecModel2], startClient =
true)
class NCContextWordSpec2 extends NCTestContext {
- private def check(txts: String*): Unit =
- for (txt <- txts)
- getClient.ask(txt)
-
@Test
- private[ctxword] def test(): Unit = {
- check(
+ private[ctxword] def test(): Unit =
+ Seq(
"I want to have dogs and foxes",
"I bought dog's meat",
"I bought meat dog's",
@@ -49,10 +45,9 @@ class NCContextWordSpec2 extends NCTestContext {
"I fed your fish",
"I like to drive my Porsche and Volkswagen",
- "Peugeot added motorcycles to its range in 1901",
+ "Peugeot added motorcycles to its range year ago",
"The frost is possible today",
"There's a very strong wind from the east now"
- )
- }
+ ).foreach(getClient.ask)
}