This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-431-430 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 3ce31d194ad1cf71fc84ed38c3c31d0130d3d8b2 Author: Sergey Kamov <[email protected]> AuthorDate: Fri Sep 3 16:55:55 2021 +0300 WIP. --- .../nlpcraft/common/nlp/numeric/NCNumeric.scala | 13 ++- .../common/nlp/numeric/NCNumericManager.scala | 24 ++++- .../nlp/enrichers/numeric/NCNumericEnricher.scala | 111 ++++++++++++--------- 3 files changed, 95 insertions(+), 53 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala index 75a3365..1de9d4b 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumeric.scala @@ -28,14 +28,21 @@ case class NCNumericUnit(name: String, unitType: String) /** * + * @param unit + * @param tokens + */ +case class NCNumericUnitData(unit: NCNumericUnit, tokens: Seq[NCNlpSentenceToken]) + +/** + * * @param tokens * @param value * @param isFractional - * @param unit + * @param unitData */ case class NCNumeric( tokens: Seq[NCNlpSentenceToken], value: Double, isFractional: Boolean, - unit: Option[NCNumericUnit] -) + unitData: Option[NCNumericUnitData] +) \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala index a428ab9..1dab5ef 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala @@ -91,8 +91,17 @@ object NCNumericManager extends NCService { val after = s.drop(num.length) if (num.nonEmpty && after.nonEmpty) { - def mkNumeric(u: NCNumericUnit): Option[NCNumeric] = - Some(NCNumeric(Seq(t), java.lang.Double.valueOf(num), isFractional = isFractional(num), unit = Some(u))) + def mkNumeric(u: NCNumericUnit): Option[NCNumeric] = { + val toks = Seq(t) + + Some( + NCNumeric( + toks, + java.lang.Double.valueOf(num), + isFractional = isFractional(num), + unitData = Some(NCNumericUnitData(u, toks))) + ) + } unitsOrigs.get(after) match { case Some(u) => mkNumeric(u) @@ -123,12 +132,14 @@ object NCNumericManager extends NCService { senWords.indexOfSlice(dtWords) match { case -1 => None case idx => + val toks = senToks.slice(idx, idx + dtWords.length) + Some( NCNumeric( - tokens = senToks.slice(idx, idx + dtWords.length), + tokens = toks, value = dtPeriod.value, isFractional = false, - unit = Some(dtPeriod.unit) + unitData = Some(NCNumericUnitData(dtPeriod.unit, toks)) ) ) } @@ -404,7 +415,10 @@ object NCNumericManager extends NCService { None }).headOption match { case Some((unit, unitToks)) => - val numWithUnit = NCNumeric(seq ++ unitToks, v, isFractional = isFractional, Some(unit)) + val numWithUnit = + NCNumeric( + seq ++ unitToks, v, isFractional = isFractional, Some(NCNumericUnitData(unit, unitToks)) + ) // If unit name is same as user element name, // it returns both variants: numeric with unit and without. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala index b28f198..b89ff99 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala @@ -198,7 +198,7 @@ object NCNumericEnricher extends NCServerEnricher { private def toString(seq: Seq[NCNlpSentenceToken], sep: String = " ", stem: Boolean = false) = seq.map(t => if (stem) t.stem else t.normText).mkString(sep) - private def mkNote( + private def mkNotes( toks: Seq[NCNlpSentenceToken], from: Double, fromIncl: Boolean, @@ -206,9 +206,10 @@ object NCNumericEnricher extends NCServerEnricher { to: Double, toIncl: Boolean, toFractional: Boolean, - unitOpt: Option[NCNumericUnit] - ): NCNlpSentenceNote = { - val params = mutable.ArrayBuffer.empty[(String, Any)] ++ + unitDataOpt: Option[NCNumericUnitData], + ): Seq[NCNlpSentenceNote] = { + val params = + mutable.ArrayBuffer.empty[(String, Any)] ++ Seq( "from" -> from, "fromIncl" -> fromIncl, @@ -222,14 +223,28 @@ object NCNumericEnricher extends NCServerEnricher { "isToPositiveInfinity" -> (to == MAX_VALUE) ) - unitOpt match { - case Some(unit) => - params += "unit" -> unit.name - params += "unitType" -> unit.unitType - case None => // No-op. + unitDataOpt match { + case Some(unitData) => + def extend(): Seq[(String, Any)] = { + params += "unit" -> unitData.unit.name + params += "unitType" -> unitData.unit.unitType + + params + } + + if (unitData.tokens == toks) + Seq(NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", extend():_*)) + else { + val n1 = NCNlpSentenceNote( + toks.filter(t => !unitData.tokens.contains(t)).map(_.index), "nlpcraft:num", params.clone():_* + ) + val n2 = NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", extend():_*) + + Seq(n1, n2) + } + + case None => Seq(NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", params:_*)) } - - NCNlpSentenceNote(toks.map(_.index), "nlpcraft:num", params:_*) } /** @@ -274,25 +289,28 @@ object NCNumericEnricher extends NCServerEnricher { val prepToks = Seq(getBefore(ts1)) ++ ts1 ++ Seq(getBefore(ts2)) ++ ts2 - val badRange = num1.unit.isDefined && num2.unit.isDefined && num1.unit != num2.unit + val badRange = + num1.unitData.isDefined && + num2.unitData.isDefined && + num1.unitData.get.unit != num2.unitData.get.unit if (!badRange) { val unit = - if (num1.unit.isDefined && num2.unit.isEmpty) - num1.unit - else if (num1.unit.isEmpty && num2.unit.isDefined) - num2.unit - else if (num1.unit.isEmpty && num2.unit.isEmpty) + if (num1.unitData.isDefined && num2.unitData.isEmpty) + num1.unitData + else if (num1.unitData.isEmpty && num2.unitData.isDefined) + num2.unitData + else if (num1.unitData.isEmpty && num2.unitData.isEmpty) None - else{ - require(num1.unit == num2.unit) - - num1.unit + else { + require(num1.unitData.get.unit == num2.unitData.get.unit) + + Some(NCNumericUnitData(num1.unitData.get.unit, num1.tokens ++ num2.tokens)) } - val note = p._2 match { + val notes = p._2 match { case BETWEEN_EXCLUSIVE => - mkNote( + mkNotes( prepToks, d1, fromIncl = false, @@ -303,7 +321,7 @@ object NCNumericEnricher extends NCServerEnricher { unit ) case BETWEEN_INCLUSIVE => - mkNote( + mkNotes( prepToks, d1, fromIncl = true, @@ -315,8 +333,9 @@ object NCNumericEnricher extends NCServerEnricher { ) case _ => throw new AssertionError(s"Illegal note type: ${p._2}.") } - - prepToks.foreach(_.add(note)) + + for (note <- notes) + prepToks.foreach(_.add(note)) processed ++= ts1 processed ++= ts2 @@ -340,10 +359,10 @@ object NCNumericEnricher extends NCServerEnricher { processed ++= toks - val note = + val notes = prep.prepositionType match { case MORE => - mkNote( + mkNotes( toks, num.value, fromIncl = false, @@ -351,10 +370,10 @@ object NCNumericEnricher extends NCServerEnricher { to = MAX_VALUE, toIncl = true, toFractional = num.isFractional, - num.unit + num.unitData ) case MORE_OR_EQUAL => - mkNote( + mkNotes( toks, num.value, fromIncl = true, @@ -362,10 +381,10 @@ object NCNumericEnricher extends NCServerEnricher { to = MAX_VALUE, toIncl = true, toFractional = num.isFractional, - num.unit + num.unitData ) case LESS => - mkNote( + mkNotes( toks, MIN_VALUE, fromIncl = true, @@ -373,10 +392,10 @@ object NCNumericEnricher extends NCServerEnricher { to = num.value, toIncl = false, toFractional = num.isFractional, - num.unit + num.unitData ) case LESS_OR_EQUAL => - mkNote( + mkNotes( toks, MIN_VALUE, fromIncl = true, @@ -384,10 +403,10 @@ object NCNumericEnricher extends NCServerEnricher { to = num.value, toIncl = true, toFractional = num.isFractional, - num.unit + num.unitData ) case EQUAL => - mkNote( + mkNotes( toks, num.value, fromIncl = true, @@ -395,10 +414,10 @@ object NCNumericEnricher extends NCServerEnricher { to = num.value, toIncl = true, toFractional = num.isFractional, - num.unit + num.unitData ) case NOT_EQUAL => - mkNote( + mkNotes( toks, num.value, fromIncl = false, @@ -406,12 +425,13 @@ object NCNumericEnricher extends NCServerEnricher { to = num.value, toIncl = false, toFractional = num.isFractional, - num.unit + num.unitData ) case _ => throw new AssertionError(s"Illegal note type: ${prep.prepositionType}.") } - - toks.foreach(_.add(note)) + + for (note <- notes) + toks.foreach(_.add(note)) } } @@ -423,7 +443,7 @@ object NCNumericEnricher extends NCServerEnricher { // Numeric without conditions. for (num <- nums if !processed.exists(num.tokens.contains)) { - val note = mkNote( + val notes = mkNotes( num.tokens, num.value, fromIncl = true, @@ -431,12 +451,13 @@ object NCNumericEnricher extends NCServerEnricher { num.value, toIncl = true, num.isFractional, - num.unit + num.unitData ) processed ++= num.tokens - - num.tokens.foreach(_.add(note)) + + for (note <- notes) + num.tokens.foreach(_.add(note)) } } }
