This is an automated email from the ASF dual-hosted git repository. aradzinski pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 1c1bfad465b5c6d82daed99d25e51ba631d6ba68 Author: Aaron Radzinski <[email protected]> AuthorDate: Sun Sep 27 15:35:15 2020 -0700 WIP. --- .../dictionary/NCDictionaryEnricher.scala | 5 +++- .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 5 +++- .../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 5 +++- .../enrichers/relation/NCRelationEnricher.scala | 5 +++- .../mgrs/nlp/enrichers/sort/NCSortEnricher.scala | 5 +++- .../enrichers/stopword/NCStopWordEnricher.scala | 2 ++ .../suspicious/NCSuspiciousNounsEnricher.scala | 5 +++- .../nlp/enrichers/basenlp/NCBaseNlpEnricher.scala | 2 ++ .../coordinate/NCCoordinatesEnricher.scala | 31 ++++++++++++---------- .../server/nlp/enrichers/date/NCDateEnricher.scala | 2 ++ .../server/nlp/enrichers/geo/NCGeoEnricher.scala | 5 +++- .../nlp/enrichers/numeric/NCNumericEnricher.scala | 5 +++- .../nlp/enrichers/quote/NCQuoteEnricher.scala | 4 ++- .../enrichers/stopword/NCStopWordEnricher.scala | 4 ++- 14 files changed, 61 insertions(+), 24 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala index 2be0859..325c90c 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/dictionary/NCDictionaryEnricher.scala @@ -62,7 +62,9 @@ object NCDictionaryEnricher extends NCProbeEnricher { } @throws[NCE] - override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = + override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, @@ -82,4 +84,5 @@ object NCDictionaryEnricher extends NCProbeEnricher { ) }) } + } } \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala index 2c1f713..43c3748 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala @@ -246,7 +246,9 @@ object NCLimitEnricher extends NCProbeEnricher { } @throws[NCE] - override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = + override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, @@ -281,6 +283,7 @@ object NCLimitEnricher extends NCProbeEnricher { case None ⇒ // No-op. } } + } /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala index 03594d1..e2e2265 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala @@ -311,7 +311,9 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { def isComplex(mdl: NCProbeModel): Boolean = mdl.synonymsDsl.nonEmpty || !mdl.model.getParsers.isEmpty @throws[NCE] - override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = + override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, @@ -518,4 +520,5 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala { parser.onDiscard() } } + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala index ff4475c..5a52f8c 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/relation/NCRelationEnricher.scala @@ -147,7 +147,9 @@ object NCRelationEnricher extends NCProbeEnricher { } @throws[NCE] - override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = + override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, @@ -179,6 +181,7 @@ object NCRelationEnricher extends NCProbeEnricher { case None ⇒ // No-op. } } + } /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala index f549dd5..d177c10 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/sort/NCSortEnricher.scala @@ -437,7 +437,9 @@ object NCSortEnricher extends NCProbeEnricher { toks.length == toks2.length || toks.count(isImportant) == toks2.count(isImportant) } - override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, meta: Map[String, Serializable], parent: Span): Unit = + override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, meta: Map[String, Serializable], parent: Span): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, @@ -506,6 +508,7 @@ object NCSortEnricher extends NCProbeEnricher { } } } + } /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala index 255d91b..b386978 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/stopword/NCStopWordEnricher.scala @@ -218,6 +218,8 @@ object NCStopWordEnricher extends NCProbeEnricher { @throws[NCE] override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { + require(isStarted) + def mark(stems: Set[String], f: Boolean): Unit = ns.filter(t ⇒ stems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "stopWord" → f)) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala index fa9a3a2..f212687 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/suspicious/NCSuspiciousNounsEnricher.scala @@ -51,11 +51,14 @@ object NCSuspiciousNounsEnricher extends NCProbeEnricher { } @throws[NCE] - override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = + override def enrich(mdl: NCProbeModel, ns: NCNlpSentence, senMeta: Map[String, Serializable], parent: Span = null): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "mdlId" → mdl.model.getId, "txt" → ns.text) { _ ⇒ ns.filter(t ⇒ mdl.suspWordsStems.contains(t.stem)).foreach(t ⇒ ns.fixNote(t.getNlpNote, "suspNoun" → true)) } + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala index cc64123..7bbe5ac 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/basenlp/NCBaseNlpEnricher.scala @@ -102,6 +102,8 @@ object NCBaseNlpEnricher extends NCServerEnricher { */ @throws[NCE] override def enrich(ns: NCNlpSentence, parent: Span = null) { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒ // This must be 1st enricher in the pipeline. assume(ns.isEmpty) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala index 2543757..ac82456 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/coordinate/NCCoordinatesEnricher.scala @@ -181,31 +181,33 @@ object NCCoordinatesEnricher extends NCServerEnricher { */ private def hasStem(toks: Seq[NCNlpSentenceToken], stems: Seq[String]): Boolean = toks.exists(t ⇒ stems.contains(t.stem)) - override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = + override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒ val nums = NCNumericManager.find(ns).sortBy(_.tokens.head.index) - + if (nums.size >= 2) { val markers = mutable.Buffer.empty[Seq[NCNlpSentenceToken]] - + def areSuitableTokens(toks: Seq[NCNlpSentenceToken]): Boolean = toks.forall(t ⇒ !t.isQuoted && !t.isBracketed) && !markers.exists(_.exists(t ⇒ toks.contains(t))) - + for (toks ← ns.tokenMixWithStopWords() if areSuitableTokens(toks) && MARKERS_STEMS.contains(toks.map(_.stem).mkString(" "))) markers += toks - + val allMarkers = markers.flatten val buf = mutable.Buffer.empty[NCNlpSentenceToken] - + for (pair ← nums.sliding(2) if !buf.exists(t ⇒ pair.flatMap(_.tokens).contains(t))) { var lat = pair.head var lon = pair.last - + val between = ns.slice(lat.tokens.last.index + 1, lon.tokens.head.index) val before = getBefore(ns, ns.take(lat.tokens.head.index), markers) - + val after = getAfter(ns, ns.drop(lon.tokens.last.index + 1), markers) - + if (hasStem(before, lonStems) && hasStem(between, latStems) || hasStem(between, lonStems) && hasStem(after, latStems) || !inRange(lat, 90) && inRange(lat, 180) @@ -216,25 +218,25 @@ object NCCoordinatesEnricher extends NCServerEnricher { } if (inRange(lat, 90) && inRange(lon, 180) && (markers.nonEmpty || similar2Coordinates(lat, lon))) { val normBetween = between.diff(allMarkers) - + if (normBetween.isEmpty || normBetween.forall( t ⇒ t.isEmpty || t.pos == "IN" || SEPS.contains(t.normText) || EQUALS.contains(t.normText)) ) { val extra = (before ++ after ++ between).sortBy(_.index) - + if (markers.exists(extra.containsSlice) || similar2Coordinates(lat, lon)) { val toks = (lat.tokens ++ lon.tokens ++ extra ++ markers.flatten).distinct.sortBy(_.index) - + val note = NCNlpSentenceNote( toks.map(_.index), "nlpcraft:coordinate", "latitude" → lat.value, "longitude" → lon.value ) - + toks.foreach(_.add(note)) - + buf ++= toks } } @@ -242,4 +244,5 @@ object NCCoordinatesEnricher extends NCServerEnricher { } } } + } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala index eb002a5..2070ef9 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala @@ -174,6 +174,8 @@ object NCDateEnricher extends NCServerEnricher { */ @throws[NCE] override def enrich(ns: Sentence, parent: Span = null) { + require(isStarted) + // This stage must not be 1st enrichment stage. assume(ns.nonEmpty) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala index 37bf87f..8162f92 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/geo/NCGeoEnricher.scala @@ -140,7 +140,9 @@ object NCGeoEnricher extends NCServerEnricher { * @throws NCE */ @throws[NCE] - override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = + override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒ // This stage must not be 1st enrichment stage. assume(ns.nonEmpty) @@ -294,6 +296,7 @@ object NCGeoEnricher extends NCServerEnricher { collapse(ns) } + } private def getValue(note: NCNlpSentenceNote, key: String): String = note(key).asInstanceOf[String] private def getValueOpt(note: NCNlpSentenceNote, key: String): Option[String] = note.get(key) match { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala index a884c32..18a25fe 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/numeric/NCNumericEnricher.scala @@ -239,7 +239,9 @@ object NCNumericEnricher extends NCServerEnricher { * @throws NCE */ @throws[NCE] - override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = + override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒ val nums = NCNumericManager.find(ns) @@ -438,4 +440,5 @@ object NCNumericEnricher extends NCServerEnricher { } } + } } \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala index d323785..532aa15 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/quote/NCQuoteEnricher.scala @@ -56,7 +56,9 @@ object NCQuoteEnricher extends NCServerEnricher { * @throws NCE */ @throws[NCE] - override def enrich(ns: NCNlpSentence, parent: Span = null) { + override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = { + require(isStarted) + startScopedSpan("enrich", parent, "srvReqId" → ns.srvReqId, "txt" → ns.text) { _ ⇒ // Clone input sentence. val copy = ns.clone() diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala index d8a1353..71dafc7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala @@ -531,7 +531,9 @@ object NCStopWordEnricher extends NCServerEnricher { } @throws[NCE] - override def enrich(ns: NCNlpSentence, parent: Span = null) { + override def enrich(ns: NCNlpSentence, parent: Span = null): Unit = { + require(isStarted) + // This stage must not be 1st enrichment stage. assume(ns.nonEmpty)
