This is an automated email from the ASF dual-hosted git repository. aradzinski pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/master by this push: new 11b123bc WIP 11b123bc is described below commit 11b123bc8255eeb44b72464c4f438915f686799d Author: Aaron Radzinski <aradzin...@datalingvo.com> AuthorDate: Thu Dec 1 11:56:54 2022 -0800 WIP --- .../scala/org/apache/nlpcraft/NCPipeline.scala | 50 ++++++++-------------- .../org/apache/nlpcraft/NCPipelineBuilder.scala | 13 +++--- .../internal/impl/NCModelPipelineManager.scala | 14 +++--- .../apache/nlpcraft/nlp/NCVariantFilterSpec.scala | 2 +- .../apache/nlpcraft/nlp/util/NCTestPipeline.scala | 4 +- 5 files changed, 36 insertions(+), 47 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala index c1c24428..bb3d0958 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.scala @@ -20,47 +20,35 @@ package org.apache.nlpcraft /** * NLP processing pipeline for the input request. Pipeline is associated with the model. * - * An NLP pipeline is a container for various processing components that take the input text at the beginning of the - * pipeline and produce the list of {@link NCEntity entities} at the end of the pipeline. + * An NLP pipeline is a container for the sequence of processing components that take the input text at the beginning + * of the pipeline and produce the list of [[NCVariant variants]] at the end of the pipeline. * Schematically the pipeline looks like this: * <pre> - * +----------+ +-----------+ - * *=========* +---------+ +---+-------+ | +---+-------+ | - * : Text : -> | Token | -> | Token | | -> | Token | | ----. - * : Input : | Parser | | Enrichers |--+ | Validators |--+ \ - * *=========* +---------+ +-----------+ +------------+ \ - * } - * +-----------+ +----------+ +--------+ / - * *=========* +---+--------+ | +---+-------+ | +---+-----+ | / - * : Entity : <- | Entity | | <- | Entity | | <- | Entity | | <- - * : List : | Validators |--+ | Enrichers |--+ | Parsers |--+ - * *=========* +------------+ +-----------+ +---------+ + * +----------+ +-----------+ +--------+ + * *=========* +---------+ +---+-------+ | +---+-------+ | +---+-----+ | + * : Text : -> | Token | -> | Token | | -> | Token | | -> | Entity | | ----. + * : Input : | Parser | | Enrichers |--+ | Validators |--+ | Parsers |--+ \ + * *=========* +---------+ +-----------+ +------------+ +---------+ \ + * } + * +--------+ +--------+ +-----------+ +----------+ / + * *============* +---+-----+ | +---+-----+ | +---+--------+ | +---+-------+ | / + * : Variants : <- | Variant | | <- | Entity | | <- | Entity | | <- | Entity | | <-' + * : List : | Filters |--+ | Mappers |--+ | Validators |--+ | Enrichers |--+ + * *============* +----- ---+ +----- ---+ +------------+ +-----------+ * </pre> * * Pipeline has the following components: - * <ul> - * <li> - * {@link NCTokenParser} is responsible for taking the input text and tokenize it into a list of - * {@link NCToken - * }. This process is called tokenization, i.e. the process of demarcating and - * classifying sections of a string of input characters. There's only one token parser for the pipeline. - * </li> - * <li> - * After the initial list of token is - * </li> - * </ul> * */ trait NCPipeline: /** - * - * @return */ + * Get the token parser. One token parser is required for the pipeline. + */ def getTokenParser: NCTokenParser /** * Gets the list of entity parser. At least one entity parser is required. - * - * @return */ + */ def getEntityParsers: List[NCEntityParser] /** @@ -86,11 +74,9 @@ trait NCPipeline: /** * */ - def getVariantFilter: Option[NCVariantFilter] = None + def getVariantFilters: List[NCVariantFilter] = List.empty /** - * Gets optional list of entity mappers. - * - * @return Optional list of entity mappers. Can be empty but never `null`. + * Gets optional list of entity mappers. Can return an empty list but never `null`. */ def getEntityMappers: List[NCEntityMapper] = List.empty diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala index f49049a3..d15ba80f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipelineBuilder.scala @@ -34,7 +34,7 @@ class NCPipelineBuilder: private val tokVals: Buf[NCTokenValidator] = Buf.empty private val entVals: Buf[NCEntityValidator] = Buf.empty private val entMappers: Buf[NCEntityMapper] = Buf.empty - private var varFilter: Option[NCVariantFilter] = None + private var varFilters: Buf[NCVariantFilter] = Buf.empty /** * @@ -134,10 +134,10 @@ class NCPipelineBuilder: this /** - * @param varFilter + * @param varFilters * @return This instance for call chaining. */ - def withVariantFilter(varFilter: NCVariantFilter): NCPipelineBuilder = - this.varFilter = Some(varFilter) + def withVariantFilters(varFilters: List[NCVariantFilter]): NCPipelineBuilder = + this.varFilters ++= varFilters this /** @@ -152,7 +152,8 @@ class NCPipelineBuilder: /** * * @param entMappers - * @return This instance for call chaining. */ + * @return This instance for call chaining. + */ def withEntityMappers(entMappers: List[NCEntityMapper]): NCPipelineBuilder = require(entMappers != null, "List of entity mappers cannot be null.") entMappers.foreach((p: NCEntityMapper) => require(p != null, "Entity mapper cannot be null.")) @@ -234,5 +235,5 @@ class NCPipelineBuilder: override def getEntityParsers: List[NCEntityParser] = entParsers.toList override def getTokenValidators: List[NCTokenValidator] = tokVals.toList override def getEntityValidators: List[NCEntityValidator] = entVals.toList - override def getVariantFilter: Option[NCVariantFilter] = varFilter + override def getVariantFilters: List[NCVariantFilter] = varFilters.toList override def getEntityMappers: List[NCEntityMapper] = entMappers.toList diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala index e97657c1..c2f56c74 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/impl/NCModelPipelineManager.scala @@ -31,6 +31,8 @@ import java.util.concurrent.* import java.util.concurrent.atomic.* import java.util.function.Predicate import scala.concurrent.ExecutionContext +import scala.jdk.CollectionConverters.* + /** * @@ -54,10 +56,10 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L private val tokVals = nvl(pipeline.getTokenValidators) private val entVals = nvl(pipeline.getEntityValidators) private val entMappers = nvl(pipeline.getEntityMappers) - private val varFilterOpt = pipeline.getVariantFilter + private val varFilters = nvl(pipeline.getVariantFilters) private val allComps: Seq[NCLifecycle] = - tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++ entMappers ++ varFilterOpt.toSeq + tokEnrichers ++ entEnrichers ++ entParsers ++ tokVals ++ entVals ++ entMappers ++ varFilters /** * Processes pipeline components. @@ -153,7 +155,6 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L map { case (_, ents) => if ents.sizeIs > 1 then ents.toSet else Set.empty }.filter(_.nonEmpty) var variants: List[NCVariant] = - import scala.jdk.CollectionConverters.* if overlapEnts.nonEmpty then NCModelPipelineHelper. findCombinations(overlapEnts.map(_.asJava).asJava, pool).asScala. @@ -164,15 +165,15 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L else List(newVariant(entities)) - if varFilterOpt.isDefined then variants = varFilterOpt.get.filter(req, cfg, variants) + variants = varFilters.foldRight(variants)((filter, vars) => filter.filter(req, cfg, vars)) // Skips empty variants. val vrns = variants.filter(_.getEntities.nonEmpty) - for ((v, i) <- vrns.zipWithIndex) + for (v, i) <- vrns.zipWithIndex do val tbl = NCAsciiTable("EntityId", "Tokens", "Tokens Position", "Properties") - for (e <- v.getEntities) + for e <- v.getEntities do val toks = e.getTokens tbl += ( e.getId, @@ -185,6 +186,7 @@ class NCModelPipelineManager(cfg: NCModelConfig, pipeline: NCPipeline) extends L NCPipelineData(req, vrns, toks) def start(): Unit = processComponents(_.onStart(cfg), "started") + /** * */ diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala index 10254a13..983089ef 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCVariantFilterSpec.scala @@ -52,7 +52,7 @@ class NCVariantFilterSpec extends AnyFunSuite: ) test0( - mkPipeline(_.withVariantFilter((_: NCRequest, _: NCModelConfig, _: List[NCVariant]) => List.empty)), + mkPipeline(_.withVariantFilters(List((_: NCRequest, _: NCModelConfig, _: List[NCVariant]) => List.empty))), false ) } \ No newline at end of file diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala index 6ca853c6..f596d580 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/util/NCTestPipeline.scala @@ -36,7 +36,7 @@ case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter val tokVals: Buf[NCTokenValidator] = Buf.empty val entVals: Buf[NCEntityValidator] = Buf.empty val entMappers: Buf[NCEntityMapper] = Buf.empty - var varFilter: Option[NCVariantFilter] = None + var varFilters: Buf[NCVariantFilter] = Buf.empty override def getTokenParser: NCTokenParser = tokParser override def getTokenEnrichers: List[NCTokenEnricher] = tokEnrichers.toList @@ -45,4 +45,4 @@ case class NCTestPipeline(tokParser: NCTokenParser) extends NCPropertyMapAdapter override def getTokenValidators: List[NCTokenValidator] = tokVals.toList override def getEntityValidators: List[NCEntityValidator] = entVals.toList override def getEntityMappers: List[NCEntityMapper] = entMappers.toList - override def getVariantFilter: Option[NCVariantFilter] = varFilter \ No newline at end of file + override def getVariantFilters: List[NCVariantFilter] = varFilters.toList \ No newline at end of file