This is an automated email from the ASF dual-hosted git repository. aradzinski pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit a36551e789ea02d983642c2668cc9f23e655894a Author: Aaron Radzinski <aradzin...@datalingvo.com> AuthorDate: Tue Mar 29 12:57:47 2022 -0700 WIP Javadoc. --- nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java | 13 ++++++++++--- nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java | 3 ++- 2 files changed, 12 insertions(+), 4 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java index 1e8b440..9baeae3 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCPipeline.java @@ -22,7 +22,7 @@ import java.util.List; import java.util.Optional; /** - * Input request NLP processing pipeline. + * NLP processing pipeline for the input request. Pipeline is associated with the model. * <p> * An NLP pipeline is a container for various processing components that take the input text at the beginning of the * pipeline and produce the list of {@link NCEntity entities} at the end of the pipeline. @@ -31,7 +31,7 @@ import java.util.Optional; * <pre> * +----------+ +-----------+ * *=========* +---------+ +---+-------+ | +---+-------+ | - * : Text : -> | Token | -> | Token | | -> | Token | | ---. + * : Text : -> | Token | -> | Token | | -> | Token | | ----. * : Input : | Parser | | Enrichers |--+ | Validators |--+ \ * *=========* +---------+ +-----------+ +------------+ \ * \ @@ -45,7 +45,14 @@ import java.util.Optional; * <p> * Pipeline has the following components: * <ul> - * <li></li> + * <li> + * {@link NCTokenParser Token parser} is responsible for taking the input text and tokenizing it into a list of + * {@link NCToken tokens}. This process is called tokenization, i.e. the process of demarcating and + * classifying sections of a string of input characters. There's only one token parser for the pipeline. 
+ * </li> + * <li> + * After the initial list of tokens is + * </li> * </ul> * * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java index 6d5aea6..8d2d142 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCToken.java @@ -19,7 +19,8 @@ package org.apache.nlpcraft; /** * Represents a contiguous substring of the original input text produced by {@link NCTokenParser}. - * See {@link NCPipeline} for documentation on the tokens place in the overall processing pipeline. + * A token is the result of {@link NCTokenParser tokenization} - the process of demarcating and classifying sections of a string of + * input characters. See {@link NCPipeline} for documentation on the token's place in the overall processing pipeline. * * <span class="hdr">Metadata</span> * Note that both {@link NCToken} and {@link NCEntity} interfaces extend {@link NCPropertyMap} interface