This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-483-1 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-483-1 by this push:
     new bcd84ed  WIP.
bcd84ed is described below

commit bcd84ed0f442b2ab95504d9601d00eae9b831ca4
Author: Sergey Kamov <skhdlem...@gmail.com>
AuthorDate: Wed Mar 2 22:28:59 2022 +0300

    WIP.
---
 .../apache/nlpcraft/NCModelPipelineBuilder.java | 27 ++++++++++++++--------
 1 file changed, 18 insertions(+), 9 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
index 02ac728..05f0110 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelPipelineBuilder.java
@@ -71,7 +71,8 @@ public class NCModelPipelineBuilder {
     }
 
     /**
-     *
+     * TODO:
+     * EN Nlp component set. Note it used OpenNLP token parser implementation.
      * @param lang
      * @param entParsers
      */
@@ -81,19 +82,27 @@ public class NCModelPipelineBuilder {
         if (entParsers.isEmpty())
             throw new IllegalArgumentException("At least one entity parser must be defined.");
 
-        tokParser = new NCENOpenNLPTokenParser();
+        switch (lang) {
+            case EN:
+                tokParser = new NCENOpenNLPTokenParser();
 
-        tokEnrichers.add(new NCENOpenNlpLemmaPosTokenEnricher());
-        tokEnrichers.add(new NCENStopWordsTokenEnricher());
-        tokEnrichers.add(new NСENSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
-        tokEnrichers.add(new NCENQuotesTokenEnricher());
-        tokEnrichers.add(new NCENDictionaryTokenEnricher());
-        tokEnrichers.add(new NCENBracketsTokenEnricher());
+                tokEnrichers.add(new NCENOpenNlpLemmaPosTokenEnricher());
+                tokEnrichers.add(new NCENStopWordsTokenEnricher());
+                tokEnrichers.add(new NСENSwearWordsTokenEnricher(NCResourceReader.getPath("badfilter/swear_words.txt")));
+                tokEnrichers.add(new NCENQuotesTokenEnricher());
+                tokEnrichers.add(new NCENDictionaryTokenEnricher());
+                tokEnrichers.add(new NCENBracketsTokenEnricher());
+
+                this.entParsers.addAll(entParsers);
+            default:
+                throw new IllegalArgumentException("Unsupported language: " + lang);
+        }
 
-        this.entParsers.addAll(entParsers);
     }
 
     /**
+     * TODO:
+     * EN Nlp component set. Note it used OpenNLP token parser implementation.
      *
      * @param lang
      * @param entParsers