This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-483 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-483 by this push: new 01d2815 WIP. 01d2815 is described below commit 01d28158345762a91d0cb4819e6c6082cb6f289a Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Wed Mar 2 15:44:28 2022 +0300 WIP. --- .../token/enricher/NCEnLemmaPosTokenEnricher.java | 47 ++++++++++++++++++++++ 1 file changed, 47 insertions(+) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java new file mode 100644 index 0000000..aedcf84 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */
+
+package org.apache.nlpcraft.nlp.en.token.enricher;
+
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.NCTokenEnricher;
+import org.apache.nlpcraft.internal.util.NCResourceReader;
+import org.apache.nlpcraft.nlp.en.token.enricher.impl.NCLemmaPosTokenEnricherImpl;
+import org.apache.nlpcraft.nlp.mult.token.enricher.opennlp.NCLemmaPosTokenEnricher;
+
+import java.util.List;
+
+/**
+ * Enriches tokens with <code>lemma</code> and <code>pos</code> properties using English OpenNLP model files.
+ *
+ * Models can be downloaded from the following resources:
+ * - tagger: http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
+ * - lemmatizer: https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
+ */
+public class NCEnLemmaPosTokenEnricher extends NCLemmaPosTokenEnricher {
+    /**
+     * Passes the {@link NCResourceReader} paths of the POS model and lemmatizer dictionary to the superclass.
+     */
+    public NCEnLemmaPosTokenEnricher() {
+        super(
+            NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
+            NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
+        );
+    }
+}