This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-483
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-483 by this push:
     new 01d2815  WIP.
01d2815 is described below

commit 01d28158345762a91d0cb4819e6c6082cb6f289a
Author: Sergey Kamov <skhdlem...@gmail.com>
AuthorDate: Wed Mar 2 15:44:28 2022 +0300

    WIP.
---
 .../token/enricher/NCEnLemmaPosTokenEnricher.java  | 47 ++++++++++++++++++++++
 1 file changed, 47 insertions(+)

diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java
new file mode 100644
index 0000000..aedcf84
--- /dev/null
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/en/token/enricher/NCEnLemmaPosTokenEnricher.java
@@ -0,0 +1,47 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.en.token.enricher;
+
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.NCTokenEnricher;
+import org.apache.nlpcraft.internal.util.NCResourceReader;
+import 
org.apache.nlpcraft.nlp.en.token.enricher.impl.NCLemmaPosTokenEnricherImpl;
+import 
org.apache.nlpcraft.nlp.mult.token.enricher.opennlp.NCLemmaPosTokenEnricher;
+
+import java.util.List;
+
+/**
+ * TODO: token enricher that adds <code>lemma</code> and <code>pos</code> properties to tokens.
+ *
+ * Models can be downloaded from the following resources:
+ *  - tagger: http://opennlp.sourceforge.net/models-1.5/en-pos-maxent.bin
+ *  - lemmatizer: https://raw.githubusercontent.com/richardwilly98/elasticsearch-opennlp-auto-tagging/master/src/main/resources/models/en-lemmatizer.dict
+ */
+public class NCEnLemmaPosTokenEnricher extends NCLemmaPosTokenEnricher {
+    /**
+     * Creates the enricher from the tagger model and lemmatizer dictionary paths resolved by {@link NCResourceReader#getPath}.
+     */
+    public NCEnLemmaPosTokenEnricher() {
+        super(
+            NCResourceReader.getPath("opennlp/en-pos-maxent.bin"),
+            NCResourceReader.getPath("opennlp/en-lemmatizer.dict")
+        );
+    }
+}

Reply via email to