This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
     new a071064  WIP.
a071064 is described below

commit a071064239457eae68fded2e52cb8b8e5a678eec
Author: Sergey Kamov <skhdlem...@gmail.com>
AuthorDate: Tue Jan 4 12:50:41 2022 +0300

    WIP.
---
 .../{NCVariantValidator.java => NCVariant.java}    | 11 +---
 .../org/apache/nlpcraft/NCVariantValidator.java    |  2 +-
 .../nlp/entity/parser/nlp/NCNlpEntityParser.java   | 65 ++++++++++++++++++++++
 .../parser/nlp/impl/NCNlpEntityParserImpl.scala    | 49 ++++++++++++++++
 4 files changed, 117 insertions(+), 10 deletions(-)

diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariant.java
similarity index 75%
copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariant.java
index 212e242..99f9373 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariant.java
@@ -22,13 +22,6 @@ import java.util.List;
 /**
  *
  */
-public interface NCVariantValidator extends NCLifecycle {
-    /**
-     * Filters all found entities variants.
-     *
-     * @param req
-     * @param cfg
-     * @param toks
-     */
-    List<List<NCEntity>> filter(NCRequest req, NCModelConfig cfg, 
List<List<NCEntity>> variants);
+public interface NCVariant {
+    List<NCToken> getTokens();
 }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
index 212e242..3e0fa0e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java
@@ -30,5 +30,5 @@ public interface NCVariantValidator extends NCLifecycle {
      * @param cfg
      * @param toks
      */
-    List<List<NCEntity>> filter(NCRequest req, NCModelConfig cfg, 
List<List<NCEntity>> variants);
+    List<NCVariant> filter(NCRequest req, NCModelConfig cfg, List<NCVariant> 
variants);
 }
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
new file mode 100644
index 0000000..efb3a95
--- /dev/null
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java
@@ -0,0 +1,65 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.entity.parser.nlp;
+
+import org.apache.nlpcraft.NCEntity;
+import org.apache.nlpcraft.NCEntityParser;
+import org.apache.nlpcraft.NCModelConfig;
+import org.apache.nlpcraft.NCRequest;
+import org.apache.nlpcraft.NCToken;
+import org.apache.nlpcraft.nlp.entity.parser.nlp.impl.NCNlpEntityParserImpl;
+import 
org.apache.nlpcraft.nlp.entity.parser.opennlp.impl.NCOpenNlpEntityParserImpl;
+
+import java.util.List;
+import java.util.Objects;
+
+/**
+ * Umbrella for NLP tokens.
+ *
+ * Each entity with ID 'nlp:token' contains one token and has the following 
properties copied from its token:
+ * nlp:token:stem, nlp:token:lemma, nlp:token:pos, nlp:token:text, 
nlp:token:index
+ *
+ * <p>
+ * Component is language independent.
+ * <p>
+ */
+public class NCNlpEntityParser implements NCEntityParser {
+    private final NCNlpEntityParserImpl impl; // Implementation that start/stop/parse delegate to.
+
+    /**
+     * Creates the parser backed by a new {@link NCNlpEntityParserImpl}; takes no arguments.
+     */
+    public NCNlpEntityParser() {
+        this.impl = new NCNlpEntityParserImpl();
+    }
+
+    @Override
+    public void start(NCModelConfig cfg) {
+        impl.start(cfg);
+    }
+
+    @Override
+    public void stop() {
+        impl.stop();
+    }
+
+    @Override
+    public List<NCEntity> parse(NCRequest req, NCModelConfig cfg, 
List<NCToken> toks) {
+        return impl.parse(req, cfg, toks);
+    }
+}
diff --git 
a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
new file mode 100644
index 0000000..a7e4116
--- /dev/null
+++ 
b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala
@@ -0,0 +1,49 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp.entity.parser.nlp.impl
+
+import org.apache.nlpcraft.*
+
+import java.util
+import java.util.stream.Collectors
+
+/**
+  * Companion object holding the entity ID ("nlp:token") used as entity ID and property-name prefix.
+  */
+object NCNlpEntityParserImpl:
+    private def id = "nlp:token"
+
+import NCNlpEntityParserImpl._
+
+/**
+  * Entity parser that wraps each input token into its own entity, copying stem, lemma, POS, text and index as properties.
+  */
+class NCNlpEntityParserImpl extends NCEntityParser:
+    override def parse(req: NCRequest, cfg: NCModelConfig, toks: 
util.List[NCToken]): util.List[NCEntity] =
+        toks.stream().map(t =>
+            new NCPropertyMapAdapter with NCEntity:
+                put(s"$id:stem", t.getStem)
+                put(s"$id:lemma", t.getLemma)
+                put(s"$id:pos", t.getPos)
+                put(s"$id:text", t.getText)
+                put(s"$id:index", t.getIndex)
+
+                override def getTokens: util.List[NCToken] = 
util.Collections.singletonList(t)
+                override def getRequestId: String = req.getRequestId
+                override def getId: String = id
+        ).collect(Collectors.toList)

Reply via email to