This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-472 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push: new a071064 WIP. a071064 is described below commit a071064239457eae68fded2e52cb8b8e5a678eec Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Tue Jan 4 12:50:41 2022 +0300 WIP. --- .../{NCVariantValidator.java => NCVariant.java} | 11 +--- .../org/apache/nlpcraft/NCVariantValidator.java | 2 +- .../nlp/entity/parser/nlp/NCNlpEntityParser.java | 65 ++++++++++++++++++++++ .../parser/nlp/impl/NCNlpEntityParserImpl.scala | 49 ++++++++++++++++ 4 files changed, 117 insertions(+), 10 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariant.java similarity index 75% copy from nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java copy to nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariant.java index 212e242..99f9373 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariant.java @@ -22,13 +22,6 @@ import java.util.List; /** * */ -public interface NCVariantValidator extends NCLifecycle { - /** - * Filters all found entities variants. 
- * - * @param req - * @param cfg - * @param toks - */ - List<List<NCEntity>> filter(NCRequest req, NCModelConfig cfg, List<List<NCEntity>> variants); +public interface NCVariant { + List<NCToken> getTokens(); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java index 212e242..3e0fa0e 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCVariantValidator.java @@ -30,5 +30,5 @@ public interface NCVariantValidator extends NCLifecycle { * @param cfg * @param toks */ - List<List<NCEntity>> filter(NCRequest req, NCModelConfig cfg, List<List<NCEntity>> variants); + List<NCVariant> filter(NCRequest req, NCModelConfig cfg, List<NCVariant> variants); } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java new file mode 100644 index 0000000..efb3a95 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/NCNlpEntityParser.java @@ -0,0 +1,65 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. 
+ * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.nlp.entity.parser.nlp; + +import org.apache.nlpcraft.NCEntity; +import org.apache.nlpcraft.NCEntityParser; +import org.apache.nlpcraft.NCModelConfig; +import org.apache.nlpcraft.NCRequest; +import org.apache.nlpcraft.NCToken; +import org.apache.nlpcraft.nlp.entity.parser.nlp.impl.NCNlpEntityParserImpl; +import org.apache.nlpcraft.nlp.entity.parser.opennlp.impl.NCOpenNlpEntityParserImpl; + +import java.util.List; +import java.util.Objects; + +/** + * Umbrella for NLP tokens. + * + * Each entity with ID 'nlp:token' contains one token and has the following properties copied from its token: + * nlp:token:stem, nlp:token:lemma, nlp:token:pos, nlp:token:text, nlp:token:index + * + * <p> + * Component is language independent. + * <p> + */ +public class NCNlpEntityParser implements NCEntityParser { + private final NCNlpEntityParserImpl impl; + + /** + * @param mdlSrc + */ + public NCNlpEntityParser() { + this.impl = new NCNlpEntityParserImpl(); + } + + @Override + public void start(NCModelConfig cfg) { + impl.start(cfg); + } + + @Override + public void stop() { + impl.stop(); + } + + @Override + public List<NCEntity> parse(NCRequest req, NCModelConfig cfg, List<NCToken> toks) { + return impl.parse(req, cfg, toks); + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala new file mode 100644 index 0000000..a7e4116 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/nlp/impl/NCNlpEntityParserImpl.scala @@ -0,0 +1,49 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. 
+ * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.nlp.entity.parser.nlp.impl + +import org.apache.nlpcraft.* + +import java.util +import java.util.stream.Collectors + +/** + * + */ +object NCNlpEntityParserImpl: + private def id = "nlp:token" + +import NCNlpEntityParserImpl._ + +/** + * + */ +class NCNlpEntityParserImpl extends NCEntityParser: + override def parse(req: NCRequest, cfg: NCModelConfig, toks: util.List[NCToken]): util.List[NCEntity] = + toks.stream().map(t => + new NCPropertyMapAdapter with NCEntity: + put(s"$id:stem", t.getStem) + put(s"$id:lemma", t.getLemma) + put(s"$id:pos", t.getPos) + put(s"$id:text", t.getText) + put(s"$id:index", t.getIndex) + + override def getTokens: util.List[NCToken] = util.Collections.singletonList(t) + override def getRequestId: String = req.getRequestId + override def getId: String = id + ).collect(Collectors.toList)