This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-472 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push: new 3d50fce WIP. 3d50fce is described below commit 3d50fcefcaf2034bc5f60a7affc864991f078b82 Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Tue Jan 4 17:02:00 2022 +0300 WIP. --- .../scala/org/apache/nlpcraft/NCModelClient.java | 82 ++------ .../org/apache/nlpcraft/NCModelConfigAdapter.java | 170 ---------------- .../org/apache/nlpcraft/NCModelConfigBuilder.java | 215 +++++++++++++++++++++ .../nlpcraft/internal/NCModelClientImpl.scala | 112 +++++++++++ .../parser/opennlp/en/NCEnOpenNlpTokenParser.java | 5 +- .../opennlp/en/NCEnOpenNlpTokenParserStemmer.java | 22 +++ .../parser/opennlp/en/impl/NCEnOpenNlpImpl.scala | 6 +- .../en/impl/NCEnOpenNlpTokenParserStemmerImpl.java | 13 ++ .../opennlp/NCEnOpenNlpTokenParserBenchmark.java | 4 +- .../apache/nlpcraft/nlp/util/NCTestConfig.scala | 6 +- 10 files changed, 385 insertions(+), 250 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java index 3fe9fdb..ec0cdef 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelClient.java @@ -17,6 +17,8 @@ package org.apache.nlpcraft; +import org.apache.nlpcraft.internal.NCModelClientImpl; + import java.util.Map; import java.util.List; import java.util.concurrent.*; @@ -25,87 +27,25 @@ import java.util.concurrent.*; * */ public class NCModelClient implements NCLifecycle { - private final NCModel mdl; + // TODO: move NCModelClientImpl under rigth package. + private final NCModelClientImpl impl; /** * * @param mdl */ public NCModelClient(NCModel mdl) { - this.mdl = mdl; - } - - /** - * - * @throws NCException - */ - private static void verify() throws NCException { - // TODO: - } - - private static void start(ExecutorService s, List<? extends NCLifecycle> list, NCModelConfig cfg) { - assert s != null; - - if (list != null) - list.forEach(p -> s.execute(() -> p.start(cfg))); - } - - private static void stop(ExecutorService s, List<? extends NCLifecycle> list) { - assert s != null; - - if (list != null) - list.forEach(p -> s.execute(() -> p.stop())); - } - - private static void stopExecutorService(ExecutorService s) { - try { - s.awaitTermination(Long.MAX_VALUE, TimeUnit.MILLISECONDS); - } - catch (InterruptedException e) { - throw new NCException("Thread interrupted.", e); - } - } - - private static ExecutorService getExecutorService() { - return Executors.newFixedThreadPool(Runtime.getRuntime().availableProcessors()); + this.impl = new NCModelClientImpl(mdl); } @Override public void start(NCModelConfig cfg) { - verify(); - - cfg.getTokenParser().start(cfg); - - ExecutorService s = getExecutorService(); - - // TODO: start and stop validators. - - try { - start(s, cfg.getEntityParsers(), cfg); - start(s, cfg.getEntityEnrichers(), cfg); - start(s, cfg.getTokenEnrichers(), cfg); - } - finally { - stopExecutorService(s); - } + impl.start(cfg); } @Override public void stop() { - NCModelConfig cfg = mdl.getConfig(); - ExecutorService s = getExecutorService(); - - try { - stop(s, cfg.getTokenEnrichers()); - stop(s, cfg.getEntityEnrichers()); - stop(s, cfg.getEntityParsers()); - stop(s, cfg.getTokenEnrichers()); - } - finally { - stopExecutorService(s); - } - - cfg.getTokenParser().stop(); + impl.stop(); } /** @@ -117,7 +57,7 @@ public class NCModelClient implements NCLifecycle { * @throws NCException */ public CompletableFuture<NCResult> ask(String txt, Map<String, Object> data, String usrId) { - return null; // TODO + return impl.ask(txt, data, usrId); } /** @@ -129,7 +69,7 @@ public class NCModelClient implements NCLifecycle { * @throws NCException */ public NCResult askSync(String txt, Map<String, Object> data, String usrId) { - return null; // TODO + return impl.askSync(txt, data, usrId); } /** @@ -138,7 +78,7 @@ public class NCModelClient implements NCLifecycle { * @throws NCException */ public void clearConversation(String usrId) { - // TODO + impl.clearConversation(usrId); } /** @@ -147,6 +87,6 @@ public class NCModelClient implements NCLifecycle { * @throws NCException */ public void clearDialog(String usrId) { - // TODO + impl.clearDialog(usrId); } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java deleted file mode 100644 index 5f3585c..0000000 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigAdapter.java +++ /dev/null @@ -1,170 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * https://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.nlpcraft; - -import java.util.*; - -/** - * - */ -// TODO: validation for constructor and all setters. -// TODO: do builder instead of it. -public class NCModelConfigAdapter extends NCPropertyMapAdapter implements NCModelConfig { - private final String id; - private final String name; - private final String version; - private final NCTokenParser tokParser; - private final List<NCTokenEnricher> tokEnrichers = new ArrayList<>(); - private final List<NCEntityEnricher> entEnrichers = new ArrayList<>(); - private final List<NCEntityParser> entParsers = new ArrayList<>(); - private final List<NCTokenValidator> tokenValidators = new ArrayList<>(); - private final List<NCEntityValidator> entityValidators = new ArrayList<>(); - private final List<NCVariantValidator> variantsFilters = new ArrayList<>(); - - /** - * - * @param id - * @param name - * @param version - * @param tokParser - */ - public NCModelConfigAdapter(String id, String name, String version, NCTokenParser tokParser, NCEntityParser entParser) { - Objects.requireNonNull(id, "ID cannot be null."); - Objects.requireNonNull(name, "Name cannot be null."); - Objects.requireNonNull(version, "Version cannot be null."); - Objects.requireNonNull(tokParser, "Token parser cannot be null."); - Objects.requireNonNull(entParser, "Entity parser cannot be null."); - - this.id = id; - this.name = name; - this.version = version; - this.tokParser = tokParser; - - entParsers.add(entParser); - } - - /** - * - * @param entParser - */ - public void addEntityParser(NCEntityParser entParser) { - Objects.requireNonNull(entParser, "Entity parser cannot be null."); - - entParsers.add(entParser); - } - - /** - * - * @param tokEnricher - */ - public void addTokenEnricher(NCTokenEnricher tokEnricher) { - Objects.requireNonNull(tokEnricher, "Token enricher cannot be null."); - - tokEnrichers.add(tokEnricher); - } - - /** - * - * @param entEnricher - */ - public void addEntityEnricher(NCEntityEnricher entEnricher) { - Objects.requireNonNull(entEnricher, "Entity enricher cannot be null."); - - entEnrichers.add(entEnricher); - } - - /** - * - * @param entParser - */ - public void addEntityParser(NCTokenValidator tokValidator) { - Objects.requireNonNull(tokValidator, "Token validator cannot be null."); - - tokenValidators.add(tokValidator); - } - - /** - * - * @param entValidator - */ - public void addEntityParser(NCEntityValidator entValidator) { - Objects.requireNonNull(entValidator, "Entity validator cannot be null."); - - entityValidators.add(entValidator); - } - - /** - * - * @param variantFilter - */ - public void addVariantFilter(NCVariantValidator variantFilter) { - Objects.requireNonNull(variantFilter, "Variant filter cannot be null."); - - variantsFilters.add(variantFilter); - } - - @Override - public String getId() { - return id; - } - - @Override - public String getName() { - return name; - } - - @Override - public String getVersion() { - return version; - } - - @Override - public List<NCTokenEnricher> getTokenEnrichers() { - return tokEnrichers; - } - - @Override - public List<NCEntityEnricher> getEntityEnrichers() { - return entEnrichers; - } - - @Override - public NCTokenParser getTokenParser() { - return tokParser; - } - - @Override - public List<NCEntityParser> getEntityParsers() { - return entParsers; - } - - @Override - public List<NCTokenValidator> getTokenValidators() { - return tokenValidators; - } - - @Override - public List<NCEntityValidator> getEntityValidators() { - return entityValidators; - } - - @Override - public List<NCVariantValidator> getVariantValidators() { - return variantsFilters; - } -} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java new file mode 100644 index 0000000..58e0b4e --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/NCModelConfigBuilder.java @@ -0,0 +1,215 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft; + +import java.util.ArrayList; +import java.util.List; +import java.util.Objects; + +/** + * + */ +// TODO: validation for constructor and all setters. +public class NCModelConfigBuilder { + private abstract class NCModelConfigAdapter extends NCPropertyMapAdapter implements NCModelConfig { } + + private final String id; + private final String name; + private final String version; + + private final List<NCTokenEnricher> tokEnrichers = new ArrayList<>(); + private final List<NCEntityEnricher> entEnrichers = new ArrayList<>(); + private final List<NCEntityParser> entParsers = new ArrayList<>(); + private final List<NCTokenValidator> tokenValidators = new ArrayList<>(); + private final List<NCEntityValidator> entityValidators = new ArrayList<>(); + private final List<NCVariantValidator> variantsValidators = new ArrayList<>(); + + private String description; + private String origin; + + private NCTokenParser tokParser; + + public NCModelConfigBuilder(String id, String name, String version) { + Objects.requireNonNull(id, "Id cannot be null."); + Objects.requireNonNull(name, "Name cannot be null."); + Objects.requireNonNull(version, "Varsion cannot be null"); + + this.id = id; + this.name = name; + this.version = version; + } + + public NCModelConfigBuilder withDescription(String description) { + this.description = description; + + return this; + } + + public NCModelConfigBuilder withOrigin(String origin) { + this.origin = origin; + + return this; + } + + + public NCModelConfigBuilder withTokenParser(NCTokenParser tokParser) { + this.tokParser = tokParser; + + return this; + } + + public NCModelConfigBuilder withTokenEnrichers(List<NCTokenEnricher> tokEnrichers) { + this.tokEnrichers.addAll(tokEnrichers); + + return this; + } + + public NCModelConfigBuilder withTokenEnricher(NCTokenEnricher tokEnricher) { + Objects.requireNonNull(tokEnrichers, "Argument cannot be null."); + + this.tokEnrichers.add(tokEnricher); + + return this; + } + + public NCModelConfigBuilder withEntityEnrichers(List<NCEntityEnricher> entEnrichers) { + this.entEnrichers.addAll(entEnrichers); + + return this; + } + + public NCModelConfigBuilder withEntityEnricher(NCEntityEnricher entEnricher) { + this.entEnrichers.add(entEnricher); + + return this; + } + + public NCModelConfigBuilder withEntityParsers(List<NCEntityParser> entParsers) { + this.entParsers.addAll(entParsers); + + return this; + } + + public NCModelConfigBuilder withEntityParser(NCEntityParser entParser) { + this.entParsers.add(entParser); + + return this; + } + + public NCModelConfigBuilder withTokenValidators(List<NCTokenValidator> tokenValidators) { + this.tokenValidators.addAll(tokenValidators); + + return this; + } + + public NCModelConfigBuilder withTokenValidator(NCTokenValidator tokenValidator) { + this.tokenValidators.add(tokenValidator); + + return this; + } + + public NCModelConfigBuilder withEntityValidators(List<NCEntityValidator> entityValidators) { + this.entityValidators.addAll(entityValidators); + + return this; + } + + public NCModelConfigBuilder withEntityValidator(NCEntityValidator entityValidator) { + this.entityValidators.add(entityValidator); + + return this; + } + + public NCModelConfigBuilder withVariantValidators(List<NCVariantValidator> variantsValidators) { + this.variantsValidators.addAll(variantsValidators); + + return this; + } + + public NCModelConfigBuilder withVariantValidator(NCVariantValidator variantsValidator) { + this.variantsValidators.add(variantsValidator); + + return this; + } + + public NCModelConfig make() { + // TODO: validate. + + return new NCModelConfigAdapter() { + @Override + public NCTokenParser getTokenParser() { + return tokParser; + } + + @Override + public List<NCTokenEnricher> getTokenEnrichers() { + return tokEnrichers; + } + + @Override + public List<NCEntityEnricher> getEntityEnrichers() { + return entEnrichers; + } + + @Override + public List<NCEntityParser> getEntityParsers() { + return entParsers; + } + + @Override + public List<NCTokenValidator> getTokenValidators() { + return tokenValidators; + } + + @Override + public List<NCEntityValidator> getEntityValidators() { + return entityValidators; + } + + @Override + public List<NCVariantValidator> getVariantValidators() { + return variantsValidators; + } + + @Override + public String getId() { + return id; + } + + @Override + public String getName() { + return name; + } + + @Override + public String getVersion() { + return version; + } + + @Override + public String getDescription() { + return description != null ? description : super.getDescription(); + } + + @Override + public String getOrigin() { + return origin != null ? origin : super.getOrigin(); + } + }; + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala new file mode 100644 index 0000000..22419fb --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/internal/NCModelClientImpl.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.internal + +import org.apache.nlpcraft.* +import org.apache.nlpcraft.internal.util.NCUtils +import java.util.concurrent.* +import java.util.List as JList +import java.util.Map as JMap + +// TODO: move it to right package. +class NCModelClientImpl(mdl: NCModel) extends NCLifecycle: + /** + * + * @throws NCException */ + private def verify(): Unit = () // TODO: + + /** + * + * @param s + * @param list + * @param cfg + * @tparam T + */ + private def start[T <: NCLifecycle](s: ExecutorService, list: JList[T], cfg: NCModelConfig): Unit = + assert(s != null) + + if list != null then list.forEach(p => s.execute(() => p.start(cfg))) + + /** + * + * @param s + * @param list + * @tparam T + */ + private def stop[T <: NCLifecycle](s: ExecutorService, list: JList[T]): Unit = + assert(s != null) + + if list != null then list.forEach(p => s.execute(() => p.stop())) + + /** + * + * @param s + */ + private def stopExecutorService(s: ExecutorService): Unit = + try + s.awaitTermination(Long.MaxValue, TimeUnit.MILLISECONDS) + catch + case e: InterruptedException => throw new NCException("Thread interrupted.", e) + + /** + * + * @return + */ + private def getExecutorService: ExecutorService = Executors.newFixedThreadPool(Runtime.getRuntime.availableProcessors) + + override def start(cfg: NCModelConfig): Unit = + verify() + + cfg.getTokenParser.start(cfg) + + val s = getExecutorService + + try + start(s, cfg.getEntityParsers, cfg) + start(s, cfg.getEntityEnrichers, cfg) + start(s, cfg.getTokenEnrichers, cfg) + start(s, cfg.getTokenValidators, cfg) + start(s, cfg.getEntityValidators, cfg) + start(s, cfg.getVariantValidators, cfg) + finally + stopExecutorService(s) + + override def stop(): Unit = { + val cfg = mdl.getConfig + + val s = getExecutorService + + try + stop(s, cfg.getVariantValidators) + stop(s, cfg.getEntityValidators) + stop(s, cfg.getTokenValidators) + stop(s, cfg.getTokenEnrichers) + stop(s, cfg.getEntityEnrichers) + stop(s, cfg.getEntityParsers) + stop(s, cfg.getTokenEnrichers) + finally + stopExecutorService(s) + + cfg.getTokenParser.stop() + } + + // TODO: implement + def ask(txt: String, data: JMap[String, AnyRef], usrId: String): CompletableFuture[NCResult] = null + def askSync(txt: String, data: JMap[String, AnyRef], usrId: String): NCResult = null + def clearConversation(usrId: String): Unit = () + def clearDialog(usrId: String): Unit = () diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java index 2c59def..59357e5 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParser.java @@ -55,13 +55,14 @@ public class NCEnOpenNlpTokenParser implements NCTokenParser { * @param lemmaDicSrc Local filesystem path, resources file path or URL for OpenNLP lemmatizer dictionary. * @throws NCException */ - public NCEnOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String lemmaDicSrc) { + public NCEnOpenNlpTokenParser(String tokMdlSrc, String posMdlSrc, String lemmaDicSrc, NCEnOpenNlpTokenParserStemmer stemmer) { Objects.requireNonNull(tokMdlSrc, "Tokenizer model path cannot be null."); Objects.requireNonNull(posMdlSrc, "POS model path cannot be null."); Objects.requireNonNull(lemmaDicSrc, "Lemmatizer model path cannot be null."); + Objects.requireNonNull(stemmer, "Stemmer cannot be null."); try { - impl = new NCEnOpenNlpImpl(tokMdlSrc, posMdlSrc, lemmaDicSrc); + impl = new NCEnOpenNlpImpl(tokMdlSrc, posMdlSrc, lemmaDicSrc, stemmer); } catch (Exception e) { throw new NCException("Failed to create OpenNLP token parser.", e); diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserStemmer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserStemmer.java new file mode 100644 index 0000000..57ab323 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/NCEnOpenNlpTokenParserStemmer.java @@ -0,0 +1,22 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.nlp.token.parser.opennlp.en; + +public interface NCEnOpenNlpTokenParserStemmer { + String stem(String s); +} \ No newline at end of file diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala index f7714a3..60d2172 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpImpl.scala @@ -24,6 +24,7 @@ import opennlp.tools.stemmer.* import opennlp.tools.tokenize.* import org.apache.nlpcraft.* import org.apache.nlpcraft.internal.util.NCUtils +import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParserStemmer import java.io.* import java.util @@ -38,15 +39,13 @@ import scala.jdk.CollectionConverters.* * @param posMdlSrc * @param lemmaDicSrc */ -class NCEnOpenNlpImpl(tokMdl: String, posMdlSrc: String, lemmaDicSrc: String) extends NCTokenParser with LazyLogging: - @volatile private var stemmer: PorterStemmer = _ +class NCEnOpenNlpImpl(tokMdl: String, posMdlSrc: String, lemmaDicSrc: String, stemmer: NCEnOpenNlpTokenParserStemmer) extends NCTokenParser with LazyLogging: @volatile var tagger: POSTaggerME = _ @volatile var lemmatizer: DictionaryLemmatizer = _ @volatile var tokenizer: TokenizerME = _ override def start(cfg: NCModelConfig): Unit = NCUtils.execPar( - () => stemmer = new PorterStemmer, () => tagger = new POSTaggerME(new POSModel(NCUtils.getStream(posMdlSrc))) logger.trace(s"Loaded resource: $posMdlSrc") @@ -65,7 +64,6 @@ class NCEnOpenNlpImpl(tokMdl: String, posMdlSrc: String, lemmaDicSrc: String) e lemmatizer = null tagger = null tokenizer = null - stemmer = null /** * diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpTokenParserStemmerImpl.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpTokenParserStemmerImpl.java new file mode 100644 index 0000000..2c8c9e8 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/token/parser/opennlp/en/impl/NCEnOpenNlpTokenParserStemmerImpl.java @@ -0,0 +1,13 @@ +package org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl; + +import opennlp.tools.stemmer.PorterStemmer; +import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParserStemmer; + +public class NCEnOpenNlpTokenParserStemmerImpl implements NCEnOpenNlpTokenParserStemmer { + private PorterStemmer stemmer = new PorterStemmer(); + + @Override + public String stem(String s) { + return stemmer.stem(s); + } +} diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java index c30f536..82f213d 100644 --- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java +++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/benchmark/token/parser/opennlp/NCEnOpenNlpTokenParserBenchmark.java @@ -19,6 +19,7 @@ package org.apache.nlpcraft.nlp.benchmark.token.parser.opennlp; import org.apache.nlpcraft.nlp.benchmark.NCBenchmarkAdapter; import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser; +import org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.NCEnOpenNlpTokenParserStemmerImpl; import org.apache.nlpcraft.nlp.util.NCTestUtils; import org.junit.jupiter.api.Disabled; import org.openjdk.jmh.annotations.*; @@ -54,7 +55,8 @@ public class NCEnOpenNlpTokenParserBenchmark extends NCBenchmarkAdapter { NCEnOpenNlpTokenParser p = new NCEnOpenNlpTokenParser( "opennlp/en-token.bin", "opennlp/en-pos-maxent.bin", - "opennlp/en-lemmatizer.dict" + "opennlp/en-lemmatizer.dict", + new NCEnOpenNlpTokenParserStemmerImpl() ); p.start(null); // TODO: fix it. diff --git a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala index 5e32ee1..4934c11 100644 --- a/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala +++ b/nlpcraft/src/test/java/org/apache/nlpcraft/nlp/util/NCTestConfig.scala @@ -18,7 +18,8 @@ package org.apache.nlpcraft.nlp.util import org.apache.nlpcraft.* -import org.apache.nlpcraft.nlp.token.parser.opennlp.en.NCEnOpenNlpTokenParser +import org.apache.nlpcraft.nlp.token.parser.opennlp.en.* +import org.apache.nlpcraft.nlp.token.parser.opennlp.en.impl.* import java.util.{Optional, ArrayList as JAList, List as JList} @@ -31,7 +32,8 @@ object NCTestConfig: new NCEnOpenNlpTokenParser( "opennlp/en-token.bin", "opennlp/en-pos-maxent.bin", - "opennlp/en-lemmatizer.dict" + "opennlp/en-lemmatizer.dict", + new NCEnOpenNlpTokenParserStemmerImpl() ) override def getTokenParser: NCTokenParser = p