This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-483 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-483 by this push: new 3d2eb56 RU adapters added (example) 3d2eb56 is described below commit 3d2eb567551a18bf37d694d6aa78d23fff7b5c46 Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Fri Feb 25 23:22:30 2022 +0300 RU adapters added (example) --- ...witchModelRu.scala => LightSwitchRuModel.scala} | 15 +++------ .../examples/lightswitch/NCRuPipeline.scala | 37 ++++++++++++++++++++++ .../semantic/NCRuSemanticEntityParser.scala} | 21 ++++++++---- .../enricher/NCRuStopWordsTokenEnricher.scala} | 18 ++++++----- .../token/parser/NCRuTokenParser.scala} | 10 +++--- .../lightswitch/NCModelValidationSpec.scala | 4 +-- 6 files changed, 71 insertions(+), 34 deletions(-) diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchModelRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala similarity index 89% rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchModelRu.scala rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala index b36b1b2..5a65ec9 100644 --- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchModelRu.scala +++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/LightSwitchRuModel.scala @@ -18,12 +18,12 @@ package org.apache.nlpcraft.examples.lightswitch import org.apache.nlpcraft.* -import org.apache.nlpcraft.examples.lightswitch.ru.* +import org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.semantic.NCRuSemanticEntityParser import org.apache.nlpcraft.nlp.entity.parser.nlp.NCNLPEntityParser import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer -import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher +import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser /** * This example provides very simple implementation for NLI-powered light switch. @@ -38,16 +38,9 @@ import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher * See 'README.md' file in the same folder for running and testing instructions. */ -class LightSwitchModelRu extends NCModel: +class LightSwitchRuModel extends NCModel: override val getConfig: NCModelConfig = new NCModelConfig("nlpcraft.lightswitch.ru.ex", "LightSwitch Example Model RU", "1.0") - override val getPipeline: NCModelPipeline = - val tp = new NCTokenParserRu - new NCModelPipelineBuilder( - tp, - new NCSemanticEntityParser(new NCSemanticStemmerRu(), tp, "lightswitch_model_ru.yaml") - ). - withTokenEnricher(new NCStopWordsTokenEnricherRu()). - build() + override val getPipeline: NCModelPipeline = new NCRuPipeline(new NCRuSemanticEntityParser("lightswitch_model_ru.yaml")) /** * Intent and its on-match callback. diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/NCRuPipeline.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/NCRuPipeline.scala new file mode 100644 index 0000000..3fe37d1 --- /dev/null +++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/NCRuPipeline.scala @@ -0,0 +1,37 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.examples.lightswitch + +import org.apache.nlpcraft.* +import org.apache.nlpcraft.examples.lightswitch.nlp.token.enricher.NCRuStopWordsTokenEnricher +import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser +import org.apache.nlpcraft.internal.util.NCResourceReader +import org.apache.nlpcraft.nlp.token.enricher.en.* +import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser + +import java.util +import java.util.* +import scala.jdk.CollectionConverters.* + +/** + * Default RU implementation based on Open Nlp token parser, and stopword token enricher. + * Also at least one entity parser must be defined. */ +class NCRuPipeline(parser: NCEntityParser) extends NCModelPipeline: + override val getTokenParser: NCTokenParser = new NCRuTokenParser() + override val getEntityParsers: util.List[NCEntityParser] = Seq(parser).asJava + override val getTokenEnrichers: util.List[NCTokenEnricher] = Seq(new NCRuStopWordsTokenEnricher()).asJava diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCSemanticStemmerRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala similarity index 59% rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCSemanticStemmerRu.scala rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala index e49c72c..a0cdd9a 100644 --- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCSemanticStemmerRu.scala +++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/entity/parser/semantic/NCRuSemanticEntityParser.scala @@ -15,12 +15,21 @@ * limitations under the License. */ -package org.apache.nlpcraft.examples.lightswitch.ru +package org.apache.nlpcraft.examples.lightswitch.nlp.entity.parser.semantic import opennlp.tools.stemmer.snowball.SnowballStemmer -import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer +import org.apache.nlpcraft.examples.lightswitch.nlp.token.parser.NCRuTokenParser +import org.apache.nlpcraft.nlp.entity.parser.semantic.* -class NCSemanticStemmerRu extends NCSemanticStemmer: - private val stemmer = new SnowballStemmer(SnowballStemmer.ALGORITHM.RUSSIAN) - - override def stem(txt: String): String = stemmer.synchronized { stemmer.stem(txt.toLowerCase).toString } +/** + * + * @param src + */ +class NCRuSemanticEntityParser(src: String) extends NCSemanticEntityParser( + new NCSemanticStemmer: + private val stemmer = new SnowballStemmer(SnowballStemmer.ALGORITHM.RUSSIAN) + override def stem(txt: String): String = stemmer.synchronized { stemmer.stem(txt.toLowerCase).toString } + , + new NCRuTokenParser(), + src +) diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCStopWordsTokenEnricherRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuStopWordsTokenEnricher.scala similarity index 74% rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCStopWordsTokenEnricherRu.scala rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuStopWordsTokenEnricher.scala index 0e9c064..e21c3dc 100644 --- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCStopWordsTokenEnricherRu.scala +++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/enricher/NCRuStopWordsTokenEnricher.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.examples.lightswitch.ru +package org.apache.nlpcraft.examples.lightswitch.nlp.token.enricher import org.apache.lucene.analysis.ru.RussianAnalyzer import org.apache.nlpcraft.* @@ -26,18 +26,20 @@ import scala.jdk.CollectionConverters.* /** * */ -class NCStopWordsTokenEnricherRu extends NCTokenEnricher: +class NCRuStopWordsTokenEnricher extends NCTokenEnricher: private final val stops = RussianAnalyzer.getDefaultStopSet override def enrich(req: NCRequest, cfg: NCModelConfig, toks: util.List[NCToken]): Unit = - toks.asScala.foreach(t => + for (t <- toks.asScala) + val lemma = t.getLemma + lazy val pos = t.getPos + t.put( "stopword", - t.getLemma.length == 1 && !Character.isLetter(t.getLemma.head) || - t.getPos.startsWith("PARTICLE") || - t.getPos.startsWith("INTERJECTION") || - t.getPos.startsWith("PREP") || + lemma.length == 1 && !Character.isLetter(lemma.head) || + pos.startsWith("PARTICLE") || + pos.startsWith("INTERJECTION") || + pos.startsWith("PREP") || stops.contains(t.getLemma) || stops.contains(t.getText.toLowerCase) ) - ) diff --git a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCTokenParserRu.scala b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/parser/NCRuTokenParser.scala similarity index 93% rename from nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCTokenParserRu.scala rename to nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/parser/NCRuTokenParser.scala index 5bda243..d1ddf1b 100644 --- a/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/ru/NCTokenParserRu.scala +++ b/nlpcraft-examples/lightswitch-ru/src/main/java/org/apache/nlpcraft/examples/lightswitch/nlp/token/parser/NCRuTokenParser.scala @@ -15,7 +15,7 @@ * limitations under the License. */ -package org.apache.nlpcraft.examples.lightswitch.ru +package org.apache.nlpcraft.examples.lightswitch.nlp.token.parser import org.apache.lucene.analysis.ru.RussianAnalyzer import org.apache.nlpcraft.* @@ -28,11 +28,9 @@ import org.languagetool.tokenizers.WordTokenizer import java.util import scala.jdk.CollectionConverters.* -object NCTokenParserRu: +object NCRuTokenParser: private val tokenizer = new WordTokenizer - private case class Span(word: String, start: Int, end: Int) - private def nvl(v: String, dflt : => String): String = if v != null then v else dflt private def split(text: String): Seq[Span] = @@ -45,9 +43,9 @@ object NCTokenParserRu: spans.toSeq -import org.apache.nlpcraft.examples.lightswitch.ru.NCTokenParserRu.* +import NCRuTokenParser.* -class NCTokenParserRu extends NCTokenParser: +class NCRuTokenParser extends NCTokenParser: override def tokenize(text: String): util.List[NCToken] = val spans = split(text) val tags = RussianTagger.INSTANCE.tag(spans.map(_.word).asJava).asScala diff --git a/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala b/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala index b6d9d1b..69b5793 100644 --- a/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala +++ b/nlpcraft-examples/lightswitch-ru/src/test/java/org/apache/nlpcraft/examples/lightswitch/NCModelValidationSpec.scala @@ -26,7 +26,5 @@ import scala.util.Using * JUnit models validation. */ class NCModelValidationSpec: - private val MDL = new LightSwitchModelRu - @Test - def test(): Unit = Using.resource(new NCModelClient(MDL)) { client => client.validateSamples() } + def test(): Unit = Using.resource(new NCModelClient(new LightSwitchRuModel)) { client => client.validateSamples() }