This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-483 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 7635454b90b2a4882af559dc4b951d65796e0e20 Author: Sergey Kamov <skhdlem...@gmail.com> AuthorDate: Fri Feb 25 18:44:27 2022 +0300 EN adapters added. --- .../apache/nlpcraft/nlp/NCENDefaultPipeline.java | 89 ++++++++++++++++++++++ .../nlpcraft/nlp/NCENSemanticEntityParser.java | 73 ++++++++++++++++++ .../nlpcraft/nlp/NCENDefaultPipelineSpec.scala | 60 +++++++++++++++ 3 files changed, 222 insertions(+) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENDefaultPipeline.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENDefaultPipeline.java new file mode 100644 index 0000000..d679ed9 --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENDefaultPipeline.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.nlp; + +import org.apache.nlpcraft.NCEntityParser; +import org.apache.nlpcraft.NCModelPipeline; +import org.apache.nlpcraft.NCTokenEnricher; +import org.apache.nlpcraft.NCTokenParser; +import org.apache.nlpcraft.internal.util.NCResourceReader; +import org.apache.nlpcraft.nlp.token.enricher.en.NCBracketsTokenEnricher; +import org.apache.nlpcraft.nlp.token.enricher.en.NCDictionaryTokenEnricher; +import org.apache.nlpcraft.nlp.token.enricher.en.NCQuotesTokenEnricher; +import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher; +import org.apache.nlpcraft.nlp.token.enricher.en.NСSwearWordsTokenEnricher; +import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser; + +import java.util.Arrays; +import java.util.Collections; +import java.util.List; + +/** + * Default English (EN) {@link NCModelPipeline}: an OpenNLP token parser, a fixed list of EN token enrichers and caller-supplied entity parsers. NOTE(review): the swear-words enricher type name appears to contain a Cyrillic letter instead of a Latin 'C' - confirm against its declaration before retyping it. + */ +public class NCENDefaultPipeline implements NCModelPipeline { + private static final NCResourceReader reader = new NCResourceReader(); + + private final NCTokenParser tp = new NCOpenNLPTokenParser( + reader.getPath("opennlp/en-token.bin"), + reader.getPath("opennlp/en-pos-maxent.bin"), + reader.getPath("opennlp/en-lemmatizer.dict") + ); + + private List<NCTokenEnricher> tokenEnrichers = Arrays.asList( + new NCStopWordsTokenEnricher(), + new NСSwearWordsTokenEnricher(reader.getPath("badfilter/swear_words.txt")), + new NCQuotesTokenEnricher(), + new NCDictionaryTokenEnricher(), + new NCBracketsTokenEnricher() + + ); + + private final List<NCEntityParser> parsers; + + /** + * Creates the pipeline with the given entity parsers; the list reference is stored as-is, without copying. + * @param parsers Entity parsers later returned by {@link #getEntityParsers()}. + */ + public NCENDefaultPipeline(List<NCEntityParser> parsers) { + this.parsers = parsers; + } + + /** + * Creates the pipeline with a single entity parser, wrapped via {@link Collections#singletonList(Object)}. + * @param parser The only entity parser of this pipeline. + */ + public NCENDefaultPipeline(NCEntityParser parser) { + this.parsers = Collections.singletonList(parser); + } + + @Override + public NCTokenParser getTokenParser() { + return tp; + } + + @Override + public List<NCEntityParser> getEntityParsers() { + return parsers; + } + + @Override + public List<NCTokenEnricher> getTokenEnrichers() 
{ + return tokenEnrichers; + } +} diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENSemanticEntityParser.java new file mode 100644 index 0000000..9a099dd --- /dev/null +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENSemanticEntityParser.java @@ -0,0 +1,73 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.nlpcraft.nlp; + +import org.apache.nlpcraft.NCTokenParser; +import org.apache.nlpcraft.internal.util.NCResourceReader; +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticElement; +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser; +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer; +import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer; +import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser; + +import java.util.List; +import java.util.Map; + +/** + * English (EN) {@link NCSemanticEntityParser} that always supplies a new Porter stemmer and a new OpenNLP token parser to the super constructor, so callers pass only the model-specific arguments. + */ +public class NCENSemanticEntityParser extends NCSemanticEntityParser { + private static final NCResourceReader reader = new NCResourceReader(); + + private static NCSemanticStemmer mkStemmer() { + return new NCEnSemanticPorterStemmer(); + } + + private static NCOpenNLPTokenParser mkParser() { + return new NCOpenNLPTokenParser( + reader.getPath("opennlp/en-token.bin"), + reader.getPath("opennlp/en-pos-maxent.bin"), + reader.getPath("opennlp/en-lemmatizer.dict") + ); + } + + /** + * Creates the parser from a list of semantic elements. + * @param elms Semantic elements forwarded unchanged to the super constructor. + */ + public NCENSemanticEntityParser(List<NCSemanticElement> elms) { + super(mkStemmer(), mkParser(), elms); + } + + /** + * Creates the parser from macros and a list of semantic elements. + * @param macros Macros forwarded unchanged to the super constructor (presumably name-to-expansion pairs; confirm against the super class). + * @param elms Semantic elements forwarded unchanged to the super constructor. + */ + public NCENSemanticEntityParser(Map<String, String> macros, List<NCSemanticElement> elms) { + super(mkStemmer(), mkParser(), macros, elms); + } + + /** + * Creates the parser from a source string. + * @param src Source forwarded unchanged to the super constructor; the spec added in this commit passes a YAML model resource path. + */ + public NCENSemanticEntityParser(String src) { + super(mkStemmer(), mkParser(), src); + } +} diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala new file mode 100644 index 0000000..a8a6d1b --- /dev/null +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala @@ -0,0 +1,60 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. 
See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * https://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.nlpcraft.nlp + +import org.apache.nlpcraft.* +import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser +import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer +import org.apache.nlpcraft.nlp.util.NCTestModelAdapter +import org.junit.jupiter.api.Test + +import scala.util.Using + +class NCENDefaultPipelineSpec: + /** + * Builds a test model: an NCModelAdapter with one light-switch intent callback that reports the matched action and locations. + * @param cfg Model configuration passed to the adapter. + * @param pipeline Pipeline passed to the adapter. + * @return Model whose callback returns an ASK_RESULT with body "Lights are [on|off] in [locations]", using "entire house" when no location entity matched. + */ + private def mkModel(cfg: NCModelConfig, pipeline: NCModelPipeline): NCModel = + new NCModelAdapter(cfg, pipeline): + @NCIntent("intent=ls term(act)={has(ent_groups, 'act')} term(loc)={# == 'ls:loc'}*") + @NCIntentSample(Array( + "Please, put the light out in the upstairs bedroom.", + )) + def onMatch( + @NCIntentTerm("act") actEnt: NCEntity, + @NCIntentTerm("loc") locEnts: List[NCEntity] + ): NCResult = + val status = if actEnt.getId == "ls:on" then "on" else "off" + val locations = if locEnts.isEmpty then "entire house" else locEnts.map(_.mkText()).mkString(", ") + val res = new NCResult() + res.setType(NCResultType.ASK_RESULT) + res.setBody(s"Lights are [$status] in [${locations.toLowerCase}].") + res + + @Test + def test(): Unit = + val cfg = new NCModelConfig("test.id", "Test model", "1.0") + // Default EN pipeline with default EN 
semantic parser. + val pipeline = new NCENDefaultPipeline(new NCENSemanticEntityParser("models/lightswitch_model.yaml")) + + Using.resource(new NCModelClient(mkModel(cfg, pipeline))) { client => + println(client.ask("Please, put the light out in the upstairs bedroom.", null, "userId").getBody) + } \ No newline at end of file