[incubator-nlpcraft] 01/01: EN adapters added.

sergeykamov Fri, 25 Feb 2022 07:44:49 -0800

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-483
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


commit 7635454b90b2a4882af559dc4b951d65796e0e20
Author: Sergey Kamov <skhdlem...@gmail.com>
AuthorDate: Fri Feb 25 18:44:27 2022 +0300

    EN adapters added.
---
 .../apache/nlpcraft/nlp/NCENDefaultPipeline.java   | 89 ++++++++++++++++++++++
 .../nlpcraft/nlp/NCENSemanticEntityParser.java     | 73 ++++++++++++++++++
 .../nlpcraft/nlp/NCENDefaultPipelineSpec.scala     | 60 +++++++++++++++
 3 files changed, 222 insertions(+)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENDefaultPipeline.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENDefaultPipeline.java
new file mode 100644
index 0000000..d679ed9
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENDefaultPipeline.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp;
+
+import org.apache.nlpcraft.NCEntityParser;
+import org.apache.nlpcraft.NCModelPipeline;
+import org.apache.nlpcraft.NCTokenEnricher;
+import org.apache.nlpcraft.NCTokenParser;
+import org.apache.nlpcraft.internal.util.NCResourceReader;
+import org.apache.nlpcraft.nlp.token.enricher.en.NCBracketsTokenEnricher;
+import org.apache.nlpcraft.nlp.token.enricher.en.NCDictionaryTokenEnricher;
+import org.apache.nlpcraft.nlp.token.enricher.en.NCQuotesTokenEnricher;
+import org.apache.nlpcraft.nlp.token.enricher.en.NCStopWordsTokenEnricher;
+import org.apache.nlpcraft.nlp.token.enricher.en.NСSwearWordsTokenEnricher;
+import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser;
+
+import java.util.Arrays;
+import java.util.Collections;
+import java.util.List;
+
+/**
+ *
+ */
+public class NCENDefaultPipeline implements NCModelPipeline {
+    private static final NCResourceReader reader = new NCResourceReader();
+
+    private final NCTokenParser tp = new NCOpenNLPTokenParser(
+        reader.getPath("opennlp/en-token.bin"),
+        reader.getPath("opennlp/en-pos-maxent.bin"),
+        reader.getPath("opennlp/en-lemmatizer.dict")
+    );
+
+    private List<NCTokenEnricher> tokenEnrichers = Arrays.asList(
+        new NCStopWordsTokenEnricher(),
+        new NСSwearWordsTokenEnricher(reader.getPath("badfilter/swear_words.txt")),
+        new NCQuotesTokenEnricher(),
+        new NCDictionaryTokenEnricher(),
+        new NCBracketsTokenEnricher()
+
+    );
+
+    private final List<NCEntityParser> parsers;
+
+    /**
+     *
+     * @param parsers
+     */
+    public NCENDefaultPipeline(List<NCEntityParser> parsers) {
+        this.parsers = parsers;
+    }
+
+    /**
+     *
+     * @param parser
+     */
+    public NCENDefaultPipeline(NCEntityParser parser) {
+        this.parsers = Collections.singletonList(parser);
+    }
+
+    @Override
+    public NCTokenParser getTokenParser() {
+        return tp;
+    }
+
+    @Override
+    public List<NCEntityParser> getEntityParsers() {
+        return parsers;
+    }
+
+    @Override
+    public List<NCTokenEnricher> getTokenEnrichers() {
+        return tokenEnrichers;
+    }
+}
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENSemanticEntityParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENSemanticEntityParser.java
new file mode 100644
index 0000000..9a099dd
--- /dev/null
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/NCENSemanticEntityParser.java
@@ -0,0 +1,73 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp;
+
+import org.apache.nlpcraft.NCTokenParser;
+import org.apache.nlpcraft.internal.util.NCResourceReader;
+import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticElement;
+import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser;
+import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticStemmer;
+import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer;
+import org.apache.nlpcraft.nlp.token.parser.opennlp.NCOpenNLPTokenParser;
+
+import java.util.List;
+import java.util.Map;
+
+/**
+ *
+ */
+public class NCENSemanticEntityParser extends NCSemanticEntityParser {
+    private static final NCResourceReader reader = new NCResourceReader();
+
+    private static NCSemanticStemmer mkStemmer() {
+        return new NCEnSemanticPorterStemmer();
+    }
+
+    private static NCOpenNLPTokenParser mkParser() {
+        return new NCOpenNLPTokenParser(
+            reader.getPath("opennlp/en-token.bin"),
+            reader.getPath("opennlp/en-pos-maxent.bin"),
+            reader.getPath("opennlp/en-lemmatizer.dict")
+        );
+    }
+
+    /**
+     *
+     * @param elms
+     */
+    public NCENSemanticEntityParser(List<NCSemanticElement> elms) {
+        super(mkStemmer(), mkParser(), elms);
+    }
+
+    /**
+     *
+     * @param macros
+     * @param elms
+     */
+    public NCENSemanticEntityParser(Map<String, String> macros, List<NCSemanticElement> elms) {
+        super(mkStemmer(), mkParser(), macros, elms);
+    }
+
+    /**
+     *
+     * @param src
+     */
+    public NCENSemanticEntityParser(String src) {
+        super(mkStemmer(), mkParser(), src);
+    }
+}
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala
new file mode 100644
index 0000000..a8a6d1b
--- /dev/null
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/NCENDefaultPipelineSpec.scala
@@ -0,0 +1,60 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *      https://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.nlpcraft.nlp
+
+import org.apache.nlpcraft.*
+import org.apache.nlpcraft.nlp.entity.parser.semantic.NCSemanticEntityParser
+import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.en.NCEnSemanticPorterStemmer
+import org.apache.nlpcraft.nlp.util.NCTestModelAdapter
+import org.junit.jupiter.api.Test
+
+import scala.util.Using
+
+class NCENDefaultPipelineSpec:
+    /**
+      *
+      * @param cfg
+      * @param pipeline
+      * @return
+      */
+    private def mkModel(cfg: NCModelConfig, pipeline: NCModelPipeline): NCModel =
+        new NCModelAdapter(cfg, pipeline):
+            @NCIntent("intent=ls term(act)={has(ent_groups, 'act')} term(loc)={# == 'ls:loc'}*")
+            @NCIntentSample(Array(
+                "Please, put the light out in the upstairs bedroom.",
+            ))
+            def onMatch(
+                @NCIntentTerm("act") actEnt: NCEntity,
+                @NCIntentTerm("loc") locEnts: List[NCEntity]
+            ): NCResult =
+                val status = if actEnt.getId == "ls:on" then "on" else "off"
+                val locations = if locEnts.isEmpty then "entire house" else locEnts.map(_.mkText()).mkString(", ")
+                val res = new NCResult()
+                res.setType(NCResultType.ASK_RESULT)
+                res.setBody(s"Lights are [$status] in [${locations.toLowerCase}].")
+                res
+
+    @Test
+    def test(): Unit =
+        val cfg = new NCModelConfig("test.id", "Test model", "1.0")
+        // Default EN pipeline with default EN semantic parser.
+        val pipeline = new NCENDefaultPipeline(new NCENSemanticEntityParser("models/lightswitch_model.yaml"))
+
+        Using.resource(new NCModelClient(mkModel(cfg, pipeline))) { client =>
+            println(client.ask("Please, put the light out in the upstairs bedroom.", null, "userId").getBody)
+        }
\ No newline at end of file

[incubator-nlpcraft] 01/01: EN adapters added.

Reply via email to