[incubator-nlpcraft] branch NLPCRAFT-472 updated: WIP.

sergeykamov Thu, 30 Dec 2021 05:29:00 -0800

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-472
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
     new c338a38  WIP.
c338a38 is described below

commit c338a3896fd2078530421eefe924757993858bc9
Author: Sergey Kamov <skhdlem...@gmail.com>
AuthorDate: Thu Dec 30 16:28:46 2021 +0300

    WIP.
---
 .../semantic/impl/NCSemanticEntityParserImpl.scala    |  2 +-
 .../semantic/impl/NCSemanticSynonymsProcessor.scala   | 19 +++++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
index 665f0a7..5f355e7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
@@ -34,7 +34,7 @@ import scala.jdk.CollectionConverters.*
 object NCSemanticEntityParserImpl:
     def apply(stemmer: NCSemanticTextStemmer, macros: Jmap[String, String], elems: JList[NCSemanticElement]): NCSemanticEntityParserImpl =
         require(stemmer != null)
-        require(macros != null)
+        require(elems != null)
 
         new NCSemanticEntityParserImpl(
             stemmer,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
index 3374d8d..e296472 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
@@ -24,6 +24,7 @@ import org.apache.nlpcraft.internal.makro.NCMacroParser
 import org.apache.nlpcraft.nlp.entity.parser.semantic.*
 import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticChunkKind.*
 
+import com.typesafe.scalalogging.LazyLogging
 import java.io.InputStream
 import java.util
 import java.util.List as JList
@@ -39,12 +40,26 @@ private[impl] case class NCSemanticSynonymsHolder(
 /**
   *
   */
-private[impl] object NCSemanticSynonymsProcessor:
+private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
     private final val SUSP_SYNS_CHARS = Seq("?", "*", "+")
     private final val REGEX_FIX = "//"
 
+    // TODO: extend.
     private def validate(macros: Map[String, String], elements: Seq[NCSemanticElement]): Unit =
-        () // TODO:
+        if (elements == null || elements.isEmpty)
+            throw new NCException("Elements cannot be empty") // TODO:
+
+        for (e <- elements)
+            if (e.getSynonyms != null)
+                val susp = e.getSynonyms.asScala.filter(syn => !syn.contains("//") && SUSP_SYNS_CHARS.exists(susp => syn.contains(susp)))
+
+                if susp.nonEmpty then
+                    logger.warn(
+                        s"Suspicious synonyms detected (use of ${SUSP_SYNS_CHARS.map(s => s"'$s'").mkString(", ")} chars) [" +
+                            s"elementId=${e.getId}, " +
+                            s"synonyms=[${susp.mkString(", ")}]" +
+                            s"]"
+                    )
 
     private def startsAndEnds(fix: String, s: String): Boolean = s.startsWith(fix) && s.endsWith(fix)
     private def mkChunk(stemmer: NCSemanticTextStemmer, chunk: String): NCSemanticSynonymChunk =

[incubator-nlpcraft] branch NLPCRAFT-472 updated: WIP.

Reply via email to