This is an automated email from the ASF dual-hosted git repository. sergeykamov pushed a commit to branch NLPCRAFT-472 in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-472 by this push:
     new c338a38  WIP.
c338a38 is described below

commit c338a3896fd2078530421eefe924757993858bc9
Author: Sergey Kamov <skhdlem...@gmail.com>
AuthorDate: Thu Dec 30 16:28:46 2021 +0300

    WIP.
---
 .../semantic/impl/NCSemanticEntityParserImpl.scala  |  2 +-
 .../semantic/impl/NCSemanticSynonymsProcessor.scala | 19 +++++++++++++++++--
 2 files changed, 18 insertions(+), 3 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
index 665f0a7..5f355e7 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticEntityParserImpl.scala
@@ -34,7 +34,7 @@ import scala.jdk.CollectionConverters.*
 object NCSemanticEntityParserImpl:
     def apply(stemmer: NCSemanticTextStemmer, macros: Jmap[String, String], elems: JList[NCSemanticElement]): NCSemanticEntityParserImpl =
         require(stemmer != null)
-        require(macros != null)
+        require(elems != null)

         new NCSemanticEntityParserImpl(
             stemmer,
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
index 3374d8d..e296472 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/entity/parser/semantic/impl/NCSemanticSynonymsProcessor.scala
@@ -24,6 +24,7 @@ import org.apache.nlpcraft.internal.makro.NCMacroParser
 import org.apache.nlpcraft.nlp.entity.parser.semantic.*
 import org.apache.nlpcraft.nlp.entity.parser.semantic.impl.NCSemanticChunkKind.*

+import com.typesafe.scalalogging.LazyLogging
 import java.io.InputStream
 import java.util
 import java.util.List as JList
@@ -39,12 +40,26 @@ private[impl] case class NCSemanticSynonymsHolder(
 /**
   *
   */
-private[impl] object NCSemanticSynonymsProcessor:
+private[impl] object NCSemanticSynonymsProcessor extends LazyLogging:
     private final val SUSP_SYNS_CHARS = Seq("?", "*", "+")
     private final val REGEX_FIX = "//"

+    // TODO: extend.
     private def validate(macros: Map[String, String], elements: Seq[NCSemanticElement]): Unit =
-        () // TODO:
+        if (elements == null || elements.isEmpty)
+            throw new NCException("Elements cannot be empty") // TODO:
+
+        for (e <- elements)
+            if (e.getSynonyms != null)
+                val susp = e.getSynonyms.asScala.filter(syn => !syn.contains("//") && SUSP_SYNS_CHARS.exists(susp => syn.contains(susp)))
+
+                if susp.nonEmpty then
+                    logger.warn(
+                        s"Suspicious synonyms detected (use of ${SUSP_SYNS_CHARS.map(s => s"'$s'").mkString(", ")} chars) [" +
+                        s"elementId=${e.getId}, " +
+                        s"synonyms=[${susp.mkString(", ")}]" +
+                        s"]"
+                    )

     private def startsAndEnds(fix: String, s: String): Boolean = s.startsWith(fix) && s.endsWith(fix)
     private def mkChunk(stemmer: NCSemanticTextStemmer, chunk: String): NCSemanticSynonymChunk =