This is an automated email from the ASF dual-hosted git repository. aradzinski pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
commit 405b5ac151bc041f27fe240f0e97d2e2ac096360 Author: Aaron Radzinski <[email protected]> AuthorDate: Sun Dec 13 18:00:13 2020 -0800 split-trim-filter refactoring. --- .../nlpcraft/common/config/NCConfigurable.scala | 6 +- .../nlpcraft/common/makro/NCMacroParser.scala | 3 +- .../common/nlp/numeric/NCNumericGenerator.scala | 3 +- .../common/nlp/numeric/NCNumericManager.scala | 2 +- .../org/apache/nlpcraft/common/util/NCUtils.scala | 35 +++++++++-- .../nlpcraft/examples/sql/db/SqlBuilder.scala | 6 +- .../nlpcraft/examples/sql/db/SqlValueLoader.scala | 4 +- .../nlpcraft/model/tools/cmdline/NCCli.scala | 69 ++++++++++++++++++++-- .../sqlgen/impl/NCSqlModelGeneratorImpl.scala | 18 +++--- .../org/apache/nlpcraft/probe/NCProbeBoot.scala | 2 +- .../probe/mgrs/deploy/NCDeployManager.scala | 14 ++--- .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala | 5 +- .../geo/tools/metro/NCGeoMetroGenerator.scala | 4 +- .../server/nlp/enrichers/date/NCDateEnricher.scala | 2 +- .../server/nlp/enrichers/date/NCDateParser.scala | 9 +-- .../enrichers/stopword/NCStopWordEnricher.scala | 4 +- .../server/nlp/preproc/NCPreProcessManager.scala | 10 ++-- .../server/nlp/spell/NCSpellCheckManager.scala | 2 +- .../org/apache/nlpcraft/server/sql/NCSql.scala | 2 +- .../apache/nlpcraft/server/sql/NCSqlManager.scala | 2 +- .../server/sugsyn/NCSuggestSynonymManager.scala | 2 +- .../intent/impl/NCIntentSolverEngineSpec.scala | 3 +- .../sqlgen/impl/NCSqlModelGeneratorImplSpec.scala | 5 +- 23 files changed, 152 insertions(+), 60 deletions(-) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala index 222286d..5df0d27 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/config/NCConfigurable.scala @@ -19,7 +19,7 @@ package org.apache.nlpcraft.common.config import com.typesafe.config.{Config, ConfigFactory} import com.typesafe.scalalogging.LazyLogging -import org.apache.nlpcraft.common.NCE +import org.apache.nlpcraft.common._ import scala.collection.JavaConverters._ @@ -256,7 +256,7 @@ trait NCConfigurable { * * @param s */ - private def parseCsv(s: String): Seq[String] = s.split(",").map(_.trim).filter(_.nonEmpty) + private def parseCsv(s: String): Seq[String] = U.splitTrimFilter(s,",") } object NCConfigurable extends LazyLogging { @@ -332,7 +332,7 @@ object NCConfigurable extends LazyLogging { else cfg = ConfigFactory.load(tmpCfg) - val lines = cfg.origin().description().split(",").drop(1).distinct + val lines = U.splitTrimFilter(cfg.origin().description(),",").drop(1).distinct logger.info(s"NLPCraft configuration successfully loaded as a merge of: ${lines.mkString("\n + ", "\n + ", "")}") } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala index 951dddd..fe13ce8 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala @@ -293,8 +293,7 @@ class NCMacroParser { } // Trims all duplicate spaces. - private def trimDupSpaces(s: String) = - s.split(" ").map(_.trim).filter(_.nonEmpty).mkString(" ") + private def trimDupSpaces(s: String) = U.splitTrimFilter(s, " ").mkString(" ") // Processes '\' escapes for '{', '}', '|', and '*'. private def processEscapes(s: String): String = { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericGenerator.scala index b0860d4..d404e8f 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericGenerator.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericGenerator.scala @@ -18,6 +18,7 @@ package org.apache.nlpcraft.common.nlp.numeric import java.text.DecimalFormat +import org.apache.nlpcraft.common._ /** * Converts numbers to their textual (word) presentation. @@ -105,7 +106,7 @@ object NCNumericGenerator { val n10x1 = convertSmall(s10x1) - (n10x9 + n10x6 + n10x3 + n10x1).split(" ").filter(!_.isEmpty).mkString(" ") + U.normalize(n10x9 + n10x6 + n10x3 + n10x1," ") } } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala index be9bed3..4fa8ef5 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/nlp/numeric/NCNumericManager.scala @@ -136,7 +136,7 @@ object NCNumericManager extends NCService { ackStarting() genNums = mapResource("numeric/numeric.txt", "utf-8", logger, { - _.filter(s ⇒ !s.isEmpty && !s.trim.startsWith("#")). + _.filter(s ⇒ s.nonEmpty && !s.trim.startsWith("#")). map(_.split("=")). map(s ⇒ (s(1), s(0).toInt)). toMap diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala index 854b642..a4f614e 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/util/NCUtils.scala @@ -34,7 +34,6 @@ import java.util.regex.Pattern import java.util.stream.Collectors import java.util.zip.{ZipInputStream, GZIPInputStream ⇒ GIS, GZIPOutputStream ⇒ GOS} import java.util.{Locale, Properties, Random, Timer, TimerTask, Calendar ⇒ C} - import com.fasterxml.jackson.annotation.JsonInclude.Include import com.fasterxml.jackson.core.`type`.TypeReference import com.fasterxml.jackson.databind.{DeserializationFeature, ObjectMapper} @@ -52,10 +51,10 @@ import org.apache.nlpcraft.common.version.NCVersion import org.jsoup.Jsoup import org.jsoup.nodes.Document import resource._ + import java.net.http.HttpClient import java.net.http.HttpRequest import java.net.http.HttpResponse - import scala.collection.JavaConverters._ import scala.collection._ import scala.concurrent.ExecutionContext.Implicits.global @@ -158,6 +157,34 @@ object NCUtils extends LazyLogging { ANSI_SEQ.matcher(s).replaceAll("") /** + * Trims each sequence string and filters out empty ones. + * + * @param s String to process. + * @return + */ + def trimFilter(s: Seq[String]): Seq[String] = + s.map(_.trim).filter(_.nonEmpty) + + /** + * Splits, trims and filters empty strings for the given string. + * + * @param s String to split. + * @param sep Separator (regex) to split by. + * @return + */ + def splitTrimFilter(s: String, sep: String): Seq[String] = + trimFilter(s.split(sep)) + + /** + * + * @param s + * @param sep + * @return + */ + def normalize(s: String, sep: String): String = + splitTrimFilter(s, sep).mkString(sep) + + /** * Escapes given string for JSON according to RFC 4627 http://www.ietf.org/rfc/rfc4627.txt. * * @param s String to escape. @@ -431,7 +458,7 @@ object NCUtils extends LazyLogging { * @return */ private def readLcTrimFilter(in: BufferedSource): List[String] = - in.getLines().map(_.toLowerCase.trim).filter(s ⇒ !s.isEmpty && !s.startsWith("#")).toList + in.getLines().map(_.toLowerCase.trim).filter(s ⇒ s.nonEmpty && !s.startsWith("#")).toList /** * Reads lines from given file converting to lower case, trimming, and filtering @@ -1145,7 +1172,7 @@ object NCUtils extends LazyLogging { * * @param s String to check. */ - def neon(s: String): Boolean = s != null && !s.isEmpty + def neon(s: String): Boolean = s != null && s.nonEmpty /** * Generates (relatively) unique ID good for a short-term usage. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlBuilder.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlBuilder.scala index f75a0b4..6b56cac 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlBuilder.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlBuilder.scala @@ -20,6 +20,7 @@ package org.apache.nlpcraft.examples.sql.db import java.sql.Types import com.typesafe.scalalogging.LazyLogging +import org.apache.nlpcraft.common._ import org.apache.nlpcraft.model.tools.sqlgen.NCSqlJoinType._ import org.apache.nlpcraft.model.tools.sqlgen._ import org.apache.nlpcraft.model.tools.sqlgen.impl.NCSqlSortImpl @@ -490,7 +491,7 @@ case class SqlBuilder(schema: NCSqlSchema) extends LazyLogging { val extSorts = extendSort(sortsNorm, tblsNorm, extCols) SqlQuery( - sql = + sql = U.normalize( s""" |SELECT | ${if (distinct) "DISTINCT" else ""} @@ -499,7 +500,8 @@ case class SqlBuilder(schema: NCSqlSchema) extends LazyLogging { | ${if (extConds.isEmpty) "" else s"WHERE ${extConds.mkString(" AND ")}"} | ${if (extSorts.isEmpty) "" else s"ORDER BY ${extSorts.map(sql).mkString(", ")}"} | LIMIT ${limit.flatMap(p ⇒ Some(p.getLimit)).getOrElse(DFLT_LIMIT)} - |""".stripMargin.split(" ").map(_.trim).filter(_.nonEmpty).mkString(" "), + |""".stripMargin, " " + ), parameters = extParams ) } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlValueLoader.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlValueLoader.scala index 4c270e7..353107a 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlValueLoader.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/examples/sql/db/SqlValueLoader.scala @@ -40,8 +40,8 @@ class SqlValueLoader extends NCValueLoader with LazyLogging { SqlAccess.select(SqlQuery(s"SELECT $col FROM $tab WHERE $col IS NOT NULL", Seq.empty), logResult = false). rows. map(_.head). - map(_.toString.trim). - filter(!_.isEmpty). + map(_.trim). + filter(_.nonEmpty). map( v ⇒ new NCValue { override def getName: String = v diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala index 2522670..82e0d0c 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/cmdline/NCCli.scala @@ -1210,7 +1210,7 @@ object NCCli extends App { value = Some("path"), optional = true, desc = - s"Additional JVM classpath component that will be appended to the default NLPCraft JVM classpath. " + + s"Additional JVM classpath that will be appended to the default NLPCraft JVM classpath. " + s"Although this configuration property is optional, when deploying your own models you must " + s"provide this additional classpath for the models and their dependencies this probe will be hosting. " + s"NOTE: this is only optional if you are running example models shipped with NLPCraft." @@ -1273,6 +1273,58 @@ object NCCli extends App { ) ), Command( + name = "test-model", + group = "3. Miscellaneous", + synopsis = s"Runs auto model validation.", + desc = Some( + s"Runs ${y("'NCTestAutoModelValidator'")} model auto-validator for given models." + ), + body = cmdTestModel, + params = Seq( + Parameter( + id = "cp", + names = Seq("--cp", "-p"), + value = Some("path"), + optional = true, + desc = + s"Additional JVM classpath that will be appended to the default NLPCraft JVM classpath. " + + s"Although this configuration property is optional, when testing your own models you must " + + s"provide this additional classpath for the models and their dependencies. " + + s"NOTE: this is only optional if you are testing example models shipped with NLPCraft." + ), + Parameter( + id = "models", + names = Seq("--models", "-m"), + value = Some("<model list>"), + desc = + s"Comma separated list of fully qualified class names for models to test. NOTE: if you provide " + + s"the list of your own models here - you must also provide the additional classpath " + + s"for them via ${y("--cp")} parameter." + ), + Parameter( + id = "jvmopts", + names = Seq("--jvm-opts", "-j"), + value = Some("<jvm flags>"), + optional = true, + desc = + s"Space separated list of JVM flags to use. If not provided, the default ${y("'-ea -Xms1024m'")} flags " + + s"will be used." + ) + ), + examples = Seq( + Example( + usage = Seq( + s"$PROMPT $SCRIPT_NAME test-model ", + " --models=my.package.Model ", + " --cp=/opt/target/classes ", + " --jmv-opts=\"-ea -Xms2048m\"" + ), + desc = + s"Runs model auto-validator for ${y("'my.package.Model'")} model." + ) + ) + ), + Command( name = "info-server", group = "1. Server & Probe Commands", synopsis = s"Info about local server.", @@ -1659,7 +1711,7 @@ object NCCli extends App { case None ⇒ 2 // Default. } val jvmOpts = args.find(_.parameter.id == "jvmopts") match { - case Some(arg) ⇒ stripQuotes(arg.value.get).split(" ").map(_.trim).filter(_.nonEmpty).toSeq + case Some(arg) ⇒ U.splitTrimFilter(stripQuotes(arg.value.get), " ") case None ⇒ Seq("-ea", "-Xms2048m", "-XX:+UseG1GC") } @@ -1861,6 +1913,15 @@ object NCCli extends App { * @param args Arguments, if any, for this command. * @param repl Whether or not running from REPL. */ + private def cmdTestModel(cmd: Command, args: Seq[Argument], repl: Boolean): Unit = { + + } + + /** + * @param cmd Command descriptor. + * @param args Arguments, if any, for this command. + * @param repl Whether or not running from REPL. + */ private def cmdStartProbe(cmd: Command, args: Seq[Argument], repl: Boolean): Unit = { // Ensure that there is a local server running since probe // cannot finish its start unless there's a server to connect to. @@ -1889,7 +1950,7 @@ object NCCli extends App { case None ⇒ null } val jvmOpts = args.find(_.parameter.id == "jvmopts") match { - case Some(arg) ⇒ stripQuotes(arg.value.get).split(" ").map(_.trim).filter(_.nonEmpty).toSeq + case Some(arg) ⇒ U.splitTrimFilter(stripQuotes(arg.value.get), " ") case None ⇒ Seq("-ea", "-Xms1024m") } @@ -2683,7 +2744,7 @@ object NCCli extends App { tbl += (" Pool increment", s"${g(beacon.dbPoolInc)}") tbl += (" Reset on start", s"${g(beacon.dbInit)}") tbl += ("REST:", "") - tbl += (" Endpoint", s"http://${g(beacon.restEndpoint)}") // TODO: https? + tbl += (" Endpoint", s"${g("http://" + beacon.restEndpoint)}") // TODO: https? tbl += (" API provider", s"${g(beacon.restApi)}") tbl += ("Probe:", "") tbl += (" Uplink", s"${g(beacon.upLink)}") diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImpl.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImpl.scala index 3783925..bba6708 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImpl.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImpl.scala @@ -57,7 +57,7 @@ object NCSqlModelGeneratorImpl { val nameLc: String val elmNameLc: String - private val nameWs = elmNameLc.replaceAll("_", " ").split(" ").filter(_.nonEmpty).mkString(" ") + private val nameWs = U.normalize(elmNameLc.replaceAll("_"," ")," ") lazy val synonym = if (elmNameLc == nameWs) @@ -144,7 +144,7 @@ object NCSqlModelGeneratorImpl { * @return */ private def mkPrefixFun(s: String): String ⇒ String = { - val arr = s.split(",").map(_.trim).filter(_.nonEmpty) + val arr = U.splitTrimFilter(s, ",") z ⇒ (for (fix ← arr if z.startsWith(fix)) yield z.substring(fix.length)).headOption.getOrElse(z) } @@ -156,7 +156,7 @@ object NCSqlModelGeneratorImpl { * @return */ private def mkSuffixFun(s: String): String ⇒ String = { - val arr = s.split(",").map(_.trim).filter(_.nonEmpty) + val arr = U.splitTrimFilter(s, ",") z ⇒ (for (fix ← arr if z.endsWith(fix)) yield z.substring(0, z.length - fix.length)).headOption.getOrElse(z) } @@ -168,12 +168,12 @@ object NCSqlModelGeneratorImpl { */ private def mkPredicate(s: String): (String, String) ⇒ Boolean = { def convert(expr: String): (String, String) ⇒ Boolean = { - val s = expr.split("#").filter(!_.isEmpty) + val s = U.splitTrimFilter(expr, "#") val (tbl: String, col: String) = s.length match { - case 1 if !expr.contains("#") ⇒ (s(0), "") // 'table' - case 1 if expr.contains("#") ⇒ ("", s(0)) // '#column' - case 2 ⇒ (s(0), s(1)) // 'table#column' + case 1 if !expr.contains("#") ⇒ (s.head, "") // 'table' + case 1 if expr.contains("#") ⇒ ("", s.head) // '#column' + case 2 ⇒ (s.head, s(1)) // 'table#column' case _ ⇒ throw new Exception(s"Invalid table and/or column filter: $C$expr$RST") } @@ -203,7 +203,7 @@ object NCSqlModelGeneratorImpl { } } - val predicates = s.split(";").map(_.trim()).map(convert) + val predicates = U.splitTrimFilter(s,";").map(convert) (tbl: String, col: String) ⇒ predicates.exists(_(tbl, col)) } @@ -235,7 +235,7 @@ object NCSqlModelGeneratorImpl { * @return */ private def removeSeqDups(syn: String): String = { - val words = syn.split(" ").filter(_.nonEmpty) + val words = U.splitTrimFilter(syn, " ") words .zip(words.map(NCNlpPorterStemmer.stem)) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala index e8ec054..3aab814 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/NCProbeBoot.scala @@ -85,7 +85,7 @@ private [probe] object NCProbeBoot extends LazyLogging with NCOpenCensusTrace { ) { lazy val upLinkString = s"${upLink._1}:${upLink._2}" lazy val downLinkString = s"${downLink._1}:${downLink._2}" - lazy val modelsSeq: Seq[String] = models.split(",").map(_.trim) + lazy val modelsSeq: Seq[String] = U.splitTrimFilter(models,",") } private def mkDefault(): Config = { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala index d7fb055..8e7f0b7 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/deploy/NCDeployManager.scala @@ -30,7 +30,7 @@ import org.apache.nlpcraft.common.ascii.NCAsciiTable import org.apache.nlpcraft.common.config.NCConfigurable import org.apache.nlpcraft.common.makro.NCMacroParser import org.apache.nlpcraft.common.nlp.core.{NCNlpCoreManager, NCNlpPorterStemmer} -import org.apache.nlpcraft.common.util.NCUtils.{DSL_FIX, REGEX_FIX, escapeJson} +import org.apache.nlpcraft.common.util.NCUtils.{DSL_FIX, REGEX_FIX} import org.apache.nlpcraft.model._ import org.apache.nlpcraft.model.factories.basic.NCBasicModelFactory import org.apache.nlpcraft.model.intent.impl.{NCIntentDslCompiler, NCIntentSolver} @@ -255,10 +255,8 @@ object NCDeployManager extends NCService with DecorateAsScala { var curr = 0 val len = x.length - (2 + 2) // 2 is a prefix/suffix length. Hack... - def splitUp(s: String): Seq[String] = s.split(" ").map(_.trim).filter(_.nonEmpty).toSeq - def processChunk(fix: String): Unit = { - chunks ++= splitUp(x.substring(start, curr)) + chunks ++= U.splitTrimFilter(x.substring(start, curr), " ") x.indexOf(fix, curr + fix.length) match { case -1 ⇒ @@ -286,7 +284,7 @@ object NCDeployManager extends NCService with DecorateAsScala { curr += 1 } - chunks ++= splitUp(x.substring(start)) + chunks ++= U.splitTrimFilter(x.substring(start), " ") chunks.map(mkChunk(mdlId, _)) } @@ -627,7 +625,7 @@ object NCDeployManager extends NCService with DecorateAsScala { case None ⇒ // No-op. } - data ++= Config.models.split(",").map(_.trim).map(makeModelWrapper) + data ++= U.splitTrimFilter(Config.models, ",").map(makeModelWrapper) Config.jarsFolder match { case Some(jarsFolder) ⇒ @@ -790,7 +788,7 @@ object NCDeployManager extends NCService with DecorateAsScala { s"mdlId=${mdl.getId}, " + s"elm=${elm.toString}" + s"]") - else if (elm.getId.length == 0) + else if (elm.getId.isEmpty) throw new NCE(s"Model element ID cannot be empty [" + s"mdlId=${mdl.getId}, " + s"elm=${elm.toString}]" + @@ -932,7 +930,7 @@ object NCDeployManager extends NCService with DecorateAsScala { if (startsAndEnds(REGEX_FIX, chunk)) { val ptrn = stripSuffix(REGEX_FIX, chunk) - if (ptrn.length > 0) + if (ptrn.nonEmpty) try NCProbeSynonymChunk(kind = REGEX, origText = chunk, regex = Pattern.compile(ptrn)) catch { diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala index 43c3748..9e57605 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala @@ -20,11 +20,12 @@ package org.apache.nlpcraft.probe.mgrs.nlp.enrichers.limit import java.io.Serializable import io.opencensus.trace.Span +import org.apache.nlpcraft.common._ import org.apache.nlpcraft.common.makro.NCMacroParser import org.apache.nlpcraft.common.nlp.core.NCNlpCoreManager import org.apache.nlpcraft.common.nlp.numeric.{NCNumeric, NCNumericManager} import org.apache.nlpcraft.common.nlp.{NCNlpSentence, NCNlpSentenceNote, NCNlpSentenceToken} -import org.apache.nlpcraft.common.{NCE, NCService} +import org.apache.nlpcraft.common.NCService import org.apache.nlpcraft.probe.mgrs.NCProbeModel import org.apache.nlpcraft.probe.mgrs.nlp.NCProbeEnricher @@ -194,7 +195,7 @@ object NCLimitEnricher extends NCProbeEnricher { limits= { // Few numbers cannot be in on template. - require(SYNONYMS.forall(_.split(" ").map(_.trim).count(_ == CD) < 2)) + require(SYNONYMS.forall(s ⇒ U.splitTrimFilter(s, " ").count(_ == CD) < 2)) def toMacros(seq: Iterable[String]): String = seq.mkString("|") diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/tools/metro/NCGeoMetroGenerator.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/tools/metro/NCGeoMetroGenerator.scala index 35616dc..8093576 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/tools/metro/NCGeoMetroGenerator.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/geo/tools/metro/NCGeoMetroGenerator.scala @@ -40,10 +40,10 @@ object NCGeoMetroGenerator extends App { case class Holder(name: String) private def deleteBrackets(s: String): String = - s.replaceAll("\\(", " ").replaceAll("\\)", " ").split(" ").map(_.trim).filter(_.nonEmpty).mkString(" ") + U.normalize(s.replaceAll("\\(", " ").replaceAll("\\)", " "), " ") private def generate() { - val lines = U.readPath(in, "UTF-8").toSeq.map(_.trim).filter(_.nonEmpty) + val lines = U.readPath(in, "UTF-8").map(_.trim).filter(_.nonEmpty) // Skips header. val metro = lines.tail.filter(!_.contains("(not set)")).map(line ⇒ Holder(line.takeWhile(_ != ','))) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala index 2070ef9..44b8a49 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateEnricher.scala @@ -57,7 +57,7 @@ object NCDateEnricher extends NCServerEnricher { // Preposition data holder. case class P(text: String) { - val words: Seq[String] = text.split(" ").filter(!_.trim.isEmpty).toSeq + val words: Seq[String] = U.splitTrimFilter(text," ") val length: Int = words.length } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateParser.scala index 3abfa49..bb5daea 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateParser.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/date/NCDateParser.scala @@ -19,6 +19,7 @@ package org.apache.nlpcraft.server.nlp.enrichers.date import java.util.{Locale, Calendar ⇒ C} import scala.collection.JavaConverters._ +import org.apache.nlpcraft.common._ /** * Date parser. @@ -344,7 +345,7 @@ object NCDateParser { }).getTimeInMillis } - private def parseInt(s: String): Option[Int] = if (!s.isEmpty) Some(s.toInt) else None + private def parseInt(s: String): Option[Int] = if (s.nonEmpty) Some(s.toInt) else None private def isSign(ch: Char) = ch == '+' || ch == '-' @@ -364,7 +365,7 @@ object NCDateParser { private[date] def calculatePart(fns: String, base: Long): PartResult = { var res = PartResult(base, base, "", Seq.empty[String]) - for (fn ← fns.split(",").map(_.trim)) { + for (fn ← U.splitTrimFilter(fns, ",")) { val resFrom = res.from def after(heads: String*): String = fn.drop(heads.map(_.length).sum) @@ -437,7 +438,7 @@ object NCDateParser { if (shift != 0) shift = years - shift - // Should't be in one function call (last day is relative) + // Should not be in one function call (last day is relative). set(c, C.YEAR → (curYear + shift)) set(c, C.DAY_OF_YEAR → c.getActualMaximum(C.DAY_OF_YEAR)) }) @@ -448,7 +449,7 @@ object NCDateParser { def ld3M(map3m: Map[Int, Int]): PartResult = lastDay((c: C) ⇒ { val n = map3m(MONTH_NUM_MAP(c.get(C.MONTH))) - // Should't be in one function call (last day is relative) + // Should not be in one function call (last day is relative). // Note that keys in `map3m` sorted. set(c, C.MONTH → NUM_MONTH_MAP(map3m.filter(_._2 == n).keys.toSeq.max)) set(c, C.DAY_OF_MONTH → c.getActualMaximum(C.DAY_OF_MONTH)) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala index 71dafc7..71db333 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/enrichers/stopword/NCStopWordEnricher.scala @@ -273,7 +273,7 @@ object NCStopWordEnricher extends NCServerEnricher { drop(idxPos + 1). trim.split(" "). map(_.trim.toUpperCase). - filter(!_.isEmpty). + filter(_.nonEmpty). toSeq. map(p ⇒ if (p.head == '~') p.drop(1).trim → false else p → true). toMap @@ -678,7 +678,7 @@ object NCStopWordEnricher extends NCServerEnricher { val m = readStopWords( U.readResource("stopwords/stop_words.txt", "UTF-8", logger). - map(_.trim).filter(s ⇒ !s.isEmpty && !s.startsWith("#")).toSeq + map(_.trim).filter(s ⇒ s.nonEmpty && !s.startsWith("#")).toSeq ) stopWords = m(false) diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/preproc/NCPreProcessManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/preproc/NCPreProcessManager.scala index 1076f3d..9d0f6f4 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/preproc/NCPreProcessManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/preproc/NCPreProcessManager.scala @@ -18,7 +18,7 @@ package org.apache.nlpcraft.server.nlp.preproc import io.opencensus.trace.Span -import org.apache.nlpcraft.common.NCService +import org.apache.nlpcraft.common._ import org.apache.nlpcraft.server.nlp.spell.NCSpellCheckManager import scala.collection._ @@ -117,20 +117,20 @@ object NCPreProcessManager extends NCService { /** * - * @param sen Input sentence. + * @param sen Input sentence. * @param spellCheck Spell check flag. * @return */ private def collect(sen: Seq[String], spellCheck: Boolean): String = if (spellCheck) - sen.map(NCSpellCheckManager.check).map(_.trim).filter(!_.isEmpty).mkString(" ") + U.trimFilter(sen.map(NCSpellCheckManager.check)).mkString(" ") else - sen.map(_.trim).filter(!_.isEmpty).mkString(" ") + U.trimFilter(sen).mkString(" ") /** * Performs all pre-processing and normalizes the given input raw text. * - * @param rawTxt Raw text to normalize. + * @param rawTxt Raw text to normalize. * @param spellCheck Using spell checking flag. * @return Normalized, pre-processed text. * @param parent Optional parent span. diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala index 8ad7b73..02bafce 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/nlp/spell/NCSpellCheckManager.scala @@ -35,7 +35,7 @@ object NCSpellCheckManager extends NCService { private def isWordUpper(s: String): Boolean = s.forall(_.isUpper) private def isHeadUpper(s: String): Boolean = s.head.isUpper - private def split(s: String): Seq[String] = s.split(" ").filter(!_.isEmpty) + private def split(s: String): Seq[String] = U.splitTrimFilter(s, " ") private def processCase(s: String, sample: String): String = if (isWordUpper(sample)) s.toUpperCase diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSql.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSql.scala index 585f9ed..05ba230 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSql.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSql.scala @@ -138,7 +138,7 @@ object NCSql extends LazyLogging { sql.replace("\n", " "). replace("\t", " "). split(" "). - filter(!_.isEmpty). + filter(_.nonEmpty). mkString(" "). trim diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSqlManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSqlManager.scala index f8ea0f2..d23a632 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSqlManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sql/NCSqlManager.scala @@ -1026,7 +1026,7 @@ object NCSqlManager extends NCService with NCIgniteInstance { mkString("\n"). split(";"). map(_.trim). - filter(!_.isEmpty). + filter(_.nonEmpty). foreach(p ⇒ NCSql.ddl(p)) } diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala index b15cc8e..fc17201 100644 --- a/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala +++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/server/sugsyn/NCSuggestSynonymManager.scala @@ -108,7 +108,7 @@ object NCSuggestSynonymManager extends NCService { } case class SuggestionResult(synonym: String, score: Double) - private def split(s: String): Seq[String] = s.split(" ").toSeq.map(_.trim).filter(_.nonEmpty) + private def split(s: String): Seq[String] = U.splitTrimFilter(s, " ") private def toStem(s: String): String = split(s).map(NCNlpPorterStemmer.stem).mkString(" ") private def toStemWord(s: String): String = NCNlpPorterStemmer.stem(s) diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/impl/NCIntentSolverEngineSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/impl/NCIntentSolverEngineSpec.scala index 85f75a5..9784972 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/impl/NCIntentSolverEngineSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/intent/impl/NCIntentSolverEngineSpec.scala @@ -17,6 +17,7 @@ package org.apache.nlpcraft.model.intent.impl +import org.apache.nlpcraft.common._ import org.apache.nlpcraft.model.intent.utils.NCDslFlowItem import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.Test @@ -34,7 +35,7 @@ class NCIntentSolverEngineSpec { private def matchFlow(hist: String, flow: (String/*Intent ID*/, Int/*min*/, Int/*max*/)*): Boolean = { NCIntentSolverEngine.matchFlow( flow.toArray.map(x ⇒ NCDslFlowItem(x._1.split('|').map(_.trim), x._2, x._3)), - hist.split(" ").map(_.trim) + U.splitTrimFilter(hist, " ") ) } diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImplSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImplSpec.scala index 3d75592..39b6978 100644 --- a/nlpcraft/src/test/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImplSpec.scala +++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/model/tools/sqlgen/impl/NCSqlModelGeneratorImplSpec.scala @@ -17,6 +17,7 @@ package org.apache.nlpcraft.model.tools.sqlgen.impl +import org.apache.nlpcraft.common._ import org.junit.jupiter.api.Assertions.assertTrue import org.junit.jupiter.api.Test @@ -45,7 +46,7 @@ class NCSqlModelGeneratorImplSpec { * @return */ private def mkPrefixFun(s: String): String ⇒ String = { - val arr = s.split(",").map(_.trim).filter(_.nonEmpty) + val arr = U.splitTrimFilter(s, ",") z ⇒ (for (fix ← arr if z.startsWith(fix)) yield z.substring(fix.length)).headOption.getOrElse(z) } @@ -57,7 +58,7 @@ class NCSqlModelGeneratorImplSpec { * @return */ private def mkSuffixFun(s: String): String ⇒ String = { - val arr = s.split(",").map(_.trim).filter(_.nonEmpty) + val arr = U.splitTrimFilter(s, ",") z ⇒ (for (fix ← arr if z.endsWith(fix)) yield z.substring(0, z.length - fix.length)).headOption.getOrElse(z) }
