This is an automated email from the ASF dual-hosted git repository.
sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-30 by this push:
new 0e82186 WIP.
0e82186 is described below
commit 0e8218644e277853a781d67319a89fbefd4a50b9
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Apr 30 18:49:24 2020 +0300
WIP.
---
.../nlpcraft/examples/sql/SqlModelTest.scala | 39 +++++++++++++++++++
.../org/apache/nlpcraft/probe/mgrs/NCSynonym.scala | 44 ++++++++++++----------
.../mgrs/nlp/enrichers/model/NCModelEnricher.scala | 6 +--
3 files changed, 67 insertions(+), 22 deletions(-)
diff --git a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
index fe7851e..56fb264 100644
--- a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
+++ b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
@@ -511,6 +511,45 @@ class SqlModelTest {
|LIMIT
| 1000
""".stripMargin
+ ),
+ Case(
+ Seq(
+ "What are the top orders for the last 2 weeks sorted by
order quantity?"
+ ),
+ """SELECT
+ | order_details.quantity,
+ | orders.order_date,
+ | order_details.unit_price,
+ | order_details.discount,
+ | orders.order_id,
+ | orders.required_date,
+ | customers.customer_id,
+ | customers.company_name,
+ | customers.contact_name,
+ | employees.employee_id,
+ | employees.last_name,
+ | employees.first_name,
+ | products.product_id,
+ | products.product_name,
+ | products.quantity_per_unit,
+ | shippers.shipper_id,
+ | shippers.company_name,
+ | shippers.phone
+ |FROM
+ | order_details
+ | INNER JOIN orders ON order_details.order_id =
orders.order_id
+ | INNER JOIN products ON order_details.product_id =
products.product_id
+ | LEFT JOIN customers ON orders.customer_id =
customers.customer_id
+ | LEFT JOIN shippers ON orders.ship_via =
shippers.shipper_id
+ | LEFT JOIN employees ON orders.employee_id =
employees.employee_id
+ |WHERE
+ | orders.order_date >= ?
+ | AND orders.order_date <= ?
+ |ORDER BY
+ | order_details.quantity DESC
+ |LIMIT
+ | 10
+ """.stripMargin
)
)
}
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCSynonym.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCSynonym.scala
index 94f70a3..12cef01 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCSynonym.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCSynonym.scala
@@ -68,22 +68,26 @@ class NCSynonym(
*/
def isMatch(toks: NCNlpSentenceTokenBuffer): Boolean = {
require(toks != null)
-
- if (toks.isEmpty || size != toks.size || isTextOnly && toks.stemsHash
!= stemsHash)
- false
- else if (isTextOnly)
- toks.stemsHash == stemsHash && toks.stems == stems
- else
- // Same length.
- toks.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
- case (tok, chunk) ⇒
- chunk.kind match {
- case TEXT ⇒ chunk.wordStem == tok.stem
- case REGEX ⇒
chunk.regex.matcher(tok.origText).matches() ||
chunk.regex.matcher(tok.normText).matches()
- case DSL ⇒ throw new AssertionError()
- case _ ⇒ throw new AssertionError()
- }
- }
+
+ val ok =
+ if (isTextOnly)
+ toks.stemsHash == stemsHash && toks.stems == stems
+ else
+ // Same length.
+ toks.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
+ case (tok, chunk) ⇒
+ chunk.kind match {
+ case TEXT ⇒ chunk.wordStem == tok.stem
+ case REGEX ⇒
chunk.regex.matcher(tok.origText).matches() ||
chunk.regex.matcher(tok.normText).matches()
+ case DSL ⇒ throw new AssertionError()
+ case _ ⇒ throw new AssertionError()
+ }
+ }
+
+ // Should be called only for valid tokens count (validation optimized for performance reasons)
+ require(!ok || toks.length == length)
+
+ ok
}
/**
@@ -98,9 +102,7 @@ class NCSynonym(
type Word = NCNlpSentenceToken
type TokenOrWord = Either[Token, Word]
- if (tows.isEmpty || size != tows.size)
- false
- else
+ val ok =
// Same length.
tows.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
case (tow, chunk) ⇒
@@ -118,6 +120,10 @@ class NCSynonym(
case _ ⇒ throw new AssertionError()
}
}
+ // Should be called only for valid tokens count (validation optimized for performance reasons)
+ require(!ok || tows.length == length)
+
+ ok
}
override def toString(): String = mkString(" ")
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index e8b0f0d..b396439 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -348,7 +348,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
permCnt += 1
for (toks ← combos(perm)) {
- val key = toks.map(_.index)
+ val key = toks.map(_.index).sorted
if (!cache.contains(key)) {
var seq: Seq[Seq[Complex]] = null
@@ -366,8 +366,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
matches += ElementMatch(elm, toks, syn,
parts)
}
- // Check synonym matches.
- if (mdl.synonyms.nonEmpty)
+ // Optimization - plain synonyms can be used only on first iteration
+ if (mdl.synonyms.nonEmpty && !ns.exists(_.isUser))
for (syn ← fastAccess(mdl.synonyms, elm.getId,
toks.length) if !found)
if (syn.isMatch(toks))
addMatch(elm, toks, syn, Seq.empty)