[incubator-nlpcraft] branch NLPCRAFT-30 updated: WIP.

sergeykamov Thu, 30 Apr 2020 08:50:32 -0700

This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-30
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git



The following commit(s) were added to refs/heads/NLPCRAFT-30 by this push:
     new 0e82186  WIP.
0e82186 is described below

commit 0e8218644e277853a781d67319a89fbefd4a50b9
Author: Sergey Kamov <[email protected]>
AuthorDate: Thu Apr 30 18:49:24 2020 +0300

    WIP.
---
 .../nlpcraft/examples/sql/SqlModelTest.scala       | 39 +++++++++++++++++++
 .../org/apache/nlpcraft/probe/mgrs/NCSynonym.scala | 44 ++++++++++++----------
 .../mgrs/nlp/enrichers/model/NCModelEnricher.scala |  6 +--
 3 files changed, 67 insertions(+), 22 deletions(-)

diff --git a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
index fe7851e..56fb264 100644
--- a/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
+++ b/src/main/scala/org/apache/nlpcraft/examples/sql/SqlModelTest.scala
@@ -511,6 +511,45 @@ class SqlModelTest {
                   |LIMIT
                   |  1000
                   """.stripMargin
+            ),
+            Case(
+                Seq(
+                    "What are the top orders for the last 2 weeks sorted by order quantity?"
+                ),
+                """SELECT
+                  |  order_details.quantity,
+                  |  orders.order_date,
+                  |  order_details.unit_price,
+                  |  order_details.discount,
+                  |  orders.order_id,
+                  |  orders.required_date,
+                  |  customers.customer_id,
+                  |  customers.company_name,
+                  |  customers.contact_name,
+                  |  employees.employee_id,
+                  |  employees.last_name,
+                  |  employees.first_name,
+                  |  products.product_id,
+                  |  products.product_name,
+                  |  products.quantity_per_unit,
+                  |  shippers.shipper_id,
+                  |  shippers.company_name,
+                  |  shippers.phone
+                  |FROM
+                  |  order_details
+                  |  INNER JOIN orders ON order_details.order_id = orders.order_id
+                  |  INNER JOIN products ON order_details.product_id = products.product_id
+                  |  LEFT JOIN customers ON orders.customer_id = customers.customer_id
+                  |  LEFT JOIN shippers ON orders.ship_via = shippers.shipper_id
+                  |  LEFT JOIN employees ON orders.employee_id = employees.employee_id
+                  |WHERE
+                  |  orders.order_date >= ?
+                  |  AND orders.order_date <= ?
+                  |ORDER BY
+                  |  order_details.quantity DESC
+                  |LIMIT
+                  |  10
+                  """.stripMargin
             )
         )
     }
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCSynonym.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCSynonym.scala
index 94f70a3..12cef01 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCSynonym.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/NCSynonym.scala
@@ -68,22 +68,26 @@ class NCSynonym(
       */
     def isMatch(toks: NCNlpSentenceTokenBuffer): Boolean = {
         require(toks != null)
-        
-        if (toks.isEmpty || size != toks.size || isTextOnly && toks.stemsHash != stemsHash)
-            false
-        else if (isTextOnly)
-            toks.stemsHash == stemsHash && toks.stems == stems
-        else
-            // Same length.
-            toks.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
-                case (tok, chunk) ⇒
-                    chunk.kind match {
-                        case TEXT ⇒ chunk.wordStem == tok.stem
-                        case REGEX ⇒ chunk.regex.matcher(tok.origText).matches() || chunk.regex.matcher(tok.normText).matches()
-                        case DSL ⇒ throw new AssertionError()
-                        case _ ⇒ throw new AssertionError()
-                    }
-            }
+
+        val ok =
+            if (isTextOnly)
+                toks.stemsHash == stemsHash && toks.stems == stems
+            else
+                // Same length.
+                toks.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
+                    case (tok, chunk) ⇒
+                        chunk.kind match {
+                            case TEXT ⇒ chunk.wordStem == tok.stem
+                            case REGEX ⇒ chunk.regex.matcher(tok.origText).matches() || chunk.regex.matcher(tok.normText).matches()
+                            case DSL ⇒ throw new AssertionError()
+                            case _ ⇒ throw new AssertionError()
+                        }
+                }
+
+        // Should be called only for valid tokens count (validation optimized for performance reasons)
+        require(!ok || toks.length == length)
+
+        ok
     }
 
     /**
@@ -98,9 +102,7 @@ class NCSynonym(
         type Word = NCNlpSentenceToken
         type TokenOrWord = Either[Token, Word]
 
-        if (tows.isEmpty || size != tows.size)
-            false
-        else
+        val ok =
             // Same length.
             tows.zip(this).sortBy(p ⇒ getSort(p._2.kind)).forall {
                 case (tow, chunk) ⇒
@@ -118,6 +120,10 @@ class NCSynonym(
                         case _ ⇒ throw new AssertionError()
                     }
             }
+        // Should be called only for valid tokens count (validation optimized for performance reasons)
+        require(!ok || tows.length == length)
+
+        ok
     }
     
     override def toString(): String = mkString(" ")
diff --git a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
index e8b0f0d..b396439 100644
--- a/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
+++ b/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/model/NCModelEnricher.scala
@@ -348,7 +348,7 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                 permCnt += 1
 
                 for (toks ← combos(perm)) {
-                    val key = toks.map(_.index)
+                    val key = toks.map(_.index).sorted
 
                     if (!cache.contains(key)) {
                         var seq: Seq[Seq[Complex]] = null
@@ -366,8 +366,8 @@ object NCModelEnricher extends NCProbeEnricher with DecorateAsScala {
                                     matches += ElementMatch(elm, toks, syn, parts)
                                 }
 
-                            // Check synonym matches.
-                            if (mdl.synonyms.nonEmpty)
+                            // Optimization - plain synonyms can be used only on first iteration
+                            if (mdl.synonyms.nonEmpty && !ns.exists(_.isUser))
                                 for (syn ← fastAccess(mdl.synonyms, elm.getId, toks.length) if !found)
                                     if (syn.isMatch(toks))
                                         addMatch(elm, toks, syn, Seq.empty)

[incubator-nlpcraft] branch NLPCRAFT-30 updated: WIP.

Reply via email to