This is an automated email from the ASF dual-hosted git repository.

sergeykamov pushed a commit to branch NLPCRAFT-520
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-520 by this push:
     new 2e1a72fd WIP.
2e1a72fd is described below

commit 2e1a72fdde3e45d51726f50cdb74807a72cbe7bf
Author: Sergey Kamov <skhem...@gmail.com>
AuthorDate: Mon Dec 19 15:39:40 2022 +0400

    WIP.
---
 .../nlp/enrichers/NCBracketsTokenEnricher.scala    | 54 ++++++++++++----------
 .../nlp/enrichers/NCQuotesTokenEnricher.scala      |  4 +-
 .../enrichers/NCBracketsTokenEnricherSpec.scala    | 18 ++++++--
 3 files changed, 44 insertions(+), 32 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala
index c0e692a3..b4d8f563 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricher.scala
@@ -21,16 +21,26 @@ import com.typesafe.scalalogging.LazyLogging
 import org.apache.nlpcraft.*
 
 import java.io.*
-import scala.collection.mutable
+import scala.collection.{Map, mutable}
+
+/**
+  * Companion helper.
+  */
+object NCBracketsTokenEnricher:
+    private val BRACKETS = Map("(" -> ")", "{" -> "}", "[" -> "]", "<" -> ">")
+    private val BRACKETS_REVERSED = BRACKETS.map { case (key, value) => value -> key }
+
+import NCBracketsTokenEnricher.*
 
 /**
   * Brackets [[NCTokenEnricher token enricher]].
   *
   * This enricher adds `brackets` boolean [[NCPropertyMap metadata]] property to the [[NCToken token]]
-  * instance if the word it represents is enclosed in brackets. Supported brackets are: `()`, `{}`,
-  * `[]` and `<>`.
+  * instance if the word it represents is enclosed in brackets.
+  *
+  * Supported brackets are: `()`, `{}`, `[]` and `<>`.
   *
-  * **NOTE:** invalid enclosed brackets are ignored.
+  * **NOTE:** invalid enclosed brackets are ignored and for all input tokens property `brackets` assigned as `false`.
   */
 //noinspection DuplicatedCode,ScalaWeakerAccess
 class NCBracketsTokenEnricher extends NCTokenEnricher with LazyLogging:
@@ -41,26 +51,20 @@ class NCBracketsTokenEnricher extends NCTokenEnricher with LazyLogging:
         var ok = true
 
         def check(expected: String): Unit = if stack.empty() || stack.pop() != expected then ok = false
-        def mark(t: NCToken): Unit = map += t -> !stack.isEmpty
+        def add(t: NCToken): Unit = map += t -> !stack.isEmpty
 
-        for (t <- toks if ok)
-            t.getText match
-                case "(" | "{" | "[" | "<" =>
-                    mark(t)
-                    stack.push(t.getText)
-                case ")" =>
-                    check("(")
-                    mark(t)
-                case "}" =>
-                    check("{")
-                    mark(t)
-                case "]" =>
-                    check("[")
-                    mark(t)
-                case ">" =>
-                    check("<")
-                    mark(t)
-                case _ => mark(t)
+        for (t <- toks if ok; txt = t.getText)
+            if BRACKETS.contains(txt) then
+                add(t)
+                stack.push(txt)
+            else if BRACKETS_REVERSED.contains(txt) then
+                check(BRACKETS_REVERSED(txt))
+                add(t)
+            else
+                add(t)
 
-        if ok && stack.isEmpty then map.foreach { (tok, b) => tok.put("brackets", b) }
-        else logger.warn(s"Detected invalid brackets in: ${req.getText}")
\ No newline at end of file
+        if ok && stack.isEmpty then
+            map.foreach { (tok, b) => tok.put("brackets", b) }
+        else
+            toks.foreach(_.put("brackets",false))
+            logger.warn(s"Detected invalid brackets in: ${req.getText}")
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
index 8912e178..f2abb1c8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/nlp/enrichers/NCQuotesTokenEnricher.scala
@@ -45,8 +45,8 @@ import NCQuotesTokenEnricher.*
   * `false` value indicates otherwise.
   *
   * Supported quotes are: **«**, **»**, **"**, **'**, **&#96;**.
-  * For any invalid cases, like invalid quotes order otr count,
-  * property `quoted` assigned as `false` for all input tokens.
+  *
+  * **NOTE:** invalid enclosed quotes are ignored and for all input tokens property `quoted` assigned as `false`.
   */
 //noinspection ScalaWeakerAccess
 class NCQuotesTokenEnricher extends NCTokenEnricher with LazyLogging:
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricherSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricherSpec.scala
index 6739a703..82bcff24 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricherSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/nlp/enrichers/NCBracketsTokenEnricherSpec.scala
@@ -33,16 +33,24 @@ class NCBracketsTokenEnricherSpec extends AnyFunSuite:
       * @param txt
       * @param brackets
       */
-    private def check(txt: String, brackets: Set[Integer]): Unit =
+    private def check(txt: String, brackets: Integer*): Unit =
         val toks = EN_TOK_PARSER.tokenize(txt)
         bracketsEnricher.enrich(NCTestRequest(txt), CFG, toks)
 
         NCTestUtils.printTokens(toks)
 
-        toks.foreach (tok => require(!(tok[Boolean]("brackets") ^ brackets.contains(tok.getIndex))))
+        if brackets.isEmpty then require(toks.forall(p => !p[Boolean]("brackets")))
+        else toks.foreach (tok => require(!(tok[Boolean]("brackets") ^ brackets.contains(tok.getIndex))))
 
     test("test") {
-        check("A [ B C ] D", Set(2, 3))
-        check("A [ B { C } ] D", Set(2, 3, 4, 5))
-        check("A [ B { C } ] [ [ D ] ] [ E ]", Set(2, 3, 4, 5, 8, 9, 10, 13))
+        check("A [ B C ] D", 2, 3)
+        check("A [ B { C } ] D", 2, 3, 4, 5)
+        check("A [ B { C } ] [ [ D ] ] [ E ]", 2, 3, 4, 5, 8, 9, 10, 13)
+
+        // Invalid.
+        check("[[a]")
+        check("[a[")
+        check("{[a[}")
+        check("[")
+        check("}")
     }

Reply via email to