This is an automated email from the ASF dual-hosted git repository.

aradzinski pushed a commit to branch NLPCRAFT-247
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git


The following commit(s) were added to refs/heads/NLPCRAFT-247 by this push:
     new cb5028b  WIP.
cb5028b is described below

commit cb5028b37c26444d07cb23b5d3e3a6c64a3c96da
Author: Aaron Radzinski <[email protected]>
AuthorDate: Tue Feb 23 23:57:50 2021 -0800

    WIP.
---
 .../nlpcraft/common/makro/NCMacroParser.scala      |  2 +-
 .../nlpcraft/common/makro/antlr4/NCMacroDsl.g4     | 12 ++++++--
 .../common/makro/antlr4/NCMacroDslLexer.interp     |  2 +-
 .../common/makro/antlr4/NCMacroDslLexer.java       | 33 +++++++++++-----------
 .../mgrs/nlp/enrichers/limit/NCLimitEnricher.scala |  8 +++---
 .../common/makro/NCMacroCompilerSpec.scala         |  2 ++
 6 files changed, 34 insertions(+), 25 deletions(-)

diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
index 0f79cea..56d4b20 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
@@ -69,7 +69,7 @@ object NCMacroParser {
   * - '{A|B|_}' denotes either 'A', or 'B' or nothing ('_').
   * - '{A}[1,2]' denotes 'A' or 'A A'.
   * - '{A}[0,1]' denotes 'A' or nothing (just like '{A|_}').
-  * - '\' can be used only for escaping '{}\<>_[]|,' special symbols.
+  * - '\' must be used for escaping any of '{}\<>_[]|,' special symbols.
   *
   * Examples:
   *      "A {B|C}[1,2] D" ⇒ "A B D", "A C D", "A B B D", "A C C D"
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4 b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
index d214077..85ceabf 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
@@ -46,17 +46,25 @@ fragment ESC_CHAR: [{}\\<>_[\]|,];
 fragment ESC: '\\' ESC_CHAR;
 fragment TXT_CHAR
     : [~!@#$%^&*()+.]
-    | [-=<>/\\;:`]
+    | [-=<>/\\;:`'"]
     | '\u00B7'
     | 'A'..'Z'
     | 'a'..'z'
     | '0'..'9'
     | '\u0300'..'\u036F'
+    | '\u00A0'..'\u00FF' /* Latin-1 Supplement. */
+    | '\u0100'..'\u017F' /* Latin Extended-A. */
+    | '\u0180'..'\u024F' /* Latin Extended-B. */
+    | '\u1E02'..'\u1EF3' /* Latin Extended Additional. */
+    | '\u0259'..'\u0292' /* IPA Extensions. */
+    | '\u02B0'..'\u02FF' /* Spacing modifier letters. */
     | '\u203F'..'\u2040'
     | '\u00C0'..'\u00D6'
     | '\u00D8'..'\u00F6'
     | '\u00F8'..'\u02FF'
-    | '\u0370'..'\u037D'
+    | '\u0370'..'\u03FF' /* Greek and Coptic. */
+    | '\u1F01'..'\u1FFF' /* Greek Extended. */
+    | '\u0400'..'\u04FF' /* Cyrillic. */
     | '\u037F'..'\u1FFF'
     | '\u200C'..'\u200D'
     | '\u2070'..'\u218F'
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp
index 184581f..48cd14d 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp
@@ -50,4 +50,4 @@ mode names:
 DEFAULT_MODE
 
 atn:
-[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 13, 78, 8, 1, 
4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 
9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 
14, 9, 14, 4, 15, 9, 15, 3, 2, 3, 2, 3, 3, 3, 3, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6, 
3, 6, 3, 7, 3, 7, 3, 8, 3, 8, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 11, 5, 11, 
52, 10, 11, 3, 12, 3, 12, 3, 12, 7, 12, 57, 10, 12, 12, 12, 14, 12, 60, 11, 12, 
5, 12, 62, 10, 12, 3, 13 [...]
\ No newline at end of file
+[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 13, 78, 8, 1, 
4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 
9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4, 
14, 9, 14, 4, 15, 9, 15, 3, 2, 3, 2, 3, 3, 3, 3, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6, 
3, 6, 3, 7, 3, 7, 3, 8, 3, 8, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 11, 5, 11, 
52, 10, 11, 3, 12, 3, 12, 3, 12, 7, 12, 57, 10, 12, 12, 12, 14, 12, 60, 11, 12, 
5, 12, 62, 10, 12, 3, 13 [...]
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java
index 889771c..6d8b92f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java
@@ -113,23 +113,22 @@ public class NCMacroDslLexer extends Lexer {
                
"\f\3\f\7\f9\n\f\f\f\16\f<\13\f\5\f>\n\f\3\r\3\r\6\rB\n\r\r\r\16\rC\3\16"+
                
"\6\16G\n\16\r\16\16\16H\3\16\3\16\3\17\3\17\2\2\20\3\3\5\4\7\5\t\6\13"+
                
"\7\r\b\17\t\21\2\23\2\25\2\27\n\31\13\33\f\35\r\3\2\7\b\2..>>@@]_aa}\177"+
-               
"\27\2##%(*-/@B\\^^``b|\u0080\u0080\u00b9\u00b9\u00c2\u00d8\u00da\u00f8"+
-               
"\u00fa\u037f\u0381\u2001\u200e\u200f\u2041\u2042\u2072\u2191\u2c02\u2ff1"+
-               
"\u3003\ud801\uf902\ufdd1\ufdf2\uffff\3\2\63;\4\2\62;aa\5\2\13\f\16\17"+
-               
"\"\"\2O\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2\2\t\3\2\2\2\2\13\3\2\2\2\2"+
-               
"\r\3\2\2\2\2\17\3\2\2\2\2\27\3\2\2\2\2\31\3\2\2\2\2\33\3\2\2\2\2\35\3"+
-               
"\2\2\2\3\37\3\2\2\2\5!\3\2\2\2\7#\3\2\2\2\t%\3\2\2\2\13\'\3\2\2\2\r)\3"+
-               
"\2\2\2\17+\3\2\2\2\21-\3\2\2\2\23/\3\2\2\2\25\63\3\2\2\2\27=\3\2\2\2\31"+
-               "A\3\2\2\2\33F\3\2\2\2\35L\3\2\2\2\37 \7}\2\2 
\4\3\2\2\2!\"\7\177\2\2\""+
-               
"\6\3\2\2\2#$\7]\2\2$\b\3\2\2\2%&\7_\2\2&\n\3\2\2\2\'(\7~\2\2(\f\3\2\2"+
-               
"\2)*\7.\2\2*\16\3\2\2\2+,\7a\2\2,\20\3\2\2\2-.\t\2\2\2.\22\3\2\2\2/\60"+
-               
"\7^\2\2\60\61\5\21\t\2\61\24\3\2\2\2\62\64\t\3\2\2\63\62\3\2\2\2\64\26"+
-               
"\3\2\2\2\65>\7\62\2\2\66:\t\4\2\2\679\t\5\2\28\67\3\2\2\29<\3\2\2\2:8"+
-               
"\3\2\2\2:;\3\2\2\2;>\3\2\2\2<:\3\2\2\2=\65\3\2\2\2=\66\3\2\2\2>\30\3\2"+
-               
"\2\2?B\5\25\13\2@B\5\23\n\2A?\3\2\2\2A@\3\2\2\2BC\3\2\2\2CA\3\2\2\2CD"+
-               
"\3\2\2\2D\32\3\2\2\2EG\t\6\2\2FE\3\2\2\2GH\3\2\2\2HF\3\2\2\2HI\3\2\2\2"+
-               
"IJ\3\2\2\2JK\b\16\2\2K\34\3\2\2\2LM\13\2\2\2M\36\3\2\2\2\t\2\63:=ACH\3"+
-               "\b\2\2";
+               
"\21\2#-/@B\\^^``b|\u0080\u0080\u00a2\u2001\u200e\u200f\u2041\u2042\u2072"+
+               
"\u2191\u2c02\u2ff1\u3003\ud801\uf902\ufdd1\ufdf2\uffff\3\2\63;\4\2\62"+
+               
";aa\5\2\13\f\16\17\"\"\2O\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2\2\t\3\2"+
+               
"\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\2\27\3\2\2\2\2\31\3\2\2\2\2"+
+               
"\33\3\2\2\2\2\35\3\2\2\2\3\37\3\2\2\2\5!\3\2\2\2\7#\3\2\2\2\t%\3\2\2\2"+
+               
"\13\'\3\2\2\2\r)\3\2\2\2\17+\3\2\2\2\21-\3\2\2\2\23/\3\2\2\2\25\63\3\2"+
+               "\2\2\27=\3\2\2\2\31A\3\2\2\2\33F\3\2\2\2\35L\3\2\2\2\37 
\7}\2\2 \4\3\2"+
+               
"\2\2!\"\7\177\2\2\"\6\3\2\2\2#$\7]\2\2$\b\3\2\2\2%&\7_\2\2&\n\3\2\2\2"+
+               
"\'(\7~\2\2(\f\3\2\2\2)*\7.\2\2*\16\3\2\2\2+,\7a\2\2,\20\3\2\2\2-.\t\2"+
+               
"\2\2.\22\3\2\2\2/\60\7^\2\2\60\61\5\21\t\2\61\24\3\2\2\2\62\64\t\3\2\2"+
+               
"\63\62\3\2\2\2\64\26\3\2\2\2\65>\7\62\2\2\66:\t\4\2\2\679\t\5\2\28\67"+
+               
"\3\2\2\29<\3\2\2\2:8\3\2\2\2:;\3\2\2\2;>\3\2\2\2<:\3\2\2\2=\65\3\2\2\2"+
+               
"=\66\3\2\2\2>\30\3\2\2\2?B\5\25\13\2@B\5\23\n\2A?\3\2\2\2A@\3\2\2\2BC"+
+               
"\3\2\2\2CA\3\2\2\2CD\3\2\2\2D\32\3\2\2\2EG\t\6\2\2FE\3\2\2\2GH\3\2\2\2"+
+               
"HF\3\2\2\2HI\3\2\2\2IJ\3\2\2\2JK\b\16\2\2K\34\3\2\2\2LM\13\2\2\2M\36\3"+
+               "\2\2\2\t\2\63:=ACH\3\b\2\2";
        public static final ATN _ATN =
                new ATNDeserializer().deserialize(_serializedATN.toCharArray());
        static {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
index d0ed457..9d3c93c 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/probe/mgrs/nlp/enrichers/limit/NCLimitEnricher.scala
@@ -48,13 +48,13 @@ object NCLimitEnricher extends NCProbeEnricher {
     // - digits (like `25`),
     // - word numbers (like `twenty two`) or
     // - fuzzy numbers (like `few`).
-    private final val CD = "[CD]"
+    private final val CD = "'CD'"
 
     // Possible elements:
-    // - Any macros,
-    // - Special symbol CD (which designates obvious number or fuzzy number word)
+    // - Any macros.
+    // - Special symbol CD (which designates obvious number or fuzzy number word).
     // - Any simple word.
-    // Note that `CD` is optional (DFLT_LIMIT will be used)
+    // Note that `CD` is optional (DFLT_LIMIT will be used).
     private final val SYNONYMS = Seq(
         s"<TOP_WORDS> {of|_} {$CD|_} {<POST_WORDS>|_}",
         s"$CD of",
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
index 0df7a4c..4560faa 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
@@ -98,6 +98,8 @@ class NCMacroCompilerSpec {
         checkEq("""a {/abc.\{\}*/     |/d/} c""", Seq("""a /abc.\{\}*/ c""", "a /d/ c"))
         checkEq("""a .{b\,  |_}. c""", Seq("""a . b\, . c""", "a . . c"))
         checkEq("a {        {b|c}|_}.", Seq("a .", "a b .", "a c ."))
+        checkEq("°", Seq("°"))
+
     }
 
     @Test

Reply via email to