This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-247
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-247 by this push:
new 19f4e34 WIP.
19f4e34 is described below
commit 19f4e34836793ceebd7ac350dab3c23e5d54087e
Author: Aaron Radzinski <[email protected]>
AuthorDate: Wed Feb 24 12:11:41 2021 -0800
WIP.
---
.../nlpcraft/common/makro/NCMacroCompiler.scala | 8 +-
.../nlpcraft/common/makro/antlr4/NCMacroDsl.g4 | 12 +-
.../nlpcraft/common/makro/antlr4/NCMacroDsl.interp | 6 +-
.../nlpcraft/common/makro/antlr4/NCMacroDsl.tokens | 8 +-
.../common/makro/antlr4/NCMacroDslLexer.interp | 8 +-
.../common/makro/antlr4/NCMacroDslLexer.java | 56 ++++----
.../common/makro/antlr4/NCMacroDslLexer.tokens | 8 +-
.../common/makro/antlr4/NCMacroDslParser.java | 18 ++-
.../nlpcraft/common/makro/NCMacroParserSpec.scala | 153 +++++++--------------
9 files changed, 127 insertions(+), 150 deletions(-)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala
index 97b0d76..d49d467 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala
@@ -141,7 +141,13 @@ object NCMacroCompiler extends LazyLogging {
}
override def exitSyn(ctx: P.SynContext): Unit = {
- val syn = if (ctx.TXT() != null) ctx.TXT().getText else ctx.INT().getText
+ val syn = (
+ if (ctx.TXT() != null) ctx.TXT()
+ else if (ctx.INT() != null) ctx.INT()
+ else if (ctx.REGEX_TXT() != null) ctx.REGEX_TXT()
+ else ctx.DSL_TXT()
+ ).getText
+
val buf = stack.top.buffer
require(buf.nonEmpty)
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4 b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
index a5ea46e..d2a3ed8 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
@@ -24,7 +24,7 @@ expr
| expr item
;
item: syn | group;
-syn : (TXT | INT); // NOTE: since TXT and INT overlap - we catch them both here and resolve in compiler.
+syn : (TXT | INT | REGEX_TXT | DSL_TXT); // NOTE: since TXT and INT overlap - we catch them both here and resolve in compiler.
group: LCURLY list RCURLY minMax?;
minMax: LBR INT COMMA INT RBR;
list
@@ -37,16 +37,16 @@ list
// Lexer.
LCURLY: '{';
RCURLY: '}';
-LBR: '<';
-RBR: '>';
+LBR: '[';
+RBR: ']';
VERT: '|';
COMMA: ',';
UNDERSCORE: '_';
-fragment ESC_CHAR: [{}\\<>_[\]|,];
+fragment ESC_CHAR: [{}\\_[\]|,];
fragment ESC: '\\' ESC_CHAR;
fragment TXT_CHAR
: [~!@#$%^&*()+.]
- | [-=[\]/\\;:`'"]
+ | [-=<>/\\;:`'"]
| '\u00B7'
| 'A'..'Z'
| 'a'..'z'
@@ -74,6 +74,8 @@ fragment TXT_CHAR
| '\uFDF0'..'\uFFFD'
; // Ignoring ['\u10000-'\uEFFFF].
INT: '0' | [1-9][_0-9]*;
+REGEX_TXT: '//' .*? '//';
+DSL_TXT: '^^' .*? '^^';
TXT: (TXT_CHAR | ESC)+;
WS: [ \r\t\u000C\n]+ -> skip ;
ERR_CHAR: .;
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.interp b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.interp
index 204c1cd..502974a 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.interp
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.interp
@@ -11,6 +11,8 @@ null
null
null
null
+null
+null
token symbolic names:
null
@@ -22,6 +24,8 @@ VERT
COMMA
UNDERSCORE
INT
+REGEX_TXT
+DSL_TXT
TXT
WS
ERR_CHAR
@@ -37,4 +41,4 @@ list
atn:
-[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 13, 66, 4, 2,
9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8,
3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 25, 10, 3, 12, 3, 14, 3,
28, 11, 3, 3, 4, 3, 4, 5, 4, 32, 10, 4, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 6, 5,
6, 40, 10, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8,
5, 8, 53, 10, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 7, 8, 61, 10, 8, 12, 8,
14, 8, 64, 11, 8, 3, 8 [...]
\ No newline at end of file
+[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 15, 66, 4, 2,
9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8,
3, 2, 3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 25, 10, 3, 12, 3, 14, 3,
28, 11, 3, 3, 4, 3, 4, 5, 4, 32, 10, 4, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 6, 5,
6, 40, 10, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8,
5, 8, 53, 10, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 7, 8, 61, 10, 8, 12, 8,
14, 8, 64, 11, 8, 3, 8 [...]
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.tokens b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.tokens
index 7301461..ed5e876 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.tokens
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.tokens
@@ -6,9 +6,11 @@ VERT=5
COMMA=6
UNDERSCORE=7
INT=8
-TXT=9
-WS=10
-ERR_CHAR=11
+REGEX_TXT=9
+DSL_TXT=10
+TXT=11
+WS=12
+ERR_CHAR=13
'{'=1
'}'=2
'['=3
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp
index 48cd14d..88fa60f 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.interp
@@ -11,6 +11,8 @@ null
null
null
null
+null
+null
token symbolic names:
null
@@ -22,6 +24,8 @@ VERT
COMMA
UNDERSCORE
INT
+REGEX_TXT
+DSL_TXT
TXT
WS
ERR_CHAR
@@ -38,6 +42,8 @@ ESC_CHAR
ESC
TXT_CHAR
INT
+REGEX_TXT
+DSL_TXT
TXT
WS
ERR_CHAR
@@ -50,4 +56,4 @@ mode names:
DEFAULT_MODE
atn:
-[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 13, 78, 8, 1,
4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8,
9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4,
14, 9, 14, 4, 15, 9, 15, 3, 2, 3, 2, 3, 3, 3, 3, 3, 4, 3, 4, 3, 5, 3, 5, 3, 6,
3, 6, 3, 7, 3, 7, 3, 8, 3, 8, 3, 9, 3, 9, 3, 10, 3, 10, 3, 10, 3, 11, 5, 11,
52, 10, 11, 3, 12, 3, 12, 3, 12, 7, 12, 57, 10, 12, 12, 12, 14, 12, 60, 11, 12,
5, 12, 62, 10, 12, 3, 13 [...]
\ No newline at end of file
+[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 2, 15, 106, 8, 1,
4, 2, 9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8,
9, 8, 4, 9, 9, 9, 4, 10, 9, 10, 4, 11, 9, 11, 4, 12, 9, 12, 4, 13, 9, 13, 4,
14, 9, 14, 4, 15, 9, 15, 4, 16, 9, 16, 4, 17, 9, 17, 3, 2, 3, 2, 3, 3, 3, 3, 3,
4, 3, 4, 3, 5, 3, 5, 3, 6, 3, 6, 3, 7, 3, 7, 3, 8, 3, 8, 3, 9, 3, 9, 3, 10, 3,
10, 3, 10, 3, 11, 5, 11, 56, 10, 11, 3, 12, 3, 12, 3, 12, 7, 12, 61, 10, 12,
12, 12, 14, 12, 64, 11, [...]
\ No newline at end of file
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java
index 6d8b92f..455cc2e 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.java
@@ -18,7 +18,7 @@ public class NCMacroDslLexer extends Lexer {
new PredictionContextCache();
public static final int
LCURLY=1, RCURLY=2, LBR=3, RBR=4, VERT=5, COMMA=6,
UNDERSCORE=7, INT=8,
- TXT=9, WS=10, ERR_CHAR=11;
+ REGEX_TXT=9, DSL_TXT=10, TXT=11, WS=12, ERR_CHAR=13;
public static String[] channelNames = {
"DEFAULT_TOKEN_CHANNEL", "HIDDEN"
};
@@ -30,7 +30,7 @@ public class NCMacroDslLexer extends Lexer {
private static String[] makeRuleNames() {
return new String[] {
"LCURLY", "RCURLY", "LBR", "RBR", "VERT", "COMMA",
"UNDERSCORE", "ESC_CHAR",
- "ESC", "TXT_CHAR", "INT", "TXT", "WS", "ERR_CHAR"
+ "ESC", "TXT_CHAR", "INT", "REGEX_TXT", "DSL_TXT",
"TXT", "WS", "ERR_CHAR"
};
}
public static final String[] ruleNames = makeRuleNames();
@@ -44,7 +44,7 @@ public class NCMacroDslLexer extends Lexer {
private static String[] makeSymbolicNames() {
return new String[] {
null, "LCURLY", "RCURLY", "LBR", "RBR", "VERT",
"COMMA", "UNDERSCORE",
- "INT", "TXT", "WS", "ERR_CHAR"
+ "INT", "REGEX_TXT", "DSL_TXT", "TXT", "WS", "ERR_CHAR"
};
}
private static final String[] _SYMBOLIC_NAMES = makeSymbolicNames();
@@ -106,29 +106,35 @@ public class NCMacroDslLexer extends Lexer {
public ATN getATN() { return _ATN; }
public static final String _serializedATN =
-
"\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\rN\b\1\4\2\t\2\4"+
+
"\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\2\17j\b\1\4\2\t\2\4"+
"\3\t\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7\4\b\t\b\4\t\t\t\4\n\t\n\4\13\t"+
-
"\13\4\f\t\f\4\r\t\r\4\16\t\16\4\17\t\17\3\2\3\2\3\3\3\3\3\4\3\4\3\5\3"+
-
"\5\3\6\3\6\3\7\3\7\3\b\3\b\3\t\3\t\3\n\3\n\3\n\3\13\5\13\64\n\13\3\f\3"+
-
"\f\3\f\7\f9\n\f\f\f\16\f<\13\f\5\f>\n\f\3\r\3\r\6\rB\n\r\r\r\16\rC\3\16"+
-
"\6\16G\n\16\r\16\16\16H\3\16\3\16\3\17\3\17\2\2\20\3\3\5\4\7\5\t\6\13"+
-
"\7\r\b\17\t\21\2\23\2\25\2\27\n\31\13\33\f\35\r\3\2\7\b\2..>>@@]_aa}\177"+
-
"\21\2#-/@B\\^^``b|\u0080\u0080\u00a2\u2001\u200e\u200f\u2041\u2042\u2072"+
-
"\u2191\u2c02\u2ff1\u3003\ud801\uf902\ufdd1\ufdf2\uffff\3\2\63;\4\2\62"+
-
";aa\5\2\13\f\16\17\"\"\2O\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2\2\2\t\3\2"+
-
"\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\2\27\3\2\2\2\2\31\3\2\2\2\2"+
-
"\33\3\2\2\2\2\35\3\2\2\2\3\37\3\2\2\2\5!\3\2\2\2\7#\3\2\2\2\t%\3\2\2\2"+
-
"\13\'\3\2\2\2\r)\3\2\2\2\17+\3\2\2\2\21-\3\2\2\2\23/\3\2\2\2\25\63\3\2"+
- "\2\2\27=\3\2\2\2\31A\3\2\2\2\33F\3\2\2\2\35L\3\2\2\2\37
\7}\2\2 \4\3\2"+
-
"\2\2!\"\7\177\2\2\"\6\3\2\2\2#$\7]\2\2$\b\3\2\2\2%&\7_\2\2&\n\3\2\2\2"+
-
"\'(\7~\2\2(\f\3\2\2\2)*\7.\2\2*\16\3\2\2\2+,\7a\2\2,\20\3\2\2\2-.\t\2"+
-
"\2\2.\22\3\2\2\2/\60\7^\2\2\60\61\5\21\t\2\61\24\3\2\2\2\62\64\t\3\2\2"+
-
"\63\62\3\2\2\2\64\26\3\2\2\2\65>\7\62\2\2\66:\t\4\2\2\679\t\5\2\28\67"+
-
"\3\2\2\29<\3\2\2\2:8\3\2\2\2:;\3\2\2\2;>\3\2\2\2<:\3\2\2\2=\65\3\2\2\2"+
-
"=\66\3\2\2\2>\30\3\2\2\2?B\5\25\13\2@B\5\23\n\2A?\3\2\2\2A@\3\2\2\2BC"+
-
"\3\2\2\2CA\3\2\2\2CD\3\2\2\2D\32\3\2\2\2EG\t\6\2\2FE\3\2\2\2GH\3\2\2\2"+
-
"HF\3\2\2\2HI\3\2\2\2IJ\3\2\2\2JK\b\16\2\2K\34\3\2\2\2LM\13\2\2\2M\36\3"+
- "\2\2\2\t\2\63:=ACH\3\b\2\2";
+
"\13\4\f\t\f\4\r\t\r\4\16\t\16\4\17\t\17\4\20\t\20\4\21\t\21\3\2\3\2\3"+
+
"\3\3\3\3\4\3\4\3\5\3\5\3\6\3\6\3\7\3\7\3\b\3\b\3\t\3\t\3\n\3\n\3\n\3\13"+
+
"\5\138\n\13\3\f\3\f\3\f\7\f=\n\f\f\f\16\f@\13\f\5\fB\n\f\3\r\3\r\3\r\3"+
+
"\r\7\rH\n\r\f\r\16\rK\13\r\3\r\3\r\3\r\3\16\3\16\3\16\3\16\7\16T\n\16"+
+
"\f\16\16\16W\13\16\3\16\3\16\3\16\3\17\3\17\6\17^\n\17\r\17\16\17_\3\20"+
+
"\6\20c\n\20\r\20\16\20d\3\20\3\20\3\21\3\21\4IU\2\22\3\3\5\4\7\5\t\6\13"+
+
"\7\r\b\17\t\21\2\23\2\25\2\27\n\31\13\33\f\35\r\37\16!\17\3\2\7\6\2.."+
+
"]_aa}\177\21\2#-/@B\\^^``b|\u0080\u0080\u00a2\u2001\u200e\u200f\u2041"+
+
"\u2042\u2072\u2191\u2c02\u2ff1\u3003\ud801\uf902\ufdd1\ufdf2\uffff\3\2"+
+
"\63;\4\2\62;aa\5\2\13\f\16\17\"\"\2m\2\3\3\2\2\2\2\5\3\2\2\2\2\7\3\2\2"+
+
"\2\2\t\3\2\2\2\2\13\3\2\2\2\2\r\3\2\2\2\2\17\3\2\2\2\2\27\3\2\2\2\2\31"+
+
"\3\2\2\2\2\33\3\2\2\2\2\35\3\2\2\2\2\37\3\2\2\2\2!\3\2\2\2\3#\3\2\2\2"+
+
"\5%\3\2\2\2\7\'\3\2\2\2\t)\3\2\2\2\13+\3\2\2\2\r-\3\2\2\2\17/\3\2\2\2"+
+
"\21\61\3\2\2\2\23\63\3\2\2\2\25\67\3\2\2\2\27A\3\2\2\2\31C\3\2\2\2\33"+
+
"O\3\2\2\2\35]\3\2\2\2\37b\3\2\2\2!h\3\2\2\2#$\7}\2\2$\4\3\2\2\2%&\7\177"+
+
"\2\2&\6\3\2\2\2\'(\7]\2\2(\b\3\2\2\2)*\7_\2\2*\n\3\2\2\2+,\7~\2\2,\f\3"+
+
"\2\2\2-.\7.\2\2.\16\3\2\2\2/\60\7a\2\2\60\20\3\2\2\2\61\62\t\2\2\2\62"+
+
"\22\3\2\2\2\63\64\7^\2\2\64\65\5\21\t\2\65\24\3\2\2\2\668\t\3\2\2\67\66"+
+
"\3\2\2\28\26\3\2\2\29B\7\62\2\2:>\t\4\2\2;=\t\5\2\2<;\3\2\2\2=@\3\2\2"+
+
"\2><\3\2\2\2>?\3\2\2\2?B\3\2\2\2@>\3\2\2\2A9\3\2\2\2A:\3\2\2\2B\30\3\2"+
+
"\2\2CD\7\61\2\2DE\7\61\2\2EI\3\2\2\2FH\13\2\2\2GF\3\2\2\2HK\3\2\2\2IJ"+
+
"\3\2\2\2IG\3\2\2\2JL\3\2\2\2KI\3\2\2\2LM\7\61\2\2MN\7\61\2\2N\32\3\2\2"+
+
"\2OP\7`\2\2PQ\7`\2\2QU\3\2\2\2RT\13\2\2\2SR\3\2\2\2TW\3\2\2\2UV\3\2\2"+
+
"\2US\3\2\2\2VX\3\2\2\2WU\3\2\2\2XY\7`\2\2YZ\7`\2\2Z\34\3\2\2\2[^\5\25"+
+
"\13\2\\^\5\23\n\2][\3\2\2\2]\\\3\2\2\2^_\3\2\2\2_]\3\2\2\2_`\3\2\2\2`"+
+
"\36\3\2\2\2ac\t\6\2\2ba\3\2\2\2cd\3\2\2\2db\3\2\2\2de\3\2\2\2ef\3\2\2"+
+ "\2fg\b\20\2\2g
\3\2\2\2hi\13\2\2\2i\"\3\2\2\2\13\2\67>AIU]_d\3\b\2\2";
public static final ATN _ATN =
new ATNDeserializer().deserialize(_serializedATN.toCharArray());
static {
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.tokens b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.tokens
index 7301461..ed5e876 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.tokens
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslLexer.tokens
@@ -6,9 +6,11 @@ VERT=5
COMMA=6
UNDERSCORE=7
INT=8
-TXT=9
-WS=10
-ERR_CHAR=11
+REGEX_TXT=9
+DSL_TXT=10
+TXT=11
+WS=12
+ERR_CHAR=13
'{'=1
'}'=2
'['=3
diff --git a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java
index 6b45ab1..04fd18b 100644
--- a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java
+++ b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java
@@ -18,7 +18,7 @@ public class NCMacroDslParser extends Parser {
new PredictionContextCache();
public static final int
LCURLY=1, RCURLY=2, LBR=3, RBR=4, VERT=5, COMMA=6,
UNDERSCORE=7, INT=8,
- TXT=9, WS=10, ERR_CHAR=11;
+ REGEX_TXT=9, DSL_TXT=10, TXT=11, WS=12, ERR_CHAR=13;
public static final int
RULE_makro = 0, RULE_expr = 1, RULE_item = 2, RULE_syn = 3,
RULE_group = 4,
RULE_minMax = 5, RULE_list = 6;
@@ -38,7 +38,7 @@ public class NCMacroDslParser extends Parser {
private static String[] makeSymbolicNames() {
return new String[] {
null, "LCURLY", "RCURLY", "LBR", "RBR", "VERT",
"COMMA", "UNDERSCORE",
- "INT", "TXT", "WS", "ERR_CHAR"
+ "INT", "REGEX_TXT", "DSL_TXT", "TXT", "WS", "ERR_CHAR"
};
}
private static final String[] _SYMBOLIC_NAMES = makeSymbolicNames();
@@ -239,6 +239,8 @@ public class NCMacroDslParser extends Parser {
_errHandler.sync(this);
switch (_input.LA(1)) {
case INT:
+ case REGEX_TXT:
+ case DSL_TXT:
case TXT:
enterOuterAlt(_localctx, 1);
{
@@ -271,6 +273,8 @@ public class NCMacroDslParser extends Parser {
public static class SynContext extends ParserRuleContext {
public TerminalNode TXT() { return
getToken(NCMacroDslParser.TXT, 0); }
public TerminalNode INT() { return
getToken(NCMacroDslParser.INT, 0); }
+ public TerminalNode REGEX_TXT() { return
getToken(NCMacroDslParser.REGEX_TXT, 0); }
+ public TerminalNode DSL_TXT() { return
getToken(NCMacroDslParser.DSL_TXT, 0); }
public SynContext(ParserRuleContext parent, int invokingState) {
super(parent, invokingState);
}
@@ -294,7 +298,7 @@ public class NCMacroDslParser extends Parser {
{
setState(31);
_la = _input.LA(1);
- if ( !(_la==INT || _la==TXT) ) {
+ if ( !((((_la) & ~0x3f) == 0 && ((1L << _la) & ((1L <<
INT) | (1L << REGEX_TXT) | (1L << DSL_TXT) | (1L << TXT))) != 0)) ) {
_errHandler.recoverInline(this);
}
else {
@@ -467,6 +471,8 @@ public class NCMacroDslParser extends Parser {
switch (_input.LA(1)) {
case LCURLY:
case INT:
+ case REGEX_TXT:
+ case DSL_TXT:
case TXT:
{
setState(46);
@@ -569,13 +575,13 @@ public class NCMacroDslParser extends Parser {
}
public static final String _serializedATN =
-
"\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\3\rB\4\2\t\2\4\3\t"+
+
"\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\3\17B\4\2\t\2\4\3\t"+
"\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7\4\b\t\b\3\2\3\2\3\2\3\3\3\3\3\3\3\3"+
"\3\3\7\3\31\n\3\f\3\16\3\34\13\3\3\4\3\4\5\4
\n\4\3\5\3\5\3\6\3\6\3\6"+
"\3\6\5\6(\n\6\3\7\3\7\3\7\3\7\3\7\3\7\3\b\3\b\3\b\3\b\3\b\5\b\65\n\b\3"+
"\b\3\b\3\b\3\b\3\b\3\b\7\b=\n\b\f\b\16\b@\13\b\3\b\2\4\4\16\t\2\4\6\b"+
-
"\n\f\16\2\3\3\2\n\13\2@\2\20\3\2\2\2\4\23\3\2\2\2\6\37\3\2\2\2\b!\3\2"+
-
"\2\2\n#\3\2\2\2\f)\3\2\2\2\16\64\3\2\2\2\20\21\5\4\3\2\21\22\7\2\2\3\22"+
+
"\n\f\16\2\3\3\2\n\r\2@\2\20\3\2\2\2\4\23\3\2\2\2\6\37\3\2\2\2\b!\3\2\2"+
+
"\2\n#\3\2\2\2\f)\3\2\2\2\16\64\3\2\2\2\20\21\5\4\3\2\21\22\7\2\2\3\22"+
"\3\3\2\2\2\23\24\b\3\1\2\24\25\5\6\4\2\25\32\3\2\2\2\26\27\f\3\2\2\27"+
"\31\5\6\4\2\30\26\3\2\2\2\31\34\3\2\2\2\32\30\3\2\2\2\32\33\3\2\2\2\33"+
"\5\3\2\2\2\34\32\3\2\2\2\35 \5\b\5\2\36
\5\n\6\2\37\35\3\2\2\2\37\36\3"+
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroParserSpec.scala b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroParserSpec.scala
index da74df3..18cccbd 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroParserSpec.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroParserSpec.scala
@@ -60,16 +60,17 @@ class NCMacroParserSpec {
* @param txt Text to expand.
* @param exp Expected expansion strings.
*/
- def testParser(txt: String, exp: Seq[String]): Unit = {
+ def checkEq(txt: String, exp: Seq[String]): Unit = {
val z = parser.expand(txt).sorted
val w = exp.sorted
-
+
if (z != w)
println(s"$z != $w")
-
+
assertTrue(z == w)
}
-
+
+ // @Test
def testPerformance() {
val start = currentTime
@@ -83,6 +84,23 @@ class NCMacroParserSpec {
println(s"${N * 1000 / duration} expansions/sec.")
}
+
+ /**
+ *
+ * @param txt
+ */
+ private def checkError(txt: String): Unit = {
+ try {
+ parser.expand(txt)
+
+ assert(false)
+ } catch {
+ case e: NCE ⇒
+ println(e.getMessage)
+ assert(true)
+ }
+ }
+
@Test
def testExpand() {
// Make sure we can parse these.
@@ -104,112 +122,37 @@ class NCMacroParserSpec {
parser.expand("<METRICS_B>")
parser.expand("<METRICS>")
- testParser("<A> {b|_} c", Seq(
- "aaa b c",
- "aaa c"
- ))
-
- testParser("<B> {b|_} c", Seq(
- "aaa bbb b c",
- "aaa bbb c"
- ))
-
- testParser("{tl;dr|j/k}", Seq(
- "tl;dr",
- "j/k"
- ))
-
- testParser("a {b|_}. c", Seq(
- "a b . c",
- "a . c"
- ))
-
- testParser("""a {/abc.*/|\{\_\}} c""", Seq(
- "a /abc.*/ c",
- "a {_} c"
- ))
-
- testParser("""{`a`|\`a\`}""", Seq(
- "`a`",
- """\`a\`"""
- ))
-
- testParser("""a {/abc.\{\}*/|/d/} c""", Seq(
- "a /abc.{}*/ c",
- "a /d/ c"
- ))
-
- testParser("""a .{b\, |_}. c""", Seq(
- "a . b, . c",
- "a . . c"
- ))
-
- testParser("a {{b|c}|_}.", Seq(
- "a .",
- "a b .",
- "a c ."
- ))
-
- testParser("a {{{<C>}}|_} c", Seq(
- "a aaa bbb z c",
- "a aaa bbb w c",
- "a c"
- ))
-
- testParser("a {b|_}", Seq(
- "a b",
- "a"
- ))
-
- testParser("a {b|_}d", Seq(
- "a b d",
- "a d"
- ))
-
- testParser("a {b|_} d", Seq(
- "a b d",
- "a d"
- ))
-
- testParser("a {b|_} d", Seq(
- "a b d",
- "a d"
- ))
-
- testParser("a {b}", Seq(
- "a b"
- ))
-
- testParser("a {b} {c|_}", Seq(
- "a b",
- "a b c"
- ))
-
- testParser("a {{b|c}}", Seq(
- "a b",
- "a c"
- ))
+ checkEq("<A> {b|_} c", Seq("aaa b c", "aaa c"))
+ checkEq("<B> {b|_} c", Seq("aaa bbb b c", "aaa bbb c"))
+ checkEq("{tl;dr|j/k}", Seq("tl;dr", "j/k"))
+ checkEq("a {b|_}. c", Seq("a b . c", "a . c"))
+ checkEq("""a {/abc.*/|\{\_\}} c""", Seq("a /abc.*/ c", "a {_} c"))
+ checkEq("""{`a`|\`a\`}""", Seq("`a`", """\`a\`"""))
+ checkEq("""a {/abc.\{\}*/|/d/} c""", Seq("a /abc.{}*/ c", "a /d/ c"))
+ checkEq("""a .{b\, |_}. c""", Seq("a . b, . c", "a . . c"))
+ checkEq("a {{b|c}|_}.", Seq("a .", "a b .", "a c ."))
+ checkEq("a {{{<C>}}|_} c", Seq("a aaa bbb z c", "a aaa bbb w c", "a c"))
+ checkEq("a {b|_}", Seq("a b", "a"))
+ checkEq("a {b|_}d", Seq("a b d", "a d"))
+ checkEq("a {b|_} d", Seq("a b d", "a d"))
+ checkEq("a {b|_} d", Seq("a b d", "a d"))
+ checkEq("a {b}", Seq("a b"))
+ checkEq("a {b} {c|_}", Seq("a b", "a b c"))
+ checkEq("a {{b|c}}", Seq("a b", "a c"))
+ checkEq("a {b|_|{g}[1,2]}", Seq("a", "a b", "a g", "a g g"))
+ checkEq("a {b|_|{//[]{}//}[1,2]}", Seq("a", "a b", "a //[]{}//", "a //[]{}// //[]{}//"))
+ checkEq("a {b|_|{//[]^^// ^^{_}^^}[1,2]}", Seq("a", "a b", "a //[]^^// ^^{_}^^", "a //[]^^// ^^{_}^^ //[]^^// ^^{_}^^"))
+ checkEq("//[a-zA-Z0-9]+//", Seq("//[a-zA-Z0-9]+//"))
+ checkEq("the ^^[internal](id == 'anyWord')^^", Seq("the ^^[internal](id == 'anyWord')^^"))
+ checkEq("{A}[0,1] ^^[internal](id == 'anyWord')^^", Seq("^^[internal](id == 'anyWord')^^", "A ^^[internal](id == 'anyWord')^^"))
+ checkEq("w1 ^^id == 'nlpcraft:num'^^ w2", Seq("w1 ^^id == 'nlpcraft:num'^^ w2"))
+ checkEq("before limit ^^[limitAlias](id == 'nlpcraft:limit')^^", Seq("before limit ^^[limitAlias](id == 'nlpcraft:limit')^^"))
+ checkEq("wrap ^^[wrapLimitAlias](id == 'wrapLimit')^^", Seq("wrap ^^[wrapLimitAlias](id == 'wrapLimit')^^"))
checkError("a {| b")
checkError("{a}}")
}
- /**
- *
- * @param txt
- */
- private def checkError(txt: String): Unit = {
- try {
- parser.expand(txt)
-
- assert(false)
- } catch {
- case e: NCE ⇒
- println(e.getMessage)
- assert(true)
- }
- }
-
@Test
def testLimit() {
checkError("<METRICS> <USER> <BY> <WEBSITE> <BY> <SES> <BY> <METRICS>
<BY> <USER> <BY> <METRICS>")