This is an automated email from the ASF dual-hosted git repository.
aradzinski pushed a commit to branch NLPCRAFT-247
in repository https://gitbox.apache.org/repos/asf/incubator-nlpcraft.git
The following commit(s) were added to refs/heads/NLPCRAFT-247 by this push:
new a31a047 WIP.
a31a047 is described below
commit a31a047894be88ef361d09269680ec843bb5fefc
Author: Aaron Radzinzski <[email protected]>
AuthorDate: Tue Feb 23 07:56:07 2021 -0800
WIP.
---
.../nlpcraft/common/makro/NCMacroCompiler.scala | 23 +-
.../nlpcraft/common/makro/NCMacroParser.scala | 268 ++-------------------
.../nlpcraft/common/makro/antlr4/NCMacroDsl.g4 | 4 +-
.../nlpcraft/common/makro/antlr4/NCMacroDsl.interp | 2 +-
.../common/makro/antlr4/NCMacroDslParser.java | 107 ++++----
.../scala/org/apache/nlpcraft/NCTestElement.scala | 1 +
.../common/makro/NCMacroCompilerSpec.scala | 2 +
.../nlpcraft/common/makro/NCMacroParserSpec.scala | 79 +-----
8 files changed, 120 insertions(+), 366 deletions(-)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala
index 9804281..30bf7f5 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroCompiler.scala
@@ -29,6 +29,7 @@ import scala.collection.mutable
*
*/
object NCMacroCompiler extends LazyLogging {
+ private final val MAX_SYN = 1000
/**
*
@@ -68,12 +69,21 @@ object NCMacroCompiler extends LazyLogging {
* @param ctx
* @return
*/
- def compilerError(errMsg: String)(implicit ctx: ParserRuleContext):
NCE = {
+ private def compilerError(errMsg: String)(implicit ctx:
ParserRuleContext): NCE = {
val tok = ctx.start
new NCE(mkCompilerError(errMsg, tok.getLine,
tok.getCharPositionInLine, in))
}
+ /**
+ *
+ * @param buf
+ * @param ctx
+ */
+ private def checkMaxSyn(buf: mutable.Buffer[String])(implicit ctx:
ParserRuleContext): Unit =
+ if (buf.size > MAX_SYN)
+ throw compilerError(s"Exceeded max number ($MAX_SYN) of macro
expansions: ${buf.size}")
+
override def enterExpr(ctx: NCMacroDslParser.ExprContext): Unit = {
val buf = mutable.Buffer.empty[String]
@@ -90,9 +100,13 @@ object NCMacroCompiler extends LazyLogging {
}
override def exitExpr(ctx: NCMacroDslParser.ExprContext): Unit = {
+ implicit val evidence: ParserRuleContext = ctx
+
if (stack.size > 1) {
val expr = stack.pop()
val prn = stack.top
+
+ checkMaxSyn(expr.buffer)
require(expr.buffer.nonEmpty)
@@ -104,7 +118,14 @@ object NCMacroCompiler extends LazyLogging {
}
override def exitGroup(ctx: NCMacroDslParser.GroupContext): Unit = {
+ implicit val evidence: ParserRuleContext = ctx
+
val grp = stack.pop()
+
+ // Remove dups.
+ grp.buffer = grp.buffer.distinct
+
+ checkMaxSyn(grp.buffer)
require(grp.isGroup)
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
index 3266c7c..6763343 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/NCMacroParser.scala
@@ -18,13 +18,12 @@
package org.apache.nlpcraft.common.makro
import org.apache.nlpcraft.common._
-import org.apache.nlpcraft.common.util.NCUtils._
-
import scala.collection.JavaConverters._
import scala.collection._
object NCMacroParser {
private final val CHARS = "[A-Za-z0-9-_]+"
+ private final val ESC_CHARS = """{}\<>_[]|,"""
private final val MACRO_REGEX = s"<$CHARS>".r
private final val BROKEN_MACRO_REGEX1 = s"<$CHARS".r
private final val BROKEN_MACRO_REGEX2 = s"$CHARS>".r
@@ -68,13 +67,15 @@ object NCMacroParser {
* - all macros should start with '<' and end with '>'.
* - '{A|B}' denotes either 'A' or 'B'.
* - '{A|B|_}' denotes either 'A', or 'B' or nothing ('_').
- * - '\' can be used only for escaping '{', '}', '|', and '_' special symbols.
+ * - '{A}[1,2]' denotes 'A' or 'A A'.
+ * - '{A}[0,1]' denotes 'A' or nothing (just like '{A|_}').
+ * - '\' can be used only for escaping '{}\<>_[]|,' special symbols.
*
* Examples:
- * "A {B|C} D" ⇒ "A B D", "A C D"
+ * "A {B|C}[1,2] D" ⇒ "A B D", "A C D", "A B B D", "A C C D"
* "A \{B\|C\} D" ⇒ "A {B|C} D"
* "A {B|_} D" ⇒ "A D", "A B D"
- * "A {_|B|C} D" ⇒ "A D", "A B D", "A C D"
+ * "A {_|B|C} {D}[1,2]" ⇒ "A D", "A B D", "A C D", "A D D", "A B D D",
"A C D D"
* "A <MACRO>" ⇒ "A ..." based on <MACRO> content.
* "A {<MACRO>|_}" ⇒ "A", "A ..." based on <MACRO> content.
*
@@ -84,218 +85,21 @@ object NCMacroParser {
class NCMacroParser {
import NCMacroParser._
- // Maximum number of expanded strings.
- private final val MAX_LIMIT = 500000
-
- // Macros.
private val macros = new java.util.concurrent.ConcurrentHashMap[String,
String]().asScala
/**
- * A token matched in the input string for LR parser.
- *
- * @param head Token text.
- * @param tail Remaining part of the input string, if any.
- */
- case class Token(head: String, tail: String)
-
- /**
- * Splits '{...}' option group into sequence of token. Note that
- * '|' separator will be excluded from tokens.
+ * Trims all duplicate spaces.
*
- * @param txt Option group text to parse.
+ * @param s
+ * @return
*/
- @throws[NCE]
- private[makro] def parseGroup(txt: String): Seq[Token] = {
- require(txt != null)
-
- var s = txt
- val last = s.length() - 1
-
- if (s.head != '{' || s.charAt(last) != '}')
- throw new NCE(s"Invalid option group: $txt")
-
- s = s.substring(1, last) // Trim out opening '{' and closing '}'.
-
- /**
- *
- * @param s Text to parse to get next group item.
- */
- def nextGroupItem(s: String): Option[Token] = {
- if (s.isEmpty)
- None
- else {
- var i = 0
- val len = s.length()
- var found = false
- var isEscape = false
- var depth = 0
-
- while (i < len && !found) {
- val ch = s.charAt(i)
-
- if (ch == '\\' && !isEscape)
- isEscape = true
- else {
- if (!isEscape)
- ch match {
- case '}' ⇒ depth -= 1
- case '{' ⇒ depth += 1
- case '|' ⇒ if (depth == 0) found = true
- case _ ⇒
- }
-
- isEscape = false
- }
-
- i += 1
- }
-
- if (depth != 0)
- throw new NCE(s"Uneven curly brackets: $txt")
- if (isEscape)
- throw new NCE(s"Incomplete '\\' escape usage: $txt")
-
- if (!found)
- Some(Token(s.substring(0), ""))
- else
- Some(Token(s.substring(0, i - 1), s.substring(i)))
- }
- }
-
- var toks = Seq.empty[Token]
-
- var item = nextGroupItem(s)
-
- while (item.isDefined) {
- toks :+= item.get
-
- item = nextGroupItem(item.get.tail)
- }
-
- toks
- }
+ private def trimDupSpaces(s: String) = U.splitTrimFilter(s, "
").mkString(" ")
/**
- * Gets the next lexical token.
- *
- * Special symbols are: ' ', '{', '}', '_' and '|'. Use `\` for escaping.
*
- * @param s Input string to get the next lexical token from.
+ * @param s
+ * @return
*/
- @throws[NCE]
- private[makro] def nextToken(s: String): Option[Token] = {
- require(s != null)
-
- def procMarker(fix: String): Option[Token] = {
- if (s.startsWith(fix))
- s.substring(fix.length).indexOf(fix) match {
- case -1 ⇒ throw new NCE(s"Uneven '$fix' marker: $s")
- case i ⇒
- val tail = i + 2 * fix.length
-
- Some(Token(s.substring(0, tail), s.substring(tail)))
- }
- else
- None
- }
-
- if (s.isEmpty)
- None
- else {
- // Check prefixes first.
- val tok = procMarker(DSL_FIX) match {
- case t: Some[Token] ⇒ t
- case None ⇒ procMarker(REGEX_FIX) match {
- case t: Some[Token] ⇒ t
- case None ⇒ None
- }
- }
-
- if (tok.isDefined)
- tok
- else {
- val len = s.length
- var found = false
- var isEscape = false
-
- if (s.startsWith(DSL_FIX)) {
- val i = s.substring(DSL_FIX.length).indexOf(DSL_FIX)
-
- if (i == -1)
- throw new NCE(s"Uneven '$DSL_FIX' marker: $s")
-
- val tail = i + 2 * DSL_FIX.length
-
- Some(Token(s.substring(0, tail), s.substring(tail)))
- }
- else if (s.charAt(0) == '{') { // Option group.
- var depth = 0
- var i = 1
-
- while (i < len && !found) {
- val ch = s.charAt(i)
-
- if (ch == '\\' && !isEscape)
- isEscape = true
- else {
- if (!isEscape)
- ch match {
- case '}' ⇒ if (depth == 0) found = true
else depth -= 1
- case '{' ⇒ depth += 1
- case _ ⇒
- }
-
- isEscape = false
- }
-
- i += 1
- }
-
- if (depth != 0 || !found)
- throw new NCE(s"Uneven curly brackets: $s")
- if (isEscape)
- throw new NCE(s"Incomplete '\\' escape usage: $s")
-
- Some(Token(s.substring(0, i), s.substring(i)))
- }
- else { // Not an option group.
- var i = 0
-
- while (i < len && !found) {
- val ch = s.charAt(i)
-
- if (ch == '\\' && !isEscape)
- isEscape = true
- else {
- if (!isEscape)
- ch match {
- case '|' | '_' | '}' ⇒ throw new
NCE(s"Suspicious '$ch' at pos $i: '$s'")
- case '{' ⇒ found = true // Found start of
the option group.
- case _ ⇒
- }
-
- isEscape = false
- }
-
- i += 1
- }
-
- if (isEscape)
- throw new NCE(s"Incomplete '\\' escape usage: $s")
-
- if (!found)
- Some(Token(s.substring(0), ""))
- else
- Some(Token(s.substring(0, i - 1), s.substring(i - 1)))
- }
- }
- }
- }
-
- // Trims all duplicate spaces.
- private def trimDupSpaces(s: String) = U.splitTrimFilter(s, "
").mkString(" ")
-
- // Processes '\' escapes for '{', '}', '|', and '_'.
private def processEscapes(s: String): String = {
val len = s.length()
val buf = new StringBuilder()
@@ -308,7 +112,7 @@ class NCMacroParser {
if (ch == '\\' && !isEscape)
isEscape = true
else {
- if (isEscape && ch != '|' && ch != '}' && ch != '{' && ch !=
'_')
+ if (isEscape && ESC_CHARS.contains(ch))
buf += '\\'
buf += ch
@@ -323,50 +127,6 @@ class NCMacroParser {
}
/**
- * LR-parser.
- *
- * @param s Text to expand.
- */
- @throws[NCE]
- private def expand0(s: String): Seq[String] = {
- require(s != null)
-
- if (s.isEmpty)
- Seq.empty
- else if (s.head == '/' && s.last == '/') // Don't macro-process regex.
- Seq(s)
- else {
- /**
- * Mixes (multiplies) given string with tails.
- *
- * @param s String to mix in.
- * @param tails Sequence of tail strings (potentially empty).
- */
- def mixTails(s: String, tails: Seq[String]): Seq[String] =
- if (tails.isEmpty) Seq(s)
- else tails.map(t ⇒ s + t)
-
- val res = nextToken(s) match {
- case None ⇒ Seq.empty
- case Some(tok) ⇒
- val tails = expand0(tok.tail)
- if (tok.head.head == '{') // Option group.
- parseGroup(tok.head).flatMap(x ⇒
- if (x.head == "_")
- mixTails("", tails)
- else
- expand0(x.head).flatMap(z ⇒ mixTails(z, tails))
- )
- else // Plain text.
- mixTails(tok.head, tails)
- }
- if (res.lengthCompare(MAX_LIMIT) > 0)
- throw new NCE(s"Maximum expansion length reached: $MAX_LIMIT")
- res
- }
- }
-
- /**
* Expand given string.
*
* @param txt Text to expand.
@@ -398,7 +158,7 @@ class NCMacroParser {
if (BROKEN_MACRO_REGEX1.findFirstIn(s).isDefined ||
BROKEN_MACRO_REGEX2.findFirstIn(s).isDefined)
throw new NCE(s"Suspicious or invalid macro in: $txt")
- U.distinct(expand0(s).toList map trimDupSpaces map processEscapes)
+ U.distinct(NCMacroCompiler.compile(s).toList map trimDupSpaces map
processEscapes)
}
/**
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
index 2b98e6d..2200729 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.g4
@@ -28,7 +28,9 @@ group: LCURLY list RCURLY minMax?;
minMax: LBR INT COMMA INT RBR;
list
: expr
- | list VERT (expr | UNDERSCORE)
+ | list VERT expr
+ | list VERT UNDERSCORE
+ | UNDERSCORE VERT list
;
LCURLY: '{';
RCURLY: '}';
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.interp
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.interp
index 16ca7b6..39bf6bb 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.interp
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDsl.interp
@@ -37,4 +37,4 @@ list
atn:
-[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 13, 61, 4, 2,
9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8,
3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 24, 10, 3, 12, 3, 14, 3, 27,
11, 3, 3, 4, 3, 4, 5, 4, 31, 10, 4, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 6, 5, 6,
39, 10, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3,
8, 3, 8, 5, 8, 54, 10, 8, 7, 8, 56, 10, 8, 12, 8, 14, 8, 59, 11, 8, 3, 8, 2, 4,
4, 14, 9, 2, 4, 6, 8, [...]
\ No newline at end of file
+[3, 24715, 42794, 33075, 47597, 16764, 15335, 30598, 22884, 3, 13, 65, 4, 2,
9, 2, 4, 3, 9, 3, 4, 4, 9, 4, 4, 5, 9, 5, 4, 6, 9, 6, 4, 7, 9, 7, 4, 8, 9, 8,
3, 2, 3, 2, 3, 3, 3, 3, 3, 3, 3, 3, 3, 3, 7, 3, 24, 10, 3, 12, 3, 14, 3, 27,
11, 3, 3, 4, 3, 4, 5, 4, 31, 10, 4, 3, 5, 3, 5, 3, 6, 3, 6, 3, 6, 3, 6, 5, 6,
39, 10, 6, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 7, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 5,
8, 52, 10, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 3, 8, 7, 8, 60, 10, 8, 12, 8, 14,
8, 63, 11, 8, 3, 8, 2, 4 [...]
\ No newline at end of file
diff --git
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java
index fc227ae..6cf0ca9 100644
---
a/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java
+++
b/nlpcraft/src/main/scala/org/apache/nlpcraft/common/makro/antlr4/NCMacroDslParser.java
@@ -425,11 +425,11 @@ public class NCMacroDslParser extends Parser {
public ExprContext expr() {
return getRuleContext(ExprContext.class,0);
}
+ public TerminalNode UNDERSCORE() { return
getToken(NCMacroDslParser.UNDERSCORE, 0); }
+ public TerminalNode VERT() { return
getToken(NCMacroDslParser.VERT, 0); }
public ListContext list() {
return getRuleContext(ListContext.class,0);
}
- public TerminalNode VERT() { return
getToken(NCMacroDslParser.VERT, 0); }
- public TerminalNode UNDERSCORE() { return
getToken(NCMacroDslParser.UNDERSCORE, 0); }
public ListContext(ParserRuleContext parent, int invokingState)
{
super(parent, invokingState);
}
@@ -459,52 +459,72 @@ public class NCMacroDslParser extends Parser {
int _alt;
enterOuterAlt(_localctx, 1);
{
- {
- setState(45);
- expr(0);
+ setState(49);
+ _errHandler.sync(this);
+ switch (_input.LA(1)) {
+ case LCURLY:
+ case INT:
+ case TXT:
+ {
+ setState(45);
+ expr(0);
+ }
+ break;
+ case UNDERSCORE:
+ {
+ setState(46);
+ match(UNDERSCORE);
+ setState(47);
+ match(VERT);
+ setState(48);
+ list(1);
+ }
+ break;
+ default:
+ throw new NoViableAltException(this);
}
_ctx.stop = _input.LT(-1);
- setState(55);
+ setState(59);
_errHandler.sync(this);
- _alt = getInterpreter().adaptivePredict(_input,4,_ctx);
+ _alt = getInterpreter().adaptivePredict(_input,5,_ctx);
while ( _alt!=2 &&
_alt!=org.antlr.v4.runtime.atn.ATN.INVALID_ALT_NUMBER ) {
if ( _alt==1 ) {
if ( _parseListeners!=null )
triggerExitRuleEvent();
_prevctx = _localctx;
{
- {
- _localctx = new ListContext(_parentctx,
_parentState);
- pushNewRecursionContext(_localctx,
_startState, RULE_list);
- setState(47);
- if (!(precpred(_ctx, 1))) throw new
FailedPredicateException(this, "precpred(_ctx, 1)");
- setState(48);
- match(VERT);
- setState(51);
+ setState(57);
_errHandler.sync(this);
- switch (_input.LA(1)) {
- case LCURLY:
- case INT:
- case TXT:
+ switch (
getInterpreter().adaptivePredict(_input,4,_ctx) ) {
+ case 1:
{
- setState(49);
+ _localctx = new
ListContext(_parentctx, _parentState);
+
pushNewRecursionContext(_localctx, _startState, RULE_list);
+ setState(51);
+ if (!(precpred(_ctx, 3))) throw
new FailedPredicateException(this, "precpred(_ctx, 3)");
+ setState(52);
+ match(VERT);
+ setState(53);
expr(0);
}
break;
- case UNDERSCORE:
+ case 2:
{
- setState(50);
+ _localctx = new
ListContext(_parentctx, _parentState);
+
pushNewRecursionContext(_localctx, _startState, RULE_list);
+ setState(54);
+ if (!(precpred(_ctx, 2))) throw
new FailedPredicateException(this, "precpred(_ctx, 2)");
+ setState(55);
+ match(VERT);
+ setState(56);
match(UNDERSCORE);
}
break;
- default:
- throw new
NoViableAltException(this);
- }
}
}
}
- setState(57);
+ setState(61);
_errHandler.sync(this);
- _alt =
getInterpreter().adaptivePredict(_input,4,_ctx);
+ _alt =
getInterpreter().adaptivePredict(_input,5,_ctx);
}
}
}
@@ -538,28 +558,31 @@ public class NCMacroDslParser extends Parser {
private boolean list_sempred(ListContext _localctx, int predIndex) {
switch (predIndex) {
case 1:
- return precpred(_ctx, 1);
+ return precpred(_ctx, 3);
+ case 2:
+ return precpred(_ctx, 2);
}
return true;
}
public static final String _serializedATN =
-
"\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\3\r=\4\2\t\2\4\3\t"+
+
"\3\u608b\ua72a\u8133\ub9ed\u417c\u3be7\u7786\u5964\3\rA\4\2\t\2\4\3\t"+
"\3\4\4\t\4\4\5\t\5\4\6\t\6\4\7\t\7\4\b\t\b\3\2\3\2\3\3\3\3\3\3\3\3\3\3"+
"\7\3\30\n\3\f\3\16\3\33\13\3\3\4\3\4\5\4\37\n\4\3\5\3\5\3\6\3\6\3\6\3"+
-
"\6\5\6\'\n\6\3\7\3\7\3\7\3\7\3\7\3\7\3\b\3\b\3\b\3\b\3\b\3\b\3\b\5\b\66"+
-
"\n\b\7\b8\n\b\f\b\16\b;\13\b\3\b\2\4\4\16\t\2\4\6\b\n\f\16\2\3\3\2\n\13"+
- "\2:\2\20\3\2\2\2\4\22\3\2\2\2\6\36\3\2\2\2\b
\3\2\2\2\n\"\3\2\2\2\f(\3"+
-
"\2\2\2\16.\3\2\2\2\20\21\5\4\3\2\21\3\3\2\2\2\22\23\b\3\1\2\23\24\5\6"+
-
"\4\2\24\31\3\2\2\2\25\26\f\3\2\2\26\30\5\6\4\2\27\25\3\2\2\2\30\33\3\2"+
-
"\2\2\31\27\3\2\2\2\31\32\3\2\2\2\32\5\3\2\2\2\33\31\3\2\2\2\34\37\5\b"+
- "\5\2\35\37\5\n\6\2\36\34\3\2\2\2\36\35\3\2\2\2\37\7\3\2\2\2
!\t\2\2\2"+
-
"!\t\3\2\2\2\"#\7\3\2\2#$\5\16\b\2$&\7\4\2\2%\'\5\f\7\2&%\3\2\2\2&\'\3"+
-
"\2\2\2\'\13\3\2\2\2()\7\5\2\2)*\7\n\2\2*+\7\b\2\2+,\7\n\2\2,-\7\6\2\2"+
-
"-\r\3\2\2\2./\b\b\1\2/\60\5\4\3\2\609\3\2\2\2\61\62\f\3\2\2\62\65\7\7"+
-
"\2\2\63\66\5\4\3\2\64\66\7\t\2\2\65\63\3\2\2\2\65\64\3\2\2\2\668\3\2\2"+
-
"\2\67\61\3\2\2\28;\3\2\2\29\67\3\2\2\29:\3\2\2\2:\17\3\2\2\2;9\3\2\2\2"+
- "\7\31\36&\659";
+
"\6\5\6\'\n\6\3\7\3\7\3\7\3\7\3\7\3\7\3\b\3\b\3\b\3\b\3\b\5\b\64\n\b\3"+
+
"\b\3\b\3\b\3\b\3\b\3\b\7\b<\n\b\f\b\16\b?\13\b\3\b\2\4\4\16\t\2\4\6\b"+
+
"\n\f\16\2\3\3\2\n\13\2?\2\20\3\2\2\2\4\22\3\2\2\2\6\36\3\2\2\2\b \3\2"+
+
"\2\2\n\"\3\2\2\2\f(\3\2\2\2\16\63\3\2\2\2\20\21\5\4\3\2\21\3\3\2\2\2\22"+
+
"\23\b\3\1\2\23\24\5\6\4\2\24\31\3\2\2\2\25\26\f\3\2\2\26\30\5\6\4\2\27"+
+
"\25\3\2\2\2\30\33\3\2\2\2\31\27\3\2\2\2\31\32\3\2\2\2\32\5\3\2\2\2\33"+
+
"\31\3\2\2\2\34\37\5\b\5\2\35\37\5\n\6\2\36\34\3\2\2\2\36\35\3\2\2\2\37"+
+ "\7\3\2\2\2
!\t\2\2\2!\t\3\2\2\2\"#\7\3\2\2#$\5\16\b\2$&\7\4\2\2%\'\5\f"+
+
"\7\2&%\3\2\2\2&\'\3\2\2\2\'\13\3\2\2\2()\7\5\2\2)*\7\n\2\2*+\7\b\2\2+"+
+
",\7\n\2\2,-\7\6\2\2-\r\3\2\2\2./\b\b\1\2/\64\5\4\3\2\60\61\7\t\2\2\61"+
+
"\62\7\7\2\2\62\64\5\16\b\3\63.\3\2\2\2\63\60\3\2\2\2\64=\3\2\2\2\65\66"+
+
"\f\5\2\2\66\67\7\7\2\2\67<\5\4\3\289\f\4\2\29:\7\7\2\2:<\7\t\2\2;\65\3"+
+
"\2\2\2;8\3\2\2\2<?\3\2\2\2=;\3\2\2\2=>\3\2\2\2>\17\3\2\2\2?=\3\2\2\2\b"+
+ "\31\36&\63;=";
public static final ATN _ATN =
new ATNDeserializer().deserialize(_serializedATN.toCharArray());
static {
diff --git a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
index 5faaedd..d6b73c2 100644
--- a/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
+++ b/nlpcraft/src/test/scala/org/apache/nlpcraft/NCTestElement.scala
@@ -21,6 +21,7 @@ import org.apache.nlpcraft.model.NCElement
import java.util
import scala.collection.JavaConverters._
+import scala.language.implicitConversions
/**
* Simple test element.
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
index 35a90e1..120c988 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroCompilerSpec.scala
@@ -59,6 +59,8 @@ class NCMacroCompilerSpec {
@Test
def testOkCompiler(): Unit = {
checkEq("A", Seq("A"))
+ checkEq("{_|A|_}", Seq("", "A"))
+ checkEq("{A|_}", Seq("", "A"))
checkEq(" A ", Seq("A"))
checkEq("A B", Seq("A B"))
checkEq("""A {Москва|_|\|}""", Seq("A", "A Москва", """A \|"""))
diff --git
a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroParserSpec.scala
b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroParserSpec.scala
index 8927d17..4e4b5bd 100644
---
a/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroParserSpec.scala
+++
b/nlpcraft/src/test/scala/org/apache/nlpcraft/common/makro/NCMacroParserSpec.scala
@@ -57,48 +57,30 @@ class NCMacroParserSpec {
/**
*
- * @param txt Text to find next token in.
- * @param tokHead Expected head value of the token.
- * @param tokTail Expected tail value of the token.
- */
- def testToken(txt: String, tokHead: String, tokTail: String): Unit = {
- val tok = parser.nextToken(txt)
-
- assertTrue(tok.get.head == tokHead)
- assertTrue(tok.get.tail == tokTail)
- }
-
- /**
- *
* @param txt Text to expand.
* @param exp Expected expansion strings.
*/
- def testParser(txt: String, exp: Seq[String]): Unit =
- assertTrue(parser.expand(txt).sorted == exp.sorted)
-
- /**
- *
- * @param txt Group text.
- * @param grps Sequence of group's elements.
- */
- def testGroup(txt: String, grps: Seq[String]): Unit = {
- val elms = parser.parseGroup(txt)
+ def testParser(txt: String, exp: Seq[String]): Unit = {
+ val z = parser.expand(txt).sorted
+ val w = exp.sorted
- assertTrue(grps == elms.map(_.head))
+ if (z != w)
+ println(s"$z != $w")
+
+ assertTrue(z == w)
}
-
- @Test
+
def testPerformance() {
val start = currentTime
val N = 50000
for (_ ← 0 to N)
- parser.expand("a {{{<C>}}|{_}} {c|d|e|f|g|h|j|k|l|n|m|p|r}")
+ parser.expand("a {{{<C>}}} {c|d|e|f|g|h|j|k|l|n|m|p|r}")
val duration = currentTime - start
- println(s"${N * 1000 / duration} ops/second.")
+ println(s"${N * 1000 / duration} expansions/sec.")
}
@Test
@@ -138,7 +120,7 @@ class NCMacroParserSpec {
))
testParser("a {b|_}. c", Seq(
- "a b. c",
+ "a b . c",
"a . c"
))
@@ -149,7 +131,7 @@ class NCMacroParserSpec {
testParser("""{`a`|\`a\`}""", Seq(
"`a`",
- "\\`a\\`"
+ """\`a\`"""
))
testParser("""a {/abc.\{\}*/|/d/} c""", Seq(
@@ -220,43 +202,6 @@ class NCMacroParserSpec {
}
@Test
- def testOptionGroup() {
- testGroup("{a {b|c} | d}", Seq("a {b|c} ", " d"))
- testGroup("{a|b}", Seq("a", "b"))
- testGroup("{a}", Seq("a"))
- testGroup("{{{a}}}", Seq("{{a}}"))
- testGroup("{{{a}}|{b}}", Seq("{{a}}", "{b}"))
- testGroup("{a {c}|b|_}", Seq("a {c}", "b", "_"))
- testGroup("""{/abc.\_/|\{\_\}}""", Seq("/abc.\\_/", "\\{\\_\\}"))
-
- ignoreNCE { parser.parseGroup("a"); assertTrue(false) }
- ignoreNCE { parser.parseGroup("{a"); assertTrue(false) }
- ignoreNCE { parser.parseGroup("a}"); assertTrue(false) }
- }
-
- @Test
- def testParseTokens() {
- testToken("""a \_ b""", """a \_ b""", "")
- testToken("""a \\\_ b""", """a \\\_ b""", "")
- testToken("""a \{\_\_\_\} b""", """a \{\_\_\_\} b""", "")
- testToken("""a{b\|\_\}|c}""", "a", """{b\|\_\}|c}""")
- testToken("""/\|\_\{\}/ a {bc|d}""", """/\|\_\{\}/ a """, """{bc|d}""")
- testToken("{a} b", "{a}", " b")
- testToken("{a|{c|d}}", "{a|{c|d}}", "")
- testToken("{a {c|d} xxx {f|g}} b", "{a {c|d} xxx {f|g}}", " b")
- testToken("c{a} b", "c", "{a} b")
- testToken("{{{a}}}", "{{{a}}}", "")
-
- ignoreNCE { parser.nextToken("a } b"); assertTrue(false) }
- ignoreNCE { parser.nextToken("{c b"); assertTrue(false) }
- ignoreNCE { parser.nextToken("a | b"); assertTrue(false) }
- ignoreNCE { parser.nextToken("a |_"); assertTrue(false) }
-
- assertTrue(parser.nextToken("").isEmpty)
- assertTrue(parser.nextToken(" ").isDefined)
- }
-
- @Test
def testLimit() {
ignoreNCE {
parser.expand("<METRICS> <USER> <BY> <WEBSITE> <BY> <SES> <BY>
<METRICS> <BY> <USER> <BY> <METRICS>")