Revision: 19149
http://sourceforge.net/p/gate/code/19149
Author: ian_roberts
Date: 2016-03-16 13:36:57 +0000 (Wed, 16 Mar 2016)
Log Message:
-----------
Allow more than just ASCII letters and numbers in bare unquoted term queries.
I've lifted a JavaCC lexer rule for Java identifiers, which allows any sequence
of Unicode letters and digits as long as it starts with a letter (the LETTER
class used here also includes "$" and "_").
Modified Paths:
--------------
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.java
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.jj
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserConstants.java
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserTokenManager.java
Modified:
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.java
2016-03-14 15:00:51 UTC (rev 19148)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.java
2016-03-16 13:36:57 UTC (rev 19149)
@@ -1386,8 +1386,8 @@
public ParseException generateParseException() {
jj_expentries.removeAllElements();
- boolean[] la1tokens = new boolean[45];
- for (int i = 0; i < 45; i++) {
+ boolean[] la1tokens = new boolean[47];
+ for (int i = 0; i < 47; i++) {
la1tokens[i] = false;
}
if (jj_kind >= 0) {
@@ -1406,7 +1406,7 @@
}
}
}
- for (int i = 0; i < 45; i++) {
+ for (int i = 0; i < 47; i++) {
if (la1tokens[i]) {
jj_expentry = new int[1];
jj_expentry[0] = i;
Modified:
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.jj
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.jj
2016-03-14 15:00:51 UTC (rev 19148)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.jj
2016-03-16 13:36:57 UTC (rev 19149)
@@ -523,7 +523,7 @@
| "\"REGEX\"" | "\\-">}
// special characters
-TOKEN:{<#special: "!" | "£" | "$" | "%" | "^" | "*" | "_" | "@" |
+TOKEN:{<#special: "!" | "£" | "$" | "€" | "¢" | "%" | "^" | "*" | "_" | "@" |
"'" | "~" | "#" | "`" | "¬">}
// reserved characters
@@ -553,11 +553,55 @@
TOKEN:{<regex: "REGEX">}
// token could be
-// any character from A-Z,a-z,0-9, special characters and escape sequences
-TOKEN:{<tok: ((["A"-"Z", "a"-"z", "0"-"9"]) | "\\[" | "\\]" | "\\("
- | "\\)" | "\\:" | "\\+" | "\\&" | "\\|" | "\\?" | "\\\\" | "\\."
- | "\"AND\"" | "\"OR\"" | "\"IN\"" | "\"OVER\"" | "\\\"" | "\\="
- | "\\{" | "\\}" | "\\<" | "\\>" | "\\," | "\"REGEX\"" |
<special>)+>}
+// special characters and escape sequences
+TOKEN:{
+ <tok: (
+ (( "\\[" | "\\]" | "\\("
+ | "\\)" | "\\:" | "\\+" | "\\&" | "\\|" | "\\?" | "\\\\" | "\\."
+ | "\"AND\"" | "\"OR\"" | "\"IN\"" | "\"OVER\"" | "\\\"" | "\\="
+ | "\\{" | "\\}" | "\\<" | "\\>" | "\\," | "\"REGEX\"" | <special>)+)
+ // Java identifier
+ | (<LETTER> (<LETTER>|<DIGIT>)* )
+ )>
+|
+ < #LETTER:
+ [
+ "\u0024",
+ "\u0041"-"\u005a",
+ "\u005f",
+ "\u0061"-"\u007a",
+ "\u00c0"-"\u00d6",
+ "\u00d8"-"\u00f6",
+ "\u00f8"-"\u00ff",
+ "\u0100"-"\u1fff",
+ "\u3040"-"\u318f",
+ "\u3300"-"\u337f",
+ "\u3400"-"\u3d2d",
+ "\u4e00"-"\u9fff",
+ "\uf900"-"\ufaff"
+ ]
+ >
+|
+ < #DIGIT:
+ [
+ "\u0030"-"\u0039",
+ "\u0660"-"\u0669",
+ "\u06f0"-"\u06f9",
+ "\u0966"-"\u096f",
+ "\u09e6"-"\u09ef",
+ "\u0a66"-"\u0a6f",
+ "\u0ae6"-"\u0aef",
+ "\u0b66"-"\u0b6f",
+ "\u0be7"-"\u0bef",
+ "\u0c66"-"\u0c6f",
+ "\u0ce6"-"\u0cef",
+ "\u0d66"-"\u0d6f",
+ "\u0e50"-"\u0e59",
+ "\u0ed0"-"\u0ed9",
+ "\u1040"-"\u1049"
+ ]
+ >
+}
// lexical analyser
Modified:
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserConstants.java
===================================================================
---
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserConstants.java
2016-03-14 15:00:51 UTC (rev 19148)
+++
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserConstants.java
2016-03-16 13:36:57 UTC (rev 19149)
@@ -32,6 +32,8 @@
int rightsquarebracket = 42;
int regex = 43;
int tok = 44;
+ int LETTER = 45;
+ int DIGIT = 46;
int DEFAULT = 0;
int IN_STRING = 1;
@@ -82,6 +84,8 @@
"\"]\"",
"\"REGEX\"",
"<tok>",
+ "<LETTER>",
+ "<DIGIT>",
};
}
Modified:
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserTokenManager.java
===================================================================
---
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserTokenManager.java
2016-03-14 15:00:51 UTC (rev 19148)
+++
mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserTokenManager.java
2016-03-16 13:36:57 UTC (rev 19149)
@@ -30,44 +30,44 @@
case 0:
if ((active0 & 0x40L) != 0L)
return 15;
- if ((active0 & 0x84800000000L) != 0L)
+ if ((active0 & 0x10000000000L) != 0L)
{
jjmatchedKind = 44;
- return 45;
+ return 2;
}
- if ((active0 & 0x10000000000L) != 0L)
+ if ((active0 & 0x84800000000L) != 0L)
{
jjmatchedKind = 44;
- return 2;
+ return 46;
}
return -1;
case 1:
+ if ((active0 & 0x4000000000L) != 0L)
+ return 46;
if ((active0 & 0x90800000000L) != 0L)
{
jjmatchedKind = 44;
jjmatchedPos = 1;
- return 45;
+ return 46;
}
- if ((active0 & 0x4000000000L) != 0L)
- return 45;
return -1;
case 2:
if ((active0 & 0x90800000000L) != 0L)
{
jjmatchedKind = 44;
jjmatchedPos = 2;
- return 45;
+ return 46;
}
return -1;
case 3:
+ if ((active0 & 0x10000000000L) != 0L)
+ return 46;
if ((active0 & 0x80800000000L) != 0L)
{
jjmatchedKind = 44;
jjmatchedPos = 3;
- return 45;
+ return 46;
}
- if ((active0 & 0x10000000000L) != 0L)
- return 45;
return -1;
default :
return -1;
@@ -169,7 +169,7 @@
return jjMoveStringLiteralDfa2_0(active0, 0x800000000L);
case 78:
if ((active0 & 0x4000000000L) != 0L)
- return jjStartNfaWithStates_0(1, 38, 45);
+ return jjStartNfaWithStates_0(1, 38, 46);
break;
case 86:
return jjMoveStringLiteralDfa2_0(active0, 0x10000000000L);
@@ -215,7 +215,7 @@
return jjMoveStringLiteralDfa4_0(active0, 0x80000000000L);
case 82:
if ((active0 & 0x10000000000L) != 0L)
- return jjStartNfaWithStates_0(3, 40, 45);
+ return jjStartNfaWithStates_0(3, 40, 46);
break;
case 85:
return jjMoveStringLiteralDfa4_0(active0, 0x800000000L);
@@ -237,11 +237,11 @@
{
case 83:
if ((active0 & 0x800000000L) != 0L)
- return jjStartNfaWithStates_0(4, 35, 45);
+ return jjStartNfaWithStates_0(4, 35, 46);
break;
case 88:
if ((active0 & 0x80000000000L) != 0L)
- return jjStartNfaWithStates_0(4, 43, 45);
+ return jjStartNfaWithStates_0(4, 43, 46);
break;
default :
break;
@@ -279,13 +279,34 @@
jjCheckNAdd(jjnextStates[start + 1]);
}
static final long[] jjbitVec0 = {
- 0x0L, 0x0L, 0x100800000000L, 0x0L
+ 0x0L, 0x0L, 0x100c00000000L, 0x0L
};
+static final long[] jjbitVec1 = {
+ 0x0L, 0x0L, 0x100000000000L, 0x0L
+};
+static final long[] jjbitVec2 = {
+ 0x1ff00000fffffffeL, 0xffffffffffffc000L, 0xffffffffL, 0x600000000000000L
+};
+static final long[] jjbitVec4 = {
+ 0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL
+};
+static final long[] jjbitVec5 = {
+ 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
+};
+static final long[] jjbitVec6 = {
+ 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffL, 0x0L
+};
+static final long[] jjbitVec7 = {
+ 0xffffffffffffffffL, 0xffffffffffffffffL, 0x0L, 0x0L
+};
+static final long[] jjbitVec8 = {
+ 0x3fffffffffffL, 0x0L, 0x0L, 0x0L
+};
private final int jjMoveNfa_0(int startState, int curPos)
{
int[] nextStates;
int startsAt = 0;
- jjnewStateCnt = 45;
+ jjnewStateCnt = 47;
int i = 1;
jjstateSet[0] = startState;
int j, kind = 0x7fffffff;
@@ -300,18 +321,14 @@
{
switch(jjstateSet[--i])
{
- case 45:
- if ((0x3ff04ba00000000L & l) != 0L)
+ case 1:
+ if ((0x3ff000000000000L & l) != 0L)
{
- if (kind > 44)
- kind = 44;
- jjCheckNAddStates(0, 2);
+ if (kind > 18)
+ kind = 18;
+ jjCheckNAdd(0);
}
- else if (curChar == 34)
- jjAddStates(3, 7);
- break;
- case 2:
- if ((0x3ff04ba00000000L & l) != 0L)
+ else if ((0x4ba00000000L & l) != 0L)
{
if (kind > 44)
kind = 44;
@@ -319,28 +336,26 @@
}
else if (curChar == 34)
jjAddStates(3, 7);
- break;
- case 1:
- if ((0x3ff04ba00000000L & l) != 0L)
- {
- if (kind > 44)
- kind = 44;
- jjCheckNAddStates(0, 2);
- }
- else if (curChar == 34)
- jjAddStates(3, 7);
else if (curChar == 38)
{
if (kind > 34)
kind = 34;
}
- if ((0x3ff000000000000L & l) != 0L)
+ if (curChar == 36)
{
- if (kind > 18)
- kind = 18;
- jjCheckNAdd(0);
+ if (kind > 44)
+ kind = 44;
+ jjCheckNAdd(46);
}
break;
+ case 2:
+ case 46:
+ if ((0x3ff001000000000L & l) == 0L)
+ break;
+ if (kind > 44)
+ kind = 44;
+ jjCheckNAdd(46);
+ break;
case 0:
if ((0x3ff000000000000L & l) == 0L)
break;
@@ -353,7 +368,7 @@
kind = 34;
break;
case 8:
- if ((0x3ff04ba00000000L & l) == 0L)
+ if ((0x4ba00000000L & l) == 0L)
break;
if (kind > 44)
kind = 44;
@@ -437,6 +452,13 @@
kind = 44;
jjCheckNAddStates(0, 2);
break;
+ case 45:
+ if (curChar != 36)
+ break;
+ if (kind > 44)
+ kind = 44;
+ jjCheckNAdd(46);
+ break;
default : break;
}
} while(i != startsAt);
@@ -448,15 +470,30 @@
{
switch(jjstateSet[--i])
{
- case 45:
- if ((0x47ffffffc7ffffffL & l) != 0L)
+ case 1:
+ if ((0x7fffffe87fffffeL & l) != 0L)
{
if (kind > 44)
kind = 44;
- jjCheckNAddStates(0, 2);
+ jjCheckNAdd(46);
}
else if (curChar == 92)
jjCheckNAddStates(8, 25);
+ else if (curChar == 124)
+ {
+ if (kind > 33)
+ kind = 33;
+ }
+ if ((0x40000001c0000001L & l) != 0L)
+ {
+ if (kind > 44)
+ kind = 44;
+ jjCheckNAddStates(0, 2);
+ }
+ else if (curChar == 65)
+ jjstateSet[jjnewStateCnt++] = 6;
+ else if (curChar == 79)
+ jjstateSet[jjnewStateCnt++] = 2;
break;
case 15:
if (curChar == 82)
@@ -471,39 +508,18 @@
jjstateSet[jjnewStateCnt++] = 16;
break;
case 2:
- if ((0x47ffffffc7ffffffL & l) != 0L)
+ if ((0x7fffffe87fffffeL & l) != 0L)
{
if (kind > 44)
kind = 44;
- jjCheckNAddStates(0, 2);
+ jjCheckNAdd(46);
}
- else if (curChar == 92)
- jjCheckNAddStates(8, 25);
if (curChar == 82)
{
if (kind > 33)
kind = 33;
}
break;
- case 1:
- if ((0x47ffffffc7ffffffL & l) != 0L)
- {
- if (kind > 44)
- kind = 44;
- jjCheckNAddStates(0, 2);
- }
- else if (curChar == 92)
- jjCheckNAddStates(8, 25);
- else if (curChar == 124)
- {
- if (kind > 33)
- kind = 33;
- }
- if (curChar == 65)
- jjstateSet[jjnewStateCnt++] = 6;
- else if (curChar == 79)
- jjstateSet[jjnewStateCnt++] = 2;
- break;
case 3:
if (curChar == 79)
jjstateSet[jjnewStateCnt++] = 2;
@@ -521,7 +537,7 @@
jjstateSet[jjnewStateCnt++] = 6;
break;
case 8:
- if ((0x47ffffffc7ffffffL & l) == 0L)
+ if ((0x40000001c0000001L & l) == 0L)
break;
if (kind > 44)
kind = 44;
@@ -626,6 +642,20 @@
kind = 44;
jjCheckNAddStates(0, 2);
break;
+ case 45:
+ if ((0x7fffffe87fffffeL & l) == 0L)
+ break;
+ if (kind > 44)
+ kind = 44;
+ jjCheckNAdd(46);
+ break;
+ case 46:
+ if ((0x7fffffe87fffffeL & l) == 0L)
+ break;
+ if (kind > 44)
+ kind = 44;
+ jjCheckNAdd(46);
+ break;
default : break;
}
} while(i != startsAt);
@@ -641,27 +671,41 @@
{
switch(jjstateSet[--i])
{
- case 45:
- case 8:
- if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+ case 1:
+ if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 44)
+ kind = 44;
+ jjCheckNAddStates(0, 2);
+ }
+ if (jjCanMove_1(hiByte, i1, i2, l1, l2))
+ {
+ if (kind > 44)
+ kind = 44;
+ jjCheckNAdd(46);
+ }
+ break;
+ case 2:
+ case 46:
+ if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
if (kind > 44)
kind = 44;
- jjCheckNAddStates(0, 2);
+ jjCheckNAdd(46);
break;
- case 2:
+ case 8:
if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
break;
if (kind > 44)
kind = 44;
jjCheckNAddStates(0, 2);
break;
- case 1:
- if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+ case 45:
+ if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
break;
if (kind > 44)
kind = 44;
- jjCheckNAddStates(0, 2);
+ jjCheckNAdd(46);
break;
default : break;
}
@@ -674,7 +718,7 @@
kind = 0x7fffffff;
}
++curPos;
- if ((i = jjnewStateCnt) == (startsAt = 45 - (jjnewStateCnt = startsAt)))
+ if ((i = jjnewStateCnt) == (startsAt = 47 - (jjnewStateCnt = startsAt)))
return curPos;
try { curChar = input_stream.readChar(); }
catch(java.io.IOException e) { return curPos; }
@@ -762,10 +806,10 @@
}
return jjStartNfa_1(0, active0);
}
-static final long[] jjbitVec1 = {
+static final long[] jjbitVec9 = {
0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL,
0xffffffffffffffffL
};
-static final long[] jjbitVec3 = {
+static final long[] jjbitVec10 = {
0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
};
private final int jjMoveNfa_1(int startState, int curPos)
@@ -867,7 +911,7 @@
switch(jjstateSet[--i])
{
case 5:
- if (jjCanMove_1(hiByte, i1, i2, l1, l2) && kind > 16)
+ if (jjCanMove_2(hiByte, i1, i2, l1, l2) && kind > 16)
kind = 16;
break;
default : break;
@@ -897,6 +941,8 @@
{
case 0:
return ((jjbitVec0[i2] & l2) != 0L);
+ case 32:
+ return ((jjbitVec1[i2] & l2) != 0L);
default :
return false;
}
@@ -906,26 +952,46 @@
switch(hiByte)
{
case 0:
- return ((jjbitVec3[i2] & l2) != 0L);
+ return ((jjbitVec4[i2] & l2) != 0L);
+ case 48:
+ return ((jjbitVec5[i2] & l2) != 0L);
+ case 49:
+ return ((jjbitVec6[i2] & l2) != 0L);
+ case 51:
+ return ((jjbitVec7[i2] & l2) != 0L);
+ case 61:
+ return ((jjbitVec8[i2] & l2) != 0L);
default :
- if ((jjbitVec1[i1] & l1) != 0L)
+ if ((jjbitVec2[i1] & l1) != 0L)
return true;
return false;
}
}
+private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1,
long l2)
+{
+ switch(hiByte)
+ {
+ case 0:
+ return ((jjbitVec10[i2] & l2) != 0L);
+ default :
+ if ((jjbitVec9[i1] & l1) != 0L)
+ return true;
+ return false;
+ }
+}
public static final String[] jjstrLiteralImages = {
"", null, null, null, null, null, null, null, null, null, null, null, null,
null, null, null, null, null, null, null, null, "\74\75", "\76\75", "\74",
"\76",
"\173", "\175", "\50", "\51", "\56", "\75", "\72", "\54", null, null,
"\115\111\116\125\123", "\53", "\77", "\111\116", "\55", "\117\126\105\122",
"\133", "\135",
-"\122\105\107\105\130", null, };
+"\122\105\107\105\130", null, null, null, };
public static final String[] lexStateNames = {
"DEFAULT",
"IN_STRING",
};
public static final int[] jjnewLexState = {
-1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1,
-1, -1, -1, -1, -1, -1,
- -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1,
+ -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1,
-1, -1, -1,
};
static final long[] jjtoToken = {
0x1fffffe60001L,
@@ -937,8 +1003,8 @@
0x1ffc0L,
};
protected SimpleCharStream input_stream;
-private final int[] jjrounds = new int[45];
-private final int[] jjstateSet = new int[90];
+private final int[] jjrounds = new int[47];
+private final int[] jjstateSet = new int[94];
StringBuffer image;
int jjimageLen;
int lengthOfMatch;
@@ -963,7 +1029,7 @@
{
int i;
jjround = 0x80000001;
- for (i = 45; i-- > 0;)
+ for (i = 47; i-- > 0;)
jjrounds[i] = 0x80000000;
}
public void ReInit(SimpleCharStream stream, int lexState)
This was sent by the SourceForge.net collaborative development platform, the
world's largest Open Source development site.
------------------------------------------------------------------------------
Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://pubads.g.doubleclick.net/gampad/clk?id=278785231&iu=/4140
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs