Revision: 19149
          http://sourceforge.net/p/gate/code/19149
Author:   ian_roberts
Date:     2016-03-16 13:36:57 +0000 (Wed, 16 Mar 2016)
Log Message:
-----------
Allow more than just ASCII letters and numbers in bare unquoted term queries - 
I've lifted a JavaCC lexer rule for Java identifiers, which allows any sequence 
of Unicode letters and digits as long as it starts with a letter.

Modified Paths:
--------------
    mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.java
    mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.jj
    mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserConstants.java
    mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserTokenManager.java

Modified: mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.java	2016-03-14 15:00:51 UTC (rev 19148)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.java	2016-03-16 13:36:57 UTC (rev 19149)
@@ -1386,8 +1386,8 @@
 
   public ParseException generateParseException() {
     jj_expentries.removeAllElements();
-    boolean[] la1tokens = new boolean[45];
-    for (int i = 0; i < 45; i++) {
+    boolean[] la1tokens = new boolean[47];
+    for (int i = 0; i < 47; i++) {
       la1tokens[i] = false;
     }
     if (jj_kind >= 0) {
@@ -1406,7 +1406,7 @@
         }
       }
     }
-    for (int i = 0; i < 45; i++) {
+    for (int i = 0; i < 47; i++) {
       if (la1tokens[i]) {
         jj_expentry = new int[1];
         jj_expentry[0] = i;

Modified: mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.jj
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.jj	2016-03-14 15:00:51 UTC (rev 19148)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParser.jj	2016-03-16 13:36:57 UTC (rev 19149)
@@ -523,7 +523,7 @@
                  | "\"REGEX\"" | "\\-">}
 
 // special characters
-TOKEN:{<#special: "!" | "£" | "$" | "%" | "^" | "*" | "_" | "@" | 
+TOKEN:{<#special: "!" | "£" | "$" | "€" | "¢" | "%" | "^" | "*" | "_" | "@" | 
                   "'" | "~" | "#" | "`" | "¬">}
 
 // reserved characters
@@ -553,11 +553,55 @@
 TOKEN:{<regex: "REGEX">}
 
 // token could be
-// any character from A-Z,a-z,0-9, special characters and escape sequences 
-TOKEN:{<tok: ((["A"-"Z", "a"-"z", "0"-"9"]) | "\\[" | "\\]" | "\\(" 
-              | "\\)" | "\\:" | "\\+" | "\\&" | "\\|" | "\\?" | "\\\\" | "\\." 
-              | "\"AND\"" | "\"OR\"" | "\"IN\"" | "\"OVER\"" | "\\\"" | "\\=" 
-              | "\\{" | "\\}" | "\\<" | "\\>" | "\\," | "\"REGEX\"" | <special>)+>}
+// special characters and escape sequences 
+TOKEN:{
+  <tok: (
+      ((  "\\[" | "\\]" | "\\(" 
+        | "\\)" | "\\:" | "\\+" | "\\&" | "\\|" | "\\?" | "\\\\" | "\\." 
+        | "\"AND\"" | "\"OR\"" | "\"IN\"" | "\"OVER\"" | "\\\"" | "\\=" 
+        | "\\{" | "\\}" | "\\<" | "\\>" | "\\," | "\"REGEX\"" | <special>)+)
+        // Java identifier
+        | (<LETTER> (<LETTER>|<DIGIT>)* )
+  )>
+|
+  < #LETTER:
+      [
+       "\u0024",
+       "\u0041"-"\u005a",
+       "\u005f",
+       "\u0061"-"\u007a",
+       "\u00c0"-"\u00d6",
+       "\u00d8"-"\u00f6",
+       "\u00f8"-"\u00ff",
+       "\u0100"-"\u1fff",
+       "\u3040"-"\u318f",
+       "\u3300"-"\u337f",
+       "\u3400"-"\u3d2d",
+       "\u4e00"-"\u9fff",
+       "\uf900"-"\ufaff"
+      ]
+  >
+|
+  < #DIGIT:
+      [
+       "\u0030"-"\u0039",
+       "\u0660"-"\u0669",
+       "\u06f0"-"\u06f9",
+       "\u0966"-"\u096f",
+       "\u09e6"-"\u09ef",
+       "\u0a66"-"\u0a6f",
+       "\u0ae6"-"\u0aef",
+       "\u0b66"-"\u0b6f",
+       "\u0be7"-"\u0bef",
+       "\u0c66"-"\u0c6f",
+       "\u0ce6"-"\u0cef",
+       "\u0d66"-"\u0d6f",
+       "\u0e50"-"\u0e59",
+       "\u0ed0"-"\u0ed9",
+       "\u1040"-"\u1049"
+      ]
+  >
+}
 
 // lexical analyser
 

Modified: mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserConstants.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserConstants.java	2016-03-14 15:00:51 UTC (rev 19148)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserConstants.java	2016-03-16 13:36:57 UTC (rev 19149)
@@ -32,6 +32,8 @@
   int rightsquarebracket = 42;
   int regex = 43;
   int tok = 44;
+  int LETTER = 45;
+  int DIGIT = 46;
 
   int DEFAULT = 0;
   int IN_STRING = 1;
@@ -82,6 +84,8 @@
     "\"]\"",
     "\"REGEX\"",
     "<tok>",
+    "<LETTER>",
+    "<DIGIT>",
   };
 
 }

Modified: mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserTokenManager.java
===================================================================
--- mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserTokenManager.java	2016-03-14 15:00:51 UTC (rev 19148)
+++ mimir/trunk/mimir-core/src/gate/mimir/search/query/parser/QueryParserTokenManager.java	2016-03-16 13:36:57 UTC (rev 19149)
@@ -30,44 +30,44 @@
       case 0:
          if ((active0 & 0x40L) != 0L)
             return 15;
-         if ((active0 & 0x84800000000L) != 0L)
+         if ((active0 & 0x10000000000L) != 0L)
          {
             jjmatchedKind = 44;
-            return 45;
+            return 2;
          }
-         if ((active0 & 0x10000000000L) != 0L)
+         if ((active0 & 0x84800000000L) != 0L)
          {
             jjmatchedKind = 44;
-            return 2;
+            return 46;
          }
          return -1;
       case 1:
+         if ((active0 & 0x4000000000L) != 0L)
+            return 46;
          if ((active0 & 0x90800000000L) != 0L)
          {
             jjmatchedKind = 44;
             jjmatchedPos = 1;
-            return 45;
+            return 46;
          }
-         if ((active0 & 0x4000000000L) != 0L)
-            return 45;
          return -1;
       case 2:
          if ((active0 & 0x90800000000L) != 0L)
          {
             jjmatchedKind = 44;
             jjmatchedPos = 2;
-            return 45;
+            return 46;
          }
          return -1;
       case 3:
+         if ((active0 & 0x10000000000L) != 0L)
+            return 46;
          if ((active0 & 0x80800000000L) != 0L)
          {
             jjmatchedKind = 44;
             jjmatchedPos = 3;
-            return 45;
+            return 46;
          }
-         if ((active0 & 0x10000000000L) != 0L)
-            return 45;
          return -1;
       default :
          return -1;
@@ -169,7 +169,7 @@
          return jjMoveStringLiteralDfa2_0(active0, 0x800000000L);
       case 78:
          if ((active0 & 0x4000000000L) != 0L)
-            return jjStartNfaWithStates_0(1, 38, 45);
+            return jjStartNfaWithStates_0(1, 38, 46);
          break;
       case 86:
          return jjMoveStringLiteralDfa2_0(active0, 0x10000000000L);
@@ -215,7 +215,7 @@
          return jjMoveStringLiteralDfa4_0(active0, 0x80000000000L);
       case 82:
          if ((active0 & 0x10000000000L) != 0L)
-            return jjStartNfaWithStates_0(3, 40, 45);
+            return jjStartNfaWithStates_0(3, 40, 46);
          break;
       case 85:
          return jjMoveStringLiteralDfa4_0(active0, 0x800000000L);
@@ -237,11 +237,11 @@
    {
       case 83:
          if ((active0 & 0x800000000L) != 0L)
-            return jjStartNfaWithStates_0(4, 35, 45);
+            return jjStartNfaWithStates_0(4, 35, 46);
          break;
       case 88:
          if ((active0 & 0x80000000000L) != 0L)
-            return jjStartNfaWithStates_0(4, 43, 45);
+            return jjStartNfaWithStates_0(4, 43, 46);
          break;
       default :
          break;
@@ -279,13 +279,34 @@
    jjCheckNAdd(jjnextStates[start + 1]);
 }
 static final long[] jjbitVec0 = {
-   0x0L, 0x0L, 0x100800000000L, 0x0L
+   0x0L, 0x0L, 0x100c00000000L, 0x0L
 };
+static final long[] jjbitVec1 = {
+   0x0L, 0x0L, 0x100000000000L, 0x0L
+};
+static final long[] jjbitVec2 = {
+   0x1ff00000fffffffeL, 0xffffffffffffc000L, 0xffffffffL, 0x600000000000000L
+};
+static final long[] jjbitVec4 = {
+   0x0L, 0x0L, 0x0L, 0xff7fffffff7fffffL
+};
+static final long[] jjbitVec5 = {
+   0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
+};
+static final long[] jjbitVec6 = {
+   0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffL, 0x0L
+};
+static final long[] jjbitVec7 = {
+   0xffffffffffffffffL, 0xffffffffffffffffL, 0x0L, 0x0L
+};
+static final long[] jjbitVec8 = {
+   0x3fffffffffffL, 0x0L, 0x0L, 0x0L
+};
 private final int jjMoveNfa_0(int startState, int curPos)
 {
    int[] nextStates;
    int startsAt = 0;
-   jjnewStateCnt = 45;
+   jjnewStateCnt = 47;
    int i = 1;
    jjstateSet[0] = startState;
    int j, kind = 0x7fffffff;
@@ -300,18 +321,14 @@
          {
             switch(jjstateSet[--i])
             {
-               case 45:
-                  if ((0x3ff04ba00000000L & l) != 0L)
+               case 1:
+                  if ((0x3ff000000000000L & l) != 0L)
                   {
-                     if (kind > 44)
-                        kind = 44;
-                     jjCheckNAddStates(0, 2);
+                     if (kind > 18)
+                        kind = 18;
+                     jjCheckNAdd(0);
                   }
-                  else if (curChar == 34)
-                     jjAddStates(3, 7);
-                  break;
-               case 2:
-                  if ((0x3ff04ba00000000L & l) != 0L)
+                  else if ((0x4ba00000000L & l) != 0L)
                   {
                      if (kind > 44)
                         kind = 44;
@@ -319,28 +336,26 @@
                   }
                   else if (curChar == 34)
                      jjAddStates(3, 7);
-                  break;
-               case 1:
-                  if ((0x3ff04ba00000000L & l) != 0L)
-                  {
-                     if (kind > 44)
-                        kind = 44;
-                     jjCheckNAddStates(0, 2);
-                  }
-                  else if (curChar == 34)
-                     jjAddStates(3, 7);
                   else if (curChar == 38)
                   {
                      if (kind > 34)
                         kind = 34;
                   }
-                  if ((0x3ff000000000000L & l) != 0L)
+                  if (curChar == 36)
                   {
-                     if (kind > 18)
-                        kind = 18;
-                     jjCheckNAdd(0);
+                     if (kind > 44)
+                        kind = 44;
+                     jjCheckNAdd(46);
                   }
                   break;
+               case 2:
+               case 46:
+                  if ((0x3ff001000000000L & l) == 0L)
+                     break;
+                  if (kind > 44)
+                     kind = 44;
+                  jjCheckNAdd(46);
+                  break;
                case 0:
                   if ((0x3ff000000000000L & l) == 0L)
                      break;
@@ -353,7 +368,7 @@
                      kind = 34;
                   break;
                case 8:
-                  if ((0x3ff04ba00000000L & l) == 0L)
+                  if ((0x4ba00000000L & l) == 0L)
                      break;
                   if (kind > 44)
                      kind = 44;
@@ -437,6 +452,13 @@
                   kind = 44;
                   jjCheckNAddStates(0, 2);
                   break;
+               case 45:
+                  if (curChar != 36)
+                     break;
+                  if (kind > 44)
+                     kind = 44;
+                  jjCheckNAdd(46);
+                  break;
                default : break;
             }
          } while(i != startsAt);
@@ -448,15 +470,30 @@
          {
             switch(jjstateSet[--i])
             {
-               case 45:
-                  if ((0x47ffffffc7ffffffL & l) != 0L)
+               case 1:
+                  if ((0x7fffffe87fffffeL & l) != 0L)
                   {
                      if (kind > 44)
                         kind = 44;
-                     jjCheckNAddStates(0, 2);
+                     jjCheckNAdd(46);
                   }
                   else if (curChar == 92)
                      jjCheckNAddStates(8, 25);
+                  else if (curChar == 124)
+                  {
+                     if (kind > 33)
+                        kind = 33;
+                  }
+                  if ((0x40000001c0000001L & l) != 0L)
+                  {
+                     if (kind > 44)
+                        kind = 44;
+                     jjCheckNAddStates(0, 2);
+                  }
+                  else if (curChar == 65)
+                     jjstateSet[jjnewStateCnt++] = 6;
+                  else if (curChar == 79)
+                     jjstateSet[jjnewStateCnt++] = 2;
                   break;
                case 15:
                   if (curChar == 82)
@@ -471,39 +508,18 @@
                      jjstateSet[jjnewStateCnt++] = 16;
                   break;
                case 2:
-                  if ((0x47ffffffc7ffffffL & l) != 0L)
+                  if ((0x7fffffe87fffffeL & l) != 0L)
                   {
                      if (kind > 44)
                         kind = 44;
-                     jjCheckNAddStates(0, 2);
+                     jjCheckNAdd(46);
                   }
-                  else if (curChar == 92)
-                     jjCheckNAddStates(8, 25);
                   if (curChar == 82)
                   {
                      if (kind > 33)
                         kind = 33;
                   }
                   break;
-               case 1:
-                  if ((0x47ffffffc7ffffffL & l) != 0L)
-                  {
-                     if (kind > 44)
-                        kind = 44;
-                     jjCheckNAddStates(0, 2);
-                  }
-                  else if (curChar == 92)
-                     jjCheckNAddStates(8, 25);
-                  else if (curChar == 124)
-                  {
-                     if (kind > 33)
-                        kind = 33;
-                  }
-                  if (curChar == 65)
-                     jjstateSet[jjnewStateCnt++] = 6;
-                  else if (curChar == 79)
-                     jjstateSet[jjnewStateCnt++] = 2;
-                  break;
                case 3:
                   if (curChar == 79)
                      jjstateSet[jjnewStateCnt++] = 2;
@@ -521,7 +537,7 @@
                      jjstateSet[jjnewStateCnt++] = 6;
                   break;
                case 8:
-                  if ((0x47ffffffc7ffffffL & l) == 0L)
+                  if ((0x40000001c0000001L & l) == 0L)
                      break;
                   if (kind > 44)
                      kind = 44;
@@ -626,6 +642,20 @@
                   kind = 44;
                   jjCheckNAddStates(0, 2);
                   break;
+               case 45:
+                  if ((0x7fffffe87fffffeL & l) == 0L)
+                     break;
+                  if (kind > 44)
+                     kind = 44;
+                  jjCheckNAdd(46);
+                  break;
+               case 46:
+                  if ((0x7fffffe87fffffeL & l) == 0L)
+                     break;
+                  if (kind > 44)
+                     kind = 44;
+                  jjCheckNAdd(46);
+                  break;
                default : break;
             }
          } while(i != startsAt);
@@ -641,27 +671,41 @@
          {
             switch(jjstateSet[--i])
             {
-               case 45:
-               case 8:
-                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+               case 1:
+                  if (jjCanMove_0(hiByte, i1, i2, l1, l2))
+                  {
+                     if (kind > 44)
+                        kind = 44;
+                     jjCheckNAddStates(0, 2);
+                  }
+                  if (jjCanMove_1(hiByte, i1, i2, l1, l2))
+                  {
+                     if (kind > 44)
+                        kind = 44;
+                     jjCheckNAdd(46);
+                  }
+                  break;
+               case 2:
+               case 46:
+                  if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 44)
                      kind = 44;
-                  jjCheckNAddStates(0, 2);
+                  jjCheckNAdd(46);
                   break;
-               case 2:
+               case 8:
                   if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 44)
                      kind = 44;
                   jjCheckNAddStates(0, 2);
                   break;
-               case 1:
-                  if (!jjCanMove_0(hiByte, i1, i2, l1, l2))
+               case 45:
+                  if (!jjCanMove_1(hiByte, i1, i2, l1, l2))
                      break;
                   if (kind > 44)
                      kind = 44;
-                  jjCheckNAddStates(0, 2);
+                  jjCheckNAdd(46);
                   break;
                default : break;
             }
@@ -674,7 +718,7 @@
          kind = 0x7fffffff;
       }
       ++curPos;
-      if ((i = jjnewStateCnt) == (startsAt = 45 - (jjnewStateCnt = startsAt)))
+      if ((i = jjnewStateCnt) == (startsAt = 47 - (jjnewStateCnt = startsAt)))
          return curPos;
       try { curChar = input_stream.readChar(); }
       catch(java.io.IOException e) { return curPos; }
@@ -762,10 +806,10 @@
    }
    return jjStartNfa_1(0, active0);
 }
-static final long[] jjbitVec1 = {
+static final long[] jjbitVec9 = {
   0xfffffffffffffffeL, 0xffffffffffffffffL, 0xffffffffffffffffL, 0xffffffffffffffffL
 };
-static final long[] jjbitVec3 = {
+static final long[] jjbitVec10 = {
    0x0L, 0x0L, 0xffffffffffffffffL, 0xffffffffffffffffL
 };
 private final int jjMoveNfa_1(int startState, int curPos)
@@ -867,7 +911,7 @@
             switch(jjstateSet[--i])
             {
                case 5:
-                  if (jjCanMove_1(hiByte, i1, i2, l1, l2) && kind > 16)
+                  if (jjCanMove_2(hiByte, i1, i2, l1, l2) && kind > 16)
                      kind = 16;
                   break;
                default : break;
@@ -897,6 +941,8 @@
    {
       case 0:
          return ((jjbitVec0[i2] & l2) != 0L);
+      case 32:
+         return ((jjbitVec1[i2] & l2) != 0L);
       default : 
          return false;
    }
@@ -906,26 +952,46 @@
    switch(hiByte)
    {
       case 0:
-         return ((jjbitVec3[i2] & l2) != 0L);
+         return ((jjbitVec4[i2] & l2) != 0L);
+      case 48:
+         return ((jjbitVec5[i2] & l2) != 0L);
+      case 49:
+         return ((jjbitVec6[i2] & l2) != 0L);
+      case 51:
+         return ((jjbitVec7[i2] & l2) != 0L);
+      case 61:
+         return ((jjbitVec8[i2] & l2) != 0L);
       default : 
-         if ((jjbitVec1[i1] & l1) != 0L)
+         if ((jjbitVec2[i1] & l1) != 0L)
             return true;
          return false;
    }
 }
+private static final boolean jjCanMove_2(int hiByte, int i1, int i2, long l1, long l2)
+{
+   switch(hiByte)
+   {
+      case 0:
+         return ((jjbitVec10[i2] & l2) != 0L);
+      default : 
+         if ((jjbitVec9[i1] & l1) != 0L)
+            return true;
+         return false;
+   }
+}
 public static final String[] jjstrLiteralImages = {
 "", null, null, null, null, null, null, null, null, null, null, null, null, 
 null, null, null, null, null, null, null, null, "\74\75", "\76\75", "\74", "\76", 
 "\173", "\175", "\50", "\51", "\56", "\75", "\72", "\54", null, null, 
 "\115\111\116\125\123", "\53", "\77", "\111\116", "\55", "\117\126\105\122", "\133", "\135", 
-"\122\105\107\105\130", null, };
+"\122\105\107\105\130", null, null, null, };
 public static final String[] lexStateNames = {
    "DEFAULT", 
    "IN_STRING", 
 };
 public static final int[] jjnewLexState = {
    -1, -1, -1, -1, -1, -1, 1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 0, -1, -1, -1, -1, -1, -1, -1, 
-   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
+   -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, -1, 
 };
 static final long[] jjtoToken = {
    0x1fffffe60001L, 
@@ -937,8 +1003,8 @@
    0x1ffc0L, 
 };
 protected SimpleCharStream input_stream;
-private final int[] jjrounds = new int[45];
-private final int[] jjstateSet = new int[90];
+private final int[] jjrounds = new int[47];
+private final int[] jjstateSet = new int[94];
 StringBuffer image;
 int jjimageLen;
 int lengthOfMatch;
@@ -963,7 +1029,7 @@
 {
    int i;
    jjround = 0x80000001;
-   for (i = 45; i-- > 0;)
+   for (i = 47; i-- > 0;)
       jjrounds[i] = 0x80000000;
 }
 public void ReInit(SimpleCharStream stream, int lexState)

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Transform Data into Opportunity.
Accelerate data analysis in your applications with
Intel Data Analytics Acceleration Library.
Click to learn more.
http://pubads.g.doubleclick.net/gampad/clk?id=278785231&iu=/4140
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to