OK, I have a fix.

It's not perfect, though. I think there is a small design flaw in the way 
reluctant matches are handled, which makes a proper fix very difficult.

I have made sure that programs get dumped on test failures and also for 
interactive tests. (A small helper script is attached which, if put into the 
build directory, can be used to run RETest.)

I fixed a bug that caused various failures when the same compiler was used for 
multiple compilations. The compiler did a bit of cruising past the end of its 
program, chaining all sorts of goodies together. (This usually did not affect the 
program, but occasionally caused the compiler to throw an ArrayIndexOutOfBoundsException.)

Michael

? Clustering.patch
? Clustering-v2.patch
? RECompiler.java
? RETest.java
? Reluctant.patch
? build/run-tests.sh
? docs/RETest2.txt
? docs/RETest3.txt
Index: build/build-regexp.xml
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/build/build-regexp.xml,v
retrieving revision 1.6
diff -u -r1.6 build-regexp.xml
--- build/build-regexp.xml      2001/02/11 23:04:21     1.6
+++ build/build-regexp.xml      2001/02/18 12:44:26
@@ -8,6 +8,7 @@
     <!-- =================================================================== -->
     <!-- Initializes some variables                                          -->
     <!-- =================================================================== -->
+    <property file="${user.home}/.jakarta-regexp.properties"/>
     <property name="ant.home" value="."/>
     <property name="Name" value="Jakarta-Regexp"/>
     <property name="year" value="2001"/>
Index: docs/RETest.txt
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/docs/RETest.txt,v
retrieving revision 1.2
diff -u -r1.2 RETest.txt
--- docs/RETest.txt     2001/02/11 23:04:21     1.2
+++ docs/RETest.txt     2001/02/18 12:44:27
@@ -978,3 +978,37 @@
 www.test.com
 YES
 www.test.com
+
+#163
+abc.*?x+yz
+abcaaaaaxyzbbbbxyz
+YES
+abcaaaaaxyz
+
+#164
+abc.+?x+yz
+abcaaaaaxyzbbbbxyz
+YES
+abcaaaaaxyz
+
+#165
+a.+?(c|d)
+aaaacaaaaad
+YES
+aaaac
+c
+
+#166
+a.+(c|d)
+aaaacaaaaad
+YES
+aaaacaaaaad
+d
+
+#167
+a+?b+?c+?
+aaabccaaabbbccc
+YES
+aaabc
+
+
Index: src/java/org/apache/regexp/RECompiler.java
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/src/java/org/apache/regexp/RECompiler.java,v
retrieving revision 1.3
diff -u -r1.3 RECompiler.java
--- src/java/org/apache/regexp/RECompiler.java  2001/02/11 23:04:22     1.3
+++ src/java/org/apache/regexp/RECompiler.java  2001/02/18 12:44:32
@@ -71,6 +71,7 @@
  * @see recompile
  *
  * @author <a href="mailto:[EMAIL PROTECTED]">Jonathan Locke</a>
+ * @author <a href="mailto:[EMAIL PROTECTED]">Michael McCallum</a>
  * @version $Id: RECompiler.java,v 1.3 2001/02/11 23:04:22 jon Exp $
  */
 public class RECompiler
@@ -203,14 +204,29 @@
     void setNextOfEnd(int node, int pointTo)
     {
         // Traverse the chain until the next offset is 0
-        int next;
-        while ((next = instruction[node + RE.offsetNext]) != 0)
-        {
+        int next = instruction[node + RE.offsetNext];
+        // while the 'node' is not the last in the chain
+        // and the 'node' is not the last in the program.
+        while ( next != 0 && node < lenInstruction )
+        {
+            // if the node we are supposed to point to is in the chain then
+            // point to the end of the program instead.
+            // Michael McCallum <[EMAIL PROTECTED]>
+            // FIXME: // This is a _hack_ to stop infinite programs.
+            // I believe that the implementation of the reluctant matches is wrong but
+            // have not worked out a better way yet.
+            if ( node == pointTo ) {
+              pointTo = lenInstruction;
+            }
             node += next;
+            next = instruction[node + RE.offsetNext];
         }
-
-        // Point the last node in the chain to pointTo.
-        instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
+        // if we have reached the end of the program then dont set the pointTo.
+        // im not sure if this will break any thing but passes all the tests.
+        if ( node < lenInstruction ) {
+            // Point the last node in the chain to pointTo.
+            instruction[node + RE.offsetNext] = (char)(short)(pointTo - node);
+        }
     }
 
     /**
@@ -1258,13 +1274,18 @@
         setNextOfEnd(ret, end);
 
         // Hook the ends of each branch to the end node
-        for (int next = -1, i = ret; next != 0; next = instruction[i + 
RE.offsetNext], i += next)
+        int currentNode = ret;
+        int nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
+        // while the next node offset is non-zero and the current node is inside the program
+        while ( nextNodeOffset != 0 && currentNode < lenInstruction )
         {
             // If branch, make the end of the branch's operand chain point to the end 
node.
-            if (instruction[i + RE.offsetOpcode] == RE.OP_BRANCH)
+            if ( instruction[ currentNode + RE.offsetOpcode ] == RE.OP_BRANCH )
             {
-                setNextOfEnd(i + RE.nodeSize, end);
+                setNextOfEnd( currentNode + RE.nodeSize, end );
             }
+            nextNodeOffset = instruction[ currentNode + RE.offsetNext ];
+            currentNode += nextNodeOffset;
         }
 
         // Return the node list
Index: src/java/org/apache/regexp/REDebugCompiler.java
===================================================================
RCS file: 
/home/cvspublic/jakarta-regexp/src/java/org/apache/regexp/REDebugCompiler.java,v
retrieving revision 1.1
diff -u -r1.1 REDebugCompiler.java
--- src/java/org/apache/regexp/REDebugCompiler.java     2000/04/27 01:22:33     1.1
+++ src/java/org/apache/regexp/REDebugCompiler.java     2001/02/18 12:44:33
@@ -95,6 +95,8 @@
         hashOpcode.put(new Integer(RE.OP_CLOSE),            "OP_CLOSE");
         hashOpcode.put(new Integer(RE.OP_BACKREF),          "OP_BACKREF");
         hashOpcode.put(new Integer(RE.OP_POSIXCLASS),       "OP_POSIXCLASS");
+        hashOpcode.put(new Integer(RE.OP_OPEN_CLUSTER),      "OP_OPEN_CLUSTER");
+        hashOpcode.put(new Integer(RE.OP_CLOSE_CLUSTER),      "OP_CLOSE_CLUSTER");
     }
 
     /**
@@ -146,6 +148,38 @@
         // Return opcode as a string and opdata value
         return opcodeToString(opcode) + ", opdata = " + opdata;
     }
+
+    /**
+     * Inserts a node with a given opcode and opdata at insertAt.  The node relative 
+next
+     * pointer is initialized to 0.
+     * @param opcode Opcode for new node
+     * @param opdata Opdata for new node (only the low 16 bits are currently used)
+     * @param insertAt Index at which to insert the new node in the program * /
+    void nodeInsert(char opcode, int opdata, int insertAt) {
+        System.out.println( "====> " + opcode + " " + opdata + " " + insertAt );
+        PrintWriter writer = new PrintWriter( System.out );
+        dumpProgram( writer );
+        super.nodeInsert( opcode, opdata, insertAt );
+        System.out.println( "====< " );
+        dumpProgram( writer );
+        writer.flush();
+    }/**/
+
+
+    /**
+    * Appends a node to the end of a node chain
+    * @param node Start of node chain to traverse
+    * @param pointTo Node to have the tail of the chain point to * /
+    void setNextOfEnd(int node, int pointTo) {
+        System.out.println( "====> " + node + " " + pointTo );
+        PrintWriter writer = new PrintWriter( System.out );
+        dumpProgram( writer );
+        super.setNextOfEnd( node, pointTo );
+        System.out.println( "====< " );
+        dumpProgram( writer );
+        writer.flush();
+    }/**/
+
 
     /**
      * Dumps the current program to a PrintWriter
Index: src/java/org/apache/regexp/RETest.java
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/src/java/org/apache/regexp/RETest.java,v
retrieving revision 1.3
diff -u -r1.3 RETest.java
--- src/java/org/apache/regexp/RETest.java      2001/02/11 23:04:22     1.3
+++ src/java/org/apache/regexp/RETest.java      2001/02/18 12:44:35
@@ -65,6 +65,7 @@
  *
  * @author <a href="mailto:[EMAIL PROTECTED]">Jonathan Locke</a>
  * @author <a href="mailto:[EMAIL PROTECTED]">Jon S. Stevens</a>
+ * @author <a href="mailto:[EMAIL PROTECTED]">Michael McCallum</a>
  * @version $Id: RETest.java,v 1.3 2001/02/11 23:04:22 jon Exp $
  */
 public class RETest
@@ -144,10 +145,13 @@
             say("\n" + expr + "\n");
 
             // Show program for compiled expression
-            compiler.dumpProgram(new PrintWriter(System.out));
+            PrintWriter writer = new PrintWriter( System.out );
+            compiler.dumpProgram( writer );
+            writer.flush();
 
+            boolean running = true;
             // Test matching against compiled expression
-            while (true)
+            while ( running )
             {
                 // Read from keyboard
                 BufferedReader br = new BufferedReader(new 
InputStreamReader(System.in));
@@ -155,18 +159,26 @@
                 System.out.flush();
                 String match = br.readLine();
 
-                // Try a match against the keyboard input
-                if (r.match(match))
+                if ( match != null )
                 {
-                    say("Match successful.");
+                    // Try a match against the keyboard input
+                    if (r.match(match))
+                    {
+                        say("Match successful.");
+                    }
+                    else
+                    {
+                        say("Match failed.");
+                    }
+
+                    // Show subparen registers
+                    showParens(r);
                 }
                 else
                 {
-                    say("Match failed.");
+                    running = false;
+                    System.out.println();
                 }
-
-                // Show subparen registers
-                showParens(r);
             }
         }
         catch (Exception e)
@@ -187,8 +199,9 @@
     }
 
     /**
-     * Fail with an error
-     * @param s Failure description
+    * Fail with an error.
+    * Will print a big failure message to System.out.
+    * @param s Failure description
     */
     void fail(String s)
     {
@@ -199,8 +212,11 @@
         say("*******************************************************");
         say("\n");
         say(s);
-        say("");        
-        compiler.dumpProgram(new PrintWriter(System.out));
+        say("");
+        // make sure the writer gets flushed.
+        PrintWriter writer = new PrintWriter( System.out );
+        compiler.dumpProgram( writer );
+        writer.flush();
         say("\n");
     }
 
@@ -371,7 +387,9 @@
                     }
 
                     // Wasn't supposed to be an error
-                    fail("Produces the unexpected error \"" + e.getMessage() + "\"");
+                    String message = e.getMessage() == null ? e.toString() : 
+e.getMessage();
+                    fail("Produces an unexpected exception \"" + message + "\"");
+                    e.printStackTrace();
                 }
                 catch (Error e)
                 {
Index: xdocs/RETest.txt
===================================================================
RCS file: /home/cvspublic/jakarta-regexp/xdocs/RETest.txt,v
retrieving revision 1.2
diff -u -r1.2 RETest.txt
--- xdocs/RETest.txt    2001/02/11 23:04:23     1.2
+++ xdocs/RETest.txt    2001/02/18 12:44:36
@@ -978,3 +978,35 @@
 www.test.com
 YES
 www.test.com
+
+#163
+abc.*?x+yz
+abcaaaaaxyzbbbbxyz
+YES
+abcaaaaaxyz
+
+#164
+abc.+?x+yz
+abcaaaaaxyzbbbbxyz
+YES
+abcaaaaaxyz
+
+#165
+a.+?(c|d)
+aaaacaaaaad
+YES
+aaaac
+c
+
+#166
+a.+(c|d)
+aaaacaaaaad
+YES
+aaaacaaaaad
+d
+
+#167
+a+?b+?c+?
+aaabccaaabbbccc
+YES
+aaabc

run-tests.sh

Reply via email to