http://nagoya.apache.org/bugzilla/show_bug.cgi?id=1884
*** shadow/1884 Tue Jun 12 19:01:32 2001
--- shadow/1884.tmp.23797 Tue Jul 10 05:43:26 2001
***************
*** 2,9 ****
| PatternMatcherInput(String, int, int) has several problems with Awk on ORO |
+----------------------------------------------------------------------------+
| Bug #: 1884 Product: ORO |
! | Status: ASSIGNED Version: Unknown |
! | Resolution: Platform: All |
| Severity: Normal OS/Version: All |
| Priority: Low Component: Main |
+----------------------------------------------------------------------------+
--- 2,9 ----
| PatternMatcherInput(String, int, int) has several problems with Awk on ORO |
+----------------------------------------------------------------------------+
| Bug #: 1884 Product: ORO |
! | Status: RESOLVED Version: Unknown |
! | Resolution: FIXED Platform: All |
| Severity: Normal OS/Version: All |
| Priority: Low Component: Main |
+----------------------------------------------------------------------------+
***************
*** 84,87 ****
This bug was likely caused when the last changes to PatternMatcherInput were
made. Even though Perl5Matcher was updated, AwkMatcher was probably not.
The bug is being assigned with the intent of working on it over the weekend
! of 06/16.
--- 84,340 ----
This bug was likely caused when the last changes to PatternMatcherInput were
made. Even though Perl5Matcher was updated, AwkMatcher was probably not.
The bug is being assigned with the intent of working on it over the weekend
! of 06/16.
!
! ------- Additional Comments From [EMAIL PROTECTED] 2001-07-10 05:43 -------
! Fixed the problem. Requesting that the original bug reporter verify the fix
! before closing the bug report. For the benefit of the bug reporter, I am
! including the diff.
!
! dfs 01/07/10 05:39:23
!
! Modified: src/java/org/apache/oro/text/awk AwkMatcher.java
! Log:
! Fixed the following bug reported by [EMAIL PROTECTED]:
!
! Using PatternMatcherInput(String input, int begin, int length) with Awk
! requires length = length_of_substring + begin instead of
! length_of_substring as the documentation indicates. Also, MatchResult
! beginOffset(int) and endOffset(int) return offsets measured from -begin
! instead of from zero (i.e., each reported offset appears to be too large
! by begin). There is no problem with Perl5.
!
! The fix is klugey and indicative of a need to redesign and reimplement
! the AwkMatcher input representation and traversal system.
!
! PR: 1884
!
! Revision Changes Path
! 1.6 +45 -31
! jakarta-oro/src/java/org/apache/oro/text/awk/AwkMatcher.java
!
! Index: AwkMatcher.java
! ===================================================================
! RCS file:
! /home/cvs/jakarta-oro/src/java/org/apache/oro/text/awk/AwkMatcher.java,v
! retrieving revision 1.5
! retrieving revision 1.6
! diff -u -r1.5 -r1.6
! --- AwkMatcher.java 2001/05/20 23:55:21 1.5
! +++ AwkMatcher.java 2001/07/10 12:39:18 1.6
! @@ -58,7 +58,7 @@
! */
!
! /*
! - * $Id: AwkMatcher.java,v 1.5 2001/05/20 23:55:21 dfs Exp $
! + * $Id: AwkMatcher.java,v 1.6 2001/07/10 12:39:18 dfs Exp $
! */
! import java.io.*;
!
! @@ -90,6 +90,15 @@
! private AwkPattern __awkPattern;
! private int __offsets[] = new int[2];
!
! + /**
! + * A kluge variable to make PatternMatcherInput matches work when
! + * their begin offset is non-zero. This kluge is caused by the
! + * misguided notion that AwkStreamInput could be overloaded to do
! + * both stream and fixed buffer matches. The whole input representation
! + * scheme has to be scrapped and redone. -- dfs 2001/07/10
! + */
! + private int __beginOffset;
! +
! public AwkMatcher() {
! __scratchBuffer = new AwkStreamInput();
! __scratchBuffer._endOfStreamReached = true;
! @@ -120,7 +129,7 @@
!
! __scratchBuffer._buffer = input;
! __scratchBuffer._bufferSize = input.length;
! - __scratchBuffer._bufferOffset = 0;
! + __scratchBuffer._bufferOffset = __beginOffset = 0;
! __scratchBuffer._endOfStreamReached = true;
! __streamSearchBuffer = __scratchBuffer;
! __offsets[0] = offset;
! @@ -203,7 +212,7 @@
!
! __awkPattern = (AwkPattern)pattern;
! __scratchBuffer._buffer = input.getBuffer();
! - __scratchBuffer._bufferOffset = input.getBeginOffset();
! + __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
! __offsets[0] = input.getCurrentOffset();
!
! __scratchBuffer._bufferSize = input.length();
! @@ -254,7 +263,7 @@
! __awkPattern = (AwkPattern)pattern;
! __scratchBuffer._buffer = input;
! __scratchBuffer._bufferSize = input.length;
! - __scratchBuffer._bufferOffset = 0;
! + __scratchBuffer._bufferOffset = __beginOffset = 0;
! __scratchBuffer._endOfStreamReached = true;
! __streamSearchBuffer = __scratchBuffer;
! __offsets[0] = 0;
! @@ -331,7 +340,7 @@
! __awkPattern = (AwkPattern)pattern;
! __scratchBuffer._buffer = input.getBuffer();
! __scratchBuffer._bufferSize = input.length();
! - __scratchBuffer._bufferOffset = input.getBeginOffset();
! + __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
! __offsets[0] = input.getBeginOffset();
! __scratchBuffer._endOfStreamReached = true;
! __streamSearchBuffer = __scratchBuffer;
! @@ -391,7 +400,7 @@
!
! __scratchBuffer._buffer = input;
! __scratchBuffer._bufferSize = input.length;
! - __scratchBuffer._bufferOffset = 0;
! + __scratchBuffer._bufferOffset = __beginOffset = 0;
! __scratchBuffer._endOfStreamReached = true;
! __streamSearchBuffer = __scratchBuffer;
! __lastMatchedBufferOffset = 0;
! @@ -499,23 +508,20 @@
! public boolean contains(PatternMatcherInput input, Pattern pattern) {
! __awkPattern = (AwkPattern)pattern;
! __scratchBuffer._buffer = input.getBuffer();
! - __scratchBuffer._bufferOffset = input.getBeginOffset();
! + __scratchBuffer._bufferOffset = __beginOffset = input.getBeginOffset();
! __lastMatchedBufferOffset = input.getCurrentOffset();
!
! // Begin anchor requires match occur at beginning of input
! // No need to adjust current offset if no match found.
! if(__awkPattern._hasBeginAnchor) {
! - int begin;
! -
! - begin = input.getBeginOffset();
! - if(begin != __lastMatchedBufferOffset ||
! - !__awkPattern._fastMap[__scratchBuffer._buffer[begin]]) {
! + if(__beginOffset != __lastMatchedBufferOffset ||
! + !__awkPattern._fastMap[__scratchBuffer._buffer[__beginOffset]]) {
! __lastMatchResult = null;
! return false;
! }
! }
!
! - __scratchBuffer._bufferSize = input.length();
! + __scratchBuffer._bufferSize = input.length();
! __scratchBuffer._endOfStreamReached = true;
! __streamSearchBuffer = __scratchBuffer;
! try {
! @@ -612,6 +618,7 @@
!
! __lastMatchedBufferOffset = input._currentOffset;
! __streamSearchBuffer = input;
! + __beginOffset = 0;
! _search();
! input._currentOffset = __lastMatchedBufferOffset;
! return (__lastMatchResult != null);
! @@ -620,13 +627,15 @@
!
! private int __streamMatchPrefix() throws IOException {
! int token, current = AwkPattern._START_STATE, lastState, transition;
! - int offset, initialOffset;
! + int offset, initialOffset, maxOffset;
! int lastMatchedOffset = -1;
! int[] tstateArray;
!
! offset = initialOffset = __offsets[0];
! + maxOffset = __streamSearchBuffer._bufferSize + __beginOffset;
! +
! test:
! - while(offset < __streamSearchBuffer._bufferSize) {
! + while(offset < maxOffset) {
! token = __streamSearchBuffer._buffer[offset++];
!
! if(current < __awkPattern._numStates) {
! @@ -638,25 +647,27 @@
! __awkPattern._createNewState(lastState, token, tstateArray);
! current = tstateArray[token];
! }
! +
! if(current == AwkPattern._INVALID_STATE){
! break test;
! - }
! - else if(__awkPattern._endStates.get(current)){
! + } else if(__awkPattern._endStates.get(current)){
! lastMatchedOffset = offset;
! }
! - if(offset == __streamSearchBuffer._bufferSize){
! - offset = __streamSearchBuffer._reallocate(initialOffset);
!
! + if(offset == maxOffset){
! + offset =
! + __streamSearchBuffer._reallocate(initialOffset) + __beginOffset;
! +
! + maxOffset = __streamSearchBuffer._bufferSize + __beginOffset;
! +
! // If we're at the end of the stream, don't reset values
! - if(offset != __streamSearchBuffer._bufferSize){
! + if(offset != maxOffset){
! if(lastMatchedOffset != -1)
! lastMatchedOffset-=initialOffset;
! initialOffset = 0;
! }
! -
! }
! - }
! - else
! + } else
! break;
! }
!
! @@ -669,7 +680,7 @@
! // End anchor requires match occur at end of input
! if(__awkPattern._hasEndAnchor &&
! (!__streamSearchBuffer._endOfStreamReached ||
! - lastMatchedOffset < __streamSearchBuffer._bufferSize))
! + lastMatchedOffset < __streamSearchBuffer._bufferSize + __beginOffset))
! return -1;
!
! return (lastMatchedOffset - initialOffset);
! @@ -685,7 +696,8 @@
! __lastMatchResult = null;
!
! while(true){
! - if(__lastMatchedBufferOffset >= __streamSearchBuffer._bufferSize){
! + if(__lastMatchedBufferOffset >=
! + __streamSearchBuffer._bufferSize + __beginOffset) {
! if(__streamSearchBuffer._endOfStreamReached){
! // Get rid of reference now that it should no longer be used.
! __streamSearchBuffer = null;
! @@ -697,8 +709,8 @@
! }
! }
!
! - for(position=__lastMatchedBufferOffset;
! - position < __streamSearchBuffer._bufferSize;
! + for(position = __lastMatchedBufferOffset;
! + position < __streamSearchBuffer._bufferSize + __beginOffset;
! position = __offsets[0] + 1) {
!
! __offsets[0] = position;
! @@ -707,16 +719,14 @@
!
! __lastMatchResult = new AwkMatchResult(
! new String(__streamSearchBuffer._buffer, __offsets[0],
! - tokensMatched),
! - __offsets[0] + __streamSearchBuffer._bufferOffset);
! + tokensMatched), __offsets[0]);
!
! __lastMatchedBufferOffset =
! (tokensMatched > 0 ? __offsets[1] + 1 : __offsets[0] + 1);
!
! return;
! } else if(__awkPattern._matchesNullString) {
! - __lastMatchResult = new AwkMatchResult(new String(),
! - position + __streamSearchBuffer._bufferOffset);
! + __lastMatchResult = new AwkMatchResult(new String(), position);
!
! __lastMatchedBufferOffset = position + 1;
!
! @@ -741,3 +751,7 @@
! public MatchResult getMatch() { return __lastMatchResult; }
!
! }
! +
! +
! +
! +