mmidy 2002/08/19 15:44:14
Modified: java/src/org/apache/xalan/templates Tag: xslt20
ElemAnalyzeString.java
java/src/org/apache/xpath/functions Tag: xslt20
FuncReplace.java FuncTokenize.java
java/src/org/apache/xpath/parser/regexp Tag: xslt20
BMPattern.java Token.java
Log:
Fix a couple of bugs in regular expressions
Revision Changes Path
No revision
No revision
1.1.2.2 +46 -14
xml-xalan/java/src/org/apache/xalan/templates/Attic/ElemAnalyzeString.java
Index: ElemAnalyzeString.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xalan/templates/Attic/ElemAnalyzeString.java,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -u -r1.1.2.1 -r1.1.2.2
--- ElemAnalyzeString.java 16 Aug 2002 21:23:47 -0000 1.1.2.1
+++ ElemAnalyzeString.java 19 Aug 2002 22:44:13 -0000 1.1.2.2
@@ -323,43 +323,75 @@
String selectResult = m_selectExpression.execute(xctxt).str();
XSequenceImpl matchSeq = new XSequenceImpl();
- XSequenceImpl noMatchSeq = new XSequenceImpl();;
-
-
+ XSequenceImpl noMatchSeq = new XSequenceImpl();
+
RegularExpression regexp = new RegularExpression(regexValue, flagsValue);
- int index = 0;
+ int groups = regexp.getNumberOfGroups();
+ Token tokenTree = regexp.getTokenTree();
+ Token child;
+ int index = 0;
int length = selectResult.length();
- int i=0, j=0;
+ int i=0, j=0, t=0;
while (index < length)
{
- int[] range = regexp.matchString(selectResult, index, length);
- int start = range[0];
- int end = range[1];
- if (end >0)
+ if(tokenTree.size() > 0)
{
- matchSeq.insertItemAt(new XString(selectResult.substring(start,
end)), i++);
- noMatchSeq.insertItemAt(new XString(selectResult.substring(index,
start)), j++);
+ for(i=0; i<tokenTree.size(); i++)
+ {
+ child = tokenTree.getChild(i);
+ regexp.compileToken(child);
+ int[] range = regexp.matchString(selectResult, index,
length);
+ int start = range[0];
+ int end = range[1];
+ if (end >=0)
+ {
+ if (child.getType() == Token.PAREN)
+ matchSeq.insertItemAt(new XString(selectResult.substring(start,
end)), t++);
+ noMatchSeq.insertItemAt(new
XString(selectResult.substring(index, start)), j++);
index = end;
+ }
+ else
+ {
+ noMatchSeq.insertItemAt(new XString(selectResult.substring(index)),
j++);
+ index = length;
+ }
+ }
}
else
{
+ int[] range = regexp.matchString(selectResult, index, length);
+ int start = range[0];
+ int end = range[1];
+ if (end >=0)
+ {
+ matchSeq.insertItemAt(new XString(selectResult.substring(start,
end)), i++);
+ noMatchSeq.insertItemAt(new
XString(selectResult.substring(index, start)), j++);
+ index = end;
+ }
+ else
+ {
noMatchSeq.insertItemAt(new XString(selectResult.substring(index)),
j++);
index = length;
- }
- if (m_nonMatchingSubstring != null)
+ }
+ }
+
+
+
+ if (m_nonMatchingSubstring != null)
{
m_nonMatchingSubstring.setRegexGroup(noMatchSeq);
xctxt.setSAXLocator(m_nonMatchingSubstring);
transformer.setCurrentElement(m_nonMatchingSubstring);
m_nonMatchingSubstring.execute(transformer);
}
- if (m_matchingSubstring != null)
+ if (m_matchingSubstring != null && matchSeq.getLength()>0)
{
m_matchingSubstring.setRegexGroup(matchSeq);
xctxt.setSAXLocator(m_matchingSubstring);
transformer.setCurrentElement(m_matchingSubstring);
m_matchingSubstring.execute(transformer);
}
+
}
}
finally
No revision
No revision
1.1.2.2 +9 -6
xml-xalan/java/src/org/apache/xpath/functions/Attic/FuncReplace.java
Index: FuncReplace.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xpath/functions/Attic/FuncReplace.java,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -u -r1.1.2.1 -r1.1.2.2
--- FuncReplace.java 16 Aug 2002 21:23:47 -0000 1.1.2.1
+++ FuncReplace.java 19 Aug 2002 22:44:13 -0000 1.1.2.2
@@ -87,7 +87,7 @@
String replace = m_arg2.execute(xctxt).str();
String flags = "";
if (m_args != null)
- flags = m_args[3].execute(xctxt).str();
+ flags = m_args[0].execute(xctxt).str();
RegularExpression regex = new RegularExpression(pattern, flags);
String outString = "";
@@ -104,6 +104,7 @@
while (index < length)
{
String[] s = new String[tokenTree.size()];
+ int t=0;
for(int i=0; i<tokenTree.size(); i++)
{
child = tokenTree.getChild(i);
@@ -111,15 +112,16 @@
int[] range = regex.matchString(input, index, length);
int start = range[0];
int end = range[1];
- if (end >0)
+ if (end >= 0)
{
- s[i] = input.substring(start, end);
+ if (child.getType() == Token.PAREN)
+ s[t++] = input.substring(start, end);
outString = outString + input.substring(index, start);
index = end;
}
else
{
- s[i] = "";
+ s[t++] = "";
outString = outString + input.substring(index);
index = length;
}
@@ -141,7 +143,8 @@
}
else
{
- repVars = repVars +
s[Integer.parseInt(String.valueOf(replace.charAt(indexVar+1)))];
+ // need to account for the fact that our array starts at 0
+ repVars = repVars +
s[Integer.parseInt(String.valueOf(replace.charAt(indexVar+1))) - 1];
start = indexVar + 2;
}
}
@@ -162,7 +165,7 @@
int[] range = regex.matchString(input, index, length);
int start = range[0];
int end = range[1];
- if (end >0)
+ if (end >= 0)
{
outString = outString + input.substring(index, start);
outString = outString + replace;
1.1.2.2 +1 -1
xml-xalan/java/src/org/apache/xpath/functions/Attic/FuncTokenize.java
Index: FuncTokenize.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xpath/functions/Attic/FuncTokenize.java,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -u -r1.1.2.1 -r1.1.2.2
--- FuncTokenize.java 16 Aug 2002 21:23:48 -0000 1.1.2.1
+++ FuncTokenize.java 19 Aug 2002 22:44:13 -0000 1.1.2.2
@@ -100,7 +100,7 @@
int[] range = regex.matchString(input, index, length);
int start = range[0];
int end = range[1];
- if (end >0)
+ if (end >= 0)
{
seq.insertItemAt(new XString(input.substring(index, start)),
i++);
index = end;
No revision
No revision
1.1.2.2 +1 -2
xml-xalan/java/src/org/apache/xpath/parser/regexp/Attic/BMPattern.java
Index: BMPattern.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xpath/parser/regexp/Attic/BMPattern.java,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -u -r1.1.2.1 -r1.1.2.2
--- BMPattern.java 16 Aug 2002 21:23:48 -0000 1.1.2.1
+++ BMPattern.java 19 Aug 2002 22:44:13 -0000 1.1.2.2
@@ -130,8 +130,7 @@
*/
public int matches(String str, int start, int limit) {
if (this.ignoreCase) return this.matchesIgnoreCase(str, start,
limit);
- System.out.println("<< " + this.pattern.toString());
- int plength = this.pattern.length;
+ int plength = this.pattern.length;
if (plength == 0) return start;
int index = start+plength;
while (index <= limit) {
1.1.2.2 +24 -20
xml-xalan/java/src/org/apache/xpath/parser/regexp/Attic/Token.java
Index: Token.java
===================================================================
RCS file:
/home/cvs/xml-xalan/java/src/org/apache/xpath/parser/regexp/Attic/Token.java,v
retrieving revision 1.1.2.1
retrieving revision 1.1.2.2
diff -u -r1.1.2.1 -r1.1.2.2
--- Token.java 16 Aug 2002 21:23:48 -0000 1.1.2.1
+++ Token.java 19 Aug 2002 22:44:13 -0000 1.1.2.2
@@ -67,26 +67,26 @@
static final boolean COUNTTOKENS = true;
static int tokens = 0;
- static final int CHAR = 0; // Literal char
- static final int DOT = 11; // .
- static final int CONCAT = 1; // XY
- static final int UNION = 2; // X|Y|Z
- static final int CLOSURE = 3; // X*
- static final int RANGE = 4; // [a-zA-Z] etc.
- static final int NRANGE = 5; // [^a-zA-Z] etc.
- static final int PAREN = 6; // (X) or (?:X)
- static final int EMPTY = 7; //
- static final int ANCHOR = 8; // ^ $ \b \B \< \> \A \Z \z
- static final int NONGREEDYCLOSURE = 9; // *? +?
- static final int STRING = 10; // strings
- static final int BACKREFERENCE = 12; // back references
- static final int LOOKAHEAD = 20; // (?=...)
- static final int NEGATIVELOOKAHEAD = 21; // (?!...)
- static final int LOOKBEHIND = 22; // (?<=...)
- static final int NEGATIVELOOKBEHIND = 23; // (?<!...)
- static final int INDEPENDENT = 24; // (?>...)
- static final int MODIFIERGROUP = 25; // (?ims-ims:...)
- static final int CONDITION = 26; // (?(...)yes|no)
+ public static final int CHAR = 0; // Literal char
+ public static final int DOT = 11; // .
+ public static final int CONCAT = 1; // XY
+ public static final int UNION = 2; // X|Y|Z
+ public static final int CLOSURE = 3; // X*
+ public static final int RANGE = 4; // [a-zA-Z] etc.
+ public static final int NRANGE = 5; // [^a-zA-Z] etc.
+ public static final int PAREN = 6; // (X) or (?:X)
+ public static final int EMPTY = 7; //
+ public static final int ANCHOR = 8; // ^ $ \b \B \< \> \A \Z \z
+ public static final int NONGREEDYCLOSURE = 9; // *? +?
+ public static final int STRING = 10; // strings
+ public static final int BACKREFERENCE = 12; // back references
+ public static final int LOOKAHEAD = 20; // (?=...)
+ public static final int NEGATIVELOOKAHEAD = 21; // (?!...)
+ public static final int LOOKBEHIND = 22; // (?<=...)
+ public static final int NEGATIVELOOKBEHIND = 23; // (?<!...)
+ public static final int INDEPENDENT = 24; // (?>...)
+ public static final int MODIFIERGROUP = 25; // (?ims-ims:...)
+ public static final int CONDITION = 26; // (?(...)yes|no)
static final int UTF16_MAX = 0x10ffff;
@@ -273,6 +273,10 @@
}
int getChar() {
return -1;
+ }
+
+ public int getType() {
+ return type;
}
public String toString() {
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]