Author: borisk
Date: Thu Jun 19 02:55:58 2008
New Revision: 669438
URL: http://svn.apache.org/viewvc?rev=669438&view=rev
Log:
Make regular expressions thread-safe and use runtime memory manager when
executing. Add new allMatches() function for advanced use-cases. Patch by John
Snelson (XERCESC-1803).
Modified:
xerces/c/trunk/src/xercesc/util/regx/BMPattern.cpp
xerces/c/trunk/src/xercesc/util/regx/BMPattern.hpp
xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp
xerces/c/trunk/src/xercesc/util/regx/RegularExpression.hpp
Modified: xerces/c/trunk/src/xercesc/util/regx/BMPattern.cpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/BMPattern.cpp?rev=669438&r1=669437&r2=669438&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/BMPattern.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/BMPattern.cpp Thu Jun 19 02:55:58 2008
@@ -99,7 +99,7 @@
// ---------------------------------------------------------------------------
// BMPattern: matches methods
// ---------------------------------------------------------------------------
-int BMPattern::matches(const XMLCh* const content, XMLSize_t start, XMLSize_t
limit) {
+int BMPattern::matches(const XMLCh* const content, XMLSize_t start, XMLSize_t
limit) const {
const XMLSize_t patternLen = XMLString::stringLen(fPattern);
// Uppercase Content
Modified: xerces/c/trunk/src/xercesc/util/regx/BMPattern.hpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/BMPattern.hpp?rev=669438&r1=669437&r2=669438&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/BMPattern.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/BMPattern.hpp Thu Jun 19 02:55:58 2008
@@ -100,7 +100,7 @@
* This method will perform a match of the given content against a
* predefined pattern.
*/
- int matches(const XMLCh* const content, XMLSize_t start, XMLSize_t
limit);
+ int matches(const XMLCh* const content, XMLSize_t start, XMLSize_t
limit) const;
//@}
Modified: xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp?rev=669438&r1=669437&r2=669438&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/RegularExpression.cpp Thu Jun 19
02:55:58 2008
@@ -57,7 +57,7 @@
RangeToken* RegularExpression::fWordRange = 0;
bool RegularExpression::matchIgnoreCase(const XMLInt32 ch1,
- const XMLInt32 ch2)
+ const XMLInt32 ch2) const
{
if (ch1 >= 0x10000)
{
@@ -118,6 +118,7 @@
, fOffsets(0)
, fMatch(0)
, fString(0)
+ , fOptions(0)
, fMemoryManager(manager)
{
}
@@ -132,6 +133,7 @@
, fOffsets(0)
, fMatch(0)
, fString(src->fString)
+ , fOptions(src->fOptions)
, fMemoryManager(src->fMemoryManager)
{
if(src->fOffsets)
@@ -155,6 +157,7 @@
fSize=other.fSize;
fStringMaxLen=other.fStringMaxLen;
fString=other.fString;
+ fOptions=other.fOptions;
if (fOffsets)
fMemoryManager->deallocate(fOffsets);//delete [] fOffsets;
fOffsets=0;
@@ -194,7 +197,8 @@
, const XMLSize_t stringLen
, const XMLSize_t start
, const XMLSize_t limit
- , const int noClosures)
+ , const int noClosures
+ , const unsigned int options)
{
fString = string;
fStringMaxLen = stringLen;
@@ -212,6 +216,7 @@
}
fSize = noClosures;
+ fOptions = options;
for (int i = 0; i< fSize; i++)
fOffsets[i] = -1;
@@ -441,7 +446,7 @@
// RegularExpression: Matching methods
// ---------------------------------------------------------------------------
bool RegularExpression::matches(const char* const expression
- , MemoryManager* const manager)
+ , MemoryManager* const manager) const
{
XMLCh* tmpBuf = XMLString::transcode(expression, manager);
ArrayJanitor<XMLCh> janBuf(tmpBuf, manager);
@@ -450,7 +455,7 @@
bool RegularExpression::matches(const char* const expression
, const XMLSize_t start, const XMLSize_t end
- , MemoryManager* const manager)
+ , MemoryManager* const manager) const
{
XMLCh* tmpBuf = XMLString::transcode(expression, manager);
@@ -460,7 +465,7 @@
bool RegularExpression::matches(const char* const expression
, Match* const match
- , MemoryManager* const manager)
+ , MemoryManager* const manager) const
{
XMLCh* tmpBuf = XMLString::transcode(expression, manager);
@@ -470,7 +475,7 @@
bool RegularExpression::matches(const char* const expression, const XMLSize_t
start
, const XMLSize_t end, Match* const pMatch
- , MemoryManager* const manager)
+ , MemoryManager* const manager) const
{
XMLCh* tmpBuf = XMLString::transcode(expression, manager);
@@ -482,34 +487,34 @@
// ---------------------------------------------------------------------------
// RegularExpression: Matching methods - Wide char version
// ---------------------------------------------------------------------------
-bool RegularExpression::matches(const XMLCh* const expression, MemoryManager*
const manager)
+bool RegularExpression::matches(const XMLCh* const expression, MemoryManager*
const manager) const
{
return matches(expression, 0, XMLString::stringLen(expression), 0,
manager);
}
bool RegularExpression::matches(const XMLCh* const expression
, const XMLSize_t start, const XMLSize_t end
- , MemoryManager* const manager)
+ , MemoryManager* const manager) const
{
return matches(expression, start, end, 0, manager);
}
bool RegularExpression::matches(const XMLCh* const expression
, Match* const match
- , MemoryManager* const manager)
+ , MemoryManager* const manager) const
{
return matches(expression, 0, XMLString::stringLen(expression), match,
manager);
}
bool RegularExpression::matches(const XMLCh* const expression, const XMLSize_t
start
, const XMLSize_t end, Match* const pMatch
- , MemoryManager* const manager)
+ , MemoryManager* const manager) const
{
Context context(manager);
XMLSize_t strLength = XMLString::stringLen(expression);
- context.reset(expression, strLength, start, end, fNoClosures);
+ context.reset(expression, strLength, start, end, fNoClosures, fOptions);
bool adoptMatch = false;
Match* lMatch = pMatch;
@@ -518,7 +523,7 @@
lMatch->setNoGroups(fNoGroups);
}
else if (fHasBackReferences) {
- lMatch = new (fMemoryManager) Match(fMemoryManager);
+ lMatch = new (manager) Match(manager);
lMatch->setNoGroups(fNoGroups);
adoptMatch = true;
}
@@ -669,21 +674,23 @@
// ---------------------------------------------------------------------------
// RegularExpression: Tokenize methods
// ---------------------------------------------------------------------------
-RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const char* const
expression)
+RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const char* const
expression,
+ MemoryManager* const
manager) const
{
- XMLCh* tmpBuf = XMLString::transcode(expression, fMemoryManager);
- ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
- return tokenize(tmpBuf, 0, XMLString::stringLen(tmpBuf));
+ XMLCh* tmpBuf = XMLString::transcode(expression, manager);
+ ArrayJanitor<XMLCh> janBuf(tmpBuf, manager);
+ return tokenize(tmpBuf, 0, XMLString::stringLen(tmpBuf), manager);
}
RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const char* const
expression,
- const XMLSize_t start,
const XMLSize_t end)
+ const XMLSize_t start,
const XMLSize_t end,
+ MemoryManager* const
manager) const
{
- XMLCh* tmpBuf = XMLString::transcode(expression, fMemoryManager);
- ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
- return tokenize(tmpBuf, start, end);
+ XMLCh* tmpBuf = XMLString::transcode(expression, manager);
+ ArrayJanitor<XMLCh> janBuf(tmpBuf, manager);
+ return tokenize(tmpBuf, start, end, manager);
}
@@ -691,126 +698,75 @@
// ---------------------------------------------------------------------------
// RegularExpression: Tokenize methods - Wide char version
// ---------------------------------------------------------------------------
-RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const XMLCh* const
expression)
-{
- return tokenize(expression, 0, XMLString::stringLen(expression), 0);
-}
-
RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const XMLCh* const
expression,
- const XMLSize_t start,
const XMLSize_t end)
+ MemoryManager* const
manager) const
{
- return tokenize(expression, start, end, 0);
+ return tokenize(expression, 0, XMLString::stringLen(expression), manager);
}
-RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const XMLCh* const
expression,
+RefArrayVectorOf<XMLCh>* RegularExpression::tokenize(const XMLCh* const
matchString,
const XMLSize_t start,
const XMLSize_t end,
- RefVectorOf<Match> *subEx)
+ MemoryManager* const
manager) const
{
-
- RefArrayVectorOf<XMLCh>* tokenStack = new (fMemoryManager)
RefArrayVectorOf<XMLCh>(16, true, fMemoryManager);
-
- Context context(fMemoryManager);
-
- XMLSize_t strLength = XMLString::stringLen(expression);
-
- context.reset(expression, strLength, start, end, fNoClosures);
-
- Match* lMatch = 0;
- bool adoptMatch = false;
-
- if (subEx || fHasBackReferences) {
- lMatch = new (fMemoryManager) Match(fMemoryManager);
- adoptMatch = true;
- lMatch->setNoGroups(fNoGroups);
+ // check if matches zero length string - throw error if so
+ if(matches(XMLUni::fgZeroLenString, manager)){
+ ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Regex_RepPatMatchesZeroString, manager);
}
+
+ RefVectorOf<Match> *subEx = new (manager) RefVectorOf<Match>(10, true,
manager);
+ Janitor<RefVectorOf<Match> > janSubEx(subEx);
- if (context.fAdoptMatch)
- delete context.fMatch;
-
- context.fMatch = lMatch;
- context.fAdoptMatch = adoptMatch;
+ allMatches(matchString, start, end, subEx, manager);
+ RefArrayVectorOf<XMLCh> *tokens = new (manager)
RefArrayVectorOf<XMLCh>(16, true, manager);
XMLSize_t tokStart = start;
- XMLSize_t matchStart = start;
- for (; matchStart <= end; matchStart++) {
+ unsigned int i = 0;
+ for(; i < subEx->size(); ++i) {
+ Match *match = subEx->elementAt(i);
+ XMLSize_t matchStart = match->getStartPos(0);
- int iMatchEnd = match(&context, fOperations, matchStart, 1);
+ XMLCh *token = (XMLCh*)manager->allocate((matchStart + 1 - tokStart) *
sizeof(XMLCh));
+ XMLString::subString(token, matchString, tokStart, matchStart,
manager);
+ tokens->addElement(token);
- if (iMatchEnd != -1) {
- XMLSize_t matchEnd=iMatchEnd;
-
- if (context.fMatch != 0) {
- context.fMatch->setStartPos(0, (int)context.fStart);
- context.fMatch->setEndPos(0, (int)matchEnd);
- }
-
- if (subEx){
- subEx->addElement(context.fMatch);
- lMatch = new (fMemoryManager) Match(*(context.fMatch));
- adoptMatch = true;
+ tokStart = match->getEndPos(0);
+ }
- context.fAdoptMatch = adoptMatch;
- context.fMatch = lMatch;
- }
+ XMLCh *token = (XMLCh*)manager->allocate((end + 1 - tokStart) *
sizeof(XMLCh));
+ XMLString::subString(token, matchString, tokStart, end, manager);
+ tokens->addElement(token);
- XMLCh* token;
- if (tokStart == matchStart){
+ return tokens;
+}
- if (tokStart == strLength){
- tokStart--;
- break;
- }
+void RegularExpression::allMatches(const XMLCh* const matchString, const
XMLSize_t start, const XMLSize_t end,
+ RefVectorOf<Match> *subEx, MemoryManager*
const manager) const
+{
+ Context context(manager);
+ context.reset(matchString, XMLString::stringLen(matchString), start, end,
fNoClosures, fOptions);
- token = (XMLCh*) fMemoryManager->allocate(sizeof(XMLCh));//new
XMLCh[1];
- token[0] = chNull;
+ context.fMatch = new (manager) Match(manager);
+ context.fMatch->setNoGroups(fNoGroups);
+ context.fAdoptMatch = true;
- // When you tokenize using zero string, will return each
- // token in the string. Since the zero string will also
- // match the start/end characters, resulting in empty
- // tokens, we ignore them and do not add them to the stack.
- if (!XMLString::equals(fPattern, &chNull))
- tokenStack->addElement(token);
- else
- fMemoryManager->deallocate(token);//delete[] token;
-
- } else {
- token = (XMLCh*) fMemoryManager->allocate
- (
- (matchStart + 1 - tokStart) * sizeof(XMLCh)
- );//new XMLCh[matchStart + 1 - tokStart];
- XMLString::subString(token, expression, tokStart, matchStart,
fMemoryManager);
- tokenStack->addElement(token);
- }
+ XMLSize_t matchStart = start;
+ while(matchStart <= end) {
+ XMLSize_t matchEnd = match(&context, fOperations, matchStart, 1);
+ if(matchEnd != -1) {
+ context.fMatch->setStartPos(0, matchStart);
+ context.fMatch->setEndPos(0, matchEnd);
- tokStart = matchEnd;
+ subEx->addElement(context.fMatch);
+
+ context.fMatch = new (manager) Match(*(context.fMatch));
+ context.fAdoptMatch = true;
- //decrement matchStart as will increment it at the top of the loop
- if (matchStart < matchEnd - 1)
- matchStart = matchEnd - 1;
+ matchStart = matchEnd;
+ } else {
+ ++matchStart;
}
}
-
- XMLCh* token;
-
- if (matchStart == tokStart + 1){
- token = (XMLCh*) fMemoryManager->allocate(sizeof(XMLCh));//new
XMLCh[1];
- token[0] = chNull;
-
- } else {
- token = (XMLCh*) fMemoryManager->allocate
- (
- (strLength + 1 - tokStart) * sizeof(XMLCh)
- );//new XMLCh[strLength + 1 - tokStart];
- XMLString::subString(token, expression, tokStart, strLength,
fMemoryManager);
- }
-
- if (!XMLString::equals(fPattern, &chNull))
- tokenStack->addElement(token);
- else
- fMemoryManager->deallocate(token);//delete[] token;
-
- return tokenStack;
}
@@ -818,28 +774,30 @@
// RegularExpression: Replace methods
// -----------------------------------------------------------------------
XMLCh* RegularExpression::replace(const char* const matchString,
- const char* const replaceString)
+ const char* const replaceString,
+ MemoryManager* const manager) const
{
- XMLCh* tmpBuf = XMLString::transcode(matchString, fMemoryManager);
- ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
- XMLCh* tmpBuf2 = XMLString::transcode(replaceString, fMemoryManager);
- ArrayJanitor<XMLCh> janBuf2(tmpBuf2, fMemoryManager);
+ XMLCh* tmpBuf = XMLString::transcode(matchString, manager);
+ ArrayJanitor<XMLCh> janBuf(tmpBuf, manager);
+ XMLCh* tmpBuf2 = XMLString::transcode(replaceString, manager);
+ ArrayJanitor<XMLCh> janBuf2(tmpBuf2, manager);
- return replace(tmpBuf, tmpBuf2, 0, XMLString::stringLen(tmpBuf));
+ return replace(tmpBuf, tmpBuf2, 0, XMLString::stringLen(tmpBuf), manager);
}
XMLCh* RegularExpression::replace(const char* const matchString,
const char* const replaceString,
- const XMLSize_t start, const XMLSize_t end)
+ const XMLSize_t start, const XMLSize_t end,
+ MemoryManager* const manager) const
{
- XMLCh* tmpBuf = XMLString::transcode(matchString, fMemoryManager);
- ArrayJanitor<XMLCh> janBuf(tmpBuf, fMemoryManager);
- XMLCh* tmpBuf2 = XMLString::transcode(replaceString, fMemoryManager);
- ArrayJanitor<XMLCh> janBuf2(tmpBuf2, fMemoryManager);
+ XMLCh* tmpBuf = XMLString::transcode(matchString, manager);
+ ArrayJanitor<XMLCh> janBuf(tmpBuf, manager);
+ XMLCh* tmpBuf2 = XMLString::transcode(replaceString, manager);
+ ArrayJanitor<XMLCh> janBuf2(tmpBuf2, manager);
- return replace(tmpBuf, tmpBuf2, start, end);
+ return replace(tmpBuf, tmpBuf2, start, end, manager);
}
@@ -847,60 +805,114 @@
// RegularExpression: Replace methods - Wide char version
// ---------------------------------------------------------------------------
XMLCh* RegularExpression::replace(const XMLCh* const matchString,
- const XMLCh* const replaceString)
+ const XMLCh* const replaceString,
+ MemoryManager* const manager) const
{
return replace(matchString, replaceString, 0,
- XMLString::stringLen(matchString));
+ XMLString::stringLen(matchString), manager);
}
XMLCh* RegularExpression::replace(const XMLCh* const matchString,
const XMLCh* const replaceString,
- const XMLSize_t start, const XMLSize_t end)
+ const XMLSize_t start, const XMLSize_t end,
+ MemoryManager* const manager) const
{
-
- //check if matches zero length string - throw error if so
- if (matches(XMLUni::fgZeroLenString, fMemoryManager)){
- ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Regex_RepPatMatchesZeroString, fMemoryManager);
+ // check if matches zero length string - throw error if so
+ if(matches(XMLUni::fgZeroLenString, manager)){
+ ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Regex_RepPatMatchesZeroString, manager);
}
-
- RefVectorOf<Match> *subEx = new (fMemoryManager) RefVectorOf<Match>(10,
true, fMemoryManager);
+
+ RefVectorOf<Match> *subEx = new (manager) RefVectorOf<Match>(10, true,
manager);
Janitor<RefVectorOf<Match> > janSubEx(subEx);
- //Call to tokenize with Match vector so that we keep track of the locations
- //of the subExpression within each of the matches
- RefArrayVectorOf<XMLCh>* tokenStack = tokenize(matchString, start, end,
subEx);
- Janitor<RefArrayVectorOf<XMLCh> > janTokStack(tokenStack);
+ allMatches(matchString, start, end, subEx, manager);
- XMLBuffer result(1023, fMemoryManager);
+ XMLBuffer result(1023, manager);
+ int tokStart = start;
+
+ unsigned int i = 0;
+ for(; i < subEx->size(); ++i) {
+ Match *match = subEx->elementAt(i);
+ int matchStart = match->getStartPos(0);
+
+ if(matchStart > tokStart)
+ result.append(matchString + tokStart, matchStart - tokStart);
+ subInExp(replaceString, matchString, match, result, manager);
+
+ tokStart = match->getEndPos(0);
+ }
+
+ if(end > tokStart)
+ result.append(matchString + tokStart, end - tokStart);
+
+ return XMLString::replicate(result.getRawBuffer(), manager);
+}
- int numSubEx = 0;
+/*
+ * Helper for Replace. This method prepares the replacement string by
substituting
+ * in actual values for parenthesized sub expressions.
+ *
+ * An error will be thrown if:
+ * 1) there is chBackSlash not followed by a chDollarSign or chBackSlash
+ * 2) there is an unescaped chDollarSign which is not followed by a digit
+ *
+ */
+void RegularExpression::subInExp(const XMLCh* const repString,
+ const XMLCh* const origString,
+ const Match* subEx,
+ XMLBuffer &result,
+ MemoryManager* const manager) const
+{
+ int numSubExp = subEx->getNoGroups() - 1;
- if (subEx && subEx->size() > 0)
- numSubEx = subEx->elementAt(0)->getNoGroups() - 1;
+ for(const XMLCh *ptr = repString; *ptr != chNull; ++ptr) {
+ if(*ptr == chDollarSign) {
+ ++ptr;
+
+ // check that after the $ is a digit
+ if(!XMLString::isDigit(*ptr)) {
+ // invalid replace string - $ must be followed by a digit
+ ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Regex_InvalidRepPattern, manager);
+ }
- int tokStackSize = tokenStack->size();
- const XMLCh* curRepString = XMLString::replicate(replaceString,
fMemoryManager);
+ int index = *ptr - chDigit_0;
- for (int i = 0; i < tokStackSize; i++){
+ const XMLCh *dig = ptr + 1;
+ while(XMLString::isDigit(*dig)) {
+ int newIndex = index * 10 + (*dig - chDigit_0);
+ if(newIndex > numSubExp) break;
+
+ index = newIndex;
+ ptr = dig;
+ ++dig;
+ }
- result.append(tokenStack->elementAt(i));
+ // now check that the index is legal
+ if(index <= numSubExp) {
+ int start = subEx->getStartPos(index);
+ int end = subEx->getEndPos(index);
+
+ // now copy the substring into the new string
+ if(start < end) {
+ result.append(origString + start, end - start);
+ }
+ }
- if (i != tokStackSize - 1) {
+ } else {
+ if(*ptr == chBackSlash) {
+ ++ptr;
- //if there are subExpressions, then determine the string we want to
- //substitute in.
- if (numSubEx != 0) {
- fMemoryManager->deallocate((XMLCh*)curRepString);
- curRepString = subInExp(replaceString, matchString,
subEx->elementAt(i));
+ // a backslash must be followed by a $ or another backslash;
+ // anything else makes it an invalid replace string
+ if(*ptr != chDollarSign && *ptr != chBackSlash) {
+ ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Regex_InvalidRepPattern, manager);
+ }
}
- result.append(curRepString);
+
+ result.append(*ptr);
}
}
-
- fMemoryManager->deallocate((XMLCh*)curRepString);
- return XMLString::replicate(result.getRawBuffer(), fMemoryManager);
-
}
@@ -981,8 +993,8 @@
RE_RuntimeContext(const Op *op, XMLSize_t offs) : op_(op), offs_(offs) { }
};
-int RegularExpression::match(Context* const context, const Op* const operations
- , XMLSize_t offset,
const short direction)
+int RegularExpression::match(Context* const context, const Op* const
operations,
+ XMLSize_t offset, const short direction) const
{
ValueStackOf<RE_RuntimeContext>* opStack=NULL;
Janitor<ValueStackOf<RE_RuntimeContext> > janStack(NULL);
@@ -992,7 +1004,7 @@
janStack.reset(opStack);
}
const Op* tmpOp = operations;
- bool ignoreCase = isSet(fOptions, IGNORE_CASE);
+ bool ignoreCase = isSet(context->fOptions, IGNORE_CASE);
int doReturn;
while (tmpOp != 0) {
@@ -1248,7 +1260,7 @@
bool RegularExpression::matchChar(Context* const context,
const XMLInt32 ch, XMLSize_t& offset,
- const short direction, const bool ignoreCase)
+ const short direction, const bool
ignoreCase) const
{
if(direction < 0 && offset==0)
return false;
@@ -1274,7 +1286,7 @@
}
bool RegularExpression::matchDot(Context* const context, XMLSize_t& offset,
- const short direction)
+ const short direction) const
{
if(direction < 0 && offset==0)
return false;
@@ -1289,7 +1301,7 @@
if (!context->nextCh(strCh, tmpOffset, direction))
return false;
- if (!isSet(fOptions, SINGLE_LINE)) {
+ if (!isSet(context->fOptions, SINGLE_LINE)) {
if (direction > 0 && RegxUtil::isEOLChar(strCh))
return false;
@@ -1304,7 +1316,7 @@
bool RegularExpression::matchRange(Context* const context, const Op* const op,
XMLSize_t& offset, const short direction,
- const bool ignoreCase)
+ const bool ignoreCase) const
{
if(direction < 0 && offset==0)
return false;
@@ -1337,7 +1349,7 @@
}
bool RegularExpression::matchAnchor(Context* const context, const XMLInt32 ch,
- const XMLSize_t offset)
+ const XMLSize_t offset) const
{
switch ((XMLCh) ch) {
case chLatin_A:
@@ -1348,10 +1360,10 @@
if (context->fLength == 0)
break;
{
- wordType after = getWordType(context->fString, context->fStart,
+ wordType after = getWordType(context, context->fString,
context->fStart,
context->fLimit, offset);
if (after == wordTypeIgnore
- || after == getPreviousWordType(context->fString,
+ || after == getPreviousWordType(context, context->fString,
context->fStart,
context->fLimit, offset))
break;
@@ -1361,10 +1373,10 @@
if (context->fLength == 0)
return false;
{
- wordType after = getWordType(context->fString, context->fStart,
+ wordType after = getWordType(context, context->fString,
context->fStart,
context->fLimit, offset);
if (after == wordTypeIgnore
- || after == getPreviousWordType(context->fString,
+ || after == getPreviousWordType(context, context->fString,
context->fStart,
context->fLimit, offset))
return false;
@@ -1372,7 +1384,7 @@
break;
case chLatin_Z:
case chDollarSign:
- if ( (XMLCh) ch == chDollarSign && isSet(fOptions, MULTIPLE_LINE)) {
+ if ( (XMLCh) ch == chDollarSign && isSet(context->fOptions,
MULTIPLE_LINE)) {
if (!(offset == context->fLimit || (offset < context->fLimit
&& RegxUtil::isEOLChar(context->fString[offset]))))
return false;
@@ -1394,7 +1406,7 @@
break;
case chAt:
case chCaret:
- if ( (XMLCh) ch == chCaret && !isSet(fOptions, MULTIPLE_LINE)) {
+ if ( (XMLCh) ch == chCaret && !isSet(context->fOptions,
MULTIPLE_LINE)) {
if (offset != context->fStart)
return false;
@@ -1410,9 +1422,9 @@
if (context->fLength == 0 || offset == context->fLimit)
return false;
- if (getWordType(context->fString, context->fStart, context->fLimit,
+ if (getWordType(context, context->fString, context->fStart,
context->fLimit,
offset) != wordTypeLetter
- || getPreviousWordType(context->fString, context->fStart,
+ || getPreviousWordType(context, context->fString, context->fStart,
context->fLimit, offset) != wordTypeOther)
return false;
break;
@@ -1420,9 +1432,9 @@
if (context->fLength == 0 || offset == context->fStart)
return false;
- if (getWordType(context->fString, context->fStart, context->fLimit,
+ if (getWordType(context, context->fString, context->fStart,
context->fLimit,
offset) != wordTypeOther
- || getPreviousWordType(context->fString, context->fStart,
+ || getPreviousWordType(context, context->fString, context->fStart,
context->fLimit, offset) != wordTypeLetter)
return false;
break;
@@ -1434,10 +1446,10 @@
bool RegularExpression::matchBackReference(Context* const context,
const XMLInt32 refNo, XMLSize_t&
offset,
const short direction,
- const bool ignoreCase)
+ const bool ignoreCase) const
{
if (refNo <=0 || refNo >= fNoGroups)
- ThrowXMLwithMemMgr(IllegalArgumentException,
XMLExcepts::Regex_BadRefNo, fMemoryManager);
+ ThrowXMLwithMemMgr(IllegalArgumentException,
XMLExcepts::Regex_BadRefNo, context->fMemoryManager);
if (context->fMatch->getStartPos(refNo) < 0
|| context->fMatch->getEndPos(refNo) < 0)
@@ -1469,7 +1481,7 @@
bool RegularExpression::matchString(Context* const context,
const XMLCh* const literal, XMLSize_t&
offset,
- const short direction, const bool
ignoreCase)
+ const short direction, const bool
ignoreCase) const
{
XMLSize_t length = XMLString::stringLen(literal);
if(direction < 0 && offset<length)
@@ -1494,7 +1506,7 @@
}
int RegularExpression::matchCapture(Context* const context, const Op* const op,
- XMLSize_t offset, const short direction)
+ XMLSize_t offset, const short direction)
const
{
// No check is made for nullness of fMatch as the function is only called
if
// fMatch is not null.
@@ -1520,7 +1532,7 @@
int RegularExpression::matchUnion(Context* const context,
const Op* const op, XMLSize_t offset,
- const short direction)
+ const short direction) const
{
unsigned int opSize = op->getSize();
@@ -1546,7 +1558,7 @@
bool RegularExpression::matchCondition(Context* const context,
const Op* const op, XMLSize_t
offset,
- const short direction)
+ const short direction) const
{
int refNo = op->getRefNo();
@@ -1644,87 +1656,6 @@
}
/*
- * Helper for Replace. This method prepares the replacement string by
substituting
- * in actual values for parenthesized sub expressions.
- *
- * An error will be thrown if:
- * 1) repString references an undefined subExpression
- * 2) there is an unescaped chDollar which is not followed by a digit
- *
- */
-const XMLCh* RegularExpression::subInExp(const XMLCh* const repString,
- const XMLCh* const origString,
- const Match* subEx)
-{
-
- int numSubExp = subEx->getNoGroups() - 1;
-
- if (numSubExp == 0)
- return XMLString::replicate(repString, fMemoryManager);
-
- bool notEscaped = true;
-
- XMLBuffer newString(1023, fMemoryManager);
-
- XMLCh indexStr[2]; //holds the string rep of a
-
- indexStr[1] = chNull;
- int index = -1;
-
- for (const XMLCh* ptr = repString; *ptr != chNull; ptr++){
-
- if ((*ptr == chDollarSign) && notEscaped) {
-
- ptr++;
-
- //check that after the $ is a digit
- if (!XMLString::isDigit(*ptr)){
-
- //invalid replace string - $ must be followed by a digit
- ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Regex_InvalidRepPattern, fMemoryManager);
- }
-
- indexStr[0] = *ptr; //get the digit
- index = XMLString::parseInt(indexStr, fMemoryManager); //convert
it to an int
-
- //now check that the index is legal
- if (index > numSubExp){
- ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Regex_InvalidRepPattern, fMemoryManager);
- }
-
- int start = subEx->getStartPos(index);
- int end = subEx->getEndPos(index);
-
- //now copy the substring into the new string
- for (int i=start; i<end; i++){
- newString.append(origString[i]);
- }
-
- } else {
-
- //if you have a slash and then a character that's not a $ or /,
- //then it's an invalid replace string
- if (!notEscaped && (*ptr != chDollarSign && *ptr != chBackSlash)){
- ThrowXMLwithMemMgr(RuntimeException,
XMLExcepts::Regex_InvalidRepPattern, fMemoryManager);
- }
-
- if (*ptr == chBackSlash){
- notEscaped = false;
- continue;
-
- }else
- notEscaped = true;
-
- newString.append(*ptr);
- }
- }
-
- return XMLString::replicate(newString.getRawBuffer(), fMemoryManager);
-
-}
-
-
-/*
* Prepares for matching. This method is called during construction.
*/
void RegularExpression::prepare() {
@@ -1812,17 +1743,17 @@
}
}
-RegularExpression::wordType RegularExpression::getCharType(const XMLCh ch) {
-
- if (!isSet(fOptions, UNICODE_WORD_BOUNDARY)) {
+RegularExpression::wordType RegularExpression::getCharType(Context* const
context, const XMLCh ch) const
+{
+ if (!isSet(context->fOptions, UNICODE_WORD_BOUNDARY)) {
- if (isSet(fOptions, USE_UNICODE_CATEGORY)) {
+ if (isSet(context->fOptions, USE_UNICODE_CATEGORY)) {
if (fWordRange == 0) {
fWordRange = fTokenFactory->getRange(fgUniIsWord);
if (fWordRange == 0)
- ThrowXMLwithMemMgr1(RuntimeException,
XMLExcepts::Regex_RangeTokenGetError, fgUniIsWord, fMemoryManager);
+ ThrowXMLwithMemMgr1(RuntimeException,
XMLExcepts::Regex_RangeTokenGetError, fgUniIsWord, context->fMemoryManager);
}
return fWordRange->match(ch) ? wordTypeLetter : wordTypeOther;
Modified: xerces/c/trunk/src/xercesc/util/regx/RegularExpression.hpp
URL:
http://svn.apache.org/viewvc/xerces/c/trunk/src/xercesc/util/regx/RegularExpression.hpp?rev=669438&r1=669437&r2=669438&view=diff
==============================================================================
--- xerces/c/trunk/src/xercesc/util/regx/RegularExpression.hpp (original)
+++ xerces/c/trunk/src/xercesc/util/regx/RegularExpression.hpp Thu Jun 19
02:55:58 2008
@@ -104,45 +104,57 @@
// -----------------------------------------------------------------------
// Matching methods
// -----------------------------------------------------------------------
- bool matches(const char* const matchString, MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager);
- bool matches(const char* const matchString, const XMLSize_t start,
- const XMLSize_t end, MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager);
- bool matches(const char* const matchString, Match* const pMatch,
MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
- bool matches(const char* const matchString, const XMLSize_t start,
- const XMLSize_t end, Match* const pMatch, MemoryManager*
const manager = XMLPlatformUtils::fgMemoryManager);
-
- bool matches(const XMLCh* const matchString, MemoryManager* const manager
= XMLPlatformUtils::fgMemoryManager);
- bool matches(const XMLCh* const matchString, const XMLSize_t start,
- const XMLSize_t end, MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager);
- bool matches(const XMLCh* const matchString, Match* const pMatch,
MemoryManager* const manager = XMLPlatformUtils::fgMemoryManager);
- bool matches(const XMLCh* const matchString, const XMLSize_t start,
- const XMLSize_t end, Match* const pMatch, MemoryManager*
const manager = XMLPlatformUtils::fgMemoryManager);
+ bool matches(const char* const matchString,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ bool matches(const char* const matchString, const XMLSize_t start, const
XMLSize_t end,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ bool matches(const char* const matchString, Match* const pMatch,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ bool matches(const char* const matchString, const XMLSize_t start, const
XMLSize_t end,
+ Match* const pMatch, MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+
+ bool matches(const XMLCh* const matchString,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ bool matches(const XMLCh* const matchString, const XMLSize_t start, const
XMLSize_t end,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ bool matches(const XMLCh* const matchString, Match* const pMatch,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ bool matches(const XMLCh* const matchString, const XMLSize_t start, const
XMLSize_t end,
+ Match* const pMatch, MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ void allMatches(const XMLCh* const matchString, const XMLSize_t start,
const XMLSize_t end,
+ RefVectorOf<Match> *subEx, MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
// -----------------------------------------------------------------------
// Tokenize methods
// -----------------------------------------------------------------------
// Note: The caller owns the string vector that is returned, and is responsible
// for deleting it.
- RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString);
- RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString, const
XMLSize_t start,
- const XMLSize_t end);
+ RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ RefArrayVectorOf<XMLCh> *tokenize(const char* const matchString, const
XMLSize_t start, const XMLSize_t end,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
- RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString);
RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString,
- const XMLSize_t start, const XMLSize_t
end);
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString, const
XMLSize_t start, const XMLSize_t end,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
// -----------------------------------------------------------------------
// Replace methods
// -----------------------------------------------------------------------
// Note: The caller owns the XMLCh* that is returned, and is responsible for
// deleting it.
- XMLCh *replace(const char* const matchString, const char* const
replaceString);
XMLCh *replace(const char* const matchString, const char* const
replaceString,
- const XMLSize_t start, const XMLSize_t end);
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ XMLCh *replace(const char* const matchString, const char* const
replaceString,
+ const XMLSize_t start, const XMLSize_t end,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
- XMLCh *replace(const XMLCh* const matchString, const XMLCh* const
replaceString);
XMLCh *replace(const XMLCh* const matchString, const XMLCh* const
replaceString,
- const XMLSize_t start, const XMLSize_t end);
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
+ XMLCh *replace(const XMLCh* const matchString, const XMLCh* const
replaceString,
+ const XMLSize_t start, const XMLSize_t end,
+ MemoryManager* const manager =
XMLPlatformUtils::fgMemoryManager) const;
// -----------------------------------------------------------------------
// Static initialize and cleanup methods
@@ -182,7 +194,8 @@
Context& operator= (const Context& other);
inline const XMLCh* getString() const { return fString; }
void reset(const XMLCh* const string, const XMLSize_t stringLen,
- const XMLSize_t start, const XMLSize_t limit, const int
noClosures);
+ const XMLSize_t start, const XMLSize_t limit, const int
noClosures,
+ const unsigned int options);
bool nextCh(XMLInt32& ch, XMLSize_t& offset, const short
direction);
bool fAdoptMatch;
@@ -194,6 +207,7 @@
int* fOffsets;
Match* fMatch;
const XMLCh* fString;
+ unsigned int fOptions;
MemoryManager* fMemoryManager;
};
@@ -208,65 +222,53 @@
// -----------------------------------------------------------------------
void prepare();
int parseOptions(const XMLCh* const options);
- wordType getWordType(const XMLCh* const target, const XMLSize_t begin,
- const XMLSize_t end, const XMLSize_t offset);
- wordType getCharType(const XMLCh ch);
- wordType getPreviousWordType(const XMLCh* const target,
- const XMLSize_t start, const XMLSize_t
end,
- XMLSize_t offset);
+ wordType getWordType(Context* const context, const XMLCh* const target,
+ const XMLSize_t begin, const XMLSize_t end,
+ const XMLSize_t offset) const;
+ wordType getCharType(Context* const context, const XMLCh ch) const;
+ wordType getPreviousWordType(Context* const context, const XMLCh* const
target,
+ const XMLSize_t start, const XMLSize_t end,
+ XMLSize_t offset) const;
/**
* Matching helpers
*/
int match(Context* const context, const Op* const operations, XMLSize_t
offset,
- const short direction);
- bool matchIgnoreCase(const XMLInt32 ch1, const XMLInt32 ch2);
+ const short direction) const;
+ bool matchIgnoreCase(const XMLInt32 ch1, const XMLInt32 ch2) const;
/**
* Helper methods used by match(Context* ...)
*/
bool matchChar(Context* const context, const XMLInt32 ch, XMLSize_t&
offset,
- const short direction, const bool ignoreCase);
- bool matchDot(Context* const context, XMLSize_t& offset, const short
direction);
+ const short direction, const bool ignoreCase) const;
+ bool matchDot(Context* const context, XMLSize_t& offset, const short
direction) const;
bool matchRange(Context* const context, const Op* const op,
- XMLSize_t& offset, const short direction, const bool
ignoreCase);
+ XMLSize_t& offset, const short direction, const bool
ignoreCase) const;
bool matchAnchor(Context* const context, const XMLInt32 ch,
- const XMLSize_t offset);
+ const XMLSize_t offset) const;
bool matchBackReference(Context* const context, const XMLInt32 ch,
XMLSize_t& offset, const short direction,
- const bool ignoreCase);
+ const bool ignoreCase) const;
bool matchString(Context* const context, const XMLCh* const literal,
- XMLSize_t& offset, const short direction, const bool
ignoreCase);
+ XMLSize_t& offset, const short direction, const bool
ignoreCase) const;
int matchUnion(Context* const context, const Op* const op, XMLSize_t
offset,
- const short direction);
+ const short direction) const;
int matchCapture(Context* const context, const Op* const op, XMLSize_t
offset,
- const short direction);
+ const short direction) const;
bool matchCondition(Context* const context, const Op* const op, XMLSize_t
offset,
- const short direction);
+ const short direction) const;
int matchModifier(Context* const context, const Op* const op, XMLSize_t
offset,
- const short direction);
+ const short direction) const;
/**
- * Tokenize helper
- *
- * This overloaded tokenize is for internal use only. It provides a way to
- * keep track of the sub-expressions in each match of the pattern.
- *
- * It is called by the other tokenize methods, and by the replace method.
- * The caller is responsible for the deletion of the returned
- * RefArrayVectorOf<XMLCh*>
- */
- RefArrayVectorOf<XMLCh> *tokenize(const XMLCh* const matchString,
- const XMLSize_t start, const XMLSize_t
end,
- RefVectorOf<Match> *subEx);
- /**
* Replace helpers
- *
- * Note: the caller owns the XMLCh* that is returned
*/
- const XMLCh *subInExp(const XMLCh* const repString,
- const XMLCh* const origString,
- const Match* subEx);
+ void subInExp(const XMLCh* const repString,
+ const XMLCh* const origString,
+ const Match* subEx,
+ XMLBuffer &result,
+ MemoryManager* const manager) const;
/**
* Converts a token tree into an operation tree
*/
@@ -302,10 +304,10 @@
XMLSize_t fMinLength;
unsigned int fNoClosures;
unsigned int fOptions;
- BMPattern* fBMPattern;
+ const BMPattern* fBMPattern;
XMLCh* fPattern;
XMLCh* fFixedString;
- Op* fOperations;
+ const Op* fOperations;
Token* fTokenTree;
RangeToken* fFirstChar;
static RangeToken* fWordRange;
@@ -574,40 +576,42 @@
inline int RegularExpression::matchModifier(Context* const context,
const Op* const op, XMLSize_t
offset,
- const short direction)
+ const short direction) const
{
int saveOptions = fOptions;
- fOptions |= (int) op->getData();
- fOptions &= (int) ~op->getData2();
+ context->fOptions |= (int) op->getData();
+ context->fOptions &= (int) ~op->getData2();
int ret = match(context, op->getChild(), offset, direction);
- fOptions = saveOptions;
+ context->fOptions = saveOptions;
return ret;
}
- inline RegularExpression::wordType RegularExpression::getWordType(const
XMLCh* const target
+ inline RegularExpression::wordType RegularExpression::getWordType(Context*
const context
+ , const
XMLCh* const target
, const
XMLSize_t begin
, const
XMLSize_t end
- , const
XMLSize_t offset)
+ , const
XMLSize_t offset) const
{
if (offset < begin || offset >= end)
return wordTypeOther;
- return getCharType(target[offset]);
+ return getCharType(context, target[offset]);
}
inline
- RegularExpression::wordType RegularExpression::getPreviousWordType(const
XMLCh* const target
+ RegularExpression::wordType RegularExpression::getPreviousWordType(Context*
const context
+ , const
XMLCh* const target
, const
XMLSize_t start
, const
XMLSize_t end
- ,
XMLSize_t offset)
+ ,
XMLSize_t offset) const
{
- wordType ret = getWordType(target, start, end, --offset);
+ wordType ret = getWordType(context, target, start, end, --offset);
while (ret == wordTypeIgnore) {
- ret = getWordType(target, start, end, --offset);
+ ret = getWordType(context, target, start, end, --offset);
}
return ret;
---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]