knoaman 2004/09/28 17:24:01 Modified: c/src/xercesc/internal XMLReader.cpp XMLReader.hpp Log: Performance: improve src offset calculation. Patch by Anthony O'Dowd. Revision Changes Path 1.25 +45 -8 xml-xerces/c/src/xercesc/internal/XMLReader.cpp Index: XMLReader.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.cpp,v retrieving revision 1.24 retrieving revision 1.25 diff -u -r1.24 -r1.25 --- XMLReader.cpp 8 Sep 2004 13:56:14 -0000 1.24 +++ XMLReader.cpp 29 Sep 2004 00:24:01 -0000 1.25 @@ -279,6 +279,7 @@ { // This represents no data from the source fCharSizeBuf[fCharsAvail] = 0; + fCharOfsBuf[fCharsAvail] = 0; fCharBuf[fCharsAvail++] = chSpace; } } @@ -374,6 +375,7 @@ { // This represents no data from the source fCharSizeBuf[fCharsAvail] = 0; + fCharOfsBuf[fCharsAvail] = 0; fCharBuf[fCharsAvail++] = chSpace; } } @@ -401,11 +403,15 @@ // Take the current source offset and add in the sizes that we've // eaten from the source so far. // - unsigned int offset = fSrcOfsBase; - for (unsigned int index = 0; index < fCharIndex; index++) - offset += fCharSizeBuf[index]; + if( fCharIndex == 0 ) { + return fSrcOfsBase; + } + + if( fCharIndex < fCharsAvail ) { + return (fSrcOfsBase + fCharOfsBuf[fCharIndex]); + } - return offset; + return (fSrcOfsBase + fCharOfsBuf[fCharIndex-1] + fCharSizeBuf[fCharIndex-1]); } @@ -536,6 +542,17 @@ fCharIndex++; } } + // If there's a utf-8 BOM (0xEF 0xBB 0xBF), skip past it. 
+ else { + const char* asChars = (const char*)fRawByteBuf; + if ((fRawBytesAvail > XMLRecognizer::fgUTF8BOMLen )&& + (XMLString::compareNString( asChars + , XMLRecognizer::fgUTF8BOM + , XMLRecognizer::fgUTF8BOMLen) == 0) && !startInd) + { + fCharIndex += XMLRecognizer::fgUTF8BOMLen; + } + } } } @@ -547,6 +564,15 @@ if (!fCharsAvail) fNoMore = true; + // Calculate fCharOfsBuf using the elements from fCharSizeBuf + if (fCalculateSrcOfs) + { + fCharOfsBuf[0] = 0; + for (unsigned int index = 1; index < fCharsAvail; ++index) { + fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1]; + } + } + return (fCharsAvail != 0); } @@ -1263,11 +1289,13 @@ if (fRawBytesAvail < 2) break; + unsigned int postBOMIndex = 0; const UTF16Ch* asUTF16 = (const UTF16Ch*)&fRawByteBuf[fRawBufIndex]; if ((*asUTF16 == chUnicodeMarker) || (*asUTF16 == chSwappedUnicodeMarker)) { fRawBufIndex += sizeof(UTF16Ch); asUTF16++; + postBOMIndex = fRawBufIndex; } // First check that there are enough raw bytes for there to even @@ -1275,7 +1303,7 @@ // if (fRawBytesAvail - fRawBufIndex < XMLRecognizer::fgUTF16PreLen) { - fRawBufIndex = 0; + fRawBufIndex = postBOMIndex; break; } @@ -1287,7 +1315,7 @@ { if (memcmp(asUTF16, XMLRecognizer::fgUTF16BPre, XMLRecognizer::fgUTF16PreLen)) { - fRawBufIndex = 0; + fRawBufIndex = postBOMIndex; break; } } @@ -1295,7 +1323,7 @@ { if (memcmp(asUTF16, XMLRecognizer::fgUTF16LPre, XMLRecognizer::fgUTF16PreLen)) { - fRawBufIndex = 0; + fRawBufIndex = postBOMIndex; break; } } @@ -1372,6 +1400,15 @@ // if ((fType == Type_PE) && (fRefFrom == RefFrom_NonLiteral)) fCharBuf[fCharsAvail++] = chSpace; + + // Calculate fCharOfsBuf buffer using the elements from fCharSizeBuf + if (fCalculateSrcOfs) + { + fCharOfsBuf[0] = 0; + for (unsigned int index = 1; index < fCharsAvail; ++index) { + fCharOfsBuf[index] = fCharOfsBuf[index-1]+fCharSizeBuf[index-1]; + } + } } 1.20 +9 -0 xml-xerces/c/src/xercesc/internal/XMLReader.hpp Index: XMLReader.hpp 
=================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/internal/XMLReader.hpp,v retrieving revision 1.19 retrieving revision 1.20 diff -u -r1.19 -r1.20 --- XMLReader.hpp 8 Sep 2004 13:56:14 -0000 1.19 +++ XMLReader.hpp 29 Sep 2004 00:24:01 -0000 1.20 @@ -16,6 +16,9 @@ /* * $Log$ + * Revision 1.20 2004/09/29 00:24:01 knoaman + * Performance: improve src offset calculation. Patch by Anthony O'Dowd. + * * Revision 1.19 2004/09/08 13:56:14 peiyongz * Apache License Version 2.0 * @@ -424,6 +427,11 @@ // to make the internalized char fCharBuf[x]. This only contains // useful data if fSrcOfsSupported is true. // + // fCharOfsBuf + // This buffer is an array that contains the offset in the + // fRawByteBuf buffer of each char in the fCharBuf buffer. It + // only contains useful data if fSrcOfsSupported is true. + // // fCurCol // fCurLine // The current line and column that we are in within this reader's @@ -548,6 +556,7 @@ XMLCh fCharBuf[kCharBufSize]; unsigned int fCharsAvail; unsigned char fCharSizeBuf[kCharBufSize]; + unsigned int fCharOfsBuf[kCharBufSize]; XMLSSize_t fCurCol; XMLSSize_t fCurLine; XMLRecognizer::Encodings fEncoding;
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]