tng 2002/12/20 14:10:48 Modified: c/src/xercesc/validators/DTD DTDScanner.cpp DTDValidator.cpp c/src/xercesc/validators/schema SchemaValidator.cpp c/src/xercesc/validators/schema/identity XercesXPath.cpp Log: XML 1.1 Revision Changes Path 1.19 +105 -53 xml-xerces/c/src/xercesc/validators/DTD/DTDScanner.cpp Index: DTDScanner.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/validators/DTD/DTDScanner.cpp,v retrieving revision 1.18 retrieving revision 1.19 diff -u -r1.18 -r1.19 --- DTDScanner.cpp 18 Dec 2002 14:17:55 -0000 1.18 +++ DTDScanner.cpp 20 Dec 2002 22:10:47 -0000 1.19 @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.19 2002/12/20 22:10:47 tng + * XML 1.1 + * * Revision 1.18 2002/12/18 14:17:55 gareth * Fix to bug #13438. When you eant a vector that calls delete[] on its members you should use RefArrayVectorOf. * @@ -803,7 +806,7 @@ fReaderMgr->getNextChar(); break; } - else if (XMLReader::isWhitespace(nextCh)) + else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh)) { // // If advanced callbacks are enabled and we have a doc @@ -915,6 +918,7 @@ bool firstNonWS = false; bool gotLeadingSurrogate = false; bool escaped; + bool charref_expanded = false; while (true) { try @@ -962,6 +966,7 @@ gotLeadingSurrogate = false; continue; } + charref_expanded = true; } // Check for correct surrogate pairs @@ -980,25 +985,29 @@ fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); } // Its got to at least be a valid XML character - else if (!XMLReader::isXMLChar(nextCh)) { - - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - fScanner->emitError - ( - XMLErrs::InvalidCharacterInAttrValue - , attrName - , tmpBuf - ); + else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) { + // if it was a character reference and is control char, then it's ok + if (!(charref_expanded && fReaderMgr->getCurrentReader()->isControlChar(nextCh))) + { + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + fScanner->emitError + ( + XMLErrs::InvalidCharacterInAttrValue + , attrName + , tmpBuf + ); + } } gotLeadingSurrogate = false; + charref_expanded = false; } // @@ -1029,7 +1038,7 @@ { if (curState == InWhitespace) { - if (!XMLReader::isWhitespace(nextCh)) + if (!fReaderMgr->getCurrentReader()->isWhitespace(nextCh)) { if (firstNonWS) toFill.append(chSpace); @@ -1043,7 +1052,7 @@ } else if (curState == InContent) { - if (XMLReader::isWhitespace(nextCh)) + if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh)) { curState = InWhitespace; continue; @@ -1509,7 +1518,7 @@ fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); } // Its got to at least be a valid XML character - else if (!XMLReader::isXMLChar(nextCh)) { + else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) { XMLCh tmpBuf[9]; XMLString::binToText @@ -2147,6 +2156,7 @@ XMLCh nextCh; XMLCh secondCh = 0; bool gotLeadingSurrogate = false; + bool charref_expanded = false; while (true) { // Get the second char if we have one, else get another @@ -2216,6 +2226,8 @@ gotLeadingSurrogate = false; continue; } + charref_expanded = true; + } else { @@ -2265,20 +2277,25 @@ if ((nextCh < 0xDC00) || (nextCh > 0xDFFF)) fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); } - else if (!XMLReader::isXMLChar(nextCh)) + else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) { - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf); - fReaderMgr->skipPastChar(quoteCh); - return false; + // if it was a character reference and is control char, then it's ok + if (!(charref_expanded && fReaderMgr->getCurrentReader()->isControlChar(nextCh))) + { + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf); + fReaderMgr->skipPastChar(quoteCh); + return false; + } } + charref_expanded = false; gotLeadingSurrogate = false; } @@ -2570,7 +2587,7 @@ } } - else if (XMLReader::isWhitespace(nextCh)) + else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh)) { // // If we have a doc type handler, and advanced callbacks are @@ -2629,13 +2646,13 @@ return; } else if (!nextCh) - { - return; // nothing left - } - else + { + return; // nothing left + } + else { fReaderMgr->getNextChar(); - if (!XMLReader::isXMLChar(nextCh)) + if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) { XMLCh tmpBuf[9]; XMLString::binToText @@ -2647,7 +2664,7 @@ ); fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf); } - else + else { fScanner->emitError(XMLErrs::InvalidDocumentStructure); } @@ -2851,6 +2868,7 @@ // to parse until we hit its end. // unsigned long depth = 1; + bool gotLeadingSurrogate = false; while (true) { const XMLCh nextCh = fReaderMgr->getNextChar(); @@ -2883,17 +2901,50 @@ } } } - else if (!XMLReader::isXMLChar(nextCh)) + // Deal with surrogate pairs + else if ((nextCh >= 0xD800) && (nextCh <= 0xDBFF)) { - XMLCh tmpBuf[9]; - XMLString::binToText - ( - nextCh - , tmpBuf - , 8 - , 16 - ); - fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf); + // Its a leading surrogate. If we already got one, then + // issue an error, else set leading flag to make sure that + // we look for a trailing next time. + if (gotLeadingSurrogate) + fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); + else + gotLeadingSurrogate = true; + } + else + { + // If its a trailing surrogate, make sure that we are + // prepared for that. Else, its just a regular char so make + // sure that we were not expected a trailing surrogate. + if ((nextCh >= 0xDC00) && (nextCh <= 0xDFFF)) + { + // Its trailing, so make sure we were expecting it + if (!gotLeadingSurrogate) + fScanner->emitError(XMLErrs::Unexpected2ndSurrogateChar); + } + else + { + // Its just a char, so make sure we were not expecting a + // trailing surrogate. + if (gotLeadingSurrogate) + fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); + + // Its got to at least be a valid XML character + else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) + { + XMLCh tmpBuf[9]; + XMLString::binToText + ( + nextCh + , tmpBuf + , 8 + , 16 + ); + fScanner->emitError(XMLErrs::InvalidCharacter, tmpBuf); + } + } + gotLeadingSurrogate = false; } } } @@ -2968,7 +3019,7 @@ fScanner->getValidator()->emitError(XMLValid::PartialMarkupInPE); } } - else if (XMLReader::isWhitespace(nextCh)) + else if (fReaderMgr->getCurrentReader()->isWhitespace(nextCh)) { // // IF we are doing advanced callbacks and have a doc type @@ -3572,7 +3623,7 @@ fScanner->emitError(XMLErrs::Expected2ndSurrogateChar); } // Its got to at least be a valid XML character - else if (!XMLReader::isXMLChar(nextCh)) { + else if (!fReaderMgr->getCurrentReader()->isXMLChar(nextCh)) { XMLCh tmpBuf[9]; XMLString::binToText @@ -3656,7 +3707,7 @@ // If its not a valid public id char, then report it but keep going // since that's the best recovery scheme. // - if (!XMLReader::isPublicIdChar(nextCh)) + if (!fReaderMgr->getCurrentReader()->isPublicIdChar(nextCh)) { XMLCh tmpBuf[9]; XMLString::binToText @@ -3746,7 +3797,8 @@ } // If its not our supported version, issue an error but continue - if (!XMLString::equals(bbVersion.getRawBuffer(), XMLUni::fgSupportedVersion)) + if (!XMLString::equals(bbVersion.getRawBuffer(), XMLUni::fgVersion1_0) && + !XMLString::equals(bbVersion.getRawBuffer(), XMLUni::fgVersion1_1)) fScanner->emitError(XMLErrs::UnsupportedXMLVersion, bbVersion.getRawBuffer()); } 1.11 +4 -4 xml-xerces/c/src/xercesc/validators/DTD/DTDValidator.cpp Index: DTDValidator.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/validators/DTD/DTDValidator.cpp,v retrieving revision 1.10 retrieving revision 1.11 diff -u -r1.10 -r1.11 --- DTDValidator.cpp 28 Nov 2002 19:20:33 -0000 1.10 +++ DTDValidator.cpp 20 Dec 2002 22:10:47 -0000 1.11 @@ -318,7 +318,7 @@ if (firstNameChar) { // If its not, emit and error but try to keep going - if (!XMLReader::isFirstNameChar(*valPtr)) + if (!getReaderMgr()->getCurrentReader()->isFirstNameChar(*valPtr)) emitError(XMLValid::AttrValNotName, fullName); valPtr++; } @@ -330,7 +330,7 @@ // If we hit a whitespace, its either a break between two // or more values, or an error if we have a single value. // - if (XMLReader::isWhitespace(*valPtr)) + if (getReaderMgr()->getCurrentReader()->isWhitespace(*valPtr)) { if (!multipleValues) { @@ -348,7 +348,7 @@ if (doNamespace && *valPtr == chColon && firstNameChar) getScanner()->emitError(XMLErrs::ColonNotLegalWithNS); - if (!XMLReader::isNameChar(*valPtr)) + if (!getReaderMgr()->getCurrentReader()->isNameChar(*valPtr)) { emitError(XMLValid::AttrValNotName, fullName); return; 1.24 +11 -8 xml-xerces/c/src/xercesc/validators/schema/SchemaValidator.cpp Index: SchemaValidator.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/validators/schema/SchemaValidator.cpp,v retrieving revision 1.23 retrieving revision 1.24 diff -u -r1.23 -r1.24 --- SchemaValidator.cpp 12 Dec 2002 20:53:28 -0000 1.23 +++ SchemaValidator.cpp 20 Dec 2002 22:10:47 -0000 1.24 @@ -1,7 +1,7 @@ /* * The Apache Software License, Version 1.1 * - * Copyright (c) 2001 The Apache Software Foundation. All rights + * Copyright (c) 2001-2002 The Apache Software Foundation. All rights * reserved. * * Redistribution and use in source and binary forms, with or without @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.24 2002/12/20 22:10:47 tng + * XML 1.1 + * * Revision 1.23 2002/12/12 20:53:28 knoaman * Schema Errata E1-15. * @@ -480,7 +483,7 @@ fCurrentDV->validate(value); } } - } + } catch (XMLException& idve) { emitError (XMLValid::DatatypeError, idve.getType(), idve.getMessage()); } @@ -676,9 +679,9 @@ attDefDV->validate(attrValue); } - } + } catch (XMLException& idve) { - emitError (XMLValid::DatatypeError, idve.getType(), idve.getMessage()); + emitError (XMLValid::DatatypeError, idve.getType(), idve.getMessage()); } catch (...) { emitError(XMLValid::GenericError); @@ -1124,14 +1127,14 @@ } else if (fWhiteSpace == DatatypeValidator::REPLACE) { - if (XMLReader::isWhitespace(nextCh)) + if (getReaderMgr()->getCurrentReader()->isWhitespace(nextCh)) nextCh = chSpace; } else // COLLAPSE case { if (curState == InWhitespace) { - if (!XMLReader::isWhitespace(nextCh)) + if (!getReaderMgr()->getCurrentReader()->isWhitespace(nextCh)) { if (firstNonWS) toFill.append(chSpace); @@ -1146,7 +1149,7 @@ } else if (curState == InContent) { - if (XMLReader::isWhitespace(nextCh)) + if (getReaderMgr()->getCurrentReader()->isWhitespace(nextCh)) { curState = InWhitespace; srcPtr++; @@ -1163,7 +1166,7 @@ } srcPtr--; nextCh = *srcPtr; - if (XMLReader::isWhitespace(nextCh)) + if (getReaderMgr()->getCurrentReader()->isWhitespace(nextCh)) fTrailing = true; } 1.5 +9 -6 xml-xerces/c/src/xercesc/validators/schema/identity/XercesXPath.cpp Index: XercesXPath.cpp =================================================================== RCS file: /home/cvs/xml-xerces/c/src/xercesc/validators/schema/identity/XercesXPath.cpp,v retrieving revision 1.4 retrieving revision 1.5 diff -u -r1.4 -r1.5 --- XercesXPath.cpp 4 Dec 2002 18:21:23 -0000 1.4 +++ XercesXPath.cpp 20 Dec 2002 22:10:48 -0000 1.5 @@ -56,6 +56,9 @@ /* * $Log$ + * Revision 1.5 2002/12/20 22:10:48 tng + * XML 1.1 + * * Revision 1.4 2002/12/04 18:21:23 knoaman * Identity constraint fix. * @@ -687,7 +690,7 @@ ch = data[currentOffset]; - while (XMLReader::isWhitespace(ch)) { + while (XMLChar1_0::isWhitespace(ch)) { if (++currentOffset == endOffset) { break; @@ -757,13 +760,13 @@ addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD); starIsMultiplyOperator = true; currentOffset++; - } else if (XMLReader::isWhitespace(ch)) { + } else if (XMLChar1_0::isWhitespace(ch)) { do { if (++currentOffset == endOffset) break; ch = data[currentOffset]; - } while (XMLReader::isWhitespace(ch)); + } while (XMLChar1_0::isWhitespace(ch)); if (currentOffset == endOffset || ch == chPipe) { addToken(tokens, XercesXPath::EXPRTOKEN_PERIOD); @@ -1088,7 +1091,7 @@ // // [39] ExprWhitespace ::= S // - while (XMLReader::isWhitespace(ch)) { + while (XMLChar1_0::isWhitespace(ch)) { if (++currentOffset == endOffset) { break; } @@ -1224,7 +1227,7 @@ XMLCh ch = data[currentOffset]; - if (!XMLReader::isXMLLetter(ch) && ch != chUnderscore) { + if (!XMLChar1_0::isXMLLetter(ch) && ch != chUnderscore) { return currentOffset; } @@ -1232,7 +1235,7 @@ ch = data[currentOffset]; - if (ch == chColon || !XMLReader::isNameChar(ch)) { + if (ch == chColon || !XMLChar1_0::isNameChar(ch)) { break; } }
--------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]