Diff
Modified: trunk/Source/WebCore/ChangeLog (91024 => 91025)
--- trunk/Source/WebCore/ChangeLog 2011-07-14 19:51:40 UTC (rev 91024)
+++ trunk/Source/WebCore/ChangeLog 2011-07-14 20:08:08 UTC (rev 91025)
@@ -1,3 +1,23 @@
+2011-07-14 Jeffrey Pfau <jp...@apple.com>
+
+ Character reference parser for new XML parser
+ https://bugs.webkit.org/show_bug.cgi?id=64398
+
+ Reviewed by Adam Barth.
+
+ Refactored out the HTML entity parser and added a common template for parsing character references.
+
+ * WebCore.xcodeproj/project.pbxproj:
+ * html/parser/HTMLEntityParser.cpp:
+ (WebCore::consumeHTMLEntity):
+ * xml/parser/CharacterReferenceParserInlineMethods.h: Copied from Source/WebCore/html/parser/HTMLEntityParser.cpp.
+ (WebCore::isHexDigit):
+ (WebCore::unconsumeCharacters):
+ (WebCore::consumeCharacterReference):
+ * xml/parser/XMLCharacterReferenceParser.cpp: Added.
+ (WebCore::consumeXMLCharacterReference):
+ * xml/parser/XMLCharacterReferenceParser.h: Added.
+
2011-07-14 Ilya Tikhonovsky <loi...@chromium.org>
Reviewed by Pavel Feldman.
Modified: trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj (91024 => 91025)
--- trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj 2011-07-14 19:51:40 UTC (rev 91024)
+++ trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj 2011-07-14 20:08:08 UTC (rev 91025)
@@ -55,6 +55,9 @@
00B9318C13BA8DCC0035A948 /* XMLDocumentParserScope.h in Headers */ = {isa = PBXBuildFile; fileRef = 00B9318613BA867F0035A948 /* XMLDocumentParserScope.h */; };
00CA93B213C6691600F7FE95 /* NewXMLDocumentParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 00CA93B113C6691600F7FE95 /* NewXMLDocumentParser.h */; };
00CA93B513C6697C00F7FE95 /* NewXMLDocumentParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00CA93B413C6697C00F7FE95 /* NewXMLDocumentParser.cpp */; };
+ 00D0464A13C4D14500326FCC /* XMLCharacterReferenceParser.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 00D0464813C4D14500326FCC /* XMLCharacterReferenceParser.cpp */; };
+ 00D0464B13C4D14500326FCC /* XMLCharacterReferenceParser.h in Headers */ = {isa = PBXBuildFile; fileRef = 00D0464913C4D14500326FCC /* XMLCharacterReferenceParser.h */; };
+ 00022E6913CE1BBA00282D5B /* CharacterReferenceParserInlineMethods.h in Headers */ = {isa = PBXBuildFile; fileRef = 00022E6813CE1BBA00282D5B /* CharacterReferenceParserInlineMethods.h */; };
052BFCE9128ABF1500FD338D /* GeolocationClientMock.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 052BFCE8128ABF1500FD338D /* GeolocationClientMock.cpp */; };
052BFCEB128ABF2100FD338D /* GeolocationClientMock.h in Headers */ = {isa = PBXBuildFile; fileRef = 052BFCEA128ABF2100FD338D /* GeolocationClientMock.h */; settings = {ATTRIBUTES = (Private, ); }; };
05FD69E012845D4300B2BEB3 /* DOMTimeStamp.h in Headers */ = {isa = PBXBuildFile; fileRef = 05FD69DF12845D4300B2BEB3 /* DOMTimeStamp.h */; settings = {ATTRIBUTES = (Private, ); }; };
@@ -6411,6 +6414,9 @@
00B9318613BA867F0035A948 /* XMLDocumentParserScope.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = XMLDocumentParserScope.h; sourceTree = "<group>"; };
00CA93B113C6691600F7FE95 /* NewXMLDocumentParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = NewXMLDocumentParser.h; sourceTree = "<group>"; };
00CA93B413C6697C00F7FE95 /* NewXMLDocumentParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = NewXMLDocumentParser.cpp; sourceTree = "<group>"; };
+ 00D0464813C4D14500326FCC /* XMLCharacterReferenceParser.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = XMLCharacterReferenceParser.cpp; sourceTree = "<group>"; };
+ 00D0464913C4D14500326FCC /* XMLCharacterReferenceParser.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = XMLCharacterReferenceParser.h; sourceTree = "<group>"; };
+ 00022E6813CE1BBA00282D5B /* CharacterReferenceParserInlineMethods.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = CharacterReferenceParserInlineMethods.h; sourceTree = "<group>"; };
052BFCE8128ABF1500FD338D /* GeolocationClientMock.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; name = GeolocationClientMock.cpp; path = mock/GeolocationClientMock.cpp; sourceTree = "<group>"; };
052BFCEA128ABF2100FD338D /* GeolocationClientMock.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = GeolocationClientMock.h; path = mock/GeolocationClientMock.h; sourceTree = "<group>"; };
05FD69DF12845D4300B2BEB3 /* DOMTimeStamp.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = DOMTimeStamp.h; sourceTree = "<group>"; };
@@ -12697,8 +12703,11 @@
00B9318013BA867F0035A948 /* parser */ = {
isa = PBXGroup;
children = (
+ 00022E6813CE1BBA00282D5B /* CharacterReferenceParserInlineMethods.h */,
00CA93B413C6697C00F7FE95 /* NewXMLDocumentParser.cpp */,
00CA93B113C6691600F7FE95 /* NewXMLDocumentParser.h */,
+ 00D0464813C4D14500326FCC /* XMLCharacterReferenceParser.cpp */,
+ 00D0464913C4D14500326FCC /* XMLCharacterReferenceParser.h */,
00B9318113BA867F0035A948 /* XMLDocumentParser.cpp */,
00B9318213BA867F0035A948 /* XMLDocumentParser.h */,
00B9318313BA867F0035A948 /* XMLDocumentParserLibxml2.cpp */,
@@ -20278,6 +20287,7 @@
6550B69E099DF0270090D781 /* CDATASection.h in Headers */,
514185EE0CD65F0400763C99 /* ChangeVersionWrapper.h in Headers */,
6550B6A0099DF0270090D781 /* CharacterData.h in Headers */,
+ 00022E6913CE1BBA00282D5B /* CharacterReferenceParserInlineMethods.h in Headers */,
B2C3DA2A0D006C1D00EF6F26 /* CharsetData.h in Headers */,
F55B3DB21251F12D003EF269 /* CheckboxInputType.h in Headers */,
A00B721A11DE6428008AB9FF /* CheckedInt.h in Headers */,
@@ -23048,6 +23058,7 @@
93309E24099E64920056E581 /* WrapContentsInDummySpanCommand.h in Headers */,
9BAF3B2412C1A39800014BF1 /* WritingDirection.h in Headers */,
6565820209D1508D000E61D7 /* XLinkNames.h in Headers */,
+ 00D0464B13C4D14500326FCC /* XMLCharacterReferenceParser.h in Headers */,
00B9318813BA8DBA0035A948 /* XMLDocumentParser.h in Headers */,
00B9318C13BA8DCC0035A948 /* XMLDocumentParserScope.h in Headers */,
59C28046138DC2410079B7E2 /* XMLErrors.h in Headers */,
@@ -25817,6 +25828,7 @@
5112247810CFB8F4008099D7 /* WorkerThreadableWebSocketChannel.cpp in Sources */,
93309E23099E64920056E581 /* WrapContentsInDummySpanCommand.cpp in Sources */,
A833C7CC0A2CF07400D57664 /* XLinkNames.cpp in Sources */,
+ 00D0464A13C4D14500326FCC /* XMLCharacterReferenceParser.cpp in Sources */,
00B9318713BA8DB30035A948 /* XMLDocumentParser.cpp in Sources */,
00B9318913BA8DBC0035A948 /* XMLDocumentParserLibxml2.cpp in Sources */,
00B9318B13BA8DC90035A948 /* XMLDocumentParserScope.cpp in Sources */,
Modified: trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp (91024 => 91025)
--- trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp 2011-07-14 19:51:40 UTC (rev 91024)
+++ trunk/Source/WebCore/html/parser/HTMLEntityParser.cpp 2011-07-14 20:08:08 UTC (rev 91025)
@@ -28,6 +28,7 @@
#include "config.h"
#include "HTMLEntityParser.h"
+#include "CharacterReferenceParserInlineMethods.h"
#include "HTMLEntitySearch.h"
#include "HTMLEntityTable.h"
#include <wtf/Vector.h>
@@ -45,211 +46,104 @@
0x02DC, 0x2122, 0x0161, 0x203A, 0x0153, 0x009D, 0x017E, 0x0178, // 98-9F
};
-inline UChar adjustEntity(UChar32 value)
-{
- if ((value & ~0x1F) != 0x0080)
- return value;
- return windowsLatin1ExtensionArray[value - 0x80];
-}
-
-inline UChar32 legalEntityFor(UChar32 value)
-{
- // FIXME: A number of specific entity values generate parse errors.
- if (value == 0 || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
- return 0xFFFD;
- if (U_IS_BMP(value))
- return adjustEntity(value);
- return value;
-}
-
-inline bool convertToUTF16(UChar32 value, Vector<UChar, 16>& decodedEntity)
-{
- if (U_IS_BMP(value)) {
- UChar character = static_cast<UChar>(value);
- ASSERT(character == value);
- decodedEntity.append(character);
- return true;
- }
- decodedEntity.append(U16_LEAD(value));
- decodedEntity.append(U16_TRAIL(value));
- return true;
-}
-
-inline bool isHexDigit(UChar cc)
-{
- return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F');
-}
-
inline bool isAlphaNumeric(UChar cc)
{
return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z');
}
-void unconsumeCharacters(SegmentedString& source, const Vector<UChar, 10>& consumedCharacters)
-{
- if (consumedCharacters.size() == 1)
- source.push(consumedCharacters[0]);
- else if (consumedCharacters.size() == 2) {
- source.push(consumedCharacters[0]);
- source.push(consumedCharacters[1]);
- } else
- source.prepend(SegmentedString(String(consumedCharacters.data(), consumedCharacters.size())));
-}
+class HTMLEntityParser {
+public:
+ inline static UChar adjustEntity(UChar32 value)
+ {
+ if ((value & ~0x1F) != 0x0080)
+ return value;
+ return windowsLatin1ExtensionArray[value - 0x80];
+ }
-}
+ inline static UChar32 legalEntityFor(UChar32 value)
+ {
+ // FIXME: A number of specific entity values generate parse errors.
+ if (!value || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
+ return 0xFFFD;
+ if (U_IS_BMP(value))
+ return adjustEntity(value);
+ return value;
+ }
-bool consumeHTMLEntity(SegmentedString& source, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
-{
- ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
- ASSERT(!notEnoughCharacters);
- ASSERT(decodedEntity.isEmpty());
+ inline static bool convertToUTF16(UChar32 value, Vector<UChar, 16>& decodedEntity)
+ {
+ if (U_IS_BMP(value)) {
+ UChar character = static_cast<UChar>(value);
+ ASSERT(character == value);
+ decodedEntity.append(character);
+ return true;
+ }
+ decodedEntity.append(U16_LEAD(value));
+ decodedEntity.append(U16_TRAIL(value));
+ return true;
+ }
- enum EntityState {
- Initial,
- Number,
- MaybeHexLowerCaseX,
- MaybeHexUpperCaseX,
- Hex,
- Decimal,
- Named
- };
- EntityState entityState = Initial;
- UChar32 result = 0;
- Vector<UChar, 10> consumedCharacters;
+ inline static bool acceptMalformed() { return true; }
- while (!source.isEmpty()) {
- UChar cc = *source;
- switch (entityState) {
- case Initial: {
- if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&')
- return false;
- if (additionalAllowedCharacter && cc == additionalAllowedCharacter)
- return false;
- if (cc == '#') {
- entityState = Number;
+ inline static bool consumeNamedEntity(SegmentedString& source, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter, UChar& cc)
+ {
+ Vector<UChar, 10> consumedCharacters;
+ HTMLEntitySearch entitySearch;
+ while (!source.isEmpty()) {
+ cc = *source;
+ entitySearch.advance(cc);
+ if (!entitySearch.isEntityPrefix())
break;
- }
- if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
- entityState = Named;
- continue;
- }
- return false;
+ consumedCharacters.append(cc);
+ source.advanceAndASSERT(cc);
}
- case Number: {
- if (cc == 'x') {
- entityState = MaybeHexLowerCaseX;
- break;
- }
- if (cc == 'X') {
- entityState = MaybeHexUpperCaseX;
- break;
- }
- if (cc >= '0' && cc <= '9') {
- entityState = Decimal;
- continue;
- }
- source.push('#');
+ notEnoughCharacters = source.isEmpty();
+ if (notEnoughCharacters) {
+ // We can't decode an entity yet because there might be a longer entity
+ // that we could match if we had more data.
+ unconsumeCharacters(source, consumedCharacters);
return false;
}
- case MaybeHexLowerCaseX: {
- if (isHexDigit(cc)) {
- entityState = Hex;
- continue;
- }
- source.push('#');
- source.push('x');
+ if (!entitySearch.mostRecentMatch()) {
+ ASSERT(!entitySearch.currentValue());
+ unconsumeCharacters(source, consumedCharacters);
return false;
}
- case MaybeHexUpperCaseX: {
- if (isHexDigit(cc)) {
- entityState = Hex;
- continue;
- }
- source.push('#');
- source.push('X');
- return false;
- }
- case Hex: {
- if (cc >= '0' && cc <= '9')
- result = result * 16 + cc - '0';
- else if (cc >= 'a' && cc <= 'f')
- result = result * 16 + 10 + cc - 'a';
- else if (cc >= 'A' && cc <= 'F')
- result = result * 16 + 10 + cc - 'A';
- else {
- if (cc == ';')
- source.advanceAndASSERT(cc);
- return convertToUTF16(legalEntityFor(result), decodedEntity);
- }
- break;
- }
- case Decimal: {
- if (cc >= '0' && cc <= '9')
- result = result * 10 + cc - '0';
- else {
- if (cc == ';')
- source.advanceAndASSERT(cc);
- return convertToUTF16(legalEntityFor(result), decodedEntity);
- }
- break;
- }
- case Named: {
- HTMLEntitySearch entitySearch;
- while (!source.isEmpty()) {
+ if (entitySearch.mostRecentMatch()->length != entitySearch.currentLength()) {
+ // We've consumed too many characters. We need to walk the
+ // source back to the point at which we had consumed an
+ // actual entity.
+ unconsumeCharacters(source, consumedCharacters);
+ consumedCharacters.clear();
+ const int length = entitySearch.mostRecentMatch()->length;
+ const UChar* reference = entitySearch.mostRecentMatch()->entity;
+ for (int i = 0; i < length; ++i) {
cc = *source;
- entitySearch.advance(cc);
- if (!entitySearch.isEntityPrefix())
- break;
+ ASSERT_UNUSED(reference, cc == *reference++);
consumedCharacters.append(cc);
source.advanceAndASSERT(cc);
+ ASSERT(!source.isEmpty());
}
- notEnoughCharacters = source.isEmpty();
- if (notEnoughCharacters) {
- // We can't an entity because there might be a longer entity
- // that we could match if we had more data.
- unconsumeCharacters(source, consumedCharacters);
- return false;
- }
- if (!entitySearch.mostRecentMatch()) {
- ASSERT(!entitySearch.currentValue());
- unconsumeCharacters(source, consumedCharacters);
- return false;
- }
- if (entitySearch.mostRecentMatch()->length != entitySearch.currentLength()) {
- // We've consumed too many characters. We need to walk the
- // source back to the point at which we had consumed an
- // actual entity.
- unconsumeCharacters(source, consumedCharacters);
- consumedCharacters.clear();
- const int length = entitySearch.mostRecentMatch()->length;
- const UChar* reference = entitySearch.mostRecentMatch()->entity;
- for (int i = 0; i < length; ++i) {
- cc = *source;
- ASSERT_UNUSED(reference, cc == *reference++);
- consumedCharacters.append(cc);
- source.advanceAndASSERT(cc);
- ASSERT(!source.isEmpty());
- }
- cc = *source;
- }
- if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
- || !additionalAllowedCharacter
- || !(isAlphaNumeric(cc) || cc == '=')) {
- return convertToUTF16(entitySearch.mostRecentMatch()->value, decodedEntity);
- }
- unconsumeCharacters(source, consumedCharacters);
- return false;
+ cc = *source;
}
+ if (entitySearch.mostRecentMatch()->lastCharacter() == ';'
+ || !additionalAllowedCharacter
+ || !(isAlphaNumeric(cc) || cc == '=')) {
+ return convertToUTF16(entitySearch.mostRecentMatch()->value, decodedEntity);
}
- consumedCharacters.append(cc);
- source.advanceAndASSERT(cc);
+ unconsumeCharacters(source, consumedCharacters);
+ return false;
}
- ASSERT(source.isEmpty());
- notEnoughCharacters = true;
- unconsumeCharacters(source, consumedCharacters);
- return false;
+};
+
}
+
+bool consumeHTMLEntity(SegmentedString& source, Vector<UChar, 16>& decodedEntity, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
+{
+ return consumeCharacterReference<HTMLEntityParser>(source, decodedEntity, notEnoughCharacters, additionalAllowedCharacter);
+}
+
UChar decodeNamedEntity(const char* name)
{
HTMLEntitySearch search;
Added: trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlineMethods.h (0 => 91025)
--- trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlineMethods.h (rev 0)
+++ trunk/Source/WebCore/xml/parser/CharacterReferenceParserInlineMethods.h 2011-07-14 20:08:08 UTC (rev 91025)
@@ -0,0 +1,169 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef CharacterReferenceParserInlineMethods_h
+#define CharacterReferenceParserInlineMethods_h
+
+#include <wtf/Vector.h>
+
+namespace WebCore {
+
+inline bool isHexDigit(UChar cc)
+{
+ return (cc >= '0' && cc <= '9') || (cc >= 'a' && cc <= 'f') || (cc >= 'A' && cc <= 'F');
+}
+
+inline void unconsumeCharacters(SegmentedString& source, const Vector<UChar, 10>& consumedCharacters)
+{
+ if (consumedCharacters.size() == 1)
+ source.push(consumedCharacters[0]);
+ else if (consumedCharacters.size() == 2) {
+ source.push(consumedCharacters[0]);
+ source.push(consumedCharacters[1]);
+ } else
+ source.prepend(SegmentedString(String(consumedCharacters.data(), consumedCharacters.size())));
+}
+
+template <typename ParserFunctions>
+bool consumeCharacterReference(SegmentedString& source, Vector<UChar, 16>& decodedCharacter, bool& notEnoughCharacters, UChar additionalAllowedCharacter)
+{
+ ASSERT(!additionalAllowedCharacter || additionalAllowedCharacter == '"' || additionalAllowedCharacter == '\'' || additionalAllowedCharacter == '>');
+ ASSERT(!notEnoughCharacters);
+ ASSERT(decodedCharacter.isEmpty());
+
+ enum EntityState {
+ Initial,
+ Number,
+ MaybeHexLowerCaseX,
+ MaybeHexUpperCaseX,
+ Hex,
+ Decimal,
+ Named
+ };
+ EntityState entityState = Initial;
+ UChar32 result = 0;
+ Vector<UChar, 10> consumedCharacters;
+
+ while (!source.isEmpty()) {
+ UChar cc = *source;
+ switch (entityState) {
+ case Initial: {
+ if (cc == '\x09' || cc == '\x0A' || cc == '\x0C' || cc == ' ' || cc == '<' || cc == '&')
+ return false;
+ if (additionalAllowedCharacter && cc == additionalAllowedCharacter)
+ return false;
+ if (cc == '#') {
+ entityState = Number;
+ break;
+ }
+ if ((cc >= 'a' && cc <= 'z') || (cc >= 'A' && cc <= 'Z')) {
+ entityState = Named;
+ continue;
+ }
+ return false;
+ }
+ case Number: {
+ if (cc == 'x') {
+ entityState = MaybeHexLowerCaseX;
+ break;
+ }
+ if (cc == 'X') {
+ entityState = MaybeHexUpperCaseX;
+ break;
+ }
+ if (cc >= '0' && cc <= '9') {
+ entityState = Decimal;
+ continue;
+ }
+ source.push('#');
+ return false;
+ }
+ case MaybeHexLowerCaseX: {
+ if (isHexDigit(cc)) {
+ entityState = Hex;
+ continue;
+ }
+ source.push('#');
+ source.push('x');
+ return false;
+ }
+ case MaybeHexUpperCaseX: {
+ if (isHexDigit(cc)) {
+ entityState = Hex;
+ continue;
+ }
+ source.push('#');
+ source.push('X');
+ return false;
+ }
+ case Hex: {
+ if (cc >= '0' && cc <= '9')
+ result = result * 16 + cc - '0';
+ else if (cc >= 'a' && cc <= 'f')
+ result = result * 16 + 10 + cc - 'a';
+ else if (cc >= 'A' && cc <= 'F')
+ result = result * 16 + 10 + cc - 'A';
+ else if (cc == ';') {
+ source.advanceAndASSERT(cc);
+ return ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
+ } else if (ParserFunctions::acceptMalformed())
+ return ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
+ else {
+ unconsumeCharacters(source, consumedCharacters);
+ return false;
+ }
+ break;
+ }
+ case Decimal: {
+ if (cc >= '0' && cc <= '9')
+ result = result * 10 + cc - '0';
+ else if (cc == ';') {
+ source.advanceAndASSERT(cc);
+ return ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
+ } else if (ParserFunctions::acceptMalformed())
+ return ParserFunctions::convertToUTF16(ParserFunctions::legalEntityFor(result), decodedCharacter);
+ else {
+ unconsumeCharacters(source, consumedCharacters);
+ return false;
+ }
+ break;
+ }
+ case Named: {
+ return ParserFunctions::consumeNamedEntity(source, decodedCharacter, notEnoughCharacters, additionalAllowedCharacter, cc);
+ }
+ }
+ consumedCharacters.append(cc);
+ source.advanceAndASSERT(cc);
+ }
+ ASSERT(source.isEmpty());
+ notEnoughCharacters = true;
+ unconsumeCharacters(source, consumedCharacters);
+ return false;
+}
+
+}
+
+#endif // CharacterReferenceParserInlineMethods_h
Added: trunk/Source/WebCore/xml/parser/XMLCharacterReferenceParser.cpp (0 => 91025)
--- trunk/Source/WebCore/xml/parser/XMLCharacterReferenceParser.cpp (rev 0)
+++ trunk/Source/WebCore/xml/parser/XMLCharacterReferenceParser.cpp 2011-07-14 20:08:08 UTC (rev 91025)
@@ -0,0 +1,78 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2009 Torch Mobile, Inc. http://www.torchmobile.com/
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#include "config.h"
+#include "XMLCharacterReferenceParser.h"
+
+using namespace WTF;
+
+#include "CharacterReferenceParserInlineMethods.h"
+
+namespace WebCore {
+
+namespace {
+
+class XMLCharacterReferenceParser {
+public:
+ inline static UChar32 legalEntityFor(UChar32 value)
+ {
+ // FIXME: A number of specific entity values generate parse errors.
+ if (!value || value > 0x10FFFF || (value >= 0xD800 && value <= 0xDFFF))
+ return 0xFFFD;
+ return value;
+ }
+
+ inline static bool convertToUTF16(UChar32 value, Vector<UChar, 16>& decodedCharacter)
+ {
+ if (U_IS_BMP(value)) {
+ UChar character = static_cast<UChar>(value);
+ ASSERT(character == value);
+ decodedCharacter.append(character);
+ return true;
+ }
+ decodedCharacter.append(U16_LEAD(value));
+ decodedCharacter.append(U16_TRAIL(value));
+ return true;
+ }
+
+ inline static bool acceptMalformed() { return false; }
+
+ inline static bool consumeNamedEntity(SegmentedString&, Vector<UChar, 16>&, bool&, UChar, UChar&)
+ {
+ ASSERT_NOT_REACHED();
+ return false;
+ }
+};
+
+}
+
+bool consumeXMLCharacterReference(SegmentedString& source, Vector<UChar, 16>& decodedCharacter, bool& notEnoughCharacters)
+{
+ return consumeCharacterReference<XMLCharacterReferenceParser>(source, decodedCharacter, notEnoughCharacters, 0);
+}
+
+} // namespace WebCore
Added: trunk/Source/WebCore/xml/parser/XMLCharacterReferenceParser.h (0 => 91025)
--- trunk/Source/WebCore/xml/parser/XMLCharacterReferenceParser.h (rev 0)
+++ trunk/Source/WebCore/xml/parser/XMLCharacterReferenceParser.h 2011-07-14 20:08:08 UTC (rev 91025)
@@ -0,0 +1,38 @@
+/*
+ * Copyright (C) 2008 Apple Inc. All Rights Reserved.
+ * Copyright (C) 2010 Google, Inc. All Rights Reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY APPLE INC. ``AS IS'' AND ANY
+ * EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+ * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL APPLE INC. OR
+ * CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+ * EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+ * PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+ * PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+ * OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+ * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+ * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+ */
+
+#ifndef XMLCharacterReferenceParser_h
+#define XMLCharacterReferenceParser_h
+
+#include "SegmentedString.h"
+
+namespace WebCore {
+
+bool consumeXMLCharacterReference(SegmentedString&, Vector<UChar, 16>& decodedCharacter, bool& notEnoughCharacters);
+
+}
+
+#endif // XMLCharacterReferenceParser_h