- Revision
- 123008
- Author
- msab...@apple.com
- Date
- 2012-07-18 13:22:43 -0700 (Wed, 18 Jul 2012)
Log Message
Make TextCodecLatin1 handle 8 bit data without converting to UChar's
https://bugs.webkit.org/show_bug.cgi?id=90319
Reviewed by Oliver Hunt.
Source/WebCore:
Updated codec to create 8 bit strings where possible.
We assume that the incoming stream can all be decoded as 8-bit values.
If we find a 16-bit value, we take the already decoded data and
copy / convert it to a 16-bit buffer and then continue processing the rest
of the stream as 16-bit data.
No new tests, functionality covered with existing tests.
* platform/text/TextCodecASCIIFastPath.h:
(WebCore::copyASCIIMachineWord):
* platform/text/TextCodecLatin1.cpp:
(WebCore::TextCodecLatin1::decode):
Source/WTF:
* wtf/text/StringImpl.h:
(StringImpl): Exported LChar variant of adopt().
* wtf/text/WTFString.h:
(WTF::String::createUninitialized): Exported LChar variant.
Modified Paths
Diff
Modified: trunk/Source/WTF/ChangeLog (123007 => 123008)
--- trunk/Source/WTF/ChangeLog 2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WTF/ChangeLog 2012-07-18 20:22:43 UTC (rev 123008)
@@ -1,3 +1,15 @@
+2012-07-18 Michael Saboff <msab...@apple.com>
+
+ Make TextCodecLatin1 handle 8 bit data without converting to UChar's
+ https://bugs.webkit.org/show_bug.cgi?id=90319
+
+ Reviewed by Oliver Hunt.
+
+ * wtf/text/StringImpl.h:
+ (StringImpl): Exported LChar variant of adopt().
+ * wtf/text/WTFString.h:
+ (WTF::String::createUninitialized): Exported LChar variant.
+
2012-07-18 Rob Buis <rb...@rim.com>
Alignment crash in MIMESniffer
Modified: trunk/Source/WTF/wtf/text/StringImpl.h (123007 => 123008)
--- trunk/Source/WTF/wtf/text/StringImpl.h 2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WTF/wtf/text/StringImpl.h 2012-07-18 20:22:43 UTC (rev 123008)
@@ -282,7 +282,7 @@
return adoptRef(new StringImpl(rep->m_data16 + offset, length, ownerRep));
}
- static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& data);
+ WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& data);
WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
template <typename T> static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, T*& output)
{
@@ -336,8 +336,8 @@
return empty();
}
- static PassRefPtr<StringImpl> adopt(StringBuffer<LChar>& buffer);
- WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> adopt(StringBuffer<UChar>& buffer);
+ WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> adopt(StringBuffer<UChar>&);
+ WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> adopt(StringBuffer<LChar>&);
#if PLATFORM(QT) && HAVE(QT5)
static PassRefPtr<StringImpl> adopt(QStringData*);
Modified: trunk/Source/WTF/wtf/text/WTFString.h (123007 => 123008)
--- trunk/Source/WTF/wtf/text/WTFString.h 2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WTF/wtf/text/WTFString.h 2012-07-18 20:22:43 UTC (rev 123008)
@@ -321,6 +321,7 @@
// into the buffer returned in data before the returned string is used.
// Failure to do this will have unpredictable results.
static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); }
+ static String createUninitialized(unsigned length, LChar*& data) { return StringImpl::createUninitialized(length, data); }
WTF_EXPORT_PRIVATE void split(const String& separator, Vector<String>& result) const;
WTF_EXPORT_PRIVATE void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const;
Modified: trunk/Source/WebCore/ChangeLog (123007 => 123008)
--- trunk/Source/WebCore/ChangeLog 2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WebCore/ChangeLog 2012-07-18 20:22:43 UTC (rev 123008)
@@ -1,3 +1,23 @@
+2012-07-18 Michael Saboff <msab...@apple.com>
+
+ Make TextCodecLatin1 handle 8 bit data without converting to UChar's
+ https://bugs.webkit.org/show_bug.cgi?id=90319
+
+ Reviewed by Oliver Hunt.
+
+ Updated codec to create 8 bit strings where possible.
+ We assume that the incoming stream can all be decoded as 8-bit values.
+ If we find a 16-bit value, we take the already decoded data and
+ copy / convert it to a 16-bit buffer and then continue processing the rest
+ of the stream as 16-bit data.
+
+ No new tests, functionality covered with existing tests.
+
+ * platform/text/TextCodecASCIIFastPath.h:
+ (WebCore::copyASCIIMachineWord):
+ * platform/text/TextCodecLatin1.cpp:
+ (WebCore::TextCodecLatin1::decode):
+
2012-07-18 Dimitri Glazkov <dglaz...@chromium.org>
Fix up old name in RuleSet::addRulesFromSheet
Modified: trunk/Source/WebCore/platform/text/TextCodecASCIIFastPath.h (123007 => 123008)
--- trunk/Source/WebCore/platform/text/TextCodecASCIIFastPath.h 2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WebCore/platform/text/TextCodecASCIIFastPath.h 2012-07-18 20:22:43 UTC (rev 123008)
@@ -33,6 +33,11 @@
template<size_t size> struct UCharByteFiller;
template<> struct UCharByteFiller<4> {
+ static void copy(LChar* destination, const uint8_t* source)
+ {
+ memcpy(destination, source, 4);
+ }
+
static void copy(UChar* destination, const uint8_t* source)
{
destination[0] = source[0];
@@ -42,6 +47,11 @@
}
};
template<> struct UCharByteFiller<8> {
+ static void copy(LChar* destination, const uint8_t* source)
+ {
+ memcpy(destination, source, 8);
+ }
+
static void copy(UChar* destination, const uint8_t* source)
{
destination[0] = source[0];
@@ -55,6 +65,11 @@
}
};
+inline void copyASCIIMachineWord(LChar* destination, const uint8_t* source)
+{
+ UCharByteFiller<sizeof(WTF::MachineWord)>::copy(destination, source);
+}
+
inline void copyASCIIMachineWord(UChar* destination, const uint8_t* source)
{
UCharByteFiller<sizeof(WTF::MachineWord)>::copy(destination, source);
Modified: trunk/Source/WebCore/platform/text/TextCodecLatin1.cpp (123007 => 123008)
--- trunk/Source/WebCore/platform/text/TextCodecLatin1.cpp 2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WebCore/platform/text/TextCodecLatin1.cpp 2012-07-18 20:22:43 UTC (rev 123008)
@@ -120,13 +120,15 @@
String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
{
- UChar* characters;
+ LChar* characters;
+ if (!length)
+ return emptyString();
String result = String::createUninitialized(length, characters);
const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes);
const uint8_t* end = reinterpret_cast<const uint8_t*>(bytes + length);
const uint8_t* alignedEnd = alignToMachineWord(end);
- UChar* destination = characters;
+ LChar* destination = characters;
while (source < end) {
if (isASCII(*source)) {
@@ -149,6 +151,9 @@
*destination = *source;
} else {
useLookupTable:
+ if (table[*source] > 0xff)
+ goto upConvertTo16Bit;
+
*destination = table[*source];
}
@@ -157,6 +162,54 @@
}
return result;
+
+upConvertTo16Bit:
+ UChar* characters16;
+ String result16 = String::createUninitialized(length, characters16);
+
+ UChar* destination16 = characters16;
+
+ // Zero extend and copy already processed 8 bit data
+ LChar* ptr8 = characters;
+ LChar* endPtr8 = destination;
+
+ while (ptr8 < endPtr8)
+ *destination16++ = *ptr8++;
+
+ // Handle the character that triggered the 16 bit path
+ *destination16 = table[*source];
+ ++source;
+ ++destination16;
+
+ while (source < end) {
+ if (isASCII(*source)) {
+ // Fast path for ASCII. Most Latin-1 text will be ASCII.
+ if (isAlignedToMachineWord(source)) {
+ while (source < alignedEnd) {
+ MachineWord chunk = *reinterpret_cast_ptr<const MachineWord*>(source);
+
+ if (!isAllASCII<LChar>(chunk))
+ goto useLookupTable16;
+
+ copyASCIIMachineWord(destination16, source);
+ source += sizeof(MachineWord);
+ destination16 += sizeof(MachineWord);
+ }
+
+ if (source == end)
+ break;
+ }
+ *destination16 = *source;
+ } else {
+useLookupTable16:
+ *destination16 = table[*source];
+ }
+
+ ++source;
+ ++destination16;
+ }
+
+ return result16;
}
static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, UnencodableHandling handling)