Title: [123008] trunk/Source
Revision
123008
Author
msab...@apple.com
Date
2012-07-18 13:22:43 -0700 (Wed, 18 Jul 2012)

Log Message

Make TextCodecLatin1 handle 8 bit data without converting to UChar's
https://bugs.webkit.org/show_bug.cgi?id=90319

Reviewed by Oliver Hunt.

Source/WebCore: 

Updated codec to create 8 bit strings where possible.
We assume that the incoming stream can all be decoded as 8-bit values.
If we find a 16-bit value, we take the already decoded data and
copy / convert it to a 16-bit buffer and then continue processing the rest
of the stream as 16-bit data.

No new tests, functionality covered with existing tests.

* platform/text/TextCodecASCIIFastPath.h:
(WebCore::copyASCIIMachineWord):
* platform/text/TextCodecLatin1.cpp:
(WebCore::TextCodecLatin1::decode):

Source/WTF: 

* wtf/text/StringImpl.h:
(StringImpl): Exported LChar variant of adopt().
* wtf/text/WTFString.h:
(WTF::String::createUninitialized): Exported LChar variant.

Modified Paths

Diff

Modified: trunk/Source/WTF/ChangeLog (123007 => 123008)


--- trunk/Source/WTF/ChangeLog	2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WTF/ChangeLog	2012-07-18 20:22:43 UTC (rev 123008)
@@ -1,3 +1,15 @@
+2012-07-18  Michael Saboff  <msab...@apple.com>
+
+        Make TextCodecLatin1 handle 8 bit data without converting to UChar's
+        https://bugs.webkit.org/show_bug.cgi?id=90319
+
+        Reviewed by Oliver Hunt.
+
+        * wtf/text/StringImpl.h:
+        (StringImpl): Exported LChar variant of adopt().
+        * wtf/text/WTFString.h:
+        (WTF::String::createUninitialized): Exported LChar variant.
+
 2012-07-18  Rob Buis  <rb...@rim.com>
 
         Alignment crash in MIMESniffer

Modified: trunk/Source/WTF/wtf/text/StringImpl.h (123007 => 123008)


--- trunk/Source/WTF/wtf/text/StringImpl.h	2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WTF/wtf/text/StringImpl.h	2012-07-18 20:22:43 UTC (rev 123008)
@@ -282,7 +282,7 @@
         return adoptRef(new StringImpl(rep->m_data16 + offset, length, ownerRep));
     }
 
-    static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& data);
+    WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> createUninitialized(unsigned length, LChar*& data);
     WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> createUninitialized(unsigned length, UChar*& data);
     template <typename T> static ALWAYS_INLINE PassRefPtr<StringImpl> tryCreateUninitialized(unsigned length, T*& output)
     {
@@ -336,8 +336,8 @@
         return empty();
     }
 
-    static PassRefPtr<StringImpl> adopt(StringBuffer<LChar>& buffer);
-    WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> adopt(StringBuffer<UChar>& buffer);
+    WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> adopt(StringBuffer<UChar>&);
+    WTF_EXPORT_PRIVATE static PassRefPtr<StringImpl> adopt(StringBuffer<LChar>&);
 
 #if PLATFORM(QT) && HAVE(QT5)
     static PassRefPtr<StringImpl> adopt(QStringData*);

Modified: trunk/Source/WTF/wtf/text/WTFString.h (123007 => 123008)


--- trunk/Source/WTF/wtf/text/WTFString.h	2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WTF/wtf/text/WTFString.h	2012-07-18 20:22:43 UTC (rev 123008)
@@ -321,6 +321,7 @@
     // into the buffer returned in data before the returned string is used.
     // Failure to do this will have unpredictable results.
     static String createUninitialized(unsigned length, UChar*& data) { return StringImpl::createUninitialized(length, data); }
+    static String createUninitialized(unsigned length, LChar*& data) { return StringImpl::createUninitialized(length, data); }
 
     WTF_EXPORT_PRIVATE void split(const String& separator, Vector<String>& result) const;
     WTF_EXPORT_PRIVATE void split(const String& separator, bool allowEmptyEntries, Vector<String>& result) const;

Modified: trunk/Source/WebCore/ChangeLog (123007 => 123008)


--- trunk/Source/WebCore/ChangeLog	2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WebCore/ChangeLog	2012-07-18 20:22:43 UTC (rev 123008)
@@ -1,3 +1,23 @@
+2012-07-18  Michael Saboff  <msab...@apple.com>
+
+        Make TextCodecLatin1 handle 8 bit data without converting to UChar's
+        https://bugs.webkit.org/show_bug.cgi?id=90319
+
+        Reviewed by Oliver Hunt.
+
+        Updated codec to create 8 bit strings where possible.
+        We assume that the incoming stream can all be decoded as 8-bit values.
+        If we find a 16-bit value, we take the already decoded data and
+        copy / convert it to a 16-bit buffer and then continue processing the rest
+        of the stream as 16-bit data.
+
+        No new tests, functionality covered with existing tests.
+
+        * platform/text/TextCodecASCIIFastPath.h:
+        (WebCore::copyASCIIMachineWord):
+        * platform/text/TextCodecLatin1.cpp:
+        (WebCore::TextCodecLatin1::decode):
+
 2012-07-18  Dimitri Glazkov  <dglaz...@chromium.org>
 
         Fix up old name in RuleSet::addRulesFromSheet

Modified: trunk/Source/WebCore/platform/text/TextCodecASCIIFastPath.h (123007 => 123008)


--- trunk/Source/WebCore/platform/text/TextCodecASCIIFastPath.h	2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WebCore/platform/text/TextCodecASCIIFastPath.h	2012-07-18 20:22:43 UTC (rev 123008)
@@ -33,6 +33,11 @@
 
 template<size_t size> struct UCharByteFiller;
 template<> struct UCharByteFiller<4> {
+    static void copy(LChar* destination, const uint8_t* source)
+    {
+        memcpy(destination, source, 4);
+    }
+    
     static void copy(UChar* destination, const uint8_t* source)
     {
         destination[0] = source[0];
@@ -42,6 +47,11 @@
     }
 };
 template<> struct UCharByteFiller<8> {
+    static void copy(LChar* destination, const uint8_t* source)
+    {
+        memcpy(destination, source, 8);
+    }
+
     static void copy(UChar* destination, const uint8_t* source)
     {
         destination[0] = source[0];
@@ -55,6 +65,11 @@
     }
 };
 
+inline void copyASCIIMachineWord(LChar* destination, const uint8_t* source)
+{
+    UCharByteFiller<sizeof(WTF::MachineWord)>::copy(destination, source);
+}
+
 inline void copyASCIIMachineWord(UChar* destination, const uint8_t* source)
 {
     UCharByteFiller<sizeof(WTF::MachineWord)>::copy(destination, source);

Modified: trunk/Source/WebCore/platform/text/TextCodecLatin1.cpp (123007 => 123008)


--- trunk/Source/WebCore/platform/text/TextCodecLatin1.cpp	2012-07-18 20:21:34 UTC (rev 123007)
+++ trunk/Source/WebCore/platform/text/TextCodecLatin1.cpp	2012-07-18 20:22:43 UTC (rev 123008)
@@ -120,13 +120,15 @@
 
 String TextCodecLatin1::decode(const char* bytes, size_t length, bool, bool, bool&)
 {
-    UChar* characters;
+    LChar* characters;
+    if (!length)
+        return emptyString();
     String result = String::createUninitialized(length, characters);
 
     const uint8_t* source = reinterpret_cast<const uint8_t*>(bytes);
     const uint8_t* end = reinterpret_cast<const uint8_t*>(bytes + length);
     const uint8_t* alignedEnd = alignToMachineWord(end);
-    UChar* destination = characters;
+    LChar* destination = characters;
 
     while (source < end) {
         if (isASCII(*source)) {
@@ -149,6 +151,9 @@
             *destination = *source;
         } else {
 useLookupTable:
+            if (table[*source] > 0xff)
+                goto upConvertTo16Bit;
+
             *destination = table[*source];
         }
 
@@ -157,6 +162,54 @@
     }
 
     return result;
+    
+upConvertTo16Bit:
+    UChar* characters16;
+    String result16 = String::createUninitialized(length, characters16);
+
+    UChar* destination16 = characters16;
+
+    // Zero extend and copy already processed 8 bit data
+    LChar* ptr8 = characters;
+    LChar* endPtr8 = destination;
+
+    while (ptr8 < endPtr8)
+        *destination16++ = *ptr8++;
+
+    // Handle the character that triggered the 16 bit path
+    *destination16 = table[*source];
+    ++source;
+    ++destination16;
+
+    while (source < end) {
+        if (isASCII(*source)) {
+            // Fast path for ASCII. Most Latin-1 text will be ASCII.
+            if (isAlignedToMachineWord(source)) {
+                while (source < alignedEnd) {
+                    MachineWord chunk = *reinterpret_cast_ptr<const MachineWord*>(source);
+                    
+                    if (!isAllASCII<LChar>(chunk))
+                        goto useLookupTable16;
+                    
+                    copyASCIIMachineWord(destination16, source);
+                    source += sizeof(MachineWord);
+                    destination16 += sizeof(MachineWord);
+                }
+                
+                if (source == end)
+                    break;
+            }
+            *destination16 = *source;
+        } else {
+useLookupTable16:
+            *destination16 = table[*source];
+        }
+        
+        ++source;
+        ++destination16;
+    }
+    
+    return result16;
 }
 
 static CString encodeComplexWindowsLatin1(const UChar* characters, size_t length, UnencodableHandling handling)
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to