Skip to site navigation (Press enter)

[webkit-changes] [106417] trunk/Source/JavaScriptCore

msaboff Tue, 31 Jan 2012 17:49:24 -0800

Title: [106417] trunk/Source/JavaScriptCore

Revision: 106417
Author: msab...@apple.com
Date: 2012-01-31 17:49:15 -0800 (Tue, 31 Jan 2012)

Log Message

StringProtoFuncToUpperCase should call StringImpl::upper similar to StringProtoToLowerCase
https://bugs.webkit.org/show_bug.cgi?id=76647


Reviewed by Darin Adler.

Changed stringProtoFuncToUpperCase to call StringImpl::upper() in a manner similar
to stringProtoFuncToLowerCase().  Fixed StringImpl::upper() to handle two special
cases.  One case is s-sharp (0xdf) which converts to "SS".  The other case is
for characters which become 16 bit values when converted to upper case.  For
those, we up convert the source string and use the 16 bit path.

* runtime/StringPrototype.cpp:
(JSC::stringProtoFuncToUpperCase):
* wtf/text/StringImpl.cpp:
(WTF::StringImpl::upper):
* wtf/unicode/CharacterNames.h:
(smallLetterSharpS): New constant

Modified Paths

trunk/Source/JavaScriptCore/ChangeLog
trunk/Source/JavaScriptCore/runtime/StringPrototype.cpp
trunk/Source/JavaScriptCore/wtf/text/StringImpl.cpp
trunk/Source/JavaScriptCore/wtf/unicode/CharacterNames.h

Diff

Modified: trunk/Source/JavaScriptCore/ChangeLog (106416 => 106417)


--- trunk/Source/JavaScriptCore/ChangeLog	2012-02-01 01:41:35 UTC (rev 106416)
+++ trunk/Source/JavaScriptCore/ChangeLog	2012-02-01 01:49:15 UTC (rev 106417)
@@ -1,3 +1,23 @@
+2012-01-31  Michael Saboff  <msab...@apple.com>
+
+        StringProtoFuncToUpperCase should call StringImpl::upper similar to StringProtoToLowerCase
+        https://bugs.webkit.org/show_bug.cgi?id=76647
+
+        Reviewed by Darin Adler.
+
+        Changed stringProtoFuncToUpperCase to call StringImpl::upper() in a manner similar
+        to stringProtoFuncToLowerCase().  Fixed StringImpl::upper() to handle two special
+        cases.  One case is s-sharp (0xdf) which converts to "SS".  The other case is
+        for characters which become 16 bit values when converted to upper case.  For
+        those, we up convert the source string and use the 16 bit path.
+
+        * runtime/StringPrototype.cpp:
+        (JSC::stringProtoFuncToUpperCase):
+        * wtf/text/StringImpl.cpp:
+        (WTF::StringImpl::upper):
+        * wtf/unicode/CharacterNames.h:
+        (smallLetterSharpS): New constant
+
 2012-01-31  Oliver Hunt  <oli...@apple.com>
 
         Remove unneeded sourceId property

Modified: trunk/Source/JavaScriptCore/runtime/StringPrototype.cpp (106416 => 106417)


--- trunk/Source/JavaScriptCore/runtime/StringPrototype.cpp	2012-02-01 01:41:35 UTC (rev 106416)
+++ trunk/Source/JavaScriptCore/runtime/StringPrototype.cpp	2012-02-01 01:49:15 UTC (rev 106417)
@@ -1184,9 +1184,9 @@
     if (!sSize)
         return JSValue::encode(sVal);
 
-    StringImpl* ourImpl = s.impl();   
+    StringImpl* ourImpl = s.impl();
     RefPtr<StringImpl> lower = ourImpl->lower();
-    if (ourImpl == lower.get())
+    if (ourImpl == lower)
         return JSValue::encode(sVal);
     return JSValue::encode(jsString(exec, UString(lower.release())));
 }
@@ -1203,32 +1203,11 @@
     if (!sSize)
         return JSValue::encode(sVal);
 
-    const UChar* sData = s.characters();
-    Vector<UChar> buffer(sSize);
-
-    UChar ored = 0;
-    for (int i = 0; i < sSize; i++) {
-        UChar c = sData[i];
-        ored |= c;
-        buffer[i] = toASCIIUpper(c);
-    }
-    if (!(ored & ~0x7f))
-        return JSValue::encode(jsString(exec, UString::adopt(buffer)));
-
-    bool error;
-    int length = Unicode::toUpper(buffer.data(), sSize, sData, sSize, &error);
-    if (error) {
-        buffer.resize(length);
-        length = Unicode::toUpper(buffer.data(), length, sData, sSize, &error);
-        if (error)
-            return JSValue::encode(sVal);
-    }
-    if (length == sSize) {
-        if (memcmp(buffer.data(), sData, length * sizeof(UChar)) == 0)
-            return JSValue::encode(sVal);
-    } else
-        buffer.resize(length);
-    return JSValue::encode(jsString(exec, UString::adopt(buffer)));
+    StringImpl* sImpl = s.impl();
+    RefPtr<StringImpl> upper = sImpl->upper();
+    if (sImpl == upper)
+        return JSValue::encode(sVal);
+    return JSValue::encode(jsString(exec, UString(upper.release())));
 }
 
 EncodedJSValue JSC_HOST_CALL stringProtoFuncLocaleCompare(ExecState* exec)

Modified: trunk/Source/JavaScriptCore/wtf/text/StringImpl.cpp (106416 => 106417)


--- trunk/Source/JavaScriptCore/wtf/text/StringImpl.cpp	2012-02-01 01:41:35 UTC (rev 106416)
+++ trunk/Source/JavaScriptCore/wtf/text/StringImpl.cpp	2012-02-01 01:49:15 UTC (rev 106417)
@@ -30,7 +30,9 @@
 #include "StringHash.h"
 #include <wtf/StdLibExtras.h>
 #include <wtf/WTFThreadData.h>
+#include <wtf/unicode/CharacterNames.h>
 
+
 using namespace std;
 
 namespace WTF {
@@ -378,19 +380,53 @@
             return newImpl.release();
 
         // Do a slower implementation for cases that include non-ASCII Latin-1 characters.
-        for (int32_t i = 0; i < length; i++)
-            data8[i] = static_cast<LChar>(Unicode::toUpper(m_data8[i]));
+        int numberSharpSCharacters = 0;
 
+        // There are two special cases.
+        //  1. latin-1 characters when converted to upper case are 16 bit characters.
+        //  2. Lower case sharp-S converts to "SS" (two characters)
+        for (int32_t i = 0; i < length; i++) {
+            LChar c = m_data8[i];
+            if (UNLIKELY(c == smallLetterSharpS))
+                numberSharpSCharacters++;
+            UChar upper = Unicode::toUpper(c);
+            if (UNLIKELY(upper > 0xff)) {
+                // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
+                goto upconvert;
+            }
+            data8[i] = static_cast<LChar>(upper);
+        }
+
+        if (!numberSharpSCharacters)
+            return newImpl.release();
+
+        // We have numberSSCharacters sharp-s characters, but none of the other special characters.
+        newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
+
+        LChar* dest = data8;
+
+        for (int32_t i = 0; i < length; i++) {
+            LChar c = m_data8[i];
+            if (c == smallLetterSharpS) {
+                *dest++ = 'S';
+                *dest++ = 'S';
+            } else
+                *dest++ = static_cast<LChar>(Unicode::toUpper(c));
+        }
+
         return newImpl.release();
     }
 
+upconvert:
+    const UChar* source16 = characters();
+
     UChar* data16;
     RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
     
     // Do a faster loop for the case where all the characters are ASCII.
     UChar ored = 0;
     for (int i = 0; i < length; i++) {
-        UChar c = m_data16[i];
+        UChar c = source16[i];
         ored |= c;
         data16[i] = toASCIIUpper(c);
     }
@@ -400,11 +436,11 @@
     // Do a slower implementation for cases that include non-ASCII characters.
     bool error;
     newImpl = createUninitialized(m_length, data16);
-    int32_t realLength = Unicode::toUpper(data16, length, m_data16, m_length, &error);
+    int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
     if (!error && realLength == length)
         return newImpl;
     newImpl = createUninitialized(realLength, data16);
-    Unicode::toUpper(data16, realLength, m_data16, m_length, &error);
+    Unicode::toUpper(data16, realLength, source16, m_length, &error);
     if (error)
         return this;
     return newImpl.release();

Modified: trunk/Source/JavaScriptCore/wtf/unicode/CharacterNames.h (106416 => 106417)


--- trunk/Source/JavaScriptCore/wtf/unicode/CharacterNames.h	2012-02-01 01:41:35 UTC (rev 106416)
+++ trunk/Source/JavaScriptCore/wtf/unicode/CharacterNames.h	2012-02-01 01:49:15 UTC (rev 106417)
@@ -72,6 +72,7 @@
 const UChar rightToLeftMark = 0x200F;
 const UChar rightToLeftOverride = 0x202E;
 const UChar sesameDot = 0xFE45;
+const UChar smallLetterSharpS = 0x00DF;
 const UChar softHyphen = 0x00AD;
 const UChar space = 0x0020;
 const UChar tibetanMarkIntersyllabicTsheg = 0x0F0B;

_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes