- Revision
- 106417
- Author
- msab...@apple.com
- Date
- 2012-01-31 17:49:15 -0800 (Tue, 31 Jan 2012)
Log Message
StringProtoFuncToUpperCase should call StringImpl::upper similar to StringProtoToLowerCase
https://bugs.webkit.org/show_bug.cgi?id=76647
Reviewed by Darin Adler.
Changed stringProtoFuncToUpperCase to call StringImpl::upper() in a manner similar
to stringProtoFuncToLowerCase(). Fixed StringImpl::upper() to handle two special
cases. One case is s-sharp (0xdf) which converts to "SS". The other case is
for characters which become 16 bit values when converted to upper case. For
those, we up-convert the source string and use the 16 bit path.
* runtime/StringPrototype.cpp:
(JSC::stringProtoFuncToUpperCase):
* wtf/text/StringImpl.cpp:
(WTF::StringImpl::upper):
* wtf/unicode/CharacterNames.h:
(smallLetterSharpS): New constant
Modified Paths
Diff
Modified: trunk/Source/_javascript_Core/ChangeLog (106416 => 106417)
--- trunk/Source/_javascript_Core/ChangeLog 2012-02-01 01:41:35 UTC (rev 106416)
+++ trunk/Source/_javascript_Core/ChangeLog 2012-02-01 01:49:15 UTC (rev 106417)
@@ -1,3 +1,23 @@
+2012-01-31 Michael Saboff <msab...@apple.com>
+
+ StringProtoFuncToUpperCase should call StringImpl::upper similar to StringProtoToLowerCase
+ https://bugs.webkit.org/show_bug.cgi?id=76647
+
+ Reviewed by Darin Adler.
+
+ Changed stringProtoFuncToUpperCase to call StringImpl::upper() in a manner similar
+ to stringProtoFuncToLowerCase(). Fixed StringImpl::upper() to handle two special
+ cases. One case is s-sharp (0xdf) which converts to "SS". The other case is
+ for characters which become 16 bit values when converted to upper case. For
+ those, we up-convert the source string and use the 16 bit path.
+
+ * runtime/StringPrototype.cpp:
+ (JSC::stringProtoFuncToUpperCase):
+ * wtf/text/StringImpl.cpp:
+ (WTF::StringImpl::upper):
+ * wtf/unicode/CharacterNames.h:
+ (smallLetterSharpS): New constant
+
2012-01-31 Oliver Hunt <oli...@apple.com>
Remove unneeded sourceId property
Modified: trunk/Source/_javascript_Core/runtime/StringPrototype.cpp (106416 => 106417)
--- trunk/Source/_javascript_Core/runtime/StringPrototype.cpp 2012-02-01 01:41:35 UTC (rev 106416)
+++ trunk/Source/_javascript_Core/runtime/StringPrototype.cpp 2012-02-01 01:49:15 UTC (rev 106417)
@@ -1184,9 +1184,9 @@
if (!sSize)
return JSValue::encode(sVal);
- StringImpl* ourImpl = s.impl();
+ StringImpl* ourImpl = s.impl();
RefPtr<StringImpl> lower = ourImpl->lower();
- if (ourImpl == lower.get())
+ if (ourImpl == lower)
return JSValue::encode(sVal);
return JSValue::encode(jsString(exec, UString(lower.release())));
}
@@ -1203,32 +1203,11 @@
if (!sSize)
return JSValue::encode(sVal);
- const UChar* sData = s.characters();
- Vector<UChar> buffer(sSize);
-
- UChar ored = 0;
- for (int i = 0; i < sSize; i++) {
- UChar c = sData[i];
- ored |= c;
- buffer[i] = toASCIIUpper(c);
- }
- if (!(ored & ~0x7f))
- return JSValue::encode(jsString(exec, UString::adopt(buffer)));
-
- bool error;
- int length = Unicode::toUpper(buffer.data(), sSize, sData, sSize, &error);
- if (error) {
- buffer.resize(length);
- length = Unicode::toUpper(buffer.data(), length, sData, sSize, &error);
- if (error)
- return JSValue::encode(sVal);
- }
- if (length == sSize) {
- if (memcmp(buffer.data(), sData, length * sizeof(UChar)) == 0)
- return JSValue::encode(sVal);
- } else
- buffer.resize(length);
- return JSValue::encode(jsString(exec, UString::adopt(buffer)));
+ StringImpl* sImpl = s.impl();
+ RefPtr<StringImpl> upper = sImpl->upper();
+ if (sImpl == upper)
+ return JSValue::encode(sVal);
+ return JSValue::encode(jsString(exec, UString(upper.release())));
}
EncodedJSValue JSC_HOST_CALL stringProtoFuncLocaleCompare(ExecState* exec)
Modified: trunk/Source/_javascript_Core/wtf/text/StringImpl.cpp (106416 => 106417)
--- trunk/Source/_javascript_Core/wtf/text/StringImpl.cpp 2012-02-01 01:41:35 UTC (rev 106416)
+++ trunk/Source/_javascript_Core/wtf/text/StringImpl.cpp 2012-02-01 01:49:15 UTC (rev 106417)
@@ -30,7 +30,9 @@
#include "StringHash.h"
#include <wtf/StdLibExtras.h>
#include <wtf/WTFThreadData.h>
+#include <wtf/unicode/CharacterNames.h>
+
using namespace std;
namespace WTF {
@@ -378,19 +380,53 @@
return newImpl.release();
// Do a slower implementation for cases that include non-ASCII Latin-1 characters.
- for (int32_t i = 0; i < length; i++)
- data8[i] = static_cast<LChar>(Unicode::toUpper(m_data8[i]));
+ int numberSharpSCharacters = 0;
+ // There are two special cases.
+ // 1. latin-1 characters when converted to upper case are 16 bit characters.
+ // 2. Lower case sharp-S converts to "SS" (two characters)
+ for (int32_t i = 0; i < length; i++) {
+ LChar c = m_data8[i];
+ if (UNLIKELY(c == smallLetterSharpS))
+ numberSharpSCharacters++;
+ UChar upper = Unicode::toUpper(c);
+ if (UNLIKELY(upper > 0xff)) {
+ // Since this upper-cased character does not fit in an 8-bit string, we need to take the 16-bit path.
+ goto upconvert;
+ }
+ data8[i] = static_cast<LChar>(upper);
+ }
+
+ if (!numberSharpSCharacters)
+ return newImpl.release();
+
+ // We have numberSharpSCharacters sharp-s characters, but none of the other special characters.
+ newImpl = createUninitialized(m_length + numberSharpSCharacters, data8);
+
+ LChar* dest = data8;
+
+ for (int32_t i = 0; i < length; i++) {
+ LChar c = m_data8[i];
+ if (c == smallLetterSharpS) {
+ *dest++ = 'S';
+ *dest++ = 'S';
+ } else
+ *dest++ = static_cast<LChar>(Unicode::toUpper(c));
+ }
+
return newImpl.release();
}
+upconvert:
+ const UChar* source16 = characters();
+
UChar* data16;
RefPtr<StringImpl> newImpl = createUninitialized(m_length, data16);
// Do a faster loop for the case where all the characters are ASCII.
UChar ored = 0;
for (int i = 0; i < length; i++) {
- UChar c = m_data16[i];
+ UChar c = source16[i];
ored |= c;
data16[i] = toASCIIUpper(c);
}
@@ -400,11 +436,11 @@
// Do a slower implementation for cases that include non-ASCII characters.
bool error;
newImpl = createUninitialized(m_length, data16);
- int32_t realLength = Unicode::toUpper(data16, length, m_data16, m_length, &error);
+ int32_t realLength = Unicode::toUpper(data16, length, source16, m_length, &error);
if (!error && realLength == length)
return newImpl;
newImpl = createUninitialized(realLength, data16);
- Unicode::toUpper(data16, realLength, m_data16, m_length, &error);
+ Unicode::toUpper(data16, realLength, source16, m_length, &error);
if (error)
return this;
return newImpl.release();
Modified: trunk/Source/_javascript_Core/wtf/unicode/CharacterNames.h (106416 => 106417)
--- trunk/Source/_javascript_Core/wtf/unicode/CharacterNames.h 2012-02-01 01:41:35 UTC (rev 106416)
+++ trunk/Source/_javascript_Core/wtf/unicode/CharacterNames.h 2012-02-01 01:49:15 UTC (rev 106417)
@@ -72,6 +72,7 @@
const UChar rightToLeftMark = 0x200F;
const UChar rightToLeftOverride = 0x202E;
const UChar sesameDot = 0xFE45;
+const UChar smallLetterSharpS = 0x00DF;
const UChar softHyphen = 0x00AD;
const UChar space = 0x0020;
const UChar tibetanMarkIntersyllabicTsheg = 0x0F0B;