Diff
Modified: releases/WebKitGTK/webkit-2.24/Source/_javascript_Core/ChangeLog (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/_javascript_Core/ChangeLog 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/_javascript_Core/ChangeLog 2019-05-17 11:25:29 UTC (rev 245455)
@@ -1,3 +1,29 @@
+2019-03-16 Darin Adler <[email protected]>
+
+ Improve normalization code, including moving from unorm.h to unorm2.h
+ https://bugs.webkit.org/show_bug.cgi?id=195330
+
+ Reviewed by Michael Catanzaro.
+
+ * runtime/JSString.h: Move StringViewWithUnderlyingString to StringView.h.
+
+ * runtime/StringPrototype.cpp: Include unorm2.h instead of unorm.h.
+ (JSC::normalizer): Added. Function to create normalizer object given
+ enumeration value indicating which is selected. Simplified because we
+ know the function will not fail and so we don't need error handling code.
+ (JSC::normalize): Changed this function to take a JSString* so we can
+ optimize the case where no normalization is needed. Added an early exit
+ if the string is stored as 8-bit and another if the string is already
+ normalized, using unorm2_isNormalized. Changed error handling to only
+ check cases that can actually fail in practice. Also did other small
+ optimizations like passing VM rather than ExecState.
+ (JSC::stringProtoFuncNormalize): Used smaller enumeration names that are
+ identical to the names used in the API and normalization parlance rather
+ than longer ones that expand the acronyms. Updated to pass JSString* to
+ the normalize function, so we can optimize 8-bit and already-normalized
+ cases, rather than callling the expensive String::upconvertedCharacters
+ function. Use throwVMRangeError.
+
2019-05-07 Yusuke Suzuki <[email protected]>
[JSC] DFG_ASSERT failed in lowInt52
Modified: releases/WebKitGTK/webkit-2.24/Source/_javascript_Core/runtime/JSString.h (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/_javascript_Core/runtime/JSString.h 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/_javascript_Core/runtime/JSString.h 2019-05-17 11:25:29 UTC (rev 245455)
@@ -67,12 +67,6 @@
bool isJSString(JSValue);
JSString* asString(JSValue);
-struct StringViewWithUnderlyingString {
- StringView view;
- String underlyingString;
-};
-
-
// In 64bit architecture, JSString and JSRopeString have the following memory layout to make sizeof(JSString) == 16 and sizeof(JSRopeString) == 32.
// JSString has only one pointer. We use it for String. length() and is8Bit() queries go to StringImpl. In JSRopeString, we reuse the above pointer
// place for the 1st fiber. JSRopeString has three fibers so its size is 48. To keep length and is8Bit flag information in JSRopeString, JSRopeString
Modified: releases/WebKitGTK/webkit-2.24/Source/_javascript_Core/runtime/StringPrototype.cpp (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/_javascript_Core/runtime/StringPrototype.cpp 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/_javascript_Core/runtime/StringPrototype.cpp 2019-05-17 11:25:29 UTC (rev 245455)
@@ -49,7 +49,7 @@
#include "SuperSampler.h"
#include <algorithm>
#include <unicode/uconfig.h>
-#include <unicode/unorm.h>
+#include <unicode/unorm2.h>
#include <unicode/ustring.h>
#include <wtf/ASCIICType.h>
#include <wtf/MathExtras.h>
@@ -1805,58 +1805,84 @@
return JSValue::encode(JSStringIterator::create(exec, exec->jsCallee()->globalObject(vm)->stringIteratorStructure(), string));
}
-enum class NormalizationForm {
- CanonicalComposition,
- CanonicalDecomposition,
- CompatibilityComposition,
- CompatibilityDecomposition
-};
+enum class NormalizationForm { NFC, NFD, NFKC, NFKD };
-static JSValue normalize(ExecState* exec, const UChar* source, size_t sourceLength, NormalizationForm form)
+static constexpr bool normalizationAffects8Bit(NormalizationForm form)
{
- VM& vm = exec->vm();
- auto scope = DECLARE_THROW_SCOPE(vm);
+ switch (form) {
+ case NormalizationForm::NFC:
+ return false;
+ case NormalizationForm::NFD:
+ return true;
+ case NormalizationForm::NFKC:
+ return false;
+ case NormalizationForm::NFKD:
+ return true;
+ }
+ ASSERT_NOT_REACHED();
+ return true;
+}
+static const UNormalizer2* normalizer(NormalizationForm form)
+{
UErrorCode status = U_ZERO_ERROR;
- // unorm2_get*Instance() documentation says: "Returns an unmodifiable singleton instance. Do not delete it."
const UNormalizer2* normalizer = nullptr;
switch (form) {
- case NormalizationForm::CanonicalComposition:
+ case NormalizationForm::NFC:
normalizer = unorm2_getNFCInstance(&status);
break;
- case NormalizationForm::CanonicalDecomposition:
+ case NormalizationForm::NFD:
normalizer = unorm2_getNFDInstance(&status);
break;
- case NormalizationForm::CompatibilityComposition:
+ case NormalizationForm::NFKC:
normalizer = unorm2_getNFKCInstance(&status);
break;
- case NormalizationForm::CompatibilityDecomposition:
+ case NormalizationForm::NFKD:
normalizer = unorm2_getNFKDInstance(&status);
break;
}
+ ASSERT(normalizer);
+ ASSERT(U_SUCCESS(status));
+ return normalizer;
+}
- if (!normalizer || U_FAILURE(status))
- return throwTypeError(exec, scope);
+static JSValue normalize(ExecState* exec, JSString* string, NormalizationForm form)
+{
+ VM& vm = exec->vm();
+ auto scope = DECLARE_THROW_SCOPE(vm);
- int32_t normalizedStringLength = unorm2_normalize(normalizer, source, sourceLength, nullptr, 0, &status);
+ auto viewWithString = string->viewWithUnderlyingString(exec);
+ RETURN_IF_EXCEPTION(scope, { });
- if (U_FAILURE(status) && status != U_BUFFER_OVERFLOW_ERROR) {
- // The behavior is not specified when normalize fails.
- // Now we throw a type error since it seems that the contents of the string are invalid.
- return throwTypeError(exec, scope);
- }
+ StringView view = viewWithString.view;
+ if (view.is8Bit() && (!normalizationAffects8Bit(form) || charactersAreAllASCII(view.characters8(), view.length())))
+ RELEASE_AND_RETURN(scope, string);
- UChar* buffer = nullptr;
- auto impl = StringImpl::tryCreateUninitialized(normalizedStringLength, buffer);
- if (!impl)
+ const UNormalizer2* normalizer = JSC::normalizer(form);
+
+ // Since ICU does not offer functions that can perform normalization or check for
+ // normalization with input that is Latin-1, we need to upconvert to UTF-16 at this point.
+ auto characters = view.upconvertedCharacters();
+
+ UErrorCode status = U_ZERO_ERROR;
+ UBool isNormalized = unorm2_isNormalized(normalizer, characters, view.length(), &status);
+ ASSERT(U_SUCCESS(status));
+ if (isNormalized)
+ RELEASE_AND_RETURN(scope, string);
+
+ int32_t normalizedStringLength = unorm2_normalize(normalizer, characters, view.length(), nullptr, 0, &status);
+ ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
+
+ UChar* buffer;
+ auto result = StringImpl::tryCreateUninitialized(normalizedStringLength, buffer);
+ if (!result)
return throwOutOfMemoryError(exec, scope);
status = U_ZERO_ERROR;
- unorm2_normalize(normalizer, source, sourceLength, buffer, normalizedStringLength, &status);
- if (U_FAILURE(status))
- return throwTypeError(exec, scope);
+ unorm2_normalize(normalizer, characters, view.length(), buffer, normalizedStringLength, &status);
+ ASSERT(U_SUCCESS(status));
- RELEASE_AND_RETURN(scope, jsString(exec, WTFMove(impl)));
+ RELEASE_AND_RETURN(scope, jsString(&vm, WTFMove(result)));
}
EncodedJSValue JSC_HOST_CALL stringProtoFuncNormalize(ExecState* exec)
@@ -1867,29 +1893,28 @@
JSValue thisValue = exec->thisValue();
if (!checkObjectCoercible(thisValue))
return throwVMTypeError(exec, scope);
- auto viewWithString = thisValue.toString(exec)->viewWithUnderlyingString(exec);
- RETURN_IF_EXCEPTION(scope, encodedJSValue());
- StringView view = viewWithString.view;
+ JSString* string = thisValue.toString(exec);
+ RETURN_IF_EXCEPTION(scope, { });
- NormalizationForm form = NormalizationForm::CanonicalComposition;
- // Verify that the argument is provided and is not undefined.
- if (!exec->argument(0).isUndefined()) {
- String formString = exec->uncheckedArgument(0).toWTFString(exec);
- RETURN_IF_EXCEPTION(scope, encodedJSValue());
+ auto form = NormalizationForm::NFC;
+ JSValue formValue = exec->argument(0);
+ if (!formValue.isUndefined()) {
+ String formString = formValue.toWTFString(exec);
+ RETURN_IF_EXCEPTION(scope, { });
if (formString == "NFC")
- form = NormalizationForm::CanonicalComposition;
+ form = NormalizationForm::NFC;
else if (formString == "NFD")
- form = NormalizationForm::CanonicalDecomposition;
+ form = NormalizationForm::NFD;
else if (formString == "NFKC")
- form = NormalizationForm::CompatibilityComposition;
+ form = NormalizationForm::NFKC;
else if (formString == "NFKD")
- form = NormalizationForm::CompatibilityDecomposition;
+ form = NormalizationForm::NFKD;
else
- return throwVMError(exec, scope, createRangeError(exec, "argument does not match any normalization form"_s));
+ return throwVMRangeError(exec, scope, "argument does not match any normalization form"_s);
}
- RELEASE_AND_RETURN(scope, JSValue::encode(normalize(exec, view.upconvertedCharacters(), view.length(), form)));
+ RELEASE_AND_RETURN(scope, JSValue::encode(normalize(exec, string, form)));
}
} // namespace JSC
Modified: releases/WebKitGTK/webkit-2.24/Source/WTF/ChangeLog (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WTF/ChangeLog 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WTF/ChangeLog 2019-05-17 11:25:29 UTC (rev 245455)
@@ -1,3 +1,41 @@
+2019-03-16 Darin Adler <[email protected]>
+
+ Improve normalization code, including moving from unorm.h to unorm2.h
+ https://bugs.webkit.org/show_bug.cgi?id=195330
+
+ Reviewed by Michael Catanzaro.
+
+ * wtf/URLHelpers.cpp: Removed unneeded include of unorm.h since the
+ normalization code is now in StringView.cpp.
+ (WTF::URLHelpers::escapeUnsafeCharacters): Renamed from
+ createStringWithEscapedUnsafeCharacters since it now only creates
+ a new string if one is needed. Use unsigned for string lengths, since
+ that's what WTF::String uses, not size_t. Added a first loop so that
+ we can return the string unmodified if no lookalike characters are
+ found. Removed unnecessary round trip from UTF-16 and then back in
+ the case where the character is not a lookalike.
+ (WTF::URLHelpers::toNormalizationFormC): Deleted. Moved this logic
+ into the WTF::normalizedNFC function in StringView.cpp.
+ (WTF::URLHelpers::userVisibleURL): Call escapeUnsafeCharacters and
+ normalizedNFC. The normalizedNFC function is better in multiple ways,
+ but primarily it handles 8-bit strings and other already-normalized
+ strings much more efficiently.
+
+ * wtf/text/StringView.cpp:
+ (WTF::normalizedNFC): Added. This has two overloads. One is for when
+ we already have a String, and want to re-use it if no normalization
+ is needed, and another is when we only have a StringView, and may need
+ to allocate a String to hold the result. Includes a fast special case
+ for 8-bit and already-normalized strings, and uses the same strategy
+ that JSC::normalize was already using: calls unorm2_normalize twice,
+ first just to determine the length.
+
+ * wtf/text/StringView.h: Added normalizedNFC, which can be called with
+ either a StringView or a String. Also moved StringViewWithUnderlyingString
+ here from JSString.h, here for use as the return value of normalizedNFC;
+ it is used for a similar purpose in the _javascript_Core rope implementation.
+ Also removed an inaccurate comment.
+
2019-05-07 Brent Fulgham <[email protected]>
Correct JSON parser to address unterminated escape character
Modified: releases/WebKitGTK/webkit-2.24/Source/WTF/wtf/URLHelpers.cpp (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WTF/wtf/URLHelpers.cpp 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WTF/wtf/URLHelpers.cpp 2019-05-17 11:25:29 UTC (rev 245455)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2005, 2007, 2014 Apple Inc. All rights reserved.
+ * Copyright (C) 2005-2019 Apple Inc. All rights reserved.
* Copyright (C) 2018 Igalia S.L.
*
* Redistribution and use in source and binary forms, with or without
@@ -33,7 +33,6 @@
#include "URLParser.h"
#include <mutex>
#include <unicode/uidna.h>
-#include <unicode/unorm.h>
#include <unicode/uscript.h>
namespace WTF {
@@ -735,17 +734,35 @@
return result;
}
-static String createStringWithEscapedUnsafeCharacters(const String& sourceBuffer)
+static String escapeUnsafeCharacters(const String& sourceBuffer)
{
+ unsigned length = sourceBuffer.length();
+
+ Optional<UChar32> previousCodePoint;
+
+ unsigned i;
+ for (i = 0; i < length; ) {
+ UChar32 c = sourceBuffer.characterStartingAt(i);
+ if (isLookalikeCharacter(previousCodePoint, sourceBuffer.characterStartingAt(i)))
+ break;
+ previousCodePoint = c;
+ i += U16_LENGTH(c);
+ }
+
+ if (i == length)
+ return sourceBuffer;
+
Vector<UChar, urlBytesBufferLength> outBuffer;
- const size_t length = sourceBuffer.length();
+ outBuffer.grow(i);
+ if (sourceBuffer.is8Bit())
+ StringImpl::copyCharacters(outBuffer.data(), sourceBuffer.characters8(), i);
+ else
+ StringImpl::copyCharacters(outBuffer.data(), sourceBuffer.characters16(), i);
- Optional<UChar32> previousCodePoint;
- size_t i = 0;
- while (i < length) {
+ for (; i < length; ) {
UChar32 c = sourceBuffer.characterStartingAt(i);
-
+ unsigned characterLength = U16_LENGTH(c);
if (isLookalikeCharacter(previousCodePoint, c)) {
uint8_t utf8Buffer[4];
size_t offset = 0;
@@ -752,7 +769,7 @@
UBool failure = false;
U8_APPEND(utf8Buffer, offset, 4, c, failure)
ASSERT(!failure);
-
+
for (size_t j = 0; j < offset; ++j) {
outBuffer.append('%');
outBuffer.append(upperNibbleToASCIIHexDigit(utf8Buffer[j]));
@@ -759,52 +776,16 @@
outBuffer.append(lowerNibbleToASCIIHexDigit(utf8Buffer[j]));
}
} else {
- UChar utf16Buffer[2];
- size_t offset = 0;
- UBool failure = false;
- U16_APPEND(utf16Buffer, offset, 2, c, failure)
- ASSERT(!failure);
- for (size_t j = 0; j < offset; ++j)
- outBuffer.append(utf16Buffer[j]);
+ for (unsigned j = 0; j < characterLength; ++j)
+ outBuffer.append(sourceBuffer[i + j]);
}
previousCodePoint = c;
- i += U16_LENGTH(c);
+ i += characterLength;
}
+
return String::adopt(WTFMove(outBuffer));
}
-static String toNormalizationFormC(const String& string)
-{
- Vector<UChar> sourceBuffer = string.charactersWithNullTermination();
- ASSERT(sourceBuffer.last() == '\0');
- sourceBuffer.removeLast();
-
- UErrorCode uerror = U_ZERO_ERROR;
- const UNormalizer2* normalizer = unorm2_getNFCInstance(&uerror);
- if (U_FAILURE(uerror))
- return { };
-
- UNormalizationCheckResult checkResult = unorm2_quickCheck(normalizer, sourceBuffer.data(), sourceBuffer.size(), &uerror);
- if (U_FAILURE(uerror))
- return { };
-
- // No need to normalize if already normalized.
- if (checkResult == UNORM_YES)
- return string;
-
- Vector<UChar, urlBytesBufferLength> normalizedCharacters(sourceBuffer.size());
- auto normalizedLength = unorm2_normalize(normalizer, sourceBuffer.data(), sourceBuffer.size(), normalizedCharacters.data(), normalizedCharacters.size(), &uerror);
- if (uerror == U_BUFFER_OVERFLOW_ERROR) {
- uerror = U_ZERO_ERROR;
- normalizedCharacters.resize(normalizedLength);
- normalizedLength = unorm2_normalize(normalizer, sourceBuffer.data(), sourceBuffer.size(), normalizedCharacters.data(), normalizedLength, &uerror);
- }
- if (U_FAILURE(uerror))
- return { };
-
- return String(normalizedCharacters.data(), normalizedLength);
-}
-
String userVisibleURL(const CString& url)
{
auto* before = reinterpret_cast<const unsigned char*>(url.data());
@@ -889,8 +870,7 @@
result = mappedResult;
}
- auto normalized = toNormalizationFormC(result);
- return createStringWithEscapedUnsafeCharacters(normalized);
+ return escapeUnsafeCharacters(normalizedNFC(result));
}
} // namespace URLHelpers
Modified: releases/WebKitGTK/webkit-2.24/Source/WTF/wtf/text/StringView.cpp (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WTF/wtf/text/StringView.cpp 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WTF/wtf/text/StringView.cpp 2019-05-17 11:25:29 UTC (rev 245455)
@@ -1,6 +1,6 @@
/*
-Copyright (C) 2014-2017 Apple Inc. All rights reserved.
+Copyright (C) 2014-2019 Apple Inc. All rights reserved.
Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions
@@ -29,6 +29,7 @@
#include <mutex>
#include <unicode/ubrk.h>
+#include <unicode/unorm2.h>
#include <wtf/HashMap.h>
#include <wtf/Lock.h>
#include <wtf/NeverDestroyed.h>
@@ -240,6 +241,43 @@
return convertASCIICase<ASCIICase::Upper>(static_cast<const UChar*>(m_characters), m_length);
}
+StringViewWithUnderlyingString normalizedNFC(StringView string)
+{
+ // Latin-1 characters are unaffected by normalization.
+ if (string.is8Bit())
+ return { string, { } };
+
+ UErrorCode status = U_ZERO_ERROR;
+ const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
+ ASSERT(U_SUCCESS(status));
+
+ // No need to normalize if already normalized.
+ UBool checkResult = unorm2_isNormalized(normalizer, string.characters16(), string.length(), &status);
+ if (checkResult)
+ return { string, { } };
+
+ unsigned normalizedLength = unorm2_normalize(normalizer, string.characters16(), string.length(), nullptr, 0, &status);
+ ASSERT(status == U_BUFFER_OVERFLOW_ERROR);
+
+ UChar* characters;
+ String result = String::createUninitialized(normalizedLength, characters);
+
+ status = U_ZERO_ERROR;
+ unorm2_normalize(normalizer, string.characters16(), string.length(), characters, normalizedLength, &status);
+ ASSERT(U_SUCCESS(status));
+
+ StringView view { result };
+ return { view, WTFMove(result) };
+}
+
+String normalizedNFC(const String& string)
+{
+ auto result = normalizedNFC(StringView { string });
+ if (result.underlyingString.isNull())
+ return string;
+ return result.underlyingString;
+}
+
#if CHECK_STRINGVIEW_LIFETIME
// Manage reference count manually so UnderlyingString does not need to be defined in the header.
Modified: releases/WebKitGTK/webkit-2.24/Source/WTF/wtf/text/StringView.h (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WTF/wtf/text/StringView.h 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WTF/wtf/text/StringView.h 2019-05-17 11:25:29 UTC (rev 245455)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2014-2017 Apple Inc. All rights reserved.
+ * Copyright (C) 2014-2019 Apple Inc. All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -211,6 +211,16 @@
inline bool operator!=(const LChar*a, StringView b) { return !equal(b, a); }
inline bool operator!=(const char*a, StringView b) { return !equal(b, a); }
+struct StringViewWithUnderlyingString;
+
+// This returns a StringView of the normalized result, and a String that is either
+// null, if the input was already normalized, or contains the normalized result
+// and needs to be kept around so the StringView remains valid. Typically the
+// easiest way to use it correctly is to put it into a local and use the StringView.
+WTF_EXPORT_PRIVATE StringViewWithUnderlyingString normalizedNFC(StringView);
+
+WTF_EXPORT_PRIVATE String normalizedNFC(const String&);
+
}
#include <wtf/text/AtomicString.h>
@@ -218,12 +228,17 @@
namespace WTF {
+struct StringViewWithUnderlyingString {
+ StringView view;
+ String underlyingString;
+};
+
inline StringView::StringView()
{
- // FIXME: It's peculiar that null strings are 16-bit and empty strings return 8-bit (according to the is8Bit function).
}
#if CHECK_STRINGVIEW_LIFETIME
+
inline StringView::~StringView()
{
setUnderlyingString(nullptr);
@@ -280,6 +295,7 @@
return *this;
}
+
#endif // CHECK_STRINGVIEW_LIFETIME
inline void StringView::initialize(const LChar* characters, unsigned length)
@@ -996,3 +1012,4 @@
using WTF::append;
using WTF::equal;
using WTF::StringView;
+using WTF::StringViewWithUnderlyingString;
Modified: releases/WebKitGTK/webkit-2.24/Source/WebCore/ChangeLog (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WebCore/ChangeLog 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WebCore/ChangeLog 2019-05-17 11:25:29 UTC (rev 245455)
@@ -1,3 +1,32 @@
+2019-03-16 Darin Adler <[email protected]>
+
+ Improve normalization code, including moving from unorm.h to unorm2.h
+ https://bugs.webkit.org/show_bug.cgi?id=195330
+
+ Reviewed by Michael Catanzaro.
+
+ * editing/TextIterator.cpp: Include unorm2.h.
+ (WebCore::normalizeCharacters): Rewrote to use unorm2_normalize rather than
+ unorm_normalize, but left the logic otherwise the same.
+
+ * platform/graphics/SurrogatePairAwareTextIterator.cpp: Include unorm2.h.
+ (WebCore::SurrogatePairAwareTextIterator::normalizeVoicingMarks):
+ Use unorm2_composePair instead of unorm_normalize.
+
+ * platform/graphics/cairo/FontCairoHarfbuzzNG.cpp:
+ (characterSequenceIsEmoji): Changed to use existing SurrogatePairAwareTextIterator.
+ (FontCascade::fontForCombiningCharacterSequence): Use normalizedNFC instead of
+ calling unorm2_normalize directly.
+
+ * WebCore/platform/graphics/freetype/SimpleFontDataFreeType.cpp:
+ Removed unneeded include of <unicode/normlzr.h>.
+
+ * platform/text/TextEncoding.cpp:
+ (WebCore::TextEncoding::encode const): Use normalizedNFC instead of the
+ code that was here. The normalizedNFC function is better in multiple ways,
+ but primarily it handles 8-bit strings and other already-normalized
+ strings much more efficiently.
+
2019-05-15 Zalan Bujtas <[email protected]>
Do not create a shape object outside of the layout context
Modified: releases/WebKitGTK/webkit-2.24/Source/WebCore/editing/TextIterator.cpp (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WebCore/editing/TextIterator.cpp 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WebCore/editing/TextIterator.cpp 2019-05-17 11:25:29 UTC (rev 245455)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2019 Apple Inc. All rights reserved.
* Copyright (C) 2005 Alexey Proskuryakov.
*
* Redistribution and use in source and binary forms, with or without
@@ -60,6 +60,7 @@
#include "TextControlInnerElements.h"
#include "VisiblePosition.h"
#include "VisibleUnits.h"
+#include <unicode/unorm2.h>
#include <wtf/Function.h>
#include <wtf/text/CString.h>
#include <wtf/text/StringBuilder.h>
@@ -71,10 +72,9 @@
#include <wtf/text/TextBreakIteratorInternalICU.h>
#endif
+namespace WebCore {
-namespace WebCore {
using namespace WTF::Unicode;
-
using namespace HTMLNames;
// Buffer that knows how to compare with a search target.
@@ -2014,32 +2014,27 @@
return false;
}
-ALLOW_DEPRECATED_DECLARATIONS_BEGIN
-// NOTE: ICU's unorm_normalize function is deprecated.
-
static void normalizeCharacters(const UChar* characters, unsigned length, Vector<UChar>& buffer)
{
- ASSERT(length);
+ UErrorCode status = U_ZERO_ERROR;
+ const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
+ ASSERT(U_SUCCESS(status));
buffer.resize(length);
- UErrorCode status = U_ZERO_ERROR;
- size_t bufferSize = unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), length, &status);
- ASSERT(status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING || status == U_BUFFER_OVERFLOW_ERROR);
- ASSERT(bufferSize);
+ auto normalizedLength = unorm2_normalize(normalizer, characters, length, buffer.data(), length, &status);
+ ASSERT(U_SUCCESS(status) || status == U_BUFFER_OVERFLOW_ERROR);
- buffer.resize(bufferSize);
+ buffer.resize(normalizedLength);
- if (status == U_ZERO_ERROR || status == U_STRING_NOT_TERMINATED_WARNING)
+ if (U_SUCCESS(status))
return;
status = U_ZERO_ERROR;
- unorm_normalize(characters, length, UNORM_NFC, 0, buffer.data(), bufferSize, &status);
- ASSERT(status == U_STRING_NOT_TERMINATED_WARNING);
+ unorm2_normalize(normalizer, characters, length, buffer.data(), length, &status);
+ ASSERT(U_SUCCESS(status));
}
-ALLOW_DEPRECATED_DECLARATIONS_END
-
static bool isNonLatin1Separator(UChar32 character)
{
ASSERT_ARG(character, character >= 256);
Modified: releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp 2019-05-17 11:25:29 UTC (rev 245455)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2003, 2006, 2008, 2009, 2010, 2011 Apple Inc. All rights reserved.
+ * Copyright (C) 2003-2019 Apple Inc. All rights reserved.
* Copyright (C) 2008 Holger Hans Peter Freyther
* Copyright (C) Research In Motion Limited 2011. All rights reserved.
*
@@ -23,7 +23,7 @@
#include "config.h"
#include "SurrogatePairAwareTextIterator.h"
-#include <unicode/unorm.h>
+#include <unicode/unorm2.h>
namespace WebCore {
@@ -69,29 +69,24 @@
return true;
}
-ALLOW_DEPRECATED_DECLARATIONS_BEGIN
-// NOTE: ICU's unorm_normalize function is deprecated.
-
UChar32 SurrogatePairAwareTextIterator::normalizeVoicingMarks()
{
// According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values
- static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8;
+ static constexpr uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8;
if (m_currentIndex + 1 >= m_endIndex)
return 0;
if (u_getCombiningClass(m_characters[1]) == hiraganaKatakanaVoicingMarksCombiningClass) {
- // Normalize into composed form using 3.2 rules.
- UChar normalizedCharacters[2] = { 0, 0 };
- UErrorCode uStatus = U_ZERO_ERROR;
- int32_t resultLength = unorm_normalize(m_characters, 2, UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus);
- if (resultLength == 1 && !uStatus)
- return normalizedCharacters[0];
+ UErrorCode status = U_ZERO_ERROR;
+ const UNormalizer2* normalizer = unorm2_getNFCInstance(&status);
+ ASSERT(U_SUCCESS(status));
+ auto composedCharacter = unorm2_composePair(normalizer, m_characters[0], m_characters[1]);
+ if (composedCharacter > 0)
+ return composedCharacter;
}
return 0;
}
-ALLOW_DEPRECATED_DECLARATIONS_END
-
}
Modified: releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/graphics/cairo/FontCairoHarfbuzzNG.cpp (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/graphics/cairo/FontCairoHarfbuzzNG.cpp 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/graphics/cairo/FontCairoHarfbuzzNG.cpp 2019-05-17 11:25:29 UTC (rev 245455)
@@ -32,7 +32,6 @@
#include "CharacterProperties.h"
#include "FontCache.h"
#include "SurrogatePairAwareTextIterator.h"
-#include <unicode/normlzr.h>
namespace WebCore {
@@ -46,11 +45,10 @@
return false;
}
-static bool characterSequenceIsEmoji(const Vector<UChar, 4>& normalizedCharacters, int32_t normalizedLength)
+static bool characterSequenceIsEmoji(SurrogatePairAwareTextIterator& iterator, UChar32 firstCharacter, unsigned firstClusterLength)
{
- UChar32 character;
- unsigned clusterLength = 0;
- SurrogatePairAwareTextIterator iterator(normalizedCharacters.data(), 0, normalizedLength, normalizedLength);
+ UChar32 character = firstCharacter;
+ unsigned clusterLength = firstClusterLength;
if (!iterator.consume(character, clusterLength))
return false;
@@ -100,36 +98,27 @@
return false;
}
-const Font* FontCascade::fontForCombiningCharacterSequence(const UChar* characters, size_t length) const
+const Font* FontCascade::fontForCombiningCharacterSequence(const UChar* originalCharacters, size_t originalLength) const
{
- UErrorCode error = U_ZERO_ERROR;
- Vector<UChar, 4> normalizedCharacters(length);
- const auto* normalizer = unorm2_getNFCInstance(&error);
- if (U_FAILURE(error))
- return nullptr;
- int32_t normalizedLength = unorm2_normalize(normalizer, characters, length, normalizedCharacters.data(), length, &error);
- if (U_FAILURE(error)) {
- if (error != U_BUFFER_OVERFLOW_ERROR)
- return nullptr;
+ auto normalizedString = normalizedNFC(StringView { originalCharacters, static_cast<unsigned>(originalLength) });
- error = U_ZERO_ERROR;
- normalizedCharacters.resize(normalizedLength);
- normalizedLength = unorm2_normalize(normalizer, characters, length, normalizedCharacters.data(), normalizedLength, &error);
- if (U_FAILURE(error))
- return nullptr;
- }
+ // Code below relies on normalizedNFC never narrowing a 16-bit input string into an 8-bit output string.
+ // At the time of this writing, the function never does this, but in theory a future version could, and
+ // we would then need to add code paths here for the simpler 8-bit case.
+ auto characters = normalizedString.view.characters16();
+ auto length = normalizedString.view.length();
UChar32 character;
unsigned clusterLength = 0;
- SurrogatePairAwareTextIterator iterator(normalizedCharacters.data(), 0, normalizedLength, normalizedLength);
+ SurrogatePairAwareTextIterator iterator(characters, 0, length, length);
if (!iterator.consume(character, clusterLength))
return nullptr;
- bool isEmoji = characterSequenceIsEmoji(normalizedCharacters, normalizedLength);
+ bool isEmoji = characterSequenceIsEmoji(iterator, character, clusterLength);
const Font* baseFont = glyphDataForCharacter(character, false, NormalVariant).font;
if (baseFont
- && (static_cast<int32_t>(clusterLength) == normalizedLength || baseFont->canRenderCombiningCharacterSequence(characters, length))
+ && (clusterLength == length || baseFont->canRenderCombiningCharacterSequence(characters, length))
&& (!isEmoji || baseFont->platformData().isColorBitmapFont()))
return baseFont;
Modified: releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/graphics/freetype/SimpleFontDataFreeType.cpp (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/graphics/freetype/SimpleFontDataFreeType.cpp 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/graphics/freetype/SimpleFontDataFreeType.cpp 2019-05-17 11:25:29 UTC (rev 245455)
@@ -50,7 +50,6 @@
#include <ft2build.h>
#include FT_TRUETYPE_TABLES_H
#include FT_TRUETYPE_TAGS_H
-#include <unicode/normlzr.h>
#include <wtf/MathExtras.h>
namespace WebCore {
Modified: releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/text/TextEncoding.cpp (245454 => 245455)
--- releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/text/TextEncoding.cpp 2019-05-17 11:25:20 UTC (rev 245454)
+++ releases/WebKitGTK/webkit-2.24/Source/WebCore/platform/text/TextEncoding.cpp 2019-05-17 11:25:29 UTC (rev 245455)
@@ -1,5 +1,5 @@
/*
- * Copyright (C) 2004-2017 Apple Inc. All rights reserved.
+ * Copyright (C) 2004-2019 Apple Inc. All rights reserved.
* Copyright (C) 2006 Alexey Proskuryakov <[email protected]>
* Copyright (C) 2007-2009 Torch Mobile, Inc.
*
@@ -31,10 +31,8 @@
#include "DecodeEscapeSequences.h"
#include "TextCodec.h"
#include "TextEncodingRegistry.h"
-#include <unicode/unorm.h>
#include <wtf/NeverDestroyed.h>
#include <wtf/StdLibExtras.h>
-#include <wtf/text/CString.h>
#include <wtf/text/StringView.h>
namespace WebCore {
@@ -71,48 +69,18 @@
return newTextCodec(*this)->decode(data, length, true, stopOnError, sawError);
}
-ALLOW_DEPRECATED_DECLARATIONS_BEGIN
-// NOTE: ICU's unorm_quickCheck and unorm_normalize functions are deprecated.
-
-Vector<uint8_t> TextEncoding::encode(StringView text, UnencodableHandling handling) const
+Vector<uint8_t> TextEncoding::encode(StringView string, UnencodableHandling handling) const
{
- if (!m_name || text.isEmpty())
+ if (!m_name || string.isEmpty())
return { };
- // FIXME: Consider adding a fast case for ASCII.
-
// FIXME: What's the right place to do normalization?
// It's a little strange to do it inside the encode function.
// Perhaps normalization should be an explicit step done before calling encode.
-
- auto upconvertedCharacters = text.upconvertedCharacters();
-
- const UChar* source = upconvertedCharacters;
- unsigned sourceLength = text.length();
-
- Vector<UChar> normalizedCharacters;
-
- UErrorCode err = U_ZERO_ERROR;
- if (unorm_quickCheck(source, sourceLength, UNORM_NFC, &err) != UNORM_YES) {
- // First try using the length of the original string, since normalization to NFC rarely increases length.
- normalizedCharacters.grow(sourceLength);
- int32_t normalizedLength = unorm_normalize(source, sourceLength, UNORM_NFC, 0, normalizedCharacters.data(), sourceLength, &err);
- if (err == U_BUFFER_OVERFLOW_ERROR) {
- err = U_ZERO_ERROR;
- normalizedCharacters.resize(normalizedLength);
- normalizedLength = unorm_normalize(source, sourceLength, UNORM_NFC, 0, normalizedCharacters.data(), normalizedLength, &err);
- }
- ASSERT(U_SUCCESS(err));
-
- source = normalizedCharacters.data();
- sourceLength = normalizedLength;
- }
-
- return newTextCodec(*this)->encode(StringView { source, sourceLength }, handling);
+ auto normalizedString = normalizedNFC(string);
+ return newTextCodec(*this)->encode(normalizedString.view, handling);
}
-ALLOW_DEPRECATED_DECLARATIONS_END
-
const char* TextEncoding::domName() const
{
if (noExtendedTextEncodingNameUsed())