Log Message
Implement grapheme cluster iterator on StringView https://bugs.webkit.org/show_bug.cgi?id=159598
Reviewed by Anders Carlsson. This is in preparation for honoring the second argument to FontFaceSet.load(). Source/WTF: * wtf/text/StringView.cpp: (WTF::StringView::GraphemeClusters::Iterator::Impl::Impl): (WTF::StringView::GraphemeClusters::Iterator::Impl::operator++): (WTF::StringView::GraphemeClusters::Iterator::Impl::operator*): (WTF::StringView::GraphemeClusters::Iterator::Impl::operator==): (WTF::StringView::GraphemeClusters::Iterator::Impl::computeIndexEnd): (WTF::StringView::GraphemeClusters::Iterator::Iterator): (WTF::StringView::GraphemeClusters::Iterator::~Iterator): (WTF::StringView::GraphemeClusters::Iterator::operator*): (WTF::StringView::GraphemeClusters::Iterator::operator==): (WTF::StringView::GraphemeClusters::Iterator::operator!=): * wtf/text/StringView.h: (WTF::StringView::GraphemeClusters::GraphemeClusters): * wtf/text/TextBreakIterator.cpp: (WTF::NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator): * wtf/text/TextBreakIterator.h: Tools: * TestWebKitAPI/Tests/WTF/StringView.cpp: (TestWebKitAPI::compareLoopIterations): (TestWebKitAPI::TEST):
Modified Paths
Diff
Modified: trunk/Source/WTF/ChangeLog (203077 => 203078)
--- trunk/Source/WTF/ChangeLog 2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/ChangeLog 2016-07-11 19:50:40 UTC (rev 203078)
@@ -1,3 +1,29 @@
+2016-07-11 Myles C. Maxfield <[email protected]>
+
+ Implement grapheme cluster iterator on StringView
+ https://bugs.webkit.org/show_bug.cgi?id=159598
+
+ Reviewed by Anders Carlsson.
+
+ This is in preparation for honoring the second argument to FontFaceSet.load().
+
+ * wtf/text/StringView.cpp:
+ (WTF::StringView::GraphemeClusters::Iterator::Impl::Impl):
+ (WTF::StringView::GraphemeClusters::Iterator::Impl::operator++):
+ (WTF::StringView::GraphemeClusters::Iterator::Impl::operator*):
+ (WTF::StringView::GraphemeClusters::Iterator::Impl::operator==):
+ (WTF::StringView::GraphemeClusters::Iterator::Impl::computeIndexEnd):
+ (WTF::StringView::GraphemeClusters::Iterator::Iterator):
+ (WTF::StringView::GraphemeClusters::Iterator::~Iterator):
+ (WTF::StringView::GraphemeClusters::Iterator::operator*):
+ (WTF::StringView::GraphemeClusters::Iterator::operator==):
+ (WTF::StringView::GraphemeClusters::Iterator::operator!=):
+ * wtf/text/StringView.h:
+ (WTF::StringView::GraphemeClusters::GraphemeClusters):
+ * wtf/text/TextBreakIterator.cpp:
+ (WTF::NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator):
+ * wtf/text/TextBreakIterator.h:
+
2016-07-10 Myles C. Maxfield <[email protected]>
Move breaking iterator code to WTF
Modified: trunk/Source/WTF/wtf/text/StringView.cpp (203077 => 203078)
--- trunk/Source/WTF/wtf/text/StringView.cpp 2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/wtf/text/StringView.cpp 2016-07-11 19:50:40 UTC (rev 203078)
@@ -31,6 +31,8 @@
#include <wtf/HashMap.h>
#include <wtf/Lock.h>
#include <wtf/NeverDestroyed.h>
+#include <wtf/Optional.h>
+#include <wtf/text/TextBreakIterator.h>
#include <wtf/unicode/UTF8.h>
namespace WTF {
@@ -91,6 +93,89 @@
return findCommon(*this, matchString, start);
}
+// Out-of-line state for StringView::GraphemeClusters::Iterator. It tracks the
+// half-open code-unit range [m_index, m_indexEnd) of the current grapheme
+// cluster and asks a character break iterator where each cluster ends.
+class StringView::GraphemeClusters::Iterator::Impl {
+public:
+    // stringView: the text being walked. It is copied into this object, so the
+    //     iterator does not depend on the lifetime of the caller's StringView
+    //     object (only on the characters it points at).
+    // iterator: break iterator over stringView, or Nullopt when there is none.
+    // index: code-unit offset at which the current cluster starts.
+    Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index)
+        : m_stringView(stringView)
+        , m_iterator(WTFMove(iterator))
+        , m_index(index)
+        , m_indexEnd(computeIndexEnd()) // Reads the three members above; they are declared, and so initialized, first.
+    {
+    }
+
+    // Steps to the next cluster. Must not be called on an iterator whose
+    // current range is empty (that is, one already at the end).
+    void operator++()
+    {
+        ASSERT(m_indexEnd > m_index);
+        m_index = m_indexEnd;
+        m_indexEnd = computeIndexEnd();
+    }
+
+    // Returns the current cluster as a view over the same characters,
+    // choosing the 8-bit or 16-bit pointer to match the underlying view.
+    StringView operator*() const
+    {
+        if (m_stringView.is8Bit())
+            return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index);
+        return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index);
+    }
+
+    // Two iterators are equal when they sit at the same offset. Only iterators
+    // over the same characters may be compared; that is checked on the viewed
+    // characters rather than on the address of a StringView object.
+    bool operator==(const Impl& other) const
+    {
+        ASSERT(m_stringView.length() == other.m_stringView.length());
+        ASSERT(m_stringView.is8Bit() == other.m_stringView.is8Bit());
+        ASSERT(m_stringView.is8Bit()
+            ? m_stringView.characters8() == other.m_stringView.characters8()
+            : m_stringView.characters16() == other.m_stringView.characters16());
+        auto result = m_index == other.m_index;
+        ASSERT(!result || m_indexEnd == other.m_indexEnd);
+        return result;
+    }
+
+    // Computes the end offset of the cluster starting at m_index:
+    //  - 0 when there is no break iterator;
+    //  - m_index itself when already at the end of the view (empty range);
+    //  - otherwise whatever textBreakFollowing() reports for m_index.
+    // NOTE(review): the result of textBreakFollowing() is converted to unsigned;
+    // confirm it cannot report "no boundary" for an offset below length().
+    unsigned computeIndexEnd()
+    {
+        if (!m_iterator)
+            return 0;
+        if (m_index == m_stringView.length())
+            return m_index;
+        return textBreakFollowing(m_iterator.value(), m_index);
+    }
+
+private:
+    StringView m_stringView; // Held by value: a reference would dangle once the originating range object is gone.
+    Optional<NonSharedCharacterBreakIterator> m_iterator;
+    unsigned m_index;
+    unsigned m_indexEnd;
+};
+
+StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index) // Creates the Impl for an iterator positioned at offset `index` of stringView.
+ : m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? Nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index)) // A null view gets Nullopt instead of a break iterator.
+{
+}
+
+StringView::GraphemeClusters::Iterator::~Iterator() // Empty, but defined in this file: the header only forward-declares Impl, and std::unique_ptr<Impl> needs the complete type to destroy it.
+{
+}
+
+StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other) // Move constructor: takes over other's Impl.
+ : m_impl(WTFMove(other.m_impl)) // Assuming WTFMove behaves like std::move, other.m_impl is empty afterwards, so `other` must not be dereferenced, advanced or compared.
+{
+}
+
+auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator& // Pre-increment: moves to the next grapheme cluster and returns this iterator.
+{
+ ++(*m_impl); // Forwards to Impl::operator++.
+ return *this;
+}
+
+StringView StringView::GraphemeClusters::Iterator::operator*() const // Returns the current grapheme cluster as a StringView.
+{
+ return **m_impl; // Inner * unwraps the unique_ptr; outer * is Impl::operator*.
+}
+
+bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const // Equality as defined by Impl::operator==.
+{
+ return *m_impl == *(other.m_impl); // Compares the Impl objects, not the unique_ptr addresses.
+}
+
+bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const // Negation of operator==.
+{
+ return !(*this == other);
+}
+
#if CHECK_STRINGVIEW_LIFETIME
// Manage reference count manually so UnderlyingString does not need to be defined in the header.
Modified: trunk/Source/WTF/wtf/text/StringView.h (203077 => 203078)
--- trunk/Source/WTF/wtf/text/StringView.h 2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/wtf/text/StringView.h 2016-07-11 19:50:40 UTC (rev 203078)
@@ -82,6 +82,9 @@
class CodePoints;
CodePoints codePoints() const;
+ class GraphemeClusters;
+ GraphemeClusters graphemeClusters() const;
+
bool is8Bit() const;
const LChar* characters8() const;
const UChar* characters16() const;
@@ -580,6 +583,18 @@
return equalIgnoringASCIICaseCommon(a, b);
}
+class StringView::GraphemeClusters { // Range object returned by StringView::graphemeClusters(); begin()/end() make it usable in a range-based for loop.
+public:
+ explicit GraphemeClusters(const StringView&);
+
+ class Iterator;
+ Iterator begin() const;
+ Iterator end() const;
+
+private:
+ StringView m_stringView; // Copy of the view being iterated.
+};
+
class StringView::CodePoints {
public:
explicit CodePoints(const StringView&);
@@ -604,6 +619,29 @@
StringView m_stringView;
};
+// Iterator over the grapheme clusters of a StringView, as handed out by
+// GraphemeClusters::begin() and end(). It is move-constructible only; its
+// state lives in Impl, which is defined in StringView.cpp.
+class StringView::GraphemeClusters::Iterator {
+public:
+    // Deleted, so there is no symbol to export: no export annotation here.
+    Iterator() = delete;
+    // Creates an iterator over the given view, positioned at offset `index`.
+    WTF_EXPORT_PRIVATE Iterator(const StringView&, unsigned index);
+    WTF_EXPORT_PRIVATE ~Iterator();
+
+    Iterator(const Iterator&) = delete;
+    WTF_EXPORT_PRIVATE Iterator(Iterator&&);
+    Iterator& operator=(const Iterator&) = delete;
+    Iterator& operator=(Iterator&&) = delete;
+
+    // Current grapheme cluster, as a view into the iterated characters.
+    WTF_EXPORT_PRIVATE StringView operator*() const;
+    // Pre-increment: advances to the next grapheme cluster.
+    WTF_EXPORT_PRIVATE Iterator& operator++();
+
+    WTF_EXPORT_PRIVATE bool operator==(const Iterator&) const;
+    WTF_EXPORT_PRIVATE bool operator!=(const Iterator&) const;
+
+private:
+    class Impl;
+
+    std::unique_ptr<Impl> m_impl;
+};
+
class StringView::CodePoints::Iterator {
public:
Iterator(const StringView&, unsigned index);
@@ -637,6 +675,11 @@
unsigned m_index;
};
+inline auto StringView::graphemeClusters() const -> GraphemeClusters // Returns a range over this view's grapheme clusters.
+{
+ return GraphemeClusters(*this); // The range keeps its own copy of this view.
+}
+
inline auto StringView::codePoints() const -> CodePoints
{
return CodePoints(*this);
@@ -647,6 +690,21 @@
return CodeUnits(*this);
}
+inline StringView::GraphemeClusters::GraphemeClusters(const StringView& stringView) // Stores a copy of the view to iterate.
+ : m_stringView(stringView)
+{
+}
+
+inline auto StringView::GraphemeClusters::begin() const -> Iterator // Iterator positioned at the first cluster.
+{
+ return Iterator(m_stringView, 0); // Offset 0 is the start of the view.
+}
+
+inline auto StringView::GraphemeClusters::end() const -> Iterator // Past-the-end iterator.
+{
+ return Iterator(m_stringView, m_stringView.length()); // Positioned at offset length(), one past the last code unit.
+}
+
inline StringView::CodePoints::CodePoints(const StringView& stringView)
: m_stringView(stringView)
{
Modified: trunk/Source/WTF/wtf/text/TextBreakIterator.cpp (203077 => 203078)
--- trunk/Source/WTF/wtf/text/TextBreakIterator.cpp 2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/wtf/text/TextBreakIterator.cpp 2016-07-11 19:50:40 UTC (rev 203078)
@@ -819,7 +819,7 @@
{
m_iterator = nonSharedCharacterBreakIterator;
- bool createdIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0);
+ bool createdIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, nullptr);
if (!createdIterator)
m_iterator = initializeIterator(UBRK_CHARACTER);
if (!m_iterator)
@@ -834,7 +834,13 @@
ubrk_close(reinterpret_cast<UBreakIterator*>(m_iterator));
}
+NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&& other) // Move constructor: takes over other's iterator handle.
+ : m_iterator(nullptr)
+{
+ std::swap(m_iterator, other.m_iterator); // other is left holding nullptr. NOTE(review): confirm the destructor tolerates a null m_iterator.
+}
+
// Iterator implementation.
int textBreakFirst(TextBreakIterator* iterator)
Modified: trunk/Source/WTF/wtf/text/TextBreakIterator.h (203077 => 203078)
--- trunk/Source/WTF/wtf/text/TextBreakIterator.h 2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/wtf/text/TextBreakIterator.h 2016-07-11 19:50:40 UTC (rev 203078)
@@ -198,6 +198,8 @@
WTF_EXPORT_PRIVATE NonSharedCharacterBreakIterator(StringView);
WTF_EXPORT_PRIVATE ~NonSharedCharacterBreakIterator();
+ NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&&);
+
operator TextBreakIterator*() const { return m_iterator; }
private:
Modified: trunk/Tools/ChangeLog (203077 => 203078)
--- trunk/Tools/ChangeLog 2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Tools/ChangeLog 2016-07-11 19:50:40 UTC (rev 203078)
@@ -1,3 +1,16 @@
+2016-07-11 Myles C. Maxfield <[email protected]>
+
+ Implement grapheme cluster iterator on StringView
+ https://bugs.webkit.org/show_bug.cgi?id=159598
+
+ Reviewed by Anders Carlsson.
+
+ This is in preparation for honoring the second argument to FontFaceSet.load().
+
+ * TestWebKitAPI/Tests/WTF/StringView.cpp:
+ (TestWebKitAPI::compareLoopIterations):
+ (TestWebKitAPI::TEST):
+
2016-07-11 Nan Wang <[email protected]>
AX: WKWebView should have API to prevent pinch-to-zoom always being allowed
Modified: trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp (203077 => 203078)
--- trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp 2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp 2016-07-11 19:50:40 UTC (rev 203078)
@@ -78,6 +78,14 @@
SUCCEED();
}
+bool compareLoopIterations(StringView::GraphemeClusters graphemeClusters, std::vector<StringView> expected) // Returns true when iterating graphemeClusters yields exactly the views in expected, in order.
+{
+ std::vector<StringView> actual;
+ for (auto graphemeCluster : graphemeClusters) // Exercises begin(), end(), operator!=, operator++ and operator*.
+ actual.push_back(graphemeCluster);
+ return actual == expected; // Element-wise comparison using StringView's operator==.
+}
+
bool compareLoopIterations(StringView::CodePoints codePoints, std::vector<UChar32> expected)
{
std::vector<UChar32> actual;
@@ -103,43 +111,86 @@
TEST(WTF, StringViewIterators)
{
- compareLoopIterations(StringView().codePoints(), { });
- compareLoopIterations(StringView().codeUnits(), { });
+ EXPECT_TRUE(compareLoopIterations(StringView().codePoints(), { }));
+ EXPECT_TRUE(compareLoopIterations(StringView().codeUnits(), { }));
+ EXPECT_TRUE(compareLoopIterations(StringView().graphemeClusters(), { })); // A null view yields no clusters.
- compareLoopIterations(StringView::empty().codePoints(), { });
- compareLoopIterations(StringView::empty().codeUnits(), { });
+ EXPECT_TRUE(compareLoopIterations(StringView::empty().codePoints(), { }));
+ EXPECT_TRUE(compareLoopIterations(StringView::empty().codeUnits(), { }));
+ EXPECT_TRUE(compareLoopIterations(StringView::empty().graphemeClusters(), { })); // An empty view yields no clusters.
- compareLoopIterations(StringView(String("hello")).codePoints(), {'h', 'e', 'l', 'l', 'o'});
- compareLoopIterations(StringView(String("hello")).codeUnits(), {'h', 'e', 'l', 'l', 'o'});
+ String helo("helo");
+ StringView heloView(helo);
+ EXPECT_TRUE(compareLoopIterations(heloView.codePoints(), {'h', 'e', 'l', 'o'}));
+ EXPECT_TRUE(compareLoopIterations(heloView.codeUnits(), {'h', 'e', 'l', 'o'}));
+ EXPECT_TRUE(compareLoopIterations(heloView.graphemeClusters(), {
+ StringView(heloView.characters8(), 1),
+ StringView(heloView.characters8() + 1, 1),
+ StringView(heloView.characters8() + 2, 1),
+ StringView(heloView.characters8() + 3, 1)}));
+
StringBuilder b;
build(b, {0xD800, 0xDD55}); // Surrogates for unicode code point U+10155
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0x10155}));
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xD800, 0xDD55}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())})); // The surrogate pair is expected as a single cluster.
build(b, {0xD800}); // Leading surrogate only
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0xD800}));
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xD800}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())})); // The lone surrogate is expected as one cluster.
build(b, {0xD800, 0xD801}); // Two leading surrogates
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0xD800, 0xD801}));
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xD800, 0xD801}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.characters16(), 1), StringView(b.characters16() + 1, 1)})); // Each unpaired surrogate is expected as its own cluster.
build(b, {0xDD55}); // Trailing surrogate only
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0xDD55}));
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xDD55}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())}));
build(b, {0xD800, 'h'}); // Leading surrogate followed by non-surrogate
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0xD800, 'h'}));
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xD800, 'h'}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.characters16(), 1), StringView(b.characters16() + 1, 1)}));
build(b, {0x0306}); // "COMBINING BREVE"
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0x0306}));
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0x0306}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())}));
build(b, {0x0306, 0xD800, 0xDD55, 'h', 'e', 'l', 'o'}); // Mix of single code unit and multi code unit code points
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0x0306, 0x10155, 'h', 'e', 'l', 'o'}));
EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0x0306, 0xD800, 0xDD55, 'h', 'e', 'l', 'o'}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {
+ StringView(b.characters16(), 1),
+ StringView(b.characters16() + 1, 2),
+ StringView(b.characters16() + 3, 1),
+ StringView(b.characters16() + 4, 1),
+ StringView(b.characters16() + 5, 1),
+ StringView(b.characters16() + 6, 1)}));
+
+ build(b, {'e', 0x0301}); // "COMBINING ACUTE"
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {'e', 0x0301}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {'e', 0x0301}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())})); // Base letter plus combining mark is expected as one cluster.
+
+ build(b, {'e', 0x0301, 0x0306, 'a'}); // "COMBINING ACUTE" "COMBINING BREVE"
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {'e', 0x0301, 0x0306, 'a'}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {'e', 0x0301, 0x0306, 'a'}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {
+ StringView(b.characters16(), 3),
+ StringView(b.characters16() + 3, 1),
+ }));
+
+ build(b, {0x1112, 0x116f, 0x11b6, 0x1107, 0x1161, 0x11B8}); // Korean combining Jamo
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0x1112, 0x116f, 0x11b6, 0x1107, 0x1161, 0x11B8}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0x1112, 0x116f, 0x11b6, 0x1107, 0x1161, 0x11B8}));
+ EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {
+ StringView(b.characters16(), 3),
+ StringView(b.characters16() + 3, 3)}));
}
TEST(WTF, StringViewEqualIgnoringASCIICaseBasic)
_______________________________________________ webkit-changes mailing list [email protected] https://lists.webkit.org/mailman/listinfo/webkit-changes
