Title: [203078] trunk
Revision
203078
Author
[email protected]
Date
2016-07-11 12:50:40 -0700 (Mon, 11 Jul 2016)

Log Message

Implement grapheme cluster iterator on StringView
https://bugs.webkit.org/show_bug.cgi?id=159598

Reviewed by Anders Carlsson.

This is in preparation for honoring the second argument to FontFaceSet.load().

Source/WTF:

* wtf/text/StringView.cpp:
(WTF::StringView::GraphemeClusters::Iterator::Impl::Impl):
(WTF::StringView::GraphemeClusters::Iterator::Impl::operator++):
(WTF::StringView::GraphemeClusters::Iterator::Impl::operator*):
(WTF::StringView::GraphemeClusters::Iterator::Impl::operator==):
(WTF::StringView::GraphemeClusters::Iterator::Impl::computeIndexEnd):
(WTF::StringView::GraphemeClusters::Iterator::Iterator):
(WTF::StringView::GraphemeClusters::Iterator::~Iterator):
(WTF::StringView::GraphemeClusters::Iterator::operator*):
(WTF::StringView::GraphemeClusters::Iterator::operator==):
(WTF::StringView::GraphemeClusters::Iterator::operator!=):
* wtf/text/StringView.h:
(WTF::StringView::GraphemeClusters::GraphemeClusters):
* wtf/text/TextBreakIterator.cpp:
(WebCore::NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator):
* wtf/text/TextBreakIterator.h:

Tools:

* TestWebKitAPI/Tests/WTF/StringView.cpp:
(TestWebKitAPI::compareLoopIterations):
(TestWebKitAPI::TEST):

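Modified Paths

- trunk/Source/WTF/ChangeLog
- trunk/Source/WTF/wtf/text/StringView.cpp
- trunk/Source/WTF/wtf/text/StringView.h
- trunk/Source/WTF/wtf/text/TextBreakIterator.cpp
- trunk/Source/WTF/wtf/text/TextBreakIterator.h
- trunk/Tools/ChangeLog
- trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp

Diff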

Modified: trunk/Source/WTF/ChangeLog (203077 => 203078)


--- trunk/Source/WTF/ChangeLog	2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/ChangeLog	2016-07-11 19:50:40 UTC (rev 203078)
@@ -1,3 +1,29 @@
+2016-07-11  Myles C. Maxfield  <[email protected]>
+
+        Implement grapheme cluster iterator on StringView
+        https://bugs.webkit.org/show_bug.cgi?id=159598
+
+        Reviewed by Anders Carlsson.
+
+        This is in preparation for honoring the second argument to FontFaceSet.load().
+
+        * wtf/text/StringView.cpp:
+        (WTF::StringView::GraphemeClusters::Iterator::Impl::Impl):
+        (WTF::StringView::GraphemeClusters::Iterator::Impl::operator++):
+        (WTF::StringView::GraphemeClusters::Iterator::Impl::operator*):
+        (WTF::StringView::GraphemeClusters::Iterator::Impl::operator==):
+        (WTF::StringView::GraphemeClusters::Iterator::Impl::computeIndexEnd):
+        (WTF::StringView::GraphemeClusters::Iterator::Iterator):
+        (WTF::StringView::GraphemeClusters::Iterator::~Iterator):
+        (WTF::StringView::GraphemeClusters::Iterator::operator*):
+        (WTF::StringView::GraphemeClusters::Iterator::operator==):
+        (WTF::StringView::GraphemeClusters::Iterator::operator!=):
+        * wtf/text/StringView.h:
+        (WTF::StringView::GraphemeClusters::GraphemeClusters):
+        * wtf/text/TextBreakIterator.cpp:
+        (WebCore::NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator):
+        * wtf/text/TextBreakIterator.h:
+
 2016-07-10  Myles C. Maxfield  <[email protected]>
 
         Move breaking iterator code to WTF

Modified: trunk/Source/WTF/wtf/text/StringView.cpp (203077 => 203078)


--- trunk/Source/WTF/wtf/text/StringView.cpp	2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/wtf/text/StringView.cpp	2016-07-11 19:50:40 UTC (rev 203078)
@@ -31,6 +31,8 @@
 #include <wtf/HashMap.h>
 #include <wtf/Lock.h>
 #include <wtf/NeverDestroyed.h>
+#include <wtf/Optional.h>
+#include <wtf/text/TextBreakIterator.h>
 #include <wtf/unicode/UTF8.h>
 
 namespace WTF {
@@ -91,6 +93,89 @@
     return findCommon(*this, matchString, start);
 }
 
+class StringView::GraphemeClusters::Iterator::Impl {
+public:
+    Impl(const StringView& stringView, Optional<NonSharedCharacterBreakIterator>&& iterator, unsigned index)
+        : m_stringView(stringView)
+        , m_iterator(WTFMove(iterator))
+        , m_index(index)
+        , m_indexEnd(computeIndexEnd())
+    {
+    }
+
+    void operator++()
+    {
+        ASSERT(m_indexEnd > m_index);
+        m_index = m_indexEnd;
+        m_indexEnd = computeIndexEnd();
+    }
+
+    StringView operator*() const
+    {
+        if (m_stringView.is8Bit())
+            return StringView(m_stringView.characters8() + m_index, m_indexEnd - m_index);
+        return StringView(m_stringView.characters16() + m_index, m_indexEnd - m_index);
+    }
+
+    bool operator==(const Impl& other) const
+    {
+        ASSERT(&m_stringView == &other.m_stringView);
+        auto result = m_index == other.m_index;
+        ASSERT(!result || m_indexEnd == other.m_indexEnd);
+        return result;
+    }
+
+    unsigned computeIndexEnd()
+    {
+        if (!m_iterator)
+            return 0;
+        if (m_index == m_stringView.length())
+            return m_index;
+        return textBreakFollowing(m_iterator.value(), m_index);
+    }
+
+private:
+    const StringView& m_stringView;
+    Optional<NonSharedCharacterBreakIterator> m_iterator;
+    unsigned m_index;
+    unsigned m_indexEnd;
+};
+
+StringView::GraphemeClusters::Iterator::Iterator(const StringView& stringView, unsigned index)
+    : m_impl(std::make_unique<Impl>(stringView, stringView.isNull() ? Nullopt : Optional<NonSharedCharacterBreakIterator>(NonSharedCharacterBreakIterator(stringView)), index))
+{
+}
+
+StringView::GraphemeClusters::Iterator::~Iterator()
+{
+}
+
+StringView::GraphemeClusters::Iterator::Iterator(Iterator&& other)
+    : m_impl(WTFMove(other.m_impl))
+{
+}
+
+auto StringView::GraphemeClusters::Iterator::operator++() -> Iterator&
+{
+    ++(*m_impl);
+    return *this;
+}
+
+StringView StringView::GraphemeClusters::Iterator::operator*() const
+{
+    return **m_impl;
+}
+
+bool StringView::GraphemeClusters::Iterator::operator==(const Iterator& other) const
+{
+    return *m_impl == *(other.m_impl);
+}
+
+bool StringView::GraphemeClusters::Iterator::operator!=(const Iterator& other) const
+{
+    return !(*this == other);
+}
+
 #if CHECK_STRINGVIEW_LIFETIME
 
 // Manage reference count manually so UnderlyingString does not need to be defined in the header.

Modified: trunk/Source/WTF/wtf/text/StringView.h (203077 => 203078)


--- trunk/Source/WTF/wtf/text/StringView.h	2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/wtf/text/StringView.h	2016-07-11 19:50:40 UTC (rev 203078)
@@ -82,6 +82,9 @@
     class CodePoints;
     CodePoints codePoints() const;
 
+    class GraphemeClusters;
+    GraphemeClusters graphemeClusters() const;
+
     bool is8Bit() const;
     const LChar* characters8() const;
     const UChar* characters16() const;
@@ -580,6 +583,18 @@
     return equalIgnoringASCIICaseCommon(a, b);
 }
 
+class StringView::GraphemeClusters {
+public:
+    explicit GraphemeClusters(const StringView&);
+
+    class Iterator;
+    Iterator begin() const;
+    Iterator end() const;
+
+private:
+    StringView m_stringView;
+};
+
 class StringView::CodePoints {
 public:
     explicit CodePoints(const StringView&);
@@ -604,6 +619,29 @@
     StringView m_stringView;
 };
 
+class StringView::GraphemeClusters::Iterator {
+public:
+    WTF_EXPORT_PRIVATE Iterator() = delete;
+    WTF_EXPORT_PRIVATE Iterator(const StringView&, unsigned index);
+    WTF_EXPORT_PRIVATE ~Iterator();
+
+    Iterator(const Iterator&) = delete;
+    WTF_EXPORT_PRIVATE Iterator(Iterator&&);
+    Iterator& operator=(const Iterator&) = delete;
+    Iterator& operator=(Iterator&&) = delete;
+
+    WTF_EXPORT_PRIVATE StringView operator*() const;
+    WTF_EXPORT_PRIVATE Iterator& operator++();
+
+    WTF_EXPORT_PRIVATE bool operator==(const Iterator&) const;
+    WTF_EXPORT_PRIVATE bool operator!=(const Iterator&) const;
+
+private:
+    class Impl;
+
+    std::unique_ptr<Impl> m_impl;
+};
+
 class StringView::CodePoints::Iterator {
 public:
     Iterator(const StringView&, unsigned index);
@@ -637,6 +675,11 @@
     unsigned m_index;
 };
 
+inline auto StringView::graphemeClusters() const -> GraphemeClusters
+{
+    return GraphemeClusters(*this);
+}
+
 inline auto StringView::codePoints() const -> CodePoints
 {
     return CodePoints(*this);
@@ -647,6 +690,21 @@
     return CodeUnits(*this);
 }
 
+inline StringView::GraphemeClusters::GraphemeClusters(const StringView& stringView)
+    : m_stringView(stringView)
+{
+}
+
+inline auto StringView::GraphemeClusters::begin() const -> Iterator
+{
+    return Iterator(m_stringView, 0);
+}
+
+inline auto StringView::GraphemeClusters::end() const -> Iterator
+{
+    return Iterator(m_stringView, m_stringView.length());
+}
+
 inline StringView::CodePoints::CodePoints(const StringView& stringView)
     : m_stringView(stringView)
 {

Modified: trunk/Source/WTF/wtf/text/TextBreakIterator.cpp (203077 => 203078)


--- trunk/Source/WTF/wtf/text/TextBreakIterator.cpp	2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/wtf/text/TextBreakIterator.cpp	2016-07-11 19:50:40 UTC (rev 203078)
@@ -819,7 +819,7 @@
 {
     m_iterator = nonSharedCharacterBreakIterator;
 
-    bool createdIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, 0);
+    bool createdIterator = m_iterator && compareAndSwapNonSharedCharacterBreakIterator(m_iterator, nullptr);
     if (!createdIterator)
         m_iterator = initializeIterator(UBRK_CHARACTER);
     if (!m_iterator)
@@ -834,7 +834,13 @@
         ubrk_close(reinterpret_cast<UBreakIterator*>(m_iterator));
 }
 
+NonSharedCharacterBreakIterator::NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&& other)
+    : m_iterator(nullptr)
+{
+    std::swap(m_iterator, other.m_iterator);
+}
 
+
 // Iterator implemenation.
 
 int textBreakFirst(TextBreakIterator* iterator)

Modified: trunk/Source/WTF/wtf/text/TextBreakIterator.h (203077 => 203078)


--- trunk/Source/WTF/wtf/text/TextBreakIterator.h	2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Source/WTF/wtf/text/TextBreakIterator.h	2016-07-11 19:50:40 UTC (rev 203078)
@@ -198,6 +198,8 @@
     WTF_EXPORT_PRIVATE NonSharedCharacterBreakIterator(StringView);
     WTF_EXPORT_PRIVATE ~NonSharedCharacterBreakIterator();
 
+    NonSharedCharacterBreakIterator(NonSharedCharacterBreakIterator&&);
+
     operator TextBreakIterator*() const { return m_iterator; }
 
 private:

Modified: trunk/Tools/ChangeLog (203077 => 203078)


--- trunk/Tools/ChangeLog	2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Tools/ChangeLog	2016-07-11 19:50:40 UTC (rev 203078)
@@ -1,3 +1,16 @@
+2016-07-11  Myles C. Maxfield  <[email protected]>
+
+        Implement grapheme cluster iterator on StringView
+        https://bugs.webkit.org/show_bug.cgi?id=159598
+
+        Reviewed by Anders Carlsson.
+
+        This is in preparation for honoring the second argument to FontFaceSet.load().
+
+        * TestWebKitAPI/Tests/WTF/StringView.cpp:
+        (TestWebKitAPI::compareLoopIterations):
+        (TestWebKitAPI::TEST):
+
 2016-07-11  Nan Wang  <[email protected]>
 
         AX: WKWebView should have API to prevent pinch-to-zoom always being allowed

Modified: trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp (203077 => 203078)


--- trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp	2016-07-11 19:43:19 UTC (rev 203077)
+++ trunk/Tools/TestWebKitAPI/Tests/WTF/StringView.cpp	2016-07-11 19:50:40 UTC (rev 203078)
@@ -78,6 +78,14 @@
         SUCCEED();
 }
 
+bool compareLoopIterations(StringView::GraphemeClusters graphemeClusters, std::vector<StringView> expected)
+{
+    std::vector<StringView> actual;
+    for (auto graphemeCluster : graphemeClusters)
+        actual.push_back(graphemeCluster);
+    return actual == expected;
+}
+
 bool compareLoopIterations(StringView::CodePoints codePoints, std::vector<UChar32> expected)
 {
     std::vector<UChar32> actual;
@@ -103,43 +111,86 @@
 
 TEST(WTF, StringViewIterators)
 {
-    compareLoopIterations(StringView().codePoints(), { });
-    compareLoopIterations(StringView().codeUnits(), { });
+    EXPECT_TRUE(compareLoopIterations(StringView().codePoints(), { }));
+    EXPECT_TRUE(compareLoopIterations(StringView().codeUnits(), { }));
+    EXPECT_TRUE(compareLoopIterations(StringView().graphemeClusters(), { }));
 
-    compareLoopIterations(StringView::empty().codePoints(), { });
-    compareLoopIterations(StringView::empty().codeUnits(), { });
+    EXPECT_TRUE(compareLoopIterations(StringView::empty().codePoints(), { }));
+    EXPECT_TRUE(compareLoopIterations(StringView::empty().codeUnits(), { }));
+    EXPECT_TRUE(compareLoopIterations(StringView::empty().graphemeClusters(), { }));
 
-    compareLoopIterations(StringView(String("hello")).codePoints(), {'h', 'e', 'l', 'l', 'o'});
-    compareLoopIterations(StringView(String("hello")).codeUnits(), {'h', 'e', 'l', 'l', 'o'});
+    String helo("helo");
+    StringView heloView(helo);
 
+    EXPECT_TRUE(compareLoopIterations(heloView.codePoints(), {'h', 'e', 'l', 'o'}));
+    EXPECT_TRUE(compareLoopIterations(heloView.codeUnits(), {'h', 'e', 'l', 'o'}));
+    EXPECT_TRUE(compareLoopIterations(heloView.graphemeClusters(), {
+        StringView(heloView.characters8(), 1),
+        StringView(heloView.characters8() + 1, 1),
+        StringView(heloView.characters8() + 2, 1),
+        StringView(heloView.characters8() + 3, 1)}));
+
     StringBuilder b;
     build(b, {0xD800, 0xDD55}); // Surrogates for unicode code point U+10155
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0x10155}));
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xD800, 0xDD55}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())}));
 
     build(b, {0xD800}); // Leading surrogate only
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0xD800}));
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xD800}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())}));
 
     build(b, {0xD800, 0xD801}); // Two leading surrogates
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0xD800, 0xD801}));
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xD800, 0xD801}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.characters16(), 1), StringView(b.characters16() + 1, 1)}));
 
     build(b, {0xDD55}); // Trailing surrogate only
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0xDD55}));
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xDD55}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())}));
 
     build(b, {0xD800, 'h'}); // Leading surrogate followed by non-surrogate
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0xD800, 'h'}));
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0xD800, 'h'}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.characters16(), 1), StringView(b.characters16() + 1, 1)}));
 
     build(b, {0x0306}); // "COMBINING BREVE"
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0x0306}));
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0x0306}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())}));
 
     build(b, {0x0306, 0xD800, 0xDD55, 'h', 'e', 'l', 'o'}); // Mix of single code unit and multi code unit code points
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0x0306, 0x10155, 'h', 'e', 'l', 'o'}));
     EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0x0306, 0xD800, 0xDD55, 'h', 'e', 'l', 'o'}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {
+        StringView(b.characters16(), 1),
+        StringView(b.characters16() + 1, 2),
+        StringView(b.characters16() + 3, 1),
+        StringView(b.characters16() + 4, 1),
+        StringView(b.characters16() + 5, 1),
+        StringView(b.characters16() + 6, 1)}));
+
+    build(b, {'e', 0x0301}); // "COMBINING ACUTE"
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {'e', 0x0301}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {'e', 0x0301}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {StringView(b.toString())}));
+
+    build(b, {'e', 0x0301, 0x0306, 'a'}); // "COMBINING ACUTE" "COMBINING BREVE"
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {'e', 0x0301, 0x0306, 'a'}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {'e', 0x0301, 0x0306, 'a'}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {
+        StringView(b.characters16(), 3),
+        StringView(b.characters16() + 3, 1),
+        }));
+
+    build(b, {0x1112, 0x116f, 0x11b6, 0x1107, 0x1161, 0x11B8}); // Korean combining Jamo
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codePoints(), {0x1112, 0x116f, 0x11b6, 0x1107, 0x1161, 0x11B8}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).codeUnits(), {0x1112, 0x116f, 0x11b6, 0x1107, 0x1161, 0x11B8}));
+    EXPECT_TRUE(compareLoopIterations(StringView(b.toString()).graphemeClusters(), {
+        StringView(b.characters16(), 3),
+        StringView(b.characters16() + 3, 3)}));
 }
 
 TEST(WTF, StringViewEqualIgnoringASCIICaseBasic)
_______________________________________________
webkit-changes mailing list
[email protected]
https://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to