Title: [89690] trunk/Source/WebCore
Revision
89690
Author
[email protected]
Date
2011-06-24 11:48:25 -0700 (Fri, 24 Jun 2011)

Log Message

2011-06-24  Nikolas Zimmermann  <[email protected]>

        Reviewed by Dirk Schulze.

        Refactor text iterator code respecting surrogate pairs from WidthIterator
        https://bugs.webkit.org/show_bug.cgi?id=63319

        WidthIterator contains a surrogate pair aware text iterator that is needed in other parts of WebCore.
        Refactor the code into its own SurrogatePairAwareTextIterator class. It can be used like this:

        UChar32 character = 0;
        unsigned clusterLength = 0;

        SurrogatePairAwareTextIterator textIterator(myString.characters(), 0, myString.length(), myString.length());
        while (textIterator.consume(character, clusterLength)) {
            // Either clusterLength is 1 and UChar32 fits into a single UChar.
            // Or     clusterLength is 2 and the UChar32 has been computed by combining the high and low surrogates of a pair.
            ...
        
            textIterator.advance(clusterLength);
        }

        SVGGlyphMap is the next candidate that wants to make use of SurrogatePairAwareTextIterator.
        Doesn't affect any tests, just refactoring.

        * CMakeLists.txt: Add SurrogatePairAwareTextIterator.* to build.
        * GNUmakefile.list.am: Ditto.
        * WebCore.gypi: Ditto.
        * WebCore.pro: Ditto.
        * WebCore.vcproj/WebCore.vcproj: Ditto.
        * WebCore.xcodeproj/project.pbxproj: Ditto.
        * platform/graphics/SurrogatePairAwareTextIterator.cpp: Copied from platform/graphics/WidthIterator.cpp.
        * platform/graphics/SurrogatePairAwareTextIterator.h: Added.
        (WebCore::SurrogatePairAwareTextIterator::currentCharacter):
        * platform/graphics/WidthIterator.cpp:
        (WebCore::WidthIterator::advance):
        * platform/graphics/WidthIterator.h:

Modified Paths

Added Paths

Diff

Modified: trunk/Source/WebCore/CMakeLists.txt (89689 => 89690)


--- trunk/Source/WebCore/CMakeLists.txt	2011-06-24 18:47:14 UTC (rev 89689)
+++ trunk/Source/WebCore/CMakeLists.txt	2011-06-24 18:48:25 UTC (rev 89690)
@@ -1086,6 +1086,7 @@
     platform/graphics/ShadowBlur.cpp
     platform/graphics/SimpleFontData.cpp
     platform/graphics/StringTruncator.cpp
+    platform/graphics/SurrogatePairAwareTextIterator.cpp
     platform/graphics/WidthIterator.cpp
 
     platform/graphics/filters/DistantLightSource.cpp

Modified: trunk/Source/WebCore/ChangeLog (89689 => 89690)


--- trunk/Source/WebCore/ChangeLog	2011-06-24 18:47:14 UTC (rev 89689)
+++ trunk/Source/WebCore/ChangeLog	2011-06-24 18:48:25 UTC (rev 89690)
@@ -1,3 +1,41 @@
+2011-06-24  Nikolas Zimmermann  <[email protected]>
+
+        Reviewed by Dirk Schulze.
+
+        Refactor text iterator code respecting surrogate pairs from WidthIterator
+        https://bugs.webkit.org/show_bug.cgi?id=63319
+
+        WidthIterator contains a surrogate pair aware text iterator that is needed in other parts of WebCore.
+        Refactor the code into its own SurrogatePairAwareTextIterator class. It can be used like this:
+
+        UChar32 character = 0;
+        unsigned clusterLength = 0;
+
+        SurrogatePairAwareTextIterator textIterator(myString.characters(), 0, myString.length(), myString.length());
+        while (textIterator.consume(character, clusterLength)) {
+            // Either clusterLength is 1 and UChar32 fits into a single UChar.
+            // Or     clusterLength is 2 and the UChar32 has been computed by combining the high and low surrogates of a pair.
+            ...
+        
+            textIterator.advance(clusterLength);
+        }
+
+        SVGGlyphMap is the next candidate that wants to make use of SurrogatePairAwareTextIterator.
+        Doesn't affect any tests, just refactoring.
+
+        * CMakeLists.txt: Add SurrogatePairAwareTextIterator.* to build.
+        * GNUmakefile.list.am: Ditto.
+        * WebCore.gypi: Ditto.
+        * WebCore.pro: Ditto.
+        * WebCore.vcproj/WebCore.vcproj: Ditto.
+        * WebCore.xcodeproj/project.pbxproj: Ditto.
+        * platform/graphics/SurrogatePairAwareTextIterator.cpp: Copied from platform/graphics/WidthIterator.cpp.
+        * platform/graphics/SurrogatePairAwareTextIterator.h: Added.
+        (WebCore::SurrogatePairAwareTextIterator::currentCharacter):
+        * platform/graphics/WidthIterator.cpp:
+        (WebCore::WidthIterator::advance):
+        * platform/graphics/WidthIterator.h:
+
 2011-06-24  Adrienne Walker  <[email protected]>
 
         Reviewed by Simon Fraser.

Modified: trunk/Source/WebCore/GNUmakefile.list.am (89689 => 89690)


--- trunk/Source/WebCore/GNUmakefile.list.am	2011-06-24 18:47:14 UTC (rev 89689)
+++ trunk/Source/WebCore/GNUmakefile.list.am	2011-06-24 18:48:25 UTC (rev 89690)
@@ -2542,6 +2542,8 @@
 	Source/WebCore/platform/graphics/StringTruncator.cpp \
 	Source/WebCore/platform/graphics/StringTruncator.h \
 	Source/WebCore/platform/graphics/StrokeStyleApplier.h \
+	Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp \
+	Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.h \
 	Source/WebCore/platform/graphics/SVGGlyph.cpp \
 	Source/WebCore/platform/graphics/SVGGlyph.h \
 	Source/WebCore/platform/graphics/TextRenderingMode.h \

Modified: trunk/Source/WebCore/WebCore.gypi (89689 => 89690)


--- trunk/Source/WebCore/WebCore.gypi	2011-06-24 18:47:14 UTC (rev 89689)
+++ trunk/Source/WebCore/WebCore.gypi	2011-06-24 18:48:25 UTC (rev 89690)
@@ -3987,6 +3987,8 @@
             'platform/graphics/SVGGlyph.cpp',
             'platform/graphics/StringTruncator.cpp',
             'platform/graphics/StrokeStyleApplier.h',
+            'platform/graphics/SurrogatePairAwareTextIterator.cpp',
+            'platform/graphics/SurrogatePairAwareTextIterator.h',
             'platform/graphics/Tile.h',
             'platform/graphics/TiledBackingStore.cpp',
             'platform/graphics/TiledBackingStore.h',

Modified: trunk/Source/WebCore/WebCore.pro (89689 => 89690)


--- trunk/Source/WebCore/WebCore.pro	2011-06-24 18:47:14 UTC (rev 89689)
+++ trunk/Source/WebCore/WebCore.pro	2011-06-24 18:48:25 UTC (rev 89690)
@@ -3202,10 +3202,12 @@
     SOURCES += \
         platform/graphics/FontFastPath.cpp \
         platform/graphics/GlyphPageTreeNode.cpp \
-        platform/graphics/WidthIterator.cpp
+        platform/graphics/WidthIterator.cpp \
+        platform/graphics/SurrogatePairAwareTextIterator.cpp
 
     HEADERS += \
-        platform/graphics/WidthIterator.h
+        platform/graphics/WidthIterator.h \
+        platform/graphics/SurrogatePairAwareTextIterator.h
 }
 
 contains(DEFINES, ENABLE_GEOLOCATION=1) {

Modified: trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj (89689 => 89690)


--- trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj	2011-06-24 18:47:14 UTC (rev 89689)
+++ trunk/Source/WebCore/WebCore.vcproj/WebCore.vcproj	2011-06-24 18:48:25 UTC (rev 89690)
@@ -31345,6 +31345,14 @@
 					>
 				</File>
 				<File
+					RelativePath="..\platform\graphics\SurrogatePairAwareTextIterator.cpp"
+					>
+				</File>
+				<File
+					RelativePath="..\platform\graphics\SurrogatePairAwareTextIterator.h"
+					>
+				</File>
+				<File
 					RelativePath="..\platform\text\TextBoundaries.cpp"
 					>
 				</File>

Modified: trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj (89689 => 89690)


--- trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj	2011-06-24 18:47:14 UTC (rev 89689)
+++ trunk/Source/WebCore/WebCore.xcodeproj/project.pbxproj	2011-06-24 18:48:25 UTC (rev 89690)
@@ -118,6 +118,8 @@
 		087281570F26B9B600AFC596 /* OptionGroupElement.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 087281530F26B9B600AFC596 /* OptionGroupElement.cpp */; };
 		087281580F26B9B600AFC596 /* OptionGroupElement.h in Headers */ = {isa = PBXBuildFile; fileRef = 087281540F26B9B600AFC596 /* OptionGroupElement.h */; };
 		0873B86B136064EA00A522C2 /* GlyphPage.h in Headers */ = {isa = PBXBuildFile; fileRef = 0873B86A136064EA00A522C2 /* GlyphPage.h */; settings = {ATTRIBUTES = (Private, ); }; };
+		087558C513B4A57D00F49307 /* SurrogatePairAwareTextIterator.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 087558C313B4A57D00F49307 /* SurrogatePairAwareTextIterator.cpp */; };
+		087558C613B4A57D00F49307 /* SurrogatePairAwareTextIterator.h in Headers */ = {isa = PBXBuildFile; fileRef = 087558C413B4A57D00F49307 /* SurrogatePairAwareTextIterator.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		087B84961272CEC800A14417 /* SVGAnimatedAngle.h in Headers */ = {isa = PBXBuildFile; fileRef = 087B84951272CEC700A14417 /* SVGAnimatedAngle.h */; settings = {ATTRIBUTES = (Private, ); }; };
 		087E0AF613606D0B00FA4BA8 /* SVGGlyph.cpp in Sources */ = {isa = PBXBuildFile; fileRef = 087E0AF413606D0B00FA4BA8 /* SVGGlyph.cpp */; };
 		087E0AF713606D0B00FA4BA8 /* SVGGlyph.h in Headers */ = {isa = PBXBuildFile; fileRef = 087E0AF513606D0B00FA4BA8 /* SVGGlyph.h */; settings = {ATTRIBUTES = (Private, ); }; };
@@ -6487,6 +6489,8 @@
 		087281530F26B9B600AFC596 /* OptionGroupElement.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = OptionGroupElement.cpp; sourceTree = "<group>"; };
 		087281540F26B9B600AFC596 /* OptionGroupElement.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = OptionGroupElement.h; sourceTree = "<group>"; };
 		0873B86A136064EA00A522C2 /* GlyphPage.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = GlyphPage.h; sourceTree = "<group>"; };
+		087558C313B4A57D00F49307 /* SurrogatePairAwareTextIterator.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SurrogatePairAwareTextIterator.cpp; sourceTree = "<group>"; };
+		087558C413B4A57D00F49307 /* SurrogatePairAwareTextIterator.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SurrogatePairAwareTextIterator.h; sourceTree = "<group>"; };
 		087B84951272CEC700A14417 /* SVGAnimatedAngle.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SVGAnimatedAngle.h; sourceTree = "<group>"; };
 		087E0AF413606D0B00FA4BA8 /* SVGGlyph.cpp */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.cpp.cpp; path = SVGGlyph.cpp; sourceTree = "<group>"; };
 		087E0AF513606D0B00FA4BA8 /* SVGGlyph.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; path = SVGGlyph.h; sourceTree = "<group>"; };
@@ -18049,6 +18053,8 @@
 				B2C3DA540D006CD600EF6F26 /* SimpleFontData.h */,
 				B23540F00D00782E002382FA /* StringTruncator.cpp */,
 				B23540F10D00782E002382FA /* StringTruncator.h */,
+				087558C313B4A57D00F49307 /* SurrogatePairAwareTextIterator.cpp */,
+				087558C413B4A57D00F49307 /* SurrogatePairAwareTextIterator.h */,
 				087E0AF413606D0B00FA4BA8 /* SVGGlyph.cpp */,
 				087E0AF513606D0B00FA4BA8 /* SVGGlyph.h */,
 				930FC6891072B9280045293E /* TextRenderingMode.h */,
@@ -23064,6 +23070,7 @@
 				97B1F02F13B025D200F5103F /* SharedBufferChunkReader.h in Headers */,
 				43B9336913B261B1004584BF /* SVGAnimatedPointList.h in Headers */,
 				43A625F813B3304000AC94B8 /* SVGAnimatedColor.h in Headers */,
+				087558C613B4A57D00F49307 /* SurrogatePairAwareTextIterator.h in Headers */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};
@@ -25827,6 +25834,7 @@
 				43B9336A13B261B1004584BF /* SVGAnimatedPointList.cpp in Sources */,
 				43A625F913B3304000AC94B8 /* SVGAnimatedColor.cpp in Sources */,
 				43A6266713B3D11000AC94B8 /* SVGAnimatedString.cpp in Sources */,
+				087558C513B4A57D00F49307 /* SurrogatePairAwareTextIterator.cpp in Sources */,
 			);
 			runOnlyForDeploymentPostprocessing = 0;
 		};

Copied: trunk/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp (from rev 89663, trunk/Source/WebCore/platform/graphics/WidthIterator.cpp) (0 => 89690)


--- trunk/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp	                        (rev 0)
+++ trunk/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.cpp	2011-06-24 18:48:25 UTC (rev 89690)
@@ -0,0 +1,119 @@
+/*
+ * Copyright (C) 2003, 2006, 2008, 2009, 2010, 2011 Apple Inc. All rights reserved.
+ * Copyright (C) 2008 Holger Hans Peter Freyther
+ * Copyright (C) Research In Motion Limited 2011. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#include "config.h"
+#include "SurrogatePairAwareTextIterator.h"
+
+#if USE(ICU_UNICODE)
+#include <unicode/unorm.h>
+#endif
+
+using namespace WTF;
+using namespace Unicode;
+
+namespace WebCore {
+
+SurrogatePairAwareTextIterator::SurrogatePairAwareTextIterator(const UChar* characters, int currentCharacter, int lastCharacter, int endCharacter)
+    : m_characters(characters)
+    , m_currentCharacter(currentCharacter)
+    , m_lastCharacter(lastCharacter)
+    , m_endCharacter(endCharacter)
+{
+}
+
+bool SurrogatePairAwareTextIterator::consume(UChar32& character, unsigned& clusterLength)
+{
+    if (m_currentCharacter >= m_lastCharacter)
+        return false;
+
+    character = *m_characters;
+    clusterLength = 1;
+
+    if (character < 0x3041)
+        return true;
+
+    if (character <= 0x30FE) {
+        // Deal with Hiragana and Katakana voiced and semi-voiced syllables.
+        // Normalize into composed form, and then look for glyph with base + combined mark.
+        // Check above for character range to minimize performance impact.
+        if (UChar32 normalized = normalizeVoicingMarks()) {
+            character = normalized;
+            clusterLength = 2;
+        }
+        return true;
+    }
+
+    if (!U16_IS_SURROGATE(character))
+        return true;
+
+    // If we have a surrogate pair, make sure it starts with the high part.
+    if (!U16_IS_SURROGATE_LEAD(character))
+        return false;
+
+    // Do we have a surrogate pair? If so, determine the full Unicode (32 bit) code point before glyph lookup.
+    // Make sure we have another character and it's a low surrogate.
+    if (m_currentCharacter + 1 >= m_endCharacter)
+        return false;
+
+    UChar low = m_characters[1];
+    if (!U16_IS_TRAIL(low))
+        return false;
+
+    character = U16_GET_SUPPLEMENTARY(character, low);
+    clusterLength = 2;
+    return true;
+}
+
+void SurrogatePairAwareTextIterator::advance(unsigned advanceLength)
+{
+    m_characters += advanceLength;
+    m_currentCharacter += advanceLength;
+}
+
+UChar32 SurrogatePairAwareTextIterator::normalizeVoicingMarks()
+{
+    // According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values
+    static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8;
+
+    if (m_currentCharacter + 1 >= m_endCharacter)
+        return 0;
+
+    if (combiningClass(m_characters[1]) == hiraganaKatakanaVoicingMarksCombiningClass) {
+#if USE(ICU_UNICODE)
+        // Normalize into composed form using 3.2 rules.
+        UChar normalizedCharacters[2] = { 0, 0 };
+        UErrorCode uStatus = U_ZERO_ERROR;  
+        int32_t resultLength = unorm_normalize(m_characters, 2, UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus);
+        if (resultLength == 1 && !uStatus)
+            return normalizedCharacters[0];
+#elif USE(QT4_UNICODE)
+        QString tmp(reinterpret_cast<const QChar*>(m_characters), 2);
+        QString res = tmp.normalized(QString::NormalizationForm_C, QChar::Unicode_3_2);
+        if (res.length() == 1)
+            return res.at(0).unicode();
+#endif
+    }
+
+    return 0;
+}
+
+}

Added: trunk/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.h (0 => 89690)


--- trunk/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.h	                        (rev 0)
+++ trunk/Source/WebCore/platform/graphics/SurrogatePairAwareTextIterator.h	2011-06-24 18:48:25 UTC (rev 89690)
@@ -0,0 +1,51 @@
+/*
+ * Copyright (C) Research In Motion Limited 2011. All rights reserved.
+ *
+ * This library is free software; you can redistribute it and/or
+ * modify it under the terms of the GNU Library General Public
+ * License as published by the Free Software Foundation; either
+ * version 2 of the License, or (at your option) any later version.
+ *
+ * This library is distributed in the hope that it will be useful,
+ * but WITHOUT ANY WARRANTY; without even the implied warranty of
+ * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU
+ * Library General Public License for more details.
+ *
+ * You should have received a copy of the GNU Library General Public License
+ * along with this library; see the file COPYING.LIB.  If not, write to
+ * the Free Software Foundation, Inc., 51 Franklin Street, Fifth Floor,
+ * Boston, MA 02110-1301, USA.
+ *
+ */
+
+#ifndef SurrogatePairAwareTextIterator_h
+#define SurrogatePairAwareTextIterator_h
+
+#include <wtf/text/WTFString.h>
+
+namespace WebCore {
+
+class SurrogatePairAwareTextIterator {
+public:
+    // The passed in UChar pointer starts at 'currentCharacter'. The iterator operates on the half-open range [currentCharacter, lastCharacter).
+    // 'endCharacter' denotes the maximum length of the UChar array, which might exceed 'lastCharacter'.
+    SurrogatePairAwareTextIterator(const UChar*, int currentCharacter, int lastCharacter, int endCharacter);
+
+    bool consume(UChar32& character, unsigned& clusterLength);
+    void advance(unsigned advanceLength);
+
+    int currentCharacter() const { return m_currentCharacter; }
+    const UChar* characters() const { return m_characters; }
+
+private:
+    UChar32 normalizeVoicingMarks();
+
+    const UChar* m_characters;
+    int m_currentCharacter;
+    int m_lastCharacter;
+    int m_endCharacter;
+};
+
+}
+
+#endif

Modified: trunk/Source/WebCore/platform/graphics/WidthIterator.cpp (89689 => 89690)


--- trunk/Source/WebCore/platform/graphics/WidthIterator.cpp	2011-06-24 18:47:14 UTC (rev 89689)
+++ trunk/Source/WebCore/platform/graphics/WidthIterator.cpp	2011-06-24 18:48:25 UTC (rev 89690)
@@ -25,26 +25,19 @@
 #include "Font.h"
 #include "GlyphBuffer.h"
 #include "SimpleFontData.h"
+#include "SurrogatePairAwareTextIterator.h"
 #include "TextRun.h"
 #include <wtf/MathExtras.h>
 
-#if USE(ICU_UNICODE)
-#include <unicode/unorm.h>
-#endif
-
 using namespace WTF;
 using namespace Unicode;
 using namespace std;
 
 namespace WebCore {
 
-// According to http://www.unicode.org/Public/UNIDATA/UCD.html#Canonical_Combining_Class_Values
-static const uint8_t hiraganaKatakanaVoicingMarksCombiningClass = 8;
-
 WidthIterator::WidthIterator(const Font* font, const TextRun& run, HashSet<const SimpleFontData*>* fallbackFonts, bool accountForGlyphBounds, bool forTextEmphasis)
     : m_font(font)
     , m_run(run)
-    , m_end(run.length())
     , m_currentCharacter(0)
     , m_runWidthSoFar(0)
     , m_isAfterExpansion(!run.allowsLeadingExpansion())
@@ -63,7 +56,7 @@
         m_expansionPerOpportunity = 0;
     else {
         bool isAfterExpansion = m_isAfterExpansion;
-        unsigned expansionOpportunityCount = Font::expansionOpportunityCount(m_run.characters(), m_end, m_run.ltr() ? LTR : RTL, isAfterExpansion);
+        unsigned expansionOpportunityCount = Font::expansionOpportunityCount(m_run.characters(), m_run.length(), m_run.ltr() ? LTR : RTL, isAfterExpansion);
         if (isAfterExpansion && !m_run.allowsTrailingExpansion())
             expansionOpportunityCount--;
 
@@ -76,15 +69,12 @@
 
 void WidthIterator::advance(int offset, GlyphBuffer* glyphBuffer)
 {
-    if (offset > m_end)
-        offset = m_end;
+    if (offset > m_run.length())
+        offset = m_run.length();
 
-    int currentCharacter = m_currentCharacter;
-    if (currentCharacter >= offset)
+    if (int(m_currentCharacter) >= offset)
         return;
 
-    const UChar* cp = m_run.data(currentCharacter);
-
     bool rtl = m_run.rtl();
     bool hasExtraSpacing = (m_font->letterSpacing() || m_font->wordSpacing() || m_expansion) && !m_run.spacingDisabled();
 
@@ -93,37 +83,11 @@
     const SimpleFontData* primaryFont = m_font->primaryFont();
     const SimpleFontData* lastFontData = primaryFont;
 
-    while (currentCharacter < offset) {
-        UChar32 c = *cp;
-        unsigned clusterLength = 1;
-        if (c >= 0x3041) {
-            if (c <= 0x30FE) {
-                // Deal with Hiragana and Katakana voiced and semi-voiced syllables.
-                // Normalize into composed form, and then look for glyph with base + combined mark.
-                // Check above for character range to minimize performance impact.
-                UChar32 normalized = normalizeVoicingMarks(currentCharacter);
-                if (normalized) {
-                    c = normalized;
-                    clusterLength = 2;
-                }
-            } else if (U16_IS_SURROGATE(c)) {
-                if (!U16_IS_SURROGATE_LEAD(c))
-                    break;
-
-                // Do we have a surrogate pair?  If so, determine the full Unicode (32 bit)
-                // code point before glyph lookup.
-                // Make sure we have another character and it's a low surrogate.
-                if (currentCharacter + 1 >= m_run.length())
-                    break;
-                UChar low = cp[1];
-                if (!U16_IS_TRAIL(low))
-                    break;
-                c = U16_GET_SUPPLEMENTARY(c, low);
-                clusterLength = 2;
-            }
-        }
-
-        const GlyphData& glyphData = m_font->glyphDataForCharacter(c, rtl);
+    UChar32 character = 0;
+    unsigned clusterLength = 0;
+    SurrogatePairAwareTextIterator textIterator(m_run.data(m_currentCharacter), m_currentCharacter, offset, m_run.length());
+    while (textIterator.consume(character, clusterLength)) {
+        const GlyphData& glyphData = m_font->glyphDataForCharacter(character, rtl);
         Glyph glyph = glyphData.glyph;
         const SimpleFontData* fontData = glyphData.fontData;
 
@@ -131,7 +95,7 @@
 
         // Now that we have a glyph and font data, get its width.
         float width;
-        if (c == '\t' && m_run.allowTabs()) {
+        if (character == '\t' && m_run.allowTabs()) {
             float tabWidth = m_font->tabWidth(*fontData);
             width = tabWidth - fmodf(m_run.xPos() + m_runWidthSoFar, tabWidth);
         } else {
@@ -148,10 +112,10 @@
             if (m_fallbackFonts && fontData != primaryFont) {
                 // FIXME: This does a little extra work that could be avoided if
                 // glyphDataForCharacter() returned whether it chose to use a small caps font.
-                if (!m_font->isSmallCaps() || c == toUpper(c))
+                if (!m_font->isSmallCaps() || character == toUpper(character))
                     m_fallbackFonts->add(fontData);
                 else {
-                    const GlyphData& uppercaseGlyphData = m_font->glyphDataForCharacter(toUpper(c), rtl);
+                    const GlyphData& uppercaseGlyphData = m_font->glyphDataForCharacter(toUpper(character), rtl);
                     if (uppercaseGlyphData.fontData != primaryFont)
                         m_fallbackFonts->add(uppercaseGlyphData.fontData);
                 }
@@ -164,8 +128,8 @@
                 width += m_font->letterSpacing();
 
             static bool expandAroundIdeographs = Font::canExpandAroundIdeographsInComplexText();
-            bool treatAsSpace = Font::treatAsSpace(c);
-            if (treatAsSpace || (expandAroundIdeographs && Font::isCJKIdeographOrSymbol(c))) {
+            bool treatAsSpace = Font::treatAsSpace(character);
+            if (treatAsSpace || (expandAroundIdeographs && Font::isCJKIdeographOrSymbol(character))) {
                 // Distribute the run's total expansion evenly over all expansion opportunities in the run.
                 if (m_expansion) {
                     if (!treatAsSpace && !m_isAfterExpansion) {
@@ -179,8 +143,8 @@
                                 glyphBuffer->expandLastAdvance(m_expansionPerOpportunity);
                         }
                     }
-                    if (m_run.allowsTrailingExpansion() || (m_run.ltr() && currentCharacter + clusterLength < static_cast<size_t>(m_run.length()))
-                        || (m_run.rtl() && currentCharacter)) {
+                    if (m_run.allowsTrailingExpansion() || (m_run.ltr() && textIterator.currentCharacter() + clusterLength < static_cast<size_t>(m_run.length()))
+                        || (m_run.rtl() && textIterator.currentCharacter())) {
                         m_expansion -= m_expansionPerOpportunity;
                         width += m_expansionPerOpportunity;
                         m_isAfterExpansion = true;
@@ -190,7 +154,7 @@
 
                 // Account for word spacing.
                 // We apply additional space between "words" by adding width to the space character.
-                if (treatAsSpace && currentCharacter && !Font::treatAsSpace(cp[-1]) && m_font->wordSpacing())
+                if (treatAsSpace && textIterator.currentCharacter() && !Font::treatAsSpace(textIterator.characters()[-1]) && m_font->wordSpacing())
                     width += m_font->wordSpacing();
             } else
                 m_isAfterExpansion = false;
@@ -198,17 +162,15 @@
 
         if (m_accountForGlyphBounds) {
             bounds = fontData->boundsForGlyph(glyph);
-            if (!currentCharacter)
+            if (!textIterator.currentCharacter())
                 m_firstGlyphOverflow = max<float>(0, -bounds.x());
         }
 
-        if (m_forTextEmphasis && !Font::canReceiveTextEmphasis(c))
+        if (m_forTextEmphasis && !Font::canReceiveTextEmphasis(character))
             glyph = 0;
 
         // Advance past the character we just dealt with.
-        cp += clusterLength;
-        currentCharacter += clusterLength;
-
+        textIterator.advance(clusterLength);
         m_runWidthSoFar += width;
 
         if (glyphBuffer)
@@ -221,7 +183,7 @@
         }
     }
 
-    m_currentCharacter = currentCharacter;
+    m_currentCharacter = textIterator.currentCharacter();
 }
 
 bool WidthIterator::advanceOneCharacter(float& width, GlyphBuffer* glyphBuffer)
@@ -235,27 +197,4 @@
     return glyphBuffer->size() > oldSize;
 }
 
-UChar32 WidthIterator::normalizeVoicingMarks(int currentCharacter)
-{
-    if (currentCharacter + 1 < m_end) {
-        if (combiningClass(m_run[currentCharacter + 1]) == hiraganaKatakanaVoicingMarksCombiningClass) {
-#if USE(ICU_UNICODE)
-            // Normalize into composed form using 3.2 rules.
-            UChar normalizedCharacters[2] = { 0, 0 };
-            UErrorCode uStatus = U_ZERO_ERROR;  
-            int32_t resultLength = unorm_normalize(m_run.data(currentCharacter), 2,
-                UNORM_NFC, UNORM_UNICODE_3_2, &normalizedCharacters[0], 2, &uStatus);
-            if (resultLength == 1 && uStatus == 0)
-                return normalizedCharacters[0];
-#elif USE(QT4_UNICODE)
-            QString tmp(reinterpret_cast<const QChar*>(m_run.data(currentCharacter)), 2);
-            QString res = tmp.normalized(QString::NormalizationForm_C, QChar::Unicode_3_2);
-            if (res.length() == 1)
-                return res.at(0).unicode();
-#endif
-        }
-    }
-    return 0;
 }
-
-}

Modified: trunk/Source/WebCore/platform/graphics/WidthIterator.h (89689 => 89690)


--- trunk/Source/WebCore/platform/graphics/WidthIterator.h	2011-06-24 18:47:14 UTC (rev 89689)
+++ trunk/Source/WebCore/platform/graphics/WidthIterator.h	2011-06-24 18:48:25 UTC (rev 89690)
@@ -46,7 +46,6 @@
     const Font* m_font;
 
     const TextRun& m_run;
-    int m_end;
 
     unsigned m_currentCharacter;
     float m_runWidthSoFar;
@@ -55,8 +54,6 @@
     bool m_isAfterExpansion;
 
 private:
-    UChar32 normalizeVoicingMarks(int currentCharacter);
-
     HashSet<const SimpleFontData*>* m_fallbackFonts;
     bool m_accountForGlyphBounds;
     float m_maxGlyphBoundingBoxY;
_______________________________________________
webkit-changes mailing list
[email protected]
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to