Title: [123467] trunk/Source/WebCore
Revision
123467
Author
allan.jen...@nokia.com
Date
2012-07-24 06:37:43 -0700 (Tue, 24 Jul 2012)

Log Message

Fix blend filter for autovectorizing
https://bugs.webkit.org/show_bug.cgi?id=91398

Reviewed by Nikolas Zimmermann.

To support auto-vectorization, the loop had to be unswitched, and plain arrays that
do no bounds checking had to be used in the inner loop. Finally, the integer division by 255
was optimized to not use integer division intrinsics.

On an x86-64 architecture, using GCC -O3 on the file, this gives a speed-up of 2.9x.

* platform/graphics/filters/FEBlend.cpp:
(WebCore::fastDivideBy255):
(BlendNormal):
(WebCore::BlendNormal::apply):
(BlendMultiply):
(WebCore::BlendMultiply::apply):
(BlendScreen):
(WebCore::BlendScreen::apply):
(BlendDarken):
(WebCore::BlendDarken::apply):
(BlendLighten):
(WebCore::BlendLighten::apply):
(BlendUnknown):
(WebCore::BlendUnknown::apply):
(WebCore::platformApply):
(WebCore::FEBlend::platformApplyGeneric):
(WebCore::FEBlend::platformApplySoftware):
* platform/graphics/filters/FEBlend.h:
(FEBlend):

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (123466 => 123467)


--- trunk/Source/WebCore/ChangeLog	2012-07-24 13:33:57 UTC (rev 123466)
+++ trunk/Source/WebCore/ChangeLog	2012-07-24 13:37:43 UTC (rev 123467)
@@ -1,3 +1,36 @@
+2012-07-24  Allan Sandfeld Jensen  <allan.jen...@nokia.com>
+
+        Fix blend filter for autovectorizing
+        https://bugs.webkit.org/show_bug.cgi?id=91398
+
+        Reviewed by Nikolas Zimmermann.
+
+        To support auto-vectorization, the loop had to be unswitched, and plain arrays that
+        do no bounds checking had to be used in the inner loop. Finally, the integer division by 255
+        was optimized to not use integer division intrinsics.
+
+        On an x86-64 architecture, using GCC -O3 on the file, this gives a speed-up of 2.9x.
+
+        * platform/graphics/filters/FEBlend.cpp:
+        (WebCore::fastDivideBy255):
+        (BlendNormal):
+        (WebCore::BlendNormal::apply):
+        (BlendMultiply):
+        (WebCore::BlendMultiply::apply):
+        (BlendScreen):
+        (WebCore::BlendScreen::apply):
+        (BlendDarken):
+        (WebCore::BlendDarken::apply):
+        (BlendLighten):
+        (WebCore::BlendLighten::apply):
+        (BlendUnknown):
+        (WebCore::BlendUnknown::apply):
+        (WebCore::platformApply):
+        (WebCore::FEBlend::platformApplyGeneric):
+        (WebCore::FEBlend::platformApplySoftware):
+        * platform/graphics/filters/FEBlend.h:
+        (FEBlend):
+
 2012-07-23  Hans Wennborg  <h...@chromium.org>
 
         Speech JavaScript API: Add the SpeechRecognitionResult.emma attribute

Modified: trunk/Source/WebCore/platform/graphics/filters/FEBlend.cpp (123466 => 123467)


--- trunk/Source/WebCore/platform/graphics/filters/FEBlend.cpp	2012-07-24 13:33:57 UTC (rev 123466)
+++ trunk/Source/WebCore/platform/graphics/filters/FEBlend.cpp	2012-07-24 13:37:43 UTC (rev 123467)
@@ -3,6 +3,7 @@
  * Copyright (C) 2004, 2005 Rob Buis <b...@kde.org>
  * Copyright (C) 2005 Eric Seidel <e...@webkit.org>
  * Copyright (C) 2009 Dirk Schulze <k...@webkit.org>
+ * Copyright (C) 2012 Nokia Corporation and/or its subsidiary(-ies)
  *
  * This library is free software; you can redistribute it and/or
  * modify it under the terms of the GNU Library General Public
@@ -62,71 +63,102 @@
     return true;
 }
 
-static inline unsigned char normal(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char)
+static inline unsigned char fastDivideBy255(uint16_t value)
 {
-    return (((255 - alphaA) * colorB + colorA * 255) / 255);
+    // Approximates value / 255 without an integer division. The result is exact for every 16-bit value except 65535, so a product of two 8-bit channels (at most 65025) is safe.
+    uint16_t quotient = value >> 8;
+    uint16_t remainder = value - (quotient * 255) + 1;
+    return quotient + (remainder >> 8);
 }
 
-static inline unsigned char multiply(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char alphaB)
-{
-    return (((255 - alphaA) * colorB + (255 - alphaB + colorB) * colorA) / 255);
-}
+class BlendNormal {
+public:
+    static unsigned char apply(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char)
+    {
+        return fastDivideBy255((255 - alphaA) * colorB + colorA * 255);
+    }
+};
 
-static inline unsigned char screen(unsigned char colorA, unsigned char colorB, unsigned char, unsigned char)
-{
-    return (((colorB + colorA) * 255 - colorA * colorB) / 255);
-}
+class BlendMultiply {
+public:
+    static unsigned char apply(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char alphaB)
+    {
+        return fastDivideBy255((255 - alphaA) * colorB + (255 - alphaB + colorB) * colorA);
+    }
+};
 
-static inline unsigned char darken(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char alphaB)
-{
-    return ((std::min((255 - alphaA) * colorB + colorA * 255, (255 - alphaB) * colorA + colorB * 255)) / 255);
-}
+class BlendScreen {
+public:
+    static unsigned char apply(unsigned char colorA, unsigned char colorB, unsigned char, unsigned char)
+    {
+        return fastDivideBy255((colorB + colorA) * 255 - colorA * colorB);
+    }
+};
 
-static inline unsigned char lighten(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char alphaB)
+class BlendDarken {
+public:
+    static unsigned char apply(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char alphaB)
+    {
+        return fastDivideBy255(std::min((255 - alphaA) * colorB + colorA * 255, (255 - alphaB) * colorA + colorB * 255));
+    }
+};
+
+class BlendLighten {
+public:
+    static unsigned char apply(unsigned char colorA, unsigned char colorB, unsigned char alphaA, unsigned char alphaB)
+    {
+        return fastDivideBy255(std::max((255 - alphaA) * colorB + colorA * 255, (255 - alphaB) * colorA + colorB * 255));
+    }
+};
+
+class BlendUnknown {
+public:
+    static unsigned char apply(unsigned char, unsigned char, unsigned char, unsigned char)
+    {
+        return 0;
+    }
+};
+
+template<typename BlendFunctor>
+static void platformApply(unsigned char* sourcePixelA, unsigned char* sourcePixelB,
+                          unsigned char* destinationPixel, unsigned pixelArrayLength)
 {
-    return ((std::max((255 - alphaA) * colorB + colorA * 255, (255 - alphaB) * colorA + colorB * 255)) / 255);
+    unsigned len = pixelArrayLength / 4;
+    for (unsigned pixelOffset = 0; pixelOffset < len; pixelOffset++) {
+        unsigned char alphaA = sourcePixelA[3];
+        unsigned char alphaB = sourcePixelB[3];
+        destinationPixel[0] = BlendFunctor::apply(sourcePixelA[0], sourcePixelB[0], alphaA, alphaB);
+        destinationPixel[1] = BlendFunctor::apply(sourcePixelA[1], sourcePixelB[1], alphaA, alphaB);
+        destinationPixel[2] = BlendFunctor::apply(sourcePixelA[2], sourcePixelB[2], alphaA, alphaB);
+        destinationPixel[3] = 255 - fastDivideBy255((255 - alphaA) * (255 - alphaB));
+        sourcePixelA += 4;
+        sourcePixelB += 4;
+        destinationPixel += 4;
+    }
 }
 
-void FEBlend::platformApplyGeneric(PassRefPtr<Uint8ClampedArray> pixelArrayA, PassRefPtr<Uint8ClampedArray> pixelArrayB,
-                                   Uint8ClampedArray* dstPixelArray, unsigned pixelArrayLength)
+void FEBlend::platformApplyGeneric(unsigned char* sourcePixelA, unsigned char* sourcePixelB,
+                                   unsigned char* destinationPixel, unsigned pixelArrayLength)
 {
-    RefPtr<Uint8ClampedArray> srcPixelArrayA = pixelArrayA;
-    RefPtr<Uint8ClampedArray> srcPixelArrayB = pixelArrayB;
-
-    for (unsigned pixelOffset = 0; pixelOffset < pixelArrayLength; pixelOffset += 4) {
-        unsigned char alphaA = srcPixelArrayA->item(pixelOffset + 3);
-        unsigned char alphaB = srcPixelArrayB->item(pixelOffset + 3);
-        for (unsigned channel = 0; channel < 3; ++channel) {
-            unsigned char colorA = srcPixelArrayA->item(pixelOffset + channel);
-            unsigned char colorB = srcPixelArrayB->item(pixelOffset + channel);
-            unsigned char result;
-
-            switch (m_mode) {
-            case FEBLEND_MODE_NORMAL:
-                result = normal(colorA, colorB, alphaA, alphaB);
-                break;
-            case FEBLEND_MODE_MULTIPLY:
-                result = multiply(colorA, colorB, alphaA, alphaB);
-                break;
-            case FEBLEND_MODE_SCREEN:
-                result = screen(colorA, colorB, alphaA, alphaB);
-                break;
-            case FEBLEND_MODE_DARKEN:
-                result = darken(colorA, colorB, alphaA, alphaB);
-                break;
-            case FEBLEND_MODE_LIGHTEN:
-                result = lighten(colorA, colorB, alphaA, alphaB);
-                break;
-            case FEBLEND_MODE_UNKNOWN:
-            default:
-                result = 0;
-                break;
-            }
-
-            dstPixelArray->set(pixelOffset + channel, result);
-        }
-        unsigned char alphaR = 255 - ((255 - alphaA) * (255 - alphaB)) / 255;
-        dstPixelArray->set(pixelOffset + 3, alphaR);
+    switch (m_mode) {
+    case FEBLEND_MODE_NORMAL:
+        platformApply<BlendNormal>(sourcePixelA, sourcePixelB, destinationPixel, pixelArrayLength);
+        break;
+    case FEBLEND_MODE_MULTIPLY:
+        platformApply<BlendMultiply>(sourcePixelA, sourcePixelB, destinationPixel, pixelArrayLength);
+        break;
+    case FEBLEND_MODE_SCREEN:
+        platformApply<BlendScreen>(sourcePixelA, sourcePixelB, destinationPixel, pixelArrayLength);
+        break;
+    case FEBLEND_MODE_DARKEN:
+        platformApply<BlendDarken>(sourcePixelA, sourcePixelB, destinationPixel, pixelArrayLength);
+        break;
+    case FEBLEND_MODE_LIGHTEN:
+        platformApply<BlendLighten>(sourcePixelA, sourcePixelB, destinationPixel, pixelArrayLength);
+        break;
+    case FEBLEND_MODE_UNKNOWN:
+        platformApply<BlendUnknown>(sourcePixelA, sourcePixelB, destinationPixel, pixelArrayLength);
+        break;
     }
 }
 
@@ -165,7 +197,7 @@
         reinterpret_cast<uint32_t*>(dstPixelArray->data())[0] = sourceBAndDest[0];
     }
 #else
-    platformApplyGeneric(srcPixelArrayA, srcPixelArrayB, dstPixelArray, pixelArrayLength);
+    platformApplyGeneric(srcPixelArrayA->data(), srcPixelArrayB->data(), dstPixelArray->data(), pixelArrayLength);
 #endif
 }
 

Modified: trunk/Source/WebCore/platform/graphics/filters/FEBlend.h (123466 => 123467)


--- trunk/Source/WebCore/platform/graphics/filters/FEBlend.h	2012-07-24 13:33:57 UTC (rev 123466)
+++ trunk/Source/WebCore/platform/graphics/filters/FEBlend.h	2012-07-24 13:37:43 UTC (rev 123467)
@@ -45,8 +45,8 @@
     BlendModeType blendMode() const;
     bool setBlendMode(BlendModeType);
 
-    void platformApplyGeneric(PassRefPtr<Uint8ClampedArray> pixelArrayA, PassRefPtr<Uint8ClampedArray> pixelArrayB,
-                              Uint8ClampedArray* dstPixelArray, unsigned pixelArrayLength);
+    void platformApplyGeneric(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray,
+                           unsigned colorArrayLength);
     void platformApplyNEON(unsigned char* srcPixelArrayA, unsigned char* srcPixelArrayB, unsigned char* dstPixelArray,
                            unsigned colorArrayLength);
 
_______________________________________________
webkit-changes mailing list
webkit-changes@lists.webkit.org
http://lists.webkit.org/mailman/listinfo/webkit-changes

Reply via email to