Title: [86763] trunk/Source/WebCore
Revision
86763
Author
[email protected]
Date
2011-05-18 08:20:04 -0700 (Wed, 18 May 2011)

Log Message

2011-05-18  Renata Hodovan  <[email protected]>

        Reviewed by Nikolas Zimmermann.

        Apply the ParallelJobs support to FEGaussianBlur
        https://bugs.webkit.org/show_bug.cgi?id=61049

        The Gaussian blur filter of SVG can consume lots of resources if it is
        applied to a large area. The computation can be distributed to multiple
        cores if the architecture supports it.
        The average performance progression is about 15% on dual-core machines.

        Developed in cooperation with Gabor Loki and Zoltan Herczeg.


        * platform/graphics/filters/FEGaussianBlur.cpp:
        (WebCore::FEGaussianBlur::platformApplyWorker):
        (WebCore::FEGaussianBlur::platformApply):
        * platform/graphics/filters/FEGaussianBlur.h:

Modified Paths

Diff

Modified: trunk/Source/WebCore/ChangeLog (86762 => 86763)


--- trunk/Source/WebCore/ChangeLog	2011-05-18 15:06:14 UTC (rev 86762)
+++ trunk/Source/WebCore/ChangeLog	2011-05-18 15:20:04 UTC (rev 86763)
@@ -1,3 +1,23 @@
+2011-05-18  Renata Hodovan  <[email protected]>
+
+        Reviewed by Nikolas Zimmermann.
+
+        Apply the ParallelJobs support to FEGaussianBlur
+        https://bugs.webkit.org/show_bug.cgi?id=61049
+
+        The Gaussian blur filter of SVG can consume lots of resources if it is
+        applied to a large area. The computation can be distributed to multiple
+        cores if the architecture supports it.
+        The average performance progression is about 15% on dual-core machines.
+
+        Developed in cooperation with Gabor Loki and Zoltan Herczeg.
+
+
+        * platform/graphics/filters/FEGaussianBlur.cpp:
+        (WebCore::FEGaussianBlur::platformApplyWorker):
+        (WebCore::FEGaussianBlur::platformApply):
+        * platform/graphics/filters/FEGaussianBlur.h:
+
 2011-05-18  Pavel Feldman  <[email protected]>
 
         Reviewed by Yury Semikhatsky.

Modified: trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp (86762 => 86763)


--- trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp	2011-05-18 15:06:14 UTC (rev 86762)
+++ trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp	2011-05-18 15:20:04 UTC (rev 86763)
@@ -35,6 +35,7 @@
 
 #include <wtf/ByteArray.h>
 #include <wtf/MathExtras.h>
+#include <wtf/ParallelJobs.h>
 
 using std::max;
 
@@ -130,8 +131,90 @@
     }
 }
 
+#if ENABLE(PARALLEL_JOBS)
+void FEGaussianBlur::platformApplyWorker(PlatformApplyParameters* parameters) // Worker callback handed to ParallelJobs by platformApply(); blurs the one block described by |parameters|.
+{
+    IntSize paintSize(parameters->width, parameters->height); // Dimensions of this job's block (platformApply() sets height to endY - startY), not of the whole image.
+#if CPU(ARM_NEON) && COMPILER(GCC) // Same compile-time condition platformApply() uses to pick the NEON implementation.
+    parameters->filter->platformApplyNeon(parameters->srcPixelArray.get(), parameters->dstPixelArray.get(),
+        parameters->kernelSizeX, parameters->kernelSizeY, paintSize);
+#else // !(CPU(ARM_NEON) && COMPILER(GCC))
+    parameters->filter->platformApplyGeneric(parameters->srcPixelArray.get(), parameters->dstPixelArray.get(),
+        parameters->kernelSizeX, parameters->kernelSizeY, paintSize);
+#endif // CPU(ARM_NEON) && COMPILER(GCC)
+}
+#endif // ENABLE(PARALLEL_JOBS)
+
 inline void FEGaussianBlur::platformApply(ByteArray* srcPixelArray, ByteArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize)
 {
+#if ENABLE(PARALLEL_JOBS)
+    int scanline = 4 * paintSize.width();
+    int extraHeight = 3 * kernelSizeY * 0.5f;
+    int optimalThreadNumber = (paintSize.width() * paintSize.height()) / (s_minimalRectDimension + extraHeight * paintSize.width());
+
+    if (optimalThreadNumber > 1) {
+        ParallelJobs<PlatformApplyParameters> parallelJobs(&platformApplyWorker, optimalThreadNumber);
+
+        int jobs = parallelJobs.numberOfJobs();
+        if (jobs > 1) {
+            int blockHeight = paintSize.height() / jobs;
+            --jobs;
+            for (int job = jobs; job >= 0; --job) {
+                PlatformApplyParameters& params = parallelJobs.parameter(job);
+                params.filter = this;
+
+                int startY;
+                int endY;
+                if (!job) {
+                    startY = 0;
+                    endY = blockHeight + extraHeight;
+                    params.srcPixelArray = srcPixelArray;
+                    params.dstPixelArray = tmpPixelArray;
+                } else {
+                    if (job == jobs) {
+                        startY = job * blockHeight - extraHeight;
+                        endY = paintSize.height();
+                    } else {
+                        startY = job * blockHeight - extraHeight;
+                        endY = (job + 1) * blockHeight + extraHeight;
+                    }
+
+                    int blockSize = (endY - startY) * scanline;
+                    params.srcPixelArray = ByteArray::create(blockSize);
+                    params.dstPixelArray = ByteArray::create(blockSize);
+                    memcpy(params.srcPixelArray->data(), srcPixelArray->data() + startY * scanline, blockSize);
+                }
+
+                params.width = paintSize.width();
+                params.height = endY - startY;
+                params.kernelSizeX = kernelSizeX;
+                params.kernelSizeY = kernelSizeY;
+            }
+
+            parallelJobs.execute();
+
+            // Copy together the parts of the image.
+            for (int job = jobs; job >= 1; --job) {
+                PlatformApplyParameters& params = parallelJobs.parameter(job);
+                int sourceOffset;
+                int destinationOffset;
+                int size;
+                if (job == jobs) {
+                    sourceOffset = extraHeight * scanline;
+                    destinationOffset = job * blockHeight * scanline;
+                    size = (paintSize.height() - job * blockHeight) * scanline;
+                } else {
+                    sourceOffset = extraHeight * scanline;
+                    destinationOffset = job * blockHeight * scanline;
+                    size = blockHeight * scanline;
+                }
+                memcpy(srcPixelArray->data() + destinationOffset, params.srcPixelArray->data() + sourceOffset, size);
+            }
+            return;
+        }
+    }
+#endif // PARALLEL_JOBS
+
     // The selection here eventually should happen dynamically on some platforms.
 #if CPU(ARM_NEON) && COMPILER(GCC)
     platformApplyNeon(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize);

Modified: trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h (86762 => 86763)


--- trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h	2011-05-18 15:06:14 UTC (rev 86762)
+++ trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h	2011-05-18 15:20:04 UTC (rev 86763)
@@ -44,20 +44,38 @@
     virtual void dump();
     
     virtual void determineAbsolutePaintRect();
+    static void calculateKernelSize(Filter*, unsigned& kernelSizeX, unsigned& kernelSizeY, float stdX, float stdY);
 
     virtual TextStream& externalRepresentation(TextStream&, int indention) const;
 
-    static void calculateKernelSize(Filter*, unsigned& kernelSizeX, unsigned& kernelSizeY, float stdX, float stdY);
+private:
+#if ENABLE(PARALLEL_JOBS)
+    static const int s_minimalRectDimension = 100 * 100; // Empirical data limit for parallel jobs
 
+    template<typename Type>
+    friend class ParallelJobs;
+
+    struct PlatformApplyParameters { // Per-job argument bundle filled in by platformApply() and consumed by platformApplyWorker().
+        FEGaussianBlur* filter; // Filter instance whose platformApplyNeon()/platformApplyGeneric() the worker calls.
+        RefPtr<ByteArray> srcPixelArray; // Passed to the blur as its source array; platformApply() reads the finished block back out of this array.
+        RefPtr<ByteArray> dstPixelArray; // Passed to the blur in the tmpPixelArray position.
+        int width; // Block width in pixels; the worker uses it as paintSize.width().
+        int height; // Block height in rows, including any extra overlap rows added by platformApply().
+        unsigned kernelSizeX; // Forwarded unchanged to the blur implementation.
+        unsigned kernelSizeY; // Forwarded unchanged to the blur implementation.
+    };
+
+    static void platformApplyWorker(PlatformApplyParameters*);
+#endif // ENABLE(PARALLEL_JOBS)
+
+    FEGaussianBlur(Filter*, float, float);
+
     static inline void kernelPosition(int boxBlur, unsigned& std, int& dLeft, int& dRight);
     inline void platformApply(ByteArray* srcPixelArray, ByteArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize);
 
     inline void platformApplyGeneric(ByteArray* srcPixelArray, ByteArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize);
     inline void platformApplyNeon(ByteArray* srcPixelArray, ByteArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize);
 
-private:
-    FEGaussianBlur(Filter*, float, float);
-
     float m_stdX;
     float m_stdY;
 };
_______________________________________________
webkit-changes mailing list
[email protected]
http://lists.webkit.org/mailman/listinfo.cgi/webkit-changes

Reply via email to