- Revision
- 86763
- Author
- [email protected]
- Date
- 2011-05-18 08:20:04 -0700 (Wed, 18 May 2011)
Log Message
2011-05-18 Renata Hodovan <[email protected]>
Reviewed by Nikolas Zimmermann.
Apply the ParallelJobs support to FEGaussianBlur
https://bugs.webkit.org/show_bug.cgi?id=61049
The Gaussian blur filter of SVG can consume lots of resources if it is
applied to a large area. The computation can be distributed to multiple
cores if the architecture supports it.
The average performance progression is about 15% on dual-core machines.
Developed in cooperation with Gabor Loki and Zoltan Herczeg.
* platform/graphics/filters/FEGaussianBlur.cpp:
(WebCore::FEGaussianBlur::platformApplyWorker):
(WebCore::FEGaussianBlur::platformApply):
* platform/graphics/filters/FEGaussianBlur.h:
Modified Paths
Diff
Modified: trunk/Source/WebCore/ChangeLog (86762 => 86763)
--- trunk/Source/WebCore/ChangeLog 2011-05-18 15:06:14 UTC (rev 86762)
+++ trunk/Source/WebCore/ChangeLog 2011-05-18 15:20:04 UTC (rev 86763)
@@ -1,3 +1,23 @@
+2011-05-18 Renata Hodovan <[email protected]>
+
+ Reviewed by Nikolas Zimmermann.
+
+ Apply the ParallelJobs support to FEGaussianBlur
+ https://bugs.webkit.org/show_bug.cgi?id=61049
+
+ The Gaussian blur filter of SVG can consume lots of resources if it is
+ applied to a large area. The computation can be distributed to multiple
+ cores if the architecture supports it.
+ The average performance progression is about 15% on dual-core machines.
+
+ Developed in cooperation with Gabor Loki and Zoltan Herczeg.
+
+
+ * platform/graphics/filters/FEGaussianBlur.cpp:
+ (WebCore::FEGaussianBlur::platformApplyWorker):
+ (WebCore::FEGaussianBlur::platformApply):
+ * platform/graphics/filters/FEGaussianBlur.h:
+
2011-05-18 Pavel Feldman <[email protected]>
Reviewed by Yury Semikhatsky.
Modified: trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp (86762 => 86763)
--- trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp 2011-05-18 15:06:14 UTC (rev 86762)
+++ trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.cpp 2011-05-18 15:20:04 UTC (rev 86763)
@@ -35,6 +35,7 @@
#include <wtf/ByteArray.h>
#include <wtf/MathExtras.h>
+#include <wtf/ParallelJobs.h>
using std::max;
@@ -130,8 +131,90 @@
}
}
+#if ENABLE(PARALLEL_JOBS)
+void FEGaussianBlur::platformApplyWorker(PlatformApplyParameters* parameters)
+{
+ IntSize paintSize(parameters->width, parameters->height);
+#if CPU(ARM_NEON) && COMPILER(GCC)
+ parameters->filter->platformApplyNeon(parameters->srcPixelArray.get(), parameters->dstPixelArray.get(),
+ parameters->kernelSizeX, parameters->kernelSizeY, paintSize);
+#else
+ parameters->filter->platformApplyGeneric(parameters->srcPixelArray.get(), parameters->dstPixelArray.get(),
+ parameters->kernelSizeX, parameters->kernelSizeY, paintSize);
+#endif
+}
+#endif
+
inline void FEGaussianBlur::platformApply(ByteArray* srcPixelArray, ByteArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize)
{
+#if ENABLE(PARALLEL_JOBS)
+ int scanline = 4 * paintSize.width();
+ int extraHeight = 3 * kernelSizeY * 0.5f;
+ int optimalThreadNumber = (paintSize.width() * paintSize.height()) / (s_minimalRectDimension + extraHeight * paintSize.width());
+
+ if (optimalThreadNumber > 1) {
+ ParallelJobs<PlatformApplyParameters> parallelJobs(&platformApplyWorker, optimalThreadNumber);
+
+ int jobs = parallelJobs.numberOfJobs();
+ if (jobs > 1) {
+ int blockHeight = paintSize.height() / jobs;
+ --jobs;
+ for (int job = jobs; job >= 0; --job) {
+ PlatformApplyParameters& params = parallelJobs.parameter(job);
+ params.filter = this;
+
+ int startY;
+ int endY;
+ if (!job) {
+ startY = 0;
+ endY = blockHeight + extraHeight;
+ params.srcPixelArray = srcPixelArray;
+ params.dstPixelArray = tmpPixelArray;
+ } else {
+ if (job == jobs) {
+ startY = job * blockHeight - extraHeight;
+ endY = paintSize.height();
+ } else {
+ startY = job * blockHeight - extraHeight;
+ endY = (job + 1) * blockHeight + extraHeight;
+ }
+
+ int blockSize = (endY - startY) * scanline;
+ params.srcPixelArray = ByteArray::create(blockSize);
+ params.dstPixelArray = ByteArray::create(blockSize);
+ memcpy(params.srcPixelArray->data(), srcPixelArray->data() + startY * scanline, blockSize);
+ }
+
+ params.width = paintSize.width();
+ params.height = endY - startY;
+ params.kernelSizeX = kernelSizeX;
+ params.kernelSizeY = kernelSizeY;
+ }
+
+ parallelJobs.execute();
+
+ // Copy together the parts of the image.
+ for (int job = jobs; job >= 1; --job) {
+ PlatformApplyParameters& params = parallelJobs.parameter(job);
+ int sourceOffset;
+ int destinationOffset;
+ int size;
+ if (job == jobs) {
+ sourceOffset = extraHeight * scanline;
+ destinationOffset = job * blockHeight * scanline;
+ size = (paintSize.height() - job * blockHeight) * scanline;
+ } else {
+ sourceOffset = extraHeight * scanline;
+ destinationOffset = job * blockHeight * scanline;
+ size = blockHeight * scanline;
+ }
+ memcpy(srcPixelArray->data() + destinationOffset, params.srcPixelArray->data() + sourceOffset, size);
+ }
+ return;
+ }
+ }
+#endif // PARALLEL_JOBS
+
// The selection here eventually should happen dynamically on some platforms.
#if CPU(ARM_NEON) && COMPILER(GCC)
platformApplyNeon(srcPixelArray, tmpPixelArray, kernelSizeX, kernelSizeY, paintSize);
Modified: trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h (86762 => 86763)
--- trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h 2011-05-18 15:06:14 UTC (rev 86762)
+++ trunk/Source/WebCore/platform/graphics/filters/FEGaussianBlur.h 2011-05-18 15:20:04 UTC (rev 86763)
@@ -44,20 +44,38 @@
virtual void dump();
virtual void determineAbsolutePaintRect();
+ static void calculateKernelSize(Filter*, unsigned& kernelSizeX, unsigned& kernelSizeY, float stdX, float stdY);
virtual TextStream& externalRepresentation(TextStream&, int indention) const;
- static void calculateKernelSize(Filter*, unsigned& kernelSizeX, unsigned& kernelSizeY, float stdX, float stdY);
+private:
+#if ENABLE(PARALLEL_JOBS)
+ static const int s_minimalRectDimension = 100 * 100; // Empirical data limit for parallel jobs
+ template<typename Type>
+ friend class ParallelJobs;
+
+ struct PlatformApplyParameters {
+ FEGaussianBlur* filter;
+ RefPtr<ByteArray> srcPixelArray;
+ RefPtr<ByteArray> dstPixelArray;
+ int width;
+ int height;
+ unsigned kernelSizeX;
+ unsigned kernelSizeY;
+ };
+
+ static void platformApplyWorker(PlatformApplyParameters*);
+#endif // ENABLE(PARALLEL_JOBS)
+
+ FEGaussianBlur(Filter*, float, float);
+
static inline void kernelPosition(int boxBlur, unsigned& std, int& dLeft, int& dRight);
inline void platformApply(ByteArray* srcPixelArray, ByteArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize);
inline void platformApplyGeneric(ByteArray* srcPixelArray, ByteArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize);
inline void platformApplyNeon(ByteArray* srcPixelArray, ByteArray* tmpPixelArray, unsigned kernelSizeX, unsigned kernelSizeY, IntSize& paintSize);
-private:
- FEGaussianBlur(Filter*, float, float);
-
float m_stdX;
float m_stdY;
};