Commit: 247b869967812891f6b77585184a3b09f3f16a18
Author: Sergey Sharybin
Date:   Mon Oct 6 14:59:26 2014 +0200
Branches: master
https://developer.blender.org/rB247b869967812891f6b77585184a3b09f3f16a18

Compositor: implement OpenCL backend for gaussian blur

Pretty much straightforward change which gives around 30%
speedup on my laptop and around 2x speedup on desktop in
the BI (which uses gts580). Tested with huge blurs (like
10% of blur) which was rather common during Caminandes.

For now OpenCL is only limited for blur size more than
100 pixels.

This is a bit experimental still, feedback is welcome.

Reviewers: jbakker, lukastoenne

Subscribers: ton

Differential Revision: https://developer.blender.org/D576

===================================================================

M       source/blender/compositor/nodes/COM_BlurNode.cpp
M       source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
M       source/blender/compositor/operations/COM_GaussianXBlurOperation.h
M       source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
M       source/blender/compositor/operations/COM_GaussianYBlurOperation.h
M       source/blender/compositor/operations/COM_OpenCLKernels.cl

===================================================================

diff --git a/source/blender/compositor/nodes/COM_BlurNode.cpp 
b/source/blender/compositor/nodes/COM_BlurNode.cpp
index 76e52c1..f3d0c33 100644
--- a/source/blender/compositor/nodes/COM_BlurNode.cpp
+++ b/source/blender/compositor/nodes/COM_BlurNode.cpp
@@ -105,6 +105,7 @@ void BlurNode::convertToOperations(NodeConverter 
&converter, const CompositorCon
                GaussianXBlurOperation *operationx = new 
GaussianXBlurOperation();
                operationx->setData(data);
                operationx->setQuality(quality);
+               operationx->checkOpenCL();
                
                converter.addOperation(operationx);
                converter.mapInputSocket(getInputSocket(1), 
operationx->getInputSocket(1));
@@ -112,6 +113,7 @@ void BlurNode::convertToOperations(NodeConverter 
&converter, const CompositorCon
                GaussianYBlurOperation *operationy = new 
GaussianYBlurOperation();
                operationy->setData(data);
                operationy->setQuality(quality);
+               operationy->checkOpenCL();
 
                converter.addOperation(operationy);
                converter.mapInputSocket(getInputSocket(1), 
operationy->getInputSocket(1));
diff --git 
a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp 
b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
index 0aefba3..0838d28 100644
--- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.cpp
@@ -21,6 +21,7 @@
  */
 
 #include "COM_GaussianXBlurOperation.h"
+#include "COM_OpenCLDevice.h"
 #include "BLI_math.h"
 #include "MEM_guardedalloc.h"
 
@@ -124,6 +125,32 @@ void GaussianXBlurOperation::executePixel(float output[4], 
int x, int y, void *d
        mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
 }
 
+void GaussianXBlurOperation::executeOpenCL(OpenCLDevice *device,
+                                           MemoryBuffer *outputMemoryBuffer, 
cl_mem clOutputBuffer,
+                                           MemoryBuffer **inputMemoryBuffers, 
list<cl_mem> *clMemToCleanUp,
+                                           list<cl_kernel> *clKernelsToCleanUp)
+{
+       cl_kernel gaussianXBlurOperationKernel = 
device->COM_clCreateKernel("gaussianXBlurOperationKernel", NULL);
+       cl_int filter_size = this->m_filtersize;
+
+       cl_mem gausstab = clCreateBuffer(device->getContext(),
+                                        CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+                                        sizeof(float) * (this->m_filtersize * 
2 + 1),
+                                        this->m_gausstab,
+                                        NULL);
+
+       
device->COM_clAttachMemoryBufferToKernelParameter(gaussianXBlurOperationKernel, 
0, 1, clMemToCleanUp, inputMemoryBuffers, this->m_inputProgram);
+       
device->COM_clAttachOutputMemoryBufferToKernelParameter(gaussianXBlurOperationKernel,
 2, clOutputBuffer);
+       
device->COM_clAttachMemoryBufferOffsetToKernelParameter(gaussianXBlurOperationKernel,
 3, outputMemoryBuffer);
+       clSetKernelArg(gaussianXBlurOperationKernel, 4, sizeof(cl_int), 
&filter_size);
+       device->COM_clAttachSizeToKernelParameter(gaussianXBlurOperationKernel, 
5, this);
+       clSetKernelArg(gaussianXBlurOperationKernel, 6, sizeof(cl_mem), 
&gausstab);
+
+       device->COM_clEnqueueRange(gaussianXBlurOperationKernel, 
outputMemoryBuffer, 7, this);
+
+       clReleaseMemObject(gausstab);
+}
+
 void GaussianXBlurOperation::deinitExecution()
 {
        BlurBaseOperation::deinitExecution();
diff --git a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h 
b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
index e391320..d7ae8b1 100644
--- a/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
+++ b/source/blender/compositor/operations/COM_GaussianXBlurOperation.h
@@ -40,7 +40,12 @@ public:
         * @brief the inner loop of this program
         */
        void executePixel(float output[4], int x, int y, void *data);
-       
+
+       void executeOpenCL(OpenCLDevice *device,
+                          MemoryBuffer *outputMemoryBuffer, cl_mem 
clOutputBuffer,
+                          MemoryBuffer **inputMemoryBuffers, list<cl_mem> 
*clMemToCleanUp,
+                          list<cl_kernel> *clKernelsToCleanUp);
+
        /**
         * @brief initialize the execution
         */
@@ -53,5 +58,9 @@ public:
        
        void *initializeTileData(rcti *rect);
        bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation 
*readOperation, rcti *output);
+
+       void checkOpenCL() {
+               this->setOpenCL(m_data.sizex >= 128);
+       }
 };
 #endif
diff --git 
a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp 
b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
index a05a1ab..6172f95 100644
--- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
+++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.cpp
@@ -21,6 +21,7 @@
  */
 
 #include "COM_GaussianYBlurOperation.h"
+#include "COM_OpenCLDevice.h"
 #include "BLI_math.h"
 #include "MEM_guardedalloc.h"
 
@@ -126,6 +127,32 @@ void GaussianYBlurOperation::executePixel(float output[4], 
int x, int y, void *d
        mul_v4_v4fl(output, color_accum, 1.0f / multiplier_accum);
 }
 
+void GaussianYBlurOperation::executeOpenCL(OpenCLDevice *device,
+                                           MemoryBuffer *outputMemoryBuffer, 
cl_mem clOutputBuffer,
+                                           MemoryBuffer **inputMemoryBuffers, 
list<cl_mem> *clMemToCleanUp,
+                                           list<cl_kernel> *clKernelsToCleanUp)
+{
+       cl_kernel gaussianYBlurOperationKernel = 
device->COM_clCreateKernel("gaussianYBlurOperationKernel", NULL);
+       cl_int filter_size = this->m_filtersize;
+
+       cl_mem gausstab = clCreateBuffer(device->getContext(),
+                                        CL_MEM_READ_ONLY | CL_MEM_USE_HOST_PTR,
+                                        sizeof(float) * (this->m_filtersize * 
2 + 1),
+                                        this->m_gausstab,
+                                        NULL);
+
+       
device->COM_clAttachMemoryBufferToKernelParameter(gaussianYBlurOperationKernel, 
0, 1, clMemToCleanUp, inputMemoryBuffers, this->m_inputProgram);
+       
device->COM_clAttachOutputMemoryBufferToKernelParameter(gaussianYBlurOperationKernel,
 2, clOutputBuffer);
+       
device->COM_clAttachMemoryBufferOffsetToKernelParameter(gaussianYBlurOperationKernel,
 3, outputMemoryBuffer);
+       clSetKernelArg(gaussianYBlurOperationKernel, 4, sizeof(cl_int), 
&filter_size);
+       device->COM_clAttachSizeToKernelParameter(gaussianYBlurOperationKernel, 
5, this);
+       clSetKernelArg(gaussianYBlurOperationKernel, 6, sizeof(cl_mem), 
&gausstab);
+
+       device->COM_clEnqueueRange(gaussianYBlurOperationKernel, 
outputMemoryBuffer, 7, this);
+
+       clReleaseMemObject(gausstab);
+}
+
 void GaussianYBlurOperation::deinitExecution()
 {
        BlurBaseOperation::deinitExecution();
diff --git a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h 
b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
index 22b6562..4b5751c 100644
--- a/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
+++ b/source/blender/compositor/operations/COM_GaussianYBlurOperation.h
@@ -40,7 +40,12 @@ public:
         * the inner loop of this program
         */
        void executePixel(float output[4], int x, int y, void *data);
-       
+
+       void executeOpenCL(OpenCLDevice *device,
+                          MemoryBuffer *outputMemoryBuffer, cl_mem 
clOutputBuffer,
+                          MemoryBuffer **inputMemoryBuffers, list<cl_mem> 
*clMemToCleanUp,
+                          list<cl_kernel> *clKernelsToCleanUp);
+
        /**
         * @brief initialize the execution
         */
@@ -53,5 +58,9 @@ public:
        
        void *initializeTileData(rcti *rect);
        bool determineDependingAreaOfInterest(rcti *input, ReadBufferOperation 
*readOperation, rcti *output);
+
+       void checkOpenCL() {
+               this->setOpenCL(m_data.sizex >= 128);
+       }
 };
 #endif
diff --git a/source/blender/compositor/operations/COM_OpenCLKernels.cl 
b/source/blender/compositor/operations/COM_OpenCLKernels.cl
index 00b3825..1b965eb 100644
--- a/source/blender/compositor/operations/COM_OpenCLKernels.cl
+++ b/source/blender/compositor/operations/COM_OpenCLKernels.cl
@@ -250,3 +250,66 @@ __kernel void directionalBlurKernel(__read_only image2d_t 
inputImage,  __write_o
 
        write_imagef(output, coords, col);
 }
+
+// KERNEL --- GAUSSIAN BLUR ---
+__kernel void gaussianXBlurOperationKernel(__read_only image2d_t inputImage,
+                                           int2 offsetInput,
+                                           __write_only image2d_t output,
+                                           int2 offsetOutput,
+                                           int filter_size,
+                                           int2 dimension,
+                                           __global float *gausstab,
+                                           int2 offset)
+{
+       float4 color = {0.0f, 0.0f, 0.0f, 0.0f};
+       int2 coords = {get_global_id(0), get_global_id(1)};
+       coords += offset;
+       const int2 realCoordinate = coords + offsetOutput;
+       int2 inputCoordinate = realCoordinate - offsetInput;
+       float weight = 0.0f;
+
+       int xmin = max(realCoordinate.x - filter_size,     0) - offsetInput.x;
+       int xmax = min(realCoordinate.x + filter_size + 1, dimension.x) - 
offsetInput.x;
+
+       for (int nx = xmin, i = max(filter_size - realCoordinate.x, 0); nx < 
xmax; ++nx, ++i) {
+               float w = gausstab[i];
+               inputCoordinate.x = nx;
+               color += read_imagef(inputImage, SAMPLER_NEAREST, 
inputCoordinate) * w;
+               weight += w;
+       }
+
+       color *= (1.0f / weight);
+
+       write_imagef(output, coords, color);
+}
+
+__kernel void gaussianYBlurOperationKernel(__read_only image2d_t inputImage,
+                                           int2 offsetInput,
+                                           __write_only image2d_t output,
+                                           int2 offsetOutput,
+                                           int filter_size,
+                                           int2 dimension,
+                                           __global float *gausstab,
+                                           int2 offset)
+{
+       float4 color = {0.0f, 0.0f, 0.0f, 0.0f};
+       int2 coords = {get_global_id(0), get_global_id(1)};
+       coords += offset;
+       const int2 realCoordinate = coords + offsetOutput;
+       int2 inputCoordinate = realCoordinate - offsetInput;
+       float weight = 0.0f;
+
+       int ymin = max(realCoordinate.y 

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to