[Bf-blender-cvs] [4923af4c77] cycles-tiles-rework: Cycles: Deduplicate sample range setting and getting

Mai Lavelle Fri, 13 Jan 2017 23:27:07 -0800

Commit: 4923af4c773f64c54f1b46695ecb55525c03a437
Author: Mai Lavelle
Date:   Sat Jan 14 02:01:00 2017 -0500
Branches: cycles-tiles-rework
https://developer.blender.org/rB4923af4c773f64c54f1b46695ecb55525c03a437


Cycles: Deduplicate sample range setting and getting

===================================================================

M       intern/cycles/kernel/CMakeLists.txt
A       intern/cycles/kernel/kernel_sample_range.h
M       intern/cycles/kernel/kernels/cuda/kernel.cu
M       intern/cycles/kernel/kernels/opencl/kernel.cl

===================================================================

diff --git a/intern/cycles/kernel/CMakeLists.txt 
b/intern/cycles/kernel/CMakeLists.txt
index 56bcafbce3..e0fbc57edd 100644
--- a/intern/cycles/kernel/CMakeLists.txt
+++ b/intern/cycles/kernel/CMakeLists.txt
@@ -72,6 +72,7 @@ set(SRC_HEADERS
        kernel_projection.h
        kernel_queues.h
        kernel_random.h
+       kernel_sample_range.h
        kernel_shader.h
        kernel_shadow.h
        kernel_subsurface.h
diff --git a/intern/cycles/kernel/kernel_sample_range.h 
b/intern/cycles/kernel/kernel_sample_range.h
new file mode 100644
index 0000000000..6694a7e959
--- /dev/null
+++ b/intern/cycles/kernel/kernel_sample_range.h
@@ -0,0 +1,99 @@
+/*
+ * Copyright 2011-2016 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+ccl_device_inline void kernel_set_sample_range(
+               ccl_global SampleRange *sample_ranges,
+               int range,
+               ccl_global float *buffer,
+               ccl_global uint *rng_state,
+               int sample,
+               int sx,
+               int sy,
+               int sw,
+               int sh,
+               int offset,
+               int stride)
+{
+       ccl_global SampleRange* sample_range = &sample_ranges[range];
+
+       sample_range->buffer = buffer;
+       sample_range->rng_state = rng_state;
+       sample_range->sample = sample;
+       sample_range->x = sx;
+       sample_range->y = sy;
+       sample_range->w = sw;
+       sample_range->h = sh;
+       sample_range->offset = offset;
+       sample_range->stride = stride;
+
+       if(range == 0) {
+               sample_range->work_offset = 0;
+       }
+       else {
+               ccl_global SampleRange* prev_range = &sample_ranges[range-1];
+               sample_range->work_offset = prev_range->work_offset + 
prev_range->w * prev_range->h;
+       }
+}
+
+ccl_device_inline bool kernel_pixel_sample_for_thread(
+               KernelGlobals *kg,
+               ccl_global SampleRange *sample_ranges,
+               int num_sample_ranges,
+               int *thread_x,
+               int *thread_y,
+               int *thread_sample,
+               ccl_global SampleRange **thread_sample_range)
+{
+       /* order threads to maintain inner block coherency */
+       const int group_id = ccl_group_id(0) + ccl_num_groups(0) * 
ccl_group_id(1);
+       const int local_thread_id = ccl_local_id(0) + ccl_local_id(1) * 
ccl_local_size(0);
+
+       const int thread_id = group_id * (ccl_local_size(0) * 
ccl_local_size(1)) + local_thread_id;
+
+       /* find which sample range belongs to this thread */
+       ccl_global SampleRange* sample_range = NULL;
+
+       for(int i = 0; i < num_sample_ranges; i++) {
+               if(thread_id >= sample_ranges[i].work_offset &&
+                  thread_id < sample_ranges[i].work_offset + 
sample_ranges[i].w * sample_ranges[i].h)
+               {
+                       sample_range = &sample_ranges[i];
+               }
+       }
+
+       /* check if theres work for this thread */
+       if(!sample_range) {
+               return false;
+       }
+
+       int work_offset = thread_id - sample_range->work_offset;
+
+       if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) 
{
+               return false;
+       }
+
+       if(thread_sample_range) *thread_sample_range = sample_range;
+       if(thread_x) *thread_x = (work_offset % sample_range->w) + 
sample_range->x;
+       if(thread_y) *thread_y = (work_offset / sample_range->w) + 
sample_range->y;
+       if(thread_sample) *thread_sample = sample_range->sample;
+
+       return true;
+}
+
+CCL_NAMESPACE_END
+
diff --git a/intern/cycles/kernel/kernels/cuda/kernel.cu 
b/intern/cycles/kernel/kernels/cuda/kernel.cu
index 2c73ba4888..e90c2c902c 100644
--- a/intern/cycles/kernel/kernels/cuda/kernel.cu
+++ b/intern/cycles/kernel/kernels/cuda/kernel.cu
@@ -24,6 +24,7 @@
 #include "../../kernel_path.h"
 #include "../../kernel_path_branched.h"
 #include "../../kernel_bake.h"
+#include "../../kernel_sample_range.h"
 
 /* device data taken from CUDA occupancy calculator */
 
@@ -123,68 +124,37 @@
 
 /* kernels */
 extern "C" __global__ void
-kernel_cuda_set_sample_range(SampleRange *sample_ranges, int range, float 
*buffer, uint *rng_state, int sample, int sx, int sy, int sw, int sh, int 
offset, int stride)
+kernel_cuda_set_sample_range(
+       SampleRange *sample_ranges,
+       int range,
+       float *buffer,
+       uint *rng_state,
+       int sample,
+       int sx,
+       int sy,
+       int sw,
+       int sh,
+       int offset,
+       int stride)
 {
-       SampleRange* sample_range = &sample_ranges[range];
-
-       sample_range->buffer = buffer;
-       sample_range->rng_state = rng_state;
-       sample_range->sample = sample;
-       sample_range->x = sx;
-       sample_range->y = sy;
-       sample_range->w = sw;
-       sample_range->h = sh;
-       sample_range->offset = offset;
-       sample_range->stride = stride;
-
-       if(range == 0) {
-               sample_range->work_offset = 0;
-       }
-       else {
-               SampleRange* prev_range = &sample_ranges[range-1];
-               sample_range->work_offset = prev_range->work_offset + 
prev_range->w * prev_range->h;
-       }
+       kernel_set_sample_range(sample_ranges, range, buffer, rng_state, 
sample, sx, sy, sw, sh, offset, stride);
 }
 
 extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, CUDA_KERNEL_MAX_REGISTERS)
 kernel_cuda_path_trace(SampleRange *sample_ranges, int num_sample_ranges)
 {
-       /* order threads to maintain inner block coherency */
-       const int group_id = blockIdx.x + gridDim.x * blockIdx.y;
-       const int local_thread_id = threadIdx.x + threadIdx.y * blockDim.x;
-
-       const int thread_id = group_id * (blockDim.x * blockDim.x) + 
local_thread_id;
-
-       /* find which sample range belongs to this thread */
-       SampleRange* sample_range = NULL;
-
-       for(int i = 0; i < num_sample_ranges; i++) {
-               if(thread_id >= sample_ranges[i].work_offset &&
-                  thread_id < sample_ranges[i].work_offset + 
sample_ranges[i].w * sample_ranges[i].h)
-               {
-                       sample_range = &sample_ranges[i];
-               }
-       }
-
-       /* check if theres work for this thread */
-       if(!sample_range) {
-               return;
-       }
-
-       int work_offset = thread_id - sample_range->work_offset;
+       ccl_global SampleRange* sample_range;
+       int x, y, sample;
 
-       if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) 
{
+       if(!kernel_pixel_sample_for_thread(kg, sample_ranges, 
num_sample_ranges, &x, &y, &sample, &sample_range)) {
                return;
        }
 
-       int x = (work_offset % sample_range->w) + sample_range->x;
-       int y = (work_offset / sample_range->w) + sample_range->y;
-
        kernel_path_trace(NULL,
                          sample_range->buffer,
                          sample_range->rng_state,
-                         sample_range->sample,
+                         sample,
                          x, y,
                          sample_range->offset,
                          sample_range->stride);
@@ -195,41 +165,17 @@ extern "C" __global__ void
 CUDA_LAUNCH_BOUNDS(CUDA_THREADS_BLOCK_WIDTH, 
CUDA_KERNEL_BRANCHED_MAX_REGISTERS)
 kernel_cuda_branched_path_trace(SampleRange *sample_ranges, int 
num_sample_ranges)
 {
-       /* order threads to maintain inner block coherency */
-       const int group_id = blockIdx.x + gridDim.x * blockIdx.y;
-       const int local_thread_id = threadIdx.x + threadIdx.y * blockDim.x;
-
-       const int thread_id = group_id * (blockDim.x * blockDim.x) + 
local_thread_id;
-
-       /* find which sample range belongs to this thread */
-       SampleRange* sample_range = NULL;
-
-       for(int i = 0; i < num_sample_ranges; i++) {
-               if(thread_id >= sample_ranges[i].work_offset &&
-                  thread_id < sample_ranges[i].work_offset + 
sample_ranges[i].w * sample_ranges[i].h)
-               {
-                       sample_range = &sample_ranges[i];
-               }
-       }
+       ccl_global SampleRange* sample_range;
+       int x, y, sample;
 
-       /* check if theres work for this thread */
-       if(!sample_range) {
+       if(!kernel_pixel_sample_for_thread(kg, sample_ranges, 
num_sample_ranges, &x, &y, &sample, &sample_range)) {
                return;
        }
 
-       int work_offset = thread_id - sample_range->work_offset;
-
-       if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) 
{
-               return;
-       }
-
-       int x = (work_offset % sample_range->w) + sample_range->x;
-       int y = (work_offset / sample_range->w) + sample_range->y;
-
        kernel_branched_path_trace(NULL,
                          sample_range->buffer,
                          sample_range->rng_state,
-                         sample_range->sample,
+                         sample,
                          x, y,
                          sample_range->offset,
                          sample_range->stride);
diff --git a/intern/cycles/kernel/kernels/opencl/kernel.cl 
b/intern/cycles/kernel/kernels/opencl/kernel.cl
index 1816d01d3b..77bf645ef4 100644
--- a/intern/cycles/kernel/kernels/opencl/kernel.cl
+++ b/intern/cycles/kernel/kernels/opencl/kernel.cl
@@ -45,6 +45,8 @@
 
 #include "../../kernel_bake.h"
 
+#include "../../kernel_sample_range.h"
+
 #ifdef __COMPILE_ONLY_MEGAKERNEL__
 
 __kernel void kernel_ocl_path_trace(
@@ -65,41 +67,17 @@ __kernel void kernel_ocl_path_trace(
        kg->name = name;
 #include "../../kernel_textures.h"
 
-       /* order threads to maintain inner block coherency */
-       const int group_id = get_group_id(0) + get_num_groups(0) * 
get_group_id(1);
-       const int local_thread_id = get_local_id(0) + get_local_id(1) * 
get_local_size(0);
-
-       const int thread_id = group_id * (get_local_size(0) * 
get_local_size(1)) + local_thread_id;
-
-       /* find which sample range belongs to this thread */
-       ccl_global SampleRange* sample_range = NULL;
-
-       for(int i = 0; i < num_sample_ranges; i++) {
-               if(thread_id >= sample_ranges[i].work_offset &&
-                  thread_id < sample_ranges[i].work_offset + 
sample_ranges[i].w * sample_ranges[i].h)
-               {
-                       sample_range = &sample_ranges[i];
-               }
-       }
-
-       /* check if theres work for this thread */
-       if(!sample_range) {
-               return;
-       }
-
-       int work_offset = thread_id - sample_range->work_offset;
+       ccl_global SampleRange* sample_range;
+       int x, y, sample;
 
-       if(work_offset < 0 || work_offset >= sample_range->w * sample_range->h) 
{
+       if(!kernel_pixel_sample_for_thread(kg, sample_ranges, 
num_sample_ranges, &x, &y, &sample, &sample_range)) {
                return;
        }
 
-       int x = (work_offset % sample_range->w) + sample_range->x;
-       int y = (work_offset / sample_range->w) + sample_range->y;
-
        kernel_path_trace(kg,
                          sample_range->buffer,
                          sample_range->rng_state,
-                         sampl

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] [4923af4c77] cycles-tiles-rework: Cycles: Deduplicate sample range setting and getting

Reply via email to