Commit: 07db4d7e3ae725e590a22165fe0c1c3a113b1a85
Author: varunsundar08
Date: Thu Apr 16 16:50:31 2015 +0530
Branches: cycles_kernel_split
https://developer.blender.org/rB07db4d7e3ae725e590a22165fe0c1c3a113b1a85
[BCYCLES-213] Improve tile splitting logic
===================================================================
M intern/cycles/device/device_opencl.cpp
===================================================================
diff --git a/intern/cycles/device/device_opencl.cpp
b/intern/cycles/device/device_opencl.cpp
index 94a2d90..652731c 100644
--- a/intern/cycles/device/device_opencl.cpp
+++ b/intern/cycles/device/device_opencl.cpp
@@ -3275,39 +3275,67 @@ public:
}
/* Considers the scene properties, global memory available in the device
- * and returns a rectanglular tile dimension that should render on
split kernel
+ * and returns a rectangular tile dimension (approx the maximum)
+ * that should render on split kernel
*/
- int2 get_render_feasible_tile_size(size_t feasible_global_work_size) {
- int2 render_feasible_tile_size;
+ int2 get_max_render_feasible_tile_size(size_t
feasible_global_work_size) {
+ int2 max_render_feasible_tile_size;
int square_root_val = sqrt(feasible_global_work_size);
- render_feasible_tile_size.x = square_root_val;
- render_feasible_tile_size.y = square_root_val;
+ max_render_feasible_tile_size.x = square_root_val;
+ max_render_feasible_tile_size.y = square_root_val;
- /* ciel round-off render_feasible_tile_size */
+ /* ceil round-off max_render_feasible_tile_size */
int2 ceil_render_feasible_tile_size;
- ceil_render_feasible_tile_size.x =
(((render_feasible_tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
SPLIT_KERNEL_LOCAL_SIZE_X;
- ceil_render_feasible_tile_size.y =
(((render_feasible_tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
SPLIT_KERNEL_LOCAL_SIZE_Y;
+ ceil_render_feasible_tile_size.x =
(((max_render_feasible_tile_size.x - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
SPLIT_KERNEL_LOCAL_SIZE_X;
+ ceil_render_feasible_tile_size.y =
(((max_render_feasible_tile_size.y - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
SPLIT_KERNEL_LOCAL_SIZE_Y;
if (ceil_render_feasible_tile_size.x *
ceil_render_feasible_tile_size.y <= feasible_global_work_size) {
return ceil_render_feasible_tile_size;
}
- /* floor round-off render_feasible_tile_size */
+ /* floor round-off max_render_feasible_tile_size */
int2 floor_render_feasible_tile_size;
- floor_render_feasible_tile_size.x =
(render_feasible_tile_size.x / SPLIT_KERNEL_LOCAL_SIZE_X) *
SPLIT_KERNEL_LOCAL_SIZE_X;
- floor_render_feasible_tile_size.y =
(render_feasible_tile_size.y / SPLIT_KERNEL_LOCAL_SIZE_Y) *
SPLIT_KERNEL_LOCAL_SIZE_Y;
+ floor_render_feasible_tile_size.x =
(max_render_feasible_tile_size.x / SPLIT_KERNEL_LOCAL_SIZE_X) *
SPLIT_KERNEL_LOCAL_SIZE_X;
+ floor_render_feasible_tile_size.y =
(max_render_feasible_tile_size.y / SPLIT_KERNEL_LOCAL_SIZE_Y) *
SPLIT_KERNEL_LOCAL_SIZE_Y;
return floor_render_feasible_tile_size;
}
- /* Splits existing tile into multiple tiles of tile size
render_feasible_tile_size */
- vector<RenderTile> split_tiles(RenderTile rtile, int2
render_feasible_tile_size) {
+ /* Try splitting the current tile into multiple smaller
+  * almost-square-tiles.
+  *
+  * Starts from rtile.w x rtile.h, each rounded up to a multiple of
+  * SPLIT_KERNEL_LOCAL_SIZE_X / SPLIT_KERNEL_LOCAL_SIZE_Y, and keeps
+  * halving the larger dimension until d_w * d_h no longer exceeds
+  * max_render_feasible_tile_size.x * max_render_feasible_tile_size.y.
+  * Returns the resulting width in .x and height in .y.
+  */
+ int2 get_split_tile_size(RenderTile rtile, int2
max_render_feasible_tile_size) {
+ int2 split_tile_size;
+ int num_global_threads = max_render_feasible_tile_size.x *
max_render_feasible_tile_size.y;
+ int d_w = rtile.w;
+ int d_h = rtile.h;
+
+ /* Ceil round off d_w and d_h: each is rounded up to the next
+  * multiple of its local size constant */
+ d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X) + 1) *
SPLIT_KERNEL_LOCAL_SIZE_X;
+ d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y) + 1) *
SPLIT_KERNEL_LOCAL_SIZE_Y;
+
+ while (d_w * d_h > num_global_threads) {
+ /* Halve the longer dimension (width is chosen on a tie), then round
+  * the halved value back up to a multiple of its local size.
+  * NOTE(review): when the dimension being halved already equals its
+  * local size (and that local size is > 1), halving and rounding up
+  * gives back the same value, so this loop would make no progress if
+  * the product still exceeds num_global_threads -- confirm callers
+  * always pass a large enough max_render_feasible_tile_size. */
+ if (d_w >= d_h) {
+ d_w = d_w / 2;
+ d_w = (((d_w - 1) / SPLIT_KERNEL_LOCAL_SIZE_X)
+ 1) * SPLIT_KERNEL_LOCAL_SIZE_X;
+ }
+ else {
+ d_h = d_h / 2;
+ d_h = (((d_h - 1) / SPLIT_KERNEL_LOCAL_SIZE_Y)
+ 1) * SPLIT_KERNEL_LOCAL_SIZE_Y;
+ }
+ }
+ split_tile_size.x = d_w;
+ split_tile_size.y = d_h;
+ return split_tile_size;
+ }
+
+ /* Splits existing tile into multiple tiles of tile size
split_tile_size */
+ vector<RenderTile> split_tiles(RenderTile rtile, int2 split_tile_size) {
vector<RenderTile> to_path_trace_rtile;
int d_w = rtile.w;
int d_h = rtile.h;
- int num_tiles_x = (((d_w - 1) / render_feasible_tile_size.x) +
1);
- int num_tiles_y = (((d_h - 1) / render_feasible_tile_size.y) +
1);
+ int num_tiles_x = (((d_w - 1) / split_tile_size.x) + 1);
+ int num_tiles_y = (((d_h - 1) / split_tile_size.y) + 1);
/* buffer and rng_state offset calc */
size_t offset_index = rtile.offset + (rtile.x + rtile.y *
rtile.stride);
@@ -3321,10 +3349,10 @@ public:
for (int tile_iter_x = 0; tile_iter_x < num_tiles_x;
tile_iter_x++) {
int rtile_index = tile_iter_y * num_tiles_x +
tile_iter_x;
-
to_path_trace_rtile[rtile_index].rng_state_offset_x = offset_x + tile_iter_x *
render_feasible_tile_size.x;
-
to_path_trace_rtile[rtile_index].rng_state_offset_y = offset_y + tile_iter_y *
render_feasible_tile_size.y;
-
to_path_trace_rtile[rtile_index].buffer_offset_x = offset_x + tile_iter_x *
render_feasible_tile_size.x;
-
to_path_trace_rtile[rtile_index].buffer_offset_y = offset_y + tile_iter_y *
render_feasible_tile_size.y;
+
to_path_trace_rtile[rtile_index].rng_state_offset_x = offset_x + tile_iter_x *
split_tile_size.x;
+
to_path_trace_rtile[rtile_index].rng_state_offset_y = offset_y + tile_iter_y *
split_tile_size.y;
+
to_path_trace_rtile[rtile_index].buffer_offset_x = offset_x + tile_iter_x *
split_tile_size.x;
+
to_path_trace_rtile[rtile_index].buffer_offset_y = offset_y + tile_iter_y *
split_tile_size.y;
to_path_trace_rtile[rtile_index].start_sample =
rtile.start_sample;
to_path_trace_rtile[rtile_index].num_samples =
rtile.num_samples;
to_path_trace_rtile[rtile_index].sample =
rtile.sample;
@@ -3334,20 +3362,17 @@ public:
to_path_trace_rtile[rtile_index].buffers =
rtile.buffers;
to_path_trace_rtile[rtile_index].buffer =
rtile.buffer;
to_path_trace_rtile[rtile_index].rng_state =
rtile.rng_state;
- to_path_trace_rtile[rtile_index].x = rtile.x +
(tile_iter_x * render_feasible_tile_size.x);
- to_path_trace_rtile[rtile_index].y = rtile.y +
(tile_iter_y * render_feasible_tile_size.y);
+ to_path_trace_rtile[rtile_index].x = rtile.x +
(tile_iter_x * split_tile_size.x);
+ to_path_trace_rtile[rtile_index].y = rtile.y +
(tile_iter_y * split_tile_size.y);
to_path_trace_rtile[rtile_index].buffer_rng_state_stride = rtile.stride;
- /* Set max render feasible tile size */
-
to_path_trace_rtile[rtile_index].max_render_feasible_tile_size =
render_feasible_tile_size;
-
/* Fill width and height of the new render tile
*/
to_path_trace_rtile[rtile_index].w =
(tile_iter_x == (num_tiles_x - 1)) ?
- (d_w - (tile_iter_x *
render_feasible_tile_size.x)) /* Border tile */
- : render_feasible_tile_size.x;
+ (d_w - (tile_iter_x *
split_tile_size.x)) /* Border tile */
+ : split_tile_size.x;
to_path_trace_rtile[rtile_index].h =
(tile_iter_y == (num_tiles_y - 1)) ?
- (d_h - (tile_iter_y *
render_feasible_tile_size.y)) /* Border tile */
- : render_feasible_tile_size.y;
+ (d_h - (tile_iter_y *
split_tile_size.y)) /* Border tile */
+ : split_tile_size.y;
to_path_trace_rtile[rtile_index].stride =
to_path_trace_rtile[rtile_index].w;
}
@@ -3370,6 +3395,7 @@ public:
#ifdef __SPLIT_KERNEL__
bool initialize_data_and_check_render_feasibility =
false;
bool need_to_split_tiles_further = false;
+ int2 max_render_feasible_tile_size;
size_t feasible_global_work_size;
#endif
@@ -3404,24 +3430,27 @@ public:
/* Check render feasibility */
feasible_global_work_size =
get_feasible_global_work_size(tile,
CL_MEM_PTR(const_mem_map["__data"]->device_pointer));
+ max_render_feasible_tile_size =
get_max_render_feasible_tile_size(feasible_global_work_size);
need_to_split_tiles_further =
need_to_split_tile(tile.tile_size.x, tile.tile_size.y,
feasible_global_work_size);
- /* Print message to console */
- if (need_to_split_tiles_further &&
background) {
- int2 render_feasible_tile_size
= get_render_feasible_tile_size(feasible_global_work_size);
- fprintf(stderr, "Message :
Tiles need to be split further inside path trace (due to in-sufficient
device-global-memory for split kernel to function) \n\
-The user set tile size %dx%d will be split into tiles of dimension %dx%d to
render \n", tile.tile_size.x, tile.tile_size.y, render_feasible_tile_size.x,
render_feasible_tile_size.y);
- }
-
initialize_data_and_check_render_feasibility = true;
}
if (need_to_split_tiles_further) {
- int2 render_feasible_tile_size =
get_render_feasible_tile_size(feasible_global_work_size);
- vector<RenderTile>
to_path_trace_render_tiles = split_tiles(tile, render_feasible_tile_size);
+
+ int2 split_tile_size =
get_split_tile_size(tile, max_render_feasible_tile_size);
+ vector<RenderTile>
to_path_trace_render_tiles = split_tiles(tile, split_tile_size);
+
+ /* Print message to console */
+ if (background &&
(to_path_trace_render_tiles.size() > 1)) {
+ fprintf(stderr, "Message :
Tiles need to be split further inside path trace (due to insufficient
device-global-memory for split kernel to function) \n\
+The current tile of dimensions %dx%d is split into tiles of dimension %dx%d
for render \n", tile.w, tile.h, split_tile_size.x, split_tile_size.y);
+ }
/* Process all split tiles */
for (int tile_iter = 0; tile_iter <
to_path_trace_render_tiles.size(); tile_iter++) {
+ /* Set
max_render_feasible_tile_size for all tiles */
+
to_path_trace_render_tiles[tile_iter].max_render_feasible_tile_size =
max_render_feasible_tile_size;
/* The second argument is dummy
*/
path_trace(to_path_trace_render_tiles[tile_iter], 0);
}
_______________________________________________
Bf-blender-cvs mailing list
[email protected]
http://lists.blender.org/mailman/listinfo/bf-blender-cvs