Commit: 42bd06306323df702bf108c1b1a7529086ba28f6
Author: Lukas Stockner
Date:   Sat Sep 3 17:09:26 2016 +0200
Branches: soc-2016-cycles_denoising
https://developer.blender.org/rB42bd06306323df702bf108c1b1a7529086ba28f6

Cycles: Temporarily add old kernel for comparison

===================================================================

M       intern/cycles/device/device_cpu.cpp
M       intern/cycles/kernel/kernel_filter.h
A       intern/cycles/kernel/kernel_filter_old.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu.h
M       intern/cycles/kernel/kernels/cpu/kernel_cpu_impl.h

===================================================================

diff --git a/intern/cycles/device/device_cpu.cpp 
b/intern/cycles/device/device_cpu.cpp
index 7b20bfc..5dab9bb 100644
--- a/intern/cycles/device/device_cpu.cpp
+++ b/intern/cycles/device/device_cpu.cpp
@@ -419,11 +419,15 @@ public:
        {
                void(*filter_estimate_params_kernel)(KernelGlobals*, int, 
float*, int, int, void*, int4);
                void(*filter_final_pass_kernel)(KernelGlobals*, int, float*, 
int, int, int, int, float*, void*, int4, int4);
+               void(*filter_old_1)(KernelGlobals*, float*, int, int, int, int, 
float, float*, int4);
+               void(*filter_old_2)(KernelGlobals*, float*, float*, int, int, 
int, int, int, int, float, float*, int4, int4);
 
 #ifdef WITH_CYCLES_OPTIMIZED_KERNEL_AVX2
                if(system_cpu_support_avx2()) {
                        filter_estimate_params_kernel = 
kernel_cpu_avx2_filter_estimate_params;
                        filter_final_pass_kernel = 
kernel_cpu_avx2_filter_final_pass;
+                       filter_old_1 = kernel_cpu_avx2_filter_old_1;
+                       filter_old_2 = kernel_cpu_avx2_filter_old_2;
                }
                else
 #endif
@@ -431,6 +435,8 @@ public:
                if(system_cpu_support_avx()) {
                        filter_estimate_params_kernel = 
kernel_cpu_avx_filter_estimate_params;
                        filter_final_pass_kernel = 
kernel_cpu_avx_filter_final_pass;
+                       filter_old_1 = kernel_cpu_avx_filter_old_1;
+                       filter_old_2 = kernel_cpu_avx_filter_old_2;
                }
                else
 #endif
@@ -438,6 +444,8 @@ public:
                if(system_cpu_support_sse41()) {
                        filter_estimate_params_kernel = 
kernel_cpu_sse41_filter_estimate_params;
                        filter_final_pass_kernel = 
kernel_cpu_sse41_filter_final_pass;
+                       filter_old_1 = kernel_cpu_sse41_filter_old_1;
+                       filter_old_2 = kernel_cpu_sse41_filter_old_2;
                }
                else
 #endif
@@ -445,6 +453,8 @@ public:
                if(system_cpu_support_sse3()) {
                        filter_estimate_params_kernel = 
kernel_cpu_sse3_filter_estimate_params;
                        filter_final_pass_kernel = 
kernel_cpu_sse3_filter_final_pass;
+                       filter_old_1 = kernel_cpu_sse3_filter_old_1;
+                       filter_old_2 = kernel_cpu_sse3_filter_old_2;
                }
                else
 #endif
@@ -452,46 +462,83 @@ public:
                if(system_cpu_support_sse2()) {
                        filter_estimate_params_kernel = 
kernel_cpu_sse2_filter_estimate_params;
                        filter_final_pass_kernel = 
kernel_cpu_sse2_filter_final_pass;
+                       filter_old_1 = kernel_cpu_sse2_filter_old_1;
+                       filter_old_2 = kernel_cpu_sse2_filter_old_2;
                }
                else
 #endif
                {
                        filter_estimate_params_kernel = 
kernel_cpu_filter_estimate_params;
                        filter_final_pass_kernel = kernel_cpu_filter_final_pass;
+                       filter_old_1 = kernel_cpu_filter_old_1;
+                       filter_old_2 = kernel_cpu_filter_old_2;
                }
 
-               FilterStorage *storages = new 
FilterStorage[filter_area.z*filter_area.w];
+               bool old_filter = getenv("OLD_FILTER");
 
-               for(int y = 0; y < filter_area.w; y++) {
-                       for(int x = 0; x < filter_area.z; x++) {
-                               filter_estimate_params_kernel(kg, sample, 
filter_buffer, x + filter_area.x, y + filter_area.y, storages + y*filter_area.z 
+ x, rect);
-                       }
-               }
+               FilterStorage *storage = new 
FilterStorage[filter_area.z*filter_area.w];
+               int hw = kg->__data.integrator.half_window;
 
+               if(old_filter) {
+                       for(int y = 0; y < filter_area.w; y++) {
+                               for(int x = 0; x < filter_area.z; x++) {
+                                       filter_old_1(kg, filter_buffer, x + 
filter_area.x, y + filter_area.y, sample, hw, 1.0f, ((float*) (storage + 
y*filter_area.z + x)), rect);
+                               }
+                       }
 #ifdef WITH_CYCLES_DEBUG_FILTER
-#define WRITE_DEBUG(name, var) 
debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, 
filter_area.y, name).c_str(), &storages[0].var, filter_area.z, filter_area.w, 
sizeof(FilterStorage)/sizeof(float), filter_area.z);
-               for(int i = 0; i < DENOISE_FEATURES; i++) {
-                       WRITE_DEBUG(string_printf("mean_%d", i).c_str(), 
means[i]);
-                       WRITE_DEBUG(string_printf("scale_%d", i).c_str(), 
scales[i]);
-                       WRITE_DEBUG(string_printf("singular_%d", i).c_str(), 
singular[i]);
-                       WRITE_DEBUG(string_printf("bandwidth_%d", i).c_str(), 
bandwidth[i]);
-               }
-               WRITE_DEBUG("singular_threshold", singular_threshold);
-               WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
-               WRITE_DEBUG("global_bandwidth", global_bandwidth);
-#endif
-               for(int y = 0; y < filter_area.w; y++) {
-                       for(int x = 0; x < filter_area.z; x++) {
-                               filter_final_pass_kernel(kg, sample, 
filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, 
storages + y*filter_area.z + x, filter_area, rect);
+#define WRITE_DEBUG(name, var) 
debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, 
filter_area.y, name).c_str(), &storage[0].var, filter_area.z, filter_area.w, 
sizeof(FilterStorage)/sizeof(float), filter_area.z);
+                       for(int i = 0; i < DENOISE_FEATURES; i++) {
+                               WRITE_DEBUG(string_printf("mean_%d", 
i).c_str(), means[i]);
+                               WRITE_DEBUG(string_printf("scale_%d", 
i).c_str(), scales[i]);
+                               WRITE_DEBUG(string_printf("singular_%d", 
i).c_str(), singular[i]);
+                               WRITE_DEBUG(string_printf("bandwidth_%d", 
i).c_str(), bandwidth[i]);
+                       }
+                       WRITE_DEBUG("singular_threshold", singular_threshold);
+                       WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+                       WRITE_DEBUG("global_bandwidth", global_bandwidth);
+#endif
+                       for(int y = 0; y < filter_area.w; y++) {
+                               for(int x = 0; x < filter_area.z; x++) {
+                                       filter_old_2(kg, buffers, 
filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, sample, 
hw, 1.0f, ((float*) (storage + y*filter_area.z + x)), rect, filter_area);
+                               }
                        }
-               }
 #ifdef WITH_CYCLES_DEBUG_FILTER
-               WRITE_DEBUG("filtered_global_bandwidth", 
filtered_global_bandwidth);
-               WRITE_DEBUG("sum_weight", sum_weight);
-               WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
+                       WRITE_DEBUG("filtered_global_bandwidth", 
filtered_global_bandwidth);
+                       WRITE_DEBUG("sum_weight", sum_weight);
+                       WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
 #undef WRITE_DEBUG
 #endif
-               delete[] storages;
+               } else {
+                       for(int y = 0; y < filter_area.w; y++) {
+                               for(int x = 0; x < filter_area.z; x++) {
+                                       filter_estimate_params_kernel(kg, 
sample, filter_buffer, x + filter_area.x, y + filter_area.y, storage + 
y*filter_area.z + x, rect);
+                               }
+                       }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+#define WRITE_DEBUG(name, var) 
debug_write_pfm(string_printf("debug_%dx%d_%s.pfm", filter_area.x, 
filter_area.y, name).c_str(), &storage[0].var, filter_area.z, filter_area.w, 
sizeof(FilterStorage)/sizeof(float), filter_area.z);
+                       for(int i = 0; i < DENOISE_FEATURES; i++) {
+                               WRITE_DEBUG(string_printf("mean_%d", 
i).c_str(), means[i]);
+                               WRITE_DEBUG(string_printf("scale_%d", 
i).c_str(), scales[i]);
+                               WRITE_DEBUG(string_printf("singular_%d", 
i).c_str(), singular[i]);
+                               WRITE_DEBUG(string_printf("bandwidth_%d", 
i).c_str(), bandwidth[i]);
+                       }
+                       WRITE_DEBUG("singular_threshold", singular_threshold);
+                       WRITE_DEBUG("feature_matrix_norm", feature_matrix_norm);
+                       WRITE_DEBUG("global_bandwidth", global_bandwidth);
+#endif
+                       for(int y = 0; y < filter_area.w; y++) {
+                               for(int x = 0; x < filter_area.z; x++) {
+                                       filter_final_pass_kernel(kg, sample, 
filter_buffer, x + filter_area.x, y + filter_area.y, offset, stride, buffers, 
storage + y*filter_area.z + x, filter_area, rect);
+                               }
+                       }
+#ifdef WITH_CYCLES_DEBUG_FILTER
+                       WRITE_DEBUG("filtered_global_bandwidth", 
filtered_global_bandwidth);
+                       WRITE_DEBUG("sum_weight", sum_weight);
+                       WRITE_DEBUG("log_rmse_per_sample", log_rmse_per_sample);
+#undef WRITE_DEBUG
+#endif
+               }
+               delete[] storage;  /* storage comes from new FilterStorage[...], so it must be released with delete[], not free(). */
        }
 
        void thread_render(DeviceTask& task)
diff --git a/intern/cycles/kernel/kernel_filter.h 
b/intern/cycles/kernel/kernel_filter.h
index c32541a..e4031f6 100644
--- a/intern/cycles/kernel/kernel_filter.h
+++ b/intern/cycles/kernel/kernel_filter.h
@@ -17,6 +17,8 @@
 #include "kernel_filter_pre.h"
 #include "kernel_filter_util.h"
 
+#include "kernel_filter_old.h"
+
 CCL_NAMESPACE_BEGIN
 
 /* Not all features are included in the matrix norm. */
diff --git a/intern/cycles/kernel/kernel_filter_old.h 
b/intern/cycles/kernel/kernel_filter_old.h
new file mode 100644
index 0000000..8eb3370
--- /dev/null
+++ b/intern/cycles/kernel/kernel_filter_old.h
@@ -0,0 +1,659 @@
+/*
+ * Copyright 2011-2013 Blender Foundation
+ *
+ * Licensed under the Apache License, Version 2.0 (the "License");
+ * you may not use this file except in compliance with the License.
+ * You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+CCL_NAMESPACE_BEGIN
+
+#define Buf_F(px, py, o) denoise_data[(py - rect.y)*denoise_stride + (px - 
rect.x) + pass_stride*(o)]//(buffers[((y) * w + (x)) * 
kernel_data.film.pass_stride + (o)])
+#define Buf_F3(px, py, o) make_float3(denoise_data[(py - 
rect.y)*denoise_stride + (px - rect.x) + pass_stride*(o)], denoise_data[(py - 
rect.y)*denoise_stride + (px - rect.x) + pass_stride*((o)+2)], denoise_data[(py 
- rect.y)*denoise_stride + (px - rect.x) + pass_stride*((o)+4)])//(buffers + 
((y) * w + (x)) * kernel_data.film.pass_stride + (o)))
+//#define Buf_F4(x, y, o) *((float4*) (buffers + ((y) * w + (x)) * 
kernel_data.film.pass_stride + (o)))
+
+/* Component-wise saturate for float3: calls saturate() on a.x, a.y and a.z
+ * individually and packs the three results back into a float3.
+ * NOTE(review): the per-component saturate() overload is not defined in this
+ * file; presumably the scalar version from the math utilities - confirm. */
+ccl_device float3 saturate(float3 a)
+{
+       return make_float3(saturate(a.x), saturate(a.y), saturate(a.z));
+}
+
+/* Cholesky-style factorization of the n x n matrix A into the factor L.
+ *
+ * A: input matrix, indexed as A[row * n + col].
+ * n: number of rows/columns.
+ * L: output, indexed as L[row * n + col]. Only entries with col <= row are
+ *    written; entries above the diagonal are left untouched.
+ *
+ * NOTE(review): off-diagonal entries read A[j * n + i] (with j <= i), so A is
+ * presumably expected to be symmetric - confirm against callers.
+ * NOTE(review): there is no guard against A[i * n + i] - s being negative
+ * before sqrtf(), nor against a zero diagonal entry in the division. */
+ccl_device void cholesky(float *A, int n, float *L)
+{
+       for (int i = 0; i < n; ++i) {
+               for (int j = 0; j <= i; ++j) {
+                       /* s = dot product of the first j entries of rows i and j of L. */
+                       float s = 0.0f;
+                       for (int k = 0; k < j; ++k) {
+                               s += L[i * n + k] * L[j * n + k];
+                       }
+                       /* Diagonal: square root of the remainder; below the diagonal: remainder divided by L[j][j]. */
+                       L[i * n + j] = (i == j) ? sqrtf(A[i * n + i] - s) : 
(1.0f / L[j * n + j] * (A[j * n + i] - s));
+               }
+       }
+}
+
+ccl_device int old_svd(float *A, float *V, float *S2, int n)
+{
+       int  i, j, k, EstColRank = n, RotCount = n, SweepCount = 0;
+       int slimit = 8;
+       float eps = 1e-8f;
+       float e2 = 10.f * n * eps * eps;
+       float tol = 0.1f * eps;
+       float vt, p, x0, y0, q, r, c0, s0, d1, d2;
+
+       for(int r = 0; r < n; r++)
+               for(int c = 0; c < n; c++)
+                       V[r*n+c] = (c == r)? 1.0f: 0.0f;
+
+       while (RotCount != 0 && SweepCount++ <= slimit) {
+               RotCount = EstColRank * (EstColRank - 1) / 2;
+
+               for (j = 0; j < EstColRank-1; ++j) {
+                       fo

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
[email protected]
https://lists.blender.org/mailman/listinfo/bf-blender-cvs

Reply via email to