render25: Render Branch: move irradiance cache computation of 4x4 matrix svd and

Brecht Van Lommel Tue, 15 Jun 2010 11:19:41 -0700

Revision: 29474
          
http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=29474
Author:   blendix
Date:     2010-06-15 20:19:35 +0200 (Tue, 15 Jun 2010)


Log Message:
-----------
Render Branch: move irradiance cache computation of 4x4 matrix svd and
pseudoinverse into blenlib, slightly faster due to avoiding malloc/free
and some floating point divisions.

Modified Paths:
--------------
    branches/render25/intern/iksolver/intern/IK_Solver.cpp
    branches/render25/source/blender/blenlib/BLI_math_matrix.h
    branches/render25/source/blender/blenlib/intern/math_matrix.c
    branches/render25/source/blender/render/intern/source/cache.c

Modified: branches/render25/intern/iksolver/intern/IK_Solver.cpp
===================================================================
--- branches/render25/intern/iksolver/intern/IK_Solver.cpp      2010-06-15 
18:14:04 UTC (rev 29473)
+++ branches/render25/intern/iksolver/intern/IK_Solver.cpp      2010-06-15 
18:19:35 UTC (rev 29474)
@@ -377,40 +377,3 @@
        return ((result)? 1: 0);
 }
 
-#include "TNT/cmat.h"
-#include "TNT/svd.h"
-
-extern "C" {
-void TNT_svd(float m[][4], float *w, float u[][4]);
-}
-
-void TNT_svd(float m[][4], float *w, float u[][4])
-{
-       TNT::Matrix<float> M, V, U;
-       TNT::Vector<float> W, W1, W2;
-       int i, j;
-
-       M.newsize(4, 4);
-       V.newsize(4, 4);
-       U.newsize(4, 4);
-       W.newsize(4);
-       W1.newsize(4);
-       W2.newsize(4);
-
-       for(i=0; i<4; i++)
-               for(j=0; j<4; j++)
-                       M[i][j]= m[j][i];
-
-       TNT::SVD(M, V, W, U, W1, W2);
-
-       for(i=0; i<4; i++)
-               w[i]= W[i];
-
-       for(i=0; i<4; i++) {
-               for(j=0; j<4; j++) {
-                       m[i][j]= V[j][i];
-                       u[i][j]= U[j][i];
-               }
-       }
-}
-

Modified: branches/render25/source/blender/blenlib/BLI_math_matrix.h
===================================================================
--- branches/render25/source/blender/blenlib/BLI_math_matrix.h  2010-06-15 
18:14:04 UTC (rev 29473)
+++ branches/render25/source/blender/blenlib/BLI_math_matrix.h  2010-06-15 
18:19:35 UTC (rev 29474)
@@ -122,6 +122,9 @@
        float g, float h, float i);
 float determinant_m4(float A[4][4]);
 
+void svd_m4(float U[4][4], float s[4], float V[4][4], float A[4][4]);
+void pseudoinverse_m4_m4(float Ainv[4][4], float A[4][4], float epsilon);
+
 /****************************** Transformations ******************************/
 
 void scale_m3_fl(float R[3][3], float scale);

Modified: branches/render25/source/blender/blenlib/intern/math_matrix.c
===================================================================
--- branches/render25/source/blender/blenlib/intern/math_matrix.c       
2010-06-15 18:14:04 UTC (rev 29473)
+++ branches/render25/source/blender/blenlib/intern/math_matrix.c       
2010-06-15 18:19:35 UTC (rev 29474)
@@ -1149,3 +1149,458 @@
        printf("%f %f %f %f\n",m[0][3],m[1][3],m[2][3],m[3][3]);
        printf("\n");
 }
+
+/*********************************** SVD ************************************
+ * from TNT matrix library
+
+ * Compute the Single Value Decomposition of an arbitrary matrix A
+ * That is compute the 3 matrices U,W,V with U column orthogonal (m,n) 
+ * ,W a diagonal matrix and V an orthogonal square matrix s.t. 
+ * A = U.W.Vt. From this decomposition it is trivial to compute the 
+ * (pseudo-inverse) of A as Ainv = V.Winv.tranpose(U).
+ */
+
+void svd_m4(float U[4][4], float s[4], float V[4][4], float A_[4][4])
+{
+       float A[4][4];
+       float work1[4], work2[4];
+       int m = 4;
+       int n = 4;
+       int maxiter = 200;
+       int nu = minf(m,n);
+
+       float *work = work1;
+       float *e = work2;
+       float eps;
+
+       int i=0, j=0, k=0, p, pp, iter;
+
+       // Reduce A to bidiagonal form, storing the diagonal elements
+       // in s and the super-diagonal elements in e.
+
+       int nct = minf(m-1,n);
+       int nrt = maxf(0,minf(n-2,m));
+
+       copy_m4_m4(A, A_);
+       zero_m4(U);
+       zero_v4(s);
+
+       for (k = 0; k < maxf(nct,nrt); k++) {
+               if (k < nct) {
+
+                       // Compute the transformation for the k-th column and
+                       // place the k-th diagonal in s[k].
+                       // Compute 2-norm of k-th column without under/overflow.
+                       s[k] = 0;
+                       for (i = k; i < m; i++) {
+                               s[k] = hypotf(s[k],A[i][k]);
+                       }
+                       if (s[k] != 0.0f) {
+                               float invsk;
+                               if (A[k][k] < 0.0f) {
+                                       s[k] = -s[k];
+                               }
+                               invsk = 1.0f/s[k];
+                               for (i = k; i < m; i++) {
+                                       A[i][k] *= invsk;
+                               }
+                               A[k][k] += 1.0f;
+                       }
+                       s[k] = -s[k];
+               }
+               for (j = k+1; j < n; j++) {
+                       if ((k < nct) && (s[k] != 0.0f))  {
+
+                       // Apply the transformation.
+
+                               float t = 0;
+                               for (i = k; i < m; i++) {
+                                       t += A[i][k]*A[i][j];
+                               }
+                               t = -t/A[k][k];
+                               for (i = k; i < m; i++) {
+                                       A[i][j] += t*A[i][k];
+                               }
+                       }
+
+                       // Place the k-th row of A into e for the
+                       // subsequent calculation of the row transformation.
+
+                       e[j] = A[k][j];
+               }
+               if (k < nct) {
+
+                       // Place the transformation in U for subsequent back
+                       // multiplication.
+
+                       for (i = k; i < m; i++)
+                               U[i][k] = A[i][k];
+               }
+               if (k < nrt) {
+
+                       // Compute the k-th row transformation and place the
+                       // k-th super-diagonal in e[k].
+                       // Compute 2-norm without under/overflow.
+                       e[k] = 0;
+                       for (i = k+1; i < n; i++) {
+                               e[k] = hypotf(e[k],e[i]);
+                       }
+                       if (e[k] != 0.0f) {
+                               float invek;
+                               if (e[k+1] < 0.0f) {
+                                       e[k] = -e[k];
+                               }
+                               invek = 1.0f/e[k];
+                               for (i = k+1; i < n; i++) {
+                                       e[i] *= invek;
+                               }
+                               e[k+1] += 1.0f;
+                       }
+                       e[k] = -e[k];
+                       if ((k+1 < m) & (e[k] != 0.0f)) {
+                               float invek1;
+
+                       // Apply the transformation.
+
+                               for (i = k+1; i < m; i++) {
+                                       work[i] = 0.0f;
+                               }
+                               for (j = k+1; j < n; j++) {
+                                       for (i = k+1; i < m; i++) {
+                                               work[i] += e[j]*A[i][j];
+                                       }
+                               }
+                               invek1 = 1.0f/e[k+1];
+                               for (j = k+1; j < n; j++) {
+                                       float t = -e[j]*invek1;
+                                       for (i = k+1; i < m; i++) {
+                                               A[i][j] += t*work[i];
+                                       }
+                               }
+                       }
+
+                       // Place the transformation in V for subsequent
+                       // back multiplication.
+
+                       for (i = k+1; i < n; i++)
+                               V[i][k] = e[i];
+               }
+       }
+
+       // Set up the final bidiagonal matrix or order p.
+
+       p = minf(n,m+1);
+       if (nct < n) {
+               s[nct] = A[nct][nct];
+       }
+       if (m < p) {
+               s[p-1] = 0.0f;
+       }
+       if (nrt+1 < p) {
+               e[nrt] = A[nrt][p-1];
+       }
+       e[p-1] = 0.0f;
+
+       // If required, generate U.
+
+       for (j = nct; j < nu; j++) {
+               for (i = 0; i < m; i++) {
+                       U[i][j] = 0.0f;
+               }
+               U[j][j] = 1.0f;
+       }
+       for (k = nct-1; k >= 0; k--) {
+               if (s[k] != 0.0f) {
+                       for (j = k+1; j < nu; j++) {
+                               float t = 0;
+                               for (i = k; i < m; i++) {
+                                       t += U[i][k]*U[i][j];
+                               }
+                               t = -t/U[k][k];
+                               for (i = k; i < m; i++) {
+                                       U[i][j] += t*U[i][k];
+                               }
+                       }
+                       for (i = k; i < m; i++ ) {
+                               U[i][k] = -U[i][k];
+                       }
+                       U[k][k] = 1.0f + U[k][k];
+                       for (i = 0; i < k-1; i++) {
+                               U[i][k] = 0.0f;
+                       }
+               } else {
+                       for (i = 0; i < m; i++) {
+                               U[i][k] = 0.0f;
+                       }
+                       U[k][k] = 1.0f;
+               }
+       }
+
+       // If required, generate V.
+
+       for (k = n-1; k >= 0; k--) {
+               if ((k < nrt) & (e[k] != 0.0f)) {
+                       for (j = k+1; j < nu; j++) {
+                               float t = 0;
+                               for (i = k+1; i < n; i++) {
+                                       t += V[i][k]*V[i][j];
+                               }
+                               t = -t/V[k+1][k];
+                               for (i = k+1; i < n; i++) {
+                                       V[i][j] += t*V[i][k];
+                               }
+                       }
+               }
+               for (i = 0; i < n; i++) {
+                       V[i][k] = 0.0f;
+               }
+               V[k][k] = 1.0f;
+       }
+
+       // Main iteration loop for the singular values.
+
+       pp = p-1;
+       iter = 0;
+       eps = powf(2.0f,-52.0f);
+       while (p > 0) {
+               int kase=0;
+               k=0;
+
+               // Test for maximum iterations to avoid infinite loop
+               if(maxiter == 0)
+                       break;
+               maxiter--;
+
+               // This section of the program inspects for
+               // negligible elements in the s and e arrays.  On
+               // completion the variables kase and k are set as follows.
+
+               // kase = 1       if s(p) and e[k-1] are negligible and k<p
+               // kase = 2       if s(k) is negligible and k<p
+               // kase = 3       if e[k-1] is negligible, k<p, and
+               //                                s(k), ..., s(p) are not 
negligible (qr step).
+               // kase = 4       if e(p-1) is negligible (convergence).
+
+               for (k = p-2; k >= -1; k--) {
+                       if (k == -1) {
+                               break;
+                       }
+                       if (fabsf(e[k]) <= eps*(fabsf(s[k]) + fabsf(s[k+1]))) {
+                               e[k] = 0.0f;
+                               break;
+                       }
+               }
+               if (k == p-2) {
+                       kase = 4;
+               } else {
+                       int ks;
+                       for (ks = p-1; ks >= k; ks--) {
+                               float t;
+                               if (ks == k) {
+                                       break;
+                               }
+                               t = (ks != p ? fabsf(e[ks]) : 0.f) + 
+                                                         (ks != k+1 ? 
fabsf(e[ks-1]) : 0.0f);
+                               if (fabsf(s[ks]) <= eps*t)  {
+                                       s[ks] = 0.0f;
+                                       break;
+                               }
+                       }
+                       if (ks == k) {
+                               kase = 3;
+                       } else if (ks == p-1) {
+                               kase = 1;
+                       } else {
+                               kase = 2;
+                               k = ks;
+                       }
+               }
+               k++;
+
+               // Perform the task indicated by kase.
+
+               switch (kase) {
+
+                       // Deflate negligible s(p).
+
+                       case 1: {
+                               float f = e[p-2];
+                               e[p-2] = 0.0f;
+                               for (j = p-2; j >= k; j--) {
+                                       float t = hypotf(s[j],f);
+                                       float invt = 1.0f/t;
+                                       float cs = s[j]*invt;
+                                       float sn = f*invt;
+                                       s[j] = t;
+                                       if (j != k) {
+                                               f = -sn*e[j-1];
+                                               e[j-1] = cs*e[j-1];
+                                       }
+
+                                       for (i = 0; i < n; i++) {
+                                               t = cs*V[i][j] + sn*V[i][p-1];
+                                               V[i][p-1] = -sn*V[i][j] + 
cs*V[i][p-1];
+                                               V[i][j] = t;
+                                       }
+                               }
+                       }
+                       break;
+
+                       // Split at negligible s(k).
+
+                       case 2: {
+                               float f = e[k-1];
+                               e[k-1] = 0.0f;
+                               for (j = k; j < p; j++) {
+                                       float t = hypotf(s[j],f);
+                                       float invt = 1.0f/t;
+                                       float cs = s[j]*invt;
+                                       float sn = f*invt;
+                                       s[j] = t;
+                                       f = -sn*e[j];
+                                       e[j] = cs*e[j];
+
+                                       for (i = 0; i < m; i++) {
+                                               t = cs*U[i][j] + sn*U[i][k-1];
+                                               U[i][k-1] = -sn*U[i][j] + 
cs*U[i][k-1];
+                                               U[i][j] = t;
+                                       }
+                               }
+                       }
+                       break;
+
+                       // Perform one qr step.
+
+                       case 3: {
+
+                               // Calculate the shift.
+
+                               float scale = maxf(maxf(maxf(maxf(
+                                                 
fabsf(s[p-1]),fabsf(s[p-2])),fabsf(e[p-2])), 
+                                                 fabsf(s[k])),fabsf(e[k]));
+                               float invscale = 1.0f/scale;
+                               float sp = s[p-1]*invscale;
+                               float spm1 = s[p-2]*invscale;
+                               float epm1 = e[p-2]*invscale;
+                               float sk = s[k]*invscale;
+                               float ek = e[k]*invscale;
+                               float b = ((spm1 + sp)*(spm1 - sp) + 
epm1*epm1)*0.5f;
+                               float c = (sp*epm1)*(sp*epm1);
+                               float shift = 0.0f;
+                               float f, g;
+                               if ((b != 0.0f) || (c != 0.0f)) {
+                                       shift = sqrtf(b*b + c);
+                                       if (b < 0.0f) {
+                                               shift = -shift;
+                                       }
+                                       shift = c/(b + shift);
+                               }
+                               f = (sk + sp)*(sk - sp) + shift;
+                               g = sk*ek;
+
+                               // Chase zeros.
+
+                               for (j = k; j < p-1; j++) {
+                                       float t = hypotf(f,g);
+                                       /* division by zero checks added to 
avoid NaN (brecht) */
+                                       float cs = (t == 0.0f)? 0.0f: f/t;
+                                       float sn = (t == 0.0f)? 0.0f: g/t;
+                                       if (j != k) {
+                                               e[j-1] = t;
+                                       }
+                                       f = cs*s[j] + sn*e[j];
+                                       e[j] = cs*e[j] - sn*s[j];
+                                       g = sn*s[j+1];
+                                       s[j+1] = cs*s[j+1];
+
+                                       for (i = 0; i < n; i++) {
+                                               t = cs*V[i][j] + sn*V[i][j+1];
+                                               V[i][j+1] = -sn*V[i][j] + 
cs*V[i][j+1];
+                                               V[i][j] = t;
+                                       }
+
+                                       t = hypotf(f,g);
+                                       /* division by zero checks added to 
avoid NaN (brecht) */
+                                       cs = (t == 0.0f)? 0.0f: f/t;
+                                       sn = (t == 0.0f)? 0.0f: g/t;
+                                       s[j] = t;
+                                       f = cs*e[j] + sn*s[j+1];
+                                       s[j+1] = -sn*e[j] + cs*s[j+1];
+                                       g = sn*e[j+1];
+                                       e[j+1] = cs*e[j+1];
+                                       if (j < m-1) {
+                                               for (i = 0; i < m; i++) {
+                                                       t = cs*U[i][j] + 
sn*U[i][j+1];
+                                                       U[i][j+1] = -sn*U[i][j] 
+ cs*U[i][j+1];
+                                                       U[i][j] = t;
+                                               }
+                                       }
+                               }
+                               e[p-2] = f;
+                               iter = iter + 1;

@@ Diff output truncated at 10240 characters. @@

_______________________________________________
Bf-blender-cvs mailing list
Bf-blender-cvs@blender.org
http://lists.blender.org/mailman/listinfo/bf-blender-cvs

[Bf-blender-cvs] SVN commit: /data/svn/bf-blender [29474] branches/render25: Render Branch: move irradiance cache computation of 4x4 matrix svd and

Reply via email to