Revision: 29474 http://projects.blender.org/plugins/scmsvn/viewcvs.php?view=rev&root=bf-blender&revision=29474 Author: blendix Date: 2010-06-15 20:19:35 +0200 (Tue, 15 Jun 2010)
Log Message: ----------- Render Branch: move irradiance cache computation of 4x4 matrix svd and pseudoinverse into blenlib, slightly faster due to avoiding malloc/free and some floating point divisions. Modified Paths: -------------- branches/render25/intern/iksolver/intern/IK_Solver.cpp branches/render25/source/blender/blenlib/BLI_math_matrix.h branches/render25/source/blender/blenlib/intern/math_matrix.c branches/render25/source/blender/render/intern/source/cache.c Modified: branches/render25/intern/iksolver/intern/IK_Solver.cpp =================================================================== --- branches/render25/intern/iksolver/intern/IK_Solver.cpp 2010-06-15 18:14:04 UTC (rev 29473) +++ branches/render25/intern/iksolver/intern/IK_Solver.cpp 2010-06-15 18:19:35 UTC (rev 29474) @@ -377,40 +377,3 @@ return ((result)? 1: 0); } -#include "TNT/cmat.h" -#include "TNT/svd.h" - -extern "C" { -void TNT_svd(float m[][4], float *w, float u[][4]); -} - -void TNT_svd(float m[][4], float *w, float u[][4]) -{ - TNT::Matrix<float> M, V, U; - TNT::Vector<float> W, W1, W2; - int i, j; - - M.newsize(4, 4); - V.newsize(4, 4); - U.newsize(4, 4); - W.newsize(4); - W1.newsize(4); - W2.newsize(4); - - for(i=0; i<4; i++) - for(j=0; j<4; j++) - M[i][j]= m[j][i]; - - TNT::SVD(M, V, W, U, W1, W2); - - for(i=0; i<4; i++) - w[i]= W[i]; - - for(i=0; i<4; i++) { - for(j=0; j<4; j++) { - m[i][j]= V[j][i]; - u[i][j]= U[j][i]; - } - } -} - Modified: branches/render25/source/blender/blenlib/BLI_math_matrix.h =================================================================== --- branches/render25/source/blender/blenlib/BLI_math_matrix.h 2010-06-15 18:14:04 UTC (rev 29473) +++ branches/render25/source/blender/blenlib/BLI_math_matrix.h 2010-06-15 18:19:35 UTC (rev 29474) @@ -122,6 +122,9 @@ float g, float h, float i); float determinant_m4(float A[4][4]); +void svd_m4(float U[4][4], float s[4], float V[4][4], float A[4][4]); +void pseudoinverse_m4_m4(float Ainv[4][4], float A[4][4], float epsilon); + /****************************** Transformations ******************************/ void scale_m3_fl(float R[3][3], float scale); Modified: branches/render25/source/blender/blenlib/intern/math_matrix.c =================================================================== --- branches/render25/source/blender/blenlib/intern/math_matrix.c 2010-06-15 18:14:04 UTC (rev 29473) +++ branches/render25/source/blender/blenlib/intern/math_matrix.c 2010-06-15 18:19:35 UTC (rev 29474) @@ -1149,3 +1149,458 @@ printf("%f %f %f %f\n",m[0][3],m[1][3],m[2][3],m[3][3]); printf("\n"); } + +/*********************************** SVD ************************************ + * from TNT matrix library + + * Compute the Single Value Decomposition of an arbitrary matrix A + * That is compute the 3 matrices U,W,V with U column orthogonal (m,n) + * ,W a diagonal matrix and V an orthogonal square matrix s.t. + * A = U.W.Vt. From this decomposition it is trivial to compute the + * (pseudo-inverse) of A as Ainv = V.Winv.tranpose(U). + */ + +void svd_m4(float U[4][4], float s[4], float V[4][4], float A_[4][4]) +{ + float A[4][4]; + float work1[4], work2[4]; + int m = 4; + int n = 4; + int maxiter = 200; + int nu = minf(m,n); + + float *work = work1; + float *e = work2; + float eps; + + int i=0, j=0, k=0, p, pp, iter; + + // Reduce A to bidiagonal form, storing the diagonal elements + // in s and the super-diagonal elements in e. + + int nct = minf(m-1,n); + int nrt = maxf(0,minf(n-2,m)); + + copy_m4_m4(A, A_); + zero_m4(U); + zero_v4(s); + + for (k = 0; k < maxf(nct,nrt); k++) { + if (k < nct) { + + // Compute the transformation for the k-th column and + // place the k-th diagonal in s[k]. + // Compute 2-norm of k-th column without under/overflow. + s[k] = 0; + for (i = k; i < m; i++) { + s[k] = hypotf(s[k],A[i][k]); + } + if (s[k] != 0.0f) { + float invsk; + if (A[k][k] < 0.0f) { + s[k] = -s[k]; + } + invsk = 1.0f/s[k]; + for (i = k; i < m; i++) { + A[i][k] *= invsk; + } + A[k][k] += 1.0f; + } + s[k] = -s[k]; + } + for (j = k+1; j < n; j++) { + if ((k < nct) && (s[k] != 0.0f)) { + + // Apply the transformation. + + float t = 0; + for (i = k; i < m; i++) { + t += A[i][k]*A[i][j]; + } + t = -t/A[k][k]; + for (i = k; i < m; i++) { + A[i][j] += t*A[i][k]; + } + } + + // Place the k-th row of A into e for the + // subsequent calculation of the row transformation. + + e[j] = A[k][j]; + } + if (k < nct) { + + // Place the transformation in U for subsequent back + // multiplication. + + for (i = k; i < m; i++) + U[i][k] = A[i][k]; + } + if (k < nrt) { + + // Compute the k-th row transformation and place the + // k-th super-diagonal in e[k]. + // Compute 2-norm without under/overflow. + e[k] = 0; + for (i = k+1; i < n; i++) { + e[k] = hypotf(e[k],e[i]); + } + if (e[k] != 0.0f) { + float invek; + if (e[k+1] < 0.0f) { + e[k] = -e[k]; + } + invek = 1.0f/e[k]; + for (i = k+1; i < n; i++) { + e[i] *= invek; + } + e[k+1] += 1.0f; + } + e[k] = -e[k]; + if ((k+1 < m) & (e[k] != 0.0f)) { + float invek1; + + // Apply the transformation. + + for (i = k+1; i < m; i++) { + work[i] = 0.0f; + } + for (j = k+1; j < n; j++) { + for (i = k+1; i < m; i++) { + work[i] += e[j]*A[i][j]; + } + } + invek1 = 1.0f/e[k+1]; + for (j = k+1; j < n; j++) { + float t = -e[j]*invek1; + for (i = k+1; i < m; i++) { + A[i][j] += t*work[i]; + } + } + } + + // Place the transformation in V for subsequent + // back multiplication. + + for (i = k+1; i < n; i++) + V[i][k] = e[i]; + } + } + + // Set up the final bidiagonal matrix or order p. + + p = minf(n,m+1); + if (nct < n) { + s[nct] = A[nct][nct]; + } + if (m < p) { + s[p-1] = 0.0f; + } + if (nrt+1 < p) { + e[nrt] = A[nrt][p-1]; + } + e[p-1] = 0.0f; + + // If required, generate U. + + for (j = nct; j < nu; j++) { + for (i = 0; i < m; i++) { + U[i][j] = 0.0f; + } + U[j][j] = 1.0f; + } + for (k = nct-1; k >= 0; k--) { + if (s[k] != 0.0f) { + for (j = k+1; j < nu; j++) { + float t = 0; + for (i = k; i < m; i++) { + t += U[i][k]*U[i][j]; + } + t = -t/U[k][k]; + for (i = k; i < m; i++) { + U[i][j] += t*U[i][k]; + } + } + for (i = k; i < m; i++ ) { + U[i][k] = -U[i][k]; + } + U[k][k] = 1.0f + U[k][k]; + for (i = 0; i < k-1; i++) { + U[i][k] = 0.0f; + } + } else { + for (i = 0; i < m; i++) { + U[i][k] = 0.0f; + } + U[k][k] = 1.0f; + } + } + + // If required, generate V. + + for (k = n-1; k >= 0; k--) { + if ((k < nrt) & (e[k] != 0.0f)) { + for (j = k+1; j < nu; j++) { + float t = 0; + for (i = k+1; i < n; i++) { + t += V[i][k]*V[i][j]; + } + t = -t/V[k+1][k]; + for (i = k+1; i < n; i++) { + V[i][j] += t*V[i][k]; + } + } + } + for (i = 0; i < n; i++) { + V[i][k] = 0.0f; + } + V[k][k] = 1.0f; + } + + // Main iteration loop for the singular values. + + pp = p-1; + iter = 0; + eps = powf(2.0f,-52.0f); + while (p > 0) { + int kase=0; + k=0; + + // Test for maximum iterations to avoid infinite loop + if(maxiter == 0) + break; + maxiter--; + + // This section of the program inspects for + // negligible elements in the s and e arrays. On + // completion the variables kase and k are set as follows. + + // kase = 1 if s(p) and e[k-1] are negligible and k<p + // kase = 2 if s(k) is negligible and k<p + // kase = 3 if e[k-1] is negligible, k<p, and + // s(k), ..., s(p) are not negligible (qr step). + // kase = 4 if e(p-1) is negligible (convergence). + + for (k = p-2; k >= -1; k--) { + if (k == -1) { + break; + } + if (fabsf(e[k]) <= eps*(fabsf(s[k]) + fabsf(s[k+1]))) { + e[k] = 0.0f; + break; + } + } + if (k == p-2) { + kase = 4; + } else { + int ks; + for (ks = p-1; ks >= k; ks--) { + float t; + if (ks == k) { + break; + } + t = (ks != p ? fabsf(e[ks]) : 0.f) + + (ks != k+1 ? fabsf(e[ks-1]) : 0.0f); + if (fabsf(s[ks]) <= eps*t) { + s[ks] = 0.0f; + break; + } + } + if (ks == k) { + kase = 3; + } else if (ks == p-1) { + kase = 1; + } else { + kase = 2; + k = ks; + } + } + k++; + + // Perform the task indicated by kase. + + switch (kase) { + + // Deflate negligible s(p). + + case 1: { + float f = e[p-2]; + e[p-2] = 0.0f; + for (j = p-2; j >= k; j--) { + float t = hypotf(s[j],f); + float invt = 1.0f/t; + float cs = s[j]*invt; + float sn = f*invt; + s[j] = t; + if (j != k) { + f = -sn*e[j-1]; + e[j-1] = cs*e[j-1]; + } + + for (i = 0; i < n; i++) { + t = cs*V[i][j] + sn*V[i][p-1]; + V[i][p-1] = -sn*V[i][j] + cs*V[i][p-1]; + V[i][j] = t; + } + } + } + break; + + // Split at negligible s(k). + + case 2: { + float f = e[k-1]; + e[k-1] = 0.0f; + for (j = k; j < p; j++) { + float t = hypotf(s[j],f); + float invt = 1.0f/t; + float cs = s[j]*invt; + float sn = f*invt; + s[j] = t; + f = -sn*e[j]; + e[j] = cs*e[j]; + + for (i = 0; i < m; i++) { + t = cs*U[i][j] + sn*U[i][k-1]; + U[i][k-1] = -sn*U[i][j] + cs*U[i][k-1]; + U[i][j] = t; + } + } + } + break; + + // Perform one qr step. + + case 3: { + + // Calculate the shift. + + float scale = maxf(maxf(maxf(maxf( + fabsf(s[p-1]),fabsf(s[p-2])),fabsf(e[p-2])), + fabsf(s[k])),fabsf(e[k])); + float invscale = 1.0f/scale; + float sp = s[p-1]*invscale; + float spm1 = s[p-2]*invscale; + float epm1 = e[p-2]*invscale; + float sk = s[k]*invscale; + float ek = e[k]*invscale; + float b = ((spm1 + sp)*(spm1 - sp) + epm1*epm1)*0.5f; + float c = (sp*epm1)*(sp*epm1); + float shift = 0.0f; + float f, g; + if ((b != 0.0f) || (c != 0.0f)) { + shift = sqrtf(b*b + c); + if (b < 0.0f) { + shift = -shift; + } + shift = c/(b + shift); + } + f = (sk + sp)*(sk - sp) + shift; + g = sk*ek; + + // Chase zeros. + + for (j = k; j < p-1; j++) { + float t = hypotf(f,g); + /* division by zero checks added to avoid NaN (brecht) */ + float cs = (t == 0.0f)? 0.0f: f/t; + float sn = (t == 0.0f)? 0.0f: g/t; + if (j != k) { + e[j-1] = t; + } + f = cs*s[j] + sn*e[j]; + e[j] = cs*e[j] - sn*s[j]; + g = sn*s[j+1]; + s[j+1] = cs*s[j+1]; + + for (i = 0; i < n; i++) { + t = cs*V[i][j] + sn*V[i][j+1]; + V[i][j+1] = -sn*V[i][j] + cs*V[i][j+1]; + V[i][j] = t; + } + + t = hypotf(f,g); + /* division by zero checks added to avoid NaN (brecht) */ + cs = (t == 0.0f)? 0.0f: f/t; + sn = (t == 0.0f)? 0.0f: g/t; + s[j] = t; + f = cs*e[j] + sn*s[j+1]; + s[j+1] = -sn*e[j] + cs*s[j+1]; + g = sn*e[j+1]; + e[j+1] = cs*e[j+1]; + if (j < m-1) { + for (i = 0; i < m; i++) { + t = cs*U[i][j] + sn*U[i][j+1]; + U[i][j+1] = -sn*U[i][j] + cs*U[i][j+1]; + U[i][j] = t; + } + } + } + e[p-2] = f; + iter = iter + 1; @@ Diff output truncated at 10240 characters. @@ _______________________________________________ Bf-blender-cvs mailing list Bf-blender-cvs@blender.org http://lists.blender.org/mailman/listinfo/bf-blender-cvs