Recently I suspected the reg_sweep function in sweep.c was causing problems for GLM. As it turned out, my suspicions were unfounded.
However I'm posting a few patches I came up with during my investigations. None of them should change the behaviour of the code. But potentially make it faster and IMO easier to follow. If nobody objects, I'll push them to master in a day or so. J' -- PGP Public key ID: 1024D/2DE827B3 fingerprint = 8797 A26D 0854 2EAB 0285 A290 8A67 719C 2DE8 27B3 See http://pgp.mit.edu or any PGP keyserver for public key.
From f46d2c634b595c67a5af7994de9f6c851f632898 Mon Sep 17 00:00:00 2001 From: John Darrington <[email protected]> Date: Tue, 15 Nov 2011 14:51:49 +0100 Subject: [PATCH 1/5] sweep.c: swap rows/columns instead of using indirection for last_col This makes the code shorter, and I believe should make it faster too. --- lib/linreg/sweep.c | 74 +++++++++++++++++++++------------------------------- 1 files changed, 30 insertions(+), 44 deletions(-) diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c index b72d24a..b8b57e8 100644 --- a/lib/linreg/sweep.c +++ b/lib/linreg/sweep.c @@ -44,6 +44,7 @@ #include "sweep.h" +#include <assert.h> /* The matrix A will be overwritten. In ordinary uses of the sweep operator, A will be the matrix @@ -73,65 +74,52 @@ reg_sweep (gsl_matrix * A, int last_col) int i; int j; int k; - int row_i; - int row_k; - int row_j; - int *ordered_cols; gsl_matrix *B; if (A != NULL) { if (A->size1 == A->size2) { - ordered_cols = malloc (A->size1 * sizeof (*ordered_cols)); - for (i = 0; i < last_col; i++) - { - ordered_cols[i] = i; - } - for (i = last_col + 1; i < A->size1; i++) - { - ordered_cols[i - 1] = i; - } - ordered_cols[A->size1 - 1] = last_col; + assert (last_col < A->size1); + gsl_matrix_swap_rows (A, A->size1 - 1, last_col); + gsl_matrix_swap_columns (A, A->size1 - 1 , last_col); + B = gsl_matrix_alloc (A->size1, A->size2); for (k = 0; k < (A->size1 - 1); k++) { - row_k = ordered_cols[k]; - sweep_element = gsl_matrix_get (A, row_k, row_k); + sweep_element = gsl_matrix_get (A, k, k); if (fabs (sweep_element) > GSL_DBL_MIN) { tmp = -1.0 / sweep_element; - gsl_matrix_set (B, row_k, row_k, tmp); + gsl_matrix_set (B, k, k, tmp); /* Rows before current row k. */ for (i = 0; i < k; i++) { - row_i = ordered_cols[i]; for (j = i; j < A->size2; j++) { - row_j = ordered_cols[j]; /* Use only the upper triangle of A. */ - if (row_j < row_k) + if (j < k) { - tmp = gsl_matrix_get (A, row_i, row_j) - - gsl_matrix_get (A, row_i, row_k) - * gsl_matrix_get (A, row_j, row_k) / sweep_element; - gsl_matrix_set (B, row_i, row_j, tmp); + tmp = gsl_matrix_get (A, i, j) - + gsl_matrix_get (A, i, k) + * gsl_matrix_get (A, j, k) / sweep_element; + gsl_matrix_set (B, i, j, tmp); } - else if (row_j > row_k) + else if (j > k) { - tmp = gsl_matrix_get (A, row_i, row_j) - - gsl_matrix_get (A, row_i, row_k) - * gsl_matrix_get (A, row_k, row_j) / sweep_element; - gsl_matrix_set (B, row_i, row_j, tmp); + tmp = gsl_matrix_get (A, i, j) - + gsl_matrix_get (A, i, k) + * gsl_matrix_get (A, k, j) / sweep_element; + gsl_matrix_set (B, i, j, tmp); } else { - tmp = gsl_matrix_get (A, row_i, row_k) / sweep_element; - gsl_matrix_set (B, row_i, row_j, tmp); + tmp = gsl_matrix_get (A, i, k) / sweep_element; + gsl_matrix_set (B, i, j, tmp); } } } @@ -140,36 +128,34 @@ reg_sweep (gsl_matrix * A, int last_col) */ for (j = k + 1; j < A->size1; j++) { - row_j = ordered_cols[j]; - tmp = gsl_matrix_get (A, row_k, row_j) / sweep_element; - gsl_matrix_set (B, row_k, row_j, tmp); + tmp = gsl_matrix_get (A, k, j) / sweep_element; + gsl_matrix_set (B, k, j, tmp); } /* Rows after the current row k. */ for (i = k + 1; i < A->size1; i++) { - row_i = ordered_cols[i]; for (j = i; j < A->size2; j++) { - row_j = ordered_cols[j]; - tmp = gsl_matrix_get (A, row_i, row_j) - - gsl_matrix_get (A, row_k, row_i) - * gsl_matrix_get (A, row_k, row_j) / sweep_element; - gsl_matrix_set (B, row_i, row_j, tmp); + tmp = gsl_matrix_get (A, i, j) - + gsl_matrix_get (A, k, i) + * gsl_matrix_get (A, k, j) / sweep_element; + gsl_matrix_set (B, i, j, tmp); } } } for (i = 0; i < A->size1; i++) for (j = i; j < A->size2; j++) { - row_i = ordered_cols[i]; - row_j = ordered_cols[j]; - gsl_matrix_set (A, row_i, row_j, gsl_matrix_get (B, row_i, row_j)); + gsl_matrix_set (A, i, j, gsl_matrix_get (B, i, j)); } } gsl_matrix_free (B); - free (ordered_cols); + + gsl_matrix_swap_columns (A, A->size1 - 1 , last_col); + gsl_matrix_swap_rows (A, A->size1 - 1, last_col); + return GSL_SUCCESS; } return GSL_ENOTSQR; -- 1.7.2.5
From 1e60b6097b6ad896787a0aa831d82fd98b946dd5 Mon Sep 17 00:00:00 2001 From: John Darrington <[email protected]> Date: Tue, 15 Nov 2011 14:57:34 +0100 Subject: [PATCH 2/5] sweep.c: Reverse sense of consistency tests. This avoids numerous levels of indentation. --- lib/linreg/sweep.c | 161 ++++++++++++++++++++++++++-------------------------- 1 files changed, 80 insertions(+), 81 deletions(-) diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c index b8b57e8..0f8c223 100644 --- a/lib/linreg/sweep.c +++ b/lib/linreg/sweep.c @@ -38,7 +38,7 @@ Numerical Linear Algebra for Applications in Statistics. JE Gentle. Springer. 1998. ISBN 0-387-98542-5. - */ +*/ #include <config.h> @@ -49,26 +49,32 @@ The matrix A will be overwritten. In ordinary uses of the sweep operator, A will be the matrix - __ __ + __ __ |X'X X'Y| | | |Y'X Y'Y| - -- -- + -- -- - X refers to the design matrix and Y to the vector of dependent - observations. reg_sweep sweeps on the diagonal elements of - X'X. + X refers to the design matrix and Y to the vector of dependent + observations. reg_sweep sweeps on the diagonal elements of + X'X. - The matrix A is assumed to be symmetric, so the sweep operation is - performed only for the upper triangle of A. + The matrix A is assumed to be symmetric, so the sweep operation is + performed only for the upper triangle of A. - LAST_COL is considered to be the final column in the augmented matrix, - that is, the column to the right of the '=' sign of the system. - */ + LAST_COL is considered to be the final column in the augmented matrix, + that is, the column to the right of the '=' sign of the system. +*/ int reg_sweep (gsl_matrix * A, int last_col) { + if (A == NULL) + return GSL_EFAULT; + + if (A->size1 != A->size2) + return GSL_ENOTSQR; + double sweep_element; double tmp; int i; @@ -76,89 +82,82 @@ reg_sweep (gsl_matrix * A, int last_col) int k; gsl_matrix *B; - if (A != NULL) + + assert (last_col < A->size1); + gsl_matrix_swap_rows (A, A->size1 - 1, last_col); + gsl_matrix_swap_columns (A, A->size1 - 1 , last_col); + + B = gsl_matrix_alloc (A->size1, A->size2); + for (k = 0; k < (A->size1 - 1); k++) { - if (A->size1 == A->size2) + sweep_element = gsl_matrix_get (A, k, k); + if (fabs (sweep_element) > GSL_DBL_MIN) { - assert (last_col < A->size1); - gsl_matrix_swap_rows (A, A->size1 - 1, last_col); - gsl_matrix_swap_columns (A, A->size1 - 1 , last_col); - - B = gsl_matrix_alloc (A->size1, A->size2); - for (k = 0; k < (A->size1 - 1); k++) + tmp = -1.0 / sweep_element; + gsl_matrix_set (B, k, k, tmp); + /* + Rows before current row k. + */ + for (i = 0; i < k; i++) { - sweep_element = gsl_matrix_get (A, k, k); - if (fabs (sweep_element) > GSL_DBL_MIN) + for (j = i; j < A->size2; j++) { - tmp = -1.0 / sweep_element; - gsl_matrix_set (B, k, k, tmp); /* - Rows before current row k. - */ - for (i = 0; i < k; i++) + Use only the upper triangle of A. + */ + if (j < k) { - for (j = i; j < A->size2; j++) - { - /* - Use only the upper triangle of A. - */ - if (j < k) - { - tmp = gsl_matrix_get (A, i, j) - - gsl_matrix_get (A, i, k) - * gsl_matrix_get (A, j, k) / sweep_element; - gsl_matrix_set (B, i, j, tmp); - } - else if (j > k) - { - tmp = gsl_matrix_get (A, i, j) - - gsl_matrix_get (A, i, k) - * gsl_matrix_get (A, k, j) / sweep_element; - gsl_matrix_set (B, i, j, tmp); - } - else - { - tmp = gsl_matrix_get (A, i, k) / sweep_element; - gsl_matrix_set (B, i, j, tmp); - } - } + tmp = gsl_matrix_get (A, i, j) - + gsl_matrix_get (A, i, k) + * gsl_matrix_get (A, j, k) / sweep_element; + gsl_matrix_set (B, i, j, tmp); } - /* - Current row k. - */ - for (j = k + 1; j < A->size1; j++) + else if (j > k) { - tmp = gsl_matrix_get (A, k, j) / sweep_element; - gsl_matrix_set (B, k, j, tmp); + tmp = gsl_matrix_get (A, i, j) - + gsl_matrix_get (A, i, k) + * gsl_matrix_get (A, k, j) / sweep_element; + gsl_matrix_set (B, i, j, tmp); } - /* - Rows after the current row k. - */ - for (i = k + 1; i < A->size1; i++) + else { - for (j = i; j < A->size2; j++) - { - tmp = gsl_matrix_get (A, i, j) - - gsl_matrix_get (A, k, i) - * gsl_matrix_get (A, k, j) / sweep_element; - gsl_matrix_set (B, i, j, tmp); - } + tmp = gsl_matrix_get (A, i, k) / sweep_element; + gsl_matrix_set (B, i, j, tmp); } } - for (i = 0; i < A->size1; i++) - for (j = i; j < A->size2; j++) - { - gsl_matrix_set (A, i, j, gsl_matrix_get (B, i, j)); - } } - gsl_matrix_free (B); - - gsl_matrix_swap_columns (A, A->size1 - 1 , last_col); - gsl_matrix_swap_rows (A, A->size1 - 1, last_col); - - return GSL_SUCCESS; + /* + Current row k. + */ + for (j = k + 1; j < A->size1; j++) + { + tmp = gsl_matrix_get (A, k, j) / sweep_element; + gsl_matrix_set (B, k, j, tmp); + } + /* + Rows after the current row k. + */ + for (i = k + 1; i < A->size1; i++) + { + for (j = i; j < A->size2; j++) + { + tmp = gsl_matrix_get (A, i, j) - + gsl_matrix_get (A, k, i) + * gsl_matrix_get (A, k, j) / sweep_element; + gsl_matrix_set (B, i, j, tmp); + } + } } - return GSL_ENOTSQR; + for (i = 0; i < A->size1; i++) + for (j = i; j < A->size2; j++) + { + gsl_matrix_set (A, i, j, gsl_matrix_get (B, i, j)); + } } - return GSL_EFAULT; + gsl_matrix_free (B); + + gsl_matrix_swap_columns (A, A->size1 - 1 , last_col); + gsl_matrix_swap_rows (A, A->size1 - 1, last_col); + + return GSL_SUCCESS; } -- 1.7.2.5
From b8f1c67b087b875d45c28bcae330910e7723d6c6 Mon Sep 17 00:00:00 2001 From: John Darrington <[email protected]> Date: Tue, 15 Nov 2011 15:14:30 +0100 Subject: [PATCH 3/5] sweep.c: Reduce scope of local variables and avoid reusing them. This makes the code easier to follow, and helps the compiler with optimisation. --- lib/linreg/sweep.c | 16 ++++++---------- 1 files changed, 6 insertions(+), 10 deletions(-) diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c index 0f8c223..c218456 100644 --- a/lib/linreg/sweep.c +++ b/lib/linreg/sweep.c @@ -75,8 +75,6 @@ reg_sweep (gsl_matrix * A, int last_col) if (A->size1 != A->size2) return GSL_ENOTSQR; - double sweep_element; - double tmp; int i; int j; int k; @@ -90,11 +88,10 @@ reg_sweep (gsl_matrix * A, int last_col) B = gsl_matrix_alloc (A->size1, A->size2); for (k = 0; k < (A->size1 - 1); k++) { - sweep_element = gsl_matrix_get (A, k, k); + const double sweep_element = gsl_matrix_get (A, k, k); if (fabs (sweep_element) > GSL_DBL_MIN) { - tmp = -1.0 / sweep_element; - gsl_matrix_set (B, k, k, tmp); + gsl_matrix_set (B, k, k, -1.0 / sweep_element); /* Rows before current row k. */ @@ -102,9 +99,8 @@ reg_sweep (gsl_matrix * A, int last_col) { for (j = i; j < A->size2; j++) { - /* - Use only the upper triangle of A. - */ + /* Use only the upper triangle of A. */ + double tmp; if (j < k) { tmp = gsl_matrix_get (A, i, j) - @@ -131,7 +127,7 @@ reg_sweep (gsl_matrix * A, int last_col) */ for (j = k + 1; j < A->size1; j++) { - tmp = gsl_matrix_get (A, k, j) / sweep_element; + double tmp = gsl_matrix_get (A, k, j) / sweep_element; gsl_matrix_set (B, k, j, tmp); } /* @@ -141,7 +137,7 @@ reg_sweep (gsl_matrix * A, int last_col) { for (j = i; j < A->size2; j++) { - tmp = gsl_matrix_get (A, i, j) - + double tmp = gsl_matrix_get (A, i, j) - gsl_matrix_get (A, k, i) * gsl_matrix_get (A, k, j) / sweep_element; gsl_matrix_set (B, i, j, tmp); -- 1.7.2.5
From 019d870c9a961108b6b5e762ce968db2e06203a7 Mon Sep 17 00:00:00 2001 From: John Darrington <[email protected]> Date: Tue, 15 Nov 2011 15:47:10 +0100 Subject: [PATCH 5/5] sweep.c: Use gsl_matrix_memcpy instead of element by element copying. Although only the upper triangle needs to be copied, there is no disadvantage copying the entire matrix (the lower triangle is never read) and the expense of using two loops is likely to outweigh the saving achieved by avoiding the other triangle. --- lib/linreg/sweep.c | 6 +----- 1 files changed, 1 insertions(+), 5 deletions(-) diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c index 7667e3a..1eb466e 100644 --- a/lib/linreg/sweep.c +++ b/lib/linreg/sweep.c @@ -142,11 +142,7 @@ reg_sweep (gsl_matrix * A, int last_col) } } } - for (i = 0; i < A->size1; i++) - for (j = i; j < A->size2; j++) - { - gsl_matrix_set (A, i, j, gsl_matrix_get (B, i, j)); - } + gsl_matrix_memcpy (A, B); } gsl_matrix_free (B); -- 1.7.2.5
From 90afc1f0572a82bd6eb4532350b368c685b3cc7b Mon Sep 17 00:00:00 2001 From: John Darrington <[email protected]> Date: Tue, 15 Nov 2011 15:20:59 +0100 Subject: [PATCH 4/5] sweep.c: Move repeated call out of if ... else This line is identical for all cases, so it only needs to appear once. --- lib/linreg/sweep.c | 4 +--- 1 files changed, 1 insertions(+), 3 deletions(-) diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c index c218456..7667e3a 100644 --- a/lib/linreg/sweep.c +++ b/lib/linreg/sweep.c @@ -106,20 +106,18 @@ reg_sweep (gsl_matrix * A, int last_col) tmp = gsl_matrix_get (A, i, j) - gsl_matrix_get (A, i, k) * gsl_matrix_get (A, j, k) / sweep_element; - gsl_matrix_set (B, i, j, tmp); } else if (j > k) { tmp = gsl_matrix_get (A, i, j) - gsl_matrix_get (A, i, k) * gsl_matrix_get (A, k, j) / sweep_element; - gsl_matrix_set (B, i, j, tmp); } else { tmp = gsl_matrix_get (A, i, k) / sweep_element; - gsl_matrix_set (B, i, j, tmp); } + gsl_matrix_set (B, i, j, tmp); } } /* -- 1.7.2.5
signature.asc
Description: Digital signature
_______________________________________________ pspp-dev mailing list [email protected] https://lists.gnu.org/mailman/listinfo/pspp-dev
