Recently I suspected the reg_sweep function in sweep.c was causing
problems for GLM.  As it turned out, my suspicions were unfounded.

However I'm posting a few patches I came up with  during my investigations.
None of them should change the behaviour of the code.  But potentially
make it faster and IMO easier to follow.

If nobody objects, I'll push them to master in a day or so.

J'

-- 
PGP Public key ID: 1024D/2DE827B3 
fingerprint = 8797 A26D 0854 2EAB 0285  A290 8A67 719C 2DE8 27B3
See http://pgp.mit.edu or any PGP keyserver for public key.

From f46d2c634b595c67a5af7994de9f6c851f632898 Mon Sep 17 00:00:00 2001
From: John Darrington <[email protected]>
Date: Tue, 15 Nov 2011 14:51:49 +0100
Subject: [PATCH 1/5] sweep.c: swap rows/columns instead of using indirection for last_col

This makes the code shorter, and I believe should make it faster too.
---
 lib/linreg/sweep.c |   74 +++++++++++++++++++++-------------------------------
 1 files changed, 30 insertions(+), 44 deletions(-)

diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c
index b72d24a..b8b57e8 100644
--- a/lib/linreg/sweep.c
+++ b/lib/linreg/sweep.c
@@ -44,6 +44,7 @@
 
 #include "sweep.h"
 
+#include <assert.h>
 /*
   The matrix A will be overwritten. In ordinary uses of the sweep
   operator, A will be the matrix
@@ -73,65 +74,52 @@ reg_sweep (gsl_matrix * A, int last_col)
   int i;
   int j;
   int k;
-  int row_i;
-  int row_k;
-  int row_j;
-  int *ordered_cols;
   gsl_matrix *B;
 
   if (A != NULL)
     {
       if (A->size1 == A->size2)
 	{
-	  ordered_cols = malloc (A->size1 * sizeof (*ordered_cols));
-	  for (i = 0; i < last_col; i++)
-	    {
-	      ordered_cols[i] = i;
-	    }
-	  for (i = last_col + 1; i < A->size1; i++)
-	    {
-	      ordered_cols[i - 1] = i;
-	    }
-	  ordered_cols[A->size1 - 1] = last_col;
+          assert (last_col < A->size1);
+	  gsl_matrix_swap_rows (A, A->size1 - 1, last_col);
+	  gsl_matrix_swap_columns (A, A->size1 - 1 , last_col);
+	  
 	  B = gsl_matrix_alloc (A->size1, A->size2);
 	  for (k = 0; k < (A->size1 - 1); k++)
 	    {
-	      row_k = ordered_cols[k];
-	      sweep_element = gsl_matrix_get (A, row_k, row_k);
+	      sweep_element = gsl_matrix_get (A, k, k);
 	      if (fabs (sweep_element) > GSL_DBL_MIN)
 		{
 		  tmp = -1.0 / sweep_element;
-		  gsl_matrix_set (B, row_k, row_k, tmp);
+		  gsl_matrix_set (B, k, k, tmp);
 		  /*
 		     Rows before current row k.
 		   */
 		  for (i = 0; i < k; i++)
 		    {
-		      row_i = ordered_cols[i];
 		      for (j = i; j < A->size2; j++)
 			{
-			  row_j = ordered_cols[j];
 			  /*
 			     Use only the upper triangle of A.
 			   */
-			  if (row_j < row_k)
+			  if (j < k)
 			    {
-			      tmp = gsl_matrix_get (A, row_i, row_j) -
-				gsl_matrix_get (A, row_i, row_k)
-				* gsl_matrix_get (A, row_j, row_k) / sweep_element;
-			      gsl_matrix_set (B, row_i, row_j, tmp);
+			      tmp = gsl_matrix_get (A, i, j) -
+				gsl_matrix_get (A, i, k)
+				* gsl_matrix_get (A, j, k) / sweep_element;
+			      gsl_matrix_set (B, i, j, tmp);
 			    }
-			  else if (row_j > row_k)
+			  else if (j > k)
 			    {
-			      tmp = gsl_matrix_get (A, row_i, row_j) -
-				gsl_matrix_get (A, row_i, row_k)
-				* gsl_matrix_get (A, row_k, row_j) / sweep_element;
-			      gsl_matrix_set (B, row_i, row_j, tmp);
+			      tmp = gsl_matrix_get (A, i, j) -
+				gsl_matrix_get (A, i, k)
+				* gsl_matrix_get (A, k, j) / sweep_element;
+			      gsl_matrix_set (B, i, j, tmp);
 			    }
 			  else
 			    {
-			      tmp = gsl_matrix_get (A, row_i, row_k) / sweep_element;
-			      gsl_matrix_set (B, row_i, row_j, tmp);
+			      tmp = gsl_matrix_get (A, i, k) / sweep_element;
+			      gsl_matrix_set (B, i, j, tmp);
 			    }
 			}
 		    }
@@ -140,36 +128,34 @@ reg_sweep (gsl_matrix * A, int last_col)
 		   */
 		  for (j = k + 1; j < A->size1; j++)
 		    {
-		      row_j = ordered_cols[j];
-		      tmp = gsl_matrix_get (A, row_k, row_j) / sweep_element;
-		      gsl_matrix_set (B, row_k, row_j, tmp);
+		      tmp = gsl_matrix_get (A, k, j) / sweep_element;
+		      gsl_matrix_set (B, k, j, tmp);
 		    }
 		  /*
 		     Rows after the current row k.
 		   */
 		  for (i = k + 1; i < A->size1; i++)
 		    {
-		      row_i = ordered_cols[i];
 		      for (j = i; j < A->size2; j++)
 			{
-			  row_j = ordered_cols[j];
-			  tmp = gsl_matrix_get (A, row_i, row_j) -
-			    gsl_matrix_get (A, row_k, row_i)
-			    * gsl_matrix_get (A, row_k, row_j) / sweep_element;
-			  gsl_matrix_set (B, row_i, row_j, tmp);
+			  tmp = gsl_matrix_get (A, i, j) -
+			    gsl_matrix_get (A, k, i)
+			    * gsl_matrix_get (A, k, j) / sweep_element;
+			  gsl_matrix_set (B, i, j, tmp);
 			}
 		    }
 		}
 	      for (i = 0; i < A->size1; i++)
 		for (j = i; j < A->size2; j++)
 		  {
-		    row_i = ordered_cols[i];
-		    row_j = ordered_cols[j];
-		    gsl_matrix_set (A, row_i, row_j, gsl_matrix_get (B, row_i, row_j));
+		    gsl_matrix_set (A, i, j, gsl_matrix_get (B, i, j));
 		  }
 	    }
 	  gsl_matrix_free (B);
-	  free (ordered_cols);
+
+	  gsl_matrix_swap_columns (A, A->size1 - 1 , last_col);
+	  gsl_matrix_swap_rows (A, A->size1 - 1, last_col);
+
 	  return GSL_SUCCESS;
 	}
       return GSL_ENOTSQR;
-- 
1.7.2.5

From 1e60b6097b6ad896787a0aa831d82fd98b946dd5 Mon Sep 17 00:00:00 2001
From: John Darrington <[email protected]>
Date: Tue, 15 Nov 2011 14:57:34 +0100
Subject: [PATCH 2/5] sweep.c: Reverse sense of consistency tests.

This avoids numerous levels of indentation.
---
 lib/linreg/sweep.c |  161 ++++++++++++++++++++++++++--------------------------
 1 files changed, 80 insertions(+), 81 deletions(-)

diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c
index b8b57e8..0f8c223 100644
--- a/lib/linreg/sweep.c
+++ b/lib/linreg/sweep.c
@@ -38,7 +38,7 @@
 
   Numerical Linear Algebra for Applications in Statistics. JE Gentle.
   Springer. 1998. ISBN 0-387-98542-5.
- */
+*/
 
 #include <config.h>
 
@@ -49,26 +49,32 @@
   The matrix A will be overwritten. In ordinary uses of the sweep
   operator, A will be the matrix
 
-   __       __
+  __       __
   |X'X    X'Y|
   |          |
   |Y'X    Y'Y|
-   --        --
+  --        --
 
-   X refers to the design matrix and Y to the vector of dependent
-   observations. reg_sweep sweeps on the diagonal elements of
-   X'X.
+  X refers to the design matrix and Y to the vector of dependent
+  observations. reg_sweep sweeps on the diagonal elements of
+  X'X.
 
-   The matrix A is assumed to be symmetric, so the sweep operation is
-   performed only for the upper triangle of A.
+  The matrix A is assumed to be symmetric, so the sweep operation is
+  performed only for the upper triangle of A.
 
-   LAST_COL is considered to be the final column in the augmented matrix,
-   that is, the column to the right of the '=' sign of the system.
- */
+  LAST_COL is considered to be the final column in the augmented matrix,
+  that is, the column to the right of the '=' sign of the system.
+*/
 
 int
 reg_sweep (gsl_matrix * A, int last_col)
 {
+  if (A == NULL)
+    return GSL_EFAULT;
+
+  if (A->size1 != A->size2)
+    return GSL_ENOTSQR;
+
   double sweep_element;
   double tmp;
   int i;
@@ -76,89 +82,82 @@ reg_sweep (gsl_matrix * A, int last_col)
   int k;
   gsl_matrix *B;
 
-  if (A != NULL)
+
+  assert (last_col < A->size1);
+  gsl_matrix_swap_rows (A, A->size1 - 1, last_col);
+  gsl_matrix_swap_columns (A, A->size1 - 1 , last_col);
+	  
+  B = gsl_matrix_alloc (A->size1, A->size2);
+  for (k = 0; k < (A->size1 - 1); k++)
     {
-      if (A->size1 == A->size2)
+      sweep_element = gsl_matrix_get (A, k, k);
+      if (fabs (sweep_element) > GSL_DBL_MIN)
 	{
-          assert (last_col < A->size1);
-	  gsl_matrix_swap_rows (A, A->size1 - 1, last_col);
-	  gsl_matrix_swap_columns (A, A->size1 - 1 , last_col);
-	  
-	  B = gsl_matrix_alloc (A->size1, A->size2);
-	  for (k = 0; k < (A->size1 - 1); k++)
+	  tmp = -1.0 / sweep_element;
+	  gsl_matrix_set (B, k, k, tmp);
+	  /*
+	    Rows before current row k.
+	  */
+	  for (i = 0; i < k; i++)
 	    {
-	      sweep_element = gsl_matrix_get (A, k, k);
-	      if (fabs (sweep_element) > GSL_DBL_MIN)
+	      for (j = i; j < A->size2; j++)
 		{
-		  tmp = -1.0 / sweep_element;
-		  gsl_matrix_set (B, k, k, tmp);
 		  /*
-		     Rows before current row k.
-		   */
-		  for (i = 0; i < k; i++)
+		    Use only the upper triangle of A.
+		  */
+		  if (j < k)
 		    {
-		      for (j = i; j < A->size2; j++)
-			{
-			  /*
-			     Use only the upper triangle of A.
-			   */
-			  if (j < k)
-			    {
-			      tmp = gsl_matrix_get (A, i, j) -
-				gsl_matrix_get (A, i, k)
-				* gsl_matrix_get (A, j, k) / sweep_element;
-			      gsl_matrix_set (B, i, j, tmp);
-			    }
-			  else if (j > k)
-			    {
-			      tmp = gsl_matrix_get (A, i, j) -
-				gsl_matrix_get (A, i, k)
-				* gsl_matrix_get (A, k, j) / sweep_element;
-			      gsl_matrix_set (B, i, j, tmp);
-			    }
-			  else
-			    {
-			      tmp = gsl_matrix_get (A, i, k) / sweep_element;
-			      gsl_matrix_set (B, i, j, tmp);
-			    }
-			}
+		      tmp = gsl_matrix_get (A, i, j) -
+			gsl_matrix_get (A, i, k)
+			* gsl_matrix_get (A, j, k) / sweep_element;
+		      gsl_matrix_set (B, i, j, tmp);
 		    }
-		  /*
-		     Current row k.
-		   */
-		  for (j = k + 1; j < A->size1; j++)
+		  else if (j > k)
 		    {
-		      tmp = gsl_matrix_get (A, k, j) / sweep_element;
-		      gsl_matrix_set (B, k, j, tmp);
+		      tmp = gsl_matrix_get (A, i, j) -
+			gsl_matrix_get (A, i, k)
+			* gsl_matrix_get (A, k, j) / sweep_element;
+		      gsl_matrix_set (B, i, j, tmp);
 		    }
-		  /*
-		     Rows after the current row k.
-		   */
-		  for (i = k + 1; i < A->size1; i++)
+		  else
 		    {
-		      for (j = i; j < A->size2; j++)
-			{
-			  tmp = gsl_matrix_get (A, i, j) -
-			    gsl_matrix_get (A, k, i)
-			    * gsl_matrix_get (A, k, j) / sweep_element;
-			  gsl_matrix_set (B, i, j, tmp);
-			}
+		      tmp = gsl_matrix_get (A, i, k) / sweep_element;
+		      gsl_matrix_set (B, i, j, tmp);
 		    }
 		}
-	      for (i = 0; i < A->size1; i++)
-		for (j = i; j < A->size2; j++)
-		  {
-		    gsl_matrix_set (A, i, j, gsl_matrix_get (B, i, j));
-		  }
 	    }
-	  gsl_matrix_free (B);
-
-	  gsl_matrix_swap_columns (A, A->size1 - 1 , last_col);
-	  gsl_matrix_swap_rows (A, A->size1 - 1, last_col);
-
-	  return GSL_SUCCESS;
+	  /*
+	    Current row k.
+	  */
+	  for (j = k + 1; j < A->size1; j++)
+	    {
+	      tmp = gsl_matrix_get (A, k, j) / sweep_element;
+	      gsl_matrix_set (B, k, j, tmp);
+	    }
+	  /*
+	    Rows after the current row k.
+	  */
+	  for (i = k + 1; i < A->size1; i++)
+	    {
+	      for (j = i; j < A->size2; j++)
+		{
+		  tmp = gsl_matrix_get (A, i, j) -
+		    gsl_matrix_get (A, k, i)
+		    * gsl_matrix_get (A, k, j) / sweep_element;
+		  gsl_matrix_set (B, i, j, tmp);
+		}
+	    }
 	}
-      return GSL_ENOTSQR;
+      for (i = 0; i < A->size1; i++)
+	for (j = i; j < A->size2; j++)
+	  {
+	    gsl_matrix_set (A, i, j, gsl_matrix_get (B, i, j));
+	  }
     }
-  return GSL_EFAULT;
+  gsl_matrix_free (B);
+
+  gsl_matrix_swap_columns (A, A->size1 - 1 , last_col);
+  gsl_matrix_swap_rows (A, A->size1 - 1, last_col);
+
+  return GSL_SUCCESS;
 }
-- 
1.7.2.5

From b8f1c67b087b875d45c28bcae330910e7723d6c6 Mon Sep 17 00:00:00 2001
From: John Darrington <[email protected]>
Date: Tue, 15 Nov 2011 15:14:30 +0100
Subject: [PATCH 3/5] sweep.c: Reduce scope of local variables and avoid reusing them.

This makes the code easier to follow, and helps the compiler with
optimisation.
---
 lib/linreg/sweep.c |   16 ++++++----------
 1 files changed, 6 insertions(+), 10 deletions(-)

diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c
index 0f8c223..c218456 100644
--- a/lib/linreg/sweep.c
+++ b/lib/linreg/sweep.c
@@ -75,8 +75,6 @@ reg_sweep (gsl_matrix * A, int last_col)
   if (A->size1 != A->size2)
     return GSL_ENOTSQR;
 
-  double sweep_element;
-  double tmp;
   int i;
   int j;
   int k;
@@ -90,11 +88,10 @@ reg_sweep (gsl_matrix * A, int last_col)
   B = gsl_matrix_alloc (A->size1, A->size2);
   for (k = 0; k < (A->size1 - 1); k++)
     {
-      sweep_element = gsl_matrix_get (A, k, k);
+      const double sweep_element = gsl_matrix_get (A, k, k);
       if (fabs (sweep_element) > GSL_DBL_MIN)
 	{
-	  tmp = -1.0 / sweep_element;
-	  gsl_matrix_set (B, k, k, tmp);
+	  gsl_matrix_set (B, k, k, -1.0 / sweep_element);
 	  /*
 	    Rows before current row k.
 	  */
@@ -102,9 +99,8 @@ reg_sweep (gsl_matrix * A, int last_col)
 	    {
 	      for (j = i; j < A->size2; j++)
 		{
-		  /*
-		    Use only the upper triangle of A.
-		  */
+		  /* Use only the upper triangle of A. */
+		  double tmp;
 		  if (j < k)
 		    {
 		      tmp = gsl_matrix_get (A, i, j) -
@@ -131,7 +127,7 @@ reg_sweep (gsl_matrix * A, int last_col)
 	  */
 	  for (j = k + 1; j < A->size1; j++)
 	    {
-	      tmp = gsl_matrix_get (A, k, j) / sweep_element;
+	      double tmp = gsl_matrix_get (A, k, j) / sweep_element;
 	      gsl_matrix_set (B, k, j, tmp);
 	    }
 	  /*
@@ -141,7 +137,7 @@ reg_sweep (gsl_matrix * A, int last_col)
 	    {
 	      for (j = i; j < A->size2; j++)
 		{
-		  tmp = gsl_matrix_get (A, i, j) -
+		  double tmp = gsl_matrix_get (A, i, j) -
 		    gsl_matrix_get (A, k, i)
 		    * gsl_matrix_get (A, k, j) / sweep_element;
 		  gsl_matrix_set (B, i, j, tmp);
-- 
1.7.2.5

From 019d870c9a961108b6b5e762ce968db2e06203a7 Mon Sep 17 00:00:00 2001
From: John Darrington <[email protected]>
Date: Tue, 15 Nov 2011 15:47:10 +0100
Subject: [PATCH 5/5] sweep.c: Use gsl_matrix_memcpy instead of element by element copying.

Although only the upper triangle needs to be copied, there is no
disadvantage copying the entire matrix (the lower triangle is never read)
and the expense of using two loops is likely to outweigh the saving achieved
by avoiding the other triangle.
---
 lib/linreg/sweep.c |    6 +-----
 1 files changed, 1 insertions(+), 5 deletions(-)

diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c
index 7667e3a..1eb466e 100644
--- a/lib/linreg/sweep.c
+++ b/lib/linreg/sweep.c
@@ -142,11 +142,7 @@ reg_sweep (gsl_matrix * A, int last_col)
 		}
 	    }
 	}
-      for (i = 0; i < A->size1; i++)
-	for (j = i; j < A->size2; j++)
-	  {
-	    gsl_matrix_set (A, i, j, gsl_matrix_get (B, i, j));
-	  }
+      gsl_matrix_memcpy (A, B);
     }
   gsl_matrix_free (B);
 
-- 
1.7.2.5

From 90afc1f0572a82bd6eb4532350b368c685b3cc7b Mon Sep 17 00:00:00 2001
From: John Darrington <[email protected]>
Date: Tue, 15 Nov 2011 15:20:59 +0100
Subject: [PATCH 4/5] sweep.c: Move repeated call out of if ... else

This line is identical for all cases, so it only needs to appear once.
---
 lib/linreg/sweep.c |    4 +---
 1 files changed, 1 insertions(+), 3 deletions(-)

diff --git a/lib/linreg/sweep.c b/lib/linreg/sweep.c
index c218456..7667e3a 100644
--- a/lib/linreg/sweep.c
+++ b/lib/linreg/sweep.c
@@ -106,20 +106,18 @@ reg_sweep (gsl_matrix * A, int last_col)
 		      tmp = gsl_matrix_get (A, i, j) -
 			gsl_matrix_get (A, i, k)
 			* gsl_matrix_get (A, j, k) / sweep_element;
-		      gsl_matrix_set (B, i, j, tmp);
 		    }
 		  else if (j > k)
 		    {
 		      tmp = gsl_matrix_get (A, i, j) -
 			gsl_matrix_get (A, i, k)
 			* gsl_matrix_get (A, k, j) / sweep_element;
-		      gsl_matrix_set (B, i, j, tmp);
 		    }
 		  else
 		    {
 		      tmp = gsl_matrix_get (A, i, k) / sweep_element;
-		      gsl_matrix_set (B, i, j, tmp);
 		    }
+		  gsl_matrix_set (B, i, j, tmp);
 		}
 	    }
 	  /*
-- 
1.7.2.5

Attachment: signature.asc
Description: Digital signature

_______________________________________________
pspp-dev mailing list
[email protected]
https://lists.gnu.org/mailman/listinfo/pspp-dev

Reply via email to