This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push:
new c61e7ac [MINOR] Integration of steplm builtin (avoid excessive test output)
c61e7ac is described below
commit c61e7ac71a83df3d525b6131ca76cf8252c6802f
Author: Matthias Boehm <[email protected]>
AuthorDate: Sun May 24 17:19:39 2020 +0200
[MINOR] Integration of steplm builtin (avoid excessive test output)
---
scripts/algorithms/StepLinearRegDS.dml | 324 +--------------------------------
scripts/builtin/steplm.dml | 19 +-
2 files changed, 16 insertions(+), 327 deletions(-)
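In essence, the bulk of StepLinearRegDS.dml is deleted in favor of a single call to the steplm builtin, and the builtin's prints are gated behind its verbose flag so tests no longer flood the log. A minimal DML sketch of using the builtin directly (random toy data; parameter names as in the diff below, defaults assumed for all remaining arguments):

  X = rand(rows=100, cols=10, seed=7);
  y = X %*% rand(rows=10, cols=1, seed=8);
  [B, S] = steplm(X=X, y=y, icpt=1, verbose=FALSE);  # betas and selected column indices
  print(toString(S));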
diff --git a/scripts/algorithms/StepLinearRegDS.dml b/scripts/algorithms/StepLinearRegDS.dml
index 20b1777..a8740f5 100644
--- a/scripts/algorithms/StepLinearRegDS.dml
+++ b/scripts/algorithms/StepLinearRegDS.dml
@@ -79,331 +79,15 @@ fileX = $X;
fileY = $Y;
fileB = $B;
fileS = $S;
-
write_beta = ifdef($write_beta, TRUE);
-
-# currently only the forward selection strategy is supported: start from one feature and iteratively add
-# features until AIC improves
-dir = "forward";
-
fmt = ifdef ($fmt, "text");
-intercept_status = ifdef ($icpt, 1);
+intercept = ifdef ($icpt, 1);
thr = ifdef ($thr, 0.001);
-print ("BEGIN STEPWISE LINEAR REGRESSION SCRIPT");
-print ("Reading X and Y...");
X_orig = read (fileX);
y = read (fileY);
-n = nrow (X_orig);
-m_orig = ncol (X_orig);
-
-# BEGIN STEPWISE LINEAR REGRESSION
-
-if (dir == "forward") {
- continue = TRUE;
- columns_fixed = matrix (0, rows = 1, cols = m_orig);
- columns_fixed_ordered = matrix (0, rows = 1, cols = 1);
-
- # X_global stores the best model found at each step
- X_global = matrix (0, rows = n, cols = 1);
-
- if (intercept_status == 1 | intercept_status == 2) {
- beta = mean (y);
- AIC_best = 2 + n * log(sum((beta - y)^2) / n);
- } else {
- beta = 0.0;
- AIC_best = n * log(sum(y^2) / n);
- }
-
- AICs = matrix (AIC_best, rows = 1, cols = m_orig);
- print ("Best AIC without any features: " + AIC_best);
-
- boa_ncol = ncol(X_orig)
- if (intercept_status != 0) {
- boa_ncol = boa_ncol + 1
- }
-
- beta_out_all = matrix(0, rows = boa_ncol, cols = m_orig * 1);
-
- y_ncol = 1;
-
- # First pass to examine single features
- parfor (i in 1:m_orig, check = 0) {
- columns_fixed_ordered_1 = matrix(i, rows=1, cols=1);
-
- [AIC_1, beta_out_i] = linear_regression (X_orig[, i], y, m_orig, columns_fixed_ordered_1,
- write_beta, 0);
-
- AICs[1, i] = AIC_1;
-
- beta_out_all[1:nrow(beta_out_i), (i - 1) * y_ncol + 1 : i * y_ncol] = beta_out_i[, 1:1];
-
- }
-
- # Determine the best AIC
- column_best = 0;
- for (k in 1:m_orig) {
- AIC_cur = as.scalar (AICs[1, k]);
- if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * AIC_best)) ) {
- column_best = k;
- AIC_best = as.scalar(AICs[1, k]);
- }
- }
-
- # beta best so far
- beta_best = beta_out_all[, (column_best-1) * y_ncol + 1: column_best * y_ncol];
-
- if (column_best == 0) {
- print ("AIC of an empty model is " + AIC_best + " and adding no feature
achieves more than " +
- (thr * 100) + "% decrease in AIC!");
- Selected = matrix (0, rows = 1, cols = 1);
- if (intercept_status == 0) {
- B = matrix (beta, rows = m_orig, cols = 1);
- } else {
- B_tmp = matrix (0, rows = m_orig + 1, cols = 1);
- B_tmp[m_orig + 1, ] = beta;
- B = B_tmp;
- }
-
- beta_out = B;
-
- write(Selected, fileS, format=fmt);
- write(beta_out, fileB, format=fmt);
-
- stop ("");
- }
- print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);
- columns_fixed[1, column_best] = 1;
- columns_fixed_ordered[1, 1] = column_best;
- X_global = X_orig[, column_best];
-
- while (continue) {
- # Subsequent passes over the features
- beta_out_all_2 = matrix(0, rows = boa_ncol, cols = m_orig * 1);
-
- parfor (i in 1:m_orig, check = 0) {
- if (as.scalar(columns_fixed[1, i]) == 0) {
-
- # Construct the feature matrix
- X = cbind (X_global, X_orig[, i]);
-
- tmp = matrix(0, rows=1, cols=1);
- tmp[1, 1] = i;
- columns_fixed_ordered_2 = append(columns_fixed_ordered, tmp )
- [AIC_2, beta_out_i] = linear_regression (X, y, m_orig, columns_fixed_ordered_2, write_beta, 0);
- beta_out_all_2[1:nrow(beta_out_i), (i - 1) * y_ncol + 1 : i * y_ncol] = beta_out_i[,1:1];
-
- AICs[1, i] = AIC_2;
- }
- }
-
- # Determine the best AIC
- for (k in 1:m_orig) {
- AIC_cur = as.scalar (AICs[1, k]);
- if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * AIC_best)) &
- (as.scalar(columns_fixed[1, k]) == 0) ) {
- column_best = k;
- AIC_best = as.scalar(AICs[1, k]);
- }
- }
-
- # keep the best beta found so far
- beta_best = beta_out_all_2[, (column_best - 1) * y_ncol + 1 : column_best * y_ncol];
-
- # Append best found features (i.e., columns) to X_global
- if (as.scalar(columns_fixed[1, column_best]) == 0) { # new best feature found
- print ("Best AIC " + AIC_best + " achieved with feature: " + column_best);
- columns_fixed[1, column_best] = 1;
- columns_fixed_ordered = cbind (columns_fixed_ordered, as.matrix(column_best));
-
- if (ncol(columns_fixed_ordered) == m_orig) { # all features examined
- X_global = cbind (X_global, X_orig[, column_best]);
- continue = FALSE;
- } else {
- X_global = cbind (X_global, X_orig[, column_best]);
- }
- } else {
- continue = FALSE;
- }
-
- }
-
- # run linear regression with selected set of features
- print ("Running linear regression with selected features...");
- [AIC, beta_out] = linear_regression (X_global, y, m_orig, columns_fixed_ordered, write_beta, 1);
-
- Selected = columns_fixed_ordered;
- if (intercept_status != 0) {
- Selected = cbind(Selected, matrix(boa_ncol, rows=1, cols=1))
- }
-
- beta_out = reorder_matrix(boa_ncol, beta_out, Selected);
-
- write(Selected, fileS, format=fmt);
- write(beta_out, fileB, format=fmt);
-
-} else {
- stop ("Currently only forward selection strategy is supported!");
-}
-
-# Computes linear regression using a direct solver for (X^T X) beta = X^T y.
-# It also outputs the AIC of the computed model.
-
-linear_regression = function (Matrix[Double] X, Matrix[Double] y, Double m_orig,
- Matrix[Double] Selected, Boolean write_beta, Boolean writeStats)
- return (Double AIC, Matrix[Double] beta) {
-
- intercept_status = ifdef ($icpt, 0);
- fmt = ifdef ($fmt, "text");
- n = nrow (X);
- m = ncol (X);
-
- # Introduce the intercept, shift and rescale the columns of X if needed
- if (intercept_status == 1 | intercept_status == 2) { # add the intercept column
- ones_n = matrix (1, rows = n, cols = 1);
- X = cbind (X, ones_n);
- m = m - 1;
- }
-
- m_ext = ncol(X);
-
- if (intercept_status == 2) { # scale-&-shift X columns to mean 0, variance 1
- # Important assumption: X [, m_ext] = ones_n
- avg_X_cols = t(colSums(X)) / n;
- var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 1);
- is_unsafe = (var_X_cols <= 0);
- scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
- scale_X [m_ext, 1] = 1;
- shift_X = - avg_X_cols * scale_X;
- shift_X [m_ext, 1] = 0;
- } else {
- scale_X = matrix (1, rows = m_ext, cols = 1);
- shift_X = matrix (0, rows = m_ext, cols = 1);
- }
-
- # BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL)
-
- A = t(X) %*% X;
- b = t(X) %*% y;
- if (intercept_status == 2) {
- A = t(diag (scale_X) %*% A + shift_X %*% A [m_ext, ]);
- A = diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
- b = diag (scale_X) %*% b + shift_X %*% b [m_ext, ];
- }
-
- beta_unscaled = solve (A, b);
-
- # END THE DIRECT SOLVE ALGORITHM
-
- if (intercept_status == 2) {
- beta = scale_X * beta_unscaled;
- beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
- } else {
- beta = beta_unscaled;
- }
-
- # COMPUTE AIC
- y_residual = y - X %*% beta;
- ss_res = sum (y_residual ^ 2);
- eq_deg_of_freedom = m_ext;
- AIC = (2 * eq_deg_of_freedom) + n * log (ss_res / n);
-
- if(write_beta == 1) {
- fileO = ifdef ($O, " ");
- fileS = $S;
-
- print ("Computing the statistics...");
- avg_tot = sum (y) / n;
- ss_tot = sum (y ^ 2);
- ss_avg_tot = ss_tot - n * avg_tot ^ 2;
- var_tot = ss_avg_tot / (n - 1);
- # y_residual = y - X %*% beta;
- avg_res = sum (y_residual) / n;
- # ss_res = sum (y_residual ^ 2);
- ss_avg_res = ss_res - n * avg_res ^ 2;
-
- R2 = 1 - ss_res / ss_avg_tot;
- if (n > m_ext) {
- dispersion = ss_res / (n - m_ext);
- adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
- } else {
- dispersion = NaN;
- adjusted_R2 = NaN;
- }
-
- R2_nobias = 1 - ss_avg_res / ss_avg_tot;
- deg_freedom = n - m - 1;
- if (deg_freedom > 0) {
- var_res = ss_avg_res / deg_freedom;
- adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 1));
- } else {
- var_res = NaN;
- adjusted_R2_nobias = NaN;
- print ("Warning: zero or negative number of degrees of freedom.");
- }
-
- R2_vs_0 = 1 - ss_res / ss_tot;
- if (n > m) {
- adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
- } else {
- adjusted_R2_vs_0 = NaN;
- }
-
- str = "AVG_TOT_Y," + avg_tot; #
Average of the response value Y
- str = append (str, "STDEV_TOT_Y," + sqrt (var_tot)); #
Standard Deviation of the response value Y
- str = append (str, "AVG_RES_Y," + avg_res); #
Average of the residual Y - pred(Y|X), i.e. residual bias
- str = append (str, "STDEV_RES_Y," + sqrt (var_res)); #
Standard Deviation of the residual Y - pred(Y|X)
- str = append (str, "DISPERSION," + dispersion); #
GLM-style dispersion, i.e. residual sum of squares / # d.f.
- str = append (str, "R2," + R2); # R^2
of residual with bias included vs. total average
- str = append (str, "ADJUSTED_R2," + adjusted_R2); #
Adjusted R^2 of residual with bias included vs. total average
- str = append (str, "R2_NOBIAS," + R2_nobias); # R^2
of residual with bias subtracted vs. total average
- str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias); #
Adjusted R^2 of residual with bias subtracted vs. total average
- if (intercept_status == 0) {
- str = append (str, "R2_VS_0," + R2_vs_0); # R^2
of residual with bias included vs. zero constant
- str = append (str, "ADJUSTED_R2_VS_0," + adjusted_R2_vs_0); #
Adjusted R^2 of residual with bias included vs. zero constant
- }
-
- if (fileO != " " & writeStats != 0) {
- write(str, fileO);
- } else {
- print (str);
- print ("");
- }
-
- # TODO IMPORTANT NOTE: with the fix in PR-22, we have not accounted for
- # intercept=2, and the code before was not matching, so we have removed it
- # for now. Please see the git revision history and diff for the changes.
- # In the future we will have this feature; for now it is disabled.
- }
- }
-
-
-reorder_matrix = function(
- double ncolX, # number of columns in X, including the intercept column
- matrix[double] B, # beta
- matrix[double] S # Selected
-) return (matrix[double] Y) {
- # This function assumes that B and S have the same number of elements.
- # If the intercept is included in the model, all inputs should be adjusted
- # appropriately before calling this function.
-
- S = t(S);
- num_empty_B = ncolX - nrow(B);
- if (num_empty_B < 0) {
- stop("Error: unable to re-order the matrix. Reason: B more than matrix X");
- }
-
- if (num_empty_B > 0) {
- pad_zeros = matrix(0, rows = num_empty_B, cols=1);
- B = rbind(B, pad_zeros);
- S = rbind(S, pad_zeros);
- }
-
- # since table() does not accept zeros as indexes, we work around it.
- S0 = replace(target = S, pattern = 0, replacement = ncolX+1);
- seqS = seq(1, nrow(S0));
- P = table(seqS, S0, ncolX, ncolX);
+[beta_out, Selected] = steplm(X=X_orig, y=y, icpt=intercept, verbose=FALSE);
- Y = t(P) %*% B;
-}
+write(Selected, fileS, format=fmt);
+write(beta_out, fileB, format=fmt);
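The removed reorder_matrix helper mapped the betas of the selected columns back to their original positions via a permutation matrix built with table(). A standalone sketch of that trick, with toy sizes and hypothetical values (not part of the commit):

  # scatter rows of B to the positions given by S using a table()-built permutation
  ncolX = 5;
  B = matrix("0.7 0.3", rows=2, cols=1); # betas for two selected features
  S = matrix("4 2", rows=2, cols=1);     # their original column indices
  num_empty = ncolX - nrow(B);
  B = rbind(B, matrix(0, rows=num_empty, cols=1));
  S = rbind(S, matrix(0, rows=num_empty, cols=1));
  S0 = replace(target=S, pattern=0, replacement=ncolX+1); # table() rejects zero indices
  P = table(seq(1, nrow(S0)), S0, ncolX, ncolX);          # out-of-range rows are cut off
  Y = t(P) %*% B; # Y[4]=0.7, Y[2]=0.3, all other entries 0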
diff --git a/scripts/builtin/steplm.dml b/scripts/builtin/steplm.dml
index 28208c8..01f35ba 100644
--- a/scripts/builtin/steplm.dml
+++ b/scripts/builtin/steplm.dml
@@ -71,7 +71,8 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
# start from one feature and iteratively add features until AIC improves
thr = 0.001;
- print("BEGIN STEPWISE LINEAR REGRESSION SCRIPT");
+ if(verbose)
+ print("BEGIN STEPWISE LINEAR REGRESSION SCRIPT");
X_orig = X;
n = nrow(X_orig);
m_orig = ncol(X_orig);
@@ -90,7 +91,8 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
beta = 0.0;
AIC_best_orig = n * log(sum(y ^ 2) / n);
}
- print("Best AIC without any features: " + AIC_best_orig);
+ if(verbose)
+ print("Best AIC without any features: " + AIC_best_orig);
boa_ncol = ncol(X_orig) + as.integer(icpt!=0);
beta_out_all = matrix(0, boa_ncol, m_orig);
@@ -108,15 +110,16 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
# beta best so far
beta_best = beta_out_all[, column_best];
if (column_best == 0) {
- print("AIC of an empty model is " + AIC_best + " and adding no feature
achieves more than " + (thr * 100) + "% decrease in AIC!");
+ if(verbose)
+ print("AIC of an empty model is " + AIC_best + " and adding
no feature achieves more than " + (thr * 100) + "% decrease in AIC!");
B = matrix(0, m_orig, 1);
if (icpt != 0)
B = rbind(B, as.matrix(beta));
S = matrix(0, 1, 1);
}
else {
-
- print("Best AIC " + AIC_best + " achieved with feature: " + column_best);
+ if(verbose)
+ print("Best AIC " + AIC_best + " achieved with feature: " + column_best);
columns_fixed[1, column_best] = 1;
columns_fixed_ordered[1, 1] = column_best;
@@ -152,7 +155,8 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
# Append best found features (i.e., columns) to X_global
if (as.scalar(columns_fixed[1, column_best]) == 0) {
# new best feature found
- print("Best AIC " + AIC_best + " achieved with feature: " +
column_best);
+ if(verbose)
+ print("Best AIC " + AIC_best + " achieved
with feature: " + column_best);
columns_fixed[1, column_best] = 1;
columns_fixed_ordered = cbind(columns_fixed_ordered, as.matrix(column_best));
if (ncol(columns_fixed_ordered) == m_orig) {
@@ -167,7 +171,8 @@ m_steplm = function(Matrix[Double] X, Matrix[Double] y, Integer icpt = 0,
}
}
# run linear regression with selected set of features
- print("Running linear regression with selected features...");
+ if( verbose )
+ print("Running linear regression with selected features...");
[AIC, beta_out] = linear_regression(X_global, y, icpt, reg, tol, maxi, verbose);
S = columns_fixed_ordered;
if (icpt != 0)
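For reference, the selection criterion preserved by the builtin is the same AIC computed in the removed script code, AIC = 2*k + n*log(RSS/n) for k effective degrees of freedom, and a candidate feature is accepted only if it lowers the best AIC by more than thr * |AIC_best|. A minimal DML sketch of that criterion (hypothetical helper names, mirroring the removed code above):

  aic = function(Double ss_res, Integer n, Integer k) return (Double A) {
    # same form as the removed linear_regression: 2*df + n*log(RSS/n)
    A = 2 * k + n * log(ss_res / n);
  }
  improves = function(Double AIC_cur, Double AIC_best, Double thr) return (Boolean ok) {
    # accept only a sufficiently large relative decrease, as in the selection loops above
    ok = (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs(thr * AIC_best));
  }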