http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/StepLinearRegDS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/StepLinearRegDS.dml 
b/scripts/algorithms/StepLinearRegDS.dml
index afd94ed..953402f 100644
--- a/scripts/algorithms/StepLinearRegDS.dml
+++ b/scripts/algorithms/StepLinearRegDS.dml
@@ -1,388 +1,388 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# THIS SCRIPT CHOOSES A LINEAR MODEL IN A STEPWISE ALGIRITHM USING AIC
-# EACH LINEAR REGRESSION USES A DIRECT SOLVER FOR (X^T X) beta = X^T y
-#
-# INPUT PARAMETERS:
-# 
--------------------------------------------------------------------------------------------
-# NAME    TYPE    DEFAULT    MEANING
-# 
--------------------------------------------------------------------------------------------
-# X       String       ---      Location (on HDFS) to read the matrix X of 
feature vectors
-# Y       String       ---      Location (on HDFS) to read the 1-column matrix 
Y of response values
-# B       String       ---      Location to store estimated regression 
parameters (the betas)
-# S       String    ---      Location to write the selected features ordered 
as computed by the algorithm
-# O       String       " "      Location to write the printed statistics; by 
default is standard output
-# icpt    Int        0       Intercept presence, shifting and rescaling the 
columns of X:
-#                            0 = no intercept, no shifting, no rescaling;
-#                            1 = add intercept, but neither shift nor rescale 
X;
-#                            2 = add intercept, shift & rescale X columns to 
mean = 0, variance = 1
-# thr     Double    0.01     Threshold to stop the algorithm: if the decrease 
in the value of AIC falls below thr
-#                            no further features are being checked and the 
algorithm stops 
-# fmt     String   "text"    Matrix output format for B (the betas) only, 
usually "text" or "csv"
-# 
--------------------------------------------------------------------------------------------
-# OUTPUT: Matrix of regression parameters (the betas) and its size depend on 
icpt input value:
-#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM 
X AND B:
-# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% 
B[1:ncol(X), 1], or just X %*% B
-# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% 
B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% 
B[1:ncol(X), 1] + B[ncol(X)+1, 1]
-#                        Col.2: betas for shifted/rescaled X and intercept
-#
-# In addition, in the last run of linear regression some statistics are 
provided in CSV format, one comma-separated
-# name-value pair per each line, as follows:
-#
-# NAME                  MEANING
-# 
-------------------------------------------------------------------------------------
-# AVG_TOT_Y             Average of the response value Y
-# STDEV_TOT_Y           Standard Deviation of the response value Y
-# AVG_RES_Y             Average of the residual Y - pred(Y|X), i.e. residual 
bias
-# STDEV_RES_Y           Standard Deviation of the residual Y - pred(Y|X)
-# DISPERSION            GLM-style dispersion, i.e. residual sum of squares / # 
deg. fr.
-# PLAIN_R2              Plain R^2 of residual with bias included vs. total 
average
-# ADJUSTED_R2           Adjusted R^2 of residual with bias included vs. total 
average
-# PLAIN_R2_NOBIAS       Plain R^2 of residual with bias subtracted vs. total 
average
-# ADJUSTED_R2_NOBIAS    Adjusted R^2 of residual with bias subtracted vs. 
total average
-# PLAIN_R2_VS_0         * Plain R^2 of residual with bias included vs. zero 
constant
-# ADJUSTED_R2_VS_0      * Adjusted R^2 of residual with bias included vs. zero 
constant
-# 
-------------------------------------------------------------------------------------
-# * The last two statistics are only printed if there is no intercept (icpt=0)
-# If the best AIC is achieved without any features the matrix of selected 
features contains 0.  
-# Moreover, in this case no further statistics will be produced  
-#
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f StepLinearRegDS.dml -nvargs X=INPUT_DIR/X 
Y=INPUT_DIR/Y B=OUTPUT_DIR/betas
-#     O=OUTPUT_DIR/stats S=OUTPUT_DIR/selected icpt=2 thr=0.01 fmt=csv
-
-fileX = $X;
-fileY = $Y;
-fileB = $B;
-fileS = $S;
-
-# currently only the forward selection strategy in supported: start from one 
feature and iteratively add 
-# features until AIC improves
-dir = "forward";
-
-fmt  = ifdef ($fmt, "text");
-intercept_status = ifdef ($icpt, 0);   
-thr = ifdef ($thr, 0.01);  
-
-print ("BEGIN STEPWISE LINEAR REGRESSION SCRIPT");
-print ("Reading X and Y...");
-X_orig = read (fileX);
-y = read (fileY);
-
-n = nrow (X_orig);
-m_orig = ncol (X_orig);
-
-# BEGIN STEPWISE LINEAR REGRESSION
-
-if (dir == "forward") {  
-       
-       continue = TRUE;
-       columns_fixed = matrix (0, rows = 1, cols = m_orig);
-       columns_fixed_ordered = matrix (0, rows = 1, cols = 1);
-       
-       # X_global stores the best model found at each step 
-       X_global = matrix (0, rows = n, cols = 1);
-       
-       if (intercept_status == 1 | intercept_status == 2) {
-               beta = mean (y);
-               AIC_best = 2 + n * log(sum((beta - y)^2) / n);
-       } else {
-               beta = 0;
-               AIC_best = n * log(sum(y^2) / n);
-       }
-       AICs = matrix (AIC_best, rows = 1, cols = m_orig);
-       print ("Best AIC without any features: " + AIC_best);
-       
-       # First pass to examine single features
-       parfor (i in 1:m_orig) {        
-               [AIC_1] = linear_regression (X_orig[,i], y, m_orig, 
columns_fixed_ordered, " ");                                        
-               AICs[1,i] = AIC_1;
-       }
-
-       # Determine the best AIC 
-       column_best = 0;        
-       for (k in 1:m_orig) {
-               AIC_cur = as.scalar (AICs[1,k]);
-               if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * 
AIC_best)) ) {
-                       column_best = k;
-                       AIC_best = as.scalar(AICs[1,k]);
-               }
-       }
-       
-       if (column_best == 0) {
-               print ("AIC of an empty model is " + AIC_best + " and adding no 
feature achieves more than " + (thr * 100) + "% decrease in AIC!");
-               S = matrix (0, rows=1, cols=1);
-               if (intercept_status == 0) {
-                       B = matrix (beta, rows = m_orig, cols = 1);
-               } else {
-                       B_tmp = matrix (0, rows = m_orig + 1, cols = 1);
-                       B_tmp[m_orig + 1,] = beta;
-                       B = B_tmp;
-               }
-               write (S, fileS, format=fmt);
-               write (B, fileB, format=fmt);
-               stop ("");
-       }
-       print ("Best AIC " + AIC_best + " achieved with feature: " + 
column_best);      
-       columns_fixed[1,column_best] = 1;
-       columns_fixed_ordered[1,1] = column_best;
-       X_global = X_orig[,column_best];                
-               
-       while (continue) {
-               # Subsequent passes over the features
-               parfor (i in 1:m_orig) { 
-                       if (as.scalar(columns_fixed[1,i]) == 0) {       
-                       
-                               # Construct the feature matrix
-                               X = append (X_global, X_orig[,i]);
-                               
-                               [AIC_2] = linear_regression (X, y, m_orig, 
columns_fixed_ordered, " ");
-                               AICs[1,i] = AIC_2;
-                       }       
-               }
-       
-               # Determine the best AIC
-               for (k in 1:m_orig) {
-                       AIC_cur = as.scalar (AICs[1,k]);
-                       if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs 
(thr * AIC_best)) & (as.scalar(columns_fixed[1,k]) == 0) ) {
-                               column_best = k;
-                               AIC_best = as.scalar(AICs[1,k]);
-                       }
-               }
-                               
-               # Append best found features (i.e., columns) to X_global
-               if (as.scalar(columns_fixed[1,column_best]) == 0) { # new best 
feature found
-                       print ("Best AIC " + AIC_best + " achieved with 
feature: " + column_best);
-                       columns_fixed[1,column_best] = 1;
-                       columns_fixed_ordered = append (columns_fixed_ordered, 
as.matrix(column_best));
-                       if (ncol(columns_fixed_ordered) == m_orig) { # all 
features examined
-                               X_global = append (X_global, 
X_orig[,column_best]);
-                               continue = FALSE;
-                       } else {
-                               X_global = append (X_global, 
X_orig[,column_best]);
-                       }
-               } else {
-                       continue = FALSE;
-               }
-       }
-       
-       # run linear regression with selected set of features
-       print ("Running linear regression with selected features...");
-       [AIC] = linear_regression (X_global, y, m_orig, columns_fixed_ordered, 
fileB); 
-       
-} else {
-       stop ("Currently only forward selection strategy is supported!");
-} 
-
-
-/*
-* Computes linear regression using a direct solver for (X^T X) beta = X^T y.
-* It also outputs the AIC of the computed model.  
-*/
-linear_regression = function (Matrix[Double] X, Matrix[Double] y, Double 
m_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {
-               
-       intercept_status = ifdef ($icpt, 0);            
-       n = nrow (X);   
-       m = ncol (X);
-       
-       # Introduce the intercept, shift and rescale the columns of X if needed
-       if (intercept_status == 1 | intercept_status == 2) { # add the 
intercept column
-               ones_n = matrix (1, rows = n, cols = 1);
-               X = append (X, ones_n);
-               m = m - 1;
-       }
-       m_ext = ncol(X);
-       
-       if (intercept_status == 2) { # scale-&-shift X columns to mean 0, 
variance 1
-                                                            # Important 
assumption: X [, m_ext] = ones_n
-               avg_X_cols = t(colSums(X)) / n;
-               var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 
1);
-               is_unsafe = ppred (var_X_cols, 0.0, "<=");
-               scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
-               scale_X [m_ext, 1] = 1;
-               shift_X = - avg_X_cols * scale_X;
-               shift_X [m_ext, 1] = 0;
-       } else {
-               scale_X = matrix (1, rows = m_ext, cols = 1);
-               shift_X = matrix (0, rows = m_ext, cols = 1);
-       }
-
-       # BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL)
-
-       A = t(X) %*% X;
-       b = t(X) %*% y;
-       if (intercept_status == 2) {
-               A = t(diag (scale_X) %*% A + shift_X %*% A [m_ext, ]);
-               A =   diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
-               b =   diag (scale_X) %*% b + shift_X %*% b [m_ext, ];
-       }
-
-       beta_unscaled = solve (A, b);
-       
-       # END THE DIRECT SOLVE ALGORITHM
-
-       if (intercept_status == 2) {
-               beta = scale_X * beta_unscaled;
-               beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
-       } else {
-               beta = beta_unscaled;
-       }
-       
-       # COMPUTE AIC
-       y_residual = y - X %*% beta;
-       ss_res = sum (y_residual ^ 2);
-       eq_deg_of_freedom = m_ext;
-       AIC = (2 * eq_deg_of_freedom) + n * log (ss_res / n);
-       
-       if (fileB != " ") {
-       
-               fileO = ifdef ($O, " ");
-               fileS = $S;
-               
-               print ("Computing the statistics...");
-               avg_tot = sum (y) / n;
-               ss_tot = sum (y ^ 2);
-               ss_avg_tot = ss_tot - n * avg_tot ^ 2;
-               var_tot = ss_avg_tot / (n - 1);
-#              y_residual = y - X %*% beta;
-               avg_res = sum (y_residual) / n;
-#              ss_res = sum (y_residual ^ 2);
-               ss_avg_res = ss_res - n * avg_res ^ 2;
-
-               plain_R2 = 1 - ss_res / ss_avg_tot;
-               if (n > m_ext) {
-                       dispersion  = ss_res / (n - m_ext);
-                       adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
-               } else {
-                       dispersion  = 0.0 / 0.0;
-                       adjusted_R2 = 0.0 / 0.0;
-               }
-
-               plain_R2_nobias = 1 - ss_avg_res / ss_avg_tot;
-               deg_freedom = n - m - 1;
-               if (deg_freedom > 0) {
-                       var_res = ss_avg_res / deg_freedom;
-                       adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 
1));
-               } else {
-                       var_res = 0.0 / 0.0;
-                       adjusted_R2_nobias = 0.0 / 0.0;
-                       print ("Warning: zero or negative number of degrees of 
freedom.");
-               }
-
-               plain_R2_vs_0 = 1 - ss_res / ss_tot;
-               if (n > m) {
-                       adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / 
n);
-               } else {
-                       adjusted_R2_vs_0 = 0.0 / 0.0;
-               }
-
-               str = "AVG_TOT_Y," + avg_tot;                                   
 #  Average of the response value Y
-               str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));            
 #  Standard Deviation of the response value Y
-               str = append (str, "AVG_RES_Y," + avg_res);                     
 #  Average of the residual Y - pred(Y|X), i.e. residual bias
-               str = append (str, "STDEV_RES_Y," + sqrt (var_res));            
 #  Standard Deviation of the residual Y - pred(Y|X)
-               str = append (str, "DISPERSION," + dispersion);                 
 #  GLM-style dispersion, i.e. residual sum of squares / # d.f.
-               str = append (str, "PLAIN_R2," + plain_R2);                     
 #  Plain R^2 of residual with bias included vs. total average
-               str = append (str, "ADJUSTED_R2," + adjusted_R2);               
 #  Adjusted R^2 of residual with bias included vs. total average
-               str = append (str, "PLAIN_R2_NOBIAS," + plain_R2_nobias);       
 #  Plain R^2 of residual with bias subtracted vs. total average
-               str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias); 
 #  Adjusted R^2 of residual with bias subtracted vs. total average
-               if (intercept_status == 0) {
-                       str = append (str, "PLAIN_R2_VS_0," + plain_R2_vs_0);   
     #  Plain R^2 of residual with bias included vs. zero constant
-                       str = append (str, "ADJUSTED_R2_VS_0," + 
adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero 
constant
-               }
-
-               if (fileO != " ") {
-                       write (str, fileO);
-               } else {
-                       print (str);
-               }
-
-               # Prepare the output matrix
-               print ("Writing the output matrix...");
-               if (intercept_status == 2) {
-                       beta_out = append (beta, beta_unscaled);
-               } else {
-                       beta_out = beta;
-               }
-               
-               # Output which features give the best AIC and are being used 
for linear regression 
-               write (Selected, fileS, format=fmt);
-               
-               no_selected = ncol (Selected);
-               max_selected = max (Selected);
-               last = max_selected + 1;        
-               
-               if (intercept_status != 0) {
-               
-                       Selected_ext = append (Selected, as.matrix (last));     
                
-                       P1 = table (seq (1, ncol (Selected_ext)), 
t(Selected_ext)); 
-
-                       if (intercept_status == 2) {
-                       
-                               P1_beta = P1 * beta;
-                               P2_beta = colSums (P1_beta);
-                               P1_beta_unscaled = P1 * beta_unscaled;
-                               P2_beta_unscaled = colSums(P1_beta_unscaled);
-                               
-                               if (max_selected < m_orig) {
-                                       P2_beta = append (P2_beta, matrix (0, 
rows=1, cols=(m_orig - max_selected)));
-                                       P2_beta_unscaled = append 
(P2_beta_unscaled, matrix (0, rows=1, cols=(m_orig - max_selected)));
-                                       
-                                       P2_beta[1, m_orig+1] = P2_beta[1, 
max_selected + 1]; 
-                                       P2_beta[1, max_selected + 1] = 0;
-                               
-                                       P2_beta_unscaled[1, m_orig+1] = 
P2_beta_unscaled[1, max_selected + 1]; 
-                                       P2_beta_unscaled[1, max_selected + 1] = 
0;
-                               }
-                               beta_out = append (t(P2_beta), 
t(P2_beta_unscaled));
-                               
-                       } else {
-                       
-                               P1_beta = P1 * beta;
-                               P2_beta = colSums (P1_beta);
-                               
-                               if (max_selected < m_orig) {
-                                       P2_beta = append (P2_beta, matrix (0, 
rows=1, cols=(m_orig - max_selected)));
-                                       P2_beta[1, m_orig+1] = P2_beta[1, 
max_selected + 1] ; 
-                                       P2_beta[1, max_selected + 1] = 0;
-                               }
-                               beta_out = t(P2_beta);
-                               
-                       }
-               } else {
-               
-                       P1 = table (seq (1, no_selected), t(Selected)); 
-                       P1_beta = P1 * beta;
-                       P2_beta = colSums (P1_beta);    
-
-                       if (max_selected < m_orig) {
-                               P2_beta = append (P2_beta, matrix (0, rows=1, 
cols=(m_orig - max_selected)));
-                       }               
-
-                       beta_out = t(P2_beta);  
-               }
-               
-               write ( beta_out, fileB, format=fmt );          
-       }
-}
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS SCRIPT CHOOSES A LINEAR MODEL IN A STEPWISE ALGIRITHM USING AIC
+# EACH LINEAR REGRESSION USES A DIRECT SOLVER FOR (X^T X) beta = X^T y
+#
+# INPUT PARAMETERS:
+# 
--------------------------------------------------------------------------------------------
+# NAME    TYPE    DEFAULT    MEANING
+# 
--------------------------------------------------------------------------------------------
+# X       String       ---      Location (on HDFS) to read the matrix X of 
feature vectors
+# Y       String       ---      Location (on HDFS) to read the 1-column matrix 
Y of response values
+# B       String       ---      Location to store estimated regression 
parameters (the betas)
+# S       String    ---      Location to write the selected features ordered 
as computed by the algorithm
+# O       String       " "      Location to write the printed statistics; by 
default is standard output
+# icpt    Int        0       Intercept presence, shifting and rescaling the 
columns of X:
+#                            0 = no intercept, no shifting, no rescaling;
+#                            1 = add intercept, but neither shift nor rescale 
X;
+#                            2 = add intercept, shift & rescale X columns to 
mean = 0, variance = 1
+# thr     Double    0.01     Threshold to stop the algorithm: if the decrease 
in the value of AIC falls below thr
+#                            no further features are being checked and the 
algorithm stops 
+# fmt     String   "text"    Matrix output format for B (the betas) only, 
usually "text" or "csv"
+# 
--------------------------------------------------------------------------------------------
+# OUTPUT: Matrix of regression parameters (the betas) and its size depend on 
icpt input value:
+#         OUTPUT SIZE:   OUTPUT CONTENTS:                HOW TO PREDICT Y FROM 
X AND B:
+# icpt=0: ncol(X)   x 1  Betas for X only                Y ~ X %*% 
B[1:ncol(X), 1], or just X %*% B
+# icpt=1: ncol(X)+1 x 1  Betas for X and intercept       Y ~ X %*% 
B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+# icpt=2: ncol(X)+1 x 2  Col.1: betas for X & intercept  Y ~ X %*% 
B[1:ncol(X), 1] + B[ncol(X)+1, 1]
+#                        Col.2: betas for shifted/rescaled X and intercept
+#
+# In addition, in the last run of linear regression some statistics are 
provided in CSV format, one comma-separated
+# name-value pair per each line, as follows:
+#
+# NAME                  MEANING
+# 
-------------------------------------------------------------------------------------
+# AVG_TOT_Y             Average of the response value Y
+# STDEV_TOT_Y           Standard Deviation of the response value Y
+# AVG_RES_Y             Average of the residual Y - pred(Y|X), i.e. residual 
bias
+# STDEV_RES_Y           Standard Deviation of the residual Y - pred(Y|X)
+# DISPERSION            GLM-style dispersion, i.e. residual sum of squares / # 
deg. fr.
+# PLAIN_R2              Plain R^2 of residual with bias included vs. total 
average
+# ADJUSTED_R2           Adjusted R^2 of residual with bias included vs. total 
average
+# PLAIN_R2_NOBIAS       Plain R^2 of residual with bias subtracted vs. total 
average
+# ADJUSTED_R2_NOBIAS    Adjusted R^2 of residual with bias subtracted vs. 
total average
+# PLAIN_R2_VS_0         * Plain R^2 of residual with bias included vs. zero 
constant
+# ADJUSTED_R2_VS_0      * Adjusted R^2 of residual with bias included vs. zero 
constant
+# 
-------------------------------------------------------------------------------------
+# * The last two statistics are only printed if there is no intercept (icpt=0)
+# If the best AIC is achieved without any features the matrix of selected 
features contains 0.  
+# Moreover, in this case no further statistics will be produced  
+#
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f StepLinearRegDS.dml -nvargs X=INPUT_DIR/X 
Y=INPUT_DIR/Y B=OUTPUT_DIR/betas
+#     O=OUTPUT_DIR/stats S=OUTPUT_DIR/selected icpt=2 thr=0.01 fmt=csv
+
+fileX = $X;
+fileY = $Y;
+fileB = $B;
+fileS = $S;
+
+# currently only the forward selection strategy in supported: start from one 
feature and iteratively add 
+# features until AIC improves
+dir = "forward";
+
+fmt  = ifdef ($fmt, "text");
+intercept_status = ifdef ($icpt, 0);   
+thr = ifdef ($thr, 0.01);  
+
+print ("BEGIN STEPWISE LINEAR REGRESSION SCRIPT");
+print ("Reading X and Y...");
+X_orig = read (fileX);
+y = read (fileY);
+
+n = nrow (X_orig);
+m_orig = ncol (X_orig);
+
+# BEGIN STEPWISE LINEAR REGRESSION
+
+if (dir == "forward") {  
+       
+       continue = TRUE;
+       columns_fixed = matrix (0, rows = 1, cols = m_orig);
+       columns_fixed_ordered = matrix (0, rows = 1, cols = 1);
+       
+       # X_global stores the best model found at each step 
+       X_global = matrix (0, rows = n, cols = 1);
+       
+       if (intercept_status == 1 | intercept_status == 2) {
+               beta = mean (y);
+               AIC_best = 2 + n * log(sum((beta - y)^2) / n);
+       } else {
+               beta = 0;
+               AIC_best = n * log(sum(y^2) / n);
+       }
+       AICs = matrix (AIC_best, rows = 1, cols = m_orig);
+       print ("Best AIC without any features: " + AIC_best);
+       
+       # First pass to examine single features
+       parfor (i in 1:m_orig) {        
+               [AIC_1] = linear_regression (X_orig[,i], y, m_orig, 
columns_fixed_ordered, " ");                                        
+               AICs[1,i] = AIC_1;
+       }
+
+       # Determine the best AIC 
+       column_best = 0;        
+       for (k in 1:m_orig) {
+               AIC_cur = as.scalar (AICs[1,k]);
+               if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs (thr * 
AIC_best)) ) {
+                       column_best = k;
+                       AIC_best = as.scalar(AICs[1,k]);
+               }
+       }
+       
+       if (column_best == 0) {
+               print ("AIC of an empty model is " + AIC_best + " and adding no 
feature achieves more than " + (thr * 100) + "% decrease in AIC!");
+               S = matrix (0, rows=1, cols=1);
+               if (intercept_status == 0) {
+                       B = matrix (beta, rows = m_orig, cols = 1);
+               } else {
+                       B_tmp = matrix (0, rows = m_orig + 1, cols = 1);
+                       B_tmp[m_orig + 1,] = beta;
+                       B = B_tmp;
+               }
+               write (S, fileS, format=fmt);
+               write (B, fileB, format=fmt);
+               stop ("");
+       }
+       print ("Best AIC " + AIC_best + " achieved with feature: " + 
column_best);      
+       columns_fixed[1,column_best] = 1;
+       columns_fixed_ordered[1,1] = column_best;
+       X_global = X_orig[,column_best];                
+               
+       while (continue) {
+               # Subsequent passes over the features
+               parfor (i in 1:m_orig) { 
+                       if (as.scalar(columns_fixed[1,i]) == 0) {       
+                       
+                               # Construct the feature matrix
+                               X = append (X_global, X_orig[,i]);
+                               
+                               [AIC_2] = linear_regression (X, y, m_orig, 
columns_fixed_ordered, " ");
+                               AICs[1,i] = AIC_2;
+                       }       
+               }
+       
+               # Determine the best AIC
+               for (k in 1:m_orig) {
+                       AIC_cur = as.scalar (AICs[1,k]);
+                       if ( (AIC_cur < AIC_best) & ((AIC_best - AIC_cur) > abs 
(thr * AIC_best)) & (as.scalar(columns_fixed[1,k]) == 0) ) {
+                               column_best = k;
+                               AIC_best = as.scalar(AICs[1,k]);
+                       }
+               }
+                               
+               # Append best found features (i.e., columns) to X_global
+               if (as.scalar(columns_fixed[1,column_best]) == 0) { # new best 
feature found
+                       print ("Best AIC " + AIC_best + " achieved with 
feature: " + column_best);
+                       columns_fixed[1,column_best] = 1;
+                       columns_fixed_ordered = append (columns_fixed_ordered, 
as.matrix(column_best));
+                       if (ncol(columns_fixed_ordered) == m_orig) { # all 
features examined
+                               X_global = append (X_global, 
X_orig[,column_best]);
+                               continue = FALSE;
+                       } else {
+                               X_global = append (X_global, 
X_orig[,column_best]);
+                       }
+               } else {
+                       continue = FALSE;
+               }
+       }
+       
+       # run linear regression with selected set of features
+       print ("Running linear regression with selected features...");
+       [AIC] = linear_regression (X_global, y, m_orig, columns_fixed_ordered, 
fileB); 
+       
+} else {
+       stop ("Currently only forward selection strategy is supported!");
+} 
+
+
+/*
+* Computes linear regression using a direct solver for (X^T X) beta = X^T y.
+* It also outputs the AIC of the computed model.  
+*/
+linear_regression = function (Matrix[Double] X, Matrix[Double] y, Double 
m_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {
+               
+       intercept_status = ifdef ($icpt, 0);            
+       n = nrow (X);   
+       m = ncol (X);
+       
+       # Introduce the intercept, shift and rescale the columns of X if needed
+       if (intercept_status == 1 | intercept_status == 2) { # add the 
intercept column
+               ones_n = matrix (1, rows = n, cols = 1);
+               X = append (X, ones_n);
+               m = m - 1;
+       }
+       m_ext = ncol(X);
+       
+       if (intercept_status == 2) { # scale-&-shift X columns to mean 0, 
variance 1
+                                                            # Important 
assumption: X [, m_ext] = ones_n
+               avg_X_cols = t(colSums(X)) / n;
+               var_X_cols = (t(colSums (X ^ 2)) - n * (avg_X_cols ^ 2)) / (n - 
1);
+               is_unsafe = ppred (var_X_cols, 0.0, "<=");
+               scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+               scale_X [m_ext, 1] = 1;
+               shift_X = - avg_X_cols * scale_X;
+               shift_X [m_ext, 1] = 0;
+       } else {
+               scale_X = matrix (1, rows = m_ext, cols = 1);
+               shift_X = matrix (0, rows = m_ext, cols = 1);
+       }
+
+       # BEGIN THE DIRECT SOLVE ALGORITHM (EXTERNAL CALL)
+
+       A = t(X) %*% X;
+       b = t(X) %*% y;
+       if (intercept_status == 2) {
+               A = t(diag (scale_X) %*% A + shift_X %*% A [m_ext, ]);
+               A =   diag (scale_X) %*% A + shift_X %*% A [m_ext, ];
+               b =   diag (scale_X) %*% b + shift_X %*% b [m_ext, ];
+       }
+
+       beta_unscaled = solve (A, b);
+       
+       # END THE DIRECT SOLVE ALGORITHM
+
+       if (intercept_status == 2) {
+               beta = scale_X * beta_unscaled;
+               beta [m_ext, ] = beta [m_ext, ] + t(shift_X) %*% beta_unscaled;
+       } else {
+               beta = beta_unscaled;
+       }
+       
+       # COMPUTE AIC
+       y_residual = y - X %*% beta;
+       ss_res = sum (y_residual ^ 2);
+       eq_deg_of_freedom = m_ext;
+       AIC = (2 * eq_deg_of_freedom) + n * log (ss_res / n);
+       
+       if (fileB != " ") {
+       
+               fileO = ifdef ($O, " ");
+               fileS = $S;
+               
+               print ("Computing the statistics...");
+               avg_tot = sum (y) / n;
+               ss_tot = sum (y ^ 2);
+               ss_avg_tot = ss_tot - n * avg_tot ^ 2;
+               var_tot = ss_avg_tot / (n - 1);
+#              y_residual = y - X %*% beta;
+               avg_res = sum (y_residual) / n;
+#              ss_res = sum (y_residual ^ 2);
+               ss_avg_res = ss_res - n * avg_res ^ 2;
+
+               plain_R2 = 1 - ss_res / ss_avg_tot;
+               if (n > m_ext) {
+                       dispersion  = ss_res / (n - m_ext);
+                       adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
+               } else {
+                       dispersion  = 0.0 / 0.0;
+                       adjusted_R2 = 0.0 / 0.0;
+               }
+
+               plain_R2_nobias = 1 - ss_avg_res / ss_avg_tot;
+               deg_freedom = n - m - 1;
+               if (deg_freedom > 0) {
+                       var_res = ss_avg_res / deg_freedom;
+                       adjusted_R2_nobias = 1 - var_res / (ss_avg_tot / (n - 
1));
+               } else {
+                       var_res = 0.0 / 0.0;
+                       adjusted_R2_nobias = 0.0 / 0.0;
+                       print ("Warning: zero or negative number of degrees of 
freedom.");
+               }
+
+               plain_R2_vs_0 = 1 - ss_res / ss_tot;
+               if (n > m) {
+                       adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / 
n);
+               } else {
+                       adjusted_R2_vs_0 = 0.0 / 0.0;
+               }
+
+               str = "AVG_TOT_Y," + avg_tot;                                   
 #  Average of the response value Y
+               str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));            
 #  Standard Deviation of the response value Y
+               str = append (str, "AVG_RES_Y," + avg_res);                     
 #  Average of the residual Y - pred(Y|X), i.e. residual bias
+               str = append (str, "STDEV_RES_Y," + sqrt (var_res));            
 #  Standard Deviation of the residual Y - pred(Y|X)
+               str = append (str, "DISPERSION," + dispersion);                 
 #  GLM-style dispersion, i.e. residual sum of squares / # d.f.
+               str = append (str, "PLAIN_R2," + plain_R2);                     
 #  Plain R^2 of residual with bias included vs. total average
+               str = append (str, "ADJUSTED_R2," + adjusted_R2);               
 #  Adjusted R^2 of residual with bias included vs. total average
+               str = append (str, "PLAIN_R2_NOBIAS," + plain_R2_nobias);       
 #  Plain R^2 of residual with bias subtracted vs. total average
+               str = append (str, "ADJUSTED_R2_NOBIAS," + adjusted_R2_nobias); 
 #  Adjusted R^2 of residual with bias subtracted vs. total average
+               if (intercept_status == 0) {
+                       str = append (str, "PLAIN_R2_VS_0," + plain_R2_vs_0);   
     #  Plain R^2 of residual with bias included vs. zero constant
+                       str = append (str, "ADJUSTED_R2_VS_0," + 
adjusted_R2_vs_0);  #  Adjusted R^2 of residual with bias included vs. zero 
constant
+               }
+
+               if (fileO != " ") {
+                       write (str, fileO);
+               } else {
+                       print (str);
+               }
+
+               # Prepare the output matrix
+               print ("Writing the output matrix...");
+               if (intercept_status == 2) {
+                       beta_out = append (beta, beta_unscaled);
+               } else {
+                       beta_out = beta;
+               }
+               
+               # Output which features give the best AIC and are being used 
for linear regression 
+               write (Selected, fileS, format=fmt);
+               
+               no_selected = ncol (Selected);
+               max_selected = max (Selected);
+               last = max_selected + 1;        
+               
+               if (intercept_status != 0) {
+               
+                       Selected_ext = append (Selected, as.matrix (last));     
                
+                       P1 = table (seq (1, ncol (Selected_ext)), 
t(Selected_ext)); 
+
+                       if (intercept_status == 2) {
+                       
+                               P1_beta = P1 * beta;
+                               P2_beta = colSums (P1_beta);
+                               P1_beta_unscaled = P1 * beta_unscaled;
+                               P2_beta_unscaled = colSums(P1_beta_unscaled);
+                               
+                               if (max_selected < m_orig) {
+                                       P2_beta = append (P2_beta, matrix (0, 
rows=1, cols=(m_orig - max_selected)));
+                                       P2_beta_unscaled = append 
(P2_beta_unscaled, matrix (0, rows=1, cols=(m_orig - max_selected)));
+                                       
+                                       P2_beta[1, m_orig+1] = P2_beta[1, 
max_selected + 1]; 
+                                       P2_beta[1, max_selected + 1] = 0;
+                               
+                                       P2_beta_unscaled[1, m_orig+1] = 
P2_beta_unscaled[1, max_selected + 1]; 
+                                       P2_beta_unscaled[1, max_selected + 1] = 
0;
+                               }
+                               beta_out = append (t(P2_beta), 
t(P2_beta_unscaled));
+                               
+                       } else {
+                       
+                               P1_beta = P1 * beta;
+                               P2_beta = colSums (P1_beta);
+                               
+                               if (max_selected < m_orig) {
+                                       P2_beta = append (P2_beta, matrix (0, 
rows=1, cols=(m_orig - max_selected)));
+                                       P2_beta[1, m_orig+1] = P2_beta[1, 
max_selected + 1] ; 
+                                       P2_beta[1, max_selected + 1] = 0;
+                               }
+                               beta_out = t(P2_beta);
+                               
+                       }
+               } else {
+               
+                       P1 = table (seq (1, no_selected), t(Selected)); 
+                       P1_beta = P1 * beta;
+                       P2_beta = colSums (P1_beta);    
+
+                       if (max_selected < m_orig) {
+                               P2_beta = append (P2_beta, matrix (0, rows=1, 
cols=(m_orig - max_selected)));
+                       }               
+
+                       beta_out = t(P2_beta);  
+               }
+               
+               write ( beta_out, fileB, format=fmt );          
+       }
+}
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/Univar-Stats.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Univar-Stats.dml 
b/scripts/algorithms/Univar-Stats.dml
index abb3fea..62d6a28 100644
--- a/scripts/algorithms/Univar-Stats.dml
+++ b/scripts/algorithms/Univar-Stats.dml
@@ -1,150 +1,150 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# DML Script to compute univariate statistics for all attributes 
-# in a given data set
-#
-# Three inputs:
-#     $1) X - input data
-#     $2) TYPES - row matrix that denotes the "kind"/"type" of all attributes
-#             kind=1 for scale, 
-#             kind=2 for nominal,
-#             kind=3 for ordinal
-#
-# One output:
-#     $STATS) output directory in which following three statistics 
-#         files are created
-#         + base.stats - matrix with all 17 statistics (14 scale, 
-#         3 categorical) computed for all attributes
-#         + categorical.counts - matrix in which each column 
-#         gives the category-wise counts for all categories in 
-#         that attribute
-#
-#
-
-A = read($X); # data file
-K = read($TYPES); # attribute kind file
-
-# number of features/attributes
-n = ncol(A);
-
-# number of data records
-m = nrow(A);
-
-# number of statistics
-numBaseStats = 17; # (14 scale stats, 3 categorical stats)
-
-max_kind = max(K);
-
-# matrices to store computed statistics
-baseStats = matrix(0, rows=numBaseStats, cols=n);
-
-# Compute max domain size among all categorical attributes
-maxs = colMaxs(A);
-maxDomainSize = max( ppred(K, 1, ">") * maxs );
-maxDomain = as.integer(maxDomainSize);
-
-
-parfor(i in 1:n, check=0) {
-
-       # project out the i^th column
-       F = A[,i];
-
-       kind = castAsScalar(K[1,i]);
-
-       if ( kind == 1 ) {
-               #print("[" + i + "] Scale");
-               # compute SCALE statistics on the projected column
-               minimum = min(F);
-               maximum = max(F);
-               rng = maximum - minimum;
-
-               mu = mean(F);
-               m2 = moment(F, 2);
-               m3 = moment(F, 3);
-               m4 = moment(F, 4);
-
-               var = m/(m-1.0)*m2;
-               std_dev = sqrt(var);
-               se = std_dev/sqrt(m);
-               cv = std_dev/mu;
-
-               g1 = m3/(std_dev^3);
-               g2 = m4/(std_dev^4) - 3;
-               #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
-               se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
-
-               #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
-               se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
-
-               md = median(F); #quantile(F, 0.5);
-               iqm = interQuartileMean(F);
-
-               # place the computed statistics in output matrices
-               baseStats[1,i] = minimum;
-               baseStats[2,i] = maximum;
-               baseStats[3,i] = rng;
-
-               baseStats[4,i] = mu;
-               baseStats[5,i] = var;
-               baseStats[6,i] = std_dev;
-               baseStats[7,i] = se;
-               baseStats[8,i] = cv;
-
-               baseStats[9,i] = g1;
-               baseStats[10,i] = g2;
-               baseStats[11,i] = se_g1;
-               baseStats[12,i] = se_g2;
-
-               baseStats[13,i] = md;
-               baseStats[14,i] = iqm;
-       }
-       else {
-               if (kind == 2 | kind == 3) {
-                       #print("[" + i + "] Categorical");
-                       
-                       # check if the categorical column has valid values
-                       minF = min(F);
-                       if (minF <=0) {
-                               print("ERROR: Categorical attributes can only 
take values starting from 1. Encountered a value " + minF + " in attribute " + 
i);
-                       }
-                       else {
-                               # compute CATEGORICAL statistics on the 
projected column
-                               num_cat = max(F); # number of categories
-                               cat_counts = table(F,1, maxDomain, 1);  # 
counts for each category
-
-                               mode = rowIndexMax(t(cat_counts));
-                               mx = max(cat_counts)
-                               modeArr =  ppred(cat_counts, mx, "==")
-                               numModes = sum(modeArr);
-
-                               # place the computed statistics in output 
matrices
-                               baseStats[15,i] = num_cat;
-                               baseStats[16,i] = mode;
-                               baseStats[17,i] = numModes;
-                       }
-               }
-       }
-}
-
-write(baseStats, $STATS);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# DML Script to compute univariate statistics for all attributes 
+# in a given data set
+#
+# Three inputs:
+#     $1) X - input data
+#     $2) TYPES - row matrix that denotes the "kind"/"type" of all attributes
+#             kind=1 for scale, 
+#             kind=2 for nominal,
+#             kind=3 for ordinal
+#
+# One output:
+#     $STATS) output directory in which following three statistics 
+#         files are created
+#         + base.stats - matrix with all 17 statistics (14 scale, 
+#         3 categorical) computed for all attributes
+#         + categorical.counts - matrix in which each column 
+#         gives the category-wise counts for all categories in 
+#         that attribute
+#
+#
+
+A = read($X); # data file
+K = read($TYPES); # attribute kind file
+
+# number of features/attributes
+n = ncol(A);
+
+# number of data records
+m = nrow(A);
+
+# number of statistics
+numBaseStats = 17; # (14 scale stats, 3 categorical stats)
+
+max_kind = max(K);
+
+# matrices to store computed statistics
+baseStats = matrix(0, rows=numBaseStats, cols=n);
+
+# Compute max domain size among all categorical attributes
+maxs = colMaxs(A);
+maxDomainSize = max( ppred(K, 1, ">") * maxs );
+maxDomain = as.integer(maxDomainSize);
+
+
+parfor(i in 1:n, check=0) {
+
+       # project out the i^th column
+       F = A[,i];
+
+       kind = castAsScalar(K[1,i]);
+
+       if ( kind == 1 ) {
+               #print("[" + i + "] Scale");
+               # compute SCALE statistics on the projected column
+               minimum = min(F);
+               maximum = max(F);
+               rng = maximum - minimum;
+
+               mu = mean(F);
+               m2 = moment(F, 2);
+               m3 = moment(F, 3);
+               m4 = moment(F, 4);
+
+               var = m/(m-1.0)*m2;
+               std_dev = sqrt(var);
+               se = std_dev/sqrt(m);
+               cv = std_dev/mu;
+
+               g1 = m3/(std_dev^3);
+               g2 = m4/(std_dev^4) - 3;
+               #se_g1=sqrt( 6*m*(m-1.0) / ((m-2.0)*(m+1.0)*(m+3.0)) ); 
+               se_g1=sqrt( (6/(m-2.0)) * (m/(m+1.0)) * ((m-1.0)/(m+3.0)) ); 
+
+               #se_g2= sqrt( (4*(m^2-1)*se_g1^2)/((m+5.0)*(m-3.0)) );  
+               se_g2=sqrt( (4/(m+5.0)) * ((m^2-1)/(m-3.0)) * se_g1^2 ); 
+
+               md = median(F); #quantile(F, 0.5);
+               iqm = interQuartileMean(F);
+
+               # place the computed statistics in output matrices
+               baseStats[1,i] = minimum;
+               baseStats[2,i] = maximum;
+               baseStats[3,i] = rng;
+
+               baseStats[4,i] = mu;
+               baseStats[5,i] = var;
+               baseStats[6,i] = std_dev;
+               baseStats[7,i] = se;
+               baseStats[8,i] = cv;
+
+               baseStats[9,i] = g1;
+               baseStats[10,i] = g2;
+               baseStats[11,i] = se_g1;
+               baseStats[12,i] = se_g2;
+
+               baseStats[13,i] = md;
+               baseStats[14,i] = iqm;
+       }
+       else {
+               if (kind == 2 | kind == 3) {
+                       #print("[" + i + "] Categorical");
+                       
+                       # check if the categorical column has valid values
+                       minF = min(F);
+                       if (minF <=0) {
+                               print("ERROR: Categorical attributes can only 
take values starting from 1. Encountered a value " + minF + " in attribute " + 
i);
+                       }
+                       else {
+                               # compute CATEGORICAL statistics on the 
projected column
+                               num_cat = max(F); # number of categories
+                               cat_counts = table(F,1, maxDomain, 1);  # 
counts for each category
+
+                               mode = rowIndexMax(t(cat_counts));
+                               mx = max(cat_counts)
+                               modeArr =  ppred(cat_counts, mx, "==")
+                               numModes = sum(modeArr);
+
+                               # place the computed statistics in output 
matrices
+                               baseStats[15,i] = num_cat;
+                               baseStats[16,i] = mode;
+                               baseStats[17,i] = numModes;
+                       }
+               }
+       }
+}
+
+write(baseStats, $STATS);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/bivar-stats.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/bivar-stats.dml 
b/scripts/algorithms/bivar-stats.dml
index 4846f56..99549dc 100644
--- a/scripts/algorithms/bivar-stats.dml
+++ b/scripts/algorithms/bivar-stats.dml
@@ -1,398 +1,398 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-#
-# For a given pair of attribute sets, compute bivariate statistics between all 
attribute pairs 
-#   Given, index1 = {A_11, A_12, ... A_1m} and index2 = {A_21, A_22, ... A_2n} 
-#          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and 
(1<= j <=n)
-#
-# Six inputs:  
-#    1) X  - input data
-#    2) index1 - First attribute set {A_11, A_12, ... A_1m}
-#    3) index2 - Second attribute set {A_21, A_22, ... A_2n}
-#    4) types1 - kind for attributes in S1 
-#    5) types2 - kind for attributes in S2
-#             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
-# 
-# One output:    
-#    6) output directory in which following (maximum of) four statistics files 
are created
-#        + bivar.scale.scale.stats - matrix containing scale-scale correlations
-#        + bivar.nominal.nominal.stats - 
-#        + bivar.nominal.scale.stats - 
-#        + bivar.ordinal.ordinal.stats - 
-#
-# hadoop jar SystemML.jar -f bivar-stats.dml -nvargs X=<Data>
-#                                                    index1=<Feature Index Set 
1>
-#                                                    index2=<Feature Index Set 
2>
-#                                                    types1=<Feature Types 1>
-#                                                    types2=<Feature Types 2>
-#                                                    OUTDIR=<Output Location>
-
-D = read($X);  # input data set
-S1 = read($index1); # attribute set 1
-S2 = read($index2); # attribute set 2
-K1 = read($types1); # kind for attributes in S1
-K2 = read($types2); # kind for attributes in S2
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-numPairs = s1size * s2size;
-
-#test: 1 is Pearson'R, 2 is F-test, 3 is chi-squared, 4 is Spearman'sRho
-# R, (chisq, df, pval, cramersv,) spearman, eta, anovaf, feature_col_index1, 
feature_col_index2, test
-
-num_scale_scale_tests = 0
-num_nominal_nominal_tests = 0
-num_ordinal_ordinal_tests = 0
-num_nominal_scale_tests = 0
-
-pair2row = matrix(0, rows=numPairs, cols=2)
-for( i in 1:s1size, check=0) {
-    pre_a1 = castAsScalar(S1[1,i]);
-    pre_k1 = castAsScalar(K1[1,i]);
-
-    for( j in 1:s2size, check=0) {
-        pre_pairID = (i-1)*s2size+j; 
-        pre_a2 = castAsScalar(S2[1,j]);
-        pre_k2 = castAsScalar(K2[1,j]);
-       
-       if (pre_k1 == pre_k2) {
-            if (pre_k1 == 1) {
-                       num_scale_scale_tests = num_scale_scale_tests + 1
-                               pair2row[pre_pairID,1] = num_scale_scale_tests
-            } else {
-                       num_nominal_nominal_tests = num_nominal_nominal_tests + 
1
-                               pair2row[pre_pairID,1] = 
num_nominal_nominal_tests
-               
-                if ( pre_k1 == 3 ) {
-                               num_ordinal_ordinal_tests = 
num_ordinal_ordinal_tests + 1
-                               pair2row[pre_pairID, 2] = 
num_ordinal_ordinal_tests
-                }
-            }
-        }
-        else {
-            if (pre_k1 == 1 | pre_k2 == 1) {
-                       num_nominal_scale_tests = num_nominal_scale_tests + 1
-                               pair2row[pre_pairID,1] = num_nominal_scale_tests
-            } else {
-                       num_nominal_nominal_tests = num_nominal_nominal_tests + 
1
-                               pair2row[pre_pairID,1] = 
num_nominal_nominal_tests 
-            }
-               }
-    }
-}
-
-size_scale_scale_tests     = max(num_scale_scale_tests, 1);
-size_nominal_nominal_tests = max(num_nominal_nominal_tests, 1)
-size_ordinal_ordinal_tests = max(num_ordinal_ordinal_tests, 1);
-size_nominal_scale_tests   = max(num_nominal_scale_tests, 1);
-
-basestats                 = matrix(0, rows=11, cols=numPairs);
-basestats_scale_scale     = matrix(0, rows=6, cols=size_scale_scale_tests)
-basestats_nominal_nominal = matrix(0, rows=6, cols=size_nominal_nominal_tests)
-basestats_ordinal_ordinal = matrix(0, rows=3, cols=size_ordinal_ordinal_tests)
-basestats_nominal_scale   = matrix(0, rows=11, cols=size_nominal_scale_tests)
-
-
-# Compute max domain size among all categorical attributes
-# and check if these cols have been recoded
-
-debug_str = "Stopping execution of DML script due to invalid input";
-
-error_flag = FALSE;
-
-maxs = colMaxs(D);
-mins = colMins(D)
-maxDomainSize = -1.0;
-for(k in 1:ncol(K1) ) {
-  type = as.scalar(K1[1,k]);
-  
-  if ( type > 1) {
-    colID = as.scalar(S1[1,k]);
-    
-    colMaximum = as.scalar(maxs[1,colID]);
-    if(maxDomainSize < colMaximum) maxDomainSize = colMaximum;
-  
-       colMinimum = as.scalar(mins[1,colID]);
-       if(colMinimum < 1){
-         if(type == 2)
-           debug_str = append(debug_str, "Column " + colID + " was declared as 
nominal but its minimum value is " + colMinimum)
-         else
-           debug_str = append(debug_str, "Column " + colID + " was declared as 
ordinal but its minimum value is " + colMinimum)
-         error_flag = TRUE;
-       }
-  }
-}
-
-for(k in 1:ncol(K2) ) {
-  type = as.scalar(K2[1,k]);
-  
-  if ( type > 1) {
-    colID = as.scalar(S2[1,k]);
-    
-    colMaximum = as.scalar(maxs[1,colID]);
-    if(maxDomainSize < colMaximum) maxDomainSize = colMaximum;
-  
-       colMinimum = as.scalar(mins[1,colID]);
-       if(colMinimum < 1){
-         if(type == 2)
-           debug_str = append(debug_str, "Column " + colID + " was declared as 
nominal but its minimum value is " + colMinimum)
-         else 
-               debug_str = append(debug_str, "Column " + colID + " was 
declared as ordinal but its minimum value is " + colMinimum)
-         error_flag = TRUE;
-       }
-  }
-}
-maxDomain = as.integer(maxDomainSize);
-
-if(error_flag) stop(debug_str);
-
-parfor( i in 1:s1size, check=0) {
-    a1 = castAsScalar(S1[1,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-
-    parfor( j in 1:s2size, check=0) {
-        pairID = (i-1)*s2size+j; 
-        a2 = castAsScalar(S2[1,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-
-               rowid1 = castAsScalar(pair2row[pairID, 1])
-       rowid2 = castAsScalar(pair2row[pairID, 2])
-
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                [r, cov, sigma1, sigma2] = bivar_ss(A1,A2);   
-               
-                       basestats_scale_scale[1,rowid1] = a1;
-                               basestats_scale_scale[2,rowid1] = a2;   
-                basestats_scale_scale[3,rowid1] = r;
-                basestats_scale_scale[4,rowid1] = cov;
-                basestats_scale_scale[5,rowid1] = sigma1;
-                basestats_scale_scale[6,rowid1] = sigma2;
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1, A2, maxDomain);
-
-                basestats_nominal_nominal[1,rowid1] = a1;
-                               basestats_nominal_nominal[2,rowid1] = a2;       
-                basestats_nominal_nominal[3,rowid1] = chisq;
-                basestats_nominal_nominal[4,rowid1] = df;
-                basestats_nominal_nominal[5,rowid1] = pval;
-                basestats_nominal_nominal[6,rowid1] = cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2, maxDomain);
-
-                    basestats_ordinal_ordinal[1,rowid2] = a1;
-                    basestats_ordinal_ordinal[2,rowid2] = a2;
-                    basestats_ordinal_ordinal[3,rowid2] = sp;
-                }
-            }
-        } else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal     
-                print("[" + i + "," + j + "] scale-categorical");
-                
-                       if ( k1 == 1 ) {
-                       [eta, f, pval, bw_ss, within_ss, bw_df, within_df, 
bw_mean_square, within_mean_square] = bivar_sc(A1, A2, maxDomain);
-                } else {
-                    [eta, f, pval, bw_ss, within_ss, bw_df, within_df, 
bw_mean_square, within_mean_square] = bivar_sc(A2, A1, maxDomain);
-                }
-               
-                basestats_nominal_scale[1,rowid1] = a1;
-                basestats_nominal_scale[2,rowid1] = a2;
-                basestats_nominal_scale[3,rowid1] = eta;
-                basestats_nominal_scale[4,rowid1] = f;
-                basestats_nominal_scale[5,rowid1] = pval;
-                basestats_nominal_scale[6,rowid1] = bw_ss;
-                basestats_nominal_scale[7,rowid1] = within_ss;
-                basestats_nominal_scale[8,rowid1] = bw_df;
-                basestats_nominal_scale[9,rowid1] = within_df;
-                basestats_nominal_scale[10,rowid1] = bw_mean_square;
-                basestats_nominal_scale[11,rowid1] = within_mean_square;
-            } else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1, A2, maxDomain);
-
-                               basestats_nominal_nominal[1,rowid1] = a1;
-                               basestats_nominal_nominal[2,rowid1] = a2;
-                basestats_nominal_nominal[3,rowid1] = chisq;
-                basestats_nominal_nominal[4,rowid1] = df;
-                basestats_nominal_nominal[5,rowid1] = pval;
-                basestats_nominal_nominal[6,rowid1] = cramersv;
-            }
-        }
-    }
-}
-
-if(num_scale_scale_tests == size_scale_scale_tests){
-  write(basestats_scale_scale, $OUTDIR + "/bivar.scale.scale.stats");
-}
-
-if(num_nominal_scale_tests == size_nominal_scale_tests){
-  write(basestats_nominal_scale, $OUTDIR + "/bivar.nominal.scale.stats");
-}
-
-if(num_nominal_nominal_tests == size_nominal_nominal_tests){
-  write(basestats_nominal_nominal, $OUTDIR + "/bivar.nominal.nominal.stats");
-}
-
-if(num_ordinal_ordinal_tests == size_ordinal_ordinal_tests){
-  write(basestats_ordinal_ordinal, $OUTDIR + "/bivar.ordinal.ordinal.stats");
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B, Double maxDomain) 
return (Double chisq, Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A, B, maxDomain, maxDomain);
-    F = F[1:max(A), 1:max(B)];
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = as.double(degFreedom);
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, 
Double covXY, Double sigmaX, Double sigmaY) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central 
moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A, Double maxDomain) 
-                  return (Double Eta, Double AnovaF, Double pval, Double 
bw_ss, Double within_ss, Double bw_df, Double within_df, Double bw_mean_square, 
Double within_mean_square) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count", ngroups=maxDomain); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean", ngroups=maxDomain);
-    CVars =  aggregate(target=Y, groups=A, fn="variance", ngroups=maxDomain);
-    
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    bw_ss = sum( (CFreqs*(CMeans-my)^2) );
-    bw_df = as.double(R-1);
-    bw_mean_square = bw_ss/bw_df;
-       
-    within_ss = sum( (CFreqs-1)*CVars );
-    within_df = as.double(W-R);
-    within_mean_square = within_ss/within_df;
-       
-    AnovaF = bw_mean_square/within_mean_square;
-    
-    pval = pf(target=AnovaF, df1=bw_df, df2=within_df, lower.tail=FALSE)
-}
-
-
-# 
-----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which 
each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B, Double maxDomain) 
return (Double sp) {
-
-    # compute contingency table
-    F = table(A, B, maxDomain, maxDomain);
-    F = F[1:max(A), 1:max(B)];
-    
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+#
+# For a given pair of attribute sets, compute bivariate statistics between all 
attribute pairs 
+#   Given, index1 = {A_11, A_12, ... A_1m} and index2 = {A_21, A_22, ... A_2n} 
+#          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and 
(1<= j <=n)
+#
+# Six inputs:  
+#    1) X  - input data
+#    2) index1 - First attribute set {A_11, A_12, ... A_1m}
+#    3) index2 - Second attribute set {A_21, A_22, ... A_2n}
+#    4) types1 - kind for attributes in S1 
+#    5) types2 - kind for attributes in S2
+#             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+# 
+# One output:    
+#    6) output directory in which following (maximum of) four statistics files 
are created
+#        + bivar.scale.scale.stats - matrix containing scale-scale correlations
+#        + bivar.nominal.nominal.stats - 
+#        + bivar.nominal.scale.stats - 
+#        + bivar.ordinal.ordinal.stats - 
+#
+# hadoop jar SystemML.jar -f bivar-stats.dml -nvargs X=<Data>
+#                                                    index1=<Feature Index Set 
1>
+#                                                    index2=<Feature Index Set 
2>
+#                                                    types1=<Feature Types 1>
+#                                                    types2=<Feature Types 2>
+#                                                    OUTDIR=<Output Location>
+
+D = read($X);  # input data set
+S1 = read($index1); # attribute set 1
+S2 = read($index2); # attribute set 2
+K1 = read($types1); # kind for attributes in S1
+K2 = read($types2); # kind for attributes in S2
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+numPairs = s1size * s2size;
+
+#test: 1 is Pearson'R, 2 is F-test, 3 is chi-squared, 4 is Spearman'sRho
+# R, (chisq, df, pval, cramersv,) spearman, eta, anovaf, feature_col_index1, 
feature_col_index2, test
+
+num_scale_scale_tests = 0
+num_nominal_nominal_tests = 0
+num_ordinal_ordinal_tests = 0
+num_nominal_scale_tests = 0
+
+pair2row = matrix(0, rows=numPairs, cols=2)
+for( i in 1:s1size, check=0) {
+    pre_a1 = castAsScalar(S1[1,i]);
+    pre_k1 = castAsScalar(K1[1,i]);
+
+    for( j in 1:s2size, check=0) {
+        pre_pairID = (i-1)*s2size+j; 
+        pre_a2 = castAsScalar(S2[1,j]);
+        pre_k2 = castAsScalar(K2[1,j]);
+       
+       if (pre_k1 == pre_k2) {
+            if (pre_k1 == 1) {
+                       num_scale_scale_tests = num_scale_scale_tests + 1
+                               pair2row[pre_pairID,1] = num_scale_scale_tests
+            } else {
+                       num_nominal_nominal_tests = num_nominal_nominal_tests + 
1
+                               pair2row[pre_pairID,1] = 
num_nominal_nominal_tests
+               
+                if ( pre_k1 == 3 ) {
+                               num_ordinal_ordinal_tests = 
num_ordinal_ordinal_tests + 1
+                               pair2row[pre_pairID, 2] = 
num_ordinal_ordinal_tests
+                }
+            }
+        }
+        else {
+            if (pre_k1 == 1 | pre_k2 == 1) {
+                       num_nominal_scale_tests = num_nominal_scale_tests + 1
+                               pair2row[pre_pairID,1] = num_nominal_scale_tests
+            } else {
+                       num_nominal_nominal_tests = num_nominal_nominal_tests + 
1
+                               pair2row[pre_pairID,1] = 
num_nominal_nominal_tests 
+            }
+               }
+    }
+}
+
+size_scale_scale_tests     = max(num_scale_scale_tests, 1);
+size_nominal_nominal_tests = max(num_nominal_nominal_tests, 1)
+size_ordinal_ordinal_tests = max(num_ordinal_ordinal_tests, 1);
+size_nominal_scale_tests   = max(num_nominal_scale_tests, 1);
+
+basestats                 = matrix(0, rows=11, cols=numPairs);
+basestats_scale_scale     = matrix(0, rows=6, cols=size_scale_scale_tests)
+basestats_nominal_nominal = matrix(0, rows=6, cols=size_nominal_nominal_tests)
+basestats_ordinal_ordinal = matrix(0, rows=3, cols=size_ordinal_ordinal_tests)
+basestats_nominal_scale   = matrix(0, rows=11, cols=size_nominal_scale_tests)
+
+
+# Compute max domain size among all categorical attributes
+# and check if these cols have been recoded
+
+debug_str = "Stopping execution of DML script due to invalid input";
+
+error_flag = FALSE;
+
+maxs = colMaxs(D);
+mins = colMins(D)
+maxDomainSize = -1.0;
+for(k in 1:ncol(K1) ) {
+  type = as.scalar(K1[1,k]);
+  
+  if ( type > 1) {
+    colID = as.scalar(S1[1,k]);
+    
+    colMaximum = as.scalar(maxs[1,colID]);
+    if(maxDomainSize < colMaximum) maxDomainSize = colMaximum;
+  
+       colMinimum = as.scalar(mins[1,colID]);
+       if(colMinimum < 1){
+         if(type == 2)
+           debug_str = append(debug_str, "Column " + colID + " was declared as 
nominal but its minimum value is " + colMinimum)
+         else
+           debug_str = append(debug_str, "Column " + colID + " was declared as 
ordinal but its minimum value is " + colMinimum)
+         error_flag = TRUE;
+       }
+  }
+}
+
+for(k in 1:ncol(K2) ) {
+  type = as.scalar(K2[1,k]);
+  
+  if ( type > 1) {
+    colID = as.scalar(S2[1,k]);
+    
+    colMaximum = as.scalar(maxs[1,colID]);
+    if(maxDomainSize < colMaximum) maxDomainSize = colMaximum;
+  
+       colMinimum = as.scalar(mins[1,colID]);
+       if(colMinimum < 1){
+         if(type == 2)
+           debug_str = append(debug_str, "Column " + colID + " was declared as 
nominal but its minimum value is " + colMinimum)
+         else 
+               debug_str = append(debug_str, "Column " + colID + " was 
declared as ordinal but its minimum value is " + colMinimum)
+         error_flag = TRUE;
+       }
+  }
+}
+maxDomain = as.integer(maxDomainSize);
+
+if(error_flag) stop(debug_str);
+
+parfor( i in 1:s1size, check=0) {
+    a1 = castAsScalar(S1[1,i]);
+    k1 = castAsScalar(K1[1,i]);
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, check=0) {
+        pairID = (i-1)*s2size+j; 
+        a2 = castAsScalar(S2[1,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+
+               rowid1 = castAsScalar(pair2row[pairID, 1])
+       rowid2 = castAsScalar(pair2row[pairID, 2])
+
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                [r, cov, sigma1, sigma2] = bivar_ss(A1,A2);   
+               
+                       basestats_scale_scale[1,rowid1] = a1;
+                               basestats_scale_scale[2,rowid1] = a2;   
+                basestats_scale_scale[3,rowid1] = r;
+                basestats_scale_scale[4,rowid1] = cov;
+                basestats_scale_scale[5,rowid1] = sigma1;
+                basestats_scale_scale[6,rowid1] = sigma2;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1, A2, maxDomain);
+
+                basestats_nominal_nominal[1,rowid1] = a1;
+                               basestats_nominal_nominal[2,rowid1] = a2;       
+                basestats_nominal_nominal[3,rowid1] = chisq;
+                basestats_nominal_nominal[4,rowid1] = df;
+                basestats_nominal_nominal[5,rowid1] = pval;
+                basestats_nominal_nominal[6,rowid1] = cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2, maxDomain);
+
+                    basestats_ordinal_ordinal[1,rowid2] = a1;
+                    basestats_ordinal_ordinal[2,rowid2] = a2;
+                    basestats_ordinal_ordinal[3,rowid2] = sp;
+                }
+            }
+        } else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal     
+                print("[" + i + "," + j + "] scale-categorical");
+                
+                       if ( k1 == 1 ) {
+                       [eta, f, pval, bw_ss, within_ss, bw_df, within_df, 
bw_mean_square, within_mean_square] = bivar_sc(A1, A2, maxDomain);
+                } else {
+                    [eta, f, pval, bw_ss, within_ss, bw_df, within_df, 
bw_mean_square, within_mean_square] = bivar_sc(A2, A1, maxDomain);
+                }
+               
+                basestats_nominal_scale[1,rowid1] = a1;
+                basestats_nominal_scale[2,rowid1] = a2;
+                basestats_nominal_scale[3,rowid1] = eta;
+                basestats_nominal_scale[4,rowid1] = f;
+                basestats_nominal_scale[5,rowid1] = pval;
+                basestats_nominal_scale[6,rowid1] = bw_ss;
+                basestats_nominal_scale[7,rowid1] = within_ss;
+                basestats_nominal_scale[8,rowid1] = bw_df;
+                basestats_nominal_scale[9,rowid1] = within_df;
+                basestats_nominal_scale[10,rowid1] = bw_mean_square;
+                basestats_nominal_scale[11,rowid1] = within_mean_square;
+            } else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1, A2, maxDomain);
+
+                               basestats_nominal_nominal[1,rowid1] = a1;
+                               basestats_nominal_nominal[2,rowid1] = a2;
+                basestats_nominal_nominal[3,rowid1] = chisq;
+                basestats_nominal_nominal[4,rowid1] = df;
+                basestats_nominal_nominal[5,rowid1] = pval;
+                basestats_nominal_nominal[6,rowid1] = cramersv;
+            }
+        }
+    }
+}
+
+if(num_scale_scale_tests == size_scale_scale_tests){
+  write(basestats_scale_scale, $OUTDIR + "/bivar.scale.scale.stats");
+}
+
+if(num_nominal_scale_tests == size_nominal_scale_tests){
+  write(basestats_nominal_scale, $OUTDIR + "/bivar.nominal.scale.stats");
+}
+
+if(num_nominal_nominal_tests == size_nominal_nominal_tests){
+  write(basestats_nominal_nominal, $OUTDIR + "/bivar.nominal.nominal.stats");
+}
+
+if(num_ordinal_ordinal_tests == size_ordinal_ordinal_tests){
+  write(basestats_ordinal_ordinal, $OUTDIR + "/bivar.ordinal.ordinal.stats");
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B, Double maxDomain) 
return (Double chisq, Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A, B, maxDomain, maxDomain);
+    F = F[1:max(A), 1:max(B)];
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = as.double(degFreedom);
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, 
Double covXY, Double sigmaX, Double sigmaY) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central 
moment
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A, Double maxDomain) 
+                  return (Double Eta, Double AnovaF, Double pval, Double 
bw_ss, Double within_ss, Double bw_df, Double within_df, Double bw_mean_square, 
Double within_mean_square) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count", ngroups=maxDomain); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean", ngroups=maxDomain);
+    CVars =  aggregate(target=Y, groups=A, fn="variance", ngroups=maxDomain);
+    
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    bw_ss = sum( (CFreqs*(CMeans-my)^2) );
+    bw_df = as.double(R-1);
+    bw_mean_square = bw_ss/bw_df;
+       
+    within_ss = sum( (CFreqs-1)*CVars );
+    within_df = as.double(W-R);
+    within_mean_square = within_ss/within_df;
+       
+    AnovaF = bw_mean_square/within_mean_square;
+    
+    pval = pf(target=AnovaF, df1=bw_df, df2=within_df, lower.tail=FALSE)
+}
+
+
+# 
-----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which 
each cell denotes to the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B, Double maxDomain) 
return (Double sp) {
+
+    # compute contingency table
+    F = table(A, B, maxDomain, maxDomain);
+    F = F[1:max(A), 1:max(B)];
+    
+    catA = nrow(F);  # number of categories in A
+    catB = ncol(F);  # number of categories in B
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# 
-----------------------------------------------------------------------------------------------------------

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/algorithms/decision-tree-predict.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/decision-tree-predict.dml 
b/scripts/algorithms/decision-tree-predict.dml
index 9e01adb..3447da6 100644
--- a/scripts/algorithms/decision-tree-predict.dml
+++ b/scripts/algorithms/decision-tree-predict.dml
@@ -1,142 +1,142 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT COMPUTES LABEL PREDICTIONS MEANT FOR USE WITH A DECISION TREE 
MODEL ON A HELD OUT TEST SET.
-#
-# INPUT         PARAMETERS:
-# 
---------------------------------------------------------------------------------------------
-# NAME          TYPE     DEFAULT      MEANING
-# 
---------------------------------------------------------------------------------------------
-# X             String   ---          Location to read the test feature matrix 
X; note that X needs to be both recoded and dummy coded 
-# Y                        String   " "                  Location to read the 
true label matrix Y if requested; note that Y needs to be both recoded and 
dummy coded
-# R                    String   " "          Location to read matrix R which 
for each feature in X contains the following information 
-#                                                                              
- R[,1]: column ids
-#                                                                              
- R[,2]: start indices 
-#                                                                              
- R[,3]: end indices
-#                                                                        If R 
is not provided by default all variables are assumed to be scale
-# M             String          ---              Location to read matrix M 
containing the learned tree in the following format
-#                                                                              
- M[1,j]: id of node j (in a complete binary tree)
-#                                                                              
- M[2,j]: Offset (no. of columns) to left child of j if j is an internal node, 
otherwise 0
-#                                                                              
- M[3,j]: Feature index of the feature that node j looks at if j is an internal 
node, otherwise 0
-#                                                                              
- M[4,j]: Type of the feature that node j looks at if j is an internal node: 1 
for scale and 2 for categorical features, 
-#                                                                              
          otherwise the label that leaf node j is supposed to predict
-#                                                                              
- M[5,j]: If j is an internal node: 1 if the feature chosen for j is scale, 
otherwise the size of the subset of values 
-#                                                                              
                  stored in rows 6,7,... if j is categorical 
-#                                                                              
                  If j is a leaf node: number of misclassified samples reaching 
at node j 
-#                                                                              
- M[6:,j]: If j is an internal node: Threshold the example's feature value is 
compared to is stored at M[6,j] 
-#                                                                              
                   if the feature chosen for j is scale, otherwise if the 
feature chosen for j is categorical rows 6,7,... 
-#                                                                              
                   depict the value subset chosen for j
-#                                                                              
           If j is a leaf node 1 if j is impure and the number of samples at j 
> threshold, otherwise 0
-# P                            String   ---              Location to store the 
label predictions for X
-# A                    String   " "          Location to write the test 
accuracy (%) for the prediction if requested
-# CM                   String   " "              Location to write the 
confusion matrix if requested 
-# fmt              String   "text"       The output format of the output, such 
as "text" or "csv"
-# 
---------------------------------------------------------------------------------------------
-# OUTPUT: 
-#      1- Matrix Y containing the predicted labels for X 
-#   2- Test accuracy if requested
-#   3- Confusion matrix C if requested
-# 
-------------------------------------------------------------------------------------------
-# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
-# hadoop jar SystemML.jar -f decision-tree-predict.dml -nvargs X=INPUT_DIR/X 
Y=INPUT_DIR/Y R=INPUT_DIR/R M=INPUT_DIR/model P=OUTPUT_DIR/predictions
-#                                                                              
                                A=OUTPUT_DIR/accuracy CM=OUTPUT_DIR/confusion 
fmt=csv
-
-fileX = $X;
-fileM = $M;
-fileP = $P;
-fileY = ifdef ($Y, " ");
-fileR = ifdef ($R, " ");
-fileCM = ifdef ($CM, " ");
-fileA = ifdef ($A, " ");
-fmtO = ifdef ($fmt, "text");
-X_test = read (fileX);
-M = read (fileM);
-
-num_records = nrow (X_test);
-Y_predicted = matrix (0, rows = num_records, cols = 1);
-
-R_cat = matrix (0, rows = 1, cols = 1);
-R_scale = matrix (0, rows = 1, cols = 1);
-
-if (fileR != " ") {
-       R = read (fileR);
-       dummy_coded = ppred (R[,2], R[,3], "!=");
-       R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = 
"rows");
-       R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
-} else { # only scale features available
-       R_scale = seq (1, ncol (X_test));
-}
-
-parfor (i in 1:num_records, check = 0) {
-       cur_sample = X_test[i,];
-       cur_node_pos = 1;
-       label_found = FALSE;
-       while (!label_found) {
-               cur_feature = as.scalar (M[3,cur_node_pos]);    
-               type_label = as.scalar (M[4,cur_node_pos]);
-               if (cur_feature == 0) { # leaf node
-                       label_found = TRUE;
-                       Y_predicted[i,] = type_label;
-               } else {
-                       # determine type: 1 for scale, 2 for categorical 
-                       if (type_label == 1) { # scale feature
-                               cur_start_ind = as.scalar 
(R_scale[cur_feature,]);
-                               cur_value = as.scalar 
(cur_sample[,cur_start_ind]);
-                               cur_split = as.scalar (M[6,cur_node_pos]);
-                               if (cur_value < cur_split) { # go to left branch
-                                       cur_node_pos = cur_node_pos + as.scalar 
(M[2,cur_node_pos]);
-                               } else { # go to right branch
-                                       cur_node_pos = cur_node_pos + as.scalar 
(M[2,cur_node_pos]) + 1;
-                               }
-                       } else if (type_label == 2) { # categorical feature     
                        
-                               cur_start_ind = as.scalar 
(R_cat[cur_feature,1]);
-                               cur_end_ind = as.scalar (R_cat[cur_feature,2]); 
                                
-                               cur_value = as.scalar 
(rowIndexMax(cur_sample[,cur_start_ind:cur_end_ind])); # as.scalar 
(cur_sample[,cur_feature]);
-                               cur_offset = as.scalar (M[5,cur_node_pos]);
-                               value_found = sum (ppred (M[6:(6 + cur_offset - 
1),cur_node_pos], cur_value, "=="));
-                               if (value_found) { # go to left branch
-                                       cur_node_pos = cur_node_pos + as.scalar 
(M[2,cur_node_pos]);
-                               } else { # go to right branch
-                                       cur_node_pos = cur_node_pos + as.scalar 
(M[2,cur_node_pos]) + 1;
-                               }
-}}}}
-
-write (Y_predicted, fileP, format = fmtO);
-
-if (fileY != " ") {
-       Y_test = read (fileY);
-       num_classes = ncol (Y_test);
-       Y_test = rowSums (Y_test * t (seq (1, num_classes)));
-       result = ppred (Y_test, Y_predicted, "==");
-       result = sum (result);
-       accuracy = result / num_records * 100;
-       acc_str = "Accuracy (%): " + accuracy;
-       if (fileA != " ") {
-               write (acc_str, fileA, format = fmtO);
-       } else {
-               print (acc_str);
-       }
-       if (fileCM != " ") {
-               confusion_mat = table(Y_predicted, Y_test, num_classes, 
num_classes)
-        write(confusion_mat, fileCM, format = fmtO)
-       }
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT COMPUTES LABEL PREDICTIONS MEANT FOR USE WITH A DECISION TREE 
MODEL ON A HELD OUT TEST SET.
+#
+# INPUT         PARAMETERS:
+# 
---------------------------------------------------------------------------------------------
+# NAME          TYPE     DEFAULT      MEANING
+# 
---------------------------------------------------------------------------------------------
+# X             String   ---          Location to read the test feature matrix 
X; note that X needs to be both recoded and dummy coded 
+# Y                        String   " "                  Location to read the 
true label matrix Y if requested; note that Y needs to be both recoded and 
dummy coded
+# R                    String   " "          Location to read matrix R which 
for each feature in X contains the following information 
+#                                                                              
- R[,1]: column ids
+#                                                                              
- R[,2]: start indices 
+#                                                                              
- R[,3]: end indices
+#                                                                        If R 
is not provided by default all variables are assumed to be scale
+# M             String          ---              Location to read matrix M 
containing the learned tree in the following format
+#                                                                              
- M[1,j]: id of node j (in a complete binary tree)
+#                                                                              
- M[2,j]: Offset (no. of columns) to left child of j if j is an internal node, 
otherwise 0
+#                                                                              
- M[3,j]: Feature index of the feature that node j looks at if j is an internal 
node, otherwise 0
+#                                                                              
- M[4,j]: Type of the feature that node j looks at if j is an internal node: 1 
for scale and 2 for categorical features, 
+#                                                                              
          otherwise the label that leaf node j is supposed to predict
+#                                                                              
- M[5,j]: If j is an internal node: 1 if the feature chosen for j is scale, 
otherwise the size of the subset of values 
+#                                                                              
                  stored in rows 6,7,... if j is categorical 
+#                                                                              
                  If j is a leaf node: number of misclassified samples reaching 
at node j 
+#                                                                              
- M[6:,j]: If j is an internal node: Threshold the example's feature value is 
compared to is stored at M[6,j] 
+#                                                                              
                   if the feature chosen for j is scale, otherwise if the 
feature chosen for j is categorical rows 6,7,... 
+#                                                                              
                   depict the value subset chosen for j
+#                                                                              
           If j is a leaf node 1 if j is impure and the number of samples at j 
> threshold, otherwise 0
+# P                            String   ---              Location to store the 
label predictions for X
+# A                    String   " "          Location to write the test 
accuracy (%) for the prediction if requested
+# CM                   String   " "              Location to write the 
confusion matrix if requested 
+# fmt              String   "text"       Output format of the written results, such as "text" or "csv"
+# 
---------------------------------------------------------------------------------------------
+# OUTPUT:
+#     1- Matrix Y containing the predicted labels for X
+#     2- Test accuracy (%) if requested
+#     3- Confusion matrix C if requested
+# -------------------------------------------------------------------------------------------
+# HOW TO INVOKE THIS SCRIPT - EXAMPLE:
+# hadoop jar SystemML.jar -f decision-tree-predict.dml -nvargs X=INPUT_DIR/X Y=INPUT_DIR/Y R=INPUT_DIR/R M=INPUT_DIR/model P=OUTPUT_DIR/predictions
+#                         A=OUTPUT_DIR/accuracy CM=OUTPUT_DIR/confusion fmt=csv
+
# Predict class labels for the samples in X by routing each one through the
# decision tree encoded in model matrix M (layout described in the header
# comment), then optionally score the predictions against ground-truth labels.

fileX = $X;
fileM = $M;
fileP = $P;
fileY = ifdef ($Y, " ");
fileR = ifdef ($R, " ");
fileCM = ifdef ($CM, " ");
fileA = ifdef ($A, " ");
fmtO = ifdef ($fmt, "text");

X_test = read (fileX);
M = read (fileM);

num_records = nrow (X_test);
Y_predicted = matrix (0, rows = num_records, cols = 1);

# Column lookup tables derived from the feature map R:
#   R_scale[f,1]   -> column of X_test holding scale feature f
#   R_cat[f,1:2]   -> [start, end] column range of categorical feature f
R_cat = matrix (0, rows = 1, cols = 1);
R_scale = matrix (0, rows = 1, cols = 1);

if (fileR != " ") {
	R = read (fileR);
	# a feature is dummy coded (categorical) iff its start and end columns differ
	dummy_coded = ppred (R[,2], R[,3], "!=");
	R_scale = removeEmpty (target = R[,2] * (1 - dummy_coded), margin = "rows");
	R_cat = removeEmpty (target = R[,2:3] * dummy_coded, margin = "rows");
} else {
	# no feature map supplied: treat every column of X_test as a scale feature
	R_scale = seq (1, ncol (X_test));
}

# Each sample is routed independently from the root (node position 1) to a leaf.
parfor (i in 1:num_records, check = 0) {
	sample = X_test[i,];
	node_pos = 1;
	at_leaf = FALSE;
	while (!at_leaf) {
		feature_id = as.scalar (M[3,node_pos]);
		# for an internal node this is the feature type (1 scale, 2 categorical);
		# for a leaf it is the predicted label
		type_or_label = as.scalar (M[4,node_pos]);
		if (feature_id == 0) { # leaf node
			at_leaf = TRUE;
			Y_predicted[i,] = type_or_label;
		} else {
			left_offset = as.scalar (M[2,node_pos]);
			if (type_or_label == 1) { # scale feature: threshold comparison
				col = as.scalar (R_scale[feature_id,]);
				threshold = as.scalar (M[6,node_pos]);
				if (as.scalar (sample[,col]) < threshold) {
					node_pos = node_pos + left_offset;     # left child
				} else {
					node_pos = node_pos + left_offset + 1; # right child
				}
			} else if (type_or_label == 2) { # categorical feature: subset membership
				start_col = as.scalar (R_cat[feature_id,1]);
				end_col = as.scalar (R_cat[feature_id,2]);
				# category value = position of the 1 in the dummy-coded slice
				value = as.scalar (rowIndexMax (sample[,start_col:end_col]));
				subset_size = as.scalar (M[5,node_pos]);
				in_subset = sum (ppred (M[6:(6 + subset_size - 1),node_pos], value, "=="));
				if (in_subset) {
					node_pos = node_pos + left_offset;     # left child
				} else {
					node_pos = node_pos + left_offset + 1; # right child
				}
			}
		}
	}
}

write (Y_predicted, fileP, format = fmtO);

# Optionally score predictions against ground-truth labels Y.
if (fileY != " ") {
	Y_test = read (fileY);
	num_classes = ncol (Y_test);
	# collapse the one-hot label matrix to a single column of class ids
	Y_test = rowSums (Y_test * t (seq (1, num_classes)));
	num_correct = sum (ppred (Y_test, Y_predicted, "=="));
	accuracy = num_correct / num_records * 100;
	acc_str = "Accuracy (%): " + accuracy;
	if (fileA != " ") {
		write (acc_str, fileA, format = fmtO);
	} else {
		print (acc_str);
	}
	if (fileCM != " ") {
		confusion_mat = table (Y_predicted, Y_test, num_classes, num_classes);
		write (confusion_mat, fileCM, format = fmtO);
	}
}

Reply via email to