http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/m-svm/m-svm.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/m-svm/m-svm.dml 
b/src/test/scripts/applications/m-svm/m-svm.dml
index 1307707..bbf5acc 100644
--- a/src/test/scripts/applications/m-svm/m-svm.dml
+++ b/src/test/scripts/applications/m-svm/m-svm.dml
@@ -1,145 +1,145 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multiclass SVM with squared slack variables, 
-# learns one-against-the-rest binary-class classifiers
-# 
-# Example Usage:
-# Assume SVM_HOME is set to the home of the dml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations 
= 100
-# 
-# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X 
Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 
model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-
-cmdLine_fmt=ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol=ifdef($tol, 0.001)
-cmdLine_reg=ifdef($reg, 1.0)
-cmdLine_maxiter=ifdef($maxiter, 100)
-
-print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + 
" maxiter=" + cmdLine_maxiter)
-
-X = read($X)
-
-check_X = sum(X)
-if(check_X == 0){
-       print("X has no non-zeros")
-}else{
-       Y = read($Y)
-       intercept = cmdLine_icpt
-       num_classes = $classes
-       epsilon = cmdLine_tol
-       lambda = cmdLine_reg
-       max_iterations = cmdLine_maxiter
- 
-       num_samples = nrow(X)
-       num_features = ncol(X)
-
-       if (intercept == 1) {
-               ones  = matrix(1, rows=num_samples, cols=1);
-               X = append(X, ones);
-       }
-
-       num_rows_in_w = num_features
-       if(intercept == 1){
-               num_rows_in_w = num_rows_in_w + 1
-       }
-       w = matrix(0, rows=num_rows_in_w, cols=num_classes)
-
-       debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
-       parfor(iter_class in 1:num_classes){              
-               Y_local = 2 * ppred(Y, iter_class, "==") - 1
-               w_class = matrix(0, rows=num_features, cols=1)
-               if (intercept == 1) {
-                       zero_matrix = matrix(0, rows=1, cols=1);
-                       w_class = t(append(t(w_class), zero_matrix));
-               }
- 
-               g_old = t(X) %*% Y_local
-               s = g_old
-
-               Xw = matrix(0, rows=nrow(X), cols=1)
-               iter = 0
-               continue = 1
-               while(continue == 1)  {
-                       # minimizing primal obj along direction s
-                       step_sz = 0
-                       Xd = X %*% s
-                       wd = lambda * sum(w_class * s)
-                       dd = lambda * sum(s * s)
-                       continue1 = 1
-                       while(continue1 == 1){
-                               tmp_Xw = Xw + step_sz*Xd
-                               out = 1 - Y_local * (tmp_Xw)
-                               sv = ppred(out, 0, ">")
-                               out = out * sv
-                               g = wd + step_sz*dd - sum(out * Y_local * Xd)
-                               h = dd + sum(Xd * sv * Xd)
-                               step_sz = step_sz - g/h
-                               if (g*g/h < 0.0000000001){
-                               continue1 = 0
-                               }
-                       }
- 
-                       #update weights
-                       w_class = w_class + step_sz*s
-                       Xw = Xw + step_sz*Xd
- 
-                       out = 1 - Y_local * Xw
-                       sv = ppred(out, 0, ">")
-                       out = sv * out
-                       obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * 
w_class)
-                       g_new = t(X) %*% (out * Y_local) - lambda * w_class
-
-                       tmp = sum(s * g_old)
-  
-                       train_acc = sum(ppred(Y_local*(X%*%w_class), 0, 
">="))/num_samples*100
-                       print("For class " + iter_class + " iteration " + iter 
+ " training accuracy: " + train_acc)
-                       debug_mat[iter+1,iter_class] = obj         
-   
-                       if((step_sz*tmp < epsilon*obj) | (iter >= 
max_iterations-1)){
-                               continue = 0
-                       }
- 
-                       #non-linear CG step
-                       be = sum(g_new * g_new)/sum(g_old * g_old)
-                       s = be * s + g_new
-                       g_old = g_new
-
-                       iter = iter + 1
-               }
-
-               w[,iter_class] = w_class
-       }
-
-       write(w, $model, format=cmdLine_fmt)
-
-       debug_str = "# Class, Iter, Obj"
-       for(iter_class in 1:ncol(debug_mat)){
-               for(iter in 1:nrow(debug_mat)){
-                       obj = castAsScalar(debug_mat[iter, iter_class])
-                       if(obj != -1) 
-                               debug_str = append(debug_str, iter_class + "," 
+ iter + "," + obj)
-               }
-       }
-       write(debug_str, $Log)
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multiclass SVM with squared slack variables, 
+# learns one-against-the-rest binary-class classifiers
+# 
+# Example Usage:
+# Assume SVM_HOME is set to the home of the dml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations 
= 100
+# 
+# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.dml -nvargs X=$INPUT_DIR/X 
Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 maxiter=100 
model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+
+cmdLine_fmt=ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol=ifdef($tol, 0.001)
+cmdLine_reg=ifdef($reg, 1.0)
+cmdLine_maxiter=ifdef($maxiter, 100)
+
+print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + 
" maxiter=" + cmdLine_maxiter)
+
+X = read($X)
+
+check_X = sum(X)
+if(check_X == 0){
+       print("X has no non-zeros")
+}else{
+       Y = read($Y)
+       intercept = cmdLine_icpt
+       num_classes = $classes
+       epsilon = cmdLine_tol
+       lambda = cmdLine_reg
+       max_iterations = cmdLine_maxiter
+ 
+       num_samples = nrow(X)
+       num_features = ncol(X)
+
+       if (intercept == 1) {
+               ones  = matrix(1, rows=num_samples, cols=1);
+               X = append(X, ones);
+       }
+
+       num_rows_in_w = num_features
+       if(intercept == 1){
+               num_rows_in_w = num_rows_in_w + 1
+       }
+       w = matrix(0, rows=num_rows_in_w, cols=num_classes)
+
+       debug_mat = matrix(-1, rows=max_iterations, cols=num_classes)
+       parfor(iter_class in 1:num_classes){              
+               Y_local = 2 * ppred(Y, iter_class, "==") - 1
+               w_class = matrix(0, rows=num_features, cols=1)
+               if (intercept == 1) {
+                       zero_matrix = matrix(0, rows=1, cols=1);
+                       w_class = t(append(t(w_class), zero_matrix));
+               }
+ 
+               g_old = t(X) %*% Y_local
+               s = g_old
+
+               Xw = matrix(0, rows=nrow(X), cols=1)
+               iter = 0
+               continue = 1
+               while(continue == 1)  {
+                       # minimizing primal obj along direction s
+                       step_sz = 0
+                       Xd = X %*% s
+                       wd = lambda * sum(w_class * s)
+                       dd = lambda * sum(s * s)
+                       continue1 = 1
+                       while(continue1 == 1){
+                               tmp_Xw = Xw + step_sz*Xd
+                               out = 1 - Y_local * (tmp_Xw)
+                               sv = ppred(out, 0, ">")
+                               out = out * sv
+                               g = wd + step_sz*dd - sum(out * Y_local * Xd)
+                               h = dd + sum(Xd * sv * Xd)
+                               step_sz = step_sz - g/h
+                               if (g*g/h < 0.0000000001){
+                               continue1 = 0
+                               }
+                       }
+ 
+                       #update weights
+                       w_class = w_class + step_sz*s
+                       Xw = Xw + step_sz*Xd
+ 
+                       out = 1 - Y_local * Xw
+                       sv = ppred(out, 0, ">")
+                       out = sv * out
+                       obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * 
w_class)
+                       g_new = t(X) %*% (out * Y_local) - lambda * w_class
+
+                       tmp = sum(s * g_old)
+  
+                       train_acc = sum(ppred(Y_local*(X%*%w_class), 0, 
">="))/num_samples*100
+                       print("For class " + iter_class + " iteration " + iter 
+ " training accuracy: " + train_acc)
+                       debug_mat[iter+1,iter_class] = obj         
+   
+                       if((step_sz*tmp < epsilon*obj) | (iter >= 
max_iterations-1)){
+                               continue = 0
+                       }
+ 
+                       #non-linear CG step
+                       be = sum(g_new * g_new)/sum(g_old * g_old)
+                       s = be * s + g_new
+                       g_old = g_new
+
+                       iter = iter + 1
+               }
+
+               w[,iter_class] = w_class
+       }
+
+       write(w, $model, format=cmdLine_fmt)
+
+       debug_str = "# Class, Iter, Obj"
+       for(iter_class in 1:ncol(debug_mat)){
+               for(iter in 1:nrow(debug_mat)){
+                       obj = castAsScalar(debug_mat[iter, iter_class])
+                       if(obj != -1) 
+                               debug_str = append(debug_str, iter_class + "," 
+ iter + "," + obj)
+               }
+       }
+       write(debug_str, $Log)
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/m-svm/m-svm.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/m-svm/m-svm.pydml 
b/src/test/scripts/applications/m-svm/m-svm.pydml
index 83f17cf..348f599 100644
--- a/src/test/scripts/applications/m-svm/m-svm.pydml
+++ b/src/test/scripts/applications/m-svm/m-svm.pydml
@@ -1,136 +1,136 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multiclass SVM with squared slack variables, 
-# learns one-against-the-rest binary-class classifiers
-# 
-# Example Usage:
-# Assume SVM_HOME is set to the home of the pydml script
-# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
-# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations 
= 100
-# 
-# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.pydml -python -nvargs 
X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 
maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
-#
-
-cmdLine_fmt=ifdef($fmt, "text")
-cmdLine_icpt = ifdef($icpt, 0)
-cmdLine_tol=ifdef($tol, 0.001)
-cmdLine_reg=ifdef($reg, 1.0)
-cmdLine_maxiter=ifdef($maxiter, 100)
-
-print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + 
" maxiter=" + cmdLine_maxiter)
-
-X = load($X)
-
-check_X = sum(X)
-if(check_X == 0):
-    print("X has no non-zeros")
-else:
-    Y = load($Y)
-    intercept = cmdLine_icpt
-    num_classes = $classes
-    epsilon = cmdLine_tol
-    lambda = cmdLine_reg
-    max_iterations = cmdLine_maxiter
-    
-    num_samples = nrow(X)
-    num_features = ncol(X)
-    
-    if (intercept == 1):
-        ones  = full(1, rows=num_samples, cols=1)
-        X = append(X, ones)
-    
-    num_rows_in_w = num_features
-    if(intercept == 1):
-        num_rows_in_w = num_rows_in_w + 1
-    w = full(0, rows=num_rows_in_w, cols=num_classes)
-    
-    debug_mat = full(-1, rows=max_iterations, cols=num_classes)
-    parfor(iter_class in 1:num_classes):
-        Y_local = 2 * ppred(Y, iter_class, "==") - 1
-        w_class = full(0, rows=num_features, cols=1)
-        if (intercept == 1):
-            zero_matrix = full(0, rows=1, cols=1)
-            w_class = transpose(append(transpose(w_class), zero_matrix))
-        g_old = dot(transpose(X), Y_local)
-        s = g_old
-        
-        Xw = full(0, rows=nrow(X), cols=1)
-        iter = 0
-        continue = 1
-        while(continue == 1):
-            # minimizing primal obj along direction s
-            step_sz = 0
-            Xd = dot(X, s)
-            wd = lambda * sum(w_class * s)
-            dd = lambda * sum(s * s)
-            continue1 = 1
-            while(continue1 == 1):
-                tmp_Xw = Xw + step_sz*Xd
-                out = 1 - Y_local * (tmp_Xw)
-                sv = ppred(out, 0, ">")
-                out = out * sv
-                g = wd + step_sz*dd - sum(out * Y_local * Xd)
-                h = dd + sum(Xd * sv * Xd)
-                step_sz = step_sz - g/h
-                if (g*g/h < 0.0000000001):
-                    continue1 = 0
-            
-            #update weights
-            w_class = w_class + step_sz*s
-            Xw = Xw + step_sz*Xd
-            
-            out = 1 - Y_local * Xw
-            sv = ppred(out, 0, ">")
-            out = sv * out
-            obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
-            g_new = dot(transpose(X), (out * Y_local)) - lambda * w_class
-            
-            tmp = sum(s * g_old)
-            
-            train_acc = sum(ppred(Y_local*(dot(X, w_class)), 0, 
">="))/num_samples*100
-            print("For class " + iter_class + " iteration " + iter + " 
training accuracy: " + train_acc)
-            debug_mat[iter+1,iter_class] = obj
-            
-            if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)):
-                continue = 0
-            
-            #non-linear CG step
-            be = sum(g_new * g_new)/sum(g_old * g_old)
-            s = be * s + g_new
-            g_old = g_new
-            
-            iter = iter + 1
-        # end while(continue == 1)
-        
-        w[,iter_class] = w_class
-    # end parfor(iter_class in 1:num_classes)
-    
-    save(w, $model, format=cmdLine_fmt)
-    
-    debug_str = "# Class, Iter, Obj"
-    for(iter_class in 1:ncol(debug_mat)):
-        for(iter in 1:nrow(debug_mat)):
-            obj = castAsScalar(debug_mat[iter, iter_class])
-            if(obj != -1):
-                debug_str = append(debug_str, iter_class + "," + iter + "," + 
obj)
-    save(debug_str, $Log)
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multiclass SVM with squared slack variables, 
+# learns one-against-the-rest binary-class classifiers
+# 
+# Example Usage:
+# Assume SVM_HOME is set to the home of the pydml script
+# Assume input and output directories are on hdfs as INPUT_DIR and OUTPUT_DIR
+# Assume number of classes is 10, epsilon = 0.001, lambda=1.0, max_iterations 
= 100
+# 
+# hadoop jar SystemML.jar -f $SVM_HOME/m-svm.pydml -python -nvargs 
X=$INPUT_DIR/X Y=$INPUT_DIR/y icpt=intercept classes=10 tol=.001 reg=1.0 
maxiter=100 model=$OUTPUT_DIR/w Log=$OUTPUT_DIR/Log fmt="text"
+#
+
+cmdLine_fmt=ifdef($fmt, "text")
+cmdLine_icpt = ifdef($icpt, 0)
+cmdLine_tol=ifdef($tol, 0.001)
+cmdLine_reg=ifdef($reg, 1.0)
+cmdLine_maxiter=ifdef($maxiter, 100)
+
+print("icpt=" + cmdLine_icpt + " tol=" + cmdLine_tol + " reg=" + cmdLine_reg + 
" maxiter=" + cmdLine_maxiter)
+
+X = load($X)
+
+check_X = sum(X)
+if(check_X == 0):
+    print("X has no non-zeros")
+else:
+    Y = load($Y)
+    intercept = cmdLine_icpt
+    num_classes = $classes
+    epsilon = cmdLine_tol
+    lambda = cmdLine_reg
+    max_iterations = cmdLine_maxiter
+    
+    num_samples = nrow(X)
+    num_features = ncol(X)
+    
+    if (intercept == 1):
+        ones  = full(1, rows=num_samples, cols=1)
+        X = append(X, ones)
+    
+    num_rows_in_w = num_features
+    if(intercept == 1):
+        num_rows_in_w = num_rows_in_w + 1
+    w = full(0, rows=num_rows_in_w, cols=num_classes)
+    
+    debug_mat = full(-1, rows=max_iterations, cols=num_classes)
+    parfor(iter_class in 1:num_classes):
+        Y_local = 2 * ppred(Y, iter_class, "==") - 1
+        w_class = full(0, rows=num_features, cols=1)
+        if (intercept == 1):
+            zero_matrix = full(0, rows=1, cols=1)
+            w_class = transpose(append(transpose(w_class), zero_matrix))
+        g_old = dot(transpose(X), Y_local)
+        s = g_old
+        
+        Xw = full(0, rows=nrow(X), cols=1)
+        iter = 0
+        continue = 1
+        while(continue == 1):
+            # minimizing primal obj along direction s
+            step_sz = 0
+            Xd = dot(X, s)
+            wd = lambda * sum(w_class * s)
+            dd = lambda * sum(s * s)
+            continue1 = 1
+            while(continue1 == 1):
+                tmp_Xw = Xw + step_sz*Xd
+                out = 1 - Y_local * (tmp_Xw)
+                sv = ppred(out, 0, ">")
+                out = out * sv
+                g = wd + step_sz*dd - sum(out * Y_local * Xd)
+                h = dd + sum(Xd * sv * Xd)
+                step_sz = step_sz - g/h
+                if (g*g/h < 0.0000000001):
+                    continue1 = 0
+            
+            #update weights
+            w_class = w_class + step_sz*s
+            Xw = Xw + step_sz*Xd
+            
+            out = 1 - Y_local * Xw
+            sv = ppred(out, 0, ">")
+            out = sv * out
+            obj = 0.5 * sum(out * out) + lambda/2 * sum(w_class * w_class)
+            g_new = dot(transpose(X), (out * Y_local)) - lambda * w_class
+            
+            tmp = sum(s * g_old)
+            
+            train_acc = sum(ppred(Y_local*(dot(X, w_class)), 0, 
">="))/num_samples*100
+            print("For class " + iter_class + " iteration " + iter + " 
training accuracy: " + train_acc)
+            debug_mat[iter+1,iter_class] = obj
+            
+            if((step_sz*tmp < epsilon*obj) | (iter >= max_iterations-1)):
+                continue = 0
+            
+            #non-linear CG step
+            be = sum(g_new * g_new)/sum(g_old * g_old)
+            s = be * s + g_new
+            g_old = g_new
+            
+            iter = iter + 1
+        # end while(continue == 1)
+        
+        w[,iter_class] = w_class
+    # end parfor(iter_class in 1:num_classes)
+    
+    save(w, $model, format=cmdLine_fmt)
+    
+    debug_str = "# Class, Iter, Obj"
+    for(iter_class in 1:ncol(debug_mat)):
+        for(iter in 1:nrow(debug_mat)):
+            obj = castAsScalar(debug_mat[iter, iter_class])
+            if(obj != -1):
+                debug_str = append(debug_str, iter_class + "," + iter + "," + 
obj)
+    save(debug_str, $Log)
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/mdabivar/MDABivariateStats.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.R 
b/src/test/scripts/applications/mdabivar/MDABivariateStats.R
index 7715c5e..1844bbb 100644
--- a/src/test/scripts/applications/mdabivar/MDABivariateStats.R
+++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.R
@@ -1,294 +1,294 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-bivar_ss = function(X, Y) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y)
-
-    # compute standard deviations for both X and Y by computing 2^nd central 
moment
-    m2X = var(X)
-    m2Y = var(Y)
-    sigmaX = sqrt(m2X)
-    sigmaY = sqrt(m2Y)
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY)
-
-    return(list("R" = R, "covXY" = covXY, "sigmaX" = sigmaX, "sigmaY" = 
sigmaY))
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(A, B) {
-
-    # Contingency Table
-    F = table(A,B)
-    
-    # Chi-Squared
-    cst = chisq.test(F)
-
-    r = rowSums(F)
-    c = colSums(F)
-    
-    chi_squared = as.numeric(cst[1])
-
-    # compute p-value
-    pValue = as.numeric(cst[3])
-
-    # Assign return values
-    pval = pValue
-    contingencyTable = F
-    rowMarginals = r
-    colMarginals = c
-
-    return(list("pval" = pval, "contingencyTable" = contingencyTable, 
"rowMarginals" = rowMarginals, "colMarginals" = colMarginals))
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Y, A) {
-    # mean and variance in target variable
-    W = length(A)
-    my = mean(Y)
-    varY = var(Y)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = as.matrix(table(A)) 
-
-    CMeans = as.matrix(aggregate(Y, by=list(A), "mean")$x)
-
-    CVars = as.matrix(aggregate(Y, by=list(A), "var")$x)
-    CVars[is.na(CVars)] <- 0
-
-    # number of categories
-    R = nrow(CFreqs)
-    df1 = R-1
-    df2 = W-R
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
-    AnovaF = anova_num/anova_den
-    pVal = 1-pf(AnovaF, df1, df2)
-
-    return(list("pVal" = pVal, "CFreqs" = CFreqs, "CMeans" = CMeans, "CVars" = 
CVars))
-}
-
-# Main starts here 
-----------------------------------------------------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library(Matrix)
-
-# input data set
-D = readMM(paste(args[1], "X.mtx", sep=""));
-
-# label attr id (must be a valid index > 0)  
-label_index = as.integer(args[2])
-
-# feature attributes, column vector of indices
-feature_indices = readMM(paste(args[1], "feature_indices.mtx", sep="")) 
-
-# can be either 1 (scale) or 0 (categorical)
-label_measurement_level = as.integer(args[3]) 
-
-# measurement levels for features, 0/1 column vector
-feature_measurement_levels = readMM(paste(args[1], 
"feature_measurement_levels.mtx", sep="")) 
-
-sz = ncol(D)
-
-# store for pvalues and pearson's r
-stats = matrix(0, sz, 1)
-# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
-tests = matrix(0, sz, 1)
-# store for covariances used to compute pearson's r
-covariances = matrix(0, sz, 1)
-# store for standard deviations used to compute pearson's r
-standard_deviations = matrix(0, sz, 1)
-
-labels = D[,label_index]
-
-labelCorrection = 0
-if(label_measurement_level == 1){
-       numLabels = length(labels)
-        cmLabels = var(labels)
-       stdLabels = sqrt(cmLabels)
-       standard_deviations[label_index,1] = stdLabels
-}else{
-       labelCorrection = 1 - min(labels)
-       labels = labels + labelCorrection
-}
-
-mx = apply(D, 2, max)
-mn = apply(D, 2, min)  
-num_distinct_values = mx-mn+1
-max_num_distinct_values = 0
-for(i1 in 1:nrow(feature_indices)){
-       feature_index1 = feature_indices[i1,1]
-       num = num_distinct_values[feature_index1]
-       if(feature_measurement_levels[i1,1] == 0 & num >= 
max_num_distinct_values){
-               max_num_distinct_values = num
-       }
-}
-distinct_label_values = matrix(0, 1, 1)        
-contingencyTableSz = 1
-maxNumberOfGroups = 1
-if(max_num_distinct_values != 0){
-       maxNumberOfGroups = max_num_distinct_values
-}
-if(label_measurement_level==0){
-       distinct_label_values = as.data.frame(table(labels))$Freq
-       if(max_num_distinct_values != 0){
-               contingencyTableSz = 
max_num_distinct_values*length(distinct_label_values)
-       }
-       maxNumberOfGroups = max(maxNumberOfGroups, 
length(distinct_label_values))
-}
-# store for contingency table cell values
-contingencyTablesCounts = matrix(0, sz, contingencyTableSz)
-# store for contingency table label(row) assignments
-contingencyTablesLabelValues = matrix(0, sz, contingencyTableSz)
-# store for contingency table feature(col) assignments
-contingencyTablesFeatureValues = matrix(0, sz, contingencyTableSz)
-# store for distinct values
-featureValues = matrix(0, sz, maxNumberOfGroups)
-# store for counts of distinct values
-featureCounts = matrix(0, sz, maxNumberOfGroups)
-# store for group means
-featureMeans = matrix(0, sz, maxNumberOfGroups)
-# store for group standard deviations
-featureSTDs = matrix(0, sz, maxNumberOfGroups)
-
-if(label_measurement_level == 0){
-       featureCounts[label_index,1:length(distinct_label_values)] = 
distinct_label_values
-       for(i2 in 1:length(distinct_label_values)){
-               featureValues[label_index,i2] = i2-labelCorrection
-       }
-}
-
-for(i3 in 1:nrow(feature_indices)){
-       feature_index2 = feature_indices[i3,1]
-       feature_measurement_level = feature_measurement_levels[i3,1]
-       
-       feature = D[,feature_index2]
-       
-       if(feature_measurement_level == 0){
-               featureCorrection = 1 - min(feature)
-               feature = feature + featureCorrection
-                       
-               if(label_measurement_level == feature_measurement_level){
-                 # categorical-categorical
-                 tests[feature_index2,1] = 1
-
-                 ret = bivar_cc(labels, feature)
-                  pVal = ret$pval
-                  contingencyTable = ret$contingencyTable
-                  rowMarginals = ret$rowMarginals
-                  colMarginals = ret$colMarginals
-
-                 stats[feature_index2,1] = pVal
-                       
-                 sz3 = nrow(contingencyTable)*ncol(contingencyTable)
-                       
-                 contingencyTableCounts = matrix(0, 1, sz3)
-                 contingencyTableLabelValues = matrix(0, 1, sz3)
-                 contingencyTableFeatureValues = matrix(0, 1, sz3)
-                       
-                 for(i4 in 1:nrow(contingencyTable)){
-                        for(j in 1:ncol(contingencyTable)){
-                                       #get rid of this, see *1 below
-                                       contingencyTableCounts[1, 
ncol(contingencyTable)*(i4-1)+j] = contingencyTable[i4,j]
-                                       
-                                       contingencyTableLabelValues[1, 
ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
-                                       contingencyTableFeatureValues[1, 
ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
-                               }
-                       }
-                       contingencyTablesCounts[feature_index2,1:sz3] = 
contingencyTableCounts
-            
-                       contingencyTablesLabelValues[feature_index2,1:sz3] = 
contingencyTableLabelValues
-                       contingencyTablesFeatureValues[feature_index2,1:sz3] = 
contingencyTableFeatureValues
-                       
-                       featureCounts[feature_index2,1:length(colMarginals)] = 
colMarginals
-                       for(i5 in 1:length(colMarginals)){
-                               featureValues[feature_index2,i5] = 
i5-featureCorrection
-                       }
-               }else{
-                       # label is scale, feature is categorical
-                       tests[feature_index2,1] = 2
-                       
-                       ret = bivar_sc(labels, feature)
-                  pVal = ret$pVal
-                  frequencies = ret$CFreqs
-                  means = ret$CMeans
-                  variances = ret$CVars
-
-                       stats[feature_index2,1] = pVal
-                       featureCounts[feature_index2,1:nrow(frequencies)] = 
t(frequencies)
-                       for(i6 in 1:nrow(frequencies)){
-                               featureValues[feature_index2,i6] = i6 - 
featureCorrection
-                       }
-                       featureMeans[feature_index2,1:nrow(means)] = t(means)
-                       featureSTDs[feature_index2,1:nrow(variances)] = 
t(sqrt(variances))
-               }
-       }else{
-               if(label_measurement_level == feature_measurement_level){
-                 # scale-scale
-                 tests[feature_index2,1] = 3
-
-                 ret = bivar_ss(labels, feature)
-                  r = ret$R
-                  covariance = ret$covXY
-                  stdX = ret$sigmaX
-                  stdY = ret$sigmaY
- 
-                 stats[feature_index2,1] = r
-                 covariances[feature_index2,1] = covariance
-                 standard_deviations[feature_index2,1] = stdY
-               }else{
-                 # label is categorical, feature is scale
-                 tests[feature_index2,1] = 2
-                       
-                 ret = bivar_sc(feature, labels)
-                 pVal = ret$pVal
-                 frequencies = ret$CFreqs
-                  means = ret$CMeans
-                 variances = ret$CVars
-                       
-                 stats[feature_index2,1] = pVal
-                 featureMeans[feature_index2,1:nrow(means)] = t(means)
-                 featureSTDs[feature_index2,1:nrow(variances)] = 
t(sqrt(variances))
-               }
-       }
-}
-
-writeMM(as(stats, "CsparseMatrix"), paste(args[4], "stats", sep=""))
-writeMM(as(tests, "CsparseMatrix"), paste(args[4], "tests", sep=""))
-writeMM(as(covariances, "CsparseMatrix"), paste(args[4], "covariances", 
sep=""))
-writeMM(as(standard_deviations, "CsparseMatrix"), paste(args[4], 
"standard_deviations", sep=""))
-writeMM(as(contingencyTablesCounts, "CsparseMatrix"), paste(args[4], 
"contingencyTablesCounts", sep=""))
-writeMM(as(contingencyTablesLabelValues, "CsparseMatrix"), paste(args[4], 
"contingencyTablesLabelValues", sep=""))
-writeMM(as(contingencyTablesFeatureValues, "CsparseMatrix"), paste(args[4], 
"contingencyTablesFeatureValues", sep=""))
-writeMM(as(featureValues, "CsparseMatrix"), paste(args[4], "featureValues", 
sep=""))
-writeMM(as(featureCounts, "CsparseMatrix"), paste(args[4], "featureCounts", 
sep=""))
-writeMM(as(featureMeans, "CsparseMatrix"), paste(args[4], "featureMeans", 
sep=""))
-writeMM(as(featureSTDs, "CsparseMatrix"), paste(args[4], "featureSTDs", 
sep=""))
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+bivar_ss = function(X, Y) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y)
+
+    # compute standard deviations for both X and Y by computing 2^nd central 
moment
+    m2X = var(X)
+    m2Y = var(Y)
+    sigmaX = sqrt(m2X)
+    sigmaY = sqrt(m2Y)
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY)
+
+    return(list("R" = R, "covXY" = covXY, "sigmaX" = sigmaX, "sigmaY" = 
sigmaY))
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(A, B) {
+
+    # Contingency Table
+    F = table(A,B)
+    
+    # Chi-Squared
+    cst = chisq.test(F)
+
+    r = rowSums(F)
+    c = colSums(F)
+    
+    chi_squared = as.numeric(cst[1])
+
+    # compute p-value
+    pValue = as.numeric(cst[3])
+
+    # Assign return values
+    pval = pValue
+    contingencyTable = F
+    rowMarginals = r
+    colMarginals = c
+
+    return(list("pval" = pval, "contingencyTable" = contingencyTable, 
"rowMarginals" = rowMarginals, "colMarginals" = colMarginals))
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Y, A) {
+    # mean and variance in target variable
+    W = length(A)
+    my = mean(Y)
+    varY = var(Y)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = as.matrix(table(A)) 
+
+    CMeans = as.matrix(aggregate(Y, by=list(A), "mean")$x)
+
+    CVars = as.matrix(aggregate(Y, by=list(A), "var")$x)
+    CVars[is.na(CVars)] <- 0
+
+    # number of categories
+    R = nrow(CFreqs)
+    df1 = R-1
+    df2 = W-R
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
+    AnovaF = anova_num/anova_den
+    pVal = 1-pf(AnovaF, df1, df2)
+
+    return(list("pVal" = pVal, "CFreqs" = CFreqs, "CMeans" = CMeans, "CVars" = 
CVars))
+}
+
+# Main starts here 
-----------------------------------------------------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library(Matrix)
+
+# input data set
+D = readMM(paste(args[1], "X.mtx", sep=""));
+
+# label attr id (must be a valid index > 0)  
+label_index = as.integer(args[2])
+
+# feature attributes, column vector of indices
+feature_indices = readMM(paste(args[1], "feature_indices.mtx", sep="")) 
+
+# can be either 1 (scale) or 0 (categorical)
+label_measurement_level = as.integer(args[3]) 
+
+# measurement levels for features, 0/1 column vector
+feature_measurement_levels = readMM(paste(args[1], 
"feature_measurement_levels.mtx", sep="")) 
+
+sz = ncol(D)
+
+# store for pvalues and pearson's r
+stats = matrix(0, sz, 1)
+# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
+tests = matrix(0, sz, 1)
+# store for covariances used to compute pearson's r
+covariances = matrix(0, sz, 1)
+# store for standard deviations used to compute pearson's r
+standard_deviations = matrix(0, sz, 1)
+
+labels = D[,label_index]
+
+labelCorrection = 0
+if(label_measurement_level == 1){
+       numLabels = length(labels)
+        cmLabels = var(labels)
+       stdLabels = sqrt(cmLabels)
+       standard_deviations[label_index,1] = stdLabels
+}else{
+       labelCorrection = 1 - min(labels)
+       labels = labels + labelCorrection
+}
+
+mx = apply(D, 2, max)
+mn = apply(D, 2, min)  
+num_distinct_values = mx-mn+1
+max_num_distinct_values = 0
+for(i1 in 1:nrow(feature_indices)){
+       feature_index1 = feature_indices[i1,1]
+       num = num_distinct_values[feature_index1]
+       if(feature_measurement_levels[i1,1] == 0 & num >= 
max_num_distinct_values){
+               max_num_distinct_values = num
+       }
+}
+distinct_label_values = matrix(0, 1, 1)        
+contingencyTableSz = 1
+maxNumberOfGroups = 1
+if(max_num_distinct_values != 0){
+       maxNumberOfGroups = max_num_distinct_values
+}
+if(label_measurement_level==0){
+       distinct_label_values = as.data.frame(table(labels))$Freq
+       if(max_num_distinct_values != 0){
+               contingencyTableSz = 
max_num_distinct_values*length(distinct_label_values)
+       }
+       maxNumberOfGroups = max(maxNumberOfGroups, 
length(distinct_label_values))
+}
+# store for contingency table cell values
+contingencyTablesCounts = matrix(0, sz, contingencyTableSz)
+# store for contingency table label(row) assignments
+contingencyTablesLabelValues = matrix(0, sz, contingencyTableSz)
+# store for contingency table feature(col) assignments
+contingencyTablesFeatureValues = matrix(0, sz, contingencyTableSz)
+# store for distinct values
+featureValues = matrix(0, sz, maxNumberOfGroups)
+# store for counts of distinct values
+featureCounts = matrix(0, sz, maxNumberOfGroups)
+# store for group means
+featureMeans = matrix(0, sz, maxNumberOfGroups)
+# store for group standard deviations
+featureSTDs = matrix(0, sz, maxNumberOfGroups)
+
+if(label_measurement_level == 0){
+       featureCounts[label_index,1:length(distinct_label_values)] = 
distinct_label_values
+       for(i2 in 1:length(distinct_label_values)){
+               featureValues[label_index,i2] = i2-labelCorrection
+       }
+}
+
+for(i3 in 1:nrow(feature_indices)){
+       feature_index2 = feature_indices[i3,1]
+       feature_measurement_level = feature_measurement_levels[i3,1]
+       
+       feature = D[,feature_index2]
+       
+       if(feature_measurement_level == 0){
+               featureCorrection = 1 - min(feature)
+               feature = feature + featureCorrection
+                       
+               if(label_measurement_level == feature_measurement_level){
+                 # categorical-categorical
+                 tests[feature_index2,1] = 1
+
+                 ret = bivar_cc(labels, feature)
+                  pVal = ret$pval
+                  contingencyTable = ret$contingencyTable
+                  rowMarginals = ret$rowMarginals
+                  colMarginals = ret$colMarginals
+
+                 stats[feature_index2,1] = pVal
+                       
+                 sz3 = nrow(contingencyTable)*ncol(contingencyTable)
+                       
+                 contingencyTableCounts = matrix(0, 1, sz3)
+                 contingencyTableLabelValues = matrix(0, 1, sz3)
+                 contingencyTableFeatureValues = matrix(0, 1, sz3)
+                       
+                 for(i4 in 1:nrow(contingencyTable)){
+                        for(j in 1:ncol(contingencyTable)){
+                                       #get rid of this, see *1 below
+                                       contingencyTableCounts[1, 
ncol(contingencyTable)*(i4-1)+j] = contingencyTable[i4,j]
+                                       
+                                       contingencyTableLabelValues[1, 
ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
+                                       contingencyTableFeatureValues[1, 
ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
+                               }
+                       }
+                       contingencyTablesCounts[feature_index2,1:sz3] = 
contingencyTableCounts
+            
+                       contingencyTablesLabelValues[feature_index2,1:sz3] = 
contingencyTableLabelValues
+                       contingencyTablesFeatureValues[feature_index2,1:sz3] = 
contingencyTableFeatureValues
+                       
+                       featureCounts[feature_index2,1:length(colMarginals)] = 
colMarginals
+                       for(i5 in 1:length(colMarginals)){
+                               featureValues[feature_index2,i5] = 
i5-featureCorrection
+                       }
+               }else{
+                       # label is scale, feature is categorical
+                       tests[feature_index2,1] = 2
+                       
+                       ret = bivar_sc(labels, feature)
+                  pVal = ret$pVal
+                  frequencies = ret$CFreqs
+                  means = ret$CMeans
+                  variances = ret$CVars
+
+                       stats[feature_index2,1] = pVal
+                       featureCounts[feature_index2,1:nrow(frequencies)] = 
t(frequencies)
+                       for(i6 in 1:nrow(frequencies)){
+                               featureValues[feature_index2,i6] = i6 - 
featureCorrection
+                       }
+                       featureMeans[feature_index2,1:nrow(means)] = t(means)
+                       featureSTDs[feature_index2,1:nrow(variances)] = 
t(sqrt(variances))
+               }
+       }else{
+               if(label_measurement_level == feature_measurement_level){
+                 # scale-scale
+                 tests[feature_index2,1] = 3
+
+                 ret = bivar_ss(labels, feature)
+                  r = ret$R
+                  covariance = ret$covXY
+                  stdX = ret$sigmaX
+                  stdY = ret$sigmaY
+ 
+                 stats[feature_index2,1] = r
+                 covariances[feature_index2,1] = covariance
+                 standard_deviations[feature_index2,1] = stdY
+               }else{
+                 # label is categorical, feature is scale
+                 tests[feature_index2,1] = 2
+                       
+                 ret = bivar_sc(feature, labels)
+                 pVal = ret$pVal
+                 frequencies = ret$CFreqs
+                  means = ret$CMeans
+                 variances = ret$CVars
+                       
+                 stats[feature_index2,1] = pVal
+                 featureMeans[feature_index2,1:nrow(means)] = t(means)
+                 featureSTDs[feature_index2,1:nrow(variances)] = 
t(sqrt(variances))
+               }
+       }
+}
+
+writeMM(as(stats, "CsparseMatrix"), paste(args[4], "stats", sep=""))
+writeMM(as(tests, "CsparseMatrix"), paste(args[4], "tests", sep=""))
+writeMM(as(covariances, "CsparseMatrix"), paste(args[4], "covariances", 
sep=""))
+writeMM(as(standard_deviations, "CsparseMatrix"), paste(args[4], 
"standard_deviations", sep=""))
+writeMM(as(contingencyTablesCounts, "CsparseMatrix"), paste(args[4], 
"contingencyTablesCounts", sep=""))
+writeMM(as(contingencyTablesLabelValues, "CsparseMatrix"), paste(args[4], 
"contingencyTablesLabelValues", sep=""))
+writeMM(as(contingencyTablesFeatureValues, "CsparseMatrix"), paste(args[4], 
"contingencyTablesFeatureValues", sep=""))
+writeMM(as(featureValues, "CsparseMatrix"), paste(args[4], "featureValues", 
sep=""))
+writeMM(as(featureCounts, "CsparseMatrix"), paste(args[4], "featureCounts", 
sep=""))
+writeMM(as(featureMeans, "CsparseMatrix"), paste(args[4], "featureMeans", 
sep=""))
+writeMM(as(featureSTDs, "CsparseMatrix"), paste(args[4], "featureSTDs", 
sep=""))
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.dml 
b/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
index 1e04154..56163ad 100644
--- a/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
+++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.dml
@@ -1,268 +1,268 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Main starts here 
-----------------------------------------------------------------------------------------------------------
-
-# input data set
-D = read($1)
-
-# label attr id (must be a valid index > 0)  
-label_index = $2
-
-# feature attributes, column vector of indices
-feature_indices = read($3) 
-
-# can be either 1 (scale) or 0 (categorical)
-label_measurement_level = $4 
-
-# measurement levels for features, 0/1 column vector
-feature_measurement_levels = read($5) 
-
-sz = ncol(D)
-
-# store for pvalues and pearson's r
-stats = matrix(0, rows=sz, cols=1)
-# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
-tests = matrix(0, rows=sz, cols=1)
-# store for covariances used to compute pearson's r
-covariances = matrix(0, rows=sz, cols=1)
-# store for standard deviations used to compute pearson's r
-standard_deviations = matrix(0, rows=sz, cols=1)
-
-labels = D[,label_index]
-
-labelCorrection = 0
-if(label_measurement_level == 1){
-       numLabels = nrow(labels)
-    cmLabels = moment(labels,2)
-    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
-       standard_deviations[label_index,1] = stdLabels
-}else{
-       labelCorrection = 1 - min(labels)
-       labels = labels + labelCorrection
-}
-
-mx = colMaxs(D)
-mn = colMins(D)        
-num_distinct_values = mx-mn+1
-max_num_distinct_values = 0
-for(i1 in 1:nrow(feature_indices)){
-       feature_index1 = castAsScalar(feature_indices[i1,1])
-       num = castAsScalar(num_distinct_values[1,feature_index1])
-       if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= 
max_num_distinct_values){
-               max_num_distinct_values = num
-       }
-}
-distinct_label_values = matrix(0, rows=1, cols=1)      
-contingencyTableSz = 1
-maxNumberOfGroups = 1
-if(max_num_distinct_values != 0){
-       maxNumberOfGroups = max_num_distinct_values
-}
-if(label_measurement_level==0){
-       distinct_label_values = aggregate(target=labels, groups=labels, 
fn="count")
-       if(max_num_distinct_values != 0){
-               contingencyTableSz = 
max_num_distinct_values*nrow(distinct_label_values)
-       }
-       maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
-}
-# store for contingency table cell values
-contingencyTablesCounts = matrix(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table label(row) assignments
-contingencyTablesLabelValues = matrix(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table feature(col) assignments
-contingencyTablesFeatureValues = matrix(0, rows=sz, cols=contingencyTableSz)
-# store for distinct values
-featureValues = matrix(0, rows=sz, cols=maxNumberOfGroups)
-# store for counts of distinct values
-featureCounts = matrix(0, rows=sz, cols=maxNumberOfGroups)
-# store for group means
-featureMeans = matrix(0, rows=sz, cols=maxNumberOfGroups)
-# store for group standard deviations
-featureSTDs = matrix(0, rows=sz, cols=maxNumberOfGroups)
-
-if(label_measurement_level == 0){
-       featureCounts[label_index,1:nrow(distinct_label_values)] = 
t(distinct_label_values)
-       parfor(i2 in 1:nrow(distinct_label_values)){
-               featureValues[label_index,i2] = i2-labelCorrection
-       }
-}
-
-parfor(i3 in 1:nrow(feature_indices), check=0){
-       feature_index2 = castAsScalar(feature_indices[i3,1])
-       feature_measurement_level = 
castAsScalar(feature_measurement_levels[i3,1])
-       
-       feature = D[,feature_index2]
-       
-       if(feature_measurement_level == 0){
-               featureCorrection = 1 - min(feature)
-               feature = feature + featureCorrection
-                       
-               if(label_measurement_level == feature_measurement_level){
-                       # categorical-categorical
-                       tests[feature_index2,1] = 1
-                       [pVal, contingencyTable, rowMarginals, colMarginals] = 
bivar_cc(labels, feature)
-                       stats[feature_index2,1] = pVal
-                       
-                       sz3=1
-                       if(1==1){
-                               sz3 = 
nrow(contingencyTable)*ncol(contingencyTable)
-                       }
-                       contingencyTableLabelValues = matrix(0, rows=1, 
cols=sz3)
-                       contingencyTableFeatureValues = matrix(0, rows=1, 
cols=sz3)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Main starts here 
-----------------------------------------------------------------------------------------------------------
+
+# input data set
+D = read($1)
+
+# label attr id (must be a valid index > 0)  
+label_index = $2
+
+# feature attributes, column vector of indices
+feature_indices = read($3) 
+
+# can be either 1 (scale) or 0 (categorical)
+label_measurement_level = $4 
+
+# measurement levels for features, 0/1 column vector
+feature_measurement_levels = read($5) 
+
+sz = ncol(D)
+
+# store for pvalues and pearson's r
+stats = matrix(0, rows=sz, cols=1)
+# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
+tests = matrix(0, rows=sz, cols=1)
+# store for covariances used to compute pearson's r
+covariances = matrix(0, rows=sz, cols=1)
+# store for standard deviations used to compute pearson's r
+standard_deviations = matrix(0, rows=sz, cols=1)
+
+labels = D[,label_index]
+
+labelCorrection = 0
+if(label_measurement_level == 1){
+       numLabels = nrow(labels)
+    cmLabels = moment(labels,2)
+    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
+       standard_deviations[label_index,1] = stdLabels
+}else{
+       labelCorrection = 1 - min(labels)
+       labels = labels + labelCorrection
+}
+
+mx = colMaxs(D)
+mn = colMins(D)        
+num_distinct_values = mx-mn+1
+max_num_distinct_values = 0
+for(i1 in 1:nrow(feature_indices)){
+       feature_index1 = castAsScalar(feature_indices[i1,1])
+       num = castAsScalar(num_distinct_values[1,feature_index1])
+       if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= 
max_num_distinct_values){
+               max_num_distinct_values = num
+       }
+}
+distinct_label_values = matrix(0, rows=1, cols=1)      
+contingencyTableSz = 1
+maxNumberOfGroups = 1
+if(max_num_distinct_values != 0){
+       maxNumberOfGroups = max_num_distinct_values
+}
+if(label_measurement_level==0){
+       distinct_label_values = aggregate(target=labels, groups=labels, 
fn="count")
+       if(max_num_distinct_values != 0){
+               contingencyTableSz = 
max_num_distinct_values*nrow(distinct_label_values)
+       }
+       maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
+}
+# store for contingency table cell values
+contingencyTablesCounts = matrix(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table label(row) assignments
+contingencyTablesLabelValues = matrix(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table feature(col) assignments
+contingencyTablesFeatureValues = matrix(0, rows=sz, cols=contingencyTableSz)
+# store for distinct values
+featureValues = matrix(0, rows=sz, cols=maxNumberOfGroups)
+# store for counts of distinct values
+featureCounts = matrix(0, rows=sz, cols=maxNumberOfGroups)
+# store for group means
+featureMeans = matrix(0, rows=sz, cols=maxNumberOfGroups)
+# store for group standard deviations
+featureSTDs = matrix(0, rows=sz, cols=maxNumberOfGroups)
+
+if(label_measurement_level == 0){
+       featureCounts[label_index,1:nrow(distinct_label_values)] = 
t(distinct_label_values)
+       parfor(i2 in 1:nrow(distinct_label_values)){
+               featureValues[label_index,i2] = i2-labelCorrection
+       }
+}
+
+parfor(i3 in 1:nrow(feature_indices), check=0){
+       feature_index2 = castAsScalar(feature_indices[i3,1])
+       feature_measurement_level = 
castAsScalar(feature_measurement_levels[i3,1])
+       
+       feature = D[,feature_index2]
+       
+       if(feature_measurement_level == 0){
+               featureCorrection = 1 - min(feature)
+               feature = feature + featureCorrection
                        
-            parfor(i4 in 1:nrow(contingencyTable), check=0){
-                               parfor(j in 1:ncol(contingencyTable), check=0){
-                                       contingencyTableLabelValues[1, 
ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
-                                       contingencyTableFeatureValues[1, 
ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
-                               }
-                       }
+               if(label_measurement_level == feature_measurement_level){
+                       # categorical-categorical
+                       tests[feature_index2,1] = 1
+                       [pVal, contingencyTable, rowMarginals, colMarginals] = 
bivar_cc(labels, feature)
+                       stats[feature_index2,1] = pVal
+                       
+                       sz3=1
+                       if(1==1){
+                               sz3 = 
nrow(contingencyTable)*ncol(contingencyTable)
+                       }
+                       contingencyTableLabelValues = matrix(0, rows=1, 
cols=sz3)
+                       contingencyTableFeatureValues = matrix(0, rows=1, 
cols=sz3)
+                       
+            parfor(i4 in 1:nrow(contingencyTable), check=0){
+                               parfor(j in 1:ncol(contingencyTable), check=0){
+                                       contingencyTableLabelValues[1, 
ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
+                                       contingencyTableFeatureValues[1, 
ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
+                               }
+                       }
                        contingencyTableCounts = matrix(contingencyTable, 
rows=1, cols=sz3, byrow=TRUE)
-            contingencyTablesCounts[feature_index2,1:sz3] = 
contingencyTableCounts
-            
-                       contingencyTablesLabelValues[feature_index2,1:sz3] = 
contingencyTableLabelValues
-                       contingencyTablesFeatureValues[feature_index2,1:sz3] = 
contingencyTableFeatureValues
-                       
-                       featureCounts[feature_index2,1:ncol(colMarginals)] = 
colMarginals
-                       parfor(i5 in 1:ncol(colMarginals), check=0){
-                               featureValues[feature_index2,i5] = 
i5-featureCorrection
-                       }
-               }else{
-                       # label is scale, feature is categorical
-                       tests[feature_index2,1] = 2
-                       [pVal, frequencies, means, variances] = 
bivar_sc(labels, feature)
-                       stats[feature_index2,1] = pVal
-                       featureCounts[feature_index2,1:nrow(frequencies)] = 
t(frequencies)
-                       parfor(i6 in 1:nrow(frequencies), check=0){
-                               featureValues[feature_index2,i6] = i6 - 
featureCorrection
-                       }
-                       featureMeans[feature_index2,1:nrow(means)] = t(means)
-                       featureSTDs[feature_index2,1:nrow(variances)] = 
t(sqrt(variances))
-               }
-       }else{
-               if(label_measurement_level == feature_measurement_level){
-                       # scale-scale
-                       tests[feature_index2,1] = 3
-                       [r, covariance, stdX, stdY] = bivar_ss(labels, feature)
-                       stats[feature_index2,1] = r
-                       covariances[feature_index2,1] = covariance
-                       standard_deviations[feature_index2,1] = stdY
-               }else{
-                       # label is categorical, feature is scale
-                       tests[feature_index2,1] = 2
-                       [pVal, frequencies, means, variances] = 
bivar_sc(feature, labels)
-                       stats[feature_index2,1] = pVal
-                       featureMeans[feature_index2,1:nrow(means)] = t(means)
-                       featureSTDs[feature_index2,1:nrow(variances)] = 
t(sqrt(variances))
-               }
-       }
-}
-
-write(stats, $6, format="text")
-write(tests, $7, format="text")
-write(covariances, $8, format="text")
-write(standard_deviations, $9, format="text")
-write(contingencyTablesCounts, $10, format="text")
-write(contingencyTablesLabelValues, $11, format="text")
-write(contingencyTablesFeatureValues, $12, format="text")
-write(featureValues, $13, format="text")
-write(featureCounts, $14, format="text")
-write(featureMeans, $15, format="text")
-write(featureSTDs, $16, format="text")
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, 
Double covXY, Double sigmaX, Double sigmaY) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y)
-
-    # compute standard deviations for both X and Y by computing 2^nd central 
moment
-    W = nrow(X)
-    m2X = moment(X,2)
-    m2Y = moment(Y,2)
-    sigmaX = sqrt(m2X * (W/(W-1.0)) )
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
-
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY)
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double pval, 
Matrix[Double] contingencyTable, Matrix[Double] rowMarginals, Matrix[Double] 
colMarginals) {
-
-    # Contingency Table
-    FF = table(A,B)
-
+            contingencyTablesCounts[feature_index2,1:sz3] = 
contingencyTableCounts
+            
+                       contingencyTablesLabelValues[feature_index2,1:sz3] = 
contingencyTableLabelValues
+                       contingencyTablesFeatureValues[feature_index2,1:sz3] = 
contingencyTableFeatureValues
+                       
+                       featureCounts[feature_index2,1:ncol(colMarginals)] = 
colMarginals
+                       parfor(i5 in 1:ncol(colMarginals), check=0){
+                               featureValues[feature_index2,i5] = 
i5-featureCorrection
+                       }
+               }else{
+                       # label is scale, feature is categorical
+                       tests[feature_index2,1] = 2
+                       [pVal, frequencies, means, variances] = 
bivar_sc(labels, feature)
+                       stats[feature_index2,1] = pVal
+                       featureCounts[feature_index2,1:nrow(frequencies)] = 
t(frequencies)
+                       parfor(i6 in 1:nrow(frequencies), check=0){
+                               featureValues[feature_index2,i6] = i6 - 
featureCorrection
+                       }
+                       featureMeans[feature_index2,1:nrow(means)] = t(means)
+                       featureSTDs[feature_index2,1:nrow(variances)] = 
t(sqrt(variances))
+               }
+       }else{
+               if(label_measurement_level == feature_measurement_level){
+                       # scale-scale
+                       tests[feature_index2,1] = 3
+                       [r, covariance, stdX, stdY] = bivar_ss(labels, feature)
+                       stats[feature_index2,1] = r
+                       covariances[feature_index2,1] = covariance
+                       standard_deviations[feature_index2,1] = stdY
+               }else{
+                       # label is categorical, feature is scale
+                       tests[feature_index2,1] = 2
+                       [pVal, frequencies, means, variances] = 
bivar_sc(feature, labels)
+                       stats[feature_index2,1] = pVal
+                       featureMeans[feature_index2,1:nrow(means)] = t(means)
+                       featureSTDs[feature_index2,1:nrow(variances)] = 
t(sqrt(variances))
+               }
+       }
+}
+
+write(stats, $6, format="text")
+write(tests, $7, format="text")
+write(covariances, $8, format="text")
+write(standard_deviations, $9, format="text")
+write(contingencyTablesCounts, $10, format="text")
+write(contingencyTablesLabelValues, $11, format="text")
+write(contingencyTablesFeatureValues, $12, format="text")
+write(featureValues, $13, format="text")
+write(featureCounts, $14, format="text")
+write(featureMeans, $15, format="text")
+write(featureSTDs, $16, format="text")
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R, 
Double covXY, Double sigmaX, Double sigmaY) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y)
+
+    # compute standard deviations for both X and Y by computing 2^nd central 
moment
+    W = nrow(X)
+    m2X = moment(X,2)
+    m2Y = moment(Y,2)
+    sigmaX = sqrt(m2X * (W/(W-1.0)) )
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
+
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY)
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double pval, 
Matrix[Double] contingencyTable, Matrix[Double] rowMarginals, Matrix[Double] 
colMarginals) {
+
+    # Contingency Table
+    FF = table(A,B)
+
     tmp = removeEmpty(target=FF, margin="rows"); 
     F = removeEmpty(target=tmp, margin="cols");
 
-    # Chi-Squared
-    W = sum(F)
-    r = rowSums(F)
-    c = colSums(F)
-    E = (r %*% c)/W
-    E = ppred(E, 0, "==")*0.0001 + E
-    T = (F-E)^2/E
-    chi_squared = sum(T)
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1)
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE)
-
-
-    # Assign return values
-    pval = pValue
-    contingencyTable = F
-    rowMarginals = r
-    colMarginals = c
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double pVal, 
Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
-       # mean and variance in target variable
-    W = nrow(A)
-    my = mean(Y)
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
+    # Chi-Squared
+    W = sum(F)
+    r = rowSums(F)
+    c = colSums(F)
+    E = (r %*% c)/W
+    E = ppred(E, 0, "==")*0.0001 + E
+    T = (F-E)^2/E
+    chi_squared = sum(T)
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1)
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE)
+
+
+    # Assign return values
+    pval = pValue
+    contingencyTable = F
+    rowMarginals = r
+    colMarginals = c
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double pVal, 
Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) {
+       # mean and variance in target variable
+    W = nrow(A)
+    my = mean(Y)
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
     present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, 
"==")), margin="rows")
     CFreqs = present_domain_vals_mat %*% CFreqs1
 
-    CMeans = present_domain_vals_mat %*% aggregate(target=Y, groups=A, 
fn="mean")
-    CVars = present_domain_vals_mat %*% aggregate(target=Y, groups=A, 
fn="variance")
-    
-    # number of categories
-    R = nrow(CFreqs)
-    df1 = R-1
-    df2 = W-R
-
-       anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
-    AnovaF = anova_num/anova_den
-    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=FALSE)
-}
+    CMeans = present_domain_vals_mat %*% aggregate(target=Y, groups=A, 
fn="mean")
+    CVars = present_domain_vals_mat %*% aggregate(target=Y, groups=A, 
fn="variance")
+    
+    # number of categories
+    R = nrow(CFreqs)
+    df1 = R-1
+    df2 = W-R
+
+       anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1)
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
+    AnovaF = anova_num/anova_den
+    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=FALSE)
+}

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml 
b/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
index c899065..7fbc101 100644
--- a/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
+++ b/src/test/scripts/applications/mdabivar/MDABivariateStats.pydml
@@ -1,246 +1,246 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Main starts here 
-----------------------------------------------------------------------------------------------------------
-
-# input data set
-D = load($1)
-
-# label attr id (must be a valid index > 0)  
-label_index = $2
-
-# feature attributes, column vector of indices
-feature_indices = load($3) 
-
-# can be either 1 (scale) or 0 (categorical)
-label_measurement_level = $4 
-
-# measurement levels for features, 0/1 column vector
-feature_measurement_levels = read($5) 
-
-sz = ncol(D)
-
-# store for pvalues and pearson's r
-stats = full(0, rows=sz, cols=1)
-# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
-tests = full(0, rows=sz, cols=1)
-# store for covariances used to compute pearson's r
-covariances = full(0, rows=sz, cols=1)
-# store for standard deviations used to compute pearson's r
-standard_deviations = full(0, rows=sz, cols=1)
-
-labels = D[,label_index]
-
-labelCorrection = 0
-if(label_measurement_level == 1):
-    numLabels = nrow(labels)
-    cmLabels = moment(labels,2)
-    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
-    standard_deviations[label_index,1] = stdLabels
-else:
-    labelCorrection = 1 - min(labels)
-    labels = labels + labelCorrection
-
-mx = colMaxs(D)
-mn = colMins(D)
-num_distinct_values = mx-mn+1
-max_num_distinct_values = 0
-for(i1 in 1:nrow(feature_indices)):
-    feature_index1 = castAsScalar(feature_indices[i1,1])
-    num = castAsScalar(num_distinct_values[1,feature_index1])
-    if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= 
max_num_distinct_values):
-        max_num_distinct_values = num
-distinct_label_values = full(0, rows=1, cols=1)
-contingencyTableSz = 1
-maxNumberOfGroups = 1
-if(max_num_distinct_values != 0):
-    maxNumberOfGroups = max_num_distinct_values
-if(label_measurement_level==0):
-    distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
-    if(max_num_distinct_values != 0):
-        contingencyTableSz = 
max_num_distinct_values*nrow(distinct_label_values)
-    maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
-# store for contingency table cell values
-contingencyTablesCounts = full(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table label(row) assignments
-contingencyTablesLabelValues = full(0, rows=sz, cols=contingencyTableSz)
-# store for contingency table feature(col) assignments
-contingencyTablesFeatureValues = full(0, rows=sz, cols=contingencyTableSz)
-# store for distinct values
-featureValues = full(0, rows=sz, cols=maxNumberOfGroups)
-# store for counts of distinct values
-featureCounts = full(0, rows=sz, cols=maxNumberOfGroups)
-# store for group means
-featureMeans = full(0, rows=sz, cols=maxNumberOfGroups)
-# store for group standard deviations
-featureSTDs = full(0, rows=sz, cols=maxNumberOfGroups)
-
-if(label_measurement_level == 0):
-    featureCounts[label_index,1:nrow(distinct_label_values)] = 
transpose(distinct_label_values)
-    parfor(i2 in 1:nrow(distinct_label_values)):
-        featureValues[label_index,i2] = i2-labelCorrection
-
-parfor(i3 in 1:nrow(feature_indices), check=0):
-    feature_index2 = castAsScalar(feature_indices[i3,1])
-    feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
-    
-    feature = D[,feature_index2]
-    
-    if(feature_measurement_level == 0):
-        featureCorrection = 1 - min(feature)
-        feature = feature + featureCorrection
-        
-        if(label_measurement_level == feature_measurement_level):
-            # categorical-categorical
-            tests[feature_index2,1] = 1
-            [pVal, contingencyTable, rowMarginals, colMarginals] = 
bivar_cc(labels, feature)
-            stats[feature_index2,1] = pVal
-            
-            sz3=1
-            if(1==1):
-                sz3 = nrow(contingencyTable)*ncol(contingencyTable)
-            contingencyTableLabelValues = full(0, rows=1, cols=sz3)
-            contingencyTableFeatureValues = full(0, rows=1, cols=sz3)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Main starts here 
-----------------------------------------------------------------------------------------------------------
+
+# input data set
+D = load($1)
+
+# label attr id (must be a valid index > 0)  
+label_index = $2
+
+# feature attributes, column vector of indices
+feature_indices = load($3) 
+
+# can be either 1 (scale) or 0 (categorical)
+label_measurement_level = $4 
+
+# measurement levels for features, 0/1 column vector
+feature_measurement_levels = read($5) 
+
+sz = ncol(D)
+
+# store for pvalues and pearson's r
+stats = full(0, rows=sz, cols=1)
+# store for type of test performed: 1 is chi-sq, 2 is ftest, 3 is pearson's
+tests = full(0, rows=sz, cols=1)
+# store for covariances used to compute pearson's r
+covariances = full(0, rows=sz, cols=1)
+# store for standard deviations used to compute pearson's r
+standard_deviations = full(0, rows=sz, cols=1)
+
+labels = D[,label_index]
+
+labelCorrection = 0
+if(label_measurement_level == 1):
+    numLabels = nrow(labels)
+    cmLabels = moment(labels,2)
+    stdLabels = sqrt(cmLabels * (numLabels/(numLabels-1.0)) )
+    standard_deviations[label_index,1] = stdLabels
+else:
+    labelCorrection = 1 - min(labels)
+    labels = labels + labelCorrection
+
+mx = colMaxs(D)
+mn = colMins(D)
+num_distinct_values = mx-mn+1
+max_num_distinct_values = 0
+for(i1 in 1:nrow(feature_indices)):
+    feature_index1 = castAsScalar(feature_indices[i1,1])
+    num = castAsScalar(num_distinct_values[1,feature_index1])
+    if(castAsScalar(feature_measurement_levels[i1,1]) == 0 & num >= 
max_num_distinct_values):
+        max_num_distinct_values = num
+distinct_label_values = full(0, rows=1, cols=1)
+contingencyTableSz = 1
+maxNumberOfGroups = 1
+if(max_num_distinct_values != 0):
+    maxNumberOfGroups = max_num_distinct_values
+if(label_measurement_level==0):
+    distinct_label_values = aggregate(target=labels, groups=labels, fn="count")
+    if(max_num_distinct_values != 0):
+        contingencyTableSz = 
max_num_distinct_values*nrow(distinct_label_values)
+    maxNumberOfGroups = max(maxNumberOfGroups, nrow(distinct_label_values))
+# store for contingency table cell values
+contingencyTablesCounts = full(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table label(row) assignments
+contingencyTablesLabelValues = full(0, rows=sz, cols=contingencyTableSz)
+# store for contingency table feature(col) assignments
+contingencyTablesFeatureValues = full(0, rows=sz, cols=contingencyTableSz)
+# store for distinct values
+featureValues = full(0, rows=sz, cols=maxNumberOfGroups)
+# store for counts of distinct values
+featureCounts = full(0, rows=sz, cols=maxNumberOfGroups)
+# store for group means
+featureMeans = full(0, rows=sz, cols=maxNumberOfGroups)
+# store for group standard deviations
+featureSTDs = full(0, rows=sz, cols=maxNumberOfGroups)
+
+if(label_measurement_level == 0):
+    featureCounts[label_index,1:nrow(distinct_label_values)] = 
transpose(distinct_label_values)
+    parfor(i2 in 1:nrow(distinct_label_values)):
+        featureValues[label_index,i2] = i2-labelCorrection
+
+parfor(i3 in 1:nrow(feature_indices), check=0):
+    feature_index2 = castAsScalar(feature_indices[i3,1])
+    feature_measurement_level = castAsScalar(feature_measurement_levels[i3,1])
+    
+    feature = D[,feature_index2]
+    
+    if(feature_measurement_level == 0):
+        featureCorrection = 1 - min(feature)
+        feature = feature + featureCorrection
+        
+        if(label_measurement_level == feature_measurement_level):
+            # categorical-categorical
+            tests[feature_index2,1] = 1
+            [pVal, contingencyTable, rowMarginals, colMarginals] = 
bivar_cc(labels, feature)
+            stats[feature_index2,1] = pVal
+            
+            sz3=1
+            if(1==1):
+                sz3 = nrow(contingencyTable)*ncol(contingencyTable)
+            contingencyTableLabelValues = full(0, rows=1, cols=sz3)
+            contingencyTableFeatureValues = full(0, rows=1, cols=sz3)
             
-            parfor(i4 in 1:nrow(contingencyTable), check=0):
-                parfor(j in 1:ncol(contingencyTable), check=0):
-                    contingencyTableLabelValues[1, 
ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
-                    contingencyTableFeatureValues[1, 
ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
+            parfor(i4 in 1:nrow(contingencyTable), check=0):
+                parfor(j in 1:ncol(contingencyTable), check=0):
+                    contingencyTableLabelValues[1, 
ncol(contingencyTable)*(i4-1)+j] = i4-labelCorrection
+                    contingencyTableFeatureValues[1, 
ncol(contingencyTable)*(i4-1)+j] = j-featureCorrection 
             contingencyTableCounts = contingencyTable.reshape(rows=1, cols=sz3)
-            contingencyTablesCounts[feature_index2,1:sz3] = 
contingencyTableCounts
-            
-            contingencyTablesLabelValues[feature_index2,1:sz3] = 
contingencyTableLabelValues
-            contingencyTablesFeatureValues[feature_index2,1:sz3] = 
contingencyTableFeatureValues
-            
-            featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
-            parfor(i5 in 1:ncol(colMarginals), check=0):
-                featureValues[feature_index2,i5] = i5-featureCorrection
-        else:
-            # label is scale, feature is categorical
-            tests[feature_index2,1] = 2
-            [pVal, frequencies, means, variances] = bivar_sc(labels, feature)
-            stats[feature_index2,1] = pVal
-            featureCounts[feature_index2,1:nrow(frequencies)] = 
transpose(frequencies)
-            parfor(i6 in 1:nrow(frequencies), check=0):
-                featureValues[feature_index2,i6] = i6 - featureCorrection
-            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
-            featureSTDs[feature_index2,1:nrow(variances)] = 
transpose(sqrt(variances))
-    else:
-        if(label_measurement_level == feature_measurement_level):
-            # scale-scale
-            tests[feature_index2,1] = 3
-            [r, covariance, stdX, stdY] = bivar_ss(labels, feature)
-            stats[feature_index2,1] = r
-            covariances[feature_index2,1] = covariance
-            standard_deviations[feature_index2,1] = stdY
-        else:
-            # label is categorical, feature is scale
-            tests[feature_index2,1] = 2
-            [pVal, frequencies, means, variances] = bivar_sc(feature, labels)
-            stats[feature_index2,1] = pVal
-            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
-            featureSTDs[feature_index2,1:nrow(variances)] = 
transpose(sqrt(variances))
-    # end if(feature_measurement_level == 0)
-# end parfor(i3 in 1:nrow(feature_indices), check=0)
-
-save(stats, $6, format="text")
-save(tests, $7, format="text")
-save(covariances, $8, format="text")
-save(standard_deviations, $9, format="text")
-save(contingencyTablesCounts, $10, format="text")
-save(contingencyTablesLabelValues, $11, format="text")
-save(contingencyTablesFeatureValues, $12, format="text")
-save(featureValues, $13, format="text")
-save(featureCounts, $14, format="text")
-save(featureMeans, $15, format="text")
-save(featureSTDs, $16, format="text")
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-def bivar_ss(X:matrix[float], Y:matrix[float]) -> (R:float, covXY:float, 
sigmaX:float, sigmaY:float):
-    # Unweighted co-variance
-    covXY = cov(X,Y)
-    
-    # compute standard deviations for both X and Y by computing 2^nd central 
moment
-    W = nrow(X)
-    m2X = moment(X,2)
-    m2Y = moment(Y,2)
-    sigmaX = sqrt(m2X * (W/(W-1.0)) )
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
-    
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY)
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-def bivar_cc(A:matrix[float], B:matrix[float]) -> (pval:float, 
contingencyTable:matrix[float], rowMarginals:matrix[float], 
colMarginals:matrix[float]):
-    # Contingency Table
-    FF = table(A,B)
-    
-    tmp = removeEmpty(target=FF, axis=0)
-    F = removeEmpty(target=tmp, axis=1)
-    
-    # Chi-Squared
-    W = sum(F)
-    r = rowSums(F)
-    c = colSums(F)
-    E = (dot(r, c))/W
-    E = ppred(E, 0, "==")*0.0001 + E
-    T = (F-E)**2/E
-    chi_squared = sum(T)
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1)
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=False)
-    
-    # Assign return values
-    pval = pValue
-    contingencyTable = F
-    rowMarginals = r
-    colMarginals = c
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-def bivar_sc(Y:matrix[float], A:matrix[float]) -> (pVal:float, 
CFreqs:matrix[float], CMeans:matrix[float], CVars:matrix[float]):
-    # mean and variance in target variable
-    W = nrow(A)
-    my = mean(Y)
-    varY = moment(Y,2) * W/(W-1.0)
-    
-    # category-wise (frequencies, means, variances)
-    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
-    present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, 
"==")), axis=0)
-    CFreqs = dot(present_domain_vals_mat, CFreqs1)
-    
-    CMeans = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, 
fn="mean"))
-    CVars = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, 
fn="variance"))
-    
-    # number of categories
-    R = nrow(CFreqs)
-    df1 = R-1
-    df2 = W-R
-    
-    anova_num = sum( (CFreqs*(CMeans-my)**2) )/(R-1)
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
-    AnovaF = anova_num/anova_den
-    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=False)
-
+            contingencyTablesCounts[feature_index2,1:sz3] = 
contingencyTableCounts
+            
+            contingencyTablesLabelValues[feature_index2,1:sz3] = 
contingencyTableLabelValues
+            contingencyTablesFeatureValues[feature_index2,1:sz3] = 
contingencyTableFeatureValues
+            
+            featureCounts[feature_index2,1:ncol(colMarginals)] = colMarginals
+            parfor(i5 in 1:ncol(colMarginals), check=0):
+                featureValues[feature_index2,i5] = i5-featureCorrection
+        else:
+            # label is scale, feature is categorical
+            tests[feature_index2,1] = 2
+            [pVal, frequencies, means, variances] = bivar_sc(labels, feature)
+            stats[feature_index2,1] = pVal
+            featureCounts[feature_index2,1:nrow(frequencies)] = 
transpose(frequencies)
+            parfor(i6 in 1:nrow(frequencies), check=0):
+                featureValues[feature_index2,i6] = i6 - featureCorrection
+            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
+            featureSTDs[feature_index2,1:nrow(variances)] = 
transpose(sqrt(variances))
+    else:
+        if(label_measurement_level == feature_measurement_level):
+            # scale-scale
+            tests[feature_index2,1] = 3
+            [r, covariance, stdX, stdY] = bivar_ss(labels, feature)
+            stats[feature_index2,1] = r
+            covariances[feature_index2,1] = covariance
+            standard_deviations[feature_index2,1] = stdY
+        else:
+            # label is categorical, feature is scale
+            tests[feature_index2,1] = 2
+            [pVal, frequencies, means, variances] = bivar_sc(feature, labels)
+            stats[feature_index2,1] = pVal
+            featureMeans[feature_index2,1:nrow(means)] = transpose(means)
+            featureSTDs[feature_index2,1:nrow(variances)] = 
transpose(sqrt(variances))
+    # end if(feature_measurement_level == 0)
+# end parfor(i3 in 1:nrow(feature_indices), check=0)
+
+save(stats, $6, format="text")
+save(tests, $7, format="text")
+save(covariances, $8, format="text")
+save(standard_deviations, $9, format="text")
+save(contingencyTablesCounts, $10, format="text")
+save(contingencyTablesLabelValues, $11, format="text")
+save(contingencyTablesFeatureValues, $12, format="text")
+save(featureValues, $13, format="text")
+save(featureCounts, $14, format="text")
+save(featureMeans, $15, format="text")
+save(featureSTDs, $16, format="text")
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+def bivar_ss(X:matrix[float], Y:matrix[float]) -> (R:float, covXY:float, 
sigmaX:float, sigmaY:float):
+    # Unweighted co-variance
+    covXY = cov(X,Y)
+    
+    # compute standard deviations for both X and Y by computing 2^nd central 
moment
+    W = nrow(X)
+    m2X = moment(X,2)
+    m2Y = moment(Y,2)
+    sigmaX = sqrt(m2X * (W/(W-1.0)) )
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) )
+    
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY)
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+def bivar_cc(A:matrix[float], B:matrix[float]) -> (pval:float, 
contingencyTable:matrix[float], rowMarginals:matrix[float], 
colMarginals:matrix[float]):
+    # Contingency Table
+    FF = table(A,B)
+    
+    tmp = removeEmpty(target=FF, axis=0)
+    F = removeEmpty(target=tmp, axis=1)
+    
+    # Chi-Squared
+    W = sum(F)
+    r = rowSums(F)
+    c = colSums(F)
+    E = (dot(r, c))/W
+    E = ppred(E, 0, "==")*0.0001 + E
+    T = (F-E)**2/E
+    chi_squared = sum(T)
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1)
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=False)
+    
+    # Assign return values
+    pval = pValue
+    contingencyTable = F
+    rowMarginals = r
+    colMarginals = c
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+def bivar_sc(Y:matrix[float], A:matrix[float]) -> (pVal:float, 
CFreqs:matrix[float], CMeans:matrix[float], CVars:matrix[float]):
+    # mean and variance in target variable
+    W = nrow(A)
+    my = mean(Y)
+    varY = moment(Y,2) * W/(W-1.0)
+    
+    # category-wise (frequencies, means, variances)
+    CFreqs1 = aggregate(target=Y, groups=A, fn="count")
+    present_domain_vals_mat = removeEmpty(target=diag(1-ppred(CFreqs1, 0, 
"==")), axis=0)
+    CFreqs = dot(present_domain_vals_mat, CFreqs1)
+    
+    CMeans = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, 
fn="mean"))
+    CVars = dot(present_domain_vals_mat, aggregate(target=Y, groups=A, 
fn="variance"))
+    
+    # number of categories
+    R = nrow(CFreqs)
+    df1 = R-1
+    df2 = W-R
+    
+    anova_num = sum( (CFreqs*(CMeans-my)**2) )/(R-1)
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R)
+    AnovaF = anova_num/anova_den
+    pVal = pf(target=AnovaF, df1=df1, df2=df2, lower.tail=False)
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R 
b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
index dc65b8a..a3ca47a 100644
--- a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
+++ b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.R
@@ -1,71 +1,71 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
-
-# reading input args
-numClasses = as.integer(args[2]);
-laplace_correction = as.double(args[3]);
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, numClasses, numFeatures)
-for (i in 1:numFeatures) {
-  Col = D[,i]
-  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), 
FUN=sum)[,2];
-}
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-ones = matrix(1, 1, numFeatures)
-repClassSums = classSums %*% ones;
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
-
-# Compute class priors
-class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
-class_prior = class_counts / numRows;
-
-# Compute accuracy on training set
-ones = matrix(1, numRows, 1)
-D_w_ones = cbind(D, ones)
-model = cbind(class_conditionals, class_prior)
-log_probs = D_w_ones %*% t(log(model))
-pred = max.col(log_probs,ties.method="last");
-acc = sum(pred == C) / numRows * 100
-
-print(paste("Training Accuracy (%): ", acc, sep=""))
-
-# write out the model
-writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
-writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], 
"conditionals", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
+
+# reading input args
+numClasses = as.integer(args[2]);
+laplace_correction = as.double(args[3]);
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, numClasses, numFeatures)
+for (i in 1:numFeatures) {
+  Col = D[,i]
+  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), 
FUN=sum)[,2];
+}
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+ones = matrix(1, 1, numFeatures)
+repClassSums = classSums %*% ones;
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
+
+# Compute class priors
+class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
+class_prior = class_counts / numRows;
+
+# Compute accuracy on training set
+ones = matrix(1, numRows, 1)
+D_w_ones = cbind(D, ones)
+model = cbind(class_conditionals, class_prior)
+log_probs = D_w_ones %*% t(log(model))
+pred = max.col(log_probs,ties.method="last");
+acc = sum(pred == C) / numRows * 100
+
+print(paste("Training Accuracy (%): ", acc, sep=""))
+
+# write out the model
+writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
+writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], 
"conditionals", sep=""));


Reply via email to