[19/51] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

dusenberrymw Fri, 22 Jan 2016 08:34:44 -0800

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml 
b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml
index d0ce8d1..77df03b 100644
--- a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml
+++ b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.dml
@@ -1,78 +1,78 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multinomial naive Bayes classifier with Laplace correction
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f naive-bayes.dml -nvargs X=<Data> Y=<labels> 
classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> 
conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
-#
-
-# defaults
-# $laplace = 1
-fmt = ifdef($fmt, "text")
-
-# reading input args
-numClasses = $classes
-D = read($X)
-C = read($Y)
-laplace_correction = ifdef($laplace, 1)
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, rows=numClasses, cols=numFeatures)
-parfor (i in 1:numFeatures) {
-  Col = D[,i]
-  classFeatureCounts[,i] = aggregate(target=Col, groups=C, fn="sum", 
ngroups=as.integer(numClasses))
-}
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-ones = matrix(1, rows=1, cols=numFeatures)
-repClassSums = classSums %*% ones
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums
-
-# Compute class priors
-class_counts = aggregate(target=C, groups=C, fn="count", 
ngroups=as.integer(numClasses))
-class_prior = class_counts / numRows;
-
-# Compute accuracy on training set
-ones = matrix(1, rows=numRows, cols=1)
-D_w_ones = append(D, ones)
-model = append(class_conditionals, class_prior)
-log_probs = D_w_ones %*% t(log(model))
-pred = rowIndexMax(log_probs)
-acc = sum(ppred(pred, C, "==")) / numRows * 100
-
-acc_str = "Training Accuracy (%): " + acc
-print(acc_str)
-write(acc_str, $accuracy)
-
-# write out the model
-write(class_prior, $prior, format=fmt);
-write(class_conditionals, $conditionals, format=fmt);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multinomial naive Bayes classifier with Laplace correction
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f naive-bayes.dml -nvargs X=<Data> Y=<labels> 
classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> 
conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
+#
+
+# defaults
+# $laplace = 1
+fmt = ifdef($fmt, "text")
+
+# reading input args
+numClasses = $classes
+D = read($X)
+C = read($Y)
+laplace_correction = ifdef($laplace, 1)
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, rows=numClasses, cols=numFeatures)
+parfor (i in 1:numFeatures) {
+  Col = D[,i]
+  classFeatureCounts[,i] = aggregate(target=Col, groups=C, fn="sum", 
ngroups=as.integer(numClasses))
+}
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+ones = matrix(1, rows=1, cols=numFeatures)
+repClassSums = classSums %*% ones
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums
+
+# Compute class priors
+class_counts = aggregate(target=C, groups=C, fn="count", 
ngroups=as.integer(numClasses))
+class_prior = class_counts / numRows;
+
+# Compute accuracy on training set
+ones = matrix(1, rows=numRows, cols=1)
+D_w_ones = append(D, ones)
+model = append(class_conditionals, class_prior)
+log_probs = D_w_ones %*% t(log(model))
+pred = rowIndexMax(log_probs)
+acc = sum(ppred(pred, C, "==")) / numRows * 100
+
+acc_str = "Training Accuracy (%): " + acc
+print(acc_str)
+write(acc_str, $accuracy)
+
+# write out the model
+write(class_prior, $prior, format=fmt);
+write(class_conditionals, $conditionals, format=fmt);


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml 
b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml
index 5d84951..462b330 100644
--- a/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml
+++ b/src/test/scripts/applications/naive-bayes-parfor/naive-bayes.pydml
@@ -1,79 +1,79 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multinomial naive Bayes classifier with Laplace correction
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f naive-bayes.pydml -python -nvargs X=<Data> 
Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model 
file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
-#
-
-# defaults
-# $laplace = 1
-fmt = ifdef($fmt, "text")
-
-# reading input args
-numClasses = $classes
-D = load($X)
-C = load($Y)
-laplace_correction = ifdef($laplace, 1)
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = full(0, rows=numClasses, cols=numFeatures)
-parfor (i in 1:numFeatures):
-    Col = D[,i]
-    classFeatureCounts[,i] = aggregate(target=Col, groups=C, fn="sum", 
ngroups=numClasses)
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-ones = full(1, rows=1, cols=numFeatures)
-repClassSums = dot(classSums, ones)
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums
-
-# Compute class priors
-class_counts = aggregate(target=C, groups=C, fn="count", ngroups=numClasses)
-class_prior = class_counts / numRows
-
-# Compute accuracy on training set
-ones = full(1, rows=numRows, cols=1)
-D_w_ones = append(D, ones)
-model = append(class_conditionals, class_prior)
-log_model = log(model)
-transpose_log_model = log_model.transpose()
-log_probs = dot(D_w_ones, transpose_log_model)
-pred = rowIndexMax(log_probs)
-acc = sum(ppred(pred, C, "==")) / numRows * 100
-
-acc_str = "Training Accuracy (%): " + acc
-print(acc_str)
-save(acc_str, $accuracy)
-
-# write out the model
-save(class_prior, $prior, format=fmt)
-save(class_conditionals, $conditionals, format=fmt)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multinomial naive Bayes classifier with Laplace correction
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f naive-bayes.pydml -python -nvargs X=<Data> 
Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model 
file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
+#
+
+# defaults
+# $laplace = 1
+fmt = ifdef($fmt, "text")
+
+# reading input args
+numClasses = $classes
+D = load($X)
+C = load($Y)
+laplace_correction = ifdef($laplace, 1)
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = full(0, rows=numClasses, cols=numFeatures)
+parfor (i in 1:numFeatures):
+    Col = D[,i]
+    classFeatureCounts[,i] = aggregate(target=Col, groups=C, fn="sum", 
ngroups=numClasses)
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+ones = full(1, rows=1, cols=numFeatures)
+repClassSums = dot(classSums, ones)
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums
+
+# Compute class priors
+class_counts = aggregate(target=C, groups=C, fn="count", ngroups=numClasses)
+class_prior = class_counts / numRows
+
+# Compute accuracy on training set
+ones = full(1, rows=numRows, cols=1)
+D_w_ones = append(D, ones)
+model = append(class_conditionals, class_prior)
+log_model = log(model)
+transpose_log_model = log_model.transpose()
+log_probs = dot(D_w_ones, transpose_log_model)
+pred = rowIndexMax(log_probs)
+acc = sum(ppred(pred, C, "==")) / numRows * 100
+
+acc_str = "Training Accuracy (%): " + acc
+print(acc_str)
+save(acc_str, $accuracy)
+
+# write out the model
+save(class_prior, $prior, format=fmt)
+save(class_conditionals, $conditionals, format=fmt)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/naive-bayes/naive-bayes.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes/naive-bayes.R 
b/src/test/scripts/applications/naive-bayes/naive-bayes.R
index dc65b8a..a3ca47a 100644
--- a/src/test/scripts/applications/naive-bayes/naive-bayes.R
+++ b/src/test/scripts/applications/naive-bayes/naive-bayes.R
@@ -1,71 +1,71 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-
-library("Matrix")
-
-D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
-C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
-
-# reading input args
-numClasses = as.integer(args[2]);
-laplace_correction = as.double(args[3]);
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-
-# Compute the feature counts for each class
-classFeatureCounts = matrix(0, numClasses, numFeatures)
-for (i in 1:numFeatures) {
-  Col = D[,i]
-  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), 
FUN=sum)[,2];
-}
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
-
-# Compute class conditional probabilities
-ones = matrix(1, 1, numFeatures)
-repClassSums = classSums %*% ones;
-class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
-
-# Compute class priors
-class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
-class_prior = class_counts / numRows;
-
-# Compute accuracy on training set
-ones = matrix(1, numRows, 1)
-D_w_ones = cbind(D, ones)
-model = cbind(class_conditionals, class_prior)
-log_probs = D_w_ones %*% t(log(model))
-pred = max.col(log_probs,ties.method="last");
-acc = sum(pred == C) / numRows * 100
-
-print(paste("Training Accuracy (%): ", acc, sep=""))
-
-# write out the model
-writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
-writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], 
"conditionals", sep=""));
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+
+library("Matrix")
+
+D = as.matrix(readMM(paste(args[1], "X.mtx", sep="")))
+C = as.matrix(readMM(paste(args[1], "Y.mtx", sep="")))
+
+# reading input args
+numClasses = as.integer(args[2]);
+laplace_correction = as.double(args[3]);
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+
+# Compute the feature counts for each class
+classFeatureCounts = matrix(0, numClasses, numFeatures)
+for (i in 1:numFeatures) {
+  Col = D[,i]
+  classFeatureCounts[,i] = aggregate(as.vector(Col), by=list(as.vector(C)), 
FUN=sum)[,2];
+}
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplace_correction
+
+# Compute class conditional probabilities
+ones = matrix(1, 1, numFeatures)
+repClassSums = classSums %*% ones;
+class_conditionals = (classFeatureCounts + laplace_correction) / repClassSums;
+
+# Compute class priors
+class_counts = aggregate(as.vector(C), by=list(as.vector(C)), FUN=length)[,2]
+class_prior = class_counts / numRows;
+
+# Compute accuracy on training set
+ones = matrix(1, numRows, 1)
+D_w_ones = cbind(D, ones)
+model = cbind(class_conditionals, class_prior)
+log_probs = D_w_ones %*% t(log(model))
+pred = max.col(log_probs,ties.method="last");
+acc = sum(pred == C) / numRows * 100
+
+print(paste("Training Accuracy (%): ", acc, sep=""))
+
+# write out the model
+writeMM(as(class_prior, "CsparseMatrix"), paste(args[4], "prior", sep=""));
+writeMM(as(class_conditionals, "CsparseMatrix"), paste(args[4], 
"conditionals", sep=""));

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/naive-bayes/naive-bayes.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes/naive-bayes.dml 
b/src/test/scripts/applications/naive-bayes/naive-bayes.dml
index 1fe1bf4..a81edea 100644
--- a/src/test/scripts/applications/naive-bayes/naive-bayes.dml
+++ b/src/test/scripts/applications/naive-bayes/naive-bayes.dml
@@ -1,67 +1,67 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multinomial naive Bayes classifier with Laplace correction
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f naive-bayes.dml -nvargs X=<Data> Y=<labels> 
classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> 
conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
-#
-
-# defaults
-# $laplace = 1
-fmt = ifdef($fmt, "text")
-
-# reading input args
-numClasses = $classes
-D = read($X)
-C = read($Y)
-laplaceCorrection = ifdef($laplace, 1)
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-# Compute the feature counts for each class
-classFeatureCounts = aggregate(target=D, groups=C, fn="sum", 
ngroups=as.integer(numClasses));
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplaceCorrection
-
-# Compute class conditional probabilities
-classConditionals = (classFeatureCounts + laplaceCorrection) / classSums
-
-# Compute class priors
-classCounts = aggregate(target=C, groups=C, fn="count", 
ngroups=as.integer(numClasses))
-classPrior = classCounts / numRows;
-
-# Compute accuracy on training set
-logProbs = D %*% t(log(classConditionals)) + t(log(classPrior));
-acc = sum(rowIndexMax(logProbs) == C) / numRows * 100
-
-acc_str = "Training Accuracy (%): " + acc
-print(acc_str)
-write(acc_str, $accuracy)
-
-# write out the model
-write(classPrior, $prior, format=fmt);
-write(classConditionals, $conditionals, format=fmt);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multinomial naive Bayes classifier with Laplace correction
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f naive-bayes.dml -nvargs X=<Data> Y=<labels> 
classes=<Num Classes> laplace=<Laplace Correction> prior=<Model file1> 
conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
+#
+
+# defaults
+# $laplace = 1
+fmt = ifdef($fmt, "text")
+
+# reading input args
+numClasses = $classes
+D = read($X)
+C = read($Y)
+laplaceCorrection = ifdef($laplace, 1)
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+# Compute the feature counts for each class
+classFeatureCounts = aggregate(target=D, groups=C, fn="sum", 
ngroups=as.integer(numClasses));
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplaceCorrection
+
+# Compute class conditional probabilities
+classConditionals = (classFeatureCounts + laplaceCorrection) / classSums
+
+# Compute class priors
+classCounts = aggregate(target=C, groups=C, fn="count", 
ngroups=as.integer(numClasses))
+classPrior = classCounts / numRows;
+
+# Compute accuracy on training set
+logProbs = D %*% t(log(classConditionals)) + t(log(classPrior));
+acc = sum(rowIndexMax(logProbs) == C) / numRows * 100
+
+acc_str = "Training Accuracy (%): " + acc
+print(acc_str)
+write(acc_str, $accuracy)
+
+# write out the model
+write(classPrior, $prior, format=fmt);
+write(classConditionals, $conditionals, format=fmt);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/naive-bayes/naive-bayes.pydml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/naive-bayes/naive-bayes.pydml 
b/src/test/scripts/applications/naive-bayes/naive-bayes.pydml
index 25fbf84..480265d 100644
--- a/src/test/scripts/applications/naive-bayes/naive-bayes.pydml
+++ b/src/test/scripts/applications/naive-bayes/naive-bayes.pydml
@@ -1,71 +1,71 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# Implements multinomial naive Bayes classifier with Laplace correction
-#
-# Example Usage:
-# hadoop jar SystemML.jar -f naive-bayes.pydml -python -nvargs X=<Data> 
Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model 
file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
-#
-
-# defaults
-# $laplace = 1
-fmt = ifdef($fmt, "text")
-
-# reading input args
-numClasses = $classes
-D = load($X)
-C = load($Y)
-laplaceCorrection = ifdef($laplace, 1)
-
-numRows = nrow(D)
-numFeatures = ncol(D)
-
-# Compute conditionals
-# Compute the feature counts for each class
-classFeatureCounts = aggregate(target=D, groups=C, fn="sum", 
ngroups=numClasses);
-
-# Compute the total feature count for each class 
-# and add the number of features to this sum
-# for subsequent regularization (Laplace's rule)
-classSums = rowSums(classFeatureCounts) + numFeatures*laplaceCorrection
-
-# Compute class conditional probabilities
-classConditionals = (classFeatureCounts + laplaceCorrection) / classSums
-
-# Compute class priors
-classCounts = aggregate(target=C, groups=C, fn="count", ngroups=numClasses)
-classPrior = classCounts / numRows
-
-# Compute accuracy on training set
-lmodel1 = log(classConditionals)
-lmodel2 = log(classPrior)
-tlmodel1 = lmodel1.transpose()
-tlmodel2 = lmodel2.transpose()
-logProbs = dot(D, tlmodel1) + tlmodel2
-acc = sum(rowIndexMax(logProbs) == C) / numRows * 100
-
-acc_str = "Training Accuracy (%): " + acc
-print(acc_str)
-save(acc_str, $accuracy)
-
-# write out the model
-save(classPrior, $prior, format=fmt)
-save(classConditionals, $conditionals, format=fmt)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# Implements multinomial naive Bayes classifier with Laplace correction
+#
+# Example Usage:
+# hadoop jar SystemML.jar -f naive-bayes.pydml -python -nvargs X=<Data> 
Y=<labels> classes=<Num Classes> laplace=<Laplace Correction> prior=<Model 
file1> conditionals=<Model file2> accuracy=<accuracy file> fmt="text"
+#
+
+# defaults
+# $laplace = 1
+fmt = ifdef($fmt, "text")
+
+# reading input args
+numClasses = $classes
+D = load($X)
+C = load($Y)
+laplaceCorrection = ifdef($laplace, 1)
+
+numRows = nrow(D)
+numFeatures = ncol(D)
+
+# Compute conditionals
+# Compute the feature counts for each class
+classFeatureCounts = aggregate(target=D, groups=C, fn="sum", 
ngroups=numClasses);
+
+# Compute the total feature count for each class 
+# and add the number of features to this sum
+# for subsequent regularization (Laplace's rule)
+classSums = rowSums(classFeatureCounts) + numFeatures*laplaceCorrection
+
+# Compute class conditional probabilities
+classConditionals = (classFeatureCounts + laplaceCorrection) / classSums
+
+# Compute class priors
+classCounts = aggregate(target=C, groups=C, fn="count", ngroups=numClasses)
+classPrior = classCounts / numRows
+
+# Compute accuracy on training set
+lmodel1 = log(classConditionals)
+lmodel2 = log(classPrior)
+tlmodel1 = lmodel1.transpose()
+tlmodel2 = lmodel2.transpose()
+logProbs = dot(D, tlmodel1) + tlmodel2
+acc = sum(rowIndexMax(logProbs) == C) / numRows * 100
+
+acc_str = "Training Accuracy (%): " + acc
+print(acc_str)
+save(acc_str, $accuracy)
+
+# write out the model
+save(classPrior, $prior, format=fmt)
+save(classConditionals, $conditionals, format=fmt)

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_bivariate.R
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate.R 
b/src/test/scripts/applications/parfor/parfor_bivariate.R
index 889b22b..82a5be5 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate.R
+++ b/src/test/scripts/applications/parfor/parfor_bivariate.R
@@ -1,155 +1,155 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-args <- commandArgs(TRUE)
-options(digits=22)
-
-library("Matrix")
-
-
-D1 <- readMM(paste(args[1], "D.mtx", sep=""))
-S11 <- readMM(paste(args[1], "S1.mtx", sep=""))
-S21 <- readMM(paste(args[1], "S2.mtx", sep=""))
-K11 <- readMM(paste(args[1], "K1.mtx", sep=""))
-K21 <- readMM(paste(args[1], "K2.mtx", sep=""))
-D <- as.matrix(D1);
-S1 <- as.matrix(S11);
-S2 <- as.matrix(S21);
-K1 <- as.matrix(K11);
-K2 <- as.matrix(K21);
-
-numPairs <- ncol(S1) * ncol(S2); # number of attribute pairs (|S1|*|S2|)
-maxC <- args[2]; # max number of categories in any categorical attribute
-
-s1size <- ncol(S1);
-s2size <- ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats <- 8;
-basestats <- array(0,dim=c(numstats,numPairs)); 
-cat_counts <- array(0,dim=c(maxC,numPairs)); 
-cat_means <- array(0,dim=c(maxC,numPairs));
-cat_vars <- array(0,dim=c(maxC,numPairs));
-
-
-for( i in 1:s1size ) { 
-    a1 <- S1[,i];
-    k1 <- K1[1,i];
-    A1 <- as.matrix(D[,a1]);
-
-    for( j in 1:s2size ) {
-        pairID <-(i-1)*s2size+j;
-        a2 <- S2[,j];
-        k2 <- K2[1,j];
-        A2 <- as.matrix(D[,a2]);
-    
-        if (k1 == k2) {
-            if (k1 == 1) {   
-                # scale-scale
-                print("scale-scale");
-                basestats[1,pairID] <- cor(D[,a1], D[,a2]);
-                #basestats[1,pairID] <- cor(A1, A2);
-                
-                print(basestats[1,pairID]);
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("categorical-categorical");
-                F <- table(A1,A2);
-                cst <- chisq.test(F);
-                chi_squared <- as.numeric(cst[1]);
-                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
-                pValue <- as.numeric(cst[3]);
-                q <- min(dim(F));
-                W <- sum(F);
-                cramers_v <- sqrt(chi_squared/(W*(q-1)));
-
-                basestats[2,pairID] <- chi_squared;
-                basestats[3,pairID] <- degFreedom;
-                basestats[4,pairID] <- pValue;
-                basestats[5,pairID] <- cramers_v;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal   
-                    print("ordinal-ordinal");
-                    basestats[6,pairID] <- cor(A1,A2, method="spearman");
-                }
-            }
-        } 
-        else {       
-            if (k1 == 1 || k2 == 1) {    
-                # Scale-nominal/ordinal
-                print("scale-categorical");
-                if ( k1 == 1 ) {
-                    Av <- as.matrix(A2); 
-                    Yv <- as.matrix(A1); 
-                }
-                else {
-                    Av <- as.matrix(A1); 
-                    Yv <- as.matrix(A2); 
-                }
-                
-                W <- nrow(Av);
-                my <- mean(Yv); 
-                varY <- var(Yv);
-                
-                CFreqs <- as.matrix(table(Av)); 
-                CMeans <- as.matrix(aggregate(Yv, by=list(Av), "mean")$V1);
-                CVars <- as.matrix(aggregate(Yv, by=list(Av), "var")$V1);
-                R <- nrow(CFreqs);
-              
-                Eta <- sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-                anova_num <- sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-                anova_den <- sum( (CFreqs-1)*CVars )/(W-R);
-                ANOVAF <- anova_num/anova_den;
-
-                basestats[7,pairID] <- Eta;
-                basestats[8,pairID] <- ANOVAF;
-
-                cat_counts[ 1:length(CFreqs),pairID] <- CFreqs;
-                cat_means[ 1:length(CMeans),pairID] <- CMeans;
-                cat_vars[ 1:length(CVars),pairID] <- CVars;
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal    
-                print("nomial-ordinal"); #TODO should not be same code         
   
-                F <- table(A1,A2);
-                cst <- chisq.test(F);
-                chi_squared <- as.numeric(cst[1]);
-                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
-                pValue <- as.numeric(cst[3]);
-                q <- min(dim(F));
-                W <- sum(F);
-                cramers_v <- sqrt(chi_squared/(W*(q-1)));
-                
-                basestats[2,pairID] <- chi_squared;
-                basestats[3,pairID] <- degFreedom;
-                basestats[4,pairID] <- pValue;
-                basestats[5,pairID] <- cramers_v;
-            }
-        }
-    }
-}
-
-writeMM(as(basestats, "CsparseMatrix"), paste(args[3], "bivar.stats", sep=""));
-writeMM(as(cat_counts, "CsparseMatrix"), paste(args[3], "category.counts", 
sep=""));
-writeMM(as(cat_means, "CsparseMatrix"), paste(args[3], "category.means", 
sep=""));
-writeMM(as(cat_vars, "CsparseMatrix"), paste(args[3], "category.variances", 
sep=""));
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+args <- commandArgs(TRUE)
+options(digits=22)
+
+library("Matrix")
+
+
+D1 <- readMM(paste(args[1], "D.mtx", sep=""))
+S11 <- readMM(paste(args[1], "S1.mtx", sep=""))
+S21 <- readMM(paste(args[1], "S2.mtx", sep=""))
+K11 <- readMM(paste(args[1], "K1.mtx", sep=""))
+K21 <- readMM(paste(args[1], "K2.mtx", sep=""))
+D <- as.matrix(D1);
+S1 <- as.matrix(S11);
+S2 <- as.matrix(S21);
+K1 <- as.matrix(K11);
+K2 <- as.matrix(K21);
+
+numPairs <- ncol(S1) * ncol(S2); # number of attribute pairs (|S1|*|S2|)
+maxC <- args[2]; # max number of categories in any categorical attribute
+
+s1size <- ncol(S1);
+s2size <- ncol(S2);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats <- 8;
+basestats <- array(0,dim=c(numstats,numPairs)); 
+cat_counts <- array(0,dim=c(maxC,numPairs)); 
+cat_means <- array(0,dim=c(maxC,numPairs));
+cat_vars <- array(0,dim=c(maxC,numPairs));
+
+
+for( i in 1:s1size ) { 
+    a1 <- S1[,i];
+    k1 <- K1[1,i];
+    A1 <- as.matrix(D[,a1]);
+
+    for( j in 1:s2size ) {
+        pairID <-(i-1)*s2size+j;
+        a2 <- S2[,j];
+        k2 <- K2[1,j];
+        A2 <- as.matrix(D[,a2]);
+    
+        if (k1 == k2) {
+            if (k1 == 1) {   
+                # scale-scale
+                print("scale-scale");
+                basestats[1,pairID] <- cor(D[,a1], D[,a2]);
+                #basestats[1,pairID] <- cor(A1, A2);
+                
+                print(basestats[1,pairID]);
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("categorical-categorical");
+                F <- table(A1,A2);
+                cst <- chisq.test(F);
+                chi_squared <- as.numeric(cst[1]);
+                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
+                pValue <- as.numeric(cst[3]);
+                q <- min(dim(F));
+                W <- sum(F);
+                cramers_v <- sqrt(chi_squared/(W*(q-1)));
+
+                basestats[2,pairID] <- chi_squared;
+                basestats[3,pairID] <- degFreedom;
+                basestats[4,pairID] <- pValue;
+                basestats[5,pairID] <- cramers_v;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal   
+                    print("ordinal-ordinal");
+                    basestats[6,pairID] <- cor(A1,A2, method="spearman");
+                }
+            }
+        } 
+        else {       
+            if (k1 == 1 || k2 == 1) {    
+                # Scale-nominal/ordinal
+                print("scale-categorical");
+                if ( k1 == 1 ) {
+                    Av <- as.matrix(A2); 
+                    Yv <- as.matrix(A1); 
+                }
+                else {
+                    Av <- as.matrix(A1); 
+                    Yv <- as.matrix(A2); 
+                }
+                
+                W <- nrow(Av);
+                my <- mean(Yv); 
+                varY <- var(Yv);
+                
+                CFreqs <- as.matrix(table(Av)); 
+                CMeans <- as.matrix(aggregate(Yv, by=list(Av), "mean")$V1);
+                CVars <- as.matrix(aggregate(Yv, by=list(Av), "var")$V1);
+                R <- nrow(CFreqs);
+              
+                Eta <- sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+                anova_num <- sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+                anova_den <- sum( (CFreqs-1)*CVars )/(W-R);
+                ANOVAF <- anova_num/anova_den;
+
+                basestats[7,pairID] <- Eta;
+                basestats[8,pairID] <- ANOVAF;
+
+                cat_counts[ 1:length(CFreqs),pairID] <- CFreqs;
+                cat_means[ 1:length(CMeans),pairID] <- CMeans;
+                cat_vars[ 1:length(CVars),pairID] <- CVars;
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal    
+                print("nomial-ordinal"); #TODO should not be same code         
   
+                F <- table(A1,A2);
+                cst <- chisq.test(F);
+                chi_squared <- as.numeric(cst[1]);
+                degFreedom <- (nrow(F)-1)*(ncol(F)-1);
+                pValue <- as.numeric(cst[3]);
+                q <- min(dim(F));
+                W <- sum(F);
+                cramers_v <- sqrt(chi_squared/(W*(q-1)));
+                
+                basestats[2,pairID] <- chi_squared;
+                basestats[3,pairID] <- degFreedom;
+                basestats[4,pairID] <- pValue;
+                basestats[5,pairID] <- cramers_v;
+            }
+        }
+    }
+}
+
+writeMM(as(basestats, "CsparseMatrix"), paste(args[3], "bivar.stats", sep=""));
+writeMM(as(cat_counts, "CsparseMatrix"), paste(args[3], "category.counts", 
sep=""));
+writeMM(as(cat_means, "CsparseMatrix"), paste(args[3], "category.means", 
sep=""));
+writeMM(as(cat_vars, "CsparseMatrix"), paste(args[3], "category.variances", 
sep=""));
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_bivariate0.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate0.dml 
b/src/test/scripts/applications/parfor/parfor_bivariate0.dml
index ac79abd..341a0b1 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate0.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate0.dml
@@ -1,265 +1,265 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between 
all attribute pairs 
- *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
- *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) 
and (1<= j <=n)
- *
- * Seven inputs:  
- *    $1) D  - input data
- *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- *    $4) K1 - kind for attributes in S1 
- *    $5) K2 - kind for attributes in S2
- *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- *    $6) numPairs - total number of pairs (m*n)
- *    $7) maxC - maximum number of categories in any categorical attribute
- * 
- * One output:    
- *    $6) output directory in which following four statistics files are created
- *        + bivar.stats - matrix with all 8 bivariate statistics computed for 
different attribute pairs
- *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- *        + categorical.counts - 
- *        + categorical.means - 
- *        + categorical.variances - 
- *          -> Values in these three matrices are applicable only for 
scale-categorical attribute pairs. 
- *          k^th column in these matrices denote the attribute pair 
(A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8);  # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11;     # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-for( i in 1:s1size ) {
-    a1 = castAsScalar(S1[,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-    #print("a1="+a1);
-
-    for( j in 1:s2size ) {
-        pairID = (i-1)*s2size+j; 
-        #print("ID="+pairID+"(i="+i+",j="+j+")");
-        a2 = castAsScalar(S2[,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-        #print("a2="+a2);
-    
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                r = bivar_ss(A1,A2);   
-                basestats[1,pairID] = r;
-                #print("scale:"+r);
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2);
-                    basestats[6,pairID] = sp;
-                }
-            }
-        } 
-        else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal      TODO MB correctness errors
-                print("[" + i + "," + j + "] scale-categorical");
-                
-               if ( k1 == 1 ) {
-                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
-                }
-                else {
-                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
-                }
-                basestats[7,pairID] = eta;
-                basestats[8,pairID] = f;
-                cat_counts[,pairID] = counts;
-                cat_means[,pairID] = means;
-                cat_vars[,pairID] = vars; 
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-            }
-        }
-    }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, 
Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = as.double(degFreedom);
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central 
moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-    #print("Rmx="+m2X);
-    #print("Rmy="+m2Y);
-    #print("Rcov="+covXY);
-    #print("Rsx="+sigmaX);
-    #print("Rsy="+sigmaY);
-    #print("R="+R);
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, 
Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] 
CVars ) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean");
-    CVars =  aggregate(target=Y, groups=A, fn="variance");
-
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-    AnovaF = anova_num/anova_den;
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which 
each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
-    # compute contingency table
-    F = table(A,B);
-
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between 
all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) 
and (1<= j <=n)
+ *
+ * Seven inputs:  
+ *    $1) D  - input data
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ *    $4) K1 - kind for attributes in S1 
+ *    $5) K2 - kind for attributes in S2
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ *    $6) numPairs - total number of pairs (m*n)
+ *    $7) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:    
+ *    $6) output directory in which following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for 
different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ *        + categorical.counts - 
+ *        + categorical.means - 
+ *        + categorical.variances - 
+ *          -> Values in these three matrices are applicable only for 
scale-categorical attribute pairs. 
+ *          k^th column in these matrices denote the attribute pair 
(A_1i,A_2j) where i*j = k.
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+
+for( i in 1:s1size ) {
+    a1 = castAsScalar(S1[,i]);
+    k1 = castAsScalar(K1[1,i]);
+    A1 = D[,a1];
+    #print("a1="+a1);
+
+    for( j in 1:s2size ) {
+        pairID = (i-1)*s2size+j; 
+        #print("ID="+pairID+"(i="+i+",j="+j+")");
+        a2 = castAsScalar(S2[,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+        #print("a2="+a2);
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = r;
+                #print("scale:"+r);
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal      TODO MB correctness errors
+                print("[" + i + "," + j + "] scale-categorical");
+                
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = eta;
+                basestats[8,pairID] = f;
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+            }
+        }
+    }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, 
Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = as.double(degFreedom);
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central 
moment
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+    #print("Rmx="+m2X);
+    #print("Rmy="+m2Y);
+    #print("Rcov="+covXY);
+    #print("Rsx="+sigmaX);
+    #print("Rsy="+sigmaY);
+    #print("R="+R);
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, 
Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] 
CVars ) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which 
each cell denotes to the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+    # compute contingency table
+    F = table(A,B);
+
+    catA = nrow(F);  # number of categories in A
+    catB = ncol(F);  # number of categories in B
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
  
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_bivariate1.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/parfor/parfor_bivariate1.dml 
b/src/test/scripts/applications/parfor/parfor_bivariate1.dml
index 69a7ad6..aa40d7c 100644
--- a/src/test/scripts/applications/parfor/parfor_bivariate1.dml
+++ b/src/test/scripts/applications/parfor/parfor_bivariate1.dml
@@ -1,256 +1,256 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-
-/*
- *
- * For a given pair of attribute sets, compute bivariate statistics between 
all attribute pairs 
- *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
- *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) 
and (1<= j <=n)
- *
- * Seven inputs:  
- *    $1) D  - input data
- *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
- *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
- *    $4) K1 - kind for attributes in S1 
- *    $5) K2 - kind for attributes in S2
- *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
- *    $6) numPairs - total number of pairs (m*n)
- *    $7) maxC - maximum number of categories in any categorical attribute
- * 
- * One output:    
- *    $6) output directory in which following four statistics files are created
- *        + bivar.stats - matrix with all 8 bivariate statistics computed for 
different attribute pairs
- *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
- *        + categorical.counts - 
- *        + categorical.means - 
- *        + categorical.variances - 
- *          -> Values in these three matrices are applicable only for 
scale-categorical attribute pairs. 
- *          k^th column in these matrices denote the attribute pair 
(A_1i,A_2j) where i*j = k.
- */
-
-D = read($1, rows=$7, cols=$8);  # input data set
-S1 = read($2, rows=1, cols=$9); # attribute set 1
-S2 = read($3, rows=1, cols=$9); # attribute set 2
-K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
-K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
-numPairs = $10; # number of attribute pairs (|S1|*|S2|)
-maxC = $11;     # max number of categories in any categorical attribute
-
-s1size = ncol(S1);
-s2size = ncol(S2);
-
-# R, chisq, cramers, spearman, eta, anovaf
-numstats = 8;
-basestats = matrix(0, rows=numstats, cols=numPairs);
-cat_counts = matrix(0, rows=maxC, cols=numPairs);
-cat_means = matrix(0, rows=maxC, cols=numPairs);
-cat_vars = matrix(0, rows=maxC, cols=numPairs);
-
-
-parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) {
-    a1 = castAsScalar(S1[,i]);
-    k1 = castAsScalar(K1[1,i]);
-    A1 = D[,a1];
-
-    parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) {
-        pairID = (i-1)*s2size+j; 
-        a2 = castAsScalar(S2[,j]);
-        k2 = castAsScalar(K2[1,j]);
-        A2 = D[,a2];
-    
-        if (k1 == k2) {
-            if (k1 == 1) {
-                # scale-scale
-                print("[" + i + "," + j + "] scale-scale");
-                r = bivar_ss(A1,A2);   
-                basestats[1,pairID] = r;
-            } else {
-                # nominal-nominal or ordinal-ordinal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-
-                if ( k1 == 3 ) {
-                    # ordinal-ordinal
-                    print("[" + i + "," + j + "] ordinal-ordinal");
-                    sp = bivar_oo(A1, A2);
-                    basestats[6,pairID] = sp;
-                }
-            }
-        } 
-        else {
-            if (k1 == 1 | k2 == 1) {
-                # Scale-nominal/ordinal      TODO MB correctness errors
-                print("[" + i + "," + j + "] scale-categorical");
-                
-               if ( k1 == 1 ) {
-                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
-                }
-                else {
-                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
-                }
-                basestats[7,pairID] = eta;
-                basestats[8,pairID] = f;
-                cat_counts[,pairID] = counts;
-                cat_means[,pairID] = means;
-                cat_vars[,pairID] = vars; 
-            }
-            else {
-                # nominal-ordinal or ordinal-nominal
-                print("[" + i + "," + j + "] categorical-categorical");
-                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
-                basestats[2,pairID] = chisq;
-                basestats[3,pairID] = df;
-                basestats[4,pairID] = pval;
-                basestats[5,pairID] = cramersv;
-            }
-        }
-    }
-}
-
-write(basestats, $6 + "/bivar.stats");
-write(cat_counts, $6 + "/category.counts");
-write(cat_means, $6 + "/category.means");
-write(cat_vars, $6 + "/category.variances");
-
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, 
Double df, Double pval, Double cramersv) {
-
-    # Contingency Table
-    F = table(A,B);
-
-    # Chi-Squared
-    W = sum(F);
-    r = rowSums(F);
-    c = colSums(F);
-    E = (r %*% c)/W;
-    T = (F-E)^2/E;
-    chi_squared = sum(T);
-
-    # compute p-value
-    degFreedom = (nrow(F)-1)*(ncol(F)-1);
-    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
-
-    # Cramer's V
-    R = nrow(F);
-    C = ncol(F);
-    q = min(R,C);
-    cramers_v = sqrt(chi_squared/(W*(q-1)));
-
-    # Assign return values
-    chisq = chi_squared;
-    df = as.double(degFreedom);
-    pval = pValue;
-    cramersv = cramers_v;
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
-
-    # Unweighted co-variance
-    covXY = cov(X,Y);
-
-    # compute standard deviations for both X and Y by computing 2^nd central 
moment
-    W = nrow(X);
-    m2X = moment(X,2);
-    m2Y = moment(Y,2);
-    sigmaX = sqrt(m2X * (W/(W-1.0)) );
-    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
-
-    # Pearson's R
-    R = covXY / (sigmaX*sigmaY);
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-# Y points to SCALE variable
-# A points to CATEGORICAL variable
-bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, 
Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] 
CVars ) {
-
-    # mean and variance in target variable
-    W = nrow(A);
-    my = mean(Y);
-    varY = moment(Y,2) * W/(W-1.0)
-
-    # category-wise (frequencies, means, variances)
-    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
-    CMeans = aggregate(target=Y, groups=A, fn="mean");
-    CVars =  aggregate(target=Y, groups=A, fn="variance");
-
-    # number of categories
-    R = nrow(CFreqs);
-
-    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
-
-    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
-    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
-    AnovaF = anova_num/anova_den;
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-# Function to compute ranks
-# takes a column vector as input, and produces a vector of same size in which 
each cell denotes to the computed score for that category
-computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
-    Ranks = cumsum(X) - X/2 + 1/2;
-}
-
-#-------------------------------------------------------------------------
-
-bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
-
-    # compute contingency table
-    F = table(A,B);
-
-    catA = nrow(F);  # number of categories in A
-    catB = ncol(F);  # number of categories in B
-
-    # compute category-wise counts for both the attributes
-    R = rowSums(F);
-    S = colSums(F);
-
-    # compute scores, both are column vectors
-    [C] = computeRanks(R);
-    meanX = mean(C,R); 
-
-    columnS = t(S);
-    [D] = computeRanks(columnS);
-
-    # scores (C,D) are individual values, and counts (R,S) act as weights
-    meanY = mean(D,columnS);
-
-    W = sum(F); # total weight, or total #cases
-    varX = moment(C,R,2)*(W/(W-1.0));
-    varY = moment(D,columnS,2)*(W/(W-1.0));
-    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
-
-    sp = covXY/(sqrt(varX)*sqrt(varY));
-}
-
-# 
-----------------------------------------------------------------------------------------------------------
-
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+
+/*
+ *
+ * For a given pair of attribute sets, compute bivariate statistics between 
all attribute pairs 
+ *   Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} 
+ *          compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) 
and (1<= j <=n)
+ *
+ * Seven inputs:  
+ *    $1) D  - input data
+ *    $2) S1 - First attribute set {A_11, A_12, ... A_1m}
+ *    $3) S2 - Second attribute set {A_21, A_22, ... A_2n}
+ *    $4) K1 - kind for attributes in S1 
+ *    $5) K2 - kind for attributes in S2
+ *             kind=1 for scale, kind=2 for nominal, kind=3 for ordinal
+ *    $6) numPairs - total number of pairs (m*n)
+ *    $7) maxC - maximum number of categories in any categorical attribute
+ * 
+ * One output:    
+ *    $6) output directory in which following four statistics files are created
+ *        + bivar.stats - matrix with all 8 bivariate statistics computed for 
different attribute pairs
+ *                        (R, (chi-sq, df, pval, cramersv), spearman, Eta, F)
+ *        + categorical.counts - 
+ *        + categorical.means - 
+ *        + categorical.variances - 
+ *          -> Values in these three matrices are applicable only for 
scale-categorical attribute pairs. 
+ *          k^th column in these matrices denote the attribute pair 
(A_1i,A_2j) where i*j = k.
+ */
+
+D = read($1, rows=$7, cols=$8);  # input data set
+S1 = read($2, rows=1, cols=$9); # attribute set 1
+S2 = read($3, rows=1, cols=$9); # attribute set 2
+K1 = read($4, rows=1, cols=$9); # kind for attributes in S1
+K2 = read($5, rows=1, cols=$9); # kind for attributes in S2
+numPairs = $10; # number of attribute pairs (|S1|*|S2|)
+maxC = $11;     # max number of categories in any categorical attribute
+
+s1size = ncol(S1);
+s2size = ncol(S2);
+
+# R, chisq, cramers, spearman, eta, anovaf
+numstats = 8;
+basestats = matrix(0, rows=numstats, cols=numPairs);
+cat_counts = matrix(0, rows=maxC, cols=numPairs);
+cat_means = matrix(0, rows=maxC, cols=numPairs);
+cat_vars = matrix(0, rows=maxC, cols=numPairs);
+
+
+parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) {
+    a1 = castAsScalar(S1[,i]);
+    k1 = castAsScalar(K1[1,i]);
+    A1 = D[,a1];
+
+    parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) {
+        pairID = (i-1)*s2size+j; 
+        a2 = castAsScalar(S2[,j]);
+        k2 = castAsScalar(K2[1,j]);
+        A2 = D[,a2];
+    
+        if (k1 == k2) {
+            if (k1 == 1) {
+                # scale-scale
+                print("[" + i + "," + j + "] scale-scale");
+                r = bivar_ss(A1,A2);   
+                basestats[1,pairID] = r;
+            } else {
+                # nominal-nominal or ordinal-ordinal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+
+                if ( k1 == 3 ) {
+                    # ordinal-ordinal
+                    print("[" + i + "," + j + "] ordinal-ordinal");
+                    sp = bivar_oo(A1, A2);
+                    basestats[6,pairID] = sp;
+                }
+            }
+        } 
+        else {
+            if (k1 == 1 | k2 == 1) {
+                # Scale-nominal/ordinal      TODO MB correctness errors
+                print("[" + i + "," + j + "] scale-categorical");
+                
+               if ( k1 == 1 ) {
+                    [eta,f, counts, means, vars] = bivar_sc(A1,A2);
+                }
+                else {
+                    [eta,f, counts, means, vars] = bivar_sc(A2,A1);
+                }
+                basestats[7,pairID] = eta;
+                basestats[8,pairID] = f;
+                cat_counts[,pairID] = counts;
+                cat_means[,pairID] = means;
+                cat_vars[,pairID] = vars; 
+            }
+            else {
+                # nominal-ordinal or ordinal-nominal
+                print("[" + i + "," + j + "] categorical-categorical");
+                [chisq, df, pval, cramersv]  = bivar_cc(A1,A2);
+                basestats[2,pairID] = chisq;
+                basestats[3,pairID] = df;
+                basestats[4,pairID] = pval;
+                basestats[5,pairID] = cramersv;
+            }
+        }
+    }
+}
+
+write(basestats, $6 + "/bivar.stats");
+write(cat_counts, $6 + "/category.counts");
+write(cat_means, $6 + "/category.means");
+write(cat_vars, $6 + "/category.variances");
+
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, 
Double df, Double pval, Double cramersv) {
+
+    # Contingency Table
+    F = table(A,B);
+
+    # Chi-Squared
+    W = sum(F);
+    r = rowSums(F);
+    c = colSums(F);
+    E = (r %*% c)/W;
+    T = (F-E)^2/E;
+    chi_squared = sum(T);
+
+    # compute p-value
+    degFreedom = (nrow(F)-1)*(ncol(F)-1);
+    pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE);
+
+    # Cramer's V
+    R = nrow(F);
+    C = ncol(F);
+    q = min(R,C);
+    cramers_v = sqrt(chi_squared/(W*(q-1)));
+
+    # Assign return values
+    chisq = chi_squared;
+    df = as.double(degFreedom);
+    pval = pValue;
+    cramersv = cramers_v;
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) {
+
+    # Unweighted co-variance
+    covXY = cov(X,Y);
+
+    # compute standard deviations for both X and Y by computing 2^nd central 
moment
+    W = nrow(X);
+    m2X = moment(X,2);
+    m2Y = moment(Y,2);
+    sigmaX = sqrt(m2X * (W/(W-1.0)) );
+    sigmaY = sqrt(m2Y * (W/(W-1.0)) );
+
+    # Pearson's R
+    R = covXY / (sigmaX*sigmaY);
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+# Y points to SCALE variable
+# A points to CATEGORICAL variable
+bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, 
Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] 
CVars ) {
+
+    # mean and variance in target variable
+    W = nrow(A);
+    my = mean(Y);
+    varY = moment(Y,2) * W/(W-1.0)
+
+    # category-wise (frequencies, means, variances)
+    CFreqs = aggregate(target=Y, groups=A, fn="count"); 
+    CMeans = aggregate(target=Y, groups=A, fn="mean");
+    CVars =  aggregate(target=Y, groups=A, fn="variance");
+
+    # number of categories
+    R = nrow(CFreqs);
+
+    Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) ));
+
+    anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1);
+    anova_den = sum( (CFreqs-1)*CVars )/(W-R);
+    AnovaF = anova_num/anova_den;
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+# Function to compute ranks
+# takes a column vector as input, and produces a vector of same size in which 
each cell denotes to the computed score for that category
+computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) {
+    Ranks = cumsum(X) - X/2 + 1/2;
+}
+
+#-------------------------------------------------------------------------
+
+bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) {
+
+    # compute contingency table
+    F = table(A,B);
+
+    catA = nrow(F);  # number of categories in A
+    catB = ncol(F);  # number of categories in B
+
+    # compute category-wise counts for both the attributes
+    R = rowSums(F);
+    S = colSums(F);
+
+    # compute scores, both are column vectors
+    [C] = computeRanks(R);
+    meanX = mean(C,R); 
+
+    columnS = t(S);
+    [D] = computeRanks(columnS);
+
+    # scores (C,D) are individual values, and counts (R,S) act as weights
+    meanY = mean(D,columnS);
+
+    W = sum(F); # total weight, or total #cases
+    varX = moment(C,R,2)*(W/(W-1.0));
+    varY = moment(D,columnS,2)*(W/(W-1.0));
+    covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) );
+
+    sp = covXY/(sqrt(varX)*sqrt(varY));
+}
+
+# 
-----------------------------------------------------------------------------------------------------------
+
+

[19/51] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Reply via email to