[39/51] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

dusenberrymw Fri, 22 Jan 2016 08:34:44 -0800

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/datagen/genRandData4LogReg_LTstats.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4LogReg_LTstats.dml 
b/scripts/datagen/genRandData4LogReg_LTstats.dml
index 6742f0c..2ec5aef 100644
--- a/scripts/datagen/genRandData4LogReg_LTstats.dml
+++ b/scripts/datagen/genRandData4LogReg_LTstats.dml
@@ -1,233 +1,233 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# generates random data to test bi- and multinomial logistic regression
-
-# $N  = number of training samples
-# $Nt = number of test samples (or 0 if none)
-# $nf = number of features (independent variables)
-# $nc = number of categories; = 1 if "binomial" with +1/-1 labels
-# $Xmin  = minimum feature value
-# $Xmax  = maximum feature value
-# $spars = controls sparsity in the generated data
-# $avgLTmin = average linear term (X %*% beta + intercept), minimum value
-# $avgLTmax = average linear term (X %*% beta + intercept), maximum value
-# $stdLT = requested standard deviation for the linear terms
-# $iceptmin = intercept, minimum value (0.0 disables intercept)
-# $iceptmax = intercept, maximum value (0.0 disables intercept)
-# $B  = location to store generated regression parameters
-# $X  = location to store generated training data
-# $Y  = location to store generated training category labels
-# $Xt = location to store generated test data
-# $Yt = location to store generated test category labels
-#
-# Example:
-# hadoop jar SystemML.jar -f genRandData4LogReg_LTstats.dml -nvargs
-#     N=1000000 Nt=1000 nf=20 nc=3 Xmin=0.0 Xmax=1.0 spars=1.0 avgLTmin=3.0 
avgLTmax=5.0 stdLT=1.25
-#     iceptmin=1.0 iceptmax=1.0 B=./B123 X=./X123 Y=./Y123 Xt=./Xt123 
Yt=./Yt123
-
-numTrainingSamples = $N;
-numTestSamples = $Nt;
-numFeatures = $nf;
-numCategories = $nc;
-minIntercept = $iceptmin;
-maxIntercept = $iceptmax;
-minXentry = $Xmin;
-maxXentry = $Xmax;
-minAvgLT = $avgLTmin;
-maxAvgLT = $avgLTmax;
-sparsityLevel = $spars;
-stdevLT = $stdLT;
-fileB  = ifdef ($B,  "B");
-fileX  = ifdef ($X,  "X");
-fileY  = ifdef ($Y,  "Y");
-fileXt = ifdef ($Xt, "Xt");
-fileYt = ifdef ($Yt, "Yt");
-
-
-numSamples = numTrainingSamples + numTestSamples;
-
-isBinomialPMOne = FALSE;
-if (numCategories == 1) {
-    numCategories = 2;
-    isBinomialPMOne = TRUE;
-}
-do_we_output_intercept = 1;
-if (minIntercept == 0.0 & maxIntercept == 0.0) {
-    do_we_output_intercept = 0;
-}
-
-X = Rand (rows = numSamples, cols = numFeatures, min = minXentry, max = 
maxXentry, pdf = "uniform", sparsity = sparsityLevel);
-
-meanLT  = Rand (rows = 1, cols = numCategories - 1, min = minAvgLT, max = 
maxAvgLT, pdf = "uniform");
-sigmaLT = matrix (stdevLT, rows = 1, cols = numCategories - 1);
-b_intercept = Rand (rows = 1, cols = numCategories - 1, min = minIntercept, 
max = maxIntercept, pdf = "uniform");
-
-meanLT_minus_intercept = meanLT - b_intercept;
-[B, new_sigmaLT] = generateWeights (X, meanLT_minus_intercept, sigmaLT);
-
-ones = matrix (1.0, rows = numSamples, cols = 1);
-LT = X %*% B + ones %*% b_intercept;
-actual_meanLT  = colSums (LT) / numSamples;
-actual_sigmaLT = sqrt (colSums ((LT - ones %*% actual_meanLT)^2) / numSamples);
-
-for (i in 1:(numCategories - 1)) {
-    if (castAsScalar (new_sigmaLT [1, i]) == castAsScalar (sigmaLT [1, i])) {
-        print ("Category " + i + ":  Intercept = " + castAsScalar (b_intercept 
[1, i])); 
-    } else {
-        print ("Category " + i + ":  Intercept = " + castAsScalar (b_intercept 
[1, i]) + ",  st.dev.(LT) relaxed from " + castAsScalar (sigmaLT [1, i])); 
-    }
-    print ("    Wanted LT mean = " + castAsScalar (meanLT [1, i])        + ",  
st.dev. = " + castAsScalar (new_sigmaLT [1, i]));
-    print ("    Actual LT mean = " + castAsScalar (actual_meanLT [1, i]) + ",  
st.dev. = " + castAsScalar (actual_sigmaLT [1, i]));
-}
-
-
-ones = matrix (1.0, rows = 1, cols = numCategories - 1);
-Prob = exp (LT);
-Prob = Prob / ((1.0 + rowSums (Prob)) %*% ones);
-Prob = t(cumsum (t(Prob)));
-
-r = Rand (rows = numSamples, cols = 1, min = 0, max = 1, pdf = "uniform", seed 
= 0);
-R = r %*% ones;
-Y = 1 + rowSums (ppred (Prob, R, "<"));
-if (isBinomialPMOne) {
-    Y = 3 - 2 * Y;
-}
-
-
-/* USE FOR LINEAR REGRESSION
-
-r = Rand (rows = numSamples, cols = 1, pdf = "normal");
-Y = LT [, 1] + r;
-
-*/
-
-
-if (do_we_output_intercept == 1) {
-    new_B = matrix (0.0, rows = nrow(B) + 1, cols = ncol(B));
-    new_B [1:nrow(B), 1:ncol(B)] = B;
-    new_B [nrow(B)+1, 1:ncol(B)] = b_intercept;
-    write (new_B, fileB, format="mm");
-} else {
-    write (B, fileB, format="mm");
-}
-
-if (numTestSamples > 0) {
-    X_train = X [1:numTrainingSamples,];
-    Y_train = Y [1:numTrainingSamples,];
-    X_test  = X [(numTrainingSamples+1):numSamples,];
-    Y_test  = Y [(numTrainingSamples+1):numSamples,];
-    write (X_train, fileX,  format="mm");
-    write (Y_train, fileY,  format="mm");
-    write (X_test,  fileXt, format="mm");
-    write (Y_test,  fileYt, format="mm");
-} else {
-    write (X, fileX, format="mm");
-    write (Y, fileY, format="mm");
-}
-
-
-
-
-
-
-# Generates weight vectors to ensure the desired statistics for Linear Terms = 
X %*% W
-# To be used for data generation in the testing of GLM, Logistic Regression, 
etc.
-# INPUT:  meanLT and sigmaLT are row vectors, meanLT[1, i] and sigmaLT[1, i] 
are
-#         the desired mean and standard deviation for X %*% W[, i]
-# OUTPUT: "W" is the matrix of generated (column) weight vectors W[, i]
-#         new_sigmaLT[1, i] == sigmaLT[1, i] if the std.dev is successfully 
enforced,
-#         new_sigmaLT[1, i]  > sigmaLT[1, i] if we had to relax this 
constraint.
-generateWeights = 
-    function (Matrix[double] X, Matrix[double] meanLT, Matrix[double] sigmaLT)
-    return   (Matrix[double] W, Matrix[double] new_sigmaLT)
-{
-    num_w = ncol (meanLT);  # Number of output weight vectors
-    dim_w = ncol (X);       # Number of features / dimensions in a weight 
vector
-    w_X = t(colSums(X));    # "Prohibited" weight shift direction that changes 
meanLT
-                            # (all orthogonal shift directions do not affect 
meanLT)
-
-    # Compute "w_1" with meanLT = 1 and with the smallest possible sigmaLT
-
-    w_1 = straightenX (X);
-    r_1 = (X %*% w_1) - 1.0;
-    norm_r_1_sq = sum (r_1 ^ 2);
-    
-    # For each W[, i] generate uniformly random directions to shift away from 
"w_1"
-    
-    DW_raw = Rand (rows = dim_w, cols = num_w, pdf = "normal");
-    DW = DW_raw - (w_X %*% t(w_X) %*% DW_raw) / sum (w_X ^ 2); # Orthogonal to 
w_X
-    XDW = X %*% DW;
-    
-    # Determine how far to shift in the chosen directions to satisfy the 
constraints
-    # Use the positive root of the quadratic equation; relax sigmaLT where 
needed
-    
-    a_qe = colSums (XDW ^ 2);
-    b_qe = 2.0 * meanLT * (t(r_1) %*% XDW);
-    c_qe = meanLT^2 * norm_r_1_sq - sigmaLT^2 * nrow(X);
-
-    is_sigmaLT_OK = ppred (c_qe, 0.0, "<=");
-    new_sigmaLT = is_sigmaLT_OK * sigmaLT + (1 - is_sigmaLT_OK) * abs (meanLT) 
* sqrt (norm_r_1_sq / nrow(X));
-    c_qe = is_sigmaLT_OK * c_qe;
-    x_qe = (- b_qe + sqrt (b_qe * b_qe - 4.0 * a_qe * c_qe)) / (2.0 * a_qe);
-    
-    # Scale and shift "w_1" in the "DW" directions to produce the result:
-    
-    ones = matrix (1.0, rows = dim_w, cols = 1);
-    W = w_1 %*% meanLT + DW * (ones %*% x_qe);
-}
-
-# Computes vector w such that  ||X %*% w - 1|| -> MIN  given  avg(X %*% w) = 1
-# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
-# it to compute  w = c * z_LS  such that  sum(X %*% w) = nrow(X).
-straightenX =
-    function (Matrix[double] X)
-    return   (Matrix[double] w)
-{
-    w_X = t(colSums(X));
-    lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);
-    eps = 0.000000001 * nrow(X);
-
-    # BEGIN LEAST SQUARES
-    
-    r_LS = - w_X;
-    z_LS = matrix (0.0, rows = ncol(X), cols = 1);
-    p_LS = - r_LS;
-    norm_r2_LS = sum (r_LS ^ 2);
-    i_LS = 0;
-    while (i_LS < 50 & i_LS < ncol(X) & norm_r2_LS >= eps)
-    {
-        temp_LS = X %*% p_LS;
-        q_LS = (t(X) %*% temp_LS) + lambda_LS * p_LS;
-        alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
-        z_LS = z_LS + alpha_LS * p_LS;
-        old_norm_r2_LS = norm_r2_LS;
-        r_LS = r_LS + alpha_LS * q_LS;
-        norm_r2_LS = sum (r_LS ^ 2);
-        p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;
-        i_LS = i_LS + 1;
-    }
-    
-    # END LEAST SQUARES
-    
-    w = (nrow(X) / sum (w_X * z_LS)) * z_LS;
-}
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# generates random data to test bi- and multinomial logistic regression
+
+# $N  = number of training samples
+# $Nt = number of test samples (or 0 if none)
+# $nf = number of features (independent variables)
+# $nc = number of categories; = 1 if "binomial" with +1/-1 labels
+# $Xmin  = minimum feature value
+# $Xmax  = maximum feature value
+# $spars = controls sparsity in the generated data
+# $avgLTmin = average linear term (X %*% beta + intercept), minimum value
+# $avgLTmax = average linear term (X %*% beta + intercept), maximum value
+# $stdLT = requested standard deviation for the linear terms
+# $iceptmin = intercept, minimum value (0.0 disables intercept)
+# $iceptmax = intercept, maximum value (0.0 disables intercept)
+# $B  = location to store generated regression parameters
+# $X  = location to store generated training data
+# $Y  = location to store generated training category labels
+# $Xt = location to store generated test data
+# $Yt = location to store generated test category labels
+#
+# Example:
+# hadoop jar SystemML.jar -f genRandData4LogReg_LTstats.dml -nvargs
+#     N=1000000 Nt=1000 nf=20 nc=3 Xmin=0.0 Xmax=1.0 spars=1.0 avgLTmin=3.0 
avgLTmax=5.0 stdLT=1.25
+#     iceptmin=1.0 iceptmax=1.0 B=./B123 X=./X123 Y=./Y123 Xt=./Xt123 
Yt=./Yt123
+
+numTrainingSamples = $N;
+numTestSamples = $Nt;
+numFeatures = $nf;
+numCategories = $nc;
+minIntercept = $iceptmin;
+maxIntercept = $iceptmax;
+minXentry = $Xmin;
+maxXentry = $Xmax;
+minAvgLT = $avgLTmin;
+maxAvgLT = $avgLTmax;
+sparsityLevel = $spars;
+stdevLT = $stdLT;
+fileB  = ifdef ($B,  "B");
+fileX  = ifdef ($X,  "X");
+fileY  = ifdef ($Y,  "Y");
+fileXt = ifdef ($Xt, "Xt");
+fileYt = ifdef ($Yt, "Yt");
+
+
+numSamples = numTrainingSamples + numTestSamples;
+
+isBinomialPMOne = FALSE;
+if (numCategories == 1) {
+    numCategories = 2;
+    isBinomialPMOne = TRUE;
+}
+do_we_output_intercept = 1;
+if (minIntercept == 0.0 & maxIntercept == 0.0) {
+    do_we_output_intercept = 0;
+}
+
+X = Rand (rows = numSamples, cols = numFeatures, min = minXentry, max = 
maxXentry, pdf = "uniform", sparsity = sparsityLevel);
+
+meanLT  = Rand (rows = 1, cols = numCategories - 1, min = minAvgLT, max = 
maxAvgLT, pdf = "uniform");
+sigmaLT = matrix (stdevLT, rows = 1, cols = numCategories - 1);
+b_intercept = Rand (rows = 1, cols = numCategories - 1, min = minIntercept, 
max = maxIntercept, pdf = "uniform");
+
+meanLT_minus_intercept = meanLT - b_intercept;
+[B, new_sigmaLT] = generateWeights (X, meanLT_minus_intercept, sigmaLT);
+
+ones = matrix (1.0, rows = numSamples, cols = 1);
+LT = X %*% B + ones %*% b_intercept;
+actual_meanLT  = colSums (LT) / numSamples;
+actual_sigmaLT = sqrt (colSums ((LT - ones %*% actual_meanLT)^2) / numSamples);
+
+for (i in 1:(numCategories - 1)) {
+    if (castAsScalar (new_sigmaLT [1, i]) == castAsScalar (sigmaLT [1, i])) {
+        print ("Category " + i + ":  Intercept = " + castAsScalar (b_intercept 
[1, i])); 
+    } else {
+        print ("Category " + i + ":  Intercept = " + castAsScalar (b_intercept 
[1, i]) + ",  st.dev.(LT) relaxed from " + castAsScalar (sigmaLT [1, i])); 
+    }
+    print ("    Wanted LT mean = " + castAsScalar (meanLT [1, i])        + ",  
st.dev. = " + castAsScalar (new_sigmaLT [1, i]));
+    print ("    Actual LT mean = " + castAsScalar (actual_meanLT [1, i]) + ",  
st.dev. = " + castAsScalar (actual_sigmaLT [1, i]));
+}
+
+
+ones = matrix (1.0, rows = 1, cols = numCategories - 1);
+Prob = exp (LT);
+Prob = Prob / ((1.0 + rowSums (Prob)) %*% ones);
+Prob = t(cumsum (t(Prob)));
+
+r = Rand (rows = numSamples, cols = 1, min = 0, max = 1, pdf = "uniform", seed 
= 0);
+R = r %*% ones;
+Y = 1 + rowSums (ppred (Prob, R, "<"));
+if (isBinomialPMOne) {
+    Y = 3 - 2 * Y;
+}
+
+
+/* USE FOR LINEAR REGRESSION
+
+r = Rand (rows = numSamples, cols = 1, pdf = "normal");
+Y = LT [, 1] + r;
+
+*/
+
+
+if (do_we_output_intercept == 1) {
+    new_B = matrix (0.0, rows = nrow(B) + 1, cols = ncol(B));
+    new_B [1:nrow(B), 1:ncol(B)] = B;
+    new_B [nrow(B)+1, 1:ncol(B)] = b_intercept;
+    write (new_B, fileB, format="mm");
+} else {
+    write (B, fileB, format="mm");
+}
+
+if (numTestSamples > 0) {
+    X_train = X [1:numTrainingSamples,];
+    Y_train = Y [1:numTrainingSamples,];
+    X_test  = X [(numTrainingSamples+1):numSamples,];
+    Y_test  = Y [(numTrainingSamples+1):numSamples,];
+    write (X_train, fileX,  format="mm");
+    write (Y_train, fileY,  format="mm");
+    write (X_test,  fileXt, format="mm");
+    write (Y_test,  fileYt, format="mm");
+} else {
+    write (X, fileX, format="mm");
+    write (Y, fileY, format="mm");
+}
+
+
+
+
+
+
+# Generates weight vectors to ensure the desired statistics for Linear Terms = 
X %*% W
+# To be used for data generation in the testing of GLM, Logistic Regression, 
etc.
+# INPUT:  meanLT and sigmaLT are row vectors, meanLT[1, i] and sigmaLT[1, i] 
are
+#         the desired mean and standard deviation for X %*% W[, i]
+# OUTPUT: "W" is the matrix of generated (column) weight vectors W[, i]
+#         new_sigmaLT[1, i] == sigmaLT[1, i] if the std.dev is successfully 
enforced,
+#         new_sigmaLT[1, i]  > sigmaLT[1, i] if we had to relax this 
constraint.
+generateWeights = 
+    function (Matrix[double] X, Matrix[double] meanLT, Matrix[double] sigmaLT)
+    return   (Matrix[double] W, Matrix[double] new_sigmaLT)
+{
+    num_w = ncol (meanLT);  # Number of output weight vectors
+    dim_w = ncol (X);       # Number of features / dimensions in a weight 
vector
+    w_X = t(colSums(X));    # "Prohibited" weight shift direction that changes 
meanLT
+                            # (all orthogonal shift directions do not affect 
meanLT)
+
+    # Compute "w_1" with meanLT = 1 and with the smallest possible sigmaLT
+
+    w_1 = straightenX (X);
+    r_1 = (X %*% w_1) - 1.0;
+    norm_r_1_sq = sum (r_1 ^ 2);
+    
+    # For each W[, i] generate uniformly random directions to shift away from 
"w_1"
+    
+    DW_raw = Rand (rows = dim_w, cols = num_w, pdf = "normal");
+    DW = DW_raw - (w_X %*% t(w_X) %*% DW_raw) / sum (w_X ^ 2); # Orthogonal to 
w_X
+    XDW = X %*% DW;
+    
+    # Determine how far to shift in the chosen directions to satisfy the 
constraints
+    # Use the positive root of the quadratic equation; relax sigmaLT where 
needed
+    
+    a_qe = colSums (XDW ^ 2);
+    b_qe = 2.0 * meanLT * (t(r_1) %*% XDW);
+    c_qe = meanLT^2 * norm_r_1_sq - sigmaLT^2 * nrow(X);
+
+    is_sigmaLT_OK = ppred (c_qe, 0.0, "<=");
+    new_sigmaLT = is_sigmaLT_OK * sigmaLT + (1 - is_sigmaLT_OK) * abs (meanLT) 
* sqrt (norm_r_1_sq / nrow(X));
+    c_qe = is_sigmaLT_OK * c_qe;
+    x_qe = (- b_qe + sqrt (b_qe * b_qe - 4.0 * a_qe * c_qe)) / (2.0 * a_qe);
+    
+    # Scale and shift "w_1" in the "DW" directions to produce the result:
+    
+    ones = matrix (1.0, rows = dim_w, cols = 1);
+    W = w_1 %*% meanLT + DW * (ones %*% x_qe);
+}
+
+# Computes vector w such that  ||X %*% w - 1|| -> MIN  given  avg(X %*% w) = 1
+# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
+# it to compute  w = c * z_LS  such that  sum(X %*% w) = nrow(X).
+straightenX =
+    function (Matrix[double] X)
+    return   (Matrix[double] w)
+{
+    w_X = t(colSums(X));
+    lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);
+    eps = 0.000000001 * nrow(X);
+
+    # BEGIN LEAST SQUARES
+    
+    r_LS = - w_X;
+    z_LS = matrix (0.0, rows = ncol(X), cols = 1);
+    p_LS = - r_LS;
+    norm_r2_LS = sum (r_LS ^ 2);
+    i_LS = 0;
+    while (i_LS < 50 & i_LS < ncol(X) & norm_r2_LS >= eps)
+    {
+        temp_LS = X %*% p_LS;
+        q_LS = (t(X) %*% temp_LS) + lambda_LS * p_LS;
+        alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
+        z_LS = z_LS + alpha_LS * p_LS;
+        old_norm_r2_LS = norm_r2_LS;
+        r_LS = r_LS + alpha_LS * q_LS;
+        norm_r2_LS = sum (r_LS ^ 2);
+        p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;
+        i_LS = i_LS + 1;
+    }
+    
+    # END LEAST SQUARES
+    
+    w = (nrow(X) / sum (w_X * z_LS)) * z_LS;
+}


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/datagen/genRandData4MultiClassSVM.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4MultiClassSVM.dml 
b/scripts/datagen/genRandData4MultiClassSVM.dml
index 65ee1d4..5d9fbcb 100644
--- a/scripts/datagen/genRandData4MultiClassSVM.dml
+++ b/scripts/datagen/genRandData4MultiClassSVM.dml
@@ -1,68 +1,68 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data to test linear logistic regression
-
-# $1 is number of samples
-# $2 is number of features (independent variables)
-# $3 is maximum feature value (absolute value)
-# $4 is maximum weight (absolute value)
-# $5 is location to store generated weights
-# $6 is location to store generated data
-# $7 is location to store generated labels
-# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
-# $9 is b, 0 disables intercept
-# $10 controls sparsity in the generated data
-
-numSamples = $1
-numFeatures = $2
-maxFeatureValue = $3
-maxWeight = $4
-addNoise = $8
-b = $9
-
-X = Rand(rows=numSamples, cols=numFeatures, min=-1, max=1, pdf="uniform", 
seed=0, sparsity=$10)
-X = X * maxFeatureValue 
-
-w = Rand(rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
-w = w * maxWeight
-
-ot = X%*%w
-if(b!=0) {
-       b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform")
-       w =  t(append(t(w), b_mat))
-       ot = ot + b
-}
-
-prob = 1/(1+exp(-ot))
-if(addNoise == 1){
-       r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-}else{
-       print("this data generator generates the same dataset for both noise=0 
and noise=1")
-       r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
-       #r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
-}
-Y = 1 - 2*ppred(prob, r, "<")
-Y = (Y+3)/2
-
-write(w, $5, format="binary")
-write(X, $6, format="binary")
-write(Y, $7, format="binary")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data to test linear logistic regression
+
+# $1 is number of samples
+# $2 is number of features (independent variables)
+# $3 is maximum feature value (absolute value)
+# $4 is maximum weight (absolute value)
+# $5 is location to store generated weights
+# $6 is location to store generated data
+# $7 is location to store generated labels
+# $8 addNoise. if 0 then no noise is added, to add noise set this to 1
+# $9 is b, 0 disables intercept
+# $10 controls sparsity in the generated data
+
+numSamples = $1
+numFeatures = $2
+maxFeatureValue = $3
+maxWeight = $4
+addNoise = $8
+b = $9
+
+X = Rand(rows=numSamples, cols=numFeatures, min=-1, max=1, pdf="uniform", 
seed=0, sparsity=$10)
+X = X * maxFeatureValue 
+
+w = Rand(rows=numFeatures, cols=1, min=-1, max=1, pdf="uniform", seed=0)
+w = w * maxWeight
+
+ot = X%*%w
+if(b!=0) {
+       b_mat = Rand(rows=1, cols=1, min=b, max=b, pdf="uniform")
+       w =  t(append(t(w), b_mat))
+       ot = ot + b
+}
+
+prob = 1/(1+exp(-ot))
+if(addNoise == 1){
+       r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+}else{
+       print("this data generator generates the same dataset for both noise=0 
and noise=1")
+       r = Rand(rows=numSamples, cols=1, min=0, max=1, pdf="uniform", seed=0)
+       #r = Rand(rows=numSamples, cols=1, min=0.5, max=0.5, pdf="uniform")
+}
+Y = 1 - 2*ppred(prob, r, "<")
+Y = (Y+3)/2
+
+write(w, $5, format="binary")
+write(X, $6, format="binary")
+write(Y, $7, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/datagen/genRandData4NMF.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4NMF.dml 
b/scripts/datagen/genRandData4NMF.dml
index 5988d48..cf18430 100644
--- a/scripts/datagen/genRandData4NMF.dml
+++ b/scripts/datagen/genRandData4NMF.dml
@@ -1,129 +1,129 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data for non-negative
-# matrix factorization
-#
-# follows lda's generative model
-# see Blei, Ng & Jordan, JMLR'03 paper
-# titled Latent Dirichlet Allocation
-#
-# $1 is number of samples
-# $2 is number of features
-# $3 is number of latent factors
-# $4 is number of features per sample
-#       (may overlap). use this to vary
-#       sparsity.      
-# $5 is file to store sample mixtures
-# $6 is file to store factors
-# $7 is file to store generated data
-
-numDocuments = $1
-numFeatures = $2
-numTopics = $3
-numWordsPerDoc = $4
-
-docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, 
pdf="uniform", seed=0, sparsity=0.75)
-denomsTM = rowSums(docTopicMixtures)
-zerosInDenomsTM = ppred(denomsTM, 0, "==")
-denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
-parfor(i in 1:numTopics){
-       docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM
-}
-write(docTopicMixtures, $5, format="binary")
-for(j in 2:numTopics){
-       docTopicMixtures[,j] = docTopicMixtures[,j-1] + docTopicMixtures[,j]
-}
-
-topicDistributions = Rand(rows=numTopics, cols=numFeatures, min=0.0, max=1.0, 
pdf="uniform", seed=0, sparsity=0.75)
-parfor(i in 1:numTopics){
-       topicDist = topicDistributions[i,]
-       
-       denom2 = sum(topicDist)
-       if(denom2 == 0){
-               denom2 = denom2 + 0.1
-       }
-       
-       topicDistributions[i,] = topicDist / denom2
-}
-write(topicDistributions, $6, format="binary")
-for(j in 2:numFeatures){
-       topicDistributions[,j] = topicDistributions[,j-1] + 
topicDistributions[,j]
-}
-
-data = Rand(rows=numDocuments, cols=numFeatures, min=0, max=0, pdf="uniform")
-
-parfor(i in 1:numDocuments){
-       docTopic = docTopicMixtures[i,]
-       
-    ldata = Rand(rows=1, cols=numFeatures, min=0, max=0, pdf="uniform");
-  
-       r_z = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", 
seed=0)
-       r_w = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", 
seed=0)
-       
-       for(j in 1:numWordsPerDoc){
-               rz = castAsScalar(r_z[j,1])
-               continue = 1
-               
-               z = -1
-               #this is a workaround
-               #z=1    
-               
-               for(k1 in 1:numTopics){
-                       prob = castAsScalar(docTopic[1,k1])
-                       if(continue==1 & rz <= prob){
-                               z=k1
-                               continue=0
-                       }
-               }
-               
-               if(z==-1){
-                       print("z is unassigned: " + z)
-                       z = numTopics
-               }
-               
-               rw = castAsScalar(r_w[j,1])
-               continue = 1
-               
-               w = -1
-               #this is a workaround
-               #w = 1
-               
-               for(k2 in 1:numFeatures){
-                       prob = castAsScalar(topicDistributions[z,k2])
-                       if(continue == 1 & rw <= prob){
-                               w = k2
-                               continue = 0
-                       }
-               }
-               
-               if(w==-1){
-                       print("w is unassigned: " + w)
-                       w = numFeatures
-               }
-               
-               ldata[1,w] = ldata[1,w] + 1
-       }
-  
-    data[i,] = ldata;
-}
-
-write(data, $7, format="binary")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data for non-negative
+# matrix factorization
+#
+# follows lda's generative model
+# see Blei, Ng & Jordan, JMLR'03 paper
+# titled Latent Dirichlet Allocation
+#
+# $1 is number of samples
+# $2 is number of features
+# $3 is number of latent factors
+# $4 is number of features per sample
+#       (may overlap). use this to vary
+#       sparsity.      
+# $5 is file to store sample mixtures
+# $6 is file to store factors
+# $7 is file to store generated data
+
+numDocuments = $1
+numFeatures = $2
+numTopics = $3
+numWordsPerDoc = $4
+
+docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, 
pdf="uniform", seed=0, sparsity=0.75)
+denomsTM = rowSums(docTopicMixtures)
+zerosInDenomsTM = ppred(denomsTM, 0, "==")
+denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
+parfor(i in 1:numTopics){
+       docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM
+}
+write(docTopicMixtures, $5, format="binary")
+for(j in 2:numTopics){
+       docTopicMixtures[,j] = docTopicMixtures[,j-1] + docTopicMixtures[,j]
+}
+
+topicDistributions = Rand(rows=numTopics, cols=numFeatures, min=0.0, max=1.0, 
pdf="uniform", seed=0, sparsity=0.75)
+parfor(i in 1:numTopics){
+       topicDist = topicDistributions[i,]
+       
+       denom2 = sum(topicDist)
+       if(denom2 == 0){
+               denom2 = denom2 + 0.1
+       }
+       
+       topicDistributions[i,] = topicDist / denom2
+}
+write(topicDistributions, $6, format="binary")
+for(j in 2:numFeatures){
+       topicDistributions[,j] = topicDistributions[,j-1] + 
topicDistributions[,j]
+}
+
+data = Rand(rows=numDocuments, cols=numFeatures, min=0, max=0, pdf="uniform")
+
+parfor(i in 1:numDocuments){
+       docTopic = docTopicMixtures[i,]
+       
+    ldata = Rand(rows=1, cols=numFeatures, min=0, max=0, pdf="uniform");
+  
+       r_z = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", 
seed=0)
+       r_w = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", 
seed=0)
+       
+       for(j in 1:numWordsPerDoc){
+               rz = castAsScalar(r_z[j,1])
+               continue = 1
+               
+               z = -1
+               #this is a workaround
+               #z=1    
+               
+               for(k1 in 1:numTopics){
+                       prob = castAsScalar(docTopic[1,k1])
+                       if(continue==1 & rz <= prob){
+                               z=k1
+                               continue=0
+                       }
+               }
+               
+               if(z==-1){
+                       print("z is unassigned: " + z)
+                       z = numTopics
+               }
+               
+               rw = castAsScalar(r_w[j,1])
+               continue = 1
+               
+               w = -1
+               #this is a workaround
+               #w = 1
+               
+               for(k2 in 1:numFeatures){
+                       prob = castAsScalar(topicDistributions[z,k2])
+                       if(continue == 1 & rw <= prob){
+                               w = k2
+                               continue = 0
+                       }
+               }
+               
+               if(w==-1){
+                       print("w is unassigned: " + w)
+                       w = numFeatures
+               }
+               
+               ldata[1,w] = ldata[1,w] + 1
+       }
+  
+    data[i,] = ldata;
+}
+
+write(data, $7, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/datagen/genRandData4NMFBlockwise.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4NMFBlockwise.dml 
b/scripts/datagen/genRandData4NMFBlockwise.dml
index 63133da..e3fd67f 100644
--- a/scripts/datagen/genRandData4NMFBlockwise.dml
+++ b/scripts/datagen/genRandData4NMFBlockwise.dml
@@ -1,138 +1,138 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random data for non-negative
-# matrix factorization
-#
-# follows lda's generative model
-# see Blei, Ng & Jordan, JMLR'03 paper
-# titled Latent Dirichlet Allocation
-#
-# $1 is number of samples
-# $2 is number of features
-# $3 is number of latent factors
-# $4 is number of features per sample
-#       (may overlap). use this to vary
-#       sparsity.      
-# $5 is file to store sample mixtures
-# $6 is file to store factors
-# $7 is file to store generated data
-#
-# $8 is the blocksize, i.e., number of rows per block
-#    (should be set such that $8x$2 fits in mem budget)
-
-numDocuments = $1
-numFeatures = $2
-numTopics = $3
-numWordsPerDoc = $4
-blocksize = $8
-
-docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, 
pdf="uniform", seed=0, sparsity=0.75)
-denomsTM = rowSums(docTopicMixtures)
-zerosInDenomsTM = ppred(denomsTM, 0, "==")
-denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
-parfor(i in 1:numTopics){
-       docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM
-}
-write(docTopicMixtures, $5, format="binary")
-for(j in 2:numTopics){
-       docTopicMixtures[,j] = docTopicMixtures[,j-1] + docTopicMixtures[,j]
-}
-
-topicDistributions = Rand(rows=numTopics, cols=numFeatures, min=0.0, max=1.0, 
pdf="uniform", seed=0, sparsity=0.75)
-parfor(i in 1:numTopics){
-       topicDist = topicDistributions[i,]
-       
-       denom2 = sum(topicDist)
-       if(denom2 == 0){
-               denom2 = denom2 + 0.1
-       }
-       
-       topicDistributions[i,] = topicDist / denom2
-}
-write(topicDistributions, $6, format="binary")
-for(j in 2:numFeatures){
-       topicDistributions[,j] = topicDistributions[,j-1] + 
topicDistributions[,j]
-}
-
-data0 = Rand(rows=numDocuments, cols=numFeatures, min=0, max=0, pdf="uniform")
-
-#outer-loop for blockwise computation
-for( k in seq(1,numDocuments,blocksize) )  
-{
-  len = min(blocksize,numDocuments-k); #block length
-  data = data0[k:(k+len),];            #obtain block
-  
-  parfor(i in 1:len){
-       docTopic = docTopicMixtures[i,]
-       
-       r_z = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", 
seed=0)
-       r_w = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", 
seed=0)
-       
-       for(j in 1:numWordsPerDoc){
-               rz = castAsScalar(r_z[j,1])
-               continue = 1
-               
-               z = -1
-               #this is a workaround
-               #z=1    
-               
-               for(k1 in 1:numTopics){
-                       prob = castAsScalar(docTopic[1,k1])
-                       if(continue==1 & rz <= prob){
-                               z=k1
-                               continue=0
-                       }
-               }
-               
-               if(z==-1){
-                       print("z is unassigned: " + z)
-                       z = numTopics
-               }
-               
-               rw = castAsScalar(r_w[j,1])
-               continue = 1
-               
-               w = -1
-               #this is a workaround
-               #w = 1
-               
-               for(k2 in 1:numFeatures){
-                       prob = castAsScalar(topicDistributions[z,k2])
-                       if(continue == 1 & rw <= prob){
-                               w = k2
-                               continue = 0
-                       }
-               }
-               
-               if(w==-1){
-                       print("w is unassigned: " + w)
-                       w = numFeatures
-               }
-               
-               data[i,w] = data[i,w] + 1
-       }
-  }
-  
-  data0[k:(k+len),] = data; # write block back
-}
-
-write(data0, $7, format="binary")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random data for non-negative
+# matrix factorization
+#
+# follows lda's generative model
+# see Blei, Ng & Jordan, JMLR'03 paper
+# titled Latent Dirichlet Allocation
+#
+# $1 is number of samples
+# $2 is number of features
+# $3 is number of latent factors
+# $4 is number of features per sample
+#       (may overlap). use this to vary
+#       sparsity.      
+# $5 is file to store sample mixtures
+# $6 is file to store factors
+# $7 is file to store generated data
+#
+# $8 is the blocksize, i.e., number of rows per block
+#    (should be set such that $8x$2 fits in mem budget)
+
+numDocuments = $1
+numFeatures = $2
+numTopics = $3
+numWordsPerDoc = $4
+blocksize = $8
+
+docTopicMixtures = Rand(rows=numDocuments, cols=numTopics, min=0.0, max=1.0, 
pdf="uniform", seed=0, sparsity=0.75)
+denomsTM = rowSums(docTopicMixtures)
+zerosInDenomsTM = ppred(denomsTM, 0, "==")
+denomsTM = 0.1*zerosInDenomsTM + (1-zerosInDenomsTM)*denomsTM
+parfor(i in 1:numTopics){
+       docTopicMixtures[,i] = docTopicMixtures[,i]/denomsTM
+}
+write(docTopicMixtures, $5, format="binary")
+for(j in 2:numTopics){
+       docTopicMixtures[,j] = docTopicMixtures[,j-1] + docTopicMixtures[,j]
+}
+
+topicDistributions = Rand(rows=numTopics, cols=numFeatures, min=0.0, max=1.0, 
pdf="uniform", seed=0, sparsity=0.75)
+parfor(i in 1:numTopics){
+       topicDist = topicDistributions[i,]
+       
+       denom2 = sum(topicDist)
+       if(denom2 == 0){
+               denom2 = denom2 + 0.1
+       }
+       
+       topicDistributions[i,] = topicDist / denom2
+}
+write(topicDistributions, $6, format="binary")
+for(j in 2:numFeatures){
+       topicDistributions[,j] = topicDistributions[,j-1] + 
topicDistributions[,j]
+}
+
+data0 = Rand(rows=numDocuments, cols=numFeatures, min=0, max=0, pdf="uniform")
+
+#outer-loop for blockwise computation
+for( k in seq(1,numDocuments,blocksize) )  
+{
+  len = min(blocksize,numDocuments-k); #block length
+  data = data0[k:(k+len),];            #obtain block
+  
+  parfor(i in 1:len){
+       docTopic = docTopicMixtures[i,]
+       
+       r_z = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", 
seed=0)
+       r_w = Rand(rows=numWordsPerDoc, cols=1, min=0, max=1, pdf="uniform", 
seed=0)
+       
+       for(j in 1:numWordsPerDoc){
+               rz = castAsScalar(r_z[j,1])
+               continue = 1
+               
+               z = -1
+               #this is a workaround
+               #z=1    
+               
+               for(k1 in 1:numTopics){
+                       prob = castAsScalar(docTopic[1,k1])
+                       if(continue==1 & rz <= prob){
+                               z=k1
+                               continue=0
+                       }
+               }
+               
+               if(z==-1){
+                       print("z is unassigned: " + z)
+                       z = numTopics
+               }
+               
+               rw = castAsScalar(r_w[j,1])
+               continue = 1
+               
+               w = -1
+               #this is a workaround
+               #w = 1
+               
+               for(k2 in 1:numFeatures){
+                       prob = castAsScalar(topicDistributions[z,k2])
+                       if(continue == 1 & rw <= prob){
+                               w = k2
+                               continue = 0
+                       }
+               }
+               
+               if(w==-1){
+                       print("w is unassigned: " + w)
+                       w = numFeatures
+               }
+               
+               data[i,w] = data[i,w] + 1
+       }
+  }
+  
+  data0[k:(k+len),] = data; # write block back
+}
+
+write(data0, $7, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/datagen/genRandData4SurvAnalysis.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4SurvAnalysis.dml 
b/scripts/datagen/genRandData4SurvAnalysis.dml
index 7ac235b..da94a22 100644
--- a/scripts/datagen/genRandData4SurvAnalysis.dml
+++ b/scripts/datagen/genRandData4SurvAnalysis.dml
@@ -1,133 +1,133 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#  
-# THIS SCRIPT GENERATED RANDOM DATA FOR KAPLAN-MEIER AND COX PROPORTIONAL 
HAZARD MODELS
-# ASSUMPTION: BASELINE HAZARD HAS WEIBULL DISTIRUTION WITH PARAMETERS LAMBDA 
AND V 
-#
-# INPUT   PARAMETERS:
-# 
---------------------------------------------------------------------------------------------
-# NAME    TYPE     DEFAULT      MEANING
-# 
---------------------------------------------------------------------------------------------
-# type    Sting    ---          The type of model for which the data is being 
generated: "kaplan-meier" or "cox"
-# n       Int                   Number of records 
-# lambda  Double   2.0          Scale parameter of the Weibull distribution 
used for generating timestamps 
-# v       Double   1.5          Shape parameter of the Weibull distribution 
used for generating timestamps 
-# p       Double   0.8          1 - probability of a record being censored
-# g       Int      2            If type=kaplan-meier the number of categorical 
features used for grouping 
-# s       Int      1            If type=kaplan-meier the number of categorical 
features used for stratifying
-# f       Int      10           If type=kaplan-meier maximum number of levels 
(i.e., distinct values) of g+s categorical features
-# m       Int      100          If type=cox the number of features in the model
-# sp      Double   1.0          If type=cox the sparsity of the feature matrix 
-# O       String   ---          Location to write the output matrix containing 
random data for the kaplan-meier or the cox model 
-# B       String   ---          If type=cox location to write the output 
matrix containing the coefficients for the cox model 
-# TE     String   ---                  Location to store column indices of X 
corresponding to timestamp (first row) and event information (second row)
-# F       String   ---                 Location to store column indices of X 
which are to be used for fitting the Cox model
-# fmt     String   "text"       The output format of results of the 
kaplan-meier analysis, such as "text" or "csv"
-# 
---------------------------------------------------------------------------------------------
-# OUTPUTS: 
-# 1- If type=kaplan-meier an n x (2+g+s) matrix O with      
-#    - column 1 contains timestamps generated randomly from a Weibull 
distribution with parameters lambda and v
-#       - column 2 contains the information whether an event occurred (1) or 
data is censored (0)
-#       - columns 3:2+g contain categorical features used for grouping 
-#    - columns 3+g:2+g+s contain categorical features used for stratifying
-#   if type=cox an n x (2+m) matrix O with 
-#       - column 1 contains timestamps generated randomly from a Weibull 
distribution with parameters lambda and v
-#       - column 2 contains the information whether an event occurred (1) or 
data is censored (0)
-#       - columns 3:2+m contain scale features 
-# 2- If type=cox a coefficient matrix B
-# 3- A colum matrix TE containing the column indices of X corresponding to 
timestamp (first row) and event information (second row) 
-# 4- A column matrix F containing the column indices of X which are to be used 
for KM analysis or fitting the Cox model
-
-type = $type; # either "kaplan-meier" or "cox" 
-num_records = $n; 
-lambda = ifdef ($l, 2.0); 
-p_event = ifdef ($p, 0.8); # 1 - prob. of a record being censored
-# parameters related to the kaplan-meier model
-n_groups = ifdef ($g, 2);
-n_strata = ifdef ($s, 1);
-max_level = ifdef ($f, 10);
-# parameters related to the cox model
-num_features = ifdef ($m, 1000);  
-sparsity = ifdef ($sp, 1.0); 
-fileO = $O;
-fileB = $B; 
-fileTE = $TE;
-fileF = $F;
-fmtO = ifdef ($fmt, "text"); # $fmt="text" 
-p_censor = 1 - p_event; # prob. that record is censored
-
-if (type == "kaplan-meier") {
-       
-       v = ifdef ($v, 1.5);
-       # generate categorical features used for grouping and stratifying
-       X = ceil (rand (rows = num_records, cols = n_groups + n_strata, min = 
0.000000001, max = max_level - 0.000000001, pdf = "uniform"));
-       
-       # generate timestamps
-       U = rand (rows = num_records, cols = 1, min = 0.000000001, max = 1); 
-       T = (-log (U) / lambda) ^ (1/v);
-
-} else if (type == "cox") {
-
-       v = ifdef ($v, 50);
-       # generate feature matrix
-       X = rand (rows = num_records, cols = num_features, min = 1, max = 5, 
pdf = "uniform", sparsity = sparsity);
-
-       # generate coefficients
-       B = rand (rows = num_features, cols = 1, min = -1.0, max = 1.0, pdf = 
"uniform", sparsity = 1.0); # * beta_range;       
-
-       # generate timestamps
-       U = rand (rows = num_records, cols = 1, min = 0.000000001, max = 1); 
-       T = (-log (U) / (lambda * exp (X %*% B)) ) ^ (1/v);
-
-} else {
-       stop ("Wrong model type!");
-}
-
-Y = matrix (0, rows = num_records, cols = 2);
-event = floor (rand (rows = num_records, cols = 1, min = (1 - p_censor), max = 
(1 + p_event)));
-n_time = sum (event);
-Y[,2] = event;
-       
-# binning of event times
-min_T = min (T);
-max_T = max (T);
-# T = T - min_T;
-len = max_T - min_T;
-num_bins = len / n_time;
-T = ceil (T / num_bins);
-
-# print ("min(T) " + min(T) + " max(T) " + max(T));
-Y[,1] = T;
-
-O = append (Y, X);
-write (O, fileO, format = fmtO);
-
-if (type == "cox") {
-       write (B, fileB, format = fmtO);
-       
-}
-
-TE = matrix ("1 2", rows = 2, cols = 1);
-F = seq (1, num_features);
-write (TE, fileTE, format = fmtO);
-write (F, fileF, format = fmtO);
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#  
+# THIS SCRIPT GENERATED RANDOM DATA FOR KAPLAN-MEIER AND COX PROPORTIONAL 
HAZARD MODELS
+# ASSUMPTION: BASELINE HAZARD HAS WEIBULL DISTIRUTION WITH PARAMETERS LAMBDA 
AND V 
+#
+# INPUT   PARAMETERS:
+# 
---------------------------------------------------------------------------------------------
+# NAME    TYPE     DEFAULT      MEANING
+# 
---------------------------------------------------------------------------------------------
+# type    Sting    ---          The type of model for which the data is being 
generated: "kaplan-meier" or "cox"
+# n       Int                   Number of records 
+# lambda  Double   2.0          Scale parameter of the Weibull distribution 
used for generating timestamps 
+# v       Double   1.5          Shape parameter of the Weibull distribution 
used for generating timestamps 
+# p       Double   0.8          1 - probability of a record being censored
+# g       Int      2            If type=kaplan-meier the number of categorical 
features used for grouping 
+# s       Int      1            If type=kaplan-meier the number of categorical 
features used for stratifying
+# f       Int      10           If type=kaplan-meier maximum number of levels 
(i.e., distinct values) of g+s categorical features
+# m       Int      100          If type=cox the number of features in the model
+# sp      Double   1.0          If type=cox the sparsity of the feature matrix 
+# O       String   ---          Location to write the output matrix containing 
random data for the kaplan-meier or the cox model 
+# B       String   ---          If type=cox location to write the output 
matrix containing the coefficients for the cox model 
+# TE     String   ---                  Location to store column indices of X 
corresponding to timestamp (first row) and event information (second row)
+# F       String   ---                 Location to store column indices of X 
which are to be used for fitting the Cox model
+# fmt     String   "text"       The output format of results of the 
kaplan-meier analysis, such as "text" or "csv"
+# 
---------------------------------------------------------------------------------------------
+# OUTPUTS: 
+# 1- If type=kaplan-meier an n x (2+g+s) matrix O with      
+#    - column 1 contains timestamps generated randomly from a Weibull 
distribution with parameters lambda and v
+#       - column 2 contains the information whether an event occurred (1) or 
data is censored (0)
+#       - columns 3:2+g contain categorical features used for grouping 
+#    - columns 3+g:2+g+s contain categorical features used for stratifying
+#   if type=cox an n x (2+m) matrix O with 
+#       - column 1 contains timestamps generated randomly from a Weibull 
distribution with parameters lambda and v
+#       - column 2 contains the information whether an event occurred (1) or 
data is censored (0)
+#       - columns 3:2+m contain scale features 
+# 2- If type=cox a coefficient matrix B
+# 3- A colum matrix TE containing the column indices of X corresponding to 
timestamp (first row) and event information (second row) 
+# 4- A column matrix F containing the column indices of X which are to be used 
for KM analysis or fitting the Cox model
+
+type = $type; # either "kaplan-meier" or "cox" 
+num_records = $n; 
+lambda = ifdef ($l, 2.0); 
+p_event = ifdef ($p, 0.8); # 1 - prob. of a record being censored
+# parameters related to the kaplan-meier model
+n_groups = ifdef ($g, 2);
+n_strata = ifdef ($s, 1);
+max_level = ifdef ($f, 10);
+# parameters related to the cox model
+num_features = ifdef ($m, 1000);  
+sparsity = ifdef ($sp, 1.0); 
+fileO = $O;
+fileB = $B; 
+fileTE = $TE;
+fileF = $F;
+fmtO = ifdef ($fmt, "text"); # $fmt="text" 
+p_censor = 1 - p_event; # prob. that record is censored
+
+if (type == "kaplan-meier") {
+       
+       v = ifdef ($v, 1.5);
+       # generate categorical features used for grouping and stratifying
+       X = ceil (rand (rows = num_records, cols = n_groups + n_strata, min = 
0.000000001, max = max_level - 0.000000001, pdf = "uniform"));
+       
+       # generate timestamps
+       U = rand (rows = num_records, cols = 1, min = 0.000000001, max = 1); 
+       T = (-log (U) / lambda) ^ (1/v);
+
+} else if (type == "cox") {
+
+       v = ifdef ($v, 50);
+       # generate feature matrix
+       X = rand (rows = num_records, cols = num_features, min = 1, max = 5, 
pdf = "uniform", sparsity = sparsity);
+
+       # generate coefficients
+       B = rand (rows = num_features, cols = 1, min = -1.0, max = 1.0, pdf = 
"uniform", sparsity = 1.0); # * beta_range;       
+
+       # generate timestamps
+       U = rand (rows = num_records, cols = 1, min = 0.000000001, max = 1); 
+       T = (-log (U) / (lambda * exp (X %*% B)) ) ^ (1/v);
+
+} else {
+       stop ("Wrong model type!");
+}
+
+Y = matrix (0, rows = num_records, cols = 2);
+event = floor (rand (rows = num_records, cols = 1, min = (1 - p_censor), max = 
(1 + p_event)));
+n_time = sum (event);
+Y[,2] = event;
+       
+# binning of event times
+min_T = min (T);
+max_T = max (T);
+# T = T - min_T;
+len = max_T - min_T;
+num_bins = len / n_time;
+T = ceil (T / num_bins);
+
+# print ("min(T) " + min(T) + " max(T) " + max(T));
+Y[,1] = T;
+
+O = append (Y, X);
+write (O, fileO, format = fmtO);
+
+if (type == "cox") {
+       write (B, fileB, format = fmtO);
+       
+}
+
+TE = matrix ("1 2", rows = 2, cols = 1);
+F = seq (1, num_features);
+write (TE, fileTE, format = fmtO);
+write (F, fileF, format = fmtO);
+

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/datagen/genRandData4Transform.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4Transform.dml 
b/scripts/datagen/genRandData4Transform.dml
index b207629..bc799d6 100644
--- a/scripts/datagen/genRandData4Transform.dml
+++ b/scripts/datagen/genRandData4Transform.dml
@@ -1,96 +1,96 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# Generates random data to test transform with
-#
-# rows, cols: dimensions of the data matrix to be generated
-# prob_categorical: percentage of the generated cols to be categorical
-# min_domain, max_domain: provide a range for domain sizes of the generated 
categorical cols
-# prob_missing: percentage of the generated (scale) cols to have missing values
-# prob_missing_cell: probability of a cell to have a missing value
-# out_X, out_missing, out_categorical: output file names
-#
-
-#params for size of data
-num_rows = ifdef($rows, 1000)
-num_cols = ifdef($cols, 25)
-
-#params for kind of cols
-prob_categorical = ifdef($prob_cat, 0.1)
-min_domain_size = ifdef($min_domain, 1)
-max_domain_size = ifdef($max_domain, 10)
-
-#params for missing value cols
-prob_missing_col = ifdef($prob_missing, 0.1)
-prob_missing_val = ifdef($prob_missing_cell, 0.2)
-
-num_scalar_cols = as.double(num_cols)
-num_categorical_cols = 0.0
-scalar_ind = matrix(1, rows=num_scalar_cols, cols=1)
-if(prob_categorical > 0){
-  categorical_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
-  categorical_ind = ppred(categorical_ind, prob_categorical, "<")
-  categorical_col_ids = removeEmpty(target=seq(1, num_cols, 
1)*categorical_ind, margin="rows")
-  num_categorical_cols = sum(categorical_ind)
-  write(categorical_col_ids, $out_categorical, format="csv")
-  
-  domain_sizes = Rand(rows=num_categorical_cols, cols=1, min=0, max=1, 
pdf="uniform")
-  domain_sizes = round(min_domain_size + (max_domain_size - 
min_domain_size)*domain_sizes)
-  
-  categorical_X = Rand(rows=num_rows, cols=num_categorical_cols, min=0, max=1, 
pdf="uniform")
-  categorical_X = t(round(1 + t(categorical_X)*(domain_sizes - 1)))
-
-  scalar_ind = 1-categorical_ind
-}
-
-scalar_col_ids = removeEmpty(target=seq(1, num_cols, 1)*scalar_ind, 
margin="rows")
-num_scalar_cols = sum(scalar_ind)
-scalar_X = Rand(rows=num_rows, cols=num_scalar_cols, min=0, max=1, 
pdf="uniform")
-  
-if(num_categorical_cols > 0 & num_scalar_cols > 0){
-  X = append(scalar_X, categorical_X)
-  permut_mat = table(seq(1, num_scalar_cols, 1), scalar_col_ids, 
num_scalar_cols, num_cols)
-  fill_in = matrix(0, rows=num_cols-num_scalar_cols, cols=num_cols)
-  permut_mat = t(append(t(permut_mat), t(fill_in)))
-  X = X %*% permut_mat
-}else{
-  if(num_categorical_cols > 0) X = categorical_X
-  else{
-    if(num_scalar_cols > 0) X = scalar_X
-    else print("somehow, we've managed to compute that precisely 0 cols should 
be categorical and 0 cols should be scale")
-  }
-}
-
-if(prob_missing_col > 0){
-  missing_col_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
-  missing_col_ind = ppred(missing_col_ind, prob_missing_col, "<")
-  #currently only support missing value imputation for scale cols
-  missing_col_ind = missing_col_ind * scalar_ind
-  missing_col_ids = removeEmpty(target=seq(1, num_cols, 1)*missing_col_ind, 
margin="rows")
-  missing_values = Rand(rows=num_rows, cols=nrow(missing_col_ids), min=0, 
max=1, pdf="uniform")
-  missing_values = ppred(missing_values, prob_missing_val, "<")
-  X = append(X, missing_values)
-  
-  write(missing_col_ids, $out_missing, format="csv")
-}
-
-write(X, $out_X, format="csv")
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# Generates random data to test transform with
+#
+# rows, cols: dimensions of the data matrix to be generated
+# prob_categorical: percentage of the generated cols to be categorical
+# min_domain, max_domain: provide a range for domain sizes of the generated 
categorical cols
+# prob_missing: percentage of the generated (scale) cols to have missing values
+# prob_missing_cell: probability of a cell to have a missing value
+# out_X, out_missing, out_categorical: output file names
+#
+
+#params for size of data
+num_rows = ifdef($rows, 1000)
+num_cols = ifdef($cols, 25)
+
+#params for kind of cols
+prob_categorical = ifdef($prob_cat, 0.1)
+min_domain_size = ifdef($min_domain, 1)
+max_domain_size = ifdef($max_domain, 10)
+
+#params for missing value cols
+prob_missing_col = ifdef($prob_missing, 0.1)
+prob_missing_val = ifdef($prob_missing_cell, 0.2)
+
+num_scalar_cols = as.double(num_cols)
+num_categorical_cols = 0.0
+scalar_ind = matrix(1, rows=num_scalar_cols, cols=1)
+if(prob_categorical > 0){
+  categorical_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
+  categorical_ind = ppred(categorical_ind, prob_categorical, "<")
+  categorical_col_ids = removeEmpty(target=seq(1, num_cols, 
1)*categorical_ind, margin="rows")
+  num_categorical_cols = sum(categorical_ind)
+  write(categorical_col_ids, $out_categorical, format="csv")
+  
+  domain_sizes = Rand(rows=num_categorical_cols, cols=1, min=0, max=1, 
pdf="uniform")
+  domain_sizes = round(min_domain_size + (max_domain_size - 
min_domain_size)*domain_sizes)
+  
+  categorical_X = Rand(rows=num_rows, cols=num_categorical_cols, min=0, max=1, 
pdf="uniform")
+  categorical_X = t(round(1 + t(categorical_X)*(domain_sizes - 1)))
+
+  scalar_ind = 1-categorical_ind
+}
+
+scalar_col_ids = removeEmpty(target=seq(1, num_cols, 1)*scalar_ind, 
margin="rows")
+num_scalar_cols = sum(scalar_ind)
+scalar_X = Rand(rows=num_rows, cols=num_scalar_cols, min=0, max=1, 
pdf="uniform")
+  
+if(num_categorical_cols > 0 & num_scalar_cols > 0){
+  X = append(scalar_X, categorical_X)
+  permut_mat = table(seq(1, num_scalar_cols, 1), scalar_col_ids, 
num_scalar_cols, num_cols)
+  fill_in = matrix(0, rows=num_cols-num_scalar_cols, cols=num_cols)
+  permut_mat = t(append(t(permut_mat), t(fill_in)))
+  X = X %*% permut_mat
+}else{
+  if(num_categorical_cols > 0) X = categorical_X
+  else{
+    if(num_scalar_cols > 0) X = scalar_X
+    else print("somehow, we've managed to compute that precisely 0 cols should 
be categorical and 0 cols should be scale")
+  }
+}
+
+if(prob_missing_col > 0){
+  missing_col_ind = Rand(rows=num_cols, cols=1, min=0, max=1, pdf="uniform")
+  missing_col_ind = ppred(missing_col_ind, prob_missing_col, "<")
+  #currently only support missing value imputation for scale cols
+  missing_col_ind = missing_col_ind * scalar_ind
+  missing_col_ids = removeEmpty(target=seq(1, num_cols, 1)*missing_col_ind, 
margin="rows")
+  missing_values = Rand(rows=num_rows, cols=nrow(missing_col_ids), min=0, 
max=1, pdf="uniform")
+  missing_values = ppred(missing_values, prob_missing_val, "<")
+  X = append(X, missing_values)
+  
+  write(missing_col_ids, $out_missing, format="csv")
+}
+
+write(X, $out_X, format="csv")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/datagen/genRandData4Univariate.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genRandData4Univariate.dml 
b/scripts/datagen/genRandData4Univariate.dml
index d3c842c..bcbd528 100644
--- a/scripts/datagen/genRandData4Univariate.dml
+++ b/scripts/datagen/genRandData4Univariate.dml
@@ -1,61 +1,61 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random numbers from a distribution
-# with specified mean, standard deviation, 
-# skewness, kurtosis
-# mean and standard deviation are taken in as
-# arguments by this script
-# a,b,c,d are coefficients computed by some
-# equation solver determined from the specified
-# skewness and kurtosis using power method
-# polynomials
-#
-# for more details see:
-# Statistical Simulation: Power Method Polynomials
-# and Other Transformations
-# Author: Todd C. Headrick
-# Chapman & Hall/CRC, Boca Raton, FL, 2010.
-# ISBN 978-1-4200-6490-2
-
-# $1 is the number of random points to be sampled
-# $2 is specified mean
-# $3 is specified standard deviation
-# $4-$7 are a,b,c,d obtained by solving a system
-# of equations using specified kurtosis and skewness
-# $8 is the file to write out the generated data to
-
-numSamples = $1
-mu = $2
-sigma = $3
-a = $4
-b = $5
-c = $6
-d = $7
-
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random numbers from a distribution
+# with specified mean, standard deviation, 
+# skewness, kurtosis
+# mean and standard deviation are taken in as
+# arguments by this script
+# a,b,c,d are coefficients computed by some
+# equation solver determined from the specified
+# skewness and kurtosis using power method
+# polynomials
+#
+# for more details see:
+# Statistical Simulation: Power Method Polynomials
+# and Other Transformations
+# Author: Todd C. Headrick
+# Chapman & Hall/CRC, Boca Raton, FL, 2010.
+# ISBN 978-1-4200-6490-2
+
+# $1 is the number of random points to be sampled
+# $2 is specified mean
+# $3 is specified standard deviation
+# $4-$7 are a,b,c,d obtained by solving a system
+# of equations using specified kurtosis and skewness
+# $8 is the file to write out the generated data to
+
+numSamples = $1
+mu = $2
+sigma = $3
+a = $4
+b = $5
+c = $6
+d = $7
+
 
 print("a=" + a + " b=" + b + " c=" + c + " d=" + d)
 
-X = Rand(rows=numSamples, cols=1, pdf="normal", seed=0)
-Y = a + b*X + c*X^2 + d*X^3
-
-Z = Y*sigma + mu
-write(Z, $8, format="binary")
+X = Rand(rows=numSamples, cols=1, pdf="normal", seed=0)
+Y = a + b*X + c*X^2 + d*X^3
+
+Z = Y*sigma + mu
+write(Z, $8, format="binary")

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/staging/PPCA.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/PPCA.dml b/scripts/staging/PPCA.dml
index 667c709..abfcdb1 100644
--- a/scripts/staging/PPCA.dml
+++ b/scripts/staging/PPCA.dml
@@ -1,160 +1,160 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
- 
-# This script performs Probabilistic Principal Component Analysis (PCA) on the 
given input data. 
-# It is based on paper: sPCA: Scalable Principal Component Analysis for Big 
Data on Distributed 
-# Platforms. Tarek Elgamal et.al.
-
-# INPUT PARAMETERS:
-# 
---------------------------------------------------------------------------------------------
-# NAME         TYPE   DEFAULT  MEANING
-# 
---------------------------------------------------------------------------------------------
-# X            String ---      location to read the matrix X input matrix
-# k            Int    ---      indicates dimension of the new vector space 
constructed from eigen vectors
-# tolobj       Int    0.00001  objective function tolerance value to stop ppca 
algorithm
-# tolrecerr    Int    0.02     reconstruction error tolerance value to stop 
the algorithm      
-# iter         Int    10       maximum number of iterations
-# fmt          String 'text'   output format of results PPCA such as "text" or 
"csv"
-# hadoop jar SystemML.jar -f PPCA.dml -nvargs X=/INPUT_DIR/X  C=/OUTPUT_DIR/C 
V=/OUTPUT_DIR/V k=2 tol=0.2 iter=100
-# 
---------------------------------------------------------------------------------------------
-# OUTPUT PARAMETERS: 
-# 
---------------------------------------------------------------------------------------------
-# NAME   TYPE   DEFAULT  MEANING
-# 
---------------------------------------------------------------------------------------------
-# C            Matrix  ---     principal components
-# V            Matrix  ---     eigenvalues / eigenvalues of principal 
components
-#
-
-X = read($X);
-
-fileC = $C;
-fileV = $V;
-
-k = ifdef($k, ncol(X));
-iter = ifdef($iter, 10);
-tolobj = ifdef($tolobj, 0.00001);
-tolrecerr = ifdef($tolrecerr, 0.02);
-fmt0 = ifdef($fmt, "text"); 
-
-n = nrow(X);
-m = ncol(X);
-
-#initializing principal components matrix
-C =  rand(rows=m, cols=k, pdf="normal");
-ss = rand(rows=1, cols=1, pdf="normal");
-ss = as.scalar(ss);
-ssPrev = ss;
-
-# best selected principle components - with the lowest reconstruction error 
-PC = C;
-
-# initilizing reconstruction error
-RE = tolrecerr+1;
-REBest = RE;
-
-Z = matrix(0,rows=1,cols=1);
-
-#Objective function value
-ObjRelChng = tolobj+1;
-
-# mean centered input matrix - dim -> [n,m]
-Xm = X - colMeans(X);
-
-#I -> k x k
-ITMP = matrix(1,rows=k,cols=1);
-I = diag(ITMP);
-
-i = 0;
-while (i < iter & ObjRelChng > tolobj & RE > tolrecerr){
-       #Estimation step - Covariance matrix 
-       #M -> k x k
-       M = t(C) %*% C + I*ss; 
-       
-       #Auxilary matrix with n latent variables 
-       # Z -> n x k            
-       Z = Xm %*% (C %*% inv(M)); 
-
-       #ZtZ -> k x k
-       ZtZ = t(Z) %*% Z + inv(M)*ss;
-       
-       #XtZ -> m x k
-       XtZ = t(Xm) %*% Z;
-       
-       #Maximization step
-       #C ->  m x k
-       ZtZ_sum = sum(ZtZ); #+n*inv(M)); 
-       C = XtZ/ZtZ_sum;
-
-       #ss2 -> 1 x 1
-       ss2 = trace(ZtZ * (t(C) %*% C));
-
-       #ss3 -> 1 x 1 
-       ss3 = sum((Z %*% t(C)) %*% t(Xm));
-       
-       #Frobenius norm of reconstruction error -> Euclidean norm 
-       #Fn -> 1 x 1    
-       Fn = sum(Xm*Xm);
-
-       #ss -> 1 x 1
-       ss = (Fn + ss2 - 2*ss3)/(n*m);
-
-   #calculating objective function relative change
-   ObjRelChng = abs(1 - ss/ssPrev);
-   #print("Objective Relative Change: " + ObjRelChng + ", Objective: " + ss);
-
-       #Reconstruction error
-       R = ((Z %*% t(C)) -  Xm);       
-
-       #calculate the error
-       #TODO rethink calculation of reconstruction error .... 
-       #1-Norm of reconstruction error - a big dense matrix 
-       #RE -> n x m
-       RE = abs(sum(R)/sum(Xm));       
-       if (RE < REBest){
-               PC = C;
-               REBest = RE;
-       }       
-       #print("ss: " + ss +" = Fn( "+ Fn +" ) + ss2( " + ss2  +" ) - 2*ss3( " 
+ ss3 + " ), Reconstruction Error: " + RE);
-
-       ssPrev = ss;    
-       i = i+1;
-}
-print("Objective Relative Change: " + ObjRelChng);
-print ("Number of iterations: " + i + ", Reconstruction Err: " + REBest);
-
-# reconstructs data
-# RD -> n x k
-RD = X %*% PC;
-
-# calculate eigenvalues - principle component variance
-RDMean = colMeans(RD);
-V = t(colMeans(RD*RD) - (RDMean*RDMean));
-
-# sorting eigenvalues and eigenvectors in decreasing order
-V_decr_idx = order(target=V,by=1,decreasing=TRUE,index.return=TRUE);
-VF_decr = table(seq(1,nrow(V)),V_decr_idx);
-V = VF_decr %*% V;
-PC = PC %*% VF_decr;
-
-# writing principal components 
-write(PC, fileC, format=fmt0);
-# writing eigen values/pc variance
-write(V, fileV, format=fmt0);
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+ 
+# This script performs Probabilistic Principal Component Analysis (PCA) on the 
given input data. 
+# It is based on paper: sPCA: Scalable Principal Component Analysis for Big 
Data on Distributed 
+# Platforms. Tarek Elgamal et.al.
+
+# INPUT PARAMETERS:
+# 
---------------------------------------------------------------------------------------------
+# NAME         TYPE   DEFAULT  MEANING
+# 
---------------------------------------------------------------------------------------------
+# X            String ---      location to read the matrix X input matrix
+# k            Int    ---      indicates dimension of the new vector space 
constructed from eigen vectors
+# tolobj       Int    0.00001  objective function tolerance value to stop ppca 
algorithm
+# tolrecerr    Int    0.02     reconstruction error tolerance value to stop 
the algorithm      
+# iter         Int    10       maximum number of iterations
+# fmt          String 'text'   output format of results PPCA such as "text" or 
"csv"
+# hadoop jar SystemML.jar -f PPCA.dml -nvargs X=/INPUT_DIR/X  C=/OUTPUT_DIR/C 
V=/OUTPUT_DIR/V k=2 tol=0.2 iter=100
+# 
---------------------------------------------------------------------------------------------
+# OUTPUT PARAMETERS: 
+# 
---------------------------------------------------------------------------------------------
+# NAME   TYPE   DEFAULT  MEANING
+# 
---------------------------------------------------------------------------------------------
+# C            Matrix  ---     principal components
+# V            Matrix  ---     eigenvalues / eigenvalues of principal 
components
+#
+
+X = read($X);
+
+fileC = $C;
+fileV = $V;
+
+k = ifdef($k, ncol(X));
+iter = ifdef($iter, 10);
+tolobj = ifdef($tolobj, 0.00001);
+tolrecerr = ifdef($tolrecerr, 0.02);
+fmt0 = ifdef($fmt, "text"); 
+
+n = nrow(X);
+m = ncol(X);
+
+#initializing principal components matrix
+C =  rand(rows=m, cols=k, pdf="normal");
+ss = rand(rows=1, cols=1, pdf="normal");
+ss = as.scalar(ss);
+ssPrev = ss;
+
+# best selected principle components - with the lowest reconstruction error 
+PC = C;
+
+# initilizing reconstruction error
+RE = tolrecerr+1;
+REBest = RE;
+
+Z = matrix(0,rows=1,cols=1);
+
+#Objective function value
+ObjRelChng = tolobj+1;
+
+# mean centered input matrix - dim -> [n,m]
+Xm = X - colMeans(X);
+
+#I -> k x k
+ITMP = matrix(1,rows=k,cols=1);
+I = diag(ITMP);
+
+i = 0;
+while (i < iter & ObjRelChng > tolobj & RE > tolrecerr){
+       #Estimation step - Covariance matrix 
+       #M -> k x k
+       M = t(C) %*% C + I*ss; 
+       
+       #Auxilary matrix with n latent variables 
+       # Z -> n x k            
+       Z = Xm %*% (C %*% inv(M)); 
+
+       #ZtZ -> k x k
+       ZtZ = t(Z) %*% Z + inv(M)*ss;
+       
+       #XtZ -> m x k
+       XtZ = t(Xm) %*% Z;
+       
+       #Maximization step
+       #C ->  m x k
+       ZtZ_sum = sum(ZtZ); #+n*inv(M)); 
+       C = XtZ/ZtZ_sum;
+
+       #ss2 -> 1 x 1
+       ss2 = trace(ZtZ * (t(C) %*% C));
+
+       #ss3 -> 1 x 1 
+       ss3 = sum((Z %*% t(C)) %*% t(Xm));
+       
+       #Frobenius norm of reconstruction error -> Euclidean norm 
+       #Fn -> 1 x 1    
+       Fn = sum(Xm*Xm);
+
+       #ss -> 1 x 1
+       ss = (Fn + ss2 - 2*ss3)/(n*m);
+
+   #calculating objective function relative change
+   ObjRelChng = abs(1 - ss/ssPrev);
+   #print("Objective Relative Change: " + ObjRelChng + ", Objective: " + ss);
+
+       #Reconstruction error
+       R = ((Z %*% t(C)) -  Xm);       
+
+       #calculate the error
+       #TODO rethink calculation of reconstruction error .... 
+       #1-Norm of reconstruction error - a big dense matrix 
+       #RE -> n x m
+       RE = abs(sum(R)/sum(Xm));       
+       if (RE < REBest){
+               PC = C;
+               REBest = RE;
+       }       
+       #print("ss: " + ss +" = Fn( "+ Fn +" ) + ss2( " + ss2  +" ) - 2*ss3( " 
+ ss3 + " ), Reconstruction Error: " + RE);
+
+       ssPrev = ss;    
+       i = i+1;
+}
+print("Objective Relative Change: " + ObjRelChng);
+print ("Number of iterations: " + i + ", Reconstruction Err: " + REBest);
+
+# reconstructs data
+# RD -> n x k
+RD = X %*% PC;
+
+# calculate eigenvalues - principle component variance
+RDMean = colMeans(RD);
+V = t(colMeans(RD*RD) - (RDMean*RDMean));
+
+# sorting eigenvalues and eigenvectors in decreasing order
+V_decr_idx = order(target=V,by=1,decreasing=TRUE,index.return=TRUE);
+VF_decr = table(seq(1,nrow(V)),V_decr_idx);
+V = VF_decr %*% V;
+PC = PC %*% VF_decr;
+
+# writing principal components 
+write(PC, fileC, format=fmt0);
+# writing eigen values/pc variance
+write(V, fileV, format=fmt0);

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/scripts/staging/regression/lasso/lasso.dml
----------------------------------------------------------------------
diff --git a/scripts/staging/regression/lasso/lasso.dml 
b/scripts/staging/regression/lasso/lasso.dml
index fb520df..1da88d3 100644
--- a/scripts/staging/regression/lasso/lasso.dml
+++ b/scripts/staging/regression/lasso/lasso.dml
@@ -1,113 +1,113 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#uses the sparsa algorithm to perform lasso regression
-
-X = read($X)
-y = read($Y)
-n = nrow(X)
-m = ncol(X)
-
-#params
-tol = 10^(-15)
-M = 5
-tau = 1
-maxiter = 1000
-
-#constants
-eta = 2
-sigma = 0.01
-alpha_min = 10^(-30)
-alpha_max = 10^(30)
-
-#init
-alpha = 1
-w = Rand(rows=m, cols=1, min=0, max=1, pdf="uniform")
-history = -1*10^30 * matrix(1, rows=M, cols=1)
-
-r = X %*% w - y
-g = t(X) %*% r
-obj = 0.5 * sum(r*r) + tau*sum(abs(w))
-
-print("Initial OBJ=" + obj)
-
-history[M,1] = obj
-
-inactive_set = matrix(1, rows=m, cols=1)
-iter = 0
-continue = TRUE
-while(iter < maxiter & continue) {
-       dw = matrix(0, rows=m, cols=1)
-       dg = matrix(0, rows=m, cols=1)
-       relChangeObj = -1.0
-       
-       inner_iter = 0
-       inner_continue = TRUE
-       inner_maxiter = 100
-       while(inner_iter < inner_maxiter & inner_continue) {
-               u = w - g/alpha
-               lambda = tau/alpha
-               
-               wnew = sign(u) * (abs(u) - lambda) * ppred(abs(u) - lambda, 0, 
">")
-               dw = wnew - w
-               dw2 = sum(dw*dw)
-               
-               r = X %*% wnew - y
-               gnew = t(X) %*% r
-               objnew = 0.5 * sum(r*r) + tau*sum(abs(wnew))            
-               obj_threshold = max(history) - 0.5*sigma*alpha*dw2
-               
-               if(objnew <= obj_threshold) {
-                       w = wnew
-                       dg = gnew - g
-                       g = gnew
-                       inner_continue = FALSE
-                       
-                       history[1:(M-1),] = history[2:M,]
-                       history[M,1] = objnew
-                       relChangeObj = abs(objnew - obj)/obj
-                       obj = objnew
-               }
-               else 
-                       alpha = eta*alpha
-       
-               inner_iter = inner_iter + 1
-       }
-       
-       if(inner_continue) 
-               print("Inner loop did not converge")
-       
-       alphanew = sum(dw*dg)/sum(dw*dw)
-       alpha = max(alpha_min, min(alpha_max, alphanew))
-       
-       old_inactive_set = inactive_set
-       inactive_set = ppred(w, 0, "!=")
-       diff = sum(abs(old_inactive_set - inactive_set))
-
-       if(diff == 0 & relChangeObj < tol) 
-               continue = FALSE
-
-       num_inactive = sum(ppred(w, 0, "!="))
-       print("ITER=" + iter + " OBJ=" + obj + " relative change=" + 
relChangeObj + " num_inactive=" + num_inactive)
-       iter = iter + 1
-}
-
-write(w, $model)
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#uses the sparsa algorithm to perform lasso regression
+
+X = read($X)
+y = read($Y)
+n = nrow(X)
+m = ncol(X)
+
+#params
+tol = 10^(-15)
+M = 5
+tau = 1
+maxiter = 1000
+
+#constants
+eta = 2
+sigma = 0.01
+alpha_min = 10^(-30)
+alpha_max = 10^(30)
+
+#init
+alpha = 1
+w = Rand(rows=m, cols=1, min=0, max=1, pdf="uniform")
+history = -1*10^30 * matrix(1, rows=M, cols=1)
+
+r = X %*% w - y
+g = t(X) %*% r
+obj = 0.5 * sum(r*r) + tau*sum(abs(w))
+
+print("Initial OBJ=" + obj)
+
+history[M,1] = obj
+
+inactive_set = matrix(1, rows=m, cols=1)
+iter = 0
+continue = TRUE
+while(iter < maxiter & continue) {
+       dw = matrix(0, rows=m, cols=1)
+       dg = matrix(0, rows=m, cols=1)
+       relChangeObj = -1.0
+       
+       inner_iter = 0
+       inner_continue = TRUE
+       inner_maxiter = 100
+       while(inner_iter < inner_maxiter & inner_continue) {
+               u = w - g/alpha
+               lambda = tau/alpha
+               
+               wnew = sign(u) * (abs(u) - lambda) * ppred(abs(u) - lambda, 0, 
">")
+               dw = wnew - w
+               dw2 = sum(dw*dw)
+               
+               r = X %*% wnew - y
+               gnew = t(X) %*% r
+               objnew = 0.5 * sum(r*r) + tau*sum(abs(wnew))            
+               obj_threshold = max(history) - 0.5*sigma*alpha*dw2
+               
+               if(objnew <= obj_threshold) {
+                       w = wnew
+                       dg = gnew - g
+                       g = gnew
+                       inner_continue = FALSE
+                       
+                       history[1:(M-1),] = history[2:M,]
+                       history[M,1] = objnew
+                       relChangeObj = abs(objnew - obj)/obj
+                       obj = objnew
+               }
+               else 
+                       alpha = eta*alpha
+       
+               inner_iter = inner_iter + 1
+       }
+       
+       if(inner_continue) 
+               print("Inner loop did not converge")
+       
+       alphanew = sum(dw*dg)/sum(dw*dw)
+       alpha = max(alpha_min, min(alpha_max, alphanew))
+       
+       old_inactive_set = inactive_set
+       inactive_set = ppred(w, 0, "!=")
+       diff = sum(abs(old_inactive_set - inactive_set))
+
+       if(diff == 0 & relChangeObj < tol) 
+               continue = FALSE
+
+       num_inactive = sum(ppred(w, 0, "!="))
+       print("ITER=" + iter + " OBJ=" + obj + " relative change=" + 
relChangeObj + " num_inactive=" + num_inactive)
+       iter = iter + 1
+}
+
+write(w, $model)

[39/51] [partial] incubator-systemml git commit: [SYSTEMML-482] [SYSTEMML-480] Adding a Git attributes file to enfore Unix-styled line endings, and normalizing all of the line endings.

Reply via email to