Repository: systemml
Updated Branches:
  refs/heads/master bc16b9e3d -> 608ac39c4


[MINOR] Removed unused datagen/test scripts and internal udf functions

Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/608ac39c
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/608ac39c
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/608ac39c

Branch: refs/heads/master
Commit: 608ac39c40e4d2517c9900d901a403c0c579db7f
Parents: bc16b9e
Author: Matthias Boehm <mboe...@gmail.com>
Authored: Sat Jun 10 20:01:00 2017 -0700
Committer: Matthias Boehm <mboe...@gmail.com>
Committed: Sat Jun 10 20:01:00 2017 -0700

----------------------------------------------------------------------
 pom.xml                                         |   4 +-
 scripts/datagen/genCorrelatedData.dml           |  46 ---
 scripts/datagen/genLinearRegressionData.dml     |  71 ----
 scripts/datagen/obsolete/genCorrelatedData.dml  |  46 +++
 .../obsolete/genLinearRegressionData.dml        |  71 ++++
 .../sysml/hops/ipa/InterProceduralAnalysis.java |  19 +-
 .../org/apache/sysml/udf/lib/DeNaNWrapper.java  |  79 -----
 .../sysml/udf/lib/DeNegInfinityWrapper.java     |  79 -----
 .../sysml/udf/lib/PermutationMatrixWrapper.java | 146 --------
 .../applications/ctableStats/stratstats.dml     | 350 -------------------
 10 files changed, 119 insertions(+), 792 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/pom.xml
----------------------------------------------------------------------
diff --git a/pom.xml b/pom.xml
index 0fee7f9..5b914d5 100644
--- a/pom.xml
+++ b/pom.xml
@@ -95,11 +95,9 @@
                        <directory>scripts</directory>
                        <excludes>
                                <exclude>algorithms/obsolete/*</exclude>
-                               <exclude>algorithms/obsolete</exclude>
+                               <exclude>datagen/obsolete/*</exclude>
                                <exclude>perftest/*</exclude>
-                               <exclude>perftest</exclude>
                                <exclude>staging/**/*</exclude>
-                               <exclude>staging</exclude>
                                <exclude>nn/test/compare_backends/*</exclude>
                                <exclude>nn/test/compare_backends/*</exclude>
                                <exclude>nn/examples/caffe2dml/**/*</exclude>

http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/genCorrelatedData.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genCorrelatedData.dml b/scripts/datagen/genCorrelatedData.dml
deleted file mode 100644
index d3289ce..0000000
--- a/scripts/datagen/genCorrelatedData.dml
+++ /dev/null
@@ -1,46 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# generates random correlated data
-# can generate any number of variables/columns
-# used to test univariate stats computation
-# by systemml
-
-# $1 is number of variables/columns
-# $2 is number of samples to create
-# $3 is the location to write out the covariance mat
-# $4 is the location to write out the generated data
-dims = $1
-numSamples = $2
-
-U = Rand(rows=dims, cols=dims, min=-1.0, max=1.0, pdf="uniform", seed=0)
-denoms = sqrt(colSums(U*U))
-parfor(i in 1:dims){
-       U[i,] = U[i,] / denoms
-}
-
-C = t(U)%*%U
-write(C, $3, format="binary")
-
-R = Rand(rows=numSamples, cols=dims, pdf="normal", seed=0)
-Rc = R%*%U
-write(Rc, $4, format="binary")
-

http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/genLinearRegressionData.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/genLinearRegressionData.dml b/scripts/datagen/genLinearRegressionData.dml
deleted file mode 100644
index 10b094c..0000000
--- a/scripts/datagen/genLinearRegressionData.dml
+++ /dev/null
@@ -1,71 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-#
-# This script generates random data for linear regression. A matrix is 
generated
-# consisting of a data matrix with a label column appended to it.
-#
-# INPUT PARAMETERS:
-# 
--------------------------------------------------------------------------------------------
-# NAME            TYPE    DEFAULT  MEANING
-# 
--------------------------------------------------------------------------------------------
-# numSamples      Int     ---      Number of samples
-# numFeatures     Int     ---      Number of features (independent variables)
-# maxFeatureValue Int     ---      Maximum feature value (absolute value)
-# maxWeight       Int     ---      Maximum weight (absolute value)
-# addNoise        Boolean ---      Determines whether noise should be added to 
Y
-# b               Double  ---      Intercept
-# sparsity        Double  ---      Controls the sparsity in the generated data 
(a value between 0 and 1)
-# output          String  ---      Location to write the generated data/label 
matrix
-# format          String  ---      Matrix output format
-# perc                   Double  0.8      Percentage of training sample
-# percFile               String  ---      File to store the percentages
-# 
--------------------------------------------------------------------------------------------
-# OUTPUT: Matrix of random data with appended label column
-# 
---------------------------------------------------------------------------------------------
-#
-# Example
-# ./runStandaloneSystemML.sh algorithms/datagen/genLinearRegressionData.dml 
-nvargs numSamples=1000 numFeatures=50 maxFeatureValue=5 maxWeight=5 
addNoise=FALSE b=0 sparsity=0.7 output=linRegData.csv format=csv
-#
-
-perc = ifdef($perc, 0.8)
-percFile = ifdef($percFile, "perc.csv")
-p = matrix(0, rows=2, cols=1)
-p[1,1] = perc
-p[2,1] = (1-perc) 
-write(p, percFile, format="csv")
-
-X = Rand(cols=$numFeatures, max=1, min=-1, pdf="uniform", rows=$numSamples, 
seed=0, sparsity=$sparsity)
-X = X * $maxFeatureValue
-
-w = Rand(cols=1, max=1, min=-1, pdf="uniform", rows=$numFeatures, seed=0)
-w = w * $maxWeight
-
-Y = X %*% w
-Y = Y + $b
-
-if ($addNoise == TRUE) {
-    noise = Rand(cols=1, pdf="normal", rows=$numSamples, seed=0)
-    Y = Y + noise
-}
-
-Z = cbind(X,Y)
-write(Z, $output, format=$format)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/obsolete/genCorrelatedData.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/obsolete/genCorrelatedData.dml b/scripts/datagen/obsolete/genCorrelatedData.dml
new file mode 100644
index 0000000..d3289ce
--- /dev/null
+++ b/scripts/datagen/obsolete/genCorrelatedData.dml
@@ -0,0 +1,46 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# generates random correlated data
+# can generate any number of variables/columns
+# used to test univariate stats computation
+# by systemml
+
+# $1 is number of variables/columns
+# $2 is number of samples to create
+# $3 is the location to write out the covariance mat
+# $4 is the location to write out the generated data
+dims = $1
+numSamples = $2
+
+U = Rand(rows=dims, cols=dims, min=-1.0, max=1.0, pdf="uniform", seed=0)
+denoms = sqrt(colSums(U*U))
+parfor(i in 1:dims){
+       U[i,] = U[i,] / denoms
+}
+
+C = t(U)%*%U
+write(C, $3, format="binary")
+
+R = Rand(rows=numSamples, cols=dims, pdf="normal", seed=0)
+Rc = R%*%U
+write(Rc, $4, format="binary")
+

http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/obsolete/genLinearRegressionData.dml
----------------------------------------------------------------------
diff --git a/scripts/datagen/obsolete/genLinearRegressionData.dml b/scripts/datagen/obsolete/genLinearRegressionData.dml
new file mode 100644
index 0000000..10b094c
--- /dev/null
+++ b/scripts/datagen/obsolete/genLinearRegressionData.dml
@@ -0,0 +1,71 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# This script generates random data for linear regression. A matrix is generated
+# consisting of a data matrix with a label column appended to it.
+#
+# INPUT PARAMETERS:
+# 
--------------------------------------------------------------------------------------------
+# NAME            TYPE    DEFAULT  MEANING
+# 
--------------------------------------------------------------------------------------------
+# numSamples      Int     ---      Number of samples
+# numFeatures     Int     ---      Number of features (independent variables)
+# maxFeatureValue Int     ---      Maximum feature value (absolute value)
+# maxWeight       Int     ---      Maximum weight (absolute value)
+# addNoise        Boolean ---      Determines whether noise should be added to Y
+# b               Double  ---      Intercept
+# sparsity        Double  ---      Controls the sparsity in the generated data (a value between 0 and 1)
+# output          String  ---      Location to write the generated data/label matrix
+# format          String  ---      Matrix output format
+# perc                   Double  0.8      Percentage of training sample
+# percFile               String  ---      File to store the percentages
+# 
--------------------------------------------------------------------------------------------
+# OUTPUT: Matrix of random data with appended label column
+# 
---------------------------------------------------------------------------------------------
+#
+# Example
+# ./runStandaloneSystemML.sh algorithms/datagen/genLinearRegressionData.dml -nvargs numSamples=1000 numFeatures=50 maxFeatureValue=5 maxWeight=5 addNoise=FALSE b=0 sparsity=0.7 output=linRegData.csv format=csv
+#
+
+perc = ifdef($perc, 0.8)
+percFile = ifdef($percFile, "perc.csv")
+p = matrix(0, rows=2, cols=1)
+p[1,1] = perc
+p[2,1] = (1-perc) 
+write(p, percFile, format="csv")
+
+X = Rand(cols=$numFeatures, max=1, min=-1, pdf="uniform", rows=$numSamples, seed=0, sparsity=$sparsity)
+X = X * $maxFeatureValue
+
+w = Rand(cols=1, max=1, min=-1, pdf="uniform", rows=$numFeatures, seed=0)
+w = w * $maxWeight
+
+Y = X %*% w
+Y = Y + $b
+
+if ($addNoise == TRUE) {
+    noise = Rand(cols=1, pdf="normal", rows=$numSamples, seed=0)
+    Y = Y + noise
+}
+
+Z = cbind(X,Y)
+write(Z, $output, format=$format)
\ No newline at end of file

http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java b/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
index 19396a9..3562c9f 100644
--- a/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
+++ b/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java
@@ -75,8 +75,6 @@ import org.apache.sysml.runtime.instructions.cp.ScalarObject;
 import org.apache.sysml.runtime.instructions.cp.ScalarObjectFactory;
 import org.apache.sysml.runtime.matrix.MatrixCharacteristics;
 import org.apache.sysml.runtime.matrix.MatrixFormatMetaData;
-import org.apache.sysml.udf.lib.DeNaNWrapper;
-import org.apache.sysml.udf.lib.DeNegInfinityWrapper;
 import org.apache.sysml.udf.lib.DynamicReadMatrixCP;
 import org.apache.sysml.udf.lib.DynamicReadMatrixRcCP;
 import org.apache.sysml.udf.lib.OrderWrapper;
@@ -908,28 +906,13 @@ public class InterProceduralAnalysis
        {
                String className = 
fstmt.getOtherParams().get(ExternalFunctionStatement.CLASS_NAME);
 
-               if(    className.equals(OrderWrapper.class.getName()) 
-                       || 
className.equals(DeNaNWrapper.class.getCanonicalName())
-                       || 
className.equals(DeNegInfinityWrapper.class.getCanonicalName()) )
+               if( className.equals(OrderWrapper.class.getName()) )
                {                       
                        Hop input = fop.getInput().get(0);
                        long lnnz = 
className.equals(OrderWrapper.class.getName()) ? input.getNnz() : -1;
                        MatrixObject moOut = 
createOutputMatrix(input.getDim1(), input.getDim2(),lnnz);
                        callVars.put(fop.getOutputVariableNames()[0], moOut);
                }
-               else if( 
className.equals("org.apache.sysml.udf.lib.EigenWrapper") ) 
-               //else if( className.equals(EigenWrapper.class.getName()) ) 
//string ref for build flexibility
-               {
-                       Hop input = fop.getInput().get(0);
-                       callVars.put(fop.getOutputVariableNames()[0], 
createOutputMatrix(input.getDim1(), 1, -1));
-                       callVars.put(fop.getOutputVariableNames()[1], 
createOutputMatrix(input.getDim1(), input.getDim1(),-1));                 
-               }
-               else if( 
className.equals("org.apache.sysml.udf.lib.LinearSolverWrapperCP") ) 
-               //else if( 
className.equals(LinearSolverWrapperCP.class.getName()) ) //string ref for 
build flexibility
-               {
-                       Hop input = fop.getInput().get(1);
-                       callVars.put(fop.getOutputVariableNames()[0], 
createOutputMatrix(input.getDim1(), 1, -1));
-               }
                else if(   className.equals(DynamicReadMatrixCP.class.getName())
                                || 
className.equals(DynamicReadMatrixRcCP.class.getName()) ) 
                {

http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java b/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java
deleted file mode 100644
index 5443893..0000000
--- a/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.udf.lib;
-
-
-import org.apache.sysml.udf.FunctionParameter;
-import org.apache.sysml.udf.Matrix;
-import org.apache.sysml.udf.PackageFunction;
-import org.apache.sysml.udf.Matrix.ValueType;
-
-/**
- * Wrapper class to deNaN matrices by replacing all NaNs with zeros,
- * made by modifying <code>OrderWrapper.java</code>
- */
-@Deprecated
-public class DeNaNWrapper extends PackageFunction 
-{
-       
-       private static final long serialVersionUID = 1L;
-       private static  final String OUTPUT_FILE = "TMP";
-
-       //return matrix
-       private Matrix ret;
-
-       @Override
-       public int getNumFunctionOutputs() 
-       {
-               return 1;       
-       }
-
-       @Override
-       public FunctionParameter getFunctionOutput(int pos) 
-       {       
-               if(pos == 0)
-                       return ret;
-               
-               throw new RuntimeException("Invalid function output being 
requested");
-       }
-
-       @Override
-       public void execute() 
-       { 
-               try 
-               {
-                       Matrix inM = (Matrix) getFunctionInput(0);
-                       double [][] inData = inM.getMatrixAsDoubleArray();
-                       for (int i = 0; i < inData.length; i++) {
-                               for (int j = 0; j < inData[i].length; j++) {
-                                       if (Double.isNaN (inData [i][j])) {
-                                               inData [i][j] = 0.0;
-                       }   }   }
-                       //create and copy output matrix         
-                       String dir = createOutputFilePathAndName( OUTPUT_FILE 
);        
-                       ret = new Matrix( dir, inM.getNumRows(), 
inM.getNumCols(), ValueType.Double );
-                       ret.setMatrixDoubleArray(inData);
-               } 
-               catch (Exception e) 
-               {
-                       throw new RuntimeException("Error executing external 
removeNaN function", e);
-               }
-       }
-}

http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java b/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java
deleted file mode 100644
index d964c0a..0000000
--- a/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java
+++ /dev/null
@@ -1,79 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.udf.lib;
-
-
-import org.apache.sysml.udf.FunctionParameter;
-import org.apache.sysml.udf.Matrix;
-import org.apache.sysml.udf.PackageFunction;
-import org.apache.sysml.udf.Matrix.ValueType;
-
-/**
- * Wrapper class to deNegInfinity matrices by replacing all Negative Infinities
- * with zeros, made by modifying <code>DeNaNWrapper.java</code>
- */
-@Deprecated
-public class DeNegInfinityWrapper extends PackageFunction 
-{
-       
-       private static final long serialVersionUID = 1L;
-       private static final String OUTPUT_FILE = "TMP";
-
-       //return matrix
-       private Matrix ret;
-
-       @Override
-       public int getNumFunctionOutputs() 
-       {
-               return 1;       
-       }
-
-       @Override
-       public FunctionParameter getFunctionOutput(int pos) 
-       {       
-               if(pos == 0)
-                       return ret;
-               
-               throw new RuntimeException("Invalid function output being 
requested");
-       }
-
-       @Override
-       public void execute() 
-       { 
-               try 
-               {
-                       Matrix inM = (Matrix) getFunctionInput(0);
-                       double [][] inData = inM.getMatrixAsDoubleArray();
-                       for (int i = 0; i < inData.length; i++) {
-                               for (int j = 0; j < inData[i].length; j++) {
-                                       if (Double.NEGATIVE_INFINITY == inData 
[i][j]) {
-                                               inData [i][j] = 0.0;
-                       }   }   }
-                       //create and copy output matrix         
-                       String dir = createOutputFilePathAndName( OUTPUT_FILE 
);        
-                       ret = new Matrix( dir, inM.getNumRows(), 
inM.getNumCols(), ValueType.Double );
-                       ret.setMatrixDoubleArray(inData);
-               } 
-               catch (Exception e) 
-               {
-                       throw new RuntimeException("Error executing external 
order function", e);
-               }
-       }
-}

http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java b/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java
deleted file mode 100644
index 845d92e..0000000
--- a/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java
+++ /dev/null
@@ -1,146 +0,0 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements.  See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership.  The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License.  You may obtain a copy of the License at
- * 
- *   http://www.apache.org/licenses/LICENSE-2.0
- * 
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied.  See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
-package org.apache.sysml.udf.lib;
-
-import java.util.Arrays;
-import java.util.Comparator;
-
-import org.apache.sysml.runtime.matrix.data.InputInfo;
-import org.apache.sysml.runtime.matrix.data.MatrixBlock;
-import org.apache.sysml.runtime.matrix.data.OutputInfo;
-import org.apache.sysml.udf.FunctionParameter;
-import org.apache.sysml.udf.Matrix;
-import org.apache.sysml.udf.PackageFunction;
-import org.apache.sysml.udf.Scalar;
-import org.apache.sysml.udf.Matrix.ValueType;
-
-/**
- * Wrapper class for Sorting and Creating of a Permutation Matrix
- * 
- * Sort single-column matrix and produce a permutation matrix. Pre-multiplying
- * the input matrix with the permutation matrix produces a sorted matrix. A
- * permutation matrix is a matrix where each row and each column as exactly one
- * 1: To From 1
- * 
- * Input: (n x 1)-matrix, and true/false for sorting in descending order 
Output:
- * (n x n)- matrix
- * 
- * permutation_matrix= externalFunction(Matrix[Double] A, Boolean desc) return
- * (Matrix[Double] P) implemented in
- * (classname="org.apache.sysml.udf.lib.PermutationMatrixWrapper"
- * ,exectype="mem"); A = read( "Data/A.mtx"); P = permutation_matrix( A[,2],
- * false); B = P %*% A
- * 
- */
-@Deprecated
-public class PermutationMatrixWrapper extends PackageFunction 
-{
-       
-       private static final long serialVersionUID = 1L;
-       private static final String OUTPUT_FILE = "TMP";
-
-       // return matrix
-       private Matrix _ret;
-
-       @Override
-       public int getNumFunctionOutputs() {
-               return 1;
-       }
-
-       @Override
-       public FunctionParameter getFunctionOutput(int pos) {
-               if (pos == 0)
-                       return _ret;
-
-               throw new RuntimeException(
-                               "Invalid function output being requested");
-       }
-
-       @Override
-       public void execute() {
-               try {
-                       Matrix inM = (Matrix) getFunctionInput(0);
-                       double[][] inData = inM.getMatrixAsDoubleArray();
-                       boolean desc = Boolean.parseBoolean(((Scalar) 
getFunctionInput(1))
-                                       .getValue());
-
-                       // add index column as first column
-                       double[][] idxData = new double[(int) 
inM.getNumRows()][2];
-                       for (int i = 0; i < idxData.length; i++) {
-                               idxData[i][0] = i;
-                               idxData[i][1] = inData[i][0];
-                       }
-
-                       // sort input matrix (in-place)
-                       if (!desc) // asc
-                               Arrays.sort(idxData, new AscRowComparator(1));
-                       else
-                               // desc
-                               Arrays.sort(idxData, new DescRowComparator(1));
-
-                       // create and populate sparse matrixblock for result
-                       MatrixBlock mb = new MatrixBlock(idxData.length, 
idxData.length,
-                                       true, idxData.length);
-                       for (int i = 0; i < idxData.length; i++) {
-                               mb.quickSetValue(i, (int) idxData[i][0], 1.0);
-                       }
-                       mb.examSparsity();
-
-                       // set result
-                       String dir = createOutputFilePathAndName(OUTPUT_FILE);
-                       _ret = new Matrix(dir, mb.getNumRows(), 
mb.getNumColumns(),
-                                       ValueType.Double);
-                       _ret.setMatrixDoubleArray(mb, 
OutputInfo.BinaryBlockOutputInfo,
-                                       InputInfo.BinaryBlockInputInfo);
-               } 
-               catch (Exception e) {
-                       throw new RuntimeException(
-                                       "Error executing external 
permutation_matrix function", e);
-               }
-       }
-
-       private static class AscRowComparator implements Comparator<double[]> {
-               private int _col = -1;
-
-               public AscRowComparator(int col) {
-                       _col = col;
-               }
-
-               @Override
-               public int compare(double[] arg0, double[] arg1) {
-                       return (arg0[_col] < arg1[_col] ? -1
-                                       : (arg0[_col] == arg1[_col] ? 0 : 1));
-               }
-       }
-
-       private static class DescRowComparator implements Comparator<double[]> {
-               private int _col = -1;
-
-               public DescRowComparator(int col) {
-                       _col = col;
-               }
-
-               @Override
-               public int compare(double[] arg0, double[] arg1) {
-                       return (arg0[_col] > arg1[_col] ? -1
-                                       : (arg0[_col] == arg1[_col] ? 0 : 1));
-               }
-       }
-}

http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/test/scripts/applications/ctableStats/stratstats.dml
----------------------------------------------------------------------
diff --git a/src/test/scripts/applications/ctableStats/stratstats.dml b/src/test/scripts/applications/ctableStats/stratstats.dml
deleted file mode 100644
index 5d190e7..0000000
--- a/src/test/scripts/applications/ctableStats/stratstats.dml
+++ /dev/null
@@ -1,350 +0,0 @@
-#-------------------------------------------------------------
-#
-# Licensed to the Apache Software Foundation (ASF) under one
-# or more contributor license agreements.  See the NOTICE file
-# distributed with this work for additional information
-# regarding copyright ownership.  The ASF licenses this file
-# to you under the Apache License, Version 2.0 (the
-# "License"); you may not use this file except in compliance
-# with the License.  You may obtain a copy of the License at
-# 
-#   http://www.apache.org/licenses/LICENSE-2.0
-# 
-# Unless required by applicable law or agreed to in writing,
-# software distributed under the License is distributed on an
-# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-# KIND, either express or implied.  See the License for the
-# specific language governing permissions and limitations
-# under the License.
-#
-#-------------------------------------------------------------
-
-# STRATIFIED BIVARIATE STATISTICS, VERSION 2
-# 
-# INPUT  1: Dataset with records as rows (matrix filename)
-# INPUT  2: The stratum ID column number (integer)
-#   Stratum ID must be a small positive integer; fractional values are rounded; if 0 or less, shifted to positive.
-# INPUT  3: 1st variate column numbers (matrix filename)
-# INPUT  4: 2nd variate column numbers (matrix filename)
-# INPUT  5: Output (matrix filename)
-#
-# OUTPUT 1: Output Matrix with 40 columns, containing the following information:
-#     Rows: One row per each distinct pair (1st variate, 2nd variate)
-#     Col 01: 1st variate column number
-#     Col 02: 1st variate global presence count
-#     Col 03: 1st variate global mean
-#     Col 04: 1st variate global standard deviation
-#     Col 05: 1st variate stratified standard deviation
-#     Col 06: R-squared, 1st variate vs. strata
-#     Col 07: P-value, 1st variate vs. strata
-#     Col 08-10: Reserved
-#     Col 11: 2nd variate column number
-#     Col 12: 2nd variate global presence count
-#     Col 13: 2nd variate global mean
-#     Col 14: 2nd variate global standard deviation
-#     Col 15: 2nd variate stratified standard deviation
-#     Col 16: R-squared, 2nd variate vs. strata
-#     Col 17: P-value, 2nd variate vs. strata
-#     Col 18-20: Reserved
-#     Col 21: Global 1st & 2nd variate presence count
-#     Col 22: Global regression slope (2nd variate vs. 1st variate)
-#     Col 23: Global regression slope standard deviation
-#     Col 24: Global correlation = +/- sqrt(R-squared)
-#     Col 25: Global residual standard deviation
-#     Col 26: Global R-squared
-#     Col 27: Global P-value for hypothesis "slope = 0"
-#     Col 28-30: Reserved
-#     Col 31: Stratified 1st & 2nd variate presence count
-#     Col 32: Stratified regression slope (2nd variate vs. 1st variate)
-#     Col 33: Stratified regression slope standard deviation
-#     Col 34: Stratified correlation = +/- sqrt(R-squared)
-#     Col 35: Stratified residual standard deviation
-#     Col 36: Stratified R-squared
-#     Col 37: Stratified P-value for hypothesis "slope = 0"
-#     Col 38: Number of strata with at least two counted points
-#     Col 39-40: Reserved
-#     TO DO: GOODNESS OF FIT MEASURE
-#
-# EXAMPLE:
-# hadoop jar SystemML.jar -f PATH/stratstats.dml -exec singlenode -args PATH/stratstats_test_data.mtx 1 PATH/stratstats_test_X.mtx PATH/stratstats_test_Y.mtx PATH/stratstats_test_output.mtx
-
-NaN = 0/0;
-
-print ("BEGIN STRATIFIED STATISTICS SCRIPT");
-
-print ("Reading the input matrices...");
-
-DataWithNaNs = read ($1, format = "text");
-Xcols = read ($3, format = "text");
-Ycols = read ($4, format = "text");
-stratum_column_id = $2;
-num_records  = nrow(DataWithNaNs);
-num_attrs    = ncol(DataWithNaNs);
-num_attrs_X  = ncol(Xcols);
-num_attrs_Y  = ncol(Ycols);
-num_attrs_XY = num_attrs_X * num_attrs_Y;
-
-
-print ("Preparing the variates...");
-
-Data = deNaN (DataWithNaNs);
-DataNaNmask = ppred (DataWithNaNs, NaN, "==");
-
-tXcols = t(Xcols);
-ones = matrix (1.0, rows = num_attrs_X, cols = 1);
-one_to_num_attrs_X = sumup (ones);
-ProjX = matrix (0.0, rows = num_attrs, cols = num_attrs_X);
-ProjX_ctable = table (tXcols, one_to_num_attrs_X);
-ProjX [1:nrow(ProjX_ctable), ] = ProjX_ctable;
-X = Data %*% ProjX;
-X_mask = 1 - (DataNaNmask %*% ProjX);
-
-tYcols = t(Ycols);
-ones = matrix (1.0, rows = num_attrs_Y, cols = 1);
-one_to_num_attrs_Y = sumup (ones);
-ProjY = matrix (0.0, rows = num_attrs, cols = num_attrs_Y);
-ProjY_ctable = table (tYcols, one_to_num_attrs_Y);
-ProjY [1:nrow(ProjY_ctable), ] = ProjY_ctable;
-Y = Data %*% ProjY;
-Y_mask = 1 - (DataNaNmask %*% ProjY);
-
-
-print ("Preparing the strata...");
-
-Proj_to_deNaN_strata = diag (1 - DataNaNmask [, stratum_column_id]);
-Proj_to_deNaN_strata = removeEmpty (target = Proj_to_deNaN_strata, margin = 
"rows");
-vector_of_strata_with_empty_but_no_NaNs = round (Proj_to_deNaN_strata %*% 
(Data [, stratum_column_id]));
-vector_of_strata_with_empty_but_no_NaNs = 
vector_of_strata_with_empty_but_no_NaNs + (1 - min 
(vector_of_strata_with_empty_but_no_NaNs));
-num_strata_with_empty_but_no_NaNs = max 
(vector_of_strata_with_empty_but_no_NaNs);
-num_records_with_nonNaN_strata = nrow (Proj_to_deNaN_strata);
-ones = matrix (1.0, rows = num_records_with_nonNaN_strata, cols = 1);
-one_to_num_records_with_nonNaN_strata = sumup (ones);
-StrataSummator_with_empty_from_nonNaNs = table 
(vector_of_strata_with_empty_but_no_NaNs, 
one_to_num_records_with_nonNaN_strata);
-StrataSummator_from_nonNaNs = removeEmpty (target = 
StrataSummator_with_empty_from_nonNaNs, margin = "rows");
-StrataSummator = StrataSummator_from_nonNaNs %*% Proj_to_deNaN_strata;
-num_strata = nrow (StrataSummator);
-num_empty_strata = num_strata_with_empty_but_no_NaNs - num_strata;
-print ("There are " + num_strata + " nonempty strata and " + num_empty_strata + " empty but non-NaN strata.");
-
-print ("Computing the global single-variate statistics...");
-
-cnt_X_global = colSums (X_mask);
-cnt_Y_global = colSums (Y_mask);
-avg_X_global = colSums (X) / cnt_X_global;
-avg_Y_global = colSums (Y) / cnt_Y_global;
-var_sumX_global = colSums (X * X) - cnt_X_global * (avg_X_global * 
avg_X_global);
-var_sumY_global = colSums (Y * Y) - cnt_Y_global * (avg_Y_global * 
avg_Y_global);
-                 sqrt_failsafe_input_1 = var_sumX_global / (cnt_X_global - 1);
-stdev_X_global = sqrt_failsafe (sqrt_failsafe_input_1);
-                 sqrt_failsafe_input_2 = var_sumY_global / (cnt_Y_global - 1)
-stdev_Y_global = sqrt_failsafe (sqrt_failsafe_input_2);
-
-print ("Computing the stratified single-variate statistics...");
-
-# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata
-
-Cnt_X_per_stratum = StrataSummator %*% X_mask;
-Cnt_Y_per_stratum = StrataSummator %*% Y_mask;
-Is_none_X_per_stratum = ppred (Cnt_X_per_stratum, 0, "==");
-Is_none_Y_per_stratum = ppred (Cnt_Y_per_stratum, 0, "==");
-One_over_cnt_X_per_stratum = (1 - Is_none_X_per_stratum) / (Cnt_X_per_stratum 
+ Is_none_X_per_stratum);
-One_over_cnt_Y_per_stratum = (1 - Is_none_Y_per_stratum) / (Cnt_Y_per_stratum 
+ Is_none_Y_per_stratum);
-num_X_nonempty_strata = num_strata - colSums (Is_none_X_per_stratum);
-num_Y_nonempty_strata = num_strata - colSums (Is_none_Y_per_stratum);
-
-Sum_X_per_stratum  = StrataSummator %*% X;
-Sum_Y_per_stratum  = StrataSummator %*% Y;
-
-# Recompute some global statistics to exclude NaN-stratum records
-
-cnt_X_excluding_NaNstrata = colSums (Cnt_X_per_stratum);
-cnt_Y_excluding_NaNstrata = colSums (Cnt_Y_per_stratum);
-sum_X_excluding_NaNstrata = colSums (Sum_X_per_stratum);
-sum_Y_excluding_NaNstrata = colSums (Sum_Y_per_stratum);
-var_sumX_excluding_NaNstrata = colSums (StrataSummator %*% (X * X)) - 
(sum_X_excluding_NaNstrata * sum_X_excluding_NaNstrata) / 
cnt_X_excluding_NaNstrata;
-var_sumY_excluding_NaNstrata = colSums (StrataSummator %*% (Y * Y)) - 
(sum_Y_excluding_NaNstrata * sum_Y_excluding_NaNstrata) / 
cnt_Y_excluding_NaNstrata;
-
-# Compute the stratified statistics
-
-var_sumX_stratified = colSums (StrataSummator %*% (X * X)) - colSums 
(One_over_cnt_X_per_stratum * Sum_X_per_stratum * Sum_X_per_stratum);
-var_sumY_stratified = colSums (StrataSummator %*% (Y * Y)) - colSums 
(One_over_cnt_Y_per_stratum * Sum_Y_per_stratum * Sum_Y_per_stratum);
-                      sqrt_failsafe_input_3 = var_sumX_stratified / 
(cnt_X_excluding_NaNstrata - num_X_nonempty_strata);
-stdev_X_stratified  = sqrt_failsafe (sqrt_failsafe_input_3);
-                      sqrt_failsafe_input_4 = var_sumY_stratified / 
(cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata);
-stdev_Y_stratified  = sqrt_failsafe (sqrt_failsafe_input_4);
-r_sqr_X_vs_strata   = 1 - var_sumX_stratified / var_sumX_excluding_NaNstrata;
-r_sqr_Y_vs_strata   = 1 - var_sumY_stratified / var_sumY_excluding_NaNstrata;
-fStat_X_vs_strata   = ((var_sumX_excluding_NaNstrata - var_sumX_stratified) / 
(num_X_nonempty_strata - 1)) / (var_sumX_stratified / 
(cnt_X_excluding_NaNstrata - num_X_nonempty_strata));
-fStat_Y_vs_strata   = ((var_sumY_excluding_NaNstrata - var_sumY_stratified) / 
(num_Y_nonempty_strata - 1)) / (var_sumY_stratified / 
(cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata));
-p_val_X_vs_strata   = fStat_tailprob (fStat_X_vs_strata, num_X_nonempty_strata 
- 1, cnt_X_excluding_NaNstrata - num_X_nonempty_strata);
-p_val_Y_vs_strata   = fStat_tailprob (fStat_Y_vs_strata, num_Y_nonempty_strata 
- 1, cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata);
-
-print ("Computing the global bivariate statistics...");
-
-# Compute the aggregate X vs. Y statistics and map them into proper positions
-
-cnt_XY_rectangle       = t(X_mask) %*% Y_mask;
-sum_X_forXY_rectangle  = t(X)      %*% Y_mask;
-sum_XX_forXY_rectangle = t(X * X)  %*% Y_mask;
-sum_Y_forXY_rectangle  = t(X_mask) %*% Y;
-sum_YY_forXY_rectangle = t(X_mask) %*% (Y * Y);
-sum_XY_rectangle       = t(X)      %*% Y;
-cnt_XY_global       = matrix (cnt_XY_rectangle,       rows = 1, cols = 
num_attrs_XY, byrow = TRUE);
-sum_X_forXY_global  = matrix (sum_X_forXY_rectangle,  rows = 1, cols = 
num_attrs_XY, byrow = TRUE);
-sum_XX_forXY_global = matrix (sum_XX_forXY_rectangle, rows = 1, cols = 
num_attrs_XY, byrow = TRUE);
-sum_Y_forXY_global  = matrix (sum_Y_forXY_rectangle,  rows = 1, cols = 
num_attrs_XY, byrow = TRUE);
-sum_YY_forXY_global = matrix (sum_YY_forXY_rectangle, rows = 1, cols = 
num_attrs_XY, byrow = TRUE);
-sum_XY_global       = matrix (sum_XY_rectangle,       rows = 1, cols = 
num_attrs_XY, byrow = TRUE);
-ones_XY = matrix (1.0, rows = 1, cols = num_attrs_XY);
-
-# Compute the global bivariate statistics for output
-
-cov_sumX_sumY_global   = sum_XY_global - sum_X_forXY_global * 
sum_Y_forXY_global / cnt_XY_global;
-var_sumX_forXY_global  = sum_XX_forXY_global - sum_X_forXY_global * 
sum_X_forXY_global / cnt_XY_global;
-var_sumY_forXY_global  = sum_YY_forXY_global - sum_Y_forXY_global * 
sum_Y_forXY_global / cnt_XY_global;
-slope_XY_global        = cov_sumX_sumY_global / var_sumX_forXY_global;
-                                                sqrt_failsafe_input_5 = 
var_sumX_forXY_global * var_sumY_forXY_global;
-                                                sqrt_failsafe_output_5 = 
sqrt_failsafe (sqrt_failsafe_input_5);
-corr_XY_global         = cov_sumX_sumY_global / sqrt_failsafe_output_5;
-r_sqr_X_vs_Y_global    = cov_sumX_sumY_global * cov_sumX_sumY_global / 
(var_sumX_forXY_global * var_sumY_forXY_global);
-                         sqrt_failsafe_input_6 = (1 - r_sqr_X_vs_Y_global) * 
var_sumY_forXY_global / var_sumX_forXY_global / (cnt_XY_global - 2)
-stdev_slope_XY_global  = sqrt_failsafe (sqrt_failsafe_input_6);
-                         sqrt_failsafe_input_7 = (1 - r_sqr_X_vs_Y_global) * 
var_sumY_forXY_global / (cnt_XY_global - 2)
-stdev_errY_vs_X_global = sqrt_failsafe (sqrt_failsafe_input_7);
-fStat_Y_vs_X_global    = (cnt_XY_global - 2) * r_sqr_X_vs_Y_global / (1 - 
r_sqr_X_vs_Y_global);
-p_val_Y_vs_X_global    = fStat_tailprob (fStat_Y_vs_X_global, ones_XY, 
cnt_XY_global - 2);
-
-print ("Computing the stratified bivariate statistics...");
-
-# Create projections to "intermingle" X and Y into attribute pairs
-
-Proj_X_to_XY = matrix (0.0, rows = num_attrs_X, cols = num_attrs_XY);
-Proj_Y_to_XY = matrix (0.0, rows = num_attrs_Y, cols = num_attrs_XY);
-ones_Y_row   = matrix (1.0, rows = 1, cols = num_attrs_Y);
-for (i in 1:num_attrs_X) {
-    start_cid = (i - 1) * num_attrs_Y + 1;
-    end_cid = i * num_attrs_Y;
-    Proj_X_to_XY [i, start_cid:end_cid] = ones_Y_row;
-    Proj_Y_to_XY [ , start_cid:end_cid] = diag (ones_Y_row);
-}
-
-# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata
-
-Cnt_XY_per_stratum       = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( 
Y_mask %*% Proj_Y_to_XY));
-Sum_X_forXY_per_stratum  = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( 
Y_mask %*% Proj_Y_to_XY));
-Sum_XX_forXY_per_stratum = StrataSummator %*% (((X * X) %*% Proj_X_to_XY) * ( 
Y_mask %*% Proj_Y_to_XY));
-Sum_Y_forXY_per_stratum  = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( 
Y      %*% Proj_Y_to_XY));
-Sum_YY_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * 
((Y * Y) %*% Proj_Y_to_XY));
-Sum_XY_per_stratum       = StrataSummator %*% (( X      %*% Proj_X_to_XY) * ( 
Y      %*% Proj_Y_to_XY));
-
-Is_none_XY_per_stratum = ppred (Cnt_XY_per_stratum, 0, "==");
-One_over_cnt_XY_per_stratum = (1 - Is_none_XY_per_stratum) / 
(Cnt_XY_per_stratum + Is_none_XY_per_stratum);
-num_XY_nonempty_strata = num_strata - colSums (Is_none_XY_per_stratum);
-
-# Recompute some global aggregate X vs. Y statistics to exclude NaN-stratum records
-
-cnt_XY_excluding_NaNstrata = colSums (Cnt_XY_per_stratum);
-sum_XX_forXY_excluding_NaNstrata = colSums (Sum_XX_forXY_per_stratum);
-sum_YY_forXY_excluding_NaNstrata = colSums (Sum_YY_forXY_per_stratum);
-sum_XY_excluding_NaNstrata = colSums (Sum_XY_per_stratum);
-
-# Compute the stratified bivariate statistics
-
-var_sumX_forXY_stratified = sum_XX_forXY_excluding_NaNstrata - colSums 
(Sum_X_forXY_per_stratum * Sum_X_forXY_per_stratum * 
One_over_cnt_XY_per_stratum);
-var_sumY_forXY_stratified = sum_YY_forXY_excluding_NaNstrata - colSums 
(Sum_Y_forXY_per_stratum * Sum_Y_forXY_per_stratum * 
One_over_cnt_XY_per_stratum);
-cov_sumX_sumY_stratified  = sum_XY_excluding_NaNstrata       - colSums 
(Sum_X_forXY_per_stratum * Sum_Y_forXY_per_stratum * 
One_over_cnt_XY_per_stratum);
-
-slope_XY_stratified     = cov_sumX_sumY_stratified / var_sumX_forXY_stratified;
-                                                     sqrt_failsafe_input_8 = 
var_sumX_forXY_stratified * var_sumY_forXY_stratified;
-                                                     sqrt_failsafe_output_8 = 
sqrt_failsafe (sqrt_failsafe_input_8);
-corr_XY_stratified      = cov_sumX_sumY_stratified / sqrt_failsafe_output_8;
-r_sqr_X_vs_Y_stratified = cov_sumX_sumY_stratified * cov_sumX_sumY_stratified 
/ (var_sumX_forXY_stratified * var_sumY_forXY_stratified);
-r_sqr_X_vs_Y_stratified = corr_XY_stratified * corr_XY_stratified;
-                             sqrt_failsafe_input_9 = (1 - 
r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / 
var_sumX_forXY_stratified / (cnt_XY_excluding_NaNstrata - 
num_XY_nonempty_strata - 1);
-stdev_slope_XY_stratified  = sqrt_failsafe (sqrt_failsafe_input_9);
-                             sqrt_failsafe_input_10 = (1 - 
r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / 
(cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
-stdev_errY_vs_X_stratified = sqrt_failsafe (sqrt_failsafe_input_10);
-fStat_Y_vs_X_stratified = (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata 
- 1) * r_sqr_X_vs_Y_stratified / (1 - r_sqr_X_vs_Y_stratified);
-p_val_Y_vs_X_stratified = fStat_tailprob (fStat_Y_vs_X_stratified, ones_XY, 
cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1);
-
-print ("Preparing the output matrix...");
-OutMtx = matrix (0.0, rows = 40, cols = num_attrs_XY);
-
-OutMtx [ 1, ] = Xcols              %*% Proj_X_to_XY;  # 1st variate column 
number
-OutMtx [ 2, ] = cnt_X_global       %*% Proj_X_to_XY;  # 1st variate global 
presence count
-OutMtx [ 3, ] = avg_X_global       %*% Proj_X_to_XY;  # 1st variate global mean
-OutMtx [ 4, ] = stdev_X_global     %*% Proj_X_to_XY;  # 1st variate global 
standard deviation
-OutMtx [ 5, ] = stdev_X_stratified %*% Proj_X_to_XY;  # 1st variate stratified 
standard deviation
-OutMtx [ 6, ] = r_sqr_X_vs_strata  %*% Proj_X_to_XY;  # R-squared, 1st variate 
vs. strata
-OutMtx [ 7, ] = p_val_X_vs_strata  %*% Proj_X_to_XY;  # P-value, 1st variate 
vs. strata
-OutMtx [11, ] = Ycols              %*% Proj_Y_to_XY;  # 2nd variate column 
number
-OutMtx [12, ] = cnt_Y_global       %*% Proj_Y_to_XY;  # 2nd variate global 
presence count
-OutMtx [13, ] = avg_Y_global       %*% Proj_Y_to_XY;  # 2nd variate global mean
-OutMtx [14, ] = stdev_Y_global     %*% Proj_Y_to_XY;  # 2nd variate global 
standard deviation
-OutMtx [15, ] = stdev_Y_stratified %*% Proj_Y_to_XY;  # 2nd variate stratified 
standard deviation
-OutMtx [16, ] = r_sqr_Y_vs_strata  %*% Proj_Y_to_XY;  # R-squared, 2nd variate 
vs. strata
-OutMtx [17, ] = p_val_Y_vs_strata  %*% Proj_Y_to_XY;  # P-value, 2nd variate 
vs. strata
-
-
-OutMtx [21, ] = cnt_XY_global;              # Global 1st & 2nd variate 
presence count
-OutMtx [22, ] = slope_XY_global;            # Global regression slope (2nd 
variate vs. 1st variate)
-OutMtx [23, ] = stdev_slope_XY_global;      # Global regression slope standard 
deviation
-OutMtx [24, ] = corr_XY_global;             # Global correlation = +/- 
sqrt(R-squared)
-OutMtx [25, ] = stdev_errY_vs_X_global;     # Global residual standard 
deviation
-OutMtx [26, ] = r_sqr_X_vs_Y_global;        # Global R-squared
-OutMtx [27, ] = p_val_Y_vs_X_global;        # Global P-value for hypothesis 
"slope = 0"
-OutMtx [31, ] = cnt_XY_excluding_NaNstrata; # Stratified 1st & 2nd variate 
presence count
-OutMtx [32, ] = slope_XY_stratified;        # Stratified regression slope (2nd 
variate vs. 1st variate)
-OutMtx [33, ] = stdev_slope_XY_stratified;  # Stratified regression slope 
standard deviation
-OutMtx [34, ] = corr_XY_stratified;         # Stratified correlation = +/- 
sqrt(R-squared)
-OutMtx [35, ] = stdev_errY_vs_X_stratified; # Stratified residual standard 
deviation
-OutMtx [36, ] = r_sqr_X_vs_Y_stratified;    # Stratified R-squared
-OutMtx [37, ] = p_val_Y_vs_X_stratified;    # Stratified P-value for 
hypothesis "slope = 0"
-OutMtx [38, ] = colSums (ppred (Cnt_XY_per_stratum, 2, ">="));  # Number of 
strata with at least two counted points
-
-OutMtx = t(OutMtx);
-
-print ("Writing the output matrix...");
-write (OutMtx, $5, format="text");
-print ("END STRATIFIED STATISTICS SCRIPT");
-
-
-deNaN = externalFunction (Matrix[Double] A) return (Matrix[Double] B)
-        implemented in (classname = "org.apache.sysml.udf.lib.DeNaNWrapper", exectype = "mem");
-
-fStat_tailprob = function (Matrix[double] fStat, Matrix[double] df_1, Matrix[double] df_2) return (Matrix[double] tailprob)
-{ # TEMPORARY IMPLEMENTATION
-    tailprob = fStat;
-    for (i in 1:nrow(fStat)) {
-      for (j in 1:ncol(fStat)) {
-        q = as.scalar (fStat [i, j]);
-        d1 = as.scalar (df_1 [i, j]);
-        d2 = as.scalar (df_2 [i, j]);
-        if (d1 >= 1 & d2 >= 1 & q >= 0.0) {
-            tailprob  [i, j] = pf (target = q, df1 = d1, df2 = d2, lower.tail=FALSE);
-        } else {
-            tailprob  [i, j] = 0/0;
-        }
-    } }
-}
-
-sqrt_failsafe = function (Matrix[double] input_A) return (Matrix[double] output_A)
-{
-    NaN = 0/0;
-    mask_A = ppred (input_A, 0.0, ">=");
-    prep_A = input_A * mask_A;
-    mask_A = mask_A - mask_A * (ppred (prep_A, NaN, "=="));
-    prep_A = deNaN (prep_A);
-    output_A = sqrt (prep_A) / mask_A;
-}
-
-sumup = function (Matrix[double] A) return (Matrix[double] sum_A)
-{
-    shift = 1;
-    m_A = nrow(A);
-    sum_A = A;
-    while (shift < m_A) {
-        sum_A [(shift+1):m_A, ] = sum_A [(shift+1):m_A, ] + sum_A [1:(m_A-shift), ];
-        shift = 2 * shift;
-    } 
-}

Reply via email to