Repository: systemml Updated Branches: refs/heads/master bc16b9e3d -> 608ac39c4
[MINOR] Removed unused datagen/test scripts and internal udf functions Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/608ac39c Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/608ac39c Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/608ac39c Branch: refs/heads/master Commit: 608ac39c40e4d2517c9900d901a403c0c579db7f Parents: bc16b9e Author: Matthias Boehm <mboe...@gmail.com> Authored: Sat Jun 10 20:01:00 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Sat Jun 10 20:01:00 2017 -0700 ---------------------------------------------------------------------- pom.xml | 4 +- scripts/datagen/genCorrelatedData.dml | 46 --- scripts/datagen/genLinearRegressionData.dml | 71 ---- scripts/datagen/obsolete/genCorrelatedData.dml | 46 +++ .../obsolete/genLinearRegressionData.dml | 71 ++++ .../sysml/hops/ipa/InterProceduralAnalysis.java | 19 +- .../org/apache/sysml/udf/lib/DeNaNWrapper.java | 79 ----- .../sysml/udf/lib/DeNegInfinityWrapper.java | 79 ----- .../sysml/udf/lib/PermutationMatrixWrapper.java | 146 -------- .../applications/ctableStats/stratstats.dml | 350 ------------------- 10 files changed, 119 insertions(+), 792 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/pom.xml ---------------------------------------------------------------------- diff --git a/pom.xml b/pom.xml index 0fee7f9..5b914d5 100644 --- a/pom.xml +++ b/pom.xml @@ -95,11 +95,9 @@ <directory>scripts</directory> <excludes> <exclude>algorithms/obsolete/*</exclude> - <exclude>algorithms/obsolete</exclude> + <exclude>datagen/obsolete/*</exclude> <exclude>perftest/*</exclude> - <exclude>perftest</exclude> <exclude>staging/**/*</exclude> - <exclude>staging</exclude> <exclude>nn/test/compare_backends/*</exclude> <exclude>nn/test/compare_backends/*</exclude> <exclude>nn/examples/caffe2dml/**/*</exclude> http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/genCorrelatedData.dml ---------------------------------------------------------------------- diff --git a/scripts/datagen/genCorrelatedData.dml b/scripts/datagen/genCorrelatedData.dml deleted file mode 100644 index d3289ce..0000000 --- a/scripts/datagen/genCorrelatedData.dml +++ /dev/null @@ -1,46 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# generates random correlated data -# can generate any number of variables/columns -# used to test univariate stats computation -# by systemml - -# $1 is number of variables/columns -# $2 is number of samples to create -# $3 is the location to write out the covariance mat -# $4 is the location to write out the generated data -dims = $1 -numSamples = $2 - -U = Rand(rows=dims, cols=dims, min=-1.0, max=1.0, pdf="uniform", seed=0) -denoms = sqrt(colSums(U*U)) -parfor(i in 1:dims){ - U[i,] = U[i,] / denoms -} - -C = t(U)%*%U -write(C, $3, format="binary") - -R = Rand(rows=numSamples, cols=dims, pdf="normal", seed=0) -Rc = R%*%U -write(Rc, $4, format="binary") - http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/genLinearRegressionData.dml ---------------------------------------------------------------------- diff --git a/scripts/datagen/genLinearRegressionData.dml b/scripts/datagen/genLinearRegressionData.dml deleted file mode 100644 index 10b094c..0000000 --- a/scripts/datagen/genLinearRegressionData.dml +++ /dev/null @@ -1,71 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# -# This script generates random data for linear regression. A matrix is generated -# consisting of a data matrix with a label column appended to it. -# -# INPUT PARAMETERS: -# -------------------------------------------------------------------------------------------- -# NAME TYPE DEFAULT MEANING -# -------------------------------------------------------------------------------------------- -# numSamples Int --- Number of samples -# numFeatures Int --- Number of features (independent variables) -# maxFeatureValue Int --- Maximum feature value (absolute value) -# maxWeight Int --- Maximum weight (absolute value) -# addNoise Boolean --- Determines whether noise should be added to Y -# b Double --- Intercept -# sparsity Double --- Controls the sparsity in the generated data (a value between 0 and 1) -# output String --- Location to write the generated data/label matrix -# format String --- Matrix output format -# perc Double 0.8 Percentage of training sample -# percFile String --- File to store the percentages -# -------------------------------------------------------------------------------------------- -# OUTPUT: Matrix of random data with appended label column -# --------------------------------------------------------------------------------------------- -# -# Example -# ./runStandaloneSystemML.sh algorithms/datagen/genLinearRegressionData.dml -nvargs numSamples=1000 numFeatures=50 maxFeatureValue=5 maxWeight=5 addNoise=FALSE b=0 sparsity=0.7 output=linRegData.csv format=csv -# - -perc = ifdef($perc, 0.8) -percFile = ifdef($percFile, "perc.csv") -p = matrix(0, rows=2, cols=1) -p[1,1] = perc -p[2,1] = (1-perc) -write(p, percFile, format="csv") - -X = Rand(cols=$numFeatures, max=1, min=-1, pdf="uniform", rows=$numSamples, seed=0, sparsity=$sparsity) -X = X * $maxFeatureValue - -w = Rand(cols=1, max=1, min=-1, pdf="uniform", rows=$numFeatures, seed=0) -w = w * $maxWeight - -Y = X %*% w -Y = Y + $b - -if ($addNoise == TRUE) { - noise = Rand(cols=1, pdf="normal", rows=$numSamples, seed=0) - Y = Y + noise -} - -Z = cbind(X,Y) -write(Z, $output, format=$format) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/obsolete/genCorrelatedData.dml ---------------------------------------------------------------------- diff --git a/scripts/datagen/obsolete/genCorrelatedData.dml b/scripts/datagen/obsolete/genCorrelatedData.dml new file mode 100644 index 0000000..d3289ce --- /dev/null +++ b/scripts/datagen/obsolete/genCorrelatedData.dml @@ -0,0 +1,46 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# generates random correlated data +# can generate any number of variables/columns +# used to test univariate stats computation +# by systemml + +# $1 is number of variables/columns +# $2 is number of samples to create +# $3 is the location to write out the covariance mat +# $4 is the location to write out the generated data +dims = $1 +numSamples = $2 + +U = Rand(rows=dims, cols=dims, min=-1.0, max=1.0, pdf="uniform", seed=0) +denoms = sqrt(colSums(U*U)) +parfor(i in 1:dims){ + U[i,] = U[i,] / denoms +} + +C = t(U)%*%U +write(C, $3, format="binary") + +R = Rand(rows=numSamples, cols=dims, pdf="normal", seed=0) +Rc = R%*%U +write(Rc, $4, format="binary") + http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/scripts/datagen/obsolete/genLinearRegressionData.dml ---------------------------------------------------------------------- diff --git a/scripts/datagen/obsolete/genLinearRegressionData.dml b/scripts/datagen/obsolete/genLinearRegressionData.dml new file mode 100644 index 0000000..10b094c --- /dev/null +++ b/scripts/datagen/obsolete/genLinearRegressionData.dml @@ -0,0 +1,71 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# +# This script generates random data for linear regression. A matrix is generated +# consisting of a data matrix with a label column appended to it. +# +# INPUT PARAMETERS: +# -------------------------------------------------------------------------------------------- +# NAME TYPE DEFAULT MEANING +# -------------------------------------------------------------------------------------------- +# numSamples Int --- Number of samples +# numFeatures Int --- Number of features (independent variables) +# maxFeatureValue Int --- Maximum feature value (absolute value) +# maxWeight Int --- Maximum weight (absolute value) +# addNoise Boolean --- Determines whether noise should be added to Y +# b Double --- Intercept +# sparsity Double --- Controls the sparsity in the generated data (a value between 0 and 1) +# output String --- Location to write the generated data/label matrix +# format String --- Matrix output format +# perc Double 0.8 Percentage of training sample +# percFile String --- File to store the percentages +# -------------------------------------------------------------------------------------------- +# OUTPUT: Matrix of random data with appended label column +# --------------------------------------------------------------------------------------------- +# +# Example +# ./runStandaloneSystemML.sh algorithms/datagen/genLinearRegressionData.dml -nvargs numSamples=1000 numFeatures=50 maxFeatureValue=5 maxWeight=5 addNoise=FALSE b=0 sparsity=0.7 output=linRegData.csv format=csv +# + +perc = ifdef($perc, 0.8) +percFile = ifdef($percFile, "perc.csv") +p = matrix(0, rows=2, cols=1) +p[1,1] = perc +p[2,1] = (1-perc) +write(p, percFile, format="csv") + +X = Rand(cols=$numFeatures, max=1, min=-1, pdf="uniform", rows=$numSamples, seed=0, sparsity=$sparsity) +X = X * $maxFeatureValue + +w = Rand(cols=1, max=1, min=-1, pdf="uniform", rows=$numFeatures, seed=0) +w = w * $maxWeight + +Y = X %*% w +Y = Y + $b + +if ($addNoise == TRUE) { + noise = Rand(cols=1, pdf="normal", rows=$numSamples, seed=0) + Y = Y + noise +} + +Z = cbind(X,Y) +write(Z, $output, format=$format) \ No newline at end of file http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java b/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java index 19396a9..3562c9f 100644 --- a/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java +++ b/src/main/java/org/apache/sysml/hops/ipa/InterProceduralAnalysis.java @@ -75,8 +75,6 @@ import org.apache.sysml.runtime.instructions.cp.ScalarObject; import org.apache.sysml.runtime.instructions.cp.ScalarObjectFactory; import org.apache.sysml.runtime.matrix.MatrixCharacteristics; import org.apache.sysml.runtime.matrix.MatrixFormatMetaData; -import org.apache.sysml.udf.lib.DeNaNWrapper; -import org.apache.sysml.udf.lib.DeNegInfinityWrapper; import org.apache.sysml.udf.lib.DynamicReadMatrixCP; import org.apache.sysml.udf.lib.DynamicReadMatrixRcCP; import org.apache.sysml.udf.lib.OrderWrapper; @@ -908,28 +906,13 @@ public class InterProceduralAnalysis { String className = fstmt.getOtherParams().get(ExternalFunctionStatement.CLASS_NAME); - if( className.equals(OrderWrapper.class.getName()) - || className.equals(DeNaNWrapper.class.getCanonicalName()) - || className.equals(DeNegInfinityWrapper.class.getCanonicalName()) ) + if( className.equals(OrderWrapper.class.getName()) ) { Hop input = fop.getInput().get(0); long lnnz = className.equals(OrderWrapper.class.getName()) ? input.getNnz() : -1; MatrixObject moOut = createOutputMatrix(input.getDim1(), input.getDim2(),lnnz); callVars.put(fop.getOutputVariableNames()[0], moOut); } - else if( className.equals("org.apache.sysml.udf.lib.EigenWrapper") ) - //else if( className.equals(EigenWrapper.class.getName()) ) //string ref for build flexibility - { - Hop input = fop.getInput().get(0); - callVars.put(fop.getOutputVariableNames()[0], createOutputMatrix(input.getDim1(), 1, -1)); - callVars.put(fop.getOutputVariableNames()[1], createOutputMatrix(input.getDim1(), input.getDim1(),-1)); - } - else if( className.equals("org.apache.sysml.udf.lib.LinearSolverWrapperCP") ) - //else if( className.equals(LinearSolverWrapperCP.class.getName()) ) //string ref for build flexibility - { - Hop input = fop.getInput().get(1); - callVars.put(fop.getOutputVariableNames()[0], createOutputMatrix(input.getDim1(), 1, -1)); - } else if( className.equals(DynamicReadMatrixCP.class.getName()) || className.equals(DynamicReadMatrixRcCP.class.getName()) ) { http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java b/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java deleted file mode 100644 index 5443893..0000000 --- a/src/main/java/org/apache/sysml/udf/lib/DeNaNWrapper.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.udf.lib; - - -import org.apache.sysml.udf.FunctionParameter; -import org.apache.sysml.udf.Matrix; -import org.apache.sysml.udf.PackageFunction; -import org.apache.sysml.udf.Matrix.ValueType; - -/** - * Wrapper class to deNaN matrices by replacing all NaNs with zeros, - * made by modifying <code>OrderWrapper.java</code> - */ -@Deprecated -public class DeNaNWrapper extends PackageFunction -{ - - private static final long serialVersionUID = 1L; - private static final String OUTPUT_FILE = "TMP"; - - //return matrix - private Matrix ret; - - @Override - public int getNumFunctionOutputs() - { - return 1; - } - - @Override - public FunctionParameter getFunctionOutput(int pos) - { - if(pos == 0) - return ret; - - throw new RuntimeException("Invalid function output being requested"); - } - - @Override - public void execute() - { - try - { - Matrix inM = (Matrix) getFunctionInput(0); - double [][] inData = inM.getMatrixAsDoubleArray(); - for (int i = 0; i < inData.length; i++) { - for (int j = 0; j < inData[i].length; j++) { - if (Double.isNaN (inData [i][j])) { - inData [i][j] = 0.0; - } } } - //create and copy output matrix - String dir = createOutputFilePathAndName( OUTPUT_FILE ); - ret = new Matrix( dir, inM.getNumRows(), inM.getNumCols(), ValueType.Double ); - ret.setMatrixDoubleArray(inData); - } - catch (Exception e) - { - throw new RuntimeException("Error executing external removeNaN function", e); - } - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java b/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java deleted file mode 100644 index d964c0a..0000000 --- a/src/main/java/org/apache/sysml/udf/lib/DeNegInfinityWrapper.java +++ /dev/null @@ -1,79 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.udf.lib; - - -import org.apache.sysml.udf.FunctionParameter; -import org.apache.sysml.udf.Matrix; -import org.apache.sysml.udf.PackageFunction; -import org.apache.sysml.udf.Matrix.ValueType; - -/** - * Wrapper class to deNegInfinity matrices by replacing all Negative Infinities - * with zeros, made by modifying <code>DeNaNWrapper.java</code> - */ -@Deprecated -public class DeNegInfinityWrapper extends PackageFunction -{ - - private static final long serialVersionUID = 1L; - private static final String OUTPUT_FILE = "TMP"; - - //return matrix - private Matrix ret; - - @Override - public int getNumFunctionOutputs() - { - return 1; - } - - @Override - public FunctionParameter getFunctionOutput(int pos) - { - if(pos == 0) - return ret; - - throw new RuntimeException("Invalid function output being requested"); - } - - @Override - public void execute() - { - try - { - Matrix inM = (Matrix) getFunctionInput(0); - double [][] inData = inM.getMatrixAsDoubleArray(); - for (int i = 0; i < inData.length; i++) { - for (int j = 0; j < inData[i].length; j++) { - if (Double.NEGATIVE_INFINITY == inData [i][j]) { - inData [i][j] = 0.0; - } } } - //create and copy output matrix - String dir = createOutputFilePathAndName( OUTPUT_FILE ); - ret = new Matrix( dir, inM.getNumRows(), inM.getNumCols(), ValueType.Double ); - ret.setMatrixDoubleArray(inData); - } - catch (Exception e) - { - throw new RuntimeException("Error executing external order function", e); - } - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java b/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java deleted file mode 100644 index 845d92e..0000000 --- a/src/main/java/org/apache/sysml/udf/lib/PermutationMatrixWrapper.java +++ /dev/null @@ -1,146 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.udf.lib; - -import java.util.Arrays; -import java.util.Comparator; - -import org.apache.sysml.runtime.matrix.data.InputInfo; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; -import org.apache.sysml.runtime.matrix.data.OutputInfo; -import org.apache.sysml.udf.FunctionParameter; -import org.apache.sysml.udf.Matrix; -import org.apache.sysml.udf.PackageFunction; -import org.apache.sysml.udf.Scalar; -import org.apache.sysml.udf.Matrix.ValueType; - -/** - * Wrapper class for Sorting and Creating of a Permutation Matrix - * - * Sort single-column matrix and produce a permutation matrix. Pre-multiplying - * the input matrix with the permutation matrix produces a sorted matrix. A - * permutation matrix is a matrix where each row and each column as exactly one - * 1: To From 1 - * - * Input: (n x 1)-matrix, and true/false for sorting in descending order Output: - * (n x n)- matrix - * - * permutation_matrix= externalFunction(Matrix[Double] A, Boolean desc) return - * (Matrix[Double] P) implemented in - * (classname="org.apache.sysml.udf.lib.PermutationMatrixWrapper" - * ,exectype="mem"); A = read( "Data/A.mtx"); P = permutation_matrix( A[,2], - * false); B = P %*% A - * - */ -@Deprecated -public class PermutationMatrixWrapper extends PackageFunction -{ - - private static final long serialVersionUID = 1L; - private static final String OUTPUT_FILE = "TMP"; - - // return matrix - private Matrix _ret; - - @Override - public int getNumFunctionOutputs() { - return 1; - } - - @Override - public FunctionParameter getFunctionOutput(int pos) { - if (pos == 0) - return _ret; - - throw new RuntimeException( - "Invalid function output being requested"); - } - - @Override - public void execute() { - try { - Matrix inM = (Matrix) getFunctionInput(0); - double[][] inData = inM.getMatrixAsDoubleArray(); - boolean desc = Boolean.parseBoolean(((Scalar) getFunctionInput(1)) - .getValue()); - - // add index column as first column - double[][] idxData = new double[(int) inM.getNumRows()][2]; - for (int i = 0; i < idxData.length; i++) { - idxData[i][0] = i; - idxData[i][1] = inData[i][0]; - } - - // sort input matrix (in-place) - if (!desc) // asc - Arrays.sort(idxData, new AscRowComparator(1)); - else - // desc - Arrays.sort(idxData, new DescRowComparator(1)); - - // create and populate sparse matrixblock for result - MatrixBlock mb = new MatrixBlock(idxData.length, idxData.length, - true, idxData.length); - for (int i = 0; i < idxData.length; i++) { - mb.quickSetValue(i, (int) idxData[i][0], 1.0); - } - mb.examSparsity(); - - // set result - String dir = createOutputFilePathAndName(OUTPUT_FILE); - _ret = new Matrix(dir, mb.getNumRows(), mb.getNumColumns(), - ValueType.Double); - _ret.setMatrixDoubleArray(mb, OutputInfo.BinaryBlockOutputInfo, - InputInfo.BinaryBlockInputInfo); - } - catch (Exception e) { - throw new RuntimeException( - "Error executing external permutation_matrix function", e); - } - } - - private static class AscRowComparator implements Comparator<double[]> { - private int _col = -1; - - public AscRowComparator(int col) { - _col = col; - } - - @Override - public int compare(double[] arg0, double[] arg1) { - return (arg0[_col] < arg1[_col] ? -1 - : (arg0[_col] == arg1[_col] ? 0 : 1)); - } - } - - private static class DescRowComparator implements Comparator<double[]> { - private int _col = -1; - - public DescRowComparator(int col) { - _col = col; - } - - @Override - public int compare(double[] arg0, double[] arg1) { - return (arg0[_col] > arg1[_col] ? -1 - : (arg0[_col] == arg1[_col] ? 0 : 1)); - } - } -} http://git-wip-us.apache.org/repos/asf/systemml/blob/608ac39c/src/test/scripts/applications/ctableStats/stratstats.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/ctableStats/stratstats.dml b/src/test/scripts/applications/ctableStats/stratstats.dml deleted file mode 100644 index 5d190e7..0000000 --- a/src/test/scripts/applications/ctableStats/stratstats.dml +++ /dev/null @@ -1,350 +0,0 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -# STRATIFIED BIVARIATE STATISTICS, VERSION 2 -# -# INPUT 1: Dataset with records as rows (matrix filename) -# INPUT 2: The stratum ID column number (integer) -# Stratum ID must be a small positive integer; fractional values are rounded; if 0 or less, shifted to positive. -# INPUT 3: 1st variate column numbers (matrix filename) -# INPUT 4: 2nd variate column numbers (matrix filename) -# INPUT 5: Output (matrix filename) -# -# OUTPUT 1: Output Matrix with 40 columns, containing the following information: -# Rows: One row per each distinct pair (1st variate, 2nd variate) -# Col 01: 1st variate column number -# Col 02: 1st variate global presence count -# Col 03: 1st variate global mean -# Col 04: 1st variate global standard deviation -# Col 05: 1st variate stratified standard deviation -# Col 06: R-squared, 1st variate vs. strata -# Col 07: P-value, 1st variate vs. strata -# Col 08-10: Reserved -# Col 11: 2nd variate column number -# Col 12: 2nd variate global presence count -# Col 13: 2nd variate global mean -# Col 14: 2nd variate global standard deviation -# Col 15: 2nd variate stratified standard deviation -# Col 16: R-squared, 2nd variate vs. strata -# Col 17: P-value, 2nd variate vs. strata -# Col 18-20: Reserved -# Col 21: Global 1st & 2nd variate presence count -# Col 22: Global regression slope (2nd variate vs. 1st variate) -# Col 23: Global regression slope standard deviation -# Col 24: Global correlation = +/- sqrt(R-squared) -# Col 25: Global residual standard deviation -# Col 26: Global R-squared -# Col 27: Global P-value for hypothesis "slope = 0" -# Col 28-30: Reserved -# Col 31: Stratified 1st & 2nd variate presence count -# Col 32: Stratified regression slope (2nd variate vs. 1st variate) -# Col 33: Stratified regression slope standard deviation -# Col 34: Stratified correlation = +/- sqrt(R-squared) -# Col 35: Stratified residual standard deviation -# Col 36: Stratified R-squared -# Col 37: Stratified P-value for hypothesis "slope = 0" -# Col 38: Number of strata with at least two counted points -# Col 39-40: Reserved -# TO DO: GOODNESS OF FIT MEASURE -# -# EXAMPLE: -# hadoop jar SystemML.jar -f PATH/stratstats.dml -exec singlenode -args PATH/stratstats_test_data.mtx 1 PATH/stratstats_test_X.mtx PATH/stratstats_test_Y.mtx PATH/stratstats_test_output.mtx - -NaN = 0/0; - -print ("BEGIN STRATIFIED STATISTICS SCRIPT"); - -print ("Reading the input matrices..."); - -DataWithNaNs = read ($1, format = "text"); -Xcols = read ($3, format = "text"); -Ycols = read ($4, format = "text"); -stratum_column_id = $2; -num_records = nrow(DataWithNaNs); -num_attrs = ncol(DataWithNaNs); -num_attrs_X = ncol(Xcols); -num_attrs_Y = ncol(Ycols); -num_attrs_XY = num_attrs_X * num_attrs_Y; - - -print ("Preparing the variates..."); - -Data = deNaN (DataWithNaNs); -DataNaNmask = ppred (DataWithNaNs, NaN, "=="); - -tXcols = t(Xcols); -ones = matrix (1.0, rows = num_attrs_X, cols = 1); -one_to_num_attrs_X = sumup (ones); -ProjX = matrix (0.0, rows = num_attrs, cols = num_attrs_X); -ProjX_ctable = table (tXcols, one_to_num_attrs_X); -ProjX [1:nrow(ProjX_ctable), ] = ProjX_ctable; -X = Data %*% ProjX; -X_mask = 1 - (DataNaNmask %*% ProjX); - -tYcols = t(Ycols); -ones = matrix (1.0, rows = num_attrs_Y, cols = 1); -one_to_num_attrs_Y = sumup (ones); -ProjY = matrix (0.0, rows = num_attrs, cols = num_attrs_Y); -ProjY_ctable = table (tYcols, one_to_num_attrs_Y); -ProjY [1:nrow(ProjY_ctable), ] = ProjY_ctable; -Y = Data %*% ProjY; -Y_mask = 1 - (DataNaNmask %*% ProjY); - - -print ("Preparing the strata..."); - -Proj_to_deNaN_strata = diag (1 - DataNaNmask [, stratum_column_id]); -Proj_to_deNaN_strata = removeEmpty (target = Proj_to_deNaN_strata, margin = "rows"); -vector_of_strata_with_empty_but_no_NaNs = round (Proj_to_deNaN_strata %*% (Data [, stratum_column_id])); -vector_of_strata_with_empty_but_no_NaNs = vector_of_strata_with_empty_but_no_NaNs + (1 - min (vector_of_strata_with_empty_but_no_NaNs)); -num_strata_with_empty_but_no_NaNs = max (vector_of_strata_with_empty_but_no_NaNs); -num_records_with_nonNaN_strata = nrow (Proj_to_deNaN_strata); -ones = matrix (1.0, rows = num_records_with_nonNaN_strata, cols = 1); -one_to_num_records_with_nonNaN_strata = sumup (ones); -StrataSummator_with_empty_from_nonNaNs = table (vector_of_strata_with_empty_but_no_NaNs, one_to_num_records_with_nonNaN_strata); -StrataSummator_from_nonNaNs = removeEmpty (target = StrataSummator_with_empty_from_nonNaNs, margin = "rows"); -StrataSummator = StrataSummator_from_nonNaNs %*% Proj_to_deNaN_strata; -num_strata = nrow (StrataSummator); -num_empty_strata = num_strata_with_empty_but_no_NaNs - num_strata; -print ("There are " + num_strata + " nonempty strata and " + num_empty_strata + " empty but non-NaN strata."); - -print ("Computing the global single-variate statistics..."); - -cnt_X_global = colSums (X_mask); -cnt_Y_global = colSums (Y_mask); -avg_X_global = colSums (X) / cnt_X_global; -avg_Y_global = colSums (Y) / cnt_Y_global; -var_sumX_global = colSums (X * X) - cnt_X_global * (avg_X_global * avg_X_global); -var_sumY_global = colSums (Y * Y) - cnt_Y_global * (avg_Y_global * avg_Y_global); - sqrt_failsafe_input_1 = var_sumX_global / (cnt_X_global - 1); -stdev_X_global = sqrt_failsafe (sqrt_failsafe_input_1); - sqrt_failsafe_input_2 = var_sumY_global / (cnt_Y_global - 1) -stdev_Y_global = sqrt_failsafe (sqrt_failsafe_input_2); - -print ("Computing the stratified single-variate statistics..."); - -# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata - -Cnt_X_per_stratum = StrataSummator %*% X_mask; -Cnt_Y_per_stratum = StrataSummator %*% Y_mask; -Is_none_X_per_stratum = ppred (Cnt_X_per_stratum, 0, "=="); -Is_none_Y_per_stratum = ppred (Cnt_Y_per_stratum, 0, "=="); -One_over_cnt_X_per_stratum = (1 - Is_none_X_per_stratum) / (Cnt_X_per_stratum + Is_none_X_per_stratum); -One_over_cnt_Y_per_stratum = (1 - Is_none_Y_per_stratum) / (Cnt_Y_per_stratum + Is_none_Y_per_stratum); -num_X_nonempty_strata = num_strata - colSums (Is_none_X_per_stratum); -num_Y_nonempty_strata = num_strata - colSums (Is_none_Y_per_stratum); - -Sum_X_per_stratum = StrataSummator %*% X; -Sum_Y_per_stratum = StrataSummator %*% Y; - -# Recompute some global statistics to exclude NaN-stratum records - -cnt_X_excluding_NaNstrata = colSums (Cnt_X_per_stratum); -cnt_Y_excluding_NaNstrata = colSums (Cnt_Y_per_stratum); -sum_X_excluding_NaNstrata = colSums (Sum_X_per_stratum); -sum_Y_excluding_NaNstrata = colSums (Sum_Y_per_stratum); -var_sumX_excluding_NaNstrata = colSums (StrataSummator %*% (X * X)) - (sum_X_excluding_NaNstrata * sum_X_excluding_NaNstrata) / cnt_X_excluding_NaNstrata; -var_sumY_excluding_NaNstrata = colSums (StrataSummator %*% (Y * Y)) - (sum_Y_excluding_NaNstrata * sum_Y_excluding_NaNstrata) / cnt_Y_excluding_NaNstrata; - -# Compute the stratified statistics - -var_sumX_stratified = colSums (StrataSummator %*% (X * X)) - colSums (One_over_cnt_X_per_stratum * Sum_X_per_stratum * Sum_X_per_stratum); -var_sumY_stratified = colSums (StrataSummator %*% (Y * Y)) - colSums (One_over_cnt_Y_per_stratum * Sum_Y_per_stratum * Sum_Y_per_stratum); - sqrt_failsafe_input_3 = var_sumX_stratified / (cnt_X_excluding_NaNstrata - num_X_nonempty_strata); -stdev_X_stratified = sqrt_failsafe (sqrt_failsafe_input_3); - sqrt_failsafe_input_4 = var_sumY_stratified / (cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata); -stdev_Y_stratified = sqrt_failsafe (sqrt_failsafe_input_4); -r_sqr_X_vs_strata = 1 - var_sumX_stratified / var_sumX_excluding_NaNstrata; -r_sqr_Y_vs_strata = 1 - var_sumY_stratified / var_sumY_excluding_NaNstrata; -fStat_X_vs_strata = ((var_sumX_excluding_NaNstrata - var_sumX_stratified) / (num_X_nonempty_strata - 1)) / (var_sumX_stratified / (cnt_X_excluding_NaNstrata - num_X_nonempty_strata)); -fStat_Y_vs_strata = ((var_sumY_excluding_NaNstrata - var_sumY_stratified) / (num_Y_nonempty_strata - 1)) / (var_sumY_stratified / (cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata)); -p_val_X_vs_strata = fStat_tailprob (fStat_X_vs_strata, num_X_nonempty_strata - 1, cnt_X_excluding_NaNstrata - num_X_nonempty_strata); -p_val_Y_vs_strata = fStat_tailprob (fStat_Y_vs_strata, num_Y_nonempty_strata - 1, cnt_Y_excluding_NaNstrata - num_Y_nonempty_strata); - -print ("Computing the global bivariate statistics..."); - -# Compute the aggregate X vs. Y statistics and map them into proper positions - -cnt_XY_rectangle = t(X_mask) %*% Y_mask; -sum_X_forXY_rectangle = t(X) %*% Y_mask; -sum_XX_forXY_rectangle = t(X * X) %*% Y_mask; -sum_Y_forXY_rectangle = t(X_mask) %*% Y; -sum_YY_forXY_rectangle = t(X_mask) %*% (Y * Y); -sum_XY_rectangle = t(X) %*% Y; -cnt_XY_global = matrix (cnt_XY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE); -sum_X_forXY_global = matrix (sum_X_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE); -sum_XX_forXY_global = matrix (sum_XX_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE); -sum_Y_forXY_global = matrix (sum_Y_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE); -sum_YY_forXY_global = matrix (sum_YY_forXY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE); -sum_XY_global = matrix (sum_XY_rectangle, rows = 1, cols = num_attrs_XY, byrow = TRUE); -ones_XY = matrix (1.0, rows = 1, cols = num_attrs_XY); - -# Compute the global bivariate statistics for output - -cov_sumX_sumY_global = sum_XY_global - sum_X_forXY_global * sum_Y_forXY_global / cnt_XY_global; -var_sumX_forXY_global = sum_XX_forXY_global - sum_X_forXY_global * sum_X_forXY_global / cnt_XY_global; -var_sumY_forXY_global = sum_YY_forXY_global - sum_Y_forXY_global * sum_Y_forXY_global / cnt_XY_global; -slope_XY_global = cov_sumX_sumY_global / var_sumX_forXY_global; - sqrt_failsafe_input_5 = var_sumX_forXY_global * var_sumY_forXY_global; - sqrt_failsafe_output_5 = sqrt_failsafe (sqrt_failsafe_input_5); -corr_XY_global = cov_sumX_sumY_global / sqrt_failsafe_output_5; -r_sqr_X_vs_Y_global = cov_sumX_sumY_global * cov_sumX_sumY_global / (var_sumX_forXY_global * var_sumY_forXY_global); - sqrt_failsafe_input_6 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / var_sumX_forXY_global / (cnt_XY_global - 2) -stdev_slope_XY_global = sqrt_failsafe (sqrt_failsafe_input_6); - sqrt_failsafe_input_7 = (1 - r_sqr_X_vs_Y_global) * var_sumY_forXY_global / (cnt_XY_global - 2) -stdev_errY_vs_X_global = sqrt_failsafe (sqrt_failsafe_input_7); -fStat_Y_vs_X_global = (cnt_XY_global - 2) * r_sqr_X_vs_Y_global / (1 - r_sqr_X_vs_Y_global); -p_val_Y_vs_X_global = fStat_tailprob (fStat_Y_vs_X_global, ones_XY, cnt_XY_global - 2); - -print ("Computing the stratified bivariate statistics..."); - -# Create projections to "intermingle" X and Y into attribute pairs - -Proj_X_to_XY = matrix (0.0, rows = num_attrs_X, cols = num_attrs_XY); -Proj_Y_to_XY = matrix (0.0, rows = num_attrs_Y, cols = num_attrs_XY); -ones_Y_row = matrix (1.0, rows = 1, cols = num_attrs_Y); -for (i in 1:num_attrs_X) { - start_cid = (i - 1) * num_attrs_Y + 1; - end_cid = i * num_attrs_Y; - Proj_X_to_XY [i, start_cid:end_cid] = ones_Y_row; - Proj_Y_to_XY [ , start_cid:end_cid] = diag (ones_Y_row); -} - -# Compute per-stratum statistics, prevent div-0 for locally empty (NaN-filled) strata - -Cnt_XY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY)); -Sum_X_forXY_per_stratum = StrataSummator %*% (( X %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY)); -Sum_XX_forXY_per_stratum = StrataSummator %*% (((X * X) %*% Proj_X_to_XY) * ( Y_mask %*% Proj_Y_to_XY)); -Sum_Y_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ( Y %*% Proj_Y_to_XY)); -Sum_YY_forXY_per_stratum = StrataSummator %*% (( X_mask %*% Proj_X_to_XY) * ((Y * Y) %*% Proj_Y_to_XY)); -Sum_XY_per_stratum = StrataSummator %*% (( X %*% Proj_X_to_XY) * ( Y %*% Proj_Y_to_XY)); - -Is_none_XY_per_stratum = ppred (Cnt_XY_per_stratum, 0, "=="); -One_over_cnt_XY_per_stratum = (1 - Is_none_XY_per_stratum) / (Cnt_XY_per_stratum + Is_none_XY_per_stratum); -num_XY_nonempty_strata = num_strata - colSums (Is_none_XY_per_stratum); - -# Recompute some global aggregate X vs. Y statistics to exclude NaN-stratum records - -cnt_XY_excluding_NaNstrata = colSums (Cnt_XY_per_stratum); -sum_XX_forXY_excluding_NaNstrata = colSums (Sum_XX_forXY_per_stratum); -sum_YY_forXY_excluding_NaNstrata = colSums (Sum_YY_forXY_per_stratum); -sum_XY_excluding_NaNstrata = colSums (Sum_XY_per_stratum); - -# Compute the stratified bivariate statistics - -var_sumX_forXY_stratified = sum_XX_forXY_excluding_NaNstrata - colSums (Sum_X_forXY_per_stratum * Sum_X_forXY_per_stratum * One_over_cnt_XY_per_stratum); -var_sumY_forXY_stratified = sum_YY_forXY_excluding_NaNstrata - colSums (Sum_Y_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum); -cov_sumX_sumY_stratified = sum_XY_excluding_NaNstrata - colSums (Sum_X_forXY_per_stratum * Sum_Y_forXY_per_stratum * One_over_cnt_XY_per_stratum); - -slope_XY_stratified = cov_sumX_sumY_stratified / var_sumX_forXY_stratified; - sqrt_failsafe_input_8 = var_sumX_forXY_stratified * var_sumY_forXY_stratified; - sqrt_failsafe_output_8 = sqrt_failsafe (sqrt_failsafe_input_8); -corr_XY_stratified = cov_sumX_sumY_stratified / sqrt_failsafe_output_8; -r_sqr_X_vs_Y_stratified = cov_sumX_sumY_stratified * cov_sumX_sumY_stratified / (var_sumX_forXY_stratified * var_sumY_forXY_stratified); -r_sqr_X_vs_Y_stratified = corr_XY_stratified * corr_XY_stratified; - sqrt_failsafe_input_9 = (1 - r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / var_sumX_forXY_stratified / (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1); -stdev_slope_XY_stratified = sqrt_failsafe (sqrt_failsafe_input_9); - sqrt_failsafe_input_10 = (1 - r_sqr_X_vs_Y_stratified) * var_sumY_forXY_stratified / (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1); -stdev_errY_vs_X_stratified = sqrt_failsafe (sqrt_failsafe_input_10); -fStat_Y_vs_X_stratified = (cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1) * r_sqr_X_vs_Y_stratified / (1 - r_sqr_X_vs_Y_stratified); -p_val_Y_vs_X_stratified = fStat_tailprob (fStat_Y_vs_X_stratified, ones_XY, cnt_XY_excluding_NaNstrata - num_XY_nonempty_strata - 1); - -print ("Preparing the output matrix..."); -OutMtx = matrix (0.0, rows = 40, cols = num_attrs_XY); - -OutMtx [ 1, ] = Xcols %*% Proj_X_to_XY; # 1st variate column number -OutMtx [ 2, ] = cnt_X_global %*% Proj_X_to_XY; # 1st variate global presence count -OutMtx [ 3, ] = avg_X_global %*% Proj_X_to_XY; # 1st variate global mean -OutMtx [ 4, ] = stdev_X_global %*% Proj_X_to_XY; # 1st variate global standard deviation -OutMtx [ 5, ] = stdev_X_stratified %*% Proj_X_to_XY; # 1st variate stratified standard deviation -OutMtx [ 6, ] = r_sqr_X_vs_strata %*% Proj_X_to_XY; # R-squared, 1st variate vs. strata -OutMtx [ 7, ] = p_val_X_vs_strata %*% Proj_X_to_XY; # P-value, 1st variate vs. strata -OutMtx [11, ] = Ycols %*% Proj_Y_to_XY; # 2nd variate column number -OutMtx [12, ] = cnt_Y_global %*% Proj_Y_to_XY; # 2nd variate global presence count -OutMtx [13, ] = avg_Y_global %*% Proj_Y_to_XY; # 2nd variate global mean -OutMtx [14, ] = stdev_Y_global %*% Proj_Y_to_XY; # 2nd variate global standard deviation -OutMtx [15, ] = stdev_Y_stratified %*% Proj_Y_to_XY; # 2nd variate stratified standard deviation -OutMtx [16, ] = r_sqr_Y_vs_strata %*% Proj_Y_to_XY; # R-squared, 2nd variate vs. strata -OutMtx [17, ] = p_val_Y_vs_strata %*% Proj_Y_to_XY; # P-value, 2nd variate vs. strata - - -OutMtx [21, ] = cnt_XY_global; # Global 1st & 2nd variate presence count -OutMtx [22, ] = slope_XY_global; # Global regression slope (2nd variate vs. 1st variate) -OutMtx [23, ] = stdev_slope_XY_global; # Global regression slope standard deviation -OutMtx [24, ] = corr_XY_global; # Global correlation = +/- sqrt(R-squared) -OutMtx [25, ] = stdev_errY_vs_X_global; # Global residual standard deviation -OutMtx [26, ] = r_sqr_X_vs_Y_global; # Global R-squared -OutMtx [27, ] = p_val_Y_vs_X_global; # Global P-value for hypothesis "slope = 0" -OutMtx [31, ] = cnt_XY_excluding_NaNstrata; # Stratified 1st & 2nd variate presence count -OutMtx [32, ] = slope_XY_stratified; # Stratified regression slope (2nd variate vs. 1st variate) -OutMtx [33, ] = stdev_slope_XY_stratified; # Stratified regression slope standard deviation -OutMtx [34, ] = corr_XY_stratified; # Stratified correlation = +/- sqrt(R-squared) -OutMtx [35, ] = stdev_errY_vs_X_stratified; # Stratified residual standard deviation -OutMtx [36, ] = r_sqr_X_vs_Y_stratified; # Stratified R-squared -OutMtx [37, ] = p_val_Y_vs_X_stratified; # Stratified P-value for hypothesis "slope = 0" -OutMtx [38, ] = colSums (ppred (Cnt_XY_per_stratum, 2, ">=")); # Number of strata with at least two counted points - -OutMtx = t(OutMtx); - -print ("Writing the output matrix..."); -write (OutMtx, $5, format="text"); -print ("END STRATIFIED STATISTICS SCRIPT"); - - -deNaN = externalFunction (Matrix[Double] A) return (Matrix[Double] B) - implemented in (classname = "org.apache.sysml.udf.lib.DeNaNWrapper", exectype = "mem"); - -fStat_tailprob = function (Matrix[double] fStat, Matrix[double] df_1, Matrix[double] df_2) return (Matrix[double] tailprob) -{ # TEMPORARY IMPLEMENTATION - tailprob = fStat; - for (i in 1:nrow(fStat)) { - for (j in 1:ncol(fStat)) { - q = as.scalar (fStat [i, j]); - d1 = as.scalar (df_1 [i, j]); - d2 = as.scalar (df_2 [i, j]); - if (d1 >= 1 & d2 >= 1 & q >= 0.0) { - tailprob [i, j] = pf (target = q, df1 = d1, df2 = d2, lower.tail=FALSE); - } else { - tailprob [i, j] = 0/0; - } - } } -} - -sqrt_failsafe = function (Matrix[double] input_A) return (Matrix[double] output_A) -{ - NaN = 0/0; - mask_A = ppred (input_A, 0.0, ">="); - prep_A = input_A * mask_A; - mask_A = mask_A - mask_A * (ppred (prep_A, NaN, "==")); - prep_A = deNaN (prep_A); - output_A = sqrt (prep_A) / mask_A; -} - -sumup = function (Matrix[double] A) return (Matrix[double] sum_A) -{ - shift = 1; - m_A = nrow(A); - sum_A = A; - while (shift < m_A) { - sum_A [(shift+1):m_A, ] = sum_A [(shift+1):m_A, ] + sum_A [1:(m_A-shift), ]; - shift = 2 * shift; - } -}