http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_bivariate2.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_bivariate2.dml b/src/test/scripts/applications/parfor/parfor_bivariate2.dml index f6dd4d5..d0734a9 100644 --- a/src/test/scripts/applications/parfor/parfor_bivariate2.dml +++ b/src/test/scripts/applications/parfor/parfor_bivariate2.dml @@ -1,256 +1,256 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - - -/* - * - * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs - * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} - * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n) - * - * Seven inputs: - * $1) D - input data - * $2) S1 - First attribute set {A_11, A_12, ... A_1m} - * $3) S2 - Second attribute set {A_21, A_22, ... A_2n} - * $4) K1 - kind for attributes in S1 - * $5) K2 - kind for attributes in S2 - * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal - * $6) numPairs - total number of pairs (m*n) - * $7) maxC - maximum number of categories in any categorical attribute - * - * One output: - * $6) output directory in which following four statistics files are created - * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs - * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F) - * + categorical.counts - - * + categorical.means - - * + categorical.variances - - * -> Values in these three matrices are applicable only for scale-categorical attribute pairs. - * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k. - */ - -D = read($1, rows=$7, cols=$8); # input data set -S1 = read($2, rows=1, cols=$9); # attribute set 1 -S2 = read($3, rows=1, cols=$9); # attribute set 2 -K1 = read($4, rows=1, cols=$9); # kind for attributes in S1 -K2 = read($5, rows=1, cols=$9); # kind for attributes in S2 -numPairs = $10; # number of attribute pairs (|S1|*|S2|) -maxC = $11; # max number of categories in any categorical attribute - -s1size = ncol(S1); -s2size = ncol(S2); - -# R, chisq, cramers, spearman, eta, anovaf -numstats = 8; -basestats = matrix(0, rows=numstats, cols=numPairs); -cat_counts = matrix(0, rows=maxC, cols=numPairs); -cat_means = matrix(0, rows=maxC, cols=numPairs); -cat_vars = matrix(0, rows=maxC, cols=numPairs); - - -parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) { - a1 = castAsScalar(S1[,i]); - k1 = castAsScalar(K1[1,i]); - A1 = D[,a1]; - - parfor( j in 1:s2size, par=4, mode=REMOTE_MR, check=0, opt=NONE) { - pairID = (i-1)*s2size+j; - a2 = castAsScalar(S2[,j]); - k2 = castAsScalar(K2[1,j]); - A2 = D[,a2]; - - if (k1 == k2) { - if (k1 == 1) { - # scale-scale - print("[" + i + "," + j + "] scale-scale"); - r = bivar_ss(A1,A2); - basestats[1,pairID] = r; - } else { - # nominal-nominal or ordinal-ordinal - print("[" + i + "," + j + "] categorical-categorical"); - [chisq, df, pval, cramersv] = bivar_cc(A1,A2); - basestats[2,pairID] = chisq; - basestats[3,pairID] = df; - basestats[4,pairID] = pval; - basestats[5,pairID] = cramersv; - - if ( k1 == 3 ) { - # ordinal-ordinal - print("[" + i + "," + j + "] ordinal-ordinal"); - sp = bivar_oo(A1, A2); - basestats[6,pairID] = sp; - } - } - } - else { - if (k1 == 1 | k2 == 1) { - # Scale-nominal/ordinal TODO MB correctness errors - print("[" + i + "," + j + "] scale-categorical"); - - if ( k1 == 1 ) { - [eta,f, counts, means, vars] = bivar_sc(A1,A2); - } - else { - [eta,f, counts, means, vars] = bivar_sc(A2,A1); - } - basestats[7,pairID] = eta; - basestats[8,pairID] = f; - cat_counts[,pairID] = counts; - cat_means[,pairID] = means; - cat_vars[,pairID] = vars; - } - else { - # nominal-ordinal or ordinal-nominal - print("[" + i + "," + j + "] categorical-categorical"); - [chisq, df, pval, cramersv] = bivar_cc(A1,A2); - basestats[2,pairID] = chisq; - basestats[3,pairID] = df; - basestats[4,pairID] = pval; - basestats[5,pairID] = cramersv; - } - } - } -} - -write(basestats, $6 + "/bivar.stats"); -write(cat_counts, $6 + "/category.counts"); -write(cat_means, $6 + "/category.means"); -write(cat_vars, $6 + "/category.variances"); - - -# ----------------------------------------------------------------------------------------------------------- - -bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) { - - # Contingency Table - F = table(A,B); - - # Chi-Squared - W = sum(F); - r = rowSums(F); - c = colSums(F); - E = (r %*% c)/W; - T = (F-E)^2/E; - chi_squared = sum(T); - - # compute p-value - degFreedom = (nrow(F)-1)*(ncol(F)-1); - pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE); - - # Cramer's V - R = nrow(F); - C = ncol(F); - q = min(R,C); - cramers_v = sqrt(chi_squared/(W*(q-1))); - - # Assign return values - chisq = chi_squared; - df = as.double(degFreedom); - pval = pValue; - cramersv = cramers_v; -} - -# ----------------------------------------------------------------------------------------------------------- - -bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) { - - # Unweighted co-variance - covXY = cov(X,Y); - - # compute standard deviations for both X and Y by computing 2^nd central moment - W = nrow(X); - m2X = moment(X,2); - m2Y = moment(Y,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - - # Pearson's R - R = covXY / (sigmaX*sigmaY); -} - -# ----------------------------------------------------------------------------------------------------------- - -# Y points to SCALE variable -# A points to CATEGORICAL variable -bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) { - - # mean and variance in target variable - W = nrow(A); - my = mean(Y); - varY = moment(Y,2) * W/(W-1.0) - - # category-wise (frequencies, means, variances) - CFreqs = aggregate(target=Y, groups=A, fn="count"); - CMeans = aggregate(target=Y, groups=A, fn="mean"); - CVars = aggregate(target=Y, groups=A, fn="variance"); - - # number of categories - R = nrow(CFreqs); - - Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) )); - - anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1); - anova_den = sum( (CFreqs-1)*CVars )/(W-R); - AnovaF = anova_num/anova_den; -} - -# ----------------------------------------------------------------------------------------------------------- -# Function to compute ranks -# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category -computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) { - Ranks = cumsum(X) - X/2 + 1/2; -} - -#------------------------------------------------------------------------- - -bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) { - - # compute contingency table - F = table(A,B); - - catA = nrow(F); # number of categories in A - catB = ncol(F); # number of categories in B - - # compute category-wise counts for both the attributes - R = rowSums(F); - S = colSums(F); - - # compute scores, both are column vectors - [C] = computeRanks(R); - meanX = mean(C,R); - - columnS = t(S); - [D] = computeRanks(columnS); - - # scores (C,D) are individual values, and counts (R,S) act as weights - meanY = mean(D,columnS); - - W = sum(F); # total weight, or total #cases - varX = moment(C,R,2)*(W/(W-1.0)); - varY = moment(D,columnS,2)*(W/(W-1.0)); - covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) ); - - sp = covXY/(sqrt(varX)*sqrt(varY)); -} - -# ----------------------------------------------------------------------------------------------------------- - - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + + +/* + * + * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs + * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} + * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n) + * + * Seven inputs: + * $1) D - input data + * $2) S1 - First attribute set {A_11, A_12, ... A_1m} + * $3) S2 - Second attribute set {A_21, A_22, ... A_2n} + * $4) K1 - kind for attributes in S1 + * $5) K2 - kind for attributes in S2 + * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal + * $6) numPairs - total number of pairs (m*n) + * $7) maxC - maximum number of categories in any categorical attribute + * + * One output: + * $6) output directory in which following four statistics files are created + * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs + * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F) + * + categorical.counts - + * + categorical.means - + * + categorical.variances - + * -> Values in these three matrices are applicable only for scale-categorical attribute pairs. + * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k. + */ + +D = read($1, rows=$7, cols=$8); # input data set +S1 = read($2, rows=1, cols=$9); # attribute set 1 +S2 = read($3, rows=1, cols=$9); # attribute set 2 +K1 = read($4, rows=1, cols=$9); # kind for attributes in S1 +K2 = read($5, rows=1, cols=$9); # kind for attributes in S2 +numPairs = $10; # number of attribute pairs (|S1|*|S2|) +maxC = $11; # max number of categories in any categorical attribute + +s1size = ncol(S1); +s2size = ncol(S2); + +# R, chisq, cramers, spearman, eta, anovaf +numstats = 8; +basestats = matrix(0, rows=numstats, cols=numPairs); +cat_counts = matrix(0, rows=maxC, cols=numPairs); +cat_means = matrix(0, rows=maxC, cols=numPairs); +cat_vars = matrix(0, rows=maxC, cols=numPairs); + + +parfor( i in 1:s1size, par=4, mode=LOCAL, check=0, opt=NONE) { + a1 = castAsScalar(S1[,i]); + k1 = castAsScalar(K1[1,i]); + A1 = D[,a1]; + + parfor( j in 1:s2size, par=4, mode=REMOTE_MR, check=0, opt=NONE) { + pairID = (i-1)*s2size+j; + a2 = castAsScalar(S2[,j]); + k2 = castAsScalar(K2[1,j]); + A2 = D[,a2]; + + if (k1 == k2) { + if (k1 == 1) { + # scale-scale + print("[" + i + "," + j + "] scale-scale"); + r = bivar_ss(A1,A2); + basestats[1,pairID] = r; + } else { + # nominal-nominal or ordinal-ordinal + print("[" + i + "," + j + "] categorical-categorical"); + [chisq, df, pval, cramersv] = bivar_cc(A1,A2); + basestats[2,pairID] = chisq; + basestats[3,pairID] = df; + basestats[4,pairID] = pval; + basestats[5,pairID] = cramersv; + + if ( k1 == 3 ) { + # ordinal-ordinal + print("[" + i + "," + j + "] ordinal-ordinal"); + sp = bivar_oo(A1, A2); + basestats[6,pairID] = sp; + } + } + } + else { + if (k1 == 1 | k2 == 1) { + # Scale-nominal/ordinal TODO MB correctness errors + print("[" + i + "," + j + "] scale-categorical"); + + if ( k1 == 1 ) { + [eta,f, counts, means, vars] = bivar_sc(A1,A2); + } + else { + [eta,f, counts, means, vars] = bivar_sc(A2,A1); + } + basestats[7,pairID] = eta; + basestats[8,pairID] = f; + cat_counts[,pairID] = counts; + cat_means[,pairID] = means; + cat_vars[,pairID] = vars; + } + else { + # nominal-ordinal or ordinal-nominal + print("[" + i + "," + j + "] categorical-categorical"); + [chisq, df, pval, cramersv] = bivar_cc(A1,A2); + basestats[2,pairID] = chisq; + basestats[3,pairID] = df; + basestats[4,pairID] = pval; + basestats[5,pairID] = cramersv; + } + } + } +} + +write(basestats, $6 + "/bivar.stats"); +write(cat_counts, $6 + "/category.counts"); +write(cat_means, $6 + "/category.means"); +write(cat_vars, $6 + "/category.variances"); + + +# ----------------------------------------------------------------------------------------------------------- + +bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) { + + # Contingency Table + F = table(A,B); + + # Chi-Squared + W = sum(F); + r = rowSums(F); + c = colSums(F); + E = (r %*% c)/W; + T = (F-E)^2/E; + chi_squared = sum(T); + + # compute p-value + degFreedom = (nrow(F)-1)*(ncol(F)-1); + pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE); + + # Cramer's V + R = nrow(F); + C = ncol(F); + q = min(R,C); + cramers_v = sqrt(chi_squared/(W*(q-1))); + + # Assign return values + chisq = chi_squared; + df = as.double(degFreedom); + pval = pValue; + cramersv = cramers_v; +} + +# ----------------------------------------------------------------------------------------------------------- + +bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) { + + # Unweighted co-variance + covXY = cov(X,Y); + + # compute standard deviations for both X and Y by computing 2^nd central moment + W = nrow(X); + m2X = moment(X,2); + m2Y = moment(Y,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + + # Pearson's R + R = covXY / (sigmaX*sigmaY); +} + +# ----------------------------------------------------------------------------------------------------------- + +# Y points to SCALE variable +# A points to CATEGORICAL variable +bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) { + + # mean and variance in target variable + W = nrow(A); + my = mean(Y); + varY = moment(Y,2) * W/(W-1.0) + + # category-wise (frequencies, means, variances) + CFreqs = aggregate(target=Y, groups=A, fn="count"); + CMeans = aggregate(target=Y, groups=A, fn="mean"); + CVars = aggregate(target=Y, groups=A, fn="variance"); + + # number of categories + R = nrow(CFreqs); + + Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) )); + + anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1); + anova_den = sum( (CFreqs-1)*CVars )/(W-R); + AnovaF = anova_num/anova_den; +} + +# ----------------------------------------------------------------------------------------------------------- +# Function to compute ranks +# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category +computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) { + Ranks = cumsum(X) - X/2 + 1/2; +} + +#------------------------------------------------------------------------- + +bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) { + + # compute contingency table + F = table(A,B); + + catA = nrow(F); # number of categories in A + catB = ncol(F); # number of categories in B + + # compute category-wise counts for both the attributes + R = rowSums(F); + S = colSums(F); + + # compute scores, both are column vectors + [C] = computeRanks(R); + meanX = mean(C,R); + + columnS = t(S); + [D] = computeRanks(columnS); + + # scores (C,D) are individual values, and counts (R,S) act as weights + meanY = mean(D,columnS); + + W = sum(F); # total weight, or total #cases + varX = moment(C,R,2)*(W/(W-1.0)); + varY = moment(D,columnS,2)*(W/(W-1.0)); + covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) ); + + sp = covXY/(sqrt(varX)*sqrt(varY)); +} + +# ----------------------------------------------------------------------------------------------------------- + +
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_bivariate3.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_bivariate3.dml b/src/test/scripts/applications/parfor/parfor_bivariate3.dml index 1443c6e..990a6fe 100644 --- a/src/test/scripts/applications/parfor/parfor_bivariate3.dml +++ b/src/test/scripts/applications/parfor/parfor_bivariate3.dml @@ -1,256 +1,256 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - - -/* - * - * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs - * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} - * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n) - * - * Seven inputs: - * $1) D - input data - * $2) S1 - First attribute set {A_11, A_12, ... A_1m} - * $3) S2 - Second attribute set {A_21, A_22, ... A_2n} - * $4) K1 - kind for attributes in S1 - * $5) K2 - kind for attributes in S2 - * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal - * $6) numPairs - total number of pairs (m*n) - * $7) maxC - maximum number of categories in any categorical attribute - * - * One output: - * $6) output directory in which following four statistics files are created - * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs - * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F) - * + categorical.counts - - * + categorical.means - - * + categorical.variances - - * -> Values in these three matrices are applicable only for scale-categorical attribute pairs. - * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k. - */ - -D = read($1, rows=$7, cols=$8); # input data set -S1 = read($2, rows=1, cols=$9); # attribute set 1 -S2 = read($3, rows=1, cols=$9); # attribute set 2 -K1 = read($4, rows=1, cols=$9); # kind for attributes in S1 -K2 = read($5, rows=1, cols=$9); # kind for attributes in S2 -numPairs = $10; # number of attribute pairs (|S1|*|S2|) -maxC = $11; # max number of categories in any categorical attribute - -s1size = ncol(S1); -s2size = ncol(S2); - -# R, chisq, cramers, spearman, eta, anovaf -numstats = 8; -basestats = matrix(0, rows=numstats, cols=numPairs); -cat_counts = matrix(0, rows=maxC, cols=numPairs); -cat_means = matrix(0, rows=maxC, cols=numPairs); -cat_vars = matrix(0, rows=maxC, cols=numPairs); - - -parfor( i in 1:s1size, par=4, mode=REMOTE_MR, check=0, opt=NONE) { - a1 = castAsScalar(S1[,i]); - k1 = castAsScalar(K1[1,i]); - A1 = D[,a1]; - - parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) { - pairID = (i-1)*s2size+j; - a2 = castAsScalar(S2[,j]); - k2 = castAsScalar(K2[1,j]); - A2 = D[,a2]; - - if (k1 == k2) { - if (k1 == 1) { - # scale-scale - print("[" + i + "," + j + "] scale-scale"); - r = bivar_ss(A1,A2); - basestats[1,pairID] = r; - } else { - # nominal-nominal or ordinal-ordinal - print("[" + i + "," + j + "] categorical-categorical"); - [chisq, df, pval, cramersv] = bivar_cc(A1,A2); - basestats[2,pairID] = chisq; - basestats[3,pairID] = df; - basestats[4,pairID] = pval; - basestats[5,pairID] = cramersv; - - if ( k1 == 3 ) { - # ordinal-ordinal - print("[" + i + "," + j + "] ordinal-ordinal"); - sp = bivar_oo(A1, A2); - basestats[6,pairID] = sp; - } - } - } - else { - if (k1 == 1 | k2 == 1) { - # Scale-nominal/ordinal TODO MB correctness errors - print("[" + i + "," + j + "] scale-categorical"); - - if ( k1 == 1 ) { - [eta,f, counts, means, vars] = bivar_sc(A1,A2); - } - else { - [eta,f, counts, means, vars] = bivar_sc(A2,A1); - } - basestats[7,pairID] = eta; - basestats[8,pairID] = f; - cat_counts[,pairID] = counts; - cat_means[,pairID] = means; - cat_vars[,pairID] = vars; - } - else { - # nominal-ordinal or ordinal-nominal - print("[" + i + "," + j + "] categorical-categorical"); - [chisq, df, pval, cramersv] = bivar_cc(A1,A2); - basestats[2,pairID] = chisq; - basestats[3,pairID] = df; - basestats[4,pairID] = pval; - basestats[5,pairID] = cramersv; - } - } - } -} - -write(basestats, $6 + "/bivar.stats"); -write(cat_counts, $6 + "/category.counts"); -write(cat_means, $6 + "/category.means"); -write(cat_vars, $6 + "/category.variances"); - - -# ----------------------------------------------------------------------------------------------------------- - -bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) { - - # Contingency Table - F = table(A,B); - - # Chi-Squared - W = sum(F); - r = rowSums(F); - c = colSums(F); - E = (r %*% c)/W; - T = (F-E)^2/E; - chi_squared = sum(T); - - # compute p-value - degFreedom = (nrow(F)-1)*(ncol(F)-1); - pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE); - - # Cramer's V - R = nrow(F); - C = ncol(F); - q = min(R,C); - cramers_v = sqrt(chi_squared/(W*(q-1))); - - # Assign return values - chisq = chi_squared; - df = as.double(degFreedom); - pval = pValue; - cramersv = cramers_v; -} - -# ----------------------------------------------------------------------------------------------------------- - -bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) { - - # Unweighted co-variance - covXY = cov(X,Y); - - # compute standard deviations for both X and Y by computing 2^nd central moment - W = nrow(X); - m2X = moment(X,2); - m2Y = moment(Y,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - - # Pearson's R - R = covXY / (sigmaX*sigmaY); -} - -# ----------------------------------------------------------------------------------------------------------- - -# Y points to SCALE variable -# A points to CATEGORICAL variable -bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) { - - # mean and variance in target variable - W = nrow(A); - my = mean(Y); - varY = moment(Y,2) * W/(W-1.0) - - # category-wise (frequencies, means, variances) - CFreqs = aggregate(target=Y, groups=A, fn="count"); - CMeans = aggregate(target=Y, groups=A, fn="mean"); - CVars = aggregate(target=Y, groups=A, fn="variance"); - - # number of categories - R = nrow(CFreqs); - - Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) )); - - anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1); - anova_den = sum( (CFreqs-1)*CVars )/(W-R); - AnovaF = anova_num/anova_den; -} - -# ----------------------------------------------------------------------------------------------------------- -# Function to compute ranks -# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category -computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) { - Ranks = cumsum(X) - X/2 + 1/2; -} - -#------------------------------------------------------------------------- - -bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) { - - # compute contingency table - F = table(A,B); - - catA = nrow(F); # number of categories in A - catB = ncol(F); # number of categories in B - - # compute category-wise counts for both the attributes - R = rowSums(F); - S = colSums(F); - - # compute scores, both are column vectors - [C] = computeRanks(R); - meanX = mean(C,R); - - columnS = t(S); - [D] = computeRanks(columnS); - - # scores (C,D) are individual values, and counts (R,S) act as weights - meanY = mean(D,columnS); - - W = sum(F); # total weight, or total #cases - varX = moment(C,R,2)*(W/(W-1.0)); - varY = moment(D,columnS,2)*(W/(W-1.0)); - covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) ); - - sp = covXY/(sqrt(varX)*sqrt(varY)); -} - -# ----------------------------------------------------------------------------------------------------------- - - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + + +/* + * + * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs + * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} + * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n) + * + * Seven inputs: + * $1) D - input data + * $2) S1 - First attribute set {A_11, A_12, ... A_1m} + * $3) S2 - Second attribute set {A_21, A_22, ... A_2n} + * $4) K1 - kind for attributes in S1 + * $5) K2 - kind for attributes in S2 + * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal + * $6) numPairs - total number of pairs (m*n) + * $7) maxC - maximum number of categories in any categorical attribute + * + * One output: + * $6) output directory in which following four statistics files are created + * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs + * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F) + * + categorical.counts - + * + categorical.means - + * + categorical.variances - + * -> Values in these three matrices are applicable only for scale-categorical attribute pairs. + * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k. + */ + +D = read($1, rows=$7, cols=$8); # input data set +S1 = read($2, rows=1, cols=$9); # attribute set 1 +S2 = read($3, rows=1, cols=$9); # attribute set 2 +K1 = read($4, rows=1, cols=$9); # kind for attributes in S1 +K2 = read($5, rows=1, cols=$9); # kind for attributes in S2 +numPairs = $10; # number of attribute pairs (|S1|*|S2|) +maxC = $11; # max number of categories in any categorical attribute + +s1size = ncol(S1); +s2size = ncol(S2); + +# R, chisq, cramers, spearman, eta, anovaf +numstats = 8; +basestats = matrix(0, rows=numstats, cols=numPairs); +cat_counts = matrix(0, rows=maxC, cols=numPairs); +cat_means = matrix(0, rows=maxC, cols=numPairs); +cat_vars = matrix(0, rows=maxC, cols=numPairs); + + +parfor( i in 1:s1size, par=4, mode=REMOTE_MR, check=0, opt=NONE) { + a1 = castAsScalar(S1[,i]); + k1 = castAsScalar(K1[1,i]); + A1 = D[,a1]; + + parfor( j in 1:s2size, par=4, mode=LOCAL, check=0, opt=NONE) { + pairID = (i-1)*s2size+j; + a2 = castAsScalar(S2[,j]); + k2 = castAsScalar(K2[1,j]); + A2 = D[,a2]; + + if (k1 == k2) { + if (k1 == 1) { + # scale-scale + print("[" + i + "," + j + "] scale-scale"); + r = bivar_ss(A1,A2); + basestats[1,pairID] = r; + } else { + # nominal-nominal or ordinal-ordinal + print("[" + i + "," + j + "] categorical-categorical"); + [chisq, df, pval, cramersv] = bivar_cc(A1,A2); + basestats[2,pairID] = chisq; + basestats[3,pairID] = df; + basestats[4,pairID] = pval; + basestats[5,pairID] = cramersv; + + if ( k1 == 3 ) { + # ordinal-ordinal + print("[" + i + "," + j + "] ordinal-ordinal"); + sp = bivar_oo(A1, A2); + basestats[6,pairID] = sp; + } + } + } + else { + if (k1 == 1 | k2 == 1) { + # Scale-nominal/ordinal TODO MB correctness errors + print("[" + i + "," + j + "] scale-categorical"); + + if ( k1 == 1 ) { + [eta,f, counts, means, vars] = bivar_sc(A1,A2); + } + else { + [eta,f, counts, means, vars] = bivar_sc(A2,A1); + } + basestats[7,pairID] = eta; + basestats[8,pairID] = f; + cat_counts[,pairID] = counts; + cat_means[,pairID] = means; + cat_vars[,pairID] = vars; + } + else { + # nominal-ordinal or ordinal-nominal + print("[" + i + "," + j + "] categorical-categorical"); + [chisq, df, pval, cramersv] = bivar_cc(A1,A2); + basestats[2,pairID] = chisq; + basestats[3,pairID] = df; + basestats[4,pairID] = pval; + basestats[5,pairID] = cramersv; + } + } + } +} + +write(basestats, $6 + "/bivar.stats"); +write(cat_counts, $6 + "/category.counts"); +write(cat_means, $6 + "/category.means"); +write(cat_vars, $6 + "/category.variances"); + + +# ----------------------------------------------------------------------------------------------------------- + +bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) { + + # Contingency Table + F = table(A,B); + + # Chi-Squared + W = sum(F); + r = rowSums(F); + c = colSums(F); + E = (r %*% c)/W; + T = (F-E)^2/E; + chi_squared = sum(T); + + # compute p-value + degFreedom = (nrow(F)-1)*(ncol(F)-1); + pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE); + + # Cramer's V + R = nrow(F); + C = ncol(F); + q = min(R,C); + cramers_v = sqrt(chi_squared/(W*(q-1))); + + # Assign return values + chisq = chi_squared; + df = as.double(degFreedom); + pval = pValue; + cramersv = cramers_v; +} + +# ----------------------------------------------------------------------------------------------------------- + +bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) { + + # Unweighted co-variance + covXY = cov(X,Y); + + # compute standard deviations for both X and Y by computing 2^nd central moment + W = nrow(X); + m2X = moment(X,2); + m2Y = moment(Y,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + + # Pearson's R + R = covXY / (sigmaX*sigmaY); +} + +# ----------------------------------------------------------------------------------------------------------- + +# Y points to SCALE variable +# A points to CATEGORICAL variable +bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) { + + # mean and variance in target variable + W = nrow(A); + my = mean(Y); + varY = moment(Y,2) * W/(W-1.0) + + # category-wise (frequencies, means, variances) + CFreqs = aggregate(target=Y, groups=A, fn="count"); + CMeans = aggregate(target=Y, groups=A, fn="mean"); + CVars = aggregate(target=Y, groups=A, fn="variance"); + + # number of categories + R = nrow(CFreqs); + + Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) )); + + anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1); + anova_den = sum( (CFreqs-1)*CVars )/(W-R); + AnovaF = anova_num/anova_den; +} + +# ----------------------------------------------------------------------------------------------------------- +# Function to compute ranks +# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category +computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) { + Ranks = cumsum(X) - X/2 + 1/2; +} + +#------------------------------------------------------------------------- + +bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) { + + # compute contingency table + F = table(A,B); + + catA = nrow(F); # number of categories in A + catB = ncol(F); # number of categories in B + + # compute category-wise counts for both the attributes + R = rowSums(F); + S = colSums(F); + + # compute scores, both are column vectors + [C] = computeRanks(R); + meanX = mean(C,R); + + columnS = t(S); + [D] = computeRanks(columnS); + + # scores (C,D) are individual values, and counts (R,S) act as weights + meanY = mean(D,columnS); + + W = sum(F); # total weight, or total #cases + varX = moment(C,R,2)*(W/(W-1.0)); + varY = moment(D,columnS,2)*(W/(W-1.0)); + covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) ); + + sp = covXY/(sqrt(varX)*sqrt(varY)); +} + +# ----------------------------------------------------------------------------------------------------------- + + http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_bivariate4.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_bivariate4.dml b/src/test/scripts/applications/parfor/parfor_bivariate4.dml index 9b85bad..a19957f 100644 --- a/src/test/scripts/applications/parfor/parfor_bivariate4.dml +++ b/src/test/scripts/applications/parfor/parfor_bivariate4.dml @@ -1,258 +1,258 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - - -/* - * - * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs - * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} - * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n) - * - * Seven inputs: - * $1) D - input data - * $2) S1 - First attribute set {A_11, A_12, ... A_1m} - * $3) S2 - Second attribute set {A_21, A_22, ... A_2n} - * $4) K1 - kind for attributes in S1 - * $5) K2 - kind for attributes in S2 - * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal - * $6) numPairs - total number of pairs (m*n) - * $7) maxC - maximum number of categories in any categorical attribute - * - * One output: - * $6) output directory in which following four statistics files are created - * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs - * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F) - * + categorical.counts - - * + categorical.means - - * + categorical.variances - - * -> Values in these three matrices are applicable only for scale-categorical attribute pairs. - * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k. - */ - -D = read($1, rows=$7, cols=$8); # input data set -S1 = read($2, rows=1, cols=$9); # attribute set 1 -S2 = read($3, rows=1, cols=$9); # attribute set 2 -K1 = read($4, rows=1, cols=$9); # kind for attributes in S1 -K2 = read($5, rows=1, cols=$9); # kind for attributes in S2 -numPairs = $10; # number of attribute pairs (|S1|*|S2|) -maxC = $11; # max number of categories in any categorical attribute - -s1size = ncol(S1); -s2size = ncol(S2); - -#numpairs = s1size * s2size; -#print(s1size + ", " + s2size + ", " + numpairs); - -# R, chisq, cramers, spearman, eta, anovaf -numstats = 8; -basestats = matrix(0, rows=numstats, cols=numPairs); -cat_counts = matrix(0, rows=maxC, cols=numPairs); -cat_means = matrix(0, rows=maxC, cols=numPairs); -cat_vars = matrix(0, rows=maxC, cols=numPairs); - - -parfor( i in 1:s1size, check=0) { - a1 = castAsScalar(S1[,i]); - k1 = castAsScalar(K1[1,i]); - A1 = D[,a1]; - - parfor( j in 1:s2size, check=0) { - pairID = (i-1)*s2size+j; - a2 = castAsScalar(S2[,j]); - k2 = castAsScalar(K2[1,j]); - A2 = D[,a2]; - - if (k1 == k2) { - if (k1 == 1) { - # scale-scale - print("[" + i + "," + j + "] scale-scale"); - r = bivar_ss(A1,A2); - basestats[1,pairID] = r; - } else { - # nominal-nominal or ordinal-ordinal - print("[" + i + "," + j + "] categorical-categorical"); - [chisq, df, pval, cramersv] = bivar_cc(A1,A2); - basestats[2,pairID] = chisq; - basestats[3,pairID] = df; - basestats[4,pairID] = pval; - basestats[5,pairID] = cramersv; - - if ( k1 == 3 ) { - # ordinal-ordinal - print("[" + i + "," + j + "] ordinal-ordinal"); - sp = bivar_oo(A1, A2); - basestats[6,pairID] = sp; - } - } - } - else { - if (k1 == 1 | k2 == 1) { - # Scale-nominal/ordinal - print("[" + i + "," + j + "] scale-categorical"); - - if ( k1 == 1 ) { - [eta,f, counts, means, vars] = bivar_sc(A1,A2); - } - else { - [eta,f, counts, means, vars] = bivar_sc(A2,A1); - } - basestats[7,pairID] = eta; - basestats[8,pairID] = f; - cat_counts[,pairID] = counts; - cat_means[,pairID] = means; - cat_vars[,pairID] = vars; - } - else { - # nominal-ordinal or ordinal-nominal - print("[" + i + "," + j + "] categorical-categorical"); - [chisq, df, pval, cramersv] = bivar_cc(A1,A2); - basestats[2,pairID] = chisq; - basestats[3,pairID] = df; - basestats[4,pairID] = pval; - basestats[5,pairID] = cramersv; - } - } - } -} - -write(basestats, $6 + "/bivar.stats"); -write(cat_counts, $6 + "/category.counts"); -write(cat_means, $6 + "/category.means"); -write(cat_vars, $6 + "/category.variances"); - - -# ----------------------------------------------------------------------------------------------------------- - -bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) { - - # Contingency Table - F = table(A,B); - - # Chi-Squared - W = sum(F); - r = rowSums(F); - c = colSums(F); - E = (r %*% c)/W; - T = (F-E)^2/E; - chi_squared = sum(T); - - # compute p-value - degFreedom = (nrow(F)-1)*(ncol(F)-1); - pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE); - - # Cramer's V - R = nrow(F); - C = ncol(F); - q = min(R,C); - cramers_v = sqrt(chi_squared/(W*(q-1))); - - # Assign return values - chisq = chi_squared; - df = as.double(degFreedom); - pval = pValue; - cramersv = cramers_v; -} - -# ----------------------------------------------------------------------------------------------------------- - -bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) { - - # Unweighted co-variance - covXY = cov(X,Y); - - # compute standard deviations for both X and Y by computing 2^nd central moment - W = nrow(X); - m2X = moment(X,2); - m2Y = moment(Y,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - - # Pearson's R - R = covXY / (sigmaX*sigmaY); -} - -# ----------------------------------------------------------------------------------------------------------- - -# Y points to SCALE variable -# A points to CATEGORICAL variable -bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) { - - # mean and variance in target variable - W = nrow(A); - my = mean(Y); - varY = moment(Y,2) * W/(W-1.0) - - # category-wise (frequencies, means, variances) - CFreqs = aggregate(target=Y, groups=A, fn="count"); - CMeans = aggregate(target=Y, groups=A, fn="mean"); - CVars = aggregate(target=Y, groups=A, fn="variance"); - - # number of categories - R = nrow(CFreqs); - - Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) )); - - anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1); - anova_den = sum( (CFreqs-1)*CVars )/(W-R); - AnovaF = anova_num/anova_den; -} - - -# ----------------------------------------------------------------------------------------------------------- -# Function to compute ranks -# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category -computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) { - Ranks = cumsum(X) - X/2 + 1/2; -} - -#------------------------------------------------------------------------- - -bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) { - - # compute contingency table - F = table(A,B); - - catA = nrow(F); # number of categories in A - catB = ncol(F); # number of categories in B - - # compute category-wise counts for both the attributes - R = rowSums(F); - S = colSums(F); - - # compute scores, both are column vectors - [C] = computeRanks(R); - meanX = mean(C,R); - - columnS = t(S); - [D] = computeRanks(columnS); - - # scores (C,D) are individual values, and counts (R,S) act as weights - meanY = mean(D,columnS); - - W = sum(F); # total weight, or total #cases - varX = moment(C,R,2)*(W/(W-1.0)); - varY = moment(D,columnS,2)*(W/(W-1.0)); - covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) ); - - sp = covXY/(sqrt(varX)*sqrt(varY)); -} - -# ----------------------------------------------------------------------------------------------------------- +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + + +/* + * + * For a given pair of attribute sets, compute bivariate statistics between all attribute pairs + * Given, S_1 = {A_11, A_12, ... A_1m} and S_2 = {A_21, A_22, ... A_2n} + * compute bivariate stats for m*n pairs (A_1i, A_2j), (1<= i <=m) and (1<= j <=n) + * + * Seven inputs: + * $1) D - input data + * $2) S1 - First attribute set {A_11, A_12, ... A_1m} + * $3) S2 - Second attribute set {A_21, A_22, ... A_2n} + * $4) K1 - kind for attributes in S1 + * $5) K2 - kind for attributes in S2 + * kind=1 for scale, kind=2 for nominal, kind=3 for ordinal + * $6) numPairs - total number of pairs (m*n) + * $7) maxC - maximum number of categories in any categorical attribute + * + * One output: + * $6) output directory in which following four statistics files are created + * + bivar.stats - matrix with all 8 bivariate statistics computed for different attribute pairs + * (R, (chi-sq, df, pval, cramersv), spearman, Eta, F) + * + categorical.counts - + * + categorical.means - + * + categorical.variances - + * -> Values in these three matrices are applicable only for scale-categorical attribute pairs. + * k^th column in these matrices denote the attribute pair (A_1i,A_2j) where i*j = k. + */ + +D = read($1, rows=$7, cols=$8); # input data set +S1 = read($2, rows=1, cols=$9); # attribute set 1 +S2 = read($3, rows=1, cols=$9); # attribute set 2 +K1 = read($4, rows=1, cols=$9); # kind for attributes in S1 +K2 = read($5, rows=1, cols=$9); # kind for attributes in S2 +numPairs = $10; # number of attribute pairs (|S1|*|S2|) +maxC = $11; # max number of categories in any categorical attribute + +s1size = ncol(S1); +s2size = ncol(S2); + +#numpairs = s1size * s2size; +#print(s1size + ", " + s2size + ", " + numpairs); + +# R, chisq, cramers, spearman, eta, anovaf +numstats = 8; +basestats = matrix(0, rows=numstats, cols=numPairs); +cat_counts = matrix(0, rows=maxC, cols=numPairs); +cat_means = matrix(0, rows=maxC, cols=numPairs); +cat_vars = matrix(0, rows=maxC, cols=numPairs); + + +parfor( i in 1:s1size, check=0) { + a1 = castAsScalar(S1[,i]); + k1 = castAsScalar(K1[1,i]); + A1 = D[,a1]; + + parfor( j in 1:s2size, check=0) { + pairID = (i-1)*s2size+j; + a2 = castAsScalar(S2[,j]); + k2 = castAsScalar(K2[1,j]); + A2 = D[,a2]; + + if (k1 == k2) { + if (k1 == 1) { + # scale-scale + print("[" + i + "," + j + "] scale-scale"); + r = bivar_ss(A1,A2); + basestats[1,pairID] = r; + } else { + # nominal-nominal or ordinal-ordinal + print("[" + i + "," + j + "] categorical-categorical"); + [chisq, df, pval, cramersv] = bivar_cc(A1,A2); + basestats[2,pairID] = chisq; + basestats[3,pairID] = df; + basestats[4,pairID] = pval; + basestats[5,pairID] = cramersv; + + if ( k1 == 3 ) { + # ordinal-ordinal + print("[" + i + "," + j + "] ordinal-ordinal"); + sp = bivar_oo(A1, A2); + basestats[6,pairID] = sp; + } + } + } + else { + if (k1 == 1 | k2 == 1) { + # Scale-nominal/ordinal + print("[" + i + "," + j + "] scale-categorical"); + + if ( k1 == 1 ) { + [eta,f, counts, means, vars] = bivar_sc(A1,A2); + } + else { + [eta,f, counts, means, vars] = bivar_sc(A2,A1); + } + basestats[7,pairID] = eta; + basestats[8,pairID] = f; + cat_counts[,pairID] = counts; + cat_means[,pairID] = means; + cat_vars[,pairID] = vars; + } + else { + # nominal-ordinal or ordinal-nominal + print("[" + i + "," + j + "] categorical-categorical"); + [chisq, df, pval, cramersv] = bivar_cc(A1,A2); + basestats[2,pairID] = chisq; + basestats[3,pairID] = df; + basestats[4,pairID] = pval; + basestats[5,pairID] = cramersv; + } + } + } +} + +write(basestats, $6 + "/bivar.stats"); +write(cat_counts, $6 + "/category.counts"); +write(cat_means, $6 + "/category.means"); +write(cat_vars, $6 + "/category.variances"); + + +# ----------------------------------------------------------------------------------------------------------- + +bivar_cc = function(Matrix[Double] A, Matrix[Double] B) return (Double chisq, Double df, Double pval, Double cramersv) { + + # Contingency Table + F = table(A,B); + + # Chi-Squared + W = sum(F); + r = rowSums(F); + c = colSums(F); + E = (r %*% c)/W; + T = (F-E)^2/E; + chi_squared = sum(T); + + # compute p-value + degFreedom = (nrow(F)-1)*(ncol(F)-1); + pValue = pchisq(target=chi_squared, df=degFreedom, lower.tail=FALSE); + + # Cramer's V + R = nrow(F); + C = ncol(F); + q = min(R,C); + cramers_v = sqrt(chi_squared/(W*(q-1))); + + # Assign return values + chisq = chi_squared; + df = as.double(degFreedom); + pval = pValue; + cramersv = cramers_v; +} + +# ----------------------------------------------------------------------------------------------------------- + +bivar_ss = function(Matrix[Double] X, Matrix[Double] Y) return (Double R) { + + # Unweighted co-variance + covXY = cov(X,Y); + + # compute standard deviations for both X and Y by computing 2^nd central moment + W = nrow(X); + m2X = moment(X,2); + m2Y = moment(Y,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + + # Pearson's R + R = covXY / (sigmaX*sigmaY); +} + +# ----------------------------------------------------------------------------------------------------------- + +# Y points to SCALE variable +# A points to CATEGORICAL variable +bivar_sc = function(Matrix[Double] Y, Matrix[Double] A) return (Double Eta, Double AnovaF, Matrix[Double] CFreqs, Matrix[Double] CMeans, Matrix[Double] CVars ) { + + # mean and variance in target variable + W = nrow(A); + my = mean(Y); + varY = moment(Y,2) * W/(W-1.0) + + # category-wise (frequencies, means, variances) + CFreqs = aggregate(target=Y, groups=A, fn="count"); + CMeans = aggregate(target=Y, groups=A, fn="mean"); + CVars = aggregate(target=Y, groups=A, fn="variance"); + + # number of categories + R = nrow(CFreqs); + + Eta = sqrt(1 - ( sum((CFreqs-1)*CVars) / ((W-1)*varY) )); + + anova_num = sum( (CFreqs*(CMeans-my)^2) )/(R-1); + anova_den = sum( (CFreqs-1)*CVars )/(W-R); + AnovaF = anova_num/anova_den; +} + + +# ----------------------------------------------------------------------------------------------------------- +# Function to compute ranks +# takes a column vector as input, and produces a vector of same size in which each cell denotes to the computed score for that category +computeRanks = function(Matrix[Double] X) return (Matrix[Double] Ranks) { + Ranks = cumsum(X) - X/2 + 1/2; +} + +#------------------------------------------------------------------------- + +bivar_oo = function(Matrix[Double] A, Matrix[Double] B) return (Double sp) { + + # compute contingency table + F = table(A,B); + + catA = nrow(F); # number of categories in A + catB = ncol(F); # number of categories in B + + # compute category-wise counts for both the attributes + R = rowSums(F); + S = colSums(F); + + # compute scores, both are column vectors + [C] = computeRanks(R); + meanX = mean(C,R); + + columnS = t(S); + [D] = computeRanks(columnS); + + # scores (C,D) are individual values, and counts (R,S) act as weights + meanY = mean(D,columnS); + + W = sum(F); # total weight, or total #cases + varX = moment(C,R,2)*(W/(W-1.0)); + varY = moment(D,columnS,2)*(W/(W-1.0)); + covXY = sum( t(F/(W-1) * (C-meanX)) * (D-meanY) ); + + sp = covXY/(sqrt(varX)*sqrt(varY)); +} + +# ----------------------------------------------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_corr.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_corr.R b/src/test/scripts/applications/parfor/parfor_corr.R index 5f8c315..854e593 100644 --- a/src/test/scripts/applications/parfor/parfor_corr.R +++ b/src/test/scripts/applications/parfor/parfor_corr.R @@ -1,48 +1,48 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -args <- commandArgs(TRUE) -options(digits=22) - -library("Matrix") - -V1 <- readMM(paste(args[1], "V.mtx", sep="")) -V <- as.matrix(V1); - -m <- nrow(V); -n <- ncol(V); -W <- m; - -R <- array(0,dim=c(n,n)) - -for( i in 1:(n-1) ) -{ - X <- V[ ,i]; - - for( j in (i+1):n ) - { - Y <- V[ ,j]; - R[i,j] <- cor(X, Y) - #print(R[i,j]); - } -} - -writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep="")); +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +args <- commandArgs(TRUE) +options(digits=22) + +library("Matrix") + +V1 <- readMM(paste(args[1], "V.mtx", sep="")) +V <- as.matrix(V1); + +m <- nrow(V); +n <- ncol(V); +W <- m; + +R <- array(0,dim=c(n,n)) + +for( i in 1:(n-1) ) +{ + X <- V[ ,i]; + + for( j in (i+1):n ) + { + Y <- V[ ,j]; + R[i,j] <- cor(X, Y) + #print(R[i,j]); + } +} + +writeMM(as(R, "CsparseMatrix"), paste(args[2], "Rout", sep="")); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_corr0.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_corr0.dml b/src/test/scripts/applications/parfor/parfor_corr0.dml index 7a340bd..a711d0a 100644 --- a/src/test/scripts/applications/parfor/parfor_corr0.dml +++ b/src/test/scripts/applications/parfor/parfor_corr0.dml @@ -1,51 +1,51 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -V = read($1,rows=$2,cols=$3); -m = $2; -n = $3; -W = m; - -R = matrix(0,rows=n,cols=n); - -for( i in 1:(n-1) ) -{ - X = V[,i]; - m2X = moment(X,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - - for( j in (i+1):n ) - { - Y = V[,j]; - - #corr computation - m2Y = moment(Y,2); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - covXY = cov(X,Y); - rXY = covXY / (sigmaX*sigmaY); - - #print("R[("+i+","+j+")]="+rXY); - R[i,j] = rXY; - - } -} - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +V = read($1,rows=$2,cols=$3); +m = $2; +n = $3; +W = m; + +R = matrix(0,rows=n,cols=n); + +for( i in 1:(n-1) ) +{ + X = V[,i]; + m2X = moment(X,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + + for( j in (i+1):n ) + { + Y = V[,j]; + + #corr computation + m2Y = moment(Y,2); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + covXY = cov(X,Y); + rXY = covXY / (sigmaX*sigmaY); + + #print("R[("+i+","+j+")]="+rXY); + R[i,j] = rXY; + + } +} + write(R, $4); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_corr1.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_corr1.dml b/src/test/scripts/applications/parfor/parfor_corr1.dml index 8bb0368..b2d5a14 100644 --- a/src/test/scripts/applications/parfor/parfor_corr1.dml +++ b/src/test/scripts/applications/parfor/parfor_corr1.dml @@ -1,50 +1,50 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -V = read($1,rows=$2,cols=$3); -m = $2; -n = $3; -W = m; - -R = matrix(0, rows=n,cols=n); - -parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE ) -{ - X = V[,i]; - m2X = moment(X,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - - parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE ) - { - Y = V[,j]; - - #corr computation - m2Y = moment(Y,2); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - covXY = cov(X,Y); - rXY = covXY / (sigmaX*sigmaY); - - #print("R[("+i+","+j+")]="+rXY); - R[i,j] = rXY; - } -} - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +V = read($1,rows=$2,cols=$3); +m = $2; +n = $3; +W = m; + +R = matrix(0, rows=n,cols=n); + +parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE ) +{ + X = V[,i]; + m2X = moment(X,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + + parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE ) + { + Y = V[,j]; + + #corr computation + m2Y = moment(Y,2); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + covXY = cov(X,Y); + rXY = covXY / (sigmaX*sigmaY); + + #print("R[("+i+","+j+")]="+rXY); + R[i,j] = rXY; + } +} + write(R, $4); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_corr2.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_corr2.dml b/src/test/scripts/applications/parfor/parfor_corr2.dml index c5f5c31..9e10534 100644 --- a/src/test/scripts/applications/parfor/parfor_corr2.dml +++ b/src/test/scripts/applications/parfor/parfor_corr2.dml @@ -1,50 +1,50 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -V = read($1,rows=$2,cols=$3); -m = $2; -n = $3; -W = m; - -R = matrix(0, rows=n,cols=n); - -parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE ) -{ - X = V[,i]; - m2X = moment(X,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - - parfor( j in (i+1):n, par=4, mode=REMOTE_MR, opt=NONE ) - { - Y = V[,j]; - - #corr computation - m2Y = moment(Y,2); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - covXY = cov(X,Y); - rXY = covXY / (sigmaX*sigmaY); - - print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter - R[i,j] = rXY; - } -} - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +V = read($1,rows=$2,cols=$3); +m = $2; +n = $3; +W = m; + +R = matrix(0, rows=n,cols=n); + +parfor( i in 1:(n-1), par=4, mode=LOCAL, opt=NONE ) +{ + X = V[,i]; + m2X = moment(X,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + + parfor( j in (i+1):n, par=4, mode=REMOTE_MR, opt=NONE ) + { + Y = V[,j]; + + #corr computation + m2Y = moment(Y,2); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + covXY = cov(X,Y); + rXY = covXY / (sigmaX*sigmaY); + + print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter + R[i,j] = rXY; + } +} + write(R, $4); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_corr3.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_corr3.dml b/src/test/scripts/applications/parfor/parfor_corr3.dml index 46295a3..95fc38f 100644 --- a/src/test/scripts/applications/parfor/parfor_corr3.dml +++ b/src/test/scripts/applications/parfor/parfor_corr3.dml @@ -1,51 +1,51 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -V = read($1,rows=$2,cols=$3); -m = $2; -n = $3; -W = m; - -R = matrix(0, rows=n,cols=n); - -parfor( i in 1:(n-1), par=4, mode=REMOTE_MR, opt=NONE ) -{ - X = V[,i]; - m2X = moment(X,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - - parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE ) - { - Y = V[,j]; - - #corr computation - m2Y = moment(Y,2); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - covXY = cov(X,Y); - rXY = covXY / (sigmaX*sigmaY); - - print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter - R[i,j] = rXY; - - } -} - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +V = read($1,rows=$2,cols=$3); +m = $2; +n = $3; +W = m; + +R = matrix(0, rows=n,cols=n); + +parfor( i in 1:(n-1), par=4, mode=REMOTE_MR, opt=NONE ) +{ + X = V[,i]; + m2X = moment(X,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + + parfor( j in (i+1):n, par=4, mode=LOCAL, opt=NONE ) + { + Y = V[,j]; + + #corr computation + m2Y = moment(Y,2); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + covXY = cov(X,Y); + rXY = covXY / (sigmaX*sigmaY); + + print("R[{"+i+","+j+"}]="+rXY); #test robustness of ProgramConverter + R[i,j] = rXY; + + } +} + write(R, $4); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_corr4.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_corr4.dml b/src/test/scripts/applications/parfor/parfor_corr4.dml index 62e643d..16b4767 100644 --- a/src/test/scripts/applications/parfor/parfor_corr4.dml +++ b/src/test/scripts/applications/parfor/parfor_corr4.dml @@ -1,51 +1,51 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -V = read($1,rows=$2,cols=$3); -m = $2; -n = $3; -W = m; - -R = matrix(0, rows=n,cols=n); - -parfor( i in 1:(n-1) ) -{ - X = V[,i]; - m2X = moment(X,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - - parfor( j in (i+1):n ) - { - Y = V[,j]; - - #corr computation - m2Y = moment(Y,2); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - covXY = cov(X,Y); - rXY = covXY / (sigmaX*sigmaY); - - print("R[{"+i+","+j+"}]="+rXY); - R[i,j] = rXY; - - } -} - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +V = read($1,rows=$2,cols=$3); +m = $2; +n = $3; +W = m; + +R = matrix(0, rows=n,cols=n); + +parfor( i in 1:(n-1) ) +{ + X = V[,i]; + m2X = moment(X,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + + parfor( j in (i+1):n ) + { + Y = V[,j]; + + #corr computation + m2Y = moment(Y,2); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + covXY = cov(X,Y); + rXY = covXY / (sigmaX*sigmaY); + + print("R[{"+i+","+j+"}]="+rXY); + R[i,j] = rXY; + + } +} + write(R, $4); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_corr5.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_corr5.dml b/src/test/scripts/applications/parfor/parfor_corr5.dml index 0da9539..1663b8f 100644 --- a/src/test/scripts/applications/parfor/parfor_corr5.dml +++ b/src/test/scripts/applications/parfor/parfor_corr5.dml @@ -1,51 +1,51 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -V = read($1,rows=$2,cols=$3); -m = $2; -n = $3; -W = m; - -R = matrix(0, rows=n,cols=n); - -parfor( i in 1:(n-1), profile=1 ) -{ - X = V[,i]; - m2X = moment(X,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - - parfor( j in (i+1):n, profile=1 ) - { - Y = V[,j]; - - #corr computation - m2Y = moment(Y,2); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - covXY = cov(X,Y); - rXY = covXY / (sigmaX*sigmaY); - - print("R[{"+i+","+j+"}]="+rXY); - R[i,j] = rXY; - - } -} - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +V = read($1,rows=$2,cols=$3); +m = $2; +n = $3; +W = m; + +R = matrix(0, rows=n,cols=n); + +parfor( i in 1:(n-1), profile=1 ) +{ + X = V[,i]; + m2X = moment(X,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + + parfor( j in (i+1):n, profile=1 ) + { + Y = V[,j]; + + #corr computation + m2Y = moment(Y,2); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + covXY = cov(X,Y); + rXY = covXY / (sigmaX*sigmaY); + + print("R[{"+i+","+j+"}]="+rXY); + R[i,j] = rXY; + + } +} + write(R, $4); \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/05d2c0a8/src/test/scripts/applications/parfor/parfor_corr6.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/applications/parfor/parfor_corr6.dml b/src/test/scripts/applications/parfor/parfor_corr6.dml index 166b3d7..2ee26cc 100644 --- a/src/test/scripts/applications/parfor/parfor_corr6.dml +++ b/src/test/scripts/applications/parfor/parfor_corr6.dml @@ -1,51 +1,51 @@ -#------------------------------------------------------------- -# -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. -# -#------------------------------------------------------------- - -V = read($1,rows=$2,cols=$3); -m = $2; -n = $3; -W = m; - -R = matrix(0, rows=n,cols=n); - -parfor( i in 1:(n-1), log=debug ) -{ - X = V[,i]; - m2X = moment(X,2); - sigmaX = sqrt(m2X * (W/(W-1.0)) ); - - parfor( j in (i+1):n ) - { - Y = V[,j]; - - #corr computation - m2Y = moment(Y,2); - sigmaY = sqrt(m2Y * (W/(W-1.0)) ); - covXY = cov(X,Y); - rXY = covXY / (sigmaX*sigmaY); - - print("R[{"+i+","+j+"}]="+rXY); - R[i,j] = rXY; - - } -} - +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +V = read($1,rows=$2,cols=$3); +m = $2; +n = $3; +W = m; + +R = matrix(0, rows=n,cols=n); + +parfor( i in 1:(n-1), log=debug ) +{ + X = V[,i]; + m2X = moment(X,2); + sigmaX = sqrt(m2X * (W/(W-1.0)) ); + + parfor( j in (i+1):n ) + { + Y = V[,j]; + + #corr computation + m2Y = moment(Y,2); + sigmaY = sqrt(m2Y * (W/(W-1.0)) ); + covXY = cov(X,Y); + rXY = covXY / (sigmaX*sigmaY); + + print("R[{"+i+","+j+"}]="+rXY); + R[i,j] = rXY; + + } +} + write(R, $4); \ No newline at end of file
