Repository: incubator-systemml Updated Branches: refs/heads/master 4ac77744f -> 3d1f77ce2
[SYSTEMML-1549] Cox.dml - return S & T in usable format Return S and T as a matrix instead of as a string. Closes #465. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3d1f77ce Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3d1f77ce Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3d1f77ce Branch: refs/heads/master Commit: 3d1f77ce20ce28a958544f19a84f589f4840c3ed Parents: 4ac7774 Author: Brendan Dwyer <[email protected]> Authored: Mon May 22 15:47:12 2017 -0700 Committer: Deron Eriksson <[email protected]> Committed: Mon May 22 15:47:12 2017 -0700 ---------------------------------------------------------------------- scripts/algorithms/Cox.dml | 51 ++++++++++++++++++++++++++++++----------- 1 file changed, 38 insertions(+), 13 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3d1f77ce/scripts/algorithms/Cox.dml ---------------------------------------------------------------------- diff --git a/scripts/algorithms/Cox.dml b/scripts/algorithms/Cox.dml index a021109..e30dc87 100644 --- a/scripts/algorithms/Cox.dml +++ b/scripts/algorithms/Cox.dml @@ -68,18 +68,18 @@ # M[,6]: lower 100*(1-alpha)% confidence interval of betas # M[,7]: upper 100*(1-alpha)% confidence interval of betas # -# Two log files containing a summary of some statistics of the fitted model: +# Two matrices containing a summary of some statistics of the fitted model: # 1- File S with the following format -# - line 1: no. of observations -# - line 2: no. of events -# - line 3: log-likelihood -# - line 4: AIC -# - line 5: Rsquare (Cox & Snell) -# - line 6: max possible Rsquare +# - row 1: no. of observations +# - row 2: no. 
of events +# - row 3: log-likelihood +# - row 4: AIC +# - row 5: Rsquare (Cox & Snell) +# - row 6: max possible Rsquare # 2- File T with the following format -# - line 1: Likelihood ratio test statistic, degree of freedom, P-value -# - line 2: Wald test statistic, degree of freedom, P-value -# - line 3: Score (log-rank) test statistic, degree of freedom, P-value +# - row 1: Wald test statistic, degrees of freedom, P-value +# - row 2: Likelihood ratio test statistic, degrees of freedom, P-value +# - row 3: Score (log-rank) test statistic, degrees of freedom, P-value # # Additionally, the following matrices are stored (needed for prediction) # 1- A column matrix RT that contains the order-preserving recoded timestamps from X @@ -210,7 +210,14 @@ if (ncol (X_orig) < 3) { loglik = -o; S_str = "no. of records " + N + " loglik " + loglik; if (fileS != " ") { - write (S_str, fileS, format = fmtO); + S = matrix(0, 6, 1); + S[1, 1] = N; + S[2, 1] = 0; # number of events (hard-coded 0 in this early-exit path; NOTE(review): confirm sum (E) is not intended) + S[3, 1] = loglik; + S[4, 1] = -1; # AIC (-1: not computed in this path) + S[5, 1] = -1; # Rsquare (-1: not computed in this path) + S[6, 1] = -1; # Rsquare_max (-1: not computed in this path) + write (S, fileS, format = fmtO); } else { print (S_str); } @@ -388,41 +395,59 @@ CI_l = b - se_b * z_alpha_2; CI_r = b - se_b + z_alpha_2; ######## SOME STATISTICS AND TESTS +S = matrix(0, 6, 1); +T = matrix(0, 3, 3); + # no. of records S_str = "no. of records " + N; +S[1, 1] = N; # no.of events S_str = append (S_str, "no. 
of events " + sum (E)); +S[2, 1] = sum (E); # log-likelihood loglik = -o; S_str = append (S_str, "loglik " + loglik + " "); +S[3, 1] = loglik; # AIC = -2 * loglik + 2 * D AIC = -2 * loglik + 2 * D; S_str = append (S_str, "AIC " + AIC + " "); +S[4, 1] = AIC; # Wald test wald_t = as.scalar (t(b) %*% H %*% b); wald_p = 1 - cdf (target = wald_t, dist = "chisq", df = D); T_str = "Wald test = " + wald_t + " on " + D + " df, p = " + wald_p + " "; +T[1, 1] = wald_t; # row 1 of T: Wald test (statistic, df, p-value) +T[1, 2] = D; +T[1, 3] = wald_p; # Likelihood ratio test lratio_t = 2 * o_init - 2 * o; lratio_p = 1 - cdf (target = lratio_t, dist = "chisq", df = D); T_str = append (T_str, "Likelihood ratio test = " + lratio_t + " on " + D + " df, p = " + lratio_p + " "); +T[2, 1] = lratio_t; # row 2 of T: likelihood ratio test (statistic, df, p-value) +T[2, 2] = D; +T[2, 3] = lratio_p; H0_inv = inv (H0); score_t = as.scalar (t (g0) %*% H0_inv %*% g0); score_p = 1 - cdf (target = score_t, dist = "chisq", df = D); T_str = append (T_str, "Score (logrank) test = " + score_t + " on " + D + " df, p = " + score_p + " "); +T[3, 1] = score_t; # row 3 of T: score (log-rank) test (statistic, df, p-value) +T[3, 2] = D; +T[3, 3] = score_p; # Rsquare (Cox & Snell) Rsquare = 1 - exp (-lratio_t / N); Rsquare_max = 1 - exp (-2 * o_init / N); S_str = append (S_str, "Rsquare (Cox & Snell): " + Rsquare + " "); +S[5, 1] = Rsquare; S_str = append (S_str, "max possible Rsquare: " + Rsquare_max); +S[6, 1] = Rsquare_max; M = matrix (0, rows = D, cols = 7); M[,1] = b; @@ -435,12 +460,12 @@ M[,7] = CI_r; write (M, fileM, format = fmtO); if (fileS != " ") { - write (S_str, fileS, format = fmtO); + write (S, fileS, format = fmtO); } else { print (S_str); } if (fileT != " ") { - write (T_str, fileT, format = fmtO); + write (T, fileT, format = fmtO); } else { print (T_str); }
