Repository: incubator-systemml
Updated Branches:
  refs/heads/master 4ac77744f -> 3d1f77ce2


[SYSTEMML-1549] Cox.dml - return S & T in usable format

Return S and T as a matrix instead of as a string.

Closes #465.


Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/3d1f77ce
Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/3d1f77ce
Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/3d1f77ce

Branch: refs/heads/master
Commit: 3d1f77ce20ce28a958544f19a84f589f4840c3ed
Parents: 4ac7774
Author: Brendan Dwyer <[email protected]>
Authored: Mon May 22 15:47:12 2017 -0700
Committer: Deron Eriksson <[email protected]>
Committed: Mon May 22 15:47:12 2017 -0700

----------------------------------------------------------------------
 scripts/algorithms/Cox.dml | 51 ++++++++++++++++++++++++++++++-----------
 1 file changed, 38 insertions(+), 13 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/3d1f77ce/scripts/algorithms/Cox.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Cox.dml b/scripts/algorithms/Cox.dml
index a021109..e30dc87 100644
--- a/scripts/algorithms/Cox.dml
+++ b/scripts/algorithms/Cox.dml
@@ -68,18 +68,18 @@
 #      M[,6]: lower 100*(1-alpha)% confidence interval of betas
 #      M[,7]: upper 100*(1-alpha)% confidence interval of betas
 #
-# Two log files containing a summary of some statistics of the fitted model:
+# Two matrices containing a summary of some statistics of the fitted model:
 # 1- File S with the following format 
-#      - line 1: no. of observations
-#      - line 2: no. of events
-#   - line 3: log-likelihood 
-#      - line 4: AIC
-#      - line 5: Rsquare (Cox & Snell)
-#      - line 6: max possible Rsquare
+#      - row 1: no. of observations
+#      - row 2: no. of events
+#   - row 3: log-likelihood 
+#      - row 4: AIC
+#      - row 5: Rsquare (Cox & Snell)
+#      - row 6: max possible Rsquare
 # 2- File T with the following format
-#      - line 1: Likelihood ratio test statistic, degree of freedom, P-value
-#      - line 2: Wald test statistic, degree of freedom, P-value
-#      - line 3: Score (log-rank) test statistic, degree of freedom, P-value
+#      - row 1: Wald test statistic, degree of freedom, P-value
+#      - row 2: Likelihood ratio test statistic, degree of freedom, P-value
+#      - row 3: Score (log-rank) test statistic, degree of freedom, P-value
 # 
 # Additionally, the following matrices are stored (needed for prediction)
 # 1- A column matrix RT that contains the order-preserving recoded timestamps from X 
@@ -210,7 +210,14 @@ if (ncol (X_orig) < 3) {
        loglik = -o;
        S_str = "no. of records " + N + " loglik " + loglik;
        if (fileS != " ") {
-               write (S_str, fileS, format = fmtO);
+         S = matrix(0, 6, 1);
+         S[1, 1] = N;
+         S[2, 1] = 0; # number of events
+         S[3, 1] = loglik;
+         S[4, 1] = -1; # AIC
+         S[5, 1] = -1; # Rsquare
+         S[6, 1] = -1; #Rsquare_max
+               write (S, fileS, format = fmtO);
        } else {
                print (S_str);
        }
@@ -388,41 +395,59 @@ CI_l = b - se_b * z_alpha_2;
 CI_r = b - se_b + z_alpha_2;
 
 ######## SOME STATISTICS AND TESTS
+S = matrix(0, 6, 1);
+T = matrix(0, 3, 3);
+
 # no. of records
 S_str = "no. of records " + N;
+S[1, 1] = N;
 
 # no.of events
 S_str = append (S_str, "no. of events " + sum (E));
+S[2, 1] = sum (E);
 
 # log-likelihood
 loglik = -o;
 S_str = append (S_str, "loglik " + loglik + " ");
+S[3, 1] = loglik;
 
 # AIC = -2 * loglik + 2 * D
 AIC = -2 * loglik + 2 * D;
 S_str = append (S_str, "AIC " + AIC + " ");
+S[4, 1] = AIC;
 
 # Wald test
 wald_t = as.scalar (t(b) %*% H %*% b);
 wald_p = 1 - cdf (target = wald_t, dist = "chisq", df = D);
 T_str = "Wald test = " + wald_t + " on " + D + " df, p = " + wald_p + " ";
+T[1, 1] = wald_t;
+T[1, 2] = D;
+T[1, 3] = wald_p;
 
 # Likelihood ratio test
 lratio_t = 2 * o_init - 2 * o;
 lratio_p = 1 - cdf (target = lratio_t, dist = "chisq", df = D);
 T_str = append (T_str, "Likelihood ratio test = " + lratio_t + " on " + D + " df, p = " + lratio_p + " ");
 
+T[2, 1] = lratio_t;
+T[2, 2] = D;
+T[2, 3] = lratio_p;
 
 H0_inv = inv (H0);
 score_t = as.scalar (t (g0) %*% H0_inv %*% g0);
 score_p = 1 - cdf (target = score_t, dist = "chisq", df = D);
 T_str = append (T_str, "Score (logrank) test = " + score_t + " on " + D + " df, p = " + score_p + " ");
+T[3, 1] = score_t;
+T[3, 2] = D;
+T[3, 3] = score_p;
 
 # Rsquare (Cox & Snell)
 Rsquare = 1 - exp (-lratio_t / N);  
 Rsquare_max = 1 - exp (-2 * o_init / N);
 S_str = append (S_str, "Rsquare (Cox & Snell): " + Rsquare + " ");
+S[5, 1] = Rsquare;
 S_str = append (S_str, "max possible Rsquare: " + Rsquare_max);
+S[6, 1] = Rsquare_max;
 
 M = matrix (0, rows = D, cols = 7);
 M[,1] = b;
@@ -435,12 +460,12 @@ M[,7] = CI_r;
 
 write (M, fileM, format = fmtO);
 if (fileS != " ") {
-       write (S_str, fileS, format = fmtO);
+       write (S, fileS, format = fmtO);
 } else {
        print (S_str);
 }
 if (fileT != " ") {
-       write (T_str, fileT, format = fmtO);
+       write (T, fileT, format = fmtO);
 } else {
        print (T_str);
 }

Reply via email to