This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git
The following commit(s) were added to refs/heads/master by this push:
new c7ce2ff [SYSTEMDS-391] New built-in GLM function (Generalized Linear Model)
c7ce2ff is described below
commit c7ce2ff95f306284363faf64321b1d2f36bbbeb4
Author: Shafaq Siddiqi <[email protected]>
AuthorDate: Thu Apr 30 22:30:11 2020 +0200
[SYSTEMDS-391] New built-in GLM function (Generalized Linear Model)
Closes #888.
---
docs/Tasks.txt | 3 +
scripts/algorithms/StepGLM.dml | 16 +-
scripts/builtin/glm.dml | 1118 ++++++++++++++++++++
.../java/org/apache/sysds/common/Builtins.java | 1 +
src/test/java/org/apache/sysds/test/TestUtils.java | 383 +++++++
.../apache/sysds/test/applications/GLMTest.java | 417 +-------
.../test/functions/builtin/BuiltinGLMTest.java | 269 +++++
src/test/scripts/functions/builtin/glmTest.R | 139 +++
src/test/scripts/functions/builtin/glmTest.dml | 25 +
9 files changed, 1955 insertions(+), 416 deletions(-)
diff --git a/docs/Tasks.txt b/docs/Tasks.txt
index 35d07e6..86ab8fe 100644
--- a/docs/Tasks.txt
+++ b/docs/Tasks.txt
@@ -290,5 +290,8 @@ SYSTEMDS-370 Lossy Compression Blocks
SYSTEMDS-380 Memory Footprint
* 371 Matrix Block Memory footprint update
+SYSTEMDS-390 New Builtin Functions IV
+ * 391 New GLM built-in function (from algorithms) OK
+
Others:
* Break append instruction to cbind and rbind
diff --git a/scripts/algorithms/StepGLM.dml b/scripts/algorithms/StepGLM.dml
index 2ce5a1b..213f373 100644
--- a/scripts/algorithms/StepGLM.dml
+++ b/scripts/algorithms/StepGLM.dml
@@ -127,18 +127,18 @@ if (dir == "forward") {
if (intercept_status == 0) {
    # Compute AIC of an empty model with no features and no intercept (all Ys are zero)
-   [AIC_best] = glm (X_global, Y, 0, num_features, columns_fixed_ordered, " ");
+   [AIC_best] = glm_fit (X_global, Y, 0, num_features, columns_fixed_ordered, " ");
  } else {
    # compute AIC of an empty model with only intercept (all Ys are constant)
    all_ones = matrix (1, rows = num_records, cols = 1);
-   [AIC_best] = glm (all_ones, Y, 0, num_features, columns_fixed_ordered, " ");
+   [AIC_best] = glm_fit (all_ones, Y, 0, num_features, columns_fixed_ordered, " ");
  }
  print ("Best AIC without any features: " + AIC_best);
  # First pass to examine single features
  AICs = matrix (AIC_best, rows = 1, cols = num_features);
  parfor (i in 1:num_features) {
-   [AIC_1] = glm (X_orig[,i], Y, intercept_status, num_features, columns_fixed_ordered, " ");
+   [AIC_1] = glm_fit (X_orig[,i], Y, intercept_status, num_features, columns_fixed_ordered, " ");
    AICs[1,i] = AIC_1;
  }
@@ -156,11 +156,11 @@ if (dir == "forward") {
    print ("AIC of an empty model is " + AIC_best + " and adding no feature achieves more than " + (thr * 100) + "% decrease in AIC!");
    if (intercept_status == 0) {
      # Compute AIC of an empty model with no features and no intercept (all Ys are zero)
-     [AIC_best] = glm (X_global, Y, 0, num_features, columns_fixed_ordered, fileB);
+     [AIC_best] = glm_fit (X_global, Y, 0, num_features, columns_fixed_ordered, fileB);
    } else {
      # compute AIC of an empty model with only intercept (all Ys are constant)
      ###all_ones = matrix (1, rows = num_records, cols = 1);
-     [AIC_best] = glm (all_ones, Y, 0, num_features, columns_fixed_ordered, fileB);
+     [AIC_best] = glm_fit (all_ones, Y, 0, num_features, columns_fixed_ordered, fileB);
    }
  };
@@ -177,7 +177,7 @@ if (dir == "forward") {
      # Construct the feature matrix
      X = cbind (X_global, X_orig[,i]);
-     [AIC_2] = glm (X, Y, intercept_status, num_features, columns_fixed_ordered, " ");
+     [AIC_2] = glm_fit (X, Y, intercept_status, num_features, columns_fixed_ordered, " ");
      AICs[1,i] = AIC_2;
    }
  }
@@ -209,7 +209,7 @@ if (dir == "forward") {
# run GLM with selected set of features
print ("Running GLM with selected features...");
- [AIC] = glm (X_global, Y, intercept_status, num_features, columns_fixed_ordered, fileB);
+ [AIC] = glm_fit (X_global, Y, intercept_status, num_features, columns_fixed_ordered, fileB);
} else {
  stop ("Currently only forward selection strategy is supported!");
@@ -218,7 +218,7 @@ if (dir == "forward") {
################### UDFS USED IN THIS SCRIPT ##################
-glm = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, Double num_features_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {
+glm_fit = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, Double num_features_orig, Matrix[Double] Selected, String fileB) return (Double AIC) {
  # distribution family code: 1 = Power, 2 = Bernoulli/Binomial; currently only Bernoulli distribution family is supported!
  distribution_type = 2; # $dfam = 2;
diff --git a/scripts/builtin/glm.dml b/scripts/builtin/glm.dml
new file mode 100644
index 0000000..e2b5255
--- /dev/null
+++ b/scripts/builtin/glm.dml
@@ -0,0 +1,1118 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+#
+# THIS BUILTIN SOLVES GLM REGRESSION USING NEWTON/FISHER SCORING WITH TRUST REGIONS
+#
+# INPUT PARAMETERS:
+# ---------------------------------------------------------------------------------------------
+# NAME  TYPE    DEFAULT   MEANING
+# ---------------------------------------------------------------------------------------------
+# X     Double  ---       matrix X of feature vectors
+# Y     Double  ---       matrix Y with either 1 or 2 columns:
+#                         if dfam = 2, Y is 1-column Bernoulli or 2-column Binomial (#pos, #neg)
+# dfam  Int     1         Distribution family code: 1 = Power, 2 = Binomial
+# vpow  Double  0.0       Power for Variance defined as (mean)^power (ignored if dfam != 1):
+#                         0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = Inverse Gaussian
+# link  Int     0         Link function code: 0 = canonical (depends on distribution),
+#                         1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = Cauchit
+# lpow  Double  1.0       Power for Link function defined as (mean)^power (ignored if link != 1):
+#                         -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = sqrt, 1.0 = identity
+# yneg  Double  0.0       Response value for Bernoulli "No" label, usually 0.0 or -1.0
+# icpt  Int     0         Intercept presence, X columns shifting and rescaling:
+#                         0 = no intercept, no shifting, no rescaling;
+#                         1 = add intercept, but neither shift nor rescale X;
+#                         2 = add intercept, shift & rescale X columns to mean = 0, variance = 1
+# reg   Double  0.0       Regularization parameter (lambda) for L2 regularization
+# tol   Double  0.000001  Tolerance (epsilon)
+# disp  Double  0.0       (Over-)dispersion value, or 0.0 to estimate it from data
+# moi   Int     200       Maximum number of outer (Newton / Fisher Scoring) iterations
+# mii   Int     0         Maximum number of inner (Conjugate Gradient) iterations, 0 = no maximum
+# ---------------------------------------------------------------------------------------------
+#
+# OUTPUT:
+# ---------------------------------------------------------------------------------------------
+# NAME  TYPE    MEANING
+# ---------------------------------------------------------------------------------------------
+# beta  Double  Matrix beta, whose size depends on icpt:
+#               icpt=0: ncol(X) x 1; icpt=1: (ncol(X) + 1) x 1; icpt=2: (ncol(X) + 1) x 2
+#----------------------------------------------------------------------------------------------
+# In addition, some GLM statistics are provided as console output by setting verbose = TRUE,
+# one comma-separated name-value pair per line, as follows:
+#
+# NAME               MEANING
+# -------------------------------------------------------------------------------------------
+# TERMINATION_CODE   A positive integer indicating success/failure as follows:
+#                    1 = Converged successfully; 2 = Maximum number of iterations reached;
+#                    3 = Input (X, Y) out of range; 4 = Distribution/link is not supported
+# BETA_MIN           Smallest beta value (regression coefficient), excluding the intercept
+# BETA_MIN_INDEX     Column index for the smallest beta value
+# BETA_MAX           Largest beta value (regression coefficient), excluding the intercept
+# BETA_MAX_INDEX     Column index for the largest beta value
+# INTERCEPT          Intercept value, or NaN if there is no intercept (if icpt=0)
+# DISPERSION         Dispersion used to scale deviance, provided as "disp" input parameter
+#                    or estimated (same as DISPERSION_EST) if the "disp" parameter is <= 0
+# DISPERSION_EST     Dispersion estimated from the dataset
+# DEVIANCE_UNSCALED  Deviance from the saturated model, assuming dispersion == 1.0
+# DEVIANCE_SCALED    Deviance from the saturated model, scaled by the DISPERSION value
+# -------------------------------------------------------------------------------------------
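+#
+# For illustration, a successful Bernoulli-logit run might print name-value pairs of
+# the form below (the numeric values are made up and will differ per dataset):
+#
+#   TERMINATION_CODE,1
+#   BETA_MIN,-0.7251
+#   BETA_MIN_INDEX,3
+#   BETA_MAX,1.0482
+#   BETA_MAX_INDEX,1
+#   INTERCEPT,0.2143
+#   DISPERSION,1.0
+#   DISPERSION_EST,1.0
+#   DEVIANCE_UNSCALED,112.84
+#   DEVIANCE_SCALED,112.84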
+#
+# The Log file, when requested, contains the following per-iteration variables in CSV format,
+# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for initial values:
+#
+# NAME               MEANING
+# -------------------------------------------------------------------------------------------
+# NUM_CG_ITERS       Number of inner (Conj.Gradient) iterations in this outer iteration
+# IS_TRUST_REACHED   1 = trust region boundary was reached, 0 = otherwise
+# POINT_STEP_NORM    L2-norm of iteration step from old point (i.e. "beta") to new point
+# OBJECTIVE          The loss function we minimize (i.e. negative partial log-likelihood)
+# OBJ_DROP_REAL      Reduction in the objective during this iteration, actual value
+# OBJ_DROP_PRED      Reduction in the objective predicted by a quadratic approximation
+# OBJ_DROP_RATIO     Actual-to-predicted reduction ratio, used to update the trust region
+# GRADIENT_NORM      L2-norm of the loss function gradient (NOTE: sometimes omitted)
+# LINEAR_TERM_MIN    The minimum value of X %*% beta, used to check for overflows
+# LINEAR_TERM_MAX    The maximum value of X %*% beta, used to check for overflows
+# IS_POINT_UPDATED   1 = new point accepted; 0 = new point rejected, old point restored
+# TRUST_DELTA        Updated trust region size, the "delta"
+# -------------------------------------------------------------------------------------------
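+#
+# For example, assuming such a log is written, its first lines could look like the
+# following (NAME, ITERATION, VALUE triples; the values are illustrative only):
+#
+#   NUM_CG_ITERS,1,3
+#   IS_TRUST_REACHED,1,0
+#   POINT_STEP_NORM,1,0.8261
+#   OBJECTIVE,1,134.92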
+#
+#
+# SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
+# AND LINK FUNCTIONS:
+# -----------------------------------------------
+# INPUT PARAMETERS: MEANING: Cano-
+# dfam vpow link lpow Distribution.link nical?
+# -----------------------------------------------
+# 1 0.0 1 -1.0 Gaussian.inverse
+# 1 0.0 1 0.0 Gaussian.log
+# 1 0.0 1 1.0 Gaussian.id Yes
+# 1 1.0 1 0.0 Poisson.log Yes
+# 1 1.0 1 0.5 Poisson.sqrt
+# 1 1.0 1 1.0 Poisson.id
+# 1 2.0 1 -1.0 Gamma.inverse Yes
+# 1 2.0 1 0.0 Gamma.log
+# 1 2.0 1 1.0 Gamma.id
+# 1 3.0 1 -2.0 InvGaussian.1/mu^2 Yes
+# 1 3.0 1 -1.0 InvGaussian.inverse
+# 1 3.0 1 0.0 InvGaussian.log
+# 1 3.0 1 1.0 InvGaussian.id
+# 1 * 1 * AnyVariance.AnyLink
+# -----------------------------------------------
+# 2 * 1 0.0 Binomial.log
+# 2 * 1 0.5 Binomial.sqrt
+# 2 * 2 * Binomial.logit Yes
+# 2 * 3 * Binomial.probit
+# 2 * 4 * Binomial.cloglog
+# 2 * 5 * Binomial.cauchit
+# -----------------------------------------------
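+
+# Example (a minimal editorial sketch, not part of the shipped tests): fit a
+# Bernoulli-logit model on random data; "X" and "y" are hypothetical placeholders
+# for user data.
+#
+#   X = rand (rows = 100, cols = 10, seed = 7);
+#   y = rand (rows = 100, cols = 1, seed = 8) > 0.5;  # 0/1 labels, so yneg = 0.0 applies
+#   B = glm (X = X, Y = y, dfam = 2, link = 2, icpt = 1, tol = 0.000001);
+#   print (toString (B));  # (ncol(X) + 1) x 1 for icpt = 1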
+
+m_glm = function(Matrix[Double] X, Matrix[Double] Y, Integer dfam=1,
+ Double vpow=0.0, Integer link=0, Double lpow=1.0, Double yneg=0.0,
+ Integer icpt = 0, Double disp=0.0, Double reg=0.0, Double tol=0.000001,
+ Integer moi=200, Integer mii=0, Boolean verbose=TRUE)
+ return(Matrix[Double] betas)
+{
+ distribution_type = dfam;
+ variance_as_power_of_the_mean = vpow;
+ link_type = link;
+ link_as_power_of_the_mean = lpow;
+ bernoulli_No_label = yneg;
+ intercept_status = icpt;
+ dispersion = disp;
+ regularization = reg;
+ eps = tol;
+ max_iteration_IRLS = moi;
+ max_iteration_CG = mii;
+
+ # variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);
+ # link_as_power_of_the_mean = as.double (link_as_power_of_the_mean);
+ # bernoulli_No_label = as.double (bernoulli_No_label);
+ # dispersion = as.double (dispersion);
+ # eps = as.double (eps);
+
+
+ # Default values for output statistics:
+ termination_code = 0;
+ min_beta = NaN;
+ i_min_beta = NaN;
+ max_beta = NaN;
+ i_max_beta = NaN;
+ intercept_value = NaN;
+ dispersion = NaN;
+ estimated_dispersion = NaN;
+ deviance_nodisp = NaN;
+ deviance = NaN;
+
+ print("BEGIN GLM SCRIPT");
+
+ num_records = nrow (X);
+ num_features = ncol (X);
+ zeros_r = matrix (0, rows = num_records, cols = 1);
+ ones_r = 1 + zeros_r;
+
+ # Introduce the intercept, shift and rescale the columns of X if needed
+
+  if (intercept_status == 1 | intercept_status == 2)  # add the intercept column
+ {
+ X = cbind (X, ones_r);
+ num_features = ncol (X);
+ }
+
+ scale_lambda = matrix (1, rows = num_features, cols = 1);
+ if (intercept_status == 1 | intercept_status == 2)
+ {
+ scale_lambda [num_features, 1] = 0;
+ }
+
+  if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
+  { # Important assumption: X [, num_features] = ones_r
+    avg_X_cols = t(colSums(X)) / num_records;
+    var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / (num_records - 1);
+    is_unsafe = (var_X_cols <= 0);
+    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+    scale_X [num_features, 1] = 1;
+    shift_X = - avg_X_cols * scale_X;
+    shift_X [num_features, 1] = 0;
+    rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + sum (shift_X ^ 2);
+  }
+ }
+ else {
+ scale_X = matrix (1, rows = num_features, cols = 1);
+ shift_X = matrix (0, rows = num_features, cols = 1);
+ rowSums_X_sq = rowSums (X ^ 2);
+ }
+
+  # Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and rowSums(X ^ 2)
+  # with "rowSums_X_sq" in order to preserve the sparsity of X under shift and scale.
+  # The transform is then associatively applied to the other side of the expression,
+  # and is rewritten via "scale_X" and "shift_X" as follows:
+ #
+ # ssX_A = (SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as:
+ # ssX_A = diag (scale_X) %*% A;
+ # ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;
+ #
+ # tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A --- is rewritten as:
+ # tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];
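+  #
+  # (Editorial note: equivalently, the transform is T = diag (scale_X) + e_m %*% t(shift_X)
+  # with e_m the last standard basis vector, so column j of X %*% T equals
+  # scale_X[j] * X[,j] + shift_X[j] * ones_r, i.e. plain column standardization
+  # routed through the trailing intercept column.)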
+
+ # Initialize other input-dependent parameters
+
+ lambda = scale_lambda * regularization;
+ if (max_iteration_CG == 0) {
+ max_iteration_CG = num_features;
+ }
+
+ # In Bernoulli case, convert one-column "Y" into two-column
+
+ if (distribution_type == 2 & ncol(Y) == 1)
+ {
+ is_Y_negative = (Y == bernoulli_No_label);
+ Y = cbind (1 - is_Y_negative, is_Y_negative);
+ count_Y_negative = sum (is_Y_negative);
+ if (count_Y_negative == 0) {
+      stop ("GLM Input Error: all Y-values encode Bernoulli YES-label, none encode NO-label");
+    }
+    if (count_Y_negative == nrow(Y)) {
+      stop ("GLM Input Error: all Y-values encode Bernoulli NO-label, none encode YES-label");
+    }
+  }
+
+  # Set up the canonical link, if requested [Then we have: Var(mu) * (d link / d mu) = const]
+
+ if (link_type == 0)
+ {
+ if (distribution_type == 1) {
+ link_type = 1;
+ link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;
+ }
+ else if (distribution_type == 2) {
+ link_type = 2;
+ }
+ }
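+
+  # For instance, with the canonical link (link = 0) the code above derives:
+  #   Gaussian (vpow = 0.0) -> lpow =  1.0 (identity);   Poisson     (vpow = 1.0) -> lpow =  0.0 (log)
+  #   Gamma    (vpow = 2.0) -> lpow = -1.0 (inverse);    InvGaussian (vpow = 3.0) -> lpow = -2.0 (1/mu^2)
+  # matching the "canonical" rows of the table in the header comment above.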
+
+# For power distributions and/or links, we use two constants,
+# "variance as power of the mean" and "link_as_power_of_the_mean",
+# to specify the variance and the link as arbitrary powers of the
+# mean. However, the variance-powers of 1.0 (Poisson family) and
+# 2.0 (Gamma family) have to be treated as special cases, because
+# these values integrate into logarithms. The link-power of 0.0
+# is also special as it represents the logarithm link.
+
+ num_response_columns = ncol (Y);
+
+  is_supported = check_if_supported (num_response_columns, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+ if (is_supported == 1)
+ {
+
+ ##### INITIALIZE THE BETAS #####
+
+ [beta, saturated_log_l, isNaN] = glm_initialize (X, Y, distribution_type,
+ variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean,
+ intercept_status, max_iteration_CG);
+ if (isNaN == 0)
+ {
+
+ ##### START OF THE MAIN PART #####
+
+ sum_X_sq = sum (rowSums_X_sq);
+ trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));
+ ### max_trust_delta = trust_delta * 10000.0;
+ log_l = 0.0;
+ deviance_nodisp = 0.0;
+ new_deviance_nodisp = 0.0;
+ isNaN_log_l = 2;
+ newbeta = beta;
+ g = matrix (0.0, rows = num_features, cols = 1);
+ g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+ accept_new_beta = 1;
+ reached_trust_boundary = 0;
+ neg_log_l_change_predicted = 0.0;
+ i_IRLS = 0;
+
+ print ("BEGIN IRLS ITERATIONS...");
+
+ ssX_newbeta = diag (scale_X) %*% newbeta;
+    ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
+ all_linear_terms = X %*% ssX_newbeta;
+
+ [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+      (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+ if (isNaN_new_log_l == 0) {
+ new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+ new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+ }
+
+    # set w to avoid 'Initialization of w depends on if-else/while execution' warnings
+ w = matrix (0.0, rows=1, cols=1);
+ while (termination_code == 0)
+ {
+ accept_new_beta = 1;
+
+ if (i_IRLS > 0)
+ {
+ if (isNaN_log_l == 0) {
+ accept_new_beta = 0;
+ }
+
+        # Decide whether to accept a new iteration point and update the trust region
+        # See Alg. 4.1 on p. 69 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
+
+        rho = (- new_log_l + log_l) / neg_log_l_change_predicted;
+        if (rho < 0.25 | isNaN_new_log_l == 1) {
+          trust_delta = 0.25 * trust_delta;
+        }
+        if (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) {
+ trust_delta = 2 * trust_delta;
+
+ ### if (trust_delta > max_trust_delta) {
+ ### trust_delta = max_trust_delta;
+ ### }
+
+ }
+ if (rho > 0.1 & isNaN_new_log_l == 0) {
+ accept_new_beta = 1;
+ }
+ }
+
+ if (accept_new_beta == 1)
+ {
+        beta = newbeta; log_l = new_log_l; deviance_nodisp = new_deviance_nodisp; isNaN_log_l = isNaN_new_log_l;
+
+        [g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+ # We introduced these variables to avoid roundoff errors:
+ # g_Y = y_residual / (y_var * link_grad);
+ # w = 1.0 / (y_var * link_grad * link_grad);
+
+ gXY = - t(X) %*% g_Y;
+ g = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];
+ g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+ }
+
+      [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] =
+        get_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, trust_delta, max_iteration_CG);
+
+ newbeta = beta + z;
+
+ ssX_newbeta = diag (scale_X) %*% newbeta;
+      ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) %*% newbeta;
+ all_linear_terms = X %*% ssX_newbeta;
+
+ [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+        (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+ if (isNaN_new_log_l == 0) {
+ new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+ new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+ }
+
+      log_l_change = new_log_l - log_l;  # R's criterion for termination: |dev - devold|/(|dev| + 0.1) < eps
+
+      if (reached_trust_boundary == 0 & isNaN_new_log_l == 0 &
+        (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1)
+          | abs (log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) )
+ {
+ termination_code = 1;
+ }
+ rho = - log_l_change / neg_log_l_change_predicted;
+ z_norm = sqrt (sum (z * z));
+
+ [z_norm_m, z_norm_e] = round_to_print (z_norm);
+ [trust_delta_m, trust_delta_e] = round_to_print (trust_delta);
+ [rho_m, rho_e] = round_to_print (rho);
+ [new_log_l_m, new_log_l_e] = round_to_print (new_log_l);
+ [log_l_change_m, log_l_change_e] = round_to_print (log_l_change);
+ [g_norm_m, g_norm_e] = round_to_print (g_norm);
+
+ i_IRLS = i_IRLS + 1;
+ if(verbose)
+ print ("Iter #" + i_IRLS + " completed"
+ + ", ||z|| = " + z_norm_m + "E" + z_norm_e
+ + ", trust_delta = " + trust_delta_m + "E" + trust_delta_e
+ + ", reached = " + reached_trust_boundary
+ + ", ||g|| = " + g_norm_m + "E" + g_norm_e
+ + ", new_log_l = " + new_log_l_m + "E" + new_log_l_e
+ + ", log_l_change = " + log_l_change_m + "E" + log_l_change_e
+ + ", rho = " + rho_m + "E" + rho_e);
+
+ if (i_IRLS == max_iteration_IRLS) {
+ termination_code = 2;
+ }
+ }
+
+ beta = newbeta;
+ log_l = new_log_l;
+ deviance_nodisp = new_deviance_nodisp;
+
+ if (termination_code == 1) {
+ print ("Converged in " + i_IRLS + " steps.");
+ }
+ else {
+ print ("Did not converge.");
+ }
+
+ ssX_beta = diag (scale_X) %*% beta;
+    ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% beta;
+ if (intercept_status == 2) {
+ beta_out = cbind (ssX_beta, beta);
+ } else {
+ beta_out = ssX_beta;
+ }
+
+ betas = beta_out
+
+ if (intercept_status == 1 | intercept_status == 2) {
+ intercept_value = as.scalar (beta_out [num_features, 1]);
+ beta_noicept = beta_out [1 : (num_features - 1), 1];
+ } else {
+ beta_noicept = beta_out [1 : num_features, 1];
+ }
+ min_beta = min (beta_noicept);
+ max_beta = max (beta_noicept);
+ tmp_i_min_beta = rowIndexMin (t(beta_noicept))
+ i_min_beta = as.scalar (tmp_i_min_beta [1, 1]);
+ tmp_i_max_beta = rowIndexMax (t(beta_noicept))
+ i_max_beta = as.scalar (tmp_i_max_beta [1, 1]);
+
+ ##### OVER-DISPERSION PART #####
+
+ all_linear_terms = X %*% ssX_beta;
+    [g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+ pearson_residual_sq = g_Y ^ 2 / w;
+    pearson_residual_sq = replace (target = pearson_residual_sq, pattern = NaN, replacement = 0);
+ # pearson_residual_sq = (y_residual ^ 2) / y_var;
+
+    if (num_records > num_features) {
+      estimated_dispersion = sum (pearson_residual_sq) / (num_records - num_features);
+ }
+ if (dispersion <= 0) {
+ dispersion = estimated_dispersion;
+ }
+ deviance = deviance_nodisp / dispersion;
+
+ ##### END OF THE MAIN PART #####
+
+  } else { print ("Input matrices are out of range. Terminating the DML."); termination_code = 3; }
+  } else { print ("Distribution/Link not supported. Terminating the DML."); termination_code = 4; }
+
+ str = "TERMINATION_CODE," + termination_code;
+ str = append (str, "BETA_MIN," + min_beta);
+ str = append (str, "BETA_MIN_INDEX," + i_min_beta);
+ str = append (str, "BETA_MAX," + max_beta);
+ str = append (str, "BETA_MAX_INDEX," + i_max_beta);
+ str = append (str, "INTERCEPT," + intercept_value);
+ str = append (str, "DISPERSION," + dispersion);
+ str = append (str, "DISPERSION_EST," + estimated_dispersion);
+ str = append (str, "DEVIANCE_UNSCALED," + deviance_nodisp);
+ str = append (str, "DEVIANCE_SCALED," + deviance);
+
+ if (verbose) {
+ print (str);
+ }
+}
+
+check_if_supported = function (int ncol_y, int dist_type, double var_power, int link_type, double link_power)
+return (int is_supported)
+{
+ is_supported = 0;
+ if (ncol_y == 1 & dist_type == 1 & link_type == 1)
+ { # POWER DISTRIBUTION
+ is_supported = 1;
+    if (var_power == 0 & link_power == -1.0)        print ("Gaussian.inverse");
+    else if (var_power == 0 & link_power == 0)      print ("Gaussian.log");
+    else if (var_power == 0 & link_power == 0.5)    print ("Gaussian.sqrt");
+    else if (var_power == 0 & link_power == 1.0)    print ("Gaussian.id");
+    else if (var_power == 0)                        print ("Gaussian.power_nonlog");
+    else if (var_power == 1.0 & link_power == -1.0) print ("Poisson.inverse");
+    else if (var_power == 1.0 & link_power == 0)    print ("Poisson.log");
+    else if (var_power == 1.0 & link_power == 0.5)  print ("Poisson.sqrt");
+    else if (var_power == 1.0 & link_power == 1.0)  print ("Poisson.id");
+    else if (var_power == 1.0)                      print ("Poisson.power_nonlog");
+    else if (var_power == 2.0 & link_power == -1.0) print ("Gamma.inverse");
+    else if (var_power == 2.0 & link_power == 0)    print ("Gamma.log");
+    else if (var_power == 2.0 & link_power == 0.5)  print ("Gamma.sqrt");
+    else if (var_power == 2.0 & link_power == 1.0)  print ("Gamma.id");
+    else if (var_power == 2.0)                      print ("Gamma.power_nonlog");
+    else if (var_power == 3.0 & link_power == -2.0) print ("InvGaussian.1/mu^2");
+    else if (var_power == 3.0 & link_power == -1.0) print ("InvGaussian.inverse");
+    else if (var_power == 3.0 & link_power == 0)    print ("InvGaussian.log");
+    else if (var_power == 3.0 & link_power == 0.5)  print ("InvGaussian.sqrt");
+    else if (var_power == 3.0 & link_power == 1.0)  print ("InvGaussian.id");
+    else if (var_power == 3.0)                      print ("InvGaussian.power_nonlog");
+    else if (link_power == 0)                       print ("PowerDist.log");
+    else                                            print ("PowerDist.power_nonlog");
+ }
+ if (ncol_y == 1 & dist_type == 2)
+ {
+    print ("Error: Bernoulli response matrix has not been converted into two-column format.");
+ }
+ if (ncol_y == 2 & dist_type == 2 & link_type >= 1 & link_type <= 5)
+ { # BINOMIAL/BERNOULLI DISTRIBUTION
+ is_supported = 1;
+ if (link_type == 1 & link_power == -1.0) print ("Binomial.inverse");
+ else if (link_type == 1 & link_power == 0) print ("Binomial.log");
+ else if (link_type == 1 & link_power == 0.5) print ("Binomial.sqrt");
+ else if (link_type == 1 & link_power == 1.0) print ("Binomial.id");
+    else if (link_type == 1)                     print ("Binomial.power_nonlog");
+ else if (link_type == 2) print ("Binomial.logit");
+ else if (link_type == 3) print ("Binomial.probit");
+ else if (link_type == 4) print ("Binomial.cloglog");
+ else if (link_type == 5) print ("Binomial.cauchit");
+ }
+ if (is_supported == 0) {
+    print ("Response matrix with " + ncol_y + " columns, distribution family (" + dist_type + ", " + var_power
+      + ") and link family (" + link_type + ", " + link_power + ") are NOT supported together.");
+ }
+}
+
+glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, double var_power, int link_type, double link_power, int icept_status, int max_iter_CG)
+return (Matrix[double] beta, double saturated_log_l, int isNaN)
+{
+ saturated_log_l = 0.0;
+ isNaN = 0;
+ y_corr = Y [, 1];
+ if (dist_type == 2) {
+ n_corr = rowSums (Y);
+ is_n_zero = (n_corr == 0);
+ y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;
+ }
+ linear_terms = y_corr;
+ if (dist_type == 1 & link_type == 1)
+ { # POWER DISTRIBUTION
+ if (link_power == 0) {
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
+        linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+ } else { isNaN = 1; }
+ } else if (link_power == 1.0) {
+ linear_terms = y_corr;
+ } else if (link_power == -1.0) {
+ linear_terms = 1.0 / y_corr;
+ } else if (link_power == 0.5) {
+ if (sum (y_corr < 0) == 0) {
+ linear_terms = sqrt (y_corr);
+ } else { isNaN = 1; }
+ } else if (link_power > 0) {
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
+        linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
+ } else { isNaN = 1; }
+ } else {
+ if (sum (y_corr <= 0) == 0) {
+ linear_terms = y_corr ^ link_power;
+ } else { isNaN = 1; }
+ }
+ }
+ if (dist_type == 2 & link_type >= 1 & link_type <= 5)
+ { # BINOMIAL/BERNOULLI DISTRIBUTION
+ if (link_type == 1 & link_power == 0) { # Binomial.log
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
+        linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+ } else { isNaN = 1; }
+ }
+ else if (link_type == 1 & link_power > 0) { # Binomial.power_nonlog pos
+ if (sum (y_corr < 0) == 0) {
+ is_zero_y_corr = (y_corr == 0);
+ linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
+ } else { isNaN = 1; }
+ }
+ else if (link_type == 1) { # Binomial.power_nonlog neg
+ if (sum (y_corr <= 0) == 0) {
+ linear_terms = y_corr ^ link_power;
+ } else { isNaN = 1; }
+ }
+ else
+ {
+ is_zero_y_corr = (y_corr <= 0);
+ is_one_y_corr = (y_corr >= 1.0);
+      y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * (is_zero_y_corr + is_one_y_corr);
+      if (link_type == 2) { # Binomial.logit
+        linear_terms = log (y_corr / (1.0 - y_corr))
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+ }
+ else if (link_type == 3) { # Binomial.probit
+ y_below_half = y_corr + (1.0 - 2.0 * y_corr) * (y_corr > 0.5);
+ t = sqrt (- 2.0 * log (y_below_half));
+        approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
+        linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * (y_corr > 0.5))
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+ } else if (link_type == 4) { # Binomial.cloglog
+ linear_terms = log (- log (1.0 - y_corr))
+ - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+ }
+ else if (link_type == 5) { # Binomial.cauchit
+ linear_terms = tan ((y_corr - 0.5) * pi)
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - is_zero_y_corr);
+ }
+ }
+ }
+
+ if (isNaN == 0) {
+ [saturated_log_l, isNaN] =
+      glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, link_type, link_power);
+ }
+
+ if ((dist_type == 1 & link_type == 1 & link_power == 0) |
+ (dist_type == 2 & link_type >= 2))
+ {
+ desired_eta = 0.0;
+ }
+ else if (link_type == 1 & link_power == 0) {
+ desired_eta = log (0.5);
+ } else if (link_type == 1) {
+ desired_eta = 0.5 ^ link_power;
+ } else {
+ desired_eta = 0.5;
+ }
+
+ beta = matrix (0.0, rows = ncol(X), cols = 1);
+
+ if (desired_eta != 0)
+ {
+ if (icept_status == 1 | icept_status == 2) {
+ beta [nrow(beta), 1] = desired_eta;
+ } else {
+ # We want: avg (X %*% ssX_transform %*% beta) = desired_eta
+ # Note that "ssX_transform" is trivial here, hence ignored
+ beta = straightenX (X, 0.000001, max_iter_CG);
+ beta = beta * desired_eta;
+ }
+ }
+}
+
+
+glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
+  int dist_type, double var_power, int link_type, double link_power)
+return (Matrix[double] g_Y, Matrix[double] w)
+{
+ # ORIGINALLY we returned more meaningful vectors, namely:
+ # Matrix[double] y_residual : y - y_mean, i.e. y observed - y predicted
+ # Matrix[double] link_gradient : derivative of the link function
+  # Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) function
+  # BUT, this caused roundoff errors, so we had to compute "directly useful" vectors
+  # and skip over the "meaningful intermediaries". Now we output these two variables:
+ # g_Y = y_residual / (var_function * link_gradient);
+ # w = 1.0 / (var_function * link_gradient ^ 2);
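+  #
+  # A worked instance (editorial note): for the logit link on a single Bernoulli
+  # row (n = 1), mu = 1 / (1 + exp(-eta)), V(mu) = mu * (1 - mu), and
+  # d link / d mu = 1 / (mu * (1 - mu)), so the two outputs reduce to
+  #   g_Y = y - mu          (the plain residual)
+  #   w   = mu * (1 - mu)   (the Bernoulli variance),
+  # which is what the link_type == 2 branch below computes via Y_prob.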
+ zeros_r = matrix(0, nrow(linear_terms), 1);
+ ones_r = matrix(1, nrow(linear_terms), 1);
+ g_Y = zeros_r;
+ w = zeros_r;
+
+ # Some constants
+
+ one_over_sqrt_two_pi = 0.39894228040143267793994605993438;
+ p_one_m_one = matrix("1 -1", 1, 2);
+ m_one_p_one = matrix("-1 1", 1, 2);
+ flip_pos = matrix("0 1 1 0", 2, 2);
+ flip_neg = matrix("0 -1 1 0", 2, 2);
+
+ if (dist_type == 1 & link_type == 1)
+ { # POWER DISTRIBUTION
+ y_mean = zeros_r;
+ if (link_power == 0) {
+ y_mean = exp (linear_terms);
+ y_mean_pow = y_mean ^ (1 - var_power);
+ w = y_mean_pow * y_mean;
+ g_Y = y_mean_pow * (Y - y_mean);
+ } else if (link_power == 1.0) {
+ y_mean = linear_terms;
+ w = y_mean ^ (- var_power);
+ g_Y = w * (Y - y_mean);
+ } else {
+ y_mean = linear_terms ^ (1.0 / link_power);
+ c1 = (1 - var_power) / link_power - 1;
+ c2 = (2 - var_power) / link_power - 2;
+ g_Y = (linear_terms ^ c1) * (Y - y_mean) / link_power;
+ w = (linear_terms ^ c2) / (link_power ^ 2);
+ }
+ }
+ if (dist_type == 2 & link_type >= 1 & link_type <= 5)
+ { # BINOMIAL/BERNOULLI DISTRIBUTION
+ if (link_type == 1) { # BINOMIAL.POWER LINKS
+ if (link_power == 0) { # Binomial.log
+ vec1 = 1 / (exp (- linear_terms) - 1);
+ g_Y = Y [, 1] - Y [, 2] * vec1;
+ w = rowSums (Y) * vec1;
+ }
+ else { # Binomial.nonlog
+ vec1 = zeros_r;
+ if (link_power == 0.5) {
+ vec1 = 1 / (1 - linear_terms ^ 2);
+ } else if (sum (linear_terms < 0) == 0) {
+          vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ (1 / link_power));
+        } else {isNaN = 1;}
+        # We want a "zero-protected" version of
+        #   vec2 = Y [, 1] / linear_terms;
+        is_y_0 = (Y [, 1] == 0);
+        vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - is_y_0;
+ g_Y = (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;
+ w = rowSums (Y) * vec1 / link_power ^ 2;
+ }
+ }
+ else {
+ is_LT_infinite = cbind(linear_terms==Inf, linear_terms==-Inf);
+      finite_linear_terms = replace (target = linear_terms,        pattern =  Inf, replacement = 0);
+      finite_linear_terms = replace (target = finite_linear_terms, pattern = -Inf, replacement = 0);
+ if (link_type == 2) { # Binomial.logit
+ Y_prob = cbind(exp(finite_linear_terms), ones_r);
+ Y_prob = Y_prob / rowSums (Y_prob);
+ Y_prob = Y_prob * (1.0 - rowSums (is_LT_infinite)) + is_LT_infinite;
+ g_Y = rowSums (Y * (Y_prob %*% flip_neg)); ### = y_residual;
+ w = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob); ### = y_variance;
+ }
+ else if (link_type == 3) { # Binomial.probit
+ is_lt_pos = (linear_terms >= 0);
+        t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
+        pt_gp = t_gp * ( 0.254829592
+          + t_gp * (-0.284496736  # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
+          + t_gp * ( 1.421413741  # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
+ + t_gp * (-1.453152027
+ + t_gp * 1.061405429))));
+ the_gauss_exp = exp (- (linear_terms ^ 2) / 2.0);
+ vec1 = 0.25 * pt_gp * (2 - the_gauss_exp * pt_gp);
+        vec2 = Y [, 1] - rowSums (Y) * is_lt_pos + the_gauss_exp * pt_gp * rowSums (Y) * (is_lt_pos - 0.5);
+ w = the_gauss_exp * (one_over_sqrt_two_pi ^ 2) * rowSums (Y) / vec1;
+ g_Y = one_over_sqrt_two_pi * vec2 / vec1;
+ }
+ else if (link_type == 4) { # Binomial.cloglog
+        the_exp = exp (linear_terms);
+        the_exp_exp = exp (- the_exp);
+        is_too_small = ((10000000 + the_exp) == 10000000);
+        the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + is_too_small) + is_too_small * (1 - the_exp / 2);
+ g_Y = (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;
+ w = the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;
+ } else if (link_type == 5) { # Binomial.cauchit
+ Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / pi;
+ Y_prob = Y_prob * (1.0 - rowSums (is_LT_infinite)) + is_LT_infinite;
+ y_residual = Y [, 1] * Y_prob [, 2] - Y [, 2] * Y_prob [, 1];
+ var_function = rowSums (Y) * Y_prob [, 1] * Y_prob [, 2];
+ link_gradient_normalized = (1 + linear_terms ^ 2) * pi;
+        g_Y = rowSums (Y) * y_residual / (var_function * link_gradient_normalized);
+        w = (rowSums (Y) ^ 2) / (var_function * link_gradient_normalized ^ 2);
+ }
+ }
+ }
+}
+
+
+glm_log_likelihood_part = function (Matrix[double] linear_terms, Matrix[double] Y,
+ int dist_type, double var_power, int link_type, double link_power)
+return (double log_l, int isNaN)
+{
+ isNaN = 0;
+ log_l = 0.0;
+ num_records = nrow (Y);
+ zeros_r = matrix (0.0, rows = num_records, cols = 1);
+
+ if (dist_type == 1 & link_type == 1)
+ { # POWER DISTRIBUTION
+ b_cumulant = zeros_r;
+ natural_parameters = zeros_r;
+ is_natural_parameter_log_zero = zeros_r;
+ if(var_power == 1.0 & link_power == 0) { # Poisson.log
+ b_cumulant = exp (linear_terms);
+ is_natural_parameter_log_zero = (linear_terms == -Inf);
+      natural_parameters = replace (target = linear_terms, pattern = -Inf, replacement = 0);
+ } else if (var_power == 1.0 & link_power == 1.0) { # Poisson.id
+ if (sum (linear_terms < 0) == 0) {
+ b_cumulant = linear_terms;
+ is_natural_parameter_log_zero = (linear_terms == 0);
+        natural_parameters = log (linear_terms + is_natural_parameter_log_zero);
+ } else {isNaN = 1;}
+ } else if (var_power == 1.0 & link_power == 0.5) { # Poisson.sqrt
+ if (sum (linear_terms < 0) == 0) {
+ b_cumulant = linear_terms ^ 2;
+ is_natural_parameter_log_zero = (linear_terms == 0);
+        natural_parameters = 2.0 * log (linear_terms + is_natural_parameter_log_zero);
+ } else {isNaN = 1;}
+    } else if (var_power == 1.0 & link_power > 0) { # Poisson.power_nonlog, pos
+      if (sum (linear_terms < 0) == 0) {
+        is_natural_parameter_log_zero = (linear_terms == 0);
+        b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / link_power) - is_natural_parameter_log_zero;
+        natural_parameters = log (linear_terms + is_natural_parameter_log_zero) / link_power;
+      } else {isNaN = 1;}
+    } else if (var_power == 1.0) { # Poisson.power_nonlog, neg
+ if (sum (linear_terms <= 0) == 0) {
+ b_cumulant = linear_terms ^ (1.0 / link_power);
+ natural_parameters = log (linear_terms) / link_power;
+ } else {isNaN = 1;}
+ } else if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse
+ if (sum (linear_terms <= 0) == 0) {
+ b_cumulant = - log (linear_terms);
+ natural_parameters = - linear_terms;
+ } else {isNaN = 1;}
+ } else if (var_power == 2.0 & link_power == 1.0) { # Gamma.id
+ if (sum (linear_terms <= 0) == 0) {
+ b_cumulant = log (linear_terms);
+ natural_parameters = - 1.0 / linear_terms;
+ } else {isNaN = 1;}
+ } else if (var_power == 2.0 & link_power == 0) { # Gamma.log
+ b_cumulant = linear_terms;
+ natural_parameters = - exp (- linear_terms);
+ } else if (var_power == 2.0) { # Gamma.power_nonlog
+ if (sum (linear_terms <= 0) == 0) {
+ b_cumulant = log (linear_terms) / link_power;
+ natural_parameters = - linear_terms ^ (- 1.0 / link_power);
+ } else {isNaN = 1;}
+    } else if (link_power == 0) { # PowerDist.log
+      natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - var_power);
+      b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
+    }
+    else { # PowerDist.power_nonlog
+ if (-2 * link_power == 1.0 - var_power) {
+ natural_parameters = 1.0 / (linear_terms ^ 2) / (1.0 - var_power);
+ }
+ else if (-1 * link_power == 1.0 - var_power) {
+ natural_parameters = 1.0 / linear_terms / (1.0 - var_power);
+ } else if ( link_power == 1.0 - var_power) {
+ natural_parameters = linear_terms / (1.0 - var_power);
+ } else if ( 2 * link_power == 1.0 - var_power) {
+ natural_parameters = linear_terms ^ 2 / (1.0 - var_power);
+ } else if (sum (linear_terms <= 0) == 0) {
+ power = (1.0 - var_power) / link_power;
+ natural_parameters = (linear_terms ^ power) / (1.0 - var_power);
+ } else {
+ isNaN = 1;
+ }
+
+ if (-2 * link_power == 2.0 - var_power) {
+ b_cumulant = 1.0 / (linear_terms ^ 2) / (2.0 - var_power);
+ } else if (-1 * link_power == 2.0 - var_power) {
+ b_cumulant = 1.0 / linear_terms / (2.0 - var_power);
+ } else if ( link_power == 2.0 - var_power) {
+ b_cumulant = linear_terms / (2.0 - var_power);
+ } else if ( 2 * link_power == 2.0 - var_power) {
+ b_cumulant = linear_terms ^ 2 / (2.0 - var_power);
+ } else if (sum (linear_terms <= 0) == 0) {
+ power = (2.0 - var_power) / link_power;
+ b_cumulant = (linear_terms ^ power) / (2.0 - var_power);
+ } else {
+ isNaN = 1;
+ }
+ }
+ if (sum (is_natural_parameter_log_zero * abs (Y)) > 0) {
+ log_l = -Inf;
+ isNaN = 1;
+ }
+ if (isNaN == 0)
+ {
+ log_l = sum (Y * natural_parameters - b_cumulant);
+ if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
+ log_l = -Inf;
+ isNaN = 1;
+ }
+ }
+ }
+
+ if (dist_type == 2 & link_type >= 1 & link_type <= 5)
+ { # BINOMIAL/BERNOULLI DISTRIBUTION
+    [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, link_type, link_power);
+
+ if (isNaN == 0) {
+ if( sum((Y_prob<=0) * abs(Y)) == 0 ) {
+ log_l = sum (Y * log (Y_prob * (1 - (Y_prob<=0)) + (Y_prob<=0)));
+        if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) { isNaN = 1; }
+ }
+ else {
+ log_l = -Inf;
+ isNaN = 1;
+ }
+ }
+ }
+
+ if (isNaN == 1) {
+ log_l = - Inf;
+ }
+}
+
+
+binomial_probability_two_column =
+ function (Matrix[double] linear_terms, int link_type, double link_power)
+ return (Matrix[double] Y_prob, int isNaN)
+{
+ isNaN = 0;
+
+ # Define some auxiliary matrices
+
+ ones_2 = matrix (1.0, rows = 1, cols = 2);
+ zeros_r = matrix(0, nrow(linear_terms), 1);
+ ones_r = matrix(1, nrow(linear_terms), 1);
+
+ # Begin the function body
+
+ Y_prob = zeros_r %*% ones_2;
+ if (link_type == 1) { # Binomial.power
+ if(link_power == 0) { # Binomial.log
+ Y_prob = cbind(exp(linear_terms), 1-exp(linear_terms));
+ } else if (link_power == 0.5) { # Binomial.sqrt
+ Y_prob = cbind(linear_terms^2, 1-linear_terms^2);
+ } else if (sum (linear_terms < 0) == 0) { # Binomial.power_nonlog
+      Y_prob = cbind(linear_terms^(1.0/link_power), 1-linear_terms^(1.0/link_power));
+ } else {
+ isNaN = 1;
+ }
+ } else { # Binomial.non_power
+ is_LT_infinite = cbind(linear_terms==Inf, linear_terms==-Inf);
+    finite_linear_terms = replace (target = linear_terms,        pattern =  Inf, replacement = 0);
+    finite_linear_terms = replace (target = finite_linear_terms, pattern = -Inf, replacement = 0);
+ if (link_type == 2) { # Binomial.logit
+ Y_prob = cbind(exp(finite_linear_terms), ones_r);
+ Y_prob = Y_prob / rowSums (Y_prob);
+ } else if (link_type == 3) { # Binomial.probit
+ lt_pos_neg = cbind(finite_linear_terms>=0, finite_linear_terms<0);
+      t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 0.231641888 = 0.3275911 / sqrt (2.0)
+      pt_gp = t_gp * ( 0.254829592
+        + t_gp * (-0.284496736  # "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
+        + t_gp * ( 1.421413741  # U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
+        + t_gp * (-1.453152027
+        + t_gp * 1.061405429))));
+ the_gauss_exp = exp (- (finite_linear_terms ^ 2) / 2.0);
+ Y_prob = lt_pos_neg + (0.5 - lt_pos_neg) * the_gauss_exp * pt_gp;
+ } else if (link_type == 4) { # Binomial.cloglog
+ the_exp = exp (finite_linear_terms);
+ the_exp_exp = exp (- the_exp);
+ is_too_small = ((10000000 + the_exp) == 10000000);
+      Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * the_exp * (1 - the_exp / 2);
+ Y_prob [, 2] = the_exp_exp;
+ } else if (link_type == 5) { # Binomial.cauchit
+      Y_prob = 0.5 + cbind(atan(finite_linear_terms), -atan(finite_linear_terms)) / pi;
+ } else {
+ isNaN = 1;
+ }
+ Y_prob = Y_prob * (1.0 - rowSums (is_LT_infinite)) + is_LT_infinite;
+ }
+}
+
+
+# THE CG-STEIHAUG PROCEDURE SCRIPT
+
+# Apply Conjugate Gradient - Steihaug algorithm in order to approximately minimize
+#   0.5 z^T (X^T diag(w) X + diag (lambda)) z + (g + lambda * beta)^T z
+# under constraint: ||z|| <= trust_delta.
+# See Alg. 7.2 on p. 171 of "Numerical Optimization" 2nd ed. by Nocedal and Wright
+# IN THE ABOVE, "X" IS UNDERSTOOD TO BE "X %*% (SHIFT/SCALE TRANSFORM)"; this transform
+# is given separately because sparse "X" may become dense after applying the transform.
+#
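+# An editorial note on the returned model decrease: with H = X^T diag(w) X + diag(lambda)
+# and CG residual r_CG = H z + g_reg, the quadratic model value at z is
+#   m(z) = g_reg^T z + 0.5 z^T H z = 0.5 z^T (r_CG + g_reg),
+# which is why "neg_log_l_change" is computed below as 0.5 * sum (z * (r_CG + g_reg)).
+#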
+get_CG_Steihaug_point =
+  function (Matrix[double] X, Matrix[double] scale_X, Matrix[double] shift_X, Matrix[double] w,
+    Matrix[double] g, Matrix[double] beta, Matrix[double] lambda, double trust_delta, int max_iter_CG)
+return (Matrix[double] z, double neg_log_l_change, int i_CG, int reached_trust_boundary)
+{
+ trust_delta_sq = trust_delta ^ 2;
+ size_CG = nrow (g);
+ z = matrix (0.0, rows = size_CG, cols = 1);
+ neg_log_l_change = 0.0;
+ reached_trust_boundary = 0;
+ g_reg = g + lambda * beta;
+ r_CG = g_reg;
+ p_CG = -r_CG;
+ rr_CG = sum(r_CG * r_CG);
+ eps_CG = rr_CG * min (0.25, sqrt (rr_CG));
+ converged_CG = 0;
+ if (rr_CG < eps_CG) {
+ converged_CG = 1;
+ }
+
+ max_iteration_CG = max_iter_CG;
+ if (max_iteration_CG <= 0) {
+ max_iteration_CG = size_CG;
+ }
+ i_CG = 0;
+ while (converged_CG == 0)
+ {
+ i_CG = i_CG + 1;
+ ssX_p_CG = diag (scale_X) %*% p_CG;
+ ssX_p_CG [size_CG, ] = ssX_p_CG [size_CG, ] + t(shift_X) %*% p_CG;
+ temp_CG = t(X) %*% (w * (X %*% ssX_p_CG));
+    q_CG = (lambda * p_CG) + diag (scale_X) %*% temp_CG + shift_X %*% temp_CG [size_CG, ];
+ pq_CG = sum (p_CG * q_CG);
+ if (pq_CG <= 0) {
+ pp_CG = sum (p_CG * p_CG);
+ if (pp_CG > 0) {
+ [z, neg_log_l_change] =
+          get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
+ reached_trust_boundary = 1;
+ } else {
+ neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
+ }
+ converged_CG = 1;
+ }
+ if (converged_CG == 0) {
+ alpha_CG = rr_CG / pq_CG;
+ new_z = z + alpha_CG * p_CG;
+ if (sum(new_z * new_z) >= trust_delta_sq) {
+ pp_CG = sum (p_CG * p_CG);
+ [z, neg_log_l_change] =
+          get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, trust_delta_sq);
+ reached_trust_boundary = 1;
+ converged_CG = 1;
+ }
+ if (converged_CG == 0) {
+ z = new_z;
+ old_rr_CG = rr_CG;
+ r_CG = r_CG + alpha_CG * q_CG;
+ rr_CG = sum(r_CG * r_CG);
+ if (i_CG == max_iteration_CG | rr_CG < eps_CG) {
+ neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
+ reached_trust_boundary = 0;
+ converged_CG = 1;
+ }
+ if (converged_CG == 0) {
+ p_CG = -r_CG + (rr_CG / old_rr_CG) * p_CG;
+ }
+ }
+ }
+ }
+}
+
+
+# An auxiliary function used twice inside the CG-STEIHAUG loop:
+get_trust_boundary_point =
+ function (Matrix[double] g, Matrix[double] z, Matrix[double] p,
+ Matrix[double] q, Matrix[double] r, double pp, double pq,
+ double trust_delta_sq)
+return (Matrix[double] new_z, double f_change)
+{
+ zz = sum (z * z); pz = sum (p * z);
+ sq_root_d = sqrt (pz * pz - pp * (zz - trust_delta_sq));
+ tau_1 = (- pz + sq_root_d) / pp;
+ tau_2 = (- pz - sq_root_d) / pp;
+ zq = sum (z * q); gp = sum (g * p);
+ f_extra = 0.5 * sum (z * (r + g));
+ f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1;
+ f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2;
+ new_z = z + ifelse(f_change_1<f_change_2, tau_1, tau_2) * p;
+ f_change = min(f_change_1, f_change_2);
+}
+
+
+# Computes vector w such that ||X %*% w - 1|| -> MIN given avg(X %*% w) = 1
+# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
+# it to compute w = c * z_LS such that sum(X %*% w) = nrow(X).
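+# (Editorial note: since w_X = t(colSums(X)), we have sum (X %*% z_LS) = sum (w_X * z_LS),
+# which gives the scaling constant c = nrow(X) / sum (w_X * z_LS) used at the end below.)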
+straightenX = function (Matrix[double] X, double eps, int max_iter_CG)
+return (Matrix[double] w)
+{
+ w_X = t(colSums(X));
+ lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);
+ eps_LS = eps * nrow(X);
+
+ # BEGIN LEAST SQUARES
+
+ r_LS = - w_X;
+ z_LS = matrix (0.0, rows = ncol(X), cols = 1);
+ p_LS = - r_LS;
+ norm_r2_LS = sum (r_LS ^ 2);
+ i_LS = 0;
+ while (i_LS < max_iter_CG & i_LS < ncol(X) & norm_r2_LS >= eps_LS)
+ {
+ q_LS = t(X) %*% X %*% p_LS;
+ q_LS = q_LS + lambda_LS * p_LS;
+ alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
+ z_LS = z_LS + alpha_LS * p_LS;
+ old_norm_r2_LS = norm_r2_LS;
+ r_LS = r_LS + alpha_LS * q_LS;
+ norm_r2_LS = sum (r_LS ^ 2);
+ p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;
+ i_LS = i_LS + 1;
+ }
+ # END LEAST SQUARES
+
+ w = (nrow(X) / sum (w_X * z_LS)) * z_LS;
+}
+
+
+round_to_print = function (double x_to_truncate)
+return (double mantissa, int eee)
+{
+ mantissa = 1.0;
+ eee = 0;
+ positive_infinity = Inf;
+ x = abs (x_to_truncate);
+ if (x != x / 2.0) {
+ log_ten = log (10.0);
+ d_eee = round (log (x) / log_ten - 0.5);
+ mantissa = round (x * exp (log_ten * (4.0 - d_eee))) / 10000;
+ if (mantissa == 10.0) {
+ mantissa = 1.0;
+ d_eee = d_eee + 1;
+ }
+ if (x_to_truncate < 0) {
+ mantissa = - mantissa;
+ }
+ eee = 0;
+ pow_two = 1;
+ res_eee = abs (d_eee);
+ while (res_eee != 0) {
+ new_res_eee = round (res_eee / 2.0 - 0.3);
+ if (new_res_eee * 2.0 < res_eee) {
+ eee = eee + pow_two;
+ }
+ res_eee = new_res_eee;
+ pow_two = 2 * pow_two;
+ }
+ if (d_eee < 0) {
+ eee = - eee;
+ }
+ } else {
+ mantissa = x_to_truncate;
+ }
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java
index 60b7c18..4c8e1ff 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -89,6 +89,7 @@ public enum Builtins {
EXP("exp", false),
EVAL("eval", false),
FLOOR("floor", false),
+ GLM("glm", true),
GNMF("gnmf", true),
GRID_SEARCH("gridSearch", true),
IFELSE("ifelse", false),
diff --git a/src/test/java/org/apache/sysds/test/TestUtils.java b/src/test/java/org/apache/sysds/test/TestUtils.java
index 530dccb..80e0328 100644
--- a/src/test/java/org/apache/sysds/test/TestUtils.java
+++ b/src/test/java/org/apache/sysds/test/TestUtils.java
@@ -2330,4 +2330,387 @@ public class TestUtils
 	public static String federatedAddress(String host, int port, String input) {
return host + ':' + port + '/' + input;
}
+
+	public static double gaussian_probability (double point)
+	// "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. Stegun,
+	// U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, p. 299
+	{
+		double t_gp = 1.0 / (1.0 + Math.abs (point) * 0.231641888);  // 0.231641888 = 0.3275911 / sqrt (2.0)
+		double erf_gp = 1.0 - t_gp * ( 0.254829592
+			+ t_gp * (-0.284496736
+			+ t_gp * ( 1.421413741
+			+ t_gp * (-1.453152027
+			+ t_gp * 1.061405429)))) * Math.exp (- point * point / 2.0);
+ erf_gp = erf_gp * (point > 0 ? 1.0 : -1.0);
+ return (0.5 + 0.5 * erf_gp);
+ }
+
+ public static double logFactorial (double x)
+	// From paper: C. Lanczos "A Precision Approximation of the Gamma Function",
+	// Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, pp. 86-96
+ {
+		final double[] cf = {1.000000000178, 76.180091729406, -86.505320327112,
+			24.014098222230, -1.231739516140, 0.001208580030, -0.000005363820};
+		double a_5 = cf[0] + cf[1] / (x + 1) + cf[2] / (x + 2) + cf[3] / (x + 3)
+			+ cf[4] / (x + 4) + cf[5] / (x + 5) + cf[6] / (x + 6);
+		return Math.log(a_5) + (x + 0.5) * Math.log(x + 5.5) - (x + 5.5) + 0.91893853320467; // log(sqrt(2 * PI))
+ }
+
+ public static long nextPoisson (Random r, double mu)
+ // Prob[k] = mu^k * exp(-mu) / k!
+	// The main part is from W. Hörmann "The Transformed Rejection Method
+ // for Generating Poisson Random Variables"
+ {
+ if (mu <= 0.0)
+ return 0;
+ if (mu >= 100000.0)
+			return Math.round (mu + Math.sqrt (mu) * r.nextGaussian ());
+ if (mu >= 10.0)
+ {
+ long output = 0;
+ double c = mu + 0.445;
+ double b = 0.931 + 2.53 * Math.sqrt (mu);
+ double a = -0.059 + 0.02483 * b;
+ double one_by_alpha = 1.1239 + 1.1328 / (b - 3.4);
+ double u_r = 0.43;
+ double v_r = 0.9277 - 3.6224 / (b - 2);
+ while (true)
+ {
+ double U;
+ double V = r.nextDouble ();
+ if (V <= 2 * u_r * v_r)
+ {
+ U = V / v_r - u_r;
+					output = (long) Math.floor ((2 * a / (0.5 - Math.abs (U)) + b) * U + c);
+ break;
+ }
+ if (V >= v_r)
+ {
+ U = r.nextDouble () - 0.5;
+ }
+ else
+ {
+ U = V / v_r - (u_r + 0.5);
+ U = Math.signum (U) * 0.5 - U;
+ V = v_r * r.nextDouble ();
+ }
+ double us = 0.5 - Math.abs (U);
+ if (0.487 < Math.abs (U) && us < V)
+ continue;
+				long k = (long) Math.floor ((2 * a / us + b) * U + c);
+				double V_to_compare = (V * one_by_alpha) / (a / us / us + b);
+				if (0 <= k && Math.log (V_to_compare) <= - mu + k * Math.log (mu) - TestUtils.logFactorial (k))
+ {
+ output = k;
+ break;
+ }
+ }
+ return output;
+ }
+ long count = 0;
+ double res_mu = mu;
+ while (res_mu > 0.0)
+ {
+ count ++;
+ res_mu += Math.log (r.nextDouble ());
+ }
+ return count - 1;
+ }
+
+ public static double nextGamma (Random r, double alpha)
+ // PDF(x) = x^(alpha-1) * exp(-x) / Gamma(alpha)
+	// D. Knuth "The Art of Computer Programming", 2nd Edition, Vol. 2, Sec. 3.4.1
+ {
+ double x;
+ if (alpha > 10000.0)
+ {
+			x = 1.0 - 1.0 / (9.0 * alpha) + r.nextGaussian() / Math.sqrt (9.0 * alpha);
+ return alpha * x * x * x;
+ }
+ else if (alpha > 5.0)
+ {
+ x = 0.0;
+ double the_root = Math.sqrt (2.0 * alpha - 1.0);
+ boolean is_accepted = false;
+ while (! is_accepted)
+ {
+ double y = Math.tan (Math.PI * r.nextDouble());
+ x = the_root * y + alpha - 1.0;
+ if (x <= 0)
+ continue;
+				double z = Math.exp ((alpha - 1.0) * (1.0 + Math.log (x / (alpha - 1.0))) - x);
+				is_accepted = (r.nextDouble() <= z * (1.0 + y * y));
+ }
+ return x;
+ }
+ else if (alpha > 0.0)
+ {
+ x = 1.0;
+ double frac_alpha = alpha;
+ while (frac_alpha >= 1.0)
+ {
+ x *= r.nextDouble ();
+ frac_alpha -= 1.0;
+ }
+ double output = - Math.log (x);
+ if (frac_alpha > 0.0) // Has to be between 0 and 1
+ {
+ double ceee = Math.E / (frac_alpha + Math.E);
+ boolean is_accepted = false;
+ while (! is_accepted)
+ {
+ double u = r.nextDouble();
+ if (u <= ceee)
+ {
+					x = Math.pow (u / ceee, 1.0 / frac_alpha);
+					is_accepted = (r.nextDouble() <= Math.exp (- x));
+				}
+				else
+				{
+					x = 1.0 - Math.log ((1.0 - u) / (1.0 - ceee));
+					is_accepted = (r.nextDouble() <= Math.pow (x, frac_alpha - 1.0));
+				}
+ }
+ output += x;
+ }
+ return output;
+ }
+ else // alpha <= 0.0
+ return 0.0;
+ }
+
+	public static double[] scaleWeights (double[] w_unscaled, double[][] X, double icept, double meanLF, double sigmaLF)
+ {
+ int rows = X.length;
+ int cols = w_unscaled.length;
+ double[] w = new double [cols];
+ for (int j = 0; j < cols; j ++)
+ w [j] = w_unscaled [j];
+
+ double sum_wx = 0.0;
+ double sum_1x = 0.0;
+ double sum_wxwx = 0.0;
+ double sum_1x1x = 0.0;
+ double sum_wx1x = 0.0;
+
+ for (int i = 0; i < rows; i ++)
+ {
+ double wx = 0.0;
+ double one_x = 0.0;
+ for (int j = 0; j < cols; j ++)
+ {
+ wx += w [j] * X [i][j];
+ one_x += X [i][j];
+ }
+ sum_wx += wx;
+ sum_1x += one_x;
+ sum_wxwx += wx * wx;
+ sum_1x1x += one_x * one_x;
+ sum_wx1x += wx * one_x;
+ }
+
+		double a0 = (meanLF - icept) * rows * sum_wx / (sum_wx * sum_wx + sum_1x * sum_1x);
+		double b0 = (meanLF - icept) * rows * sum_1x / (sum_wx * sum_wx + sum_1x * sum_1x);
+		double a1 = sum_1x;
+		double b1 = - sum_wx;
+		double qA = a1 * a1 * sum_wxwx + 2 * a1 * b1 * sum_wx1x + b1 * b1 * sum_1x1x;
+		double qB = 2 * (a0 * a1 * sum_wxwx + a0 * b1 * sum_wx1x + a1 * b0 * sum_wx1x + b0 * b1 * sum_1x1x);
+		double qC_nosigmaLF = a0 * a0 * sum_wxwx + 2 * a0 * b0 * sum_wx1x + b0 * b0 * sum_1x1x - rows * (meanLF - icept) * (meanLF - icept);
+		double qC = qC_nosigmaLF - rows * sigmaLF * sigmaLF;
+		double qD = qB * qB - 4 * qA * qC;
+		if (qD < 0)
+		{
+			double new_sigmaLF = Math.sqrt (qC_nosigmaLF / rows - qB * qB / (4 * qA * rows));
+			String error_message = String.format ("Cannot generate the weights: linear form variance demand is too tight! Try sigmaLF >%8.4f", new_sigmaLF);
+ System.out.println (error_message);
+ System.out.flush ();
+ throw new IllegalArgumentException (error_message);
+ }
+ double t = (- qB + Math.sqrt (qD)) / (2 * qA);
+ double a = a0 + t * a1;
+ double b = b0 + t * b1;
+ for (int j = 0; j < cols; j ++)
+ w [j] = a * w [j] + b;
+
+ double sum_eta = 0.0;
+ double sum_sq_eta = 0.0;
+ for (int i = 0; i < rows; i ++)
+ {
+ double eta = 0.0;
+ for (int j = 0; j < cols; j ++)
+ eta += w [j] * X [i][j];
+ sum_eta += eta;
+ sum_sq_eta += eta * eta;
+ }
+ double mean_eta = icept + sum_eta / rows;
+		double sigma_eta = Math.sqrt ((sum_sq_eta - sum_eta * sum_eta / rows) / (rows - 1));
+		System.out.println (String.format ("Linear Form Mean  =%8.4f (Desired:%8.4f)", mean_eta, meanLF));
+		System.out.println (String.format ("Linear Form Sigma =%8.4f (Desired:%8.4f)", sigma_eta, sigmaLF));
+
+ return w;
+ }
+ public static class GLMDist
+ {
+ final int dist; // GLM distribution family type
+		final double param;    // GLM parameter, typically variance power of the mean
+		final int link;        // GLM link function type
+		final double link_pow; // GLM link function as power of the mean
+ double dispersion = 1.0;
+ long binom_n = 1;
+
+		public GLMDist (int _dist, double _param, int _link, double _link_pow) {
+			dist = _dist; param = _param; link = _link; link_pow = _link_pow;
+ }
+
+ public void set_dispersion (double _dispersion) {
+ dispersion = _dispersion;
+ }
+
+ public void set_binom_n (long _n) {
+ binom_n = _n;
+ }
+
+ public boolean is_binom_n_needed () {
+ return (dist == 2 && param == 1.0);
+ }
+
+ public double nextGLM (Random r, double eta) {
+ double mu = 0.0;
+ switch (link) {
+				case 1: // LINK: POWER
+					if (link_pow == 0.0)        // LINK: log
+						mu = Math.exp (eta);
+					else if (link_pow == 1.0)   // LINK: identity
+						mu = eta;
+					else if (link_pow == -1.0)  // LINK: inverse
+						mu = 1.0 / eta;
+					else if (link_pow == 0.5)   // LINK: sqrt
+						mu = eta * eta;
+					else if (link_pow == -2.0)  // LINK: 1/mu^2
+						mu = Math.sqrt (1.0 / eta);
+					else
+						mu = Math.pow (eta, 1.0 / link_pow);
+ break;
+ case 2: // LINK: logit
+ mu = 1.0 / (1.0 + Math.exp (- eta));
+ break;
+ case 3: // LINK: probit
+					mu = TestUtils.gaussian_probability (eta);
+ break;
+ case 4: // LINK: cloglog
+ mu = 1.0 - Math.exp (- Math.exp (eta));
+ break;
+ case 5: // LINK: cauchit
+ mu = 0.5 + Math.atan (eta) / Math.PI;
+ break;
+ default:
+ mu = 0.0;
+ }
+
+ double output = 0.0;
+ if (dist == 1) // POWER
+ {
+ double var_pow = param;
+				if (var_pow == 0.0) // Gaussian, with dispersion = sigma^2
+				{
+					output = mu + Math.sqrt (dispersion) * r.nextGaussian ();
+				}
+				else if (var_pow == 1.0) // Poisson; Negative Binomial if overdispersion
+				{
+					double lambda = mu;
+					if (dispersion > 1.000000001)
+					{
+						// output = Negative Binomial random variable with:
+						//     Number of failures = mu / (dispersion - 1.0)
+						//     Probability of success = 1.0 - 1.0 / dispersion
+						lambda = (dispersion - 1.0) * TestUtils.nextGamma (r, mu / (dispersion - 1.0));
+					}
+					output = TestUtils.nextPoisson (r, lambda);
+				}
+				else if (var_pow == 2.0) // Gamma
+				{
+					double beta = dispersion * mu;
+					output = beta * TestUtils.nextGamma (r, mu / beta);
+				}
+				else if (var_pow == 3.0) // Inverse Gaussian
+				{
+					// From: Raj Chhikara, J.L. Folks. The Inverse Gaussian Distribution:
+					// Theory, Methodology, and Applications. CRC Press, 1988, Section 4.5
+					double y_Gauss = r.nextGaussian ();
+					double mu_y_sq = mu * y_Gauss * y_Gauss;
+					double x_invG = 0.5 * dispersion * mu * (2.0 / dispersion + mu_y_sq
+						- Math.sqrt (mu_y_sq * (4.0 / dispersion + mu_y_sq)));
+					output = ((mu + x_invG) * r.nextDouble() < mu ? x_invG : (mu * mu / x_invG));
+				}
+				else
+				{
+					output = mu + Math.sqrt (12.0 * dispersion) * (r.nextDouble () - 0.5);
+				}
+			}
+			else if (dist == 2 && param != 1.0) // Binomial, dispersion ignored
+			{
+				double bernoulli_zero = param;
+				output = (r.nextDouble () < mu ? 1.0 : bernoulli_zero);
+			}
+			else if (dist == 2) // param == 1.0, Binomial Two-Column, dispersion used
+			{
+				double alpha_plus_beta = (binom_n - dispersion) / (dispersion - 1.0);
+				double alpha = mu * alpha_plus_beta;
+				double x = TestUtils.nextGamma (r, alpha);
+				double y = TestUtils.nextGamma (r, alpha_plus_beta - alpha);
+ double p = x / (x + y);
+ long out = 0;
+ for (long i = 0; i < binom_n; i++)
+ if (r.nextDouble() < p)
+ out ++;
+ output = out;
+ }
+ return output;
+ }
+ }
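
As a quick orientation for the sampler above, here is a minimal, illustrative sketch (not part of the commit; parameter values are arbitrary, and the class is assumed to be on the test classpath as org.apache.sysds.test.TestUtils.GLMDist):

    import java.util.Random;
    import org.apache.sysds.test.TestUtils;

    public class GLMDistDemo {
        public static void main(String[] args) {
            // Poisson family (dist=1, param=1.0) with log link (link=1, link_pow=0.0)
            TestUtils.GLMDist glmdist = new TestUtils.GLMDist(1, 1.0, 1, 0.0);
            glmdist.set_dispersion(2.5); // dispersion > 1 makes nextGLM draw Negative Binomial
            Random r = new Random(42);
            double y = glmdist.nextGLM(r, 0.7); // mu = exp(0.7), then one sampled response
            System.out.println(y);
        }
    }

nextGLM first maps the linear-form value eta to the mean mu through the configured link, then draws a response from the configured family around that mean.
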
+
+	public static double[][] generateUnbalancedGLMInputDataX(int rows, int cols, double logFeatureVarianceDisbalance) {
+		double[][] X = generateTestMatrix(rows, cols, -1.0, 1.0, 1.0, 34567);
+		double shift_X = 1.0;
+		// make the feature columns of X variance disbalanced
+		for (int j = 0; j < cols; j++) {
+			double varFactor = Math.pow(10.0, logFeatureVarianceDisbalance * (-0.25 + j / (double) (2 * cols - 2)));
+			for (int i = 0; i < rows; i++)
+				X[i][j] = shift_X + X[i][j] * varFactor;
+		}
+		return X;
+	}
+
+	public static double[] generateUnbalancedGLMInputDataB(double[][] X, int cols, double intercept, double avgLinearForm, double stdevLinearForm, Random r) {
+		double[] beta_unscaled = new double[cols];
+		for (int j = 0; j < cols; j++)
+			beta_unscaled[j] = r.nextGaussian();
+		return scaleWeights(beta_unscaled, X, intercept, avgLinearForm, stdevLinearForm);
+	}
+
+	public static double[][] generateUnbalancedGLMInputDataY(double[][] X, double[] beta, int rows, int cols, GLMDist glmdist, double intercept, double dispersion, Random r) {
+ double[][] y = null;
+ if (glmdist.is_binom_n_needed())
+ y = new double[rows][2];
+ else
+ y = new double[rows][1];
+
+ for (int i = 0; i < rows; i++) {
+ double eta = intercept;
+ for (int j = 0; j < cols; j++) {
+ eta += X[i][j] * beta[j];
+ }
+ if (glmdist.is_binom_n_needed()) {
+				long n = Math.round(dispersion * (1.0 + 2.0 * r.nextDouble()) + 1.0);
+ glmdist.set_binom_n(n);
+ y[i][0] = glmdist.nextGLM(r, eta);
+ y[i][1] = n - y[i][0];
+ }
+ else {
+ y[i][0] = glmdist.nextGLM(r, eta);
+ }
+ }
+
+ return y;
+ }
}
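
Taken together, the three generateUnbalancedGLMInputData{X,B,Y} helpers form the data-generation pipeline that both GLMTest and BuiltinGLMTest (below) now share. A compact, illustrative sketch of that pipeline (sizes and GLM parameters are arbitrary):

    import java.util.Random;
    import org.apache.sysds.test.TestUtils;

    public class GLMDataGenDemo {
        public static void main(String[] args) {
            int rows = 1000, cols = 10;
            Random r = new Random(314159265);
            TestUtils.GLMDist dist = new TestUtils.GLMDist(1, 1.0, 1, 0.0); // Poisson.log
            dist.set_dispersion(2.5);
            // variance-disbalanced features, rescaled weights, sampled responses
            double[][] X = TestUtils.generateUnbalancedGLMInputDataX(rows, cols, 3.0);
            double[] beta = TestUtils.generateUnbalancedGLMInputDataB(X, cols, 1.0, 0.0, 2.0, r);
            double[][] y = TestUtils.generateUnbalancedGLMInputDataY(X, beta, rows, cols, dist, 1.0, 2.5, r);
            System.out.println(rows + " x " + cols + " features, " + y[0].length + " response column(s)");
        }
    }
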
diff --git a/src/test/java/org/apache/sysds/test/applications/GLMTest.java b/src/test/java/org/apache/sysds/test/applications/GLMTest.java
index 1cc5878..b0242e2 100644
--- a/src/test/java/org/apache/sysds/test/applications/GLMTest.java
+++ b/src/test/java/org/apache/sysds/test/applications/GLMTest.java
@@ -216,79 +216,26 @@ public class GLMTest extends AutomatedTestBase
dispersion +
"} ------------");
-		int rows = numRecords;    // # of rows in the training data
-		int cols = numFeatures;   // # of features in the training data
+		int rows = numRecords; // # of rows in the training data
+		int cols = numFeatures; // # of features in the training data
-		GLMDist glmdist = new GLMDist (distFamilyType, distParam, linkType, linkPower);
+		TestUtils.GLMDist glmdist = new TestUtils.GLMDist (distFamilyType, distParam, linkType, linkPower);
glmdist.set_dispersion (dispersion);
getAndLoadTestConfiguration(TEST_NAME);
// prepare training data set
-
- Random r = new Random (314159265);
-		double[][] X = getRandomMatrix (rows, cols, -1.0, 1.0, 1.0, 34567); // 271828183);
-		double shift_X = 1.0;
-
-		// make the feature columns of X variance disbalanced
-
-		for (int j = 0; j < cols; j ++)
-		{
-			double varFactor = Math.pow (10.0, logFeatureVarianceDisbalance * (- 0.25 + j / (double) (2 * cols - 2)));
-			for (int i = 0; i < rows; i ++)
-				X [i][j] = shift_X + X [i][j] * varFactor;
-		}
-
-		double[] beta_unscaled = new double [cols];
-		for (int j = 0; j < cols; j ++)
-			beta_unscaled [j] = r.nextGaussian ();
-		double[] beta = scaleWeights (beta_unscaled, X, intercept, avgLinearForm, stdevLinearForm);
+		Random r = new Random(314159265);
+		double[][] X = TestUtils.generateUnbalancedGLMInputDataX(rows, cols, logFeatureVarianceDisbalance);
+		double[] beta = TestUtils.generateUnbalancedGLMInputDataB(X, cols, intercept, avgLinearForm, stdevLinearForm, r);
+		double[][] y = TestUtils.generateUnbalancedGLMInputDataY(X, beta, rows, cols, glmdist, intercept, dispersion, r);
- long nnz_in_X = 0;
- long nnz_in_y = 0;
-
- double[][] y = null;
- if (glmdist.is_binom_n_needed())
- y = new double [rows][2];
- else
- y = new double [rows][1];
-
-
- for (int i = 0; i < rows; i ++)
- {
- double eta = intercept;
- for (int j = 0; j < cols; j ++)
- {
- eta += X [i][j] * beta [j];
- if (X [i][j] != 0.0)
- nnz_in_X ++;
- }
- if (glmdist.is_binom_n_needed())
- {
-				long n = Math.round (dispersion * (1.0 + 2.0 * r.nextDouble()) + 1.0);
- glmdist.set_binom_n (n);
- y [i][0] = glmdist.nextGLM (r, eta);
- y [i][1] = n - y[i][0];
- if (y [i][0] != 0.0)
- nnz_in_y ++;
- if (y [i][1] != 0.0)
- nnz_in_y ++;
- }
- else
- {
- y [i][0] = glmdist.nextGLM (r, eta);
- if (y [i][0] != 0.0)
- nnz_in_y ++;
- }
-
- }
-
int defaultBlockSize = OptimizerUtils.DEFAULT_BLOCKSIZE;
-		MatrixCharacteristics mc_X = new MatrixCharacteristics(rows, cols, defaultBlockSize, nnz_in_X);
+		MatrixCharacteristics mc_X = new MatrixCharacteristics(rows, cols, defaultBlockSize, -1);
 		writeInputMatrixWithMTD ("X", X, true, mc_X);
-		MatrixCharacteristics mc_y = new MatrixCharacteristics(rows, y[0].length, defaultBlockSize, nnz_in_y);
+		MatrixCharacteristics mc_y = new MatrixCharacteristics(rows, y[0].length, defaultBlockSize, -1);
writeInputMatrixWithMTD ("Y", y, true, mc_y);
List<String> proArgs = new ArrayList<>();
@@ -353,350 +300,4 @@ public class GLMTest extends AutomatedTestBase
}
		TestUtils.compareMatrices (wR, wSYSTEMDS, eps * max_abs_beta, "wR", "wSYSTEMDS");
}
-
-	double[] scaleWeights (double[] w_unscaled, double[][] X, double icept, double meanLF, double sigmaLF)
- {
- int rows = X.length;
- int cols = w_unscaled.length;
- double[] w = new double [cols];
- for (int j = 0; j < cols; j ++)
- w [j] = w_unscaled [j];
-
- double sum_wx = 0.0;
- double sum_1x = 0.0;
- double sum_wxwx = 0.0;
- double sum_1x1x = 0.0;
- double sum_wx1x = 0.0;
-
- for (int i = 0; i < rows; i ++)
- {
- double wx = 0.0;
- double one_x = 0.0;
- for (int j = 0; j < cols; j ++)
- {
- wx += w [j] * X [i][j];
- one_x += X [i][j];
- }
- sum_wx += wx;
- sum_1x += one_x;
- sum_wxwx += wx * wx;
- sum_1x1x += one_x * one_x;
- sum_wx1x += wx * one_x;
- }
-
-		double a0 = (meanLF - icept) * rows * sum_wx / (sum_wx * sum_wx + sum_1x * sum_1x);
-		double b0 = (meanLF - icept) * rows * sum_1x / (sum_wx * sum_wx + sum_1x * sum_1x);
-		double a1 = sum_1x;
-		double b1 = - sum_wx;
-		double qA = a1 * a1 * sum_wxwx + 2 * a1 * b1 * sum_wx1x + b1 * b1 * sum_1x1x;
-		double qB = 2 * (a0 * a1 * sum_wxwx + a0 * b1 * sum_wx1x + a1 * b0 * sum_wx1x + b0 * b1 * sum_1x1x);
-		double qC_nosigmaLF = a0 * a0 * sum_wxwx + 2 * a0 * b0 * sum_wx1x + b0 * b0 * sum_1x1x - rows * (meanLF - icept) * (meanLF - icept);
-		double qC = qC_nosigmaLF - rows * sigmaLF * sigmaLF;
-		double qD = qB * qB - 4 * qA * qC;
-		if (qD < 0)
-		{
-			double new_sigmaLF = Math.sqrt (qC_nosigmaLF / rows - qB * qB / (4 * qA * rows));
-			String error_message = String.format ("Cannot generate the weights: linear form variance demand is too tight! Try sigmaLF >%8.4f", new_sigmaLF);
- System.out.println (error_message);
- System.out.flush ();
- throw new IllegalArgumentException (error_message);
- }
- double t = (- qB + Math.sqrt (qD)) / (2 * qA);
- double a = a0 + t * a1;
- double b = b0 + t * b1;
- for (int j = 0; j < cols; j ++)
- w [j] = a * w [j] + b;
-
- double sum_eta = 0.0;
- double sum_sq_eta = 0.0;
- for (int i = 0; i < rows; i ++)
- {
- double eta = 0.0;
- for (int j = 0; j < cols; j ++)
- eta += w [j] * X [i][j];
- sum_eta += eta;
- sum_sq_eta += eta * eta;
- }
- double mean_eta = icept + sum_eta / rows;
-		double sigma_eta = Math.sqrt ((sum_sq_eta - sum_eta * sum_eta / rows) / (rows - 1));
-		System.out.println (String.format ("Linear Form Mean  =%8.4f (Desired:%8.4f)", mean_eta, meanLF));
-		System.out.println (String.format ("Linear Form Sigma =%8.4f (Desired:%8.4f)", sigma_eta, sigmaLF));
-
- return w;
- }
-
- public class GLMDist
- {
- final int dist; // GLM distribution family type
-		final double param;    // GLM parameter, typically variance power of the mean
-		final int link;        // GLM link function type
-		final double link_pow; // GLM link function as power of the mean
-		@SuppressWarnings("hiding")
-		double dispersion = 1.0;
-		long binom_n = 1;
-
-		GLMDist (int _dist, double _param, int _link, double _link_pow)
-		{
-			dist = _dist; param = _param; link = _link; link_pow = _link_pow;
- }
-
- void set_dispersion (double _dispersion)
- {
- dispersion = _dispersion;
- }
-
- void set_binom_n (long _n)
- {
- binom_n = _n;
- }
-
- boolean is_binom_n_needed ()
- {
- return (dist == 2 && param == 1.0);
- }
-
- double nextGLM (Random r, double eta)
- {
- double mu = 0.0;
- switch (link)
- {
- case 1: // LINK: POWER
- if (link_pow == 0.0) // LINK: log
- mu = Math.exp (eta);
- else if (link_pow == 1.0) // LINK: identity
- mu = eta;
- else if (link_pow == -1.0) // LINK: inverse
- mu = 1.0 / eta;
- else if (link_pow == 0.5) // LINK: sqrt
- mu = eta * eta;
- else if (link_pow == -2.0) // LINK: 1/mu^2
- mu = Math.sqrt (1.0 / eta);
- else
- mu = Math.pow (eta, 1.0 / link_pow);
- break;
- case 2: // LINK: logit
- mu = 1.0 / (1.0 + Math.exp (- eta));
- break;
- case 3: // LINK: probit
- mu = gaussian_probability (eta);
- break;
- case 4: // LINK: cloglog
- mu = 1.0 - Math.exp (- Math.exp (eta));
- break;
- case 5: // LINK: cauchit
- mu = 0.5 + Math.atan (eta) / Math.PI;
- break;
- default:
- mu = 0.0;
- }
-
- double output = 0.0;
- if (dist == 1) // POWER
- {
- double var_pow = param;
-				if (var_pow == 0.0) // Gaussian, with dispersion = sigma^2
-				{
-					output = mu + Math.sqrt (dispersion) * r.nextGaussian ();
-				}
-				else if (var_pow == 1.0) // Poisson; Negative Binomial if overdispersion
-				{
-					double lambda = mu;
-					if (dispersion > 1.000000001)
-					{
-						// output = Negative Binomial random variable with:
-						//     Number of failures = mu / (dispersion - 1.0)
-						//     Probability of success = 1.0 - 1.0 / dispersion
-						lambda = (dispersion - 1.0) * nextGamma (r, mu / (dispersion - 1.0));
- }
- output = nextPoisson (r, lambda);
- }
- else if (var_pow == 2.0) // Gamma
- {
- double beta = dispersion * mu;
-					output = beta * nextGamma (r, mu / beta);
- }
- else if (var_pow == 3.0) // Inverse Gaussian
- {
-					// From: Raj Chhikara, J.L. Folks. The Inverse Gaussian Distribution:
-					// Theory, Methodology, and Applications. CRC Press, 1988, Section 4.5
-					double y_Gauss = r.nextGaussian ();
-					double mu_y_sq = mu * y_Gauss * y_Gauss;
-					double x_invG = 0.5 * dispersion * mu * (2.0 / dispersion + mu_y_sq
-						- Math.sqrt (mu_y_sq * (4.0 / dispersion + mu_y_sq)));
-					output = ((mu + x_invG) * r.nextDouble() < mu ? x_invG : (mu * mu / x_invG));
- }
- else
- {
-					output = mu + Math.sqrt (12.0 * dispersion) * (r.nextDouble () - 0.5);
- }
- }
-			else if (dist == 2 && param != 1.0) // Binomial, dispersion ignored
-			{
-				double bernoulli_zero = param;
-				output = (r.nextDouble () < mu ? 1.0 : bernoulli_zero);
-			}
-			else if (dist == 2) // param == 1.0, Binomial Two-Column, dispersion used
-			{
-				double alpha_plus_beta = (binom_n - dispersion) / (dispersion - 1.0);
-				double alpha = mu * alpha_plus_beta;
-				double x = nextGamma (r, alpha);
-				double y = nextGamma (r, alpha_plus_beta - alpha);
- double p = x / (x + y);
- long out = 0;
- for (long i = 0; i < binom_n; i++)
- if (r.nextDouble() < p)
- out ++;
- output = out;
- }
- return output;
- }
- }
-
- public double nextGamma (Random r, double alpha)
- // PDF(x) = x^(alpha-1) * exp(-x) / Gamma(alpha)
-	// D.Knuth "The Art of Computer Programming", 2nd Edition, Vol. 2, Sec. 3.4.1
- {
- double x;
- if (alpha > 10000.0)
- {
-			x = 1.0 - 1.0 / (9.0 * alpha) + r.nextGaussian() / Math.sqrt (9.0 * alpha);
- return alpha * x * x * x;
- }
- else if (alpha > 5.0)
- {
- x = 0.0;
- double the_root = Math.sqrt (2.0 * alpha - 1.0);
- boolean is_accepted = false;
- while (! is_accepted)
- {
- double y = Math.tan (Math.PI * r.nextDouble());
- x = the_root * y + alpha - 1.0;
- if (x <= 0)
- continue;
-			double z = Math.exp ((alpha - 1.0) * (1.0 + Math.log (x / (alpha - 1.0))) - x);
-			is_accepted = (r.nextDouble() <= z * (1.0 + y * y));
- }
- return x;
- }
- else if (alpha > 0.0)
- {
- x = 1.0;
- double frac_alpha = alpha;
- while (frac_alpha >= 1.0)
- {
- x *= r.nextDouble ();
- frac_alpha -= 1.0;
- }
- double output = - Math.log (x);
- if (frac_alpha > 0.0) // Has to be between 0 and 1
- {
- double ceee = Math.E / (frac_alpha + Math.E);
- boolean is_accepted = false;
- while (! is_accepted)
- {
- double u = r.nextDouble();
- if (u <= ceee)
- {
-					x = Math.pow (u / ceee, 1.0 / frac_alpha);
-					is_accepted = (r.nextDouble() <= Math.exp (- x));
-				}
-				else
-				{
-					x = 1.0 - Math.log ((1.0 - u) / (1.0 - ceee));
-					is_accepted = (r.nextDouble() <= Math.pow (x, frac_alpha - 1.0));
- }
- }
- output += x;
- }
- return output;
- }
- else // alpha <= 0.0
- return 0.0;
- }
-
- public long nextPoisson (Random r, double mu)
- // Prob[k] = mu^k * exp(-mu) / k!
-	// The main part is from W. Hörmann "The Transformed Rejection Method
-	// for Generating Poisson Random Variables"
- {
- if (mu <= 0.0)
- return 0;
- if (mu >= 100000.0)
-			return Math.round (mu + Math.sqrt (mu) * r.nextGaussian ());
- if (mu >= 10.0)
- {
- long output = 0;
- double c = mu + 0.445;
- double b = 0.931 + 2.53 * Math.sqrt (mu);
- double a = -0.059 + 0.02483 * b;
- double one_by_alpha = 1.1239 + 1.1328 / (b - 3.4);
- double u_r = 0.43;
- double v_r = 0.9277 - 3.6224 / (b - 2);
- while (true)
- {
- double U;
- double V = r.nextDouble ();
- if (V <= 2 * u_r * v_r)
- {
- U = V / v_r - u_r;
-					output = (long) Math.floor ((2 * a / (0.5 - Math.abs (U)) + b) * U + c);
- break;
- }
- if (V >= v_r)
- {
- U = r.nextDouble () - 0.5;
- }
- else
- {
- U = V / v_r - (u_r + 0.5);
- U = Math.signum (U) * 0.5 - U;
- V = v_r * r.nextDouble ();
- }
- double us = 0.5 - Math.abs (U);
- if (0.487 < Math.abs (U) && us < V)
- continue;
-			long k = (long) Math.floor ((2 * a / us + b) * U + c);
-			double V_to_compare = (V * one_by_alpha) / (a / us / us + b);
-			if (0 <= k && Math.log (V_to_compare) <= - mu + k * Math.log (mu) - logFactorial (k))
- {
- output = k;
- break;
- }
- }
- return output;
- }
- long count = 0;
- double res_mu = mu;
- while (res_mu > 0.0)
- {
- count ++;
- res_mu += Math.log (r.nextDouble ());
- }
- return count - 1;
- }
-
- static double logFactorial (double x)
-	// From paper: C. Lanczos "A Precision Approximation of the Gamma Function",
-	// Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, pp. 86-96
-	{
-		final double[] cf = {1.000000000178, 76.180091729406, -86.505320327112,
-			24.014098222230, -1.231739516140, 0.001208580030, -0.000005363820};
-		double a_5 = cf[0] + cf[1] / (x + 1) + cf[2] / (x + 2) + cf[3] / (x + 3)
-			+ cf[4] / (x + 4) + cf[5] / (x + 5) + cf[6] / (x + 6);
-		return Math.log(a_5) + (x + 0.5) * Math.log(x + 5.5) - (x + 5.5) + 0.91893853320467; // log(sqrt(2 * PI))
- }
-
- public double gaussian_probability (double point)
- // "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A.
Stegun,
- // U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26,
p. 299
- {
- double t_gp = 1.0 / (1.0 + Math.abs (point) * 0.231641888); //
0.231641888 = 0.3275911 / sqrt (2.0)
- double erf_gp = 1.0 - t_gp * ( 0.254829592
- + t_gp * (-0.284496736
- + t_gp * ( 1.421413741
- + t_gp * (-1.453152027
- + t_gp * 1.061405429)))) * Math.exp (- point * point
/ 2.0);
- erf_gp = erf_gp * (point > 0 ? 1.0 : -1.0);
- return (0.5 + 0.5 * erf_gp);
- }
}
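
The hunk above removes scaleWeights, GLMDist, nextGamma, nextPoisson, logFactorial, and gaussian_probability from GLMTest; per the diffstat they now live in TestUtils so that the new BuiltinGLMTest (next file) can reuse them. As an illustrative sanity check of the relocated probit helper (the Abramowitz-Stegun 7.1.26 polynomial it implements is accurate to roughly 1e-7):

    import org.apache.sysds.test.TestUtils;

    public class ProbitCheck {
        public static void main(String[] args) {
            System.out.println(TestUtils.gaussian_probability(0.0));  // expect 0.5
            System.out.println(TestUtils.gaussian_probability(1.96)); // expect ~0.9750
        }
    }
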
diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGLMTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGLMTest.java
new file mode 100644
index 0000000..a6a3939
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGLMTest.java
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.sysds.common.Types;
+import org.apache.sysds.hops.OptimizerUtils;
+import org.apache.sysds.lops.LopProperties;
+import org.apache.sysds.runtime.matrix.data.MatrixValue;
+import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(value = Parameterized.class)
[email protected]
+
+public class BuiltinGLMTest extends AutomatedTestBase
+{
+ protected final static String TEST_NAME = "glmTest";
+ protected final static String TEST_DIR = "functions/builtin/";
+	protected String TEST_CLASS_DIR = TEST_DIR + BuiltinGLMTest.class.getSimpleName() + "/";
+ double eps = 1e-4;
+ // SUPPORTED GLM DISTRIBUTION FAMILIES AND LINKS:
+ // -----------------------------------------------
+ // INPUT PARAMETERS: MEANING: Cano-
+ // dfam vpow link lpow Distribution.link nical?
+ // -----------------------------------------------
+ // 1 0.0 1 -1.0 Gaussian.inverse
+ // 1 0.0 1 0.0 Gaussian.log
+ // 1 0.0 1 1.0 Gaussian.id Yes
+ // 1 1.0 1 0.0 Poisson.log Yes
+ // 1 1.0 1 0.5 Poisson.sqrt
+ // 1 1.0 1 1.0 Poisson.id
+ // 1 2.0 1 -1.0 Gamma.inverse Yes
+ // 1 2.0 1 0.0 Gamma.log
+ // 1 2.0 1 1.0 Gamma.id
+ // 1 3.0 1 -2.0 InvGaussian.1/mu^2 Yes
+ // 1 3.0 1 -1.0 InvGaussian.inverse
+ // 1 3.0 1 0.0 InvGaussian.log
+ // 1 3.0 1 1.0 InvGaussian.id
+ // 1 * 1 * AnyVariance.AnyLink
+ // -----------------------------------------------
+ // 2 * 1 0.0 Binomial.log
+ // 2 * 2 * Binomial.logit Yes
+ // 2 * 3 * Binomial.probit
+ // 2 * 4 * Binomial.cloglog
+ // 2 * 5 * Binomial.cauchit
+ // -----------------------------------------------
+
+	protected int numRecords, numFeatures, distFamilyType, linkType, intercept;
+	protected double distParam, linkPower, logFeatureVarianceDisbalance, avgLinearForm, stdevLinearForm, dispersion;
+
+	public BuiltinGLMTest(int numRecords_, int numFeatures_, int distFamilyType_, double distParam_,
+		int linkType_, double linkPower_, double logFeatureVarianceDisbalance_,
+		double avgLinearForm_, double stdevLinearForm_, double dispersion_)
+ {
+ this.numRecords = numRecords_;
+ this.numFeatures = numFeatures_;
+ this.distFamilyType = distFamilyType_;
+ this.distParam = distParam_;
+ this.linkType = linkType_;
+ this.linkPower = linkPower_;
+		this.logFeatureVarianceDisbalance = logFeatureVarianceDisbalance_;
+ this.avgLinearForm = avgLinearForm_;
+ this.stdevLinearForm = stdevLinearForm_;
+ this.dispersion = dispersion_;
+ }
+
+	private void setIntercept(int intercept_)
+	{
+		intercept = intercept_;
+	}
+
+ @Override
+ public void setUp()
+ {
+ TestUtils.clearAssertionInformation();
+ addTestConfiguration(TEST_CLASS_DIR, TEST_NAME);
+ }
+
+ @Test
+ public void glmTestIntercept_0_CP() {
+ setIntercept(0);
+ runtestGLM();
+ }
+
+ @Test
+ public void glmTestIntercept_1_CP() {
+ setIntercept(1);
+ runtestGLM();
+ }
+
+ @Test
+ public void glmTestIntercept_2_CP() {
+ setIntercept(2);
+ runtestGLM();
+ }
+
+
+ public void runtestGLM() {
+		Types.ExecMode platformOld = setExecMode(LopProperties.ExecType.CP);
+		try {
+			int rows = numRecords;  // # of rows in the training data
+			int cols = numFeatures; // # of features in the training data
+			System.out.println("------------ BEGIN " + TEST_NAME + " TEST WITH {" + rows + ", " + cols
+				+ ", " + distFamilyType + ", " + distParam + ", " + linkType + ", " + linkPower + ", "
+				+ intercept + ", " + logFeatureVarianceDisbalance + ", " + avgLinearForm + ", " + stdevLinearForm
+				+ ", " + dispersion + "} ------------");
+
+			TestUtils.GLMDist glmdist = new TestUtils.GLMDist(distFamilyType, distParam, linkType, linkPower);
+ glmdist.set_dispersion(dispersion);
+
+ loadTestConfiguration(getTestConfiguration(TEST_NAME));
+
+ // prepare training data set
+ Random r = new Random(314159265);
+			double[][] X = TestUtils.generateUnbalancedGLMInputDataX(rows, cols, logFeatureVarianceDisbalance);
+			double[] beta = TestUtils.generateUnbalancedGLMInputDataB(X, cols, intercept, avgLinearForm, stdevLinearForm, r);
+			double[][] y = TestUtils.generateUnbalancedGLMInputDataY(X, beta, rows, cols, glmdist, intercept, dispersion, r);
+
+			int defaultBlockSize = OptimizerUtils.DEFAULT_BLOCKSIZE;
+
+			MatrixCharacteristics mc_X = new MatrixCharacteristics(rows, cols, defaultBlockSize, -1);
+			writeInputMatrixWithMTD("X", X, true, mc_X);
+
+			MatrixCharacteristics mc_y = new MatrixCharacteristics(rows, y[0].length, defaultBlockSize, -1);
+ writeInputMatrixWithMTD("Y", y, true, mc_y);
+
+ String HOME = SCRIPT_DIR + TEST_DIR;
+ fullDMLScriptName = HOME + TEST_NAME + ".dml";
+ List<String> proArgs = new ArrayList<>();
+ proArgs.add("-exec");
+ proArgs.add(" singlenode");
+ proArgs.add("-nvargs");
+ proArgs.add("X=" + input("X"));
+ proArgs.add("Y=" + input("Y"));
+ proArgs.add("dfam=" + String.valueOf(distFamilyType));
+ proArgs.add(((distFamilyType == 2 && distParam != 1.0)
? "yneg=" : "vpow=") + String.valueOf(distParam));
+ proArgs.add((distFamilyType == 2 && distParam != 1.0) ?
"vpow=0.0" : "yneg=0.0");
+ proArgs.add("link=" + String.valueOf(linkType));
+ proArgs.add("lpow=" + String.valueOf(linkPower));
+ proArgs.add("icpt=" + String.valueOf(intercept)); //
INTERCEPT - CHANGE THIS AS NEEDED
+ proArgs.add("disp=0.0"); // DISPERSION (0.0: ESTIMATE)
+ proArgs.add("reg=0.0"); // LAMBDA REGULARIZER
+ proArgs.add("tol=0.000000000001"); // TOLERANCE
(EPSILON)
+ proArgs.add("moi=300");
+ proArgs.add("mii=0");
+ proArgs.add("B=" + output("betas_SYSTEMDS"));
+ programArgs = proArgs.toArray(new
String[proArgs.size()]);
+
+ fullRScriptName = HOME + TEST_NAME + ".R";
+ rCmd = getRCmd(input("X.mtx"), input("Y.mtx"),
+ String.valueOf(distFamilyType),
+ String.valueOf(distParam),
+ String.valueOf(linkType),
+ String.valueOf(linkPower),
+ String.valueOf(intercept),
+ "0.000000000001",
+ expected("betas_R"));
+
+ runTest(true, false, null, -1);
+
+ double max_abs_beta = 0.0;
+ HashMap<MatrixValue.CellIndex, Double> wTRUE = new
HashMap<>();
+ for (int j = 0; j < cols; j++) {
+ wTRUE.put(new MatrixValue.CellIndex(j + 1, 1),
Double.valueOf(beta[j]));
+ max_abs_beta = (max_abs_beta >=
Math.abs(beta[j]) ? max_abs_beta : Math.abs(beta[j]));
+ }
+
+ HashMap<MatrixValue.CellIndex, Double> wSYSTEMDS_raw =
readDMLMatrixFromHDFS("betas_SYSTEMDS");
+ HashMap<MatrixValue.CellIndex, Double> wSYSTEMDS = new
HashMap<>();
+ for (MatrixValue.CellIndex key : wSYSTEMDS_raw.keySet())
+ if (key.column == 1)
+ wSYSTEMDS.put(key,
wSYSTEMDS_raw.get(key));
+
+ runRScript(true);
+
+ HashMap<MatrixValue.CellIndex, Double> wR =
readRMatrixFromFS("betas_R");
+
+			if ((distParam == 0 && linkType == 1)) { // Gaussian.*
+				//NOTE MB: Gaussian.log was the only test failing when we introduced multi-threaded
+				//matrix multiplications (mmchain). After discussions with Sasha, we decided to change the eps
+				//because accuracy is anyway affected by various rewrites like binary to unary (-1*x->-x),
+				//transpose-matrixmult, and dot product sum. Disabling these rewrites led to a successful
+				//test result. Even without multi-threaded matrix mult this test was failing for different
+				//numbers of rows if these rewrites are enabled. Users can turn off rewrites if high accuracy
+				//is required. However, in the future we might also consider using Kahan plus for aggregations
+				//in matrix mult (at least for the final aggregation of partial results from individual threads).
+
+				//NOTE MB: similar issues occurred with other tests when moving to GitHub Actions tests
+				eps *= (linkPower == -1) ? 4 : 2; //Gaussian.inverse vs Gaussian.*;
+			}
+			TestUtils.compareMatrices(wR, wSYSTEMDS, eps * max_abs_beta, "wR", "wSYSTEMDS");
+ }
+ finally {
+ resetExecMode(platformOld);
+ }
+ }
+
+ @Parameterized.Parameters
+ public static Collection<Object[]> data() {
+ // SCHEMA:
+		// #RECORDS, #FEATURES, DISTRIBUTION_FAMILY, VARIANCE_POWER or BERNOULLI_NO, LINK_TYPE, LINK_POWER,
+		// LOG_FEATURE_VARIANCE_DISBALANCE, AVG_LINEAR_FORM, ST_DEV_LINEAR_FORM, DISPERSION
+ Object[][] data = new Object[][] {
+			// #RECS  #FTRS DFM  VPOW LNK  LPOW LFVD AVGLT STDLT DISP
+			// Both DML and R work and compute close results:
+			{ 100000,  50, 1,  0.0, 1,  0.0, 3.0, 10.0,  2.0, 2.5 },   // Gaussian.log
+			//{ 10000, 100, 1,  0.0, 1,  1.0, 3.0,  0.0,  2.0, 2.5 },  // Gaussian.id
+			//{ 20000, 100, 1,  0.0, 1, -1.0, 0.0,  0.2, 0.03, 2.5 },  // Gaussian.inverse
+			{ 10000,  100, 1,  1.0, 1,  0.0, 3.0,  0.0,  1.0, 2.5 },   // Poisson.log
+			//{ 100000, 10, 1,  1.0, 1,  0.0, 3.0,  0.0, 50.0, 2.5 },  // Poisson.log // Pr[0|x] gets near 1
+			//{ 20000, 100, 1,  1.0, 1,  0.5, 3.0, 10.0,  2.0, 2.5 },  // Poisson.sqrt
+			//{ 20000, 100, 1,  1.0, 1,  1.0, 3.0, 50.0, 10.0, 2.5 },  // Poisson.id
+			{ 100000,  50, 1,  2.0, 1,  0.0, 3.0,  0.0,  2.0, 2.5 },   // Gamma.log
+			//{ 100000, 50, 1,  2.0, 1, -1.0, 1.0,  2.0,  0.4, 1.5 },  // Gamma.inverse
+			//{ 10000, 100, 1,  3.0, 1, -2.0, 3.0, 50.0,  7.0, 1.7 },  // InvGaussian.1/mu^2
+			//{ 10000, 100, 1,  3.0, 1, -1.0, 3.0, 10.0,  2.0, 2.5 },  // InvGaussian.inverse
+			//{ 100000, 50, 1,  3.0, 1,  0.0, 2.0, -2.0,  1.0, 1.7 },  // InvGaussian.log
+			//{ 100000, 50, 1,  3.0, 1,  1.0, 1.0,  0.2, 0.04, 1.7 },  // InvGaussian.id
+
+			{ 100000,  50, 2, -1.0, 1,  0.0, 3.0, -5.0,  1.0, 1.0 },   // Bernoulli {-1, 1}.log // Note: Y is sparse
+			//{ 100000, 50, 2, -1.0, 1,  1.0, 1.0,  0.6,  0.1, 1.0 },  // Bernoulli {-1, 1}.id
+			//{ 100000, 50, 2, -1.0, 1,  0.5, 0.0,  0.4, 0.05, 1.0 },  // Bernoulli {-1, 1}.sqrt
+			{ 10000,  100, 2, -1.0, 2,  0.0, 3.0,  0.0,  2.0, 1.0 },   // Bernoulli {-1, 1}.logit
+			//{ 10000, 100, 2, -1.0, 2,  0.0, 3.0,  0.0, 50.0, 1.0 },  // Bernoulli {-1, 1}.logit // Pr[y|x] near 0, 1
+			{ 20000,  100, 2, -1.0, 3,  0.0, 3.0,  0.0,  2.0, 1.0 },   // Bernoulli {-1, 1}.probit
+			//{ 100000, 10, 2, -1.0, 3,  0.0, 3.0,  0.0, 50.0, 1.0 },  // Bernoulli {-1, 1}.probit // Pr[y|x] near 0, 1
+			//{ 10000, 100, 2, -1.0, 4,  0.0, 3.0, -2.0,  1.0, 1.0 },  // Bernoulli {-1, 1}.cloglog
+			//{ 50000,  20, 2, -1.0, 4,  0.0, 3.0, -2.0, 50.0, 1.0 },  // Bernoulli {-1, 1}.cloglog // Pr[y|x] near 0, 1
+			//{ 50000, 100, 2, -1.0, 5,  0.0, 3.0,  0.0,  2.0, 1.0 },  // Bernoulli {-1, 1}.cauchit
+
+			{ 100000,  50, 2,  1.0, 1,  0.0, 3.0, -5.0,  1.0, 2.5 },   // Binomial two-column.log // Note: Y is sparse
+			//{ 10000, 100, 2,  1.0, 1,  1.0, 0.0,  0.4, 0.05, 2.5 },  // Binomial two-column.id
+			//{ 100000, 50, 2,  1.0, 1,  0.5, 0.0,  0.4, 0.05, 2.5 },  // Binomial two-column.sqrt
+			{ 10000,  100, 2,  1.0, 2,  0.0, 3.0,  0.0,  2.0, 2.5 },   // Binomial two-column.logit
+			{ 20000,  100, 2,  1.0, 3,  0.0, 3.0,  0.0,  2.0, 2.5 },   // Binomial two-column.probit
+			//{ 10000, 100, 2,  1.0, 4,  0.0, 3.0, -2.0,  1.0, 2.5 },  // Binomial two-column.cloglog
+			//{ 20000, 100, 2,  1.0, 5,  0.0, 3.0,  0.0,  2.0, 2.5 },  // Binomial two-column.cauchit
+ };
+ return Arrays.asList(data);
+ }
+}
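
One schema detail worth spelling out (reconstructed from the proArgs ternary in runtestGLM above): for dfam=2, the VPOW column doubles as the Bernoulli "no" label yneg whenever it is not 1.0, so a single table drives both single-column Bernoulli and two-column Binomial runs. Routing for two of the rows above:

    { 10000, 100, 2, -1.0, 2, 0.0, ... }  ->  dfam=2 yneg=-1.0 vpow=0.0  (Bernoulli {-1, 1}.logit)
    { 10000, 100, 2,  1.0, 2, 0.0, ... }  ->  dfam=2 vpow=1.0  yneg=0.0  (Binomial two-column.logit)
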
diff --git a/src/test/scripts/functions/builtin/glmTest.R b/src/test/scripts/functions/builtin/glmTest.R
new file mode 100644
index 0000000..5d4c525
--- /dev/null
+++ b/src/test/scripts/functions/builtin/glmTest.R
@@ -0,0 +1,139 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+#
+# Intended to solve GLM Regression using R, in order to compare against the DML implementation
+# INPUT 1: Matrix X [rows, columns]
+# and Matrix y [rows, 1]
+# INPUT 2-5: Distribution family and link, see below:
+# ---------------------------------------------
+# Dst Var Lnk Lnk Distribution Cano-
+# typ pow typ pow Family.link nical?
+# ---------------------------------------------
+# 1 0.0 1 -1.0 Gaussian.inverse
+# 1 0.0 1 0.0 Gaussian.log
+# 1 0.0 1 1.0 Gaussian.id Yes
+# 1 1.0 1 0.0 Poisson.log Yes
+# 1 1.0 1 0.5 Poisson.sqrt
+# 1 1.0 1 1.0 Poisson.id
+# 1 2.0 1 -1.0 Gamma.inverse Yes
+# 1 2.0 1 0.0 Gamma.log
+# 1 2.0 1 1.0 Gamma.id
+# 1 3.0 1 -2.0 InvGaussian.1/mu^2 Yes
+# 1 3.0 1 -1.0 InvGaussian.inverse
+# 1 3.0 1 0.0 InvGaussian.log
+# 1 3.0 1 1.0 InvGaussian.id
+# 1 * 1 * AnyVariance.AnyLink
+# ---------------------------------------------
+# 2 -1.0 * * Binomial {-1, 1}
+# 2 0.0 * * Binomial { 0, 1}
+# 2 1.0 * * Binomial two-column
+# 2 * 1 0.0 Binomial.log
+# 2 * 2 * Binomial.logit Yes
+# 2 * 3 * Binomial.probit
+# 2 * 4 * Binomial.cloglog
+# 2 * 5 * Binomial.cauchit
+# ---------------------------------------------
+# INPUT 2: (int) Distribution type
+# INPUT 3: (double) For Power families: Variance power of the mean
+# INPUT 4: (int) Link function type
+# INPUT 5: (double) Link as power of the mean
+# INPUT 6: (int) Intercept: 0 = no, 1 = yes
+# INPUT 7: (double) tolerance (epsilon)
+# INPUT 8: the regression coefficients output file
+# OUTPUT : Matrix beta [columns, 1]
+#
+# Assume that $GLMR_HOME is set to the home of the R script
+# Assume input and output directories are $GLMR_HOME/in/ and $GLMR_HOME/expected/
+# Rscript $GLMR_HOME/GLM.R $GLMR_HOME/in/X.mtx $GLMR_HOME/in/y.mtx 2 0.0 2 0.0 1 0.00000001 $GLMR_HOME/expected/w.mtx
+
+args <- commandArgs (TRUE);
+
+library ("Matrix");
+# library ("batch");
+
+options (warn = -1);
+
+X_here <- as.matrix(readMM(args[1]))
+y_here <- as.matrix(readMM(args[2]))
+
+num_records <- nrow (X_here);
+num_features <- ncol (X_here);
+dist_type <- as.integer (args[3]);
+dist_param <- as.numeric (args[4]);
+link_type <- as.integer (args[5]);
+link_power <- as.numeric (args[6]);
+icept <- as.integer (args[7]);
+eps_n <- as.numeric (args[8]);
+
+f_ly <- gaussian ();
+var_power <- dist_param;
+
+if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power == 1.0) {
f_ly <- gaussian (link = "identity"); } else
+if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power == -1.0) {
f_ly <- gaussian (link = "inverse"); } else
+if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power == 0.0) {
f_ly <- gaussian (link = "log"); } else
+if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power == 1.0) {
f_ly <- poisson (link = "identity"); } else
+if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power == 0.0) {
f_ly <- poisson (link = "log"); } else
+if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power == 0.5) {
f_ly <- poisson (link = "sqrt"); } else
+if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power == 1.0) {
f_ly <- Gamma (link = "identity"); } else
+if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power == -1.0) {
f_ly <- Gamma (link = "inverse"); } else
+if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power == 0.0) {
f_ly <- Gamma (link = "log"); } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == 1.0) {
f_ly <- inverse.gaussian (link = "identity"); } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == -1.0) {
f_ly <- inverse.gaussian (link = "inverse"); } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == 0.0) {
f_ly <- inverse.gaussian (link = "log"); } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == -2.0) {
f_ly <- inverse.gaussian (link = "1/mu^2"); } else
+if (dist_type == 2 & link_type == 1 & link_power == 0.0) {
f_ly <- binomial (link = "log"); } else
+if (dist_type == 2 & link_type == 1 & link_power == 1.0) {
f_ly <- binomial (link = "identity"); } else
+if (dist_type == 2 & link_type == 1 & link_power == 0.5) {
f_ly <- binomial (link = "sqrt"); } else
+if (dist_type == 2 & link_type == 2 ) {
f_ly <- binomial (link = "logit"); } else
+if (dist_type == 2 & link_type == 3 ) {
f_ly <- binomial (link = "probit"); } else
+if (dist_type == 2 & link_type == 4 ) {
f_ly <- binomial (link = "cloglog"); } else
+if (dist_type == 2 & link_type == 5 ) {
f_ly <- binomial (link = "cauchit"); }
+
+# quasi(link = "identity", variance = "constant")
+# quasibinomial(link = "logit")
+# quasipoisson(link = "log")
+
+if (dist_type == 2 & dist_param != 1.0) {
+ y_here <- (y_here - dist_param) / (1.0 - dist_param);
+}
+
+# epsilon  tolerance: the iterations converge when |dev - devold|/(|dev| + 0.1) < epsilon.
+# maxit    integer giving the maximal number of IWLS iterations.
+# trace    logical indicating if output should be produced for each iteration.
+#
+c_rol <- glm.control (epsilon = eps_n, maxit = 100, trace = FALSE);
+
+X_matrix = as.matrix (X_here);
+y_matrix = as.matrix (y_here);
+
+if (icept == 0) {
+ glmOut <- glm (y_matrix ~ X_matrix - 1, family = f_ly, control = c_rol); #
zero intercept
+ betas <- coef (glmOut);
+} else {
+ glmOut <- glm (y_matrix ~ X_matrix , family = f_ly, control = c_rol);
+ betas <- coef (glmOut);
+ beta_intercept = betas [1];
+ betas [1 : num_features] = betas [2 : (num_features + 1)];
+ betas [num_features + 1] = beta_intercept;
+}
+
+print (c("Deviance", glmOut$deviance));
+writeMM (as (betas, "CsparseMatrix"), args[9], format = "text");
\ No newline at end of file
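
A note on the dist_param block above: for Bernoulli input with label set {q, 1}, where q = dist_param, the transform

    y' = (y - q) / (1 - q)

sends y = q to 0 and y = 1 to 1, so R's binomial glm always sees canonical {0, 1} responses regardless of the yneg label used on the DML side.
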
diff --git a/src/test/scripts/functions/builtin/glmTest.dml b/src/test/scripts/functions/builtin/glmTest.dml
new file mode 100644
index 0000000..a5549a8
--- /dev/null
+++ b/src/test/scripts/functions/builtin/glmTest.dml
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($X);
+Y = read($Y);
+betas = glm(X=X, Y=Y, dfam=$dfam, vpow=$vpow, link=$link, lpow=$lpow, yneg=$yneg, icpt=$icpt, disp=$disp, reg=$reg, tol=$tol, moi=$moi, mii=$mii, verbose=TRUE)
+write(betas, $B)
\ No newline at end of file
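
For manual experimentation, the script can be driven the same way BuiltinGLMTest drives it; a hypothetical invocation (placeholder input/output paths, a SystemDS launcher assumed on the PATH, argument values copied from the test):

    systemds glmTest.dml -exec singlenode -nvargs X=X.mtx Y=Y.mtx dfam=1 vpow=1.0 link=1 lpow=0.0 \
        yneg=0.0 icpt=0 disp=0.0 reg=0.0 tol=0.000000000001 moi=300 mii=0 B=betas_SYSTEMDS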