[systemml] branch master updated: [SYSTEMDS-391] New built-in GLM function (Generalized Linear Model)

mboehm7 Thu, 30 Apr 2020 14:44:14 -0700

This is an automated email from the ASF dual-hosted git repository.

mboehm7 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemml.git



The following commit(s) were added to refs/heads/master by this push:
     new c7ce2ff  [SYSTEMDS-391] New built-in GLM function (Generalized Linear 
Model)
c7ce2ff is described below

commit c7ce2ff95f306284363faf64321b1d2f36bbbeb4
Author: Shafaq Siddiqi <[email protected]>
AuthorDate: Thu Apr 30 22:30:11 2020 +0200

    [SYSTEMDS-391] New built-in GLM function (Generalized Linear Model)
    
    Closes #888.
---
 docs/Tasks.txt                                     |    3 +
 scripts/algorithms/StepGLM.dml                     |   16 +-
 scripts/builtin/glm.dml                            | 1118 ++++++++++++++++++++
 .../java/org/apache/sysds/common/Builtins.java     |    1 +
 src/test/java/org/apache/sysds/test/TestUtils.java |  383 +++++++
 .../apache/sysds/test/applications/GLMTest.java    |  417 +-------
 .../test/functions/builtin/BuiltinGLMTest.java     |  269 +++++
 src/test/scripts/functions/builtin/glmTest.R       |  139 +++
 src/test/scripts/functions/builtin/glmTest.dml     |   25 +
 9 files changed, 1955 insertions(+), 416 deletions(-)

diff --git a/docs/Tasks.txt b/docs/Tasks.txt
index 35d07e6..86ab8fe 100644
--- a/docs/Tasks.txt
+++ b/docs/Tasks.txt
@@ -290,5 +290,8 @@ SYSTEMDS-370 Lossy Compression Blocks
 SYSTEMDS-380 Memory Footprint
  * 371 Matrix Block Memory footprint update
 
+SYSTEMDS-390 New Builtin Functions IV
+ * 391 New GLM builtin-in function (from algorithms)                  OK
+
 Others:
  * Break append instruction to cbind and rbind 
diff --git a/scripts/algorithms/StepGLM.dml b/scripts/algorithms/StepGLM.dml
index 2ce5a1b..213f373 100644
--- a/scripts/algorithms/StepGLM.dml
+++ b/scripts/algorithms/StepGLM.dml
@@ -127,18 +127,18 @@ if (dir == "forward") {
  
        if (intercept_status == 0) {
                # Compute AIC of an empty model with no features and no 
intercept (all Ys are zero)
-               [AIC_best] = glm (X_global, Y, 0, num_features, 
columns_fixed_ordered, " ");
+               [AIC_best] = glm_fit (X_global, Y, 0, num_features, 
columns_fixed_ordered, " ");
        } else {
                # compute AIC of an empty model with only intercept (all Ys are 
constant)
                all_ones = matrix (1, rows = num_records, cols = 1);
-               [AIC_best] = glm (all_ones, Y, 0, num_features, 
columns_fixed_ordered, " ");
+               [AIC_best] = glm_fit (all_ones, Y, 0, num_features, 
columns_fixed_ordered, " ");
        }
        print ("Best AIC without any features: " + AIC_best);
   
        # First pass to examine single features
        AICs = matrix (AIC_best, rows = 1, cols = num_features);
        parfor (i in 1:num_features) {  
-               [AIC_1] = glm (X_orig[,i], Y, intercept_status, num_features, 
columns_fixed_ordered, " ");
+               [AIC_1] = glm_fit (X_orig[,i], Y, intercept_status, 
num_features, columns_fixed_ordered, " ");
                AICs[1,i] = AIC_1;
        }
   
@@ -156,11 +156,11 @@ if (dir == "forward") {
                print ("AIC of an empty model is " + AIC_best + " and adding no 
feature achieves more than " + (thr * 100) + "% decrease in AIC!");
                if (intercept_status == 0) {
                        # Compute AIC of an empty model with no features and no 
intercept (all Ys are zero)
-                       [AIC_best] = glm (X_global, Y, 0, num_features, 
columns_fixed_ordered, fileB);
+                       [AIC_best] = glm_fit (X_global, Y, 0, num_features, 
columns_fixed_ordered, fileB);
                } else {
                        # compute AIC of an empty model with only intercept 
(all Ys are constant)
                        ###all_ones = matrix (1, rows = num_records, cols = 1);
-                       [AIC_best] = glm (all_ones, Y, 0, num_features, 
columns_fixed_ordered, fileB);
+                       [AIC_best] = glm_fit (all_ones, Y, 0, num_features, 
columns_fixed_ordered, fileB);
                }
        };
   
@@ -177,7 +177,7 @@ if (dir == "forward") {
                                # Construct the feature matrix
                                X = cbind (X_global, X_orig[,i]);
         
-                               [AIC_2] = glm (X, Y, intercept_status, 
num_features, columns_fixed_ordered, " ");
+                               [AIC_2] = glm_fit (X, Y, intercept_status, 
num_features, columns_fixed_ordered, " ");
                                AICs[1,i] = AIC_2;
                        }               
                }
@@ -209,7 +209,7 @@ if (dir == "forward") {
   
        # run GLM with selected set of features
        print ("Running GLM with selected features...");
-       [AIC] = glm (X_global, Y, intercept_status, num_features, 
columns_fixed_ordered, fileB);
+       [AIC] = glm_fit (X_global, Y, intercept_status, num_features, 
columns_fixed_ordered, fileB);
   
 } else {
        stop ("Currently only forward selection strategy is supported!");
@@ -218,7 +218,7 @@ if (dir == "forward") {
 
 ################### UDFS USED IN THIS SCRIPT ##################
 
-glm = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, 
Double num_features_orig, Matrix[Double] Selected, String fileB) return (Double 
AIC) {
+glm_fit = function (Matrix[Double] X, Matrix[Double] Y, Int intercept_status, 
Double num_features_orig, Matrix[Double] Selected, String fileB) return (Double 
AIC) {
                
        # distribution family code: 1 = Power, 2 = Bernoulli/Binomial; 
currently only Bernouli distribution family is supported!                
        distribution_type = 2;                          # $dfam = 2;
diff --git a/scripts/builtin/glm.dml b/scripts/builtin/glm.dml
new file mode 100644
index 0000000..e2b5255
--- /dev/null
+++ b/scripts/builtin/glm.dml
@@ -0,0 +1,1118 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+# 
+# THIS BUILTIN SOLVES GLM REGRESSION USING NEWTON/FISHER SCORING WITH TRUST 
REGIONS
+#
+# INPUT PARAMETERS:
+# 
---------------------------------------------------------------------------------------------
+# NAME  TYPE   DEFAULT  MEANING
+# 
---------------------------------------------------------------------------------------------
+# X     Double  ---     matrix X of feature vectors
+# Y     Double  ---     matrix Y with either 1 or 2 columns:
+#                       if dfam = 2, Y is 1-column Bernoulli or 2-column 
Binomial (#pos, #neg)
+# dfam  Int     1       Distribution family code: 1 = Power, 2 = Binomial
+# vpow  Double  0.0     Power for Variance defined as (mean)^power (ignored if 
dfam != 1):
+#                       0.0 = Gaussian, 1.0 = Poisson, 2.0 = Gamma, 3.0 = 
Inverse Gaussian
+# link  Int     0       Link function code: 0 = canonical (depends on 
distribution),
+#                       1 = Power, 2 = Logit, 3 = Probit, 4 = Cloglog, 5 = 
Cauchit
+# lpow  Double  1.0     Power for Link function defined as (mean)^power 
(ignored if link != 1):
+#                       -2.0 = 1/mu^2, -1.0 = reciprocal, 0.0 = log, 0.5 = 
sqrt, 1.0 = identity
+# yneg  Double  0.0     Response value for Bernoulli "No" label, usually 0.0 
or -1.0
+# icpt  Int     0       Intercept presence, X columns shifting and rescaling:
+#                       0 = no intercept, no shifting, no rescaling;
+#                       1 = add intercept, but neither shift nor rescale X;
+#                       2 = add intercept, shift & rescale X columns to mean = 
0, variance = 1
+# reg   Double  0.0     Regularization parameter (lambda) for L2 regularization
+# tol   Double 0.000001 Tolerance (epsilon)
+# disp  Double  0.0     (Over-)dispersion value, or 0.0 to estimate it from 
data
+# moi   Int     200     Maximum number of outer (Newton / Fisher Scoring) 
iterations
+# mii   Int     0       Maximum number of inner (Conjugate Gradient) 
iterations, 0 = no maximum
+# 
---------------------------------------------------------------------------------------------
+# 
+# OTUPUT:
+# 
---------------------------------------------------------------------------------------------
+# NAME   TYPE     MEANING
+# 
---------------------------------------------------------------------------------------------
+# beta   Double   Matrix beta, whose size depends on icpt:
+#                 icpt=0: ncol(X) x 1;  icpt=1: (ncol(X) + 1) x 1;  icpt=2: 
(ncol(X) + 1) x 2
+#----------------------------------------------------------------------------------------------
+# In addition, some GLM statistics are provided as console output by setting 
verbose = TRUE, one comma-separated name-value
+# pair per each line, as follows:
+#
+# NAME                  MEANING
+# 
-------------------------------------------------------------------------------------------
+# TERMINATION_CODE      A positive integer indicating success/failure as 
follows:
+#                       1 = Converged successfully; 2 = Maximum number of 
iterations reached; 
+#                       3 = Input (X, Y) out of range; 4 = Distribution/link 
is not supported
+# BETA_MIN              Smallest beta value (regression coefficient), 
excluding the intercept
+# BETA_MIN_INDEX        Column index for the smallest beta value
+# BETA_MAX              Largest beta value (regression coefficient), excluding 
the intercept
+# BETA_MAX_INDEX        Column index for the largest beta value
+# INTERCEPT             Intercept value, or NaN if there is no intercept (if 
icpt=0)
+# DISPERSION            Dispersion used to scale deviance, provided as "disp" 
input parameter
+#                       or estimated (same as DISPERSION_EST) if the "disp" 
parameter is <= 0
+# DISPERSION_EST        Dispersion estimated from the dataset
+# DEVIANCE_UNSCALED     Deviance from the saturated model, assuming dispersion 
== 1.0
+# DEVIANCE_SCALED       Deviance from the saturated model, scaled by the 
DISPERSION value
+# 
-------------------------------------------------------------------------------------------
+#
+# The Log file, when requested, contains the following per-iteration variables 
in CSV format,
+# each line containing triple (NAME, ITERATION, VALUE) with ITERATION = 0 for 
initial values:
+#
+# NAME                  MEANING
+# 
-------------------------------------------------------------------------------------------
+# NUM_CG_ITERS          Number of inner (Conj.Gradient) iterations in this 
outer iteration
+# IS_TRUST_REACHED      1 = trust region boundary was reached, 0 = otherwise
+# POINT_STEP_NORM       L2-norm of iteration step from old point (i.e. "beta") 
to new point
+# OBJECTIVE             The loss function we minimize (i.e. negative partial 
log-likelihood)
+# OBJ_DROP_REAL         Reduction in the objective during this iteration, 
actual value
+# OBJ_DROP_PRED         Reduction in the objective predicted by a quadratic 
approximation
+# OBJ_DROP_RATIO        Actual-to-predicted reduction ratio, used to update 
the trust region
+# GRADIENT_NORM         L2-norm of the loss function gradient (NOTE: sometimes 
omitted)
+# LINEAR_TERM_MIN       The minimum value of X %*% beta, used to check for 
overflows
+# LINEAR_TERM_MAX       The maximum value of X %*% beta, used to check for 
overflows
+# IS_POINT_UPDATED      1 = new point accepted; 0 = new point rejected, old 
point restored
+# TRUST_DELTA           Updated trust region size, the "delta"
+# 
-------------------------------------------------------------------------------------------
+# 
+#
+# SOME OF THE SUPPORTED GLM DISTRIBUTION FAMILIES
+# AND LINK FUNCTIONS:
+# -----------------------------------------------
+# INPUT PARAMETERS:    MEANING:            Cano-
+# dfam vpow link lpow  Distribution.link   nical?
+# -----------------------------------------------
+#  1   0.0   1  -1.0   Gaussian.inverse
+#  1   0.0   1   0.0   Gaussian.log
+#  1   0.0   1   1.0   Gaussian.id          Yes
+#  1   1.0   1   0.0   Poisson.log          Yes
+#  1   1.0   1   0.5   Poisson.sqrt
+#  1   1.0   1   1.0   Poisson.id
+#  1   2.0   1  -1.0   Gamma.inverse        Yes
+#  1   2.0   1   0.0   Gamma.log
+#  1   2.0   1   1.0   Gamma.id
+#  1   3.0   1  -2.0   InvGaussian.1/mu^2   Yes
+#  1   3.0   1  -1.0   InvGaussian.inverse
+#  1   3.0   1   0.0   InvGaussian.log
+#  1   3.0   1   1.0   InvGaussian.id
+#  1    *    1    *    AnyVariance.AnyLink
+# -----------------------------------------------
+#  2    *    1   0.0   Binomial.log
+#  2    *    1   0.5   Binomial.sqrt
+#  2    *    2    *    Binomial.logit       Yes
+#  2    *    3    *    Binomial.probit
+#  2    *    4    *    Binomial.cloglog
+#  2    *    5    *    Binomial.cauchit
+# -----------------------------------------------
+
+m_glm = function(Matrix[Double] X, Matrix[Double] Y, Integer dfam=1, 
+  Double vpow=0.0, Integer link=0, Double lpow=1.0, Double yneg=0.0,
+  Integer icpt = 0, Double disp=0.0, Double reg=0.0, Double tol=0.000001,
+  Integer moi=200, Integer mii=0, Boolean verbose=TRUE)
+  return(Matrix[Double] betas)
+{ 
+  distribution_type = dfam;
+  variance_as_power_of_the_mean = vpow;
+  link_type = link;
+  link_as_power_of_the_mean = lpow;
+  bernoulli_No_label = yneg;
+  intercept_status = icpt;
+  dispersion = disp;
+  regularization = reg;
+  eps = tol;
+  max_iteration_IRLS = moi;
+  max_iteration_CG = mii;
+
+  # variance_as_power_of_the_mean = as.double (variance_as_power_of_the_mean);
+  # link_as_power_of_the_mean = as.double (link_as_power_of_the_mean);
+  # bernoulli_No_label = as.double (bernoulli_No_label);
+  # dispersion = as.double (dispersion);
+  # eps = as.double (eps);
+
+
+  # Default values for output statistics:
+  termination_code     = 0;
+  min_beta             = NaN;
+  i_min_beta           = NaN;
+  max_beta             = NaN;
+  i_max_beta           = NaN;
+  intercept_value      = NaN;
+  dispersion           = NaN;
+  estimated_dispersion = NaN;
+  deviance_nodisp      = NaN;
+  deviance             = NaN;
+
+  print("BEGIN GLM SCRIPT");
+
+  num_records  = nrow (X);
+  num_features = ncol (X);
+  zeros_r = matrix (0, rows = num_records, cols = 1);
+  ones_r = 1 + zeros_r;
+
+  # Introduce the intercept, shift and rescale the columns of X if needed
+
+  if (intercept_status == 1 | intercept_status == 2)  # add the intercept 
column
+  {
+    X = cbind (X, ones_r);
+    num_features = ncol (X);
+  }
+
+  scale_lambda = matrix (1, rows = num_features, cols = 1);
+  if (intercept_status == 1 | intercept_status == 2)
+  {
+    scale_lambda [num_features, 1] = 0;
+  }
+
+  if (intercept_status == 2)  # scale-&-shift X columns to mean 0, variance 1
+  {                           # Important assumption: X [, num_features] = 
ones_r
+    avg_X_cols = t(colSums(X)) / num_records;
+    var_X_cols = (t(colSums (X ^ 2)) - num_records * (avg_X_cols ^ 2)) / 
(num_records - 1);
+    is_unsafe = (var_X_cols <= 0);
+    scale_X = 1.0 / sqrt (var_X_cols * (1 - is_unsafe) + is_unsafe);
+    scale_X [num_features, 1] = 1;
+    shift_X = - avg_X_cols * scale_X;
+    shift_X [num_features, 1] = 0;
+    rowSums_X_sq = (X ^ 2) %*% (scale_X ^ 2) + X %*% (2 * scale_X * shift_X) + 
sum (shift_X ^ 2);
+  }
+  else {
+    scale_X = matrix (1, rows = num_features, cols = 1);
+    shift_X = matrix (0, rows = num_features, cols = 1);
+    rowSums_X_sq = rowSums (X ^ 2);
+  }
+
+  # Henceforth we replace "X" with "X %*% (SHIFT/SCALE TRANSFORM)" and 
rowSums(X ^ 2)
+  # with "rowSums_X_sq" in order to preserve the sparsity of X under shift and 
scale.
+  # The transform is then associatively applied to the other side of the 
expression,
+  # and is rewritten via "scale_X" and "shift_X" as follows:
+  #
+  # ssX_A  = (SHIFT/SCALE TRANSFORM) %*% A    --- is rewritten as:
+  # ssX_A  = diag (scale_X) %*% A;
+  # ssX_A [num_features, ] = ssX_A [num_features, ] + t(shift_X) %*% A;
+  #
+  # tssX_A = t(SHIFT/SCALE TRANSFORM) %*% A   --- is rewritten as:
+  # tssX_A = diag (scale_X) %*% A + shift_X %*% A [num_features, ];
+
+  # Initialize other input-dependent parameters
+
+  lambda = scale_lambda * regularization;
+  if (max_iteration_CG == 0) {
+    max_iteration_CG = num_features;
+  }
+
+  # In Bernoulli case, convert one-column "Y" into two-column
+
+  if (distribution_type == 2 & ncol(Y) == 1)
+  {
+    is_Y_negative = (Y == bernoulli_No_label);
+    Y = cbind (1 - is_Y_negative, is_Y_negative);
+    count_Y_negative = sum (is_Y_negative);
+    if (count_Y_negative == 0) {
+      stop ("GLM Input Error: all Y-values encode Bernoulli YES-label, none 
encode NO-label");
+    }
+    if (count_Y_negative == nrow(Y)) {
+      stop ("GLM Input Error: all Y-values encode Bernoulli NO-label, none 
encode YES-label");
+    }
+  }
+
+  # Set up the canonical link, if requested [Then we have: Var(mu) * (d link / 
d mu) = const]
+
+  if (link_type == 0)
+  {
+    if (distribution_type == 1) {
+      link_type = 1;
+      link_as_power_of_the_mean = 1.0 - variance_as_power_of_the_mean;
+    } 
+    else if (distribution_type == 2) {
+      link_type = 2;
+    }
+  }   
+
+# For power distributions and/or links, we use two constants,
+# "variance as power of the mean" and "link_as_power_of_the_mean",
+# to specify the variance and the link as arbitrary powers of the
+# mean.  However, the variance-powers of 1.0 (Poisson family) and
+# 2.0 (Gamma family) have to be treated as special cases, because
+# these values integrate into logarithms.  The link-power of 0.0
+# is also special as it represents the logarithm link.
+
+  num_response_columns = ncol (Y);
+
+  is_supported = check_if_supported (num_response_columns, distribution_type, 
variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+  if (is_supported == 1)
+  {
+
+    #####   INITIALIZE THE BETAS   #####
+
+    [beta, saturated_log_l, isNaN] = glm_initialize (X, Y, distribution_type, 
+      variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean, 
+      intercept_status, max_iteration_CG);
+    if (isNaN == 0)
+    {
+
+    #####  START OF THE MAIN PART  #####
+
+      sum_X_sq = sum (rowSums_X_sq);
+      trust_delta = 0.5 * sqrt (num_features) / max (sqrt (rowSums_X_sq));
+      ###  max_trust_delta = trust_delta * 10000.0;
+      log_l = 0.0;
+      deviance_nodisp = 0.0;
+      new_deviance_nodisp = 0.0;
+      isNaN_log_l = 2;
+      newbeta = beta;
+      g = matrix (0.0, rows = num_features, cols = 1);
+      g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+      accept_new_beta = 1;
+      reached_trust_boundary = 0;
+      neg_log_l_change_predicted = 0.0;
+      i_IRLS = 0;
+
+      print ("BEGIN IRLS ITERATIONS...");
+
+      ssX_newbeta = diag (scale_X) %*% newbeta;
+      ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + t(shift_X) 
%*% newbeta;
+      all_linear_terms = X %*% ssX_newbeta;
+
+      [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+        (all_linear_terms, Y, distribution_type, 
variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+      if (isNaN_new_log_l == 0) {
+        new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+        new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+      }
+
+      # set w to avoid 'Initialization of w depends on if-else/while 
execution' warnings
+      w = matrix (0.0, rows=1, cols=1);
+      while (termination_code == 0)
+      {
+        accept_new_beta = 1;
+    
+        if (i_IRLS > 0)
+        {
+          if (isNaN_log_l == 0) {
+            accept_new_beta = 0;
+          }
+
+        # Decide whether to accept a new iteration point and update the trust 
region
+        # See Alg. 4.1 on p. 69 of "Numerical Optimization" 2nd ed. by Nocedal 
and Wright
+
+          rho = (- new_log_l + log_l) / neg_log_l_change_predicted;
+          if (rho < 0.25 | isNaN_new_log_l == 1) {
+            trust_delta = 0.25 * trust_delta;
+          }
+          if (rho > 0.75 & isNaN_new_log_l == 0 & reached_trust_boundary == 1) 
{
+            trust_delta = 2 * trust_delta;
+            
+        ### if (trust_delta > max_trust_delta) {
+        ###     trust_delta = max_trust_delta;
+        ### }
+
+          }
+          if (rho > 0.1 & isNaN_new_log_l == 0) {
+            accept_new_beta = 1;
+          }
+        }
+
+        if (accept_new_beta == 1)
+        {
+          beta = newbeta;  log_l = new_log_l;  deviance_nodisp = 
new_deviance_nodisp;  isNaN_log_l = isNaN_new_log_l;
+        
+          [g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, 
variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+        
+          # We introduced these variables to avoid roundoff errors:
+          #     g_Y = y_residual / (y_var * link_grad);
+          #     w   = 1.0 / (y_var * link_grad * link_grad);
+                      
+          gXY = - t(X) %*% g_Y;
+          g = diag (scale_X) %*% gXY + shift_X %*% gXY [num_features, ];
+          g_norm = sqrt (sum ((g + lambda * beta) ^ 2));
+        }
+    
+        [z, neg_log_l_change_predicted, num_CG_iters, reached_trust_boundary] 
= 
+          get_CG_Steihaug_point (X, scale_X, shift_X, w, g, beta, lambda, 
trust_delta, max_iteration_CG);
+
+        newbeta = beta + z;
+
+        ssX_newbeta = diag (scale_X) %*% newbeta;
+        ssX_newbeta [num_features, ] = ssX_newbeta [num_features, ] + 
t(shift_X) %*% newbeta;
+        all_linear_terms = X %*% ssX_newbeta;
+
+        [new_log_l, isNaN_new_log_l] = glm_log_likelihood_part
+            (all_linear_terms, Y, distribution_type, 
variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+        if (isNaN_new_log_l == 0) {
+          new_deviance_nodisp = 2.0 * (saturated_log_l - new_log_l);
+          new_log_l = new_log_l - 0.5 * sum (lambda * newbeta ^ 2);
+        }
+        
+        log_l_change = new_log_l - log_l;               # R's criterion for 
termination: |dev - devold|/(|dev| + 0.1) < eps
+
+        if (reached_trust_boundary == 0 & isNaN_new_log_l == 0 & 
+          (2.0 * abs (log_l_change) < eps * (deviance_nodisp + 0.1) | abs 
(log_l_change) < (abs (log_l) + abs (new_log_l)) * 0.00000000000001) )  
+        {
+          termination_code = 1;
+        }
+        rho = - log_l_change / neg_log_l_change_predicted;
+        z_norm = sqrt (sum (z * z));
+    
+        [z_norm_m, z_norm_e] = round_to_print (z_norm);
+        [trust_delta_m, trust_delta_e] = round_to_print (trust_delta);
+        [rho_m, rho_e] = round_to_print (rho);
+        [new_log_l_m, new_log_l_e] = round_to_print (new_log_l);
+        [log_l_change_m, log_l_change_e] = round_to_print (log_l_change);
+        [g_norm_m, g_norm_e] = round_to_print (g_norm);
+
+        i_IRLS = i_IRLS + 1;
+        if(verbose)
+          print ("Iter #" + i_IRLS + " completed"
+          + ", ||z|| = " + z_norm_m + "E" + z_norm_e
+          + ", trust_delta = " + trust_delta_m + "E" + trust_delta_e
+          + ", reached = " + reached_trust_boundary
+          + ", ||g|| = " + g_norm_m + "E" + g_norm_e
+          + ", new_log_l = " + new_log_l_m + "E" + new_log_l_e
+          + ", log_l_change = " + log_l_change_m + "E" + log_l_change_e
+          + ", rho = " + rho_m + "E" + rho_e);
+        
+        if (i_IRLS == max_iteration_IRLS) {
+          termination_code = 2;
+        }
+      } 
+
+      beta = newbeta;
+      log_l = new_log_l;
+      deviance_nodisp = new_deviance_nodisp;
+
+      if (termination_code == 1) {
+        print ("Converged in " + i_IRLS + " steps.");
+      }
+      else {
+        print ("Did not converge.");
+      }
+
+      ssX_beta = diag (scale_X) %*% beta;
+      ssX_beta [num_features, ] = ssX_beta [num_features, ] + t(shift_X) %*% 
beta;
+      if (intercept_status == 2) {
+        beta_out = cbind (ssX_beta, beta);
+      } else {
+        beta_out = ssX_beta;
+      }
+
+      betas = beta_out
+
+      if (intercept_status == 1 | intercept_status == 2) {
+        intercept_value = as.scalar (beta_out [num_features, 1]);
+        beta_noicept = beta_out [1 : (num_features - 1), 1];
+      } else {
+        beta_noicept = beta_out [1 : num_features, 1];
+      }
+      min_beta = min (beta_noicept);
+      max_beta = max (beta_noicept);
+      tmp_i_min_beta = rowIndexMin (t(beta_noicept))
+      i_min_beta = as.scalar (tmp_i_min_beta [1, 1]);
+      tmp_i_max_beta = rowIndexMax (t(beta_noicept))
+      i_max_beta = as.scalar (tmp_i_max_beta [1, 1]);
+
+      #####  OVER-DISPERSION PART  #####
+
+      all_linear_terms = X %*% ssX_beta;
+      [g_Y, w] = glm_dist (all_linear_terms, Y, distribution_type, 
variance_as_power_of_the_mean, link_type, link_as_power_of_the_mean);
+
+      pearson_residual_sq = g_Y ^ 2 / w;
+      pearson_residual_sq = replace (target = pearson_residual_sq, pattern = 
NaN, replacement = 0);
+      # pearson_residual_sq = (y_residual ^ 2) / y_var;
+
+      if (num_records > num_features) {
+        estimated_dispersion = sum (pearson_residual_sq) / (num_records - 
num_features);
+      }
+      if (dispersion <= 0) {
+        dispersion = estimated_dispersion;
+      }
+      deviance = deviance_nodisp / dispersion;
+
+      #####  END OF THE MAIN PART  #####
+
+    } else { print ("Input matrices are out of range.  Terminating the DML."); 
termination_code = 3; }
+  } else { print ("Distribution/Link not supported.  Terminating the DML."); 
termination_code = 4; }
+
+  str = "TERMINATION_CODE," + termination_code;
+  str = append (str, "BETA_MIN," + min_beta);
+  str = append (str, "BETA_MIN_INDEX," + i_min_beta);
+  str = append (str, "BETA_MAX," + max_beta);
+  str = append (str, "BETA_MAX_INDEX," + i_max_beta);
+  str = append (str, "INTERCEPT," + intercept_value);
+  str = append (str, "DISPERSION," + dispersion);
+  str = append (str, "DISPERSION_EST," + estimated_dispersion);
+  str = append (str, "DEVIANCE_UNSCALED," + deviance_nodisp);
+  str = append (str, "DEVIANCE_SCALED," + deviance);
+
+  if (verbose) {
+    print (str);
+  }
+}
+
+check_if_supported = function (int ncol_y, int dist_type, double var_power, 
int link_type, double link_power)
+return   (int is_supported)
+{
+  is_supported = 0;
+  if (ncol_y == 1 & dist_type == 1 & link_type == 1)
+  { # POWER DISTRIBUTION
+    is_supported = 1;
+    if (var_power == 0 & link_power == -1.0)        print 
("Gaussian.inverse");     
+    else if (var_power == 0 & link_power ==    0)   print ("Gaussian.log");    
       
+    else if (var_power == 0 & link_power ==  0.5)   print ("Gaussian.sqrt");   
        
+    else if (var_power == 0 & link_power ==  1.0)   print ("Gaussian.id");     
        
+    else if (var_power == 0                     )   print 
("Gaussian.power_nonlog");   
+    else if (var_power == 1.0 & link_power == -1.0) print ("Poisson.inverse"); 
         
+    else if (var_power == 1.0 & link_power ==    0) print ("Poisson.log");     
         
+    else if (var_power == 1.0 & link_power ==  0.5) print ("Poisson.sqrt");    
         
+    else if (var_power == 1.0 & link_power ==  1.0) print ("Poisson.id");      
         
+    else if (var_power == 1.0                     ) print 
("Poisson.power_nonlog");     
+    else if (var_power == 2.0 & link_power == -1.0) print ("Gamma.inverse");   
         
+    else if (var_power == 2.0 & link_power ==    0) print ("Gamma.log");       
         
+    else if (var_power == 2.0 & link_power ==  0.5) print ("Gamma.sqrt");      
         
+    else if (var_power == 2.0 & link_power ==  1.0) print ("Gamma.id");        
         
+    else if (var_power == 2.0                     ) print 
("Gamma.power_nonlog");      
+    else if (var_power == 3.0 & link_power == -2.0) print 
("InvGaussian.1/mu^2");       
+    else if (var_power == 3.0 & link_power == -1.0) print 
("InvGaussian.inverse");     
+    else if (var_power == 3.0 & link_power ==    0) print ("InvGaussian.log"); 
         
+    else if (var_power == 3.0 & link_power ==  0.5) print 
("InvGaussian.sqrt");         
+    else if (var_power == 3.0 & link_power ==  1.0) print ("InvGaussian.id");  
         
+    else if (var_power == 3.0                     ) print 
("InvGaussian.power_nonlog");
+    else if (                   link_power ==    0) print ("PowerDist.log");   
        
+    else                                            print 
("PowerDist.power_nonlog");
+  }
+  if (ncol_y == 1 & dist_type == 2)
+  {
+        print ("Error: Bernoulli response matrix has not been converted into 
two-column format.");
+  }
+  if (ncol_y == 2 & dist_type == 2 & link_type >= 1 & link_type <= 5)
+  { # BINOMIAL/BERNOULLI DISTRIBUTION
+    is_supported = 1;
+    if (link_type == 1 & link_power == -1.0)      print ("Binomial.inverse");  
   
+    else if (link_type == 1 & link_power ==    0) print ("Binomial.log");      
    
+    else if (link_type == 1 & link_power ==  0.5) print ("Binomial.sqrt");     
    
+    else if (link_type == 1 & link_power ==  1.0) print ("Binomial.id");       
    
+    else if (link_type == 1)                      print 
("Binomial.power_nonlog"); 
+    else if (link_type == 2)                      print ("Binomial.logit");    
    
+    else if (link_type == 3)                      print ("Binomial.probit");   
    
+    else if (link_type == 4)                      print ("Binomial.cloglog");  
    
+    else if (link_type == 5)                      print ("Binomial.cauchit");  
   
+  }   
+  if (is_supported == 0) {
+    print ("Response matrix with " + ncol_y + " columns, distribution family 
(" + dist_type + ", " + var_power
+             + ") and link family (" + link_type + ", " + link_power + ") are 
NOT supported together.");
+  }
+}
+
+glm_initialize = function (Matrix[double] X, Matrix[double] Y, int dist_type, 
double var_power, int link_type, double link_power, int icept_status, int 
max_iter_CG)
+return (Matrix[double] beta, double saturated_log_l, int isNaN)
+{
+  saturated_log_l = 0.0;
+  isNaN = 0;
+  y_corr = Y [, 1];
+  if (dist_type == 2) {
+    n_corr = rowSums (Y);
+    is_n_zero = (n_corr == 0);
+    y_corr = Y [, 1] / (n_corr + is_n_zero) + (0.5 - Y [, 1]) * is_n_zero;    
+  }
+  linear_terms = y_corr;
+  if (dist_type == 1 & link_type == 1) 
+  { # POWER DISTRIBUTION
+    if (link_power ==  0) {
+      if (sum (y_corr < 0) == 0) {
+        is_zero_y_corr = (y_corr == 0);
+        linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - 
is_zero_y_corr);
+      } else { isNaN = 1; }
+      } else if (link_power ==  1.0) {
+        linear_terms = y_corr;
+      } else if (link_power == -1.0) {
+        linear_terms = 1.0 / y_corr;
+      } else if (link_power ==  0.5) {
+        if (sum (y_corr < 0) == 0) {
+          linear_terms = sqrt (y_corr);
+        } else { isNaN = 1; }
+      } else if (link_power >   0) {
+        if (sum (y_corr < 0) == 0) {
+          is_zero_y_corr = (y_corr == 0);
+          linear_terms = (y_corr + is_zero_y_corr) ^ link_power - 
is_zero_y_corr;
+        } else { isNaN = 1; }
+      } else {
+        if (sum (y_corr <= 0) == 0) {
+          linear_terms = y_corr ^ link_power;
+        } else { isNaN = 1; }
+      }
+  }
+  if (dist_type == 2 & link_type >= 1 & link_type <= 5)
+  { # BINOMIAL/BERNOULLI DISTRIBUTION
+    if (link_type == 1 & link_power == 0)  { # Binomial.log
+      if (sum (y_corr < 0) == 0) {
+        is_zero_y_corr = (y_corr == 0);
+        linear_terms = log (y_corr + is_zero_y_corr) - is_zero_y_corr / (1.0 - 
is_zero_y_corr);
+      } else { isNaN = 1; }
+    } 
+    else if (link_type == 1 & link_power >  0)  { # Binomial.power_nonlog pos
+      if (sum (y_corr < 0) == 0) {
+        is_zero_y_corr = (y_corr == 0);
+        linear_terms = (y_corr + is_zero_y_corr) ^ link_power - is_zero_y_corr;
+        } else { isNaN = 1; }
+    } 
+    else if (link_type == 1)                      { # Binomial.power_nonlog neg
+      if (sum (y_corr <= 0) == 0) {
+        linear_terms = y_corr ^ link_power;
+      } else { isNaN = 1; }
+    } 
+    else 
+    { 
+      is_zero_y_corr = (y_corr <= 0);
+      is_one_y_corr  = (y_corr >= 1.0);
+      y_corr = y_corr * (1.0 - is_zero_y_corr) * (1.0 - is_one_y_corr) + 0.5 * 
(is_zero_y_corr + is_one_y_corr);
+      if (link_type == 2)                           { # Binomial.logit
+        linear_terms = log (y_corr / (1.0 - y_corr)) 
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - 
is_zero_y_corr);
+      }
+      else if (link_type == 3)                      { # Binomial.probit
+        y_below_half = y_corr + (1.0 - 2.0 * y_corr) * (y_corr > 0.5);
+        t = sqrt (- 2.0 * log (y_below_half));
+        approx_inv_Gauss_CDF = - t + (2.515517 + t * (0.802853 + t * 
0.010328)) / (1.0 + t * (1.432788 + t * (0.189269 + t * 0.001308)));
+        linear_terms = approx_inv_Gauss_CDF * (1.0 - 2.0 * (y_corr > 0.5))
+           + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - 
is_zero_y_corr);
+      } else if (link_type == 4)                    { # Binomial.cloglog
+        linear_terms = log (- log (1.0 - y_corr))
+          - log (- log (0.5)) * (is_zero_y_corr + is_one_y_corr)
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - 
is_zero_y_corr);
+      } 
+      else if (link_type == 5)                      { # Binomial.cauchit
+        linear_terms = tan ((y_corr - 0.5) * pi)
+          + is_one_y_corr / (1.0 - is_one_y_corr) - is_zero_y_corr / (1.0 - 
is_zero_y_corr);
+      }
+    }
+  }
+
+  if (isNaN == 0) {
+    [saturated_log_l, isNaN] = 
+      glm_log_likelihood_part (linear_terms, Y, dist_type, var_power, 
link_type, link_power);
+  }
+
+  if ((dist_type == 1 & link_type == 1 & link_power == 0) |
+      (dist_type == 2 & link_type >= 2))
+  {
+    desired_eta = 0.0;
+  }
+  else if (link_type == 1 & link_power == 0) {
+    desired_eta = log (0.5);
+  } else if (link_type == 1) {
+    desired_eta = 0.5 ^ link_power;
+  } else {
+    desired_eta = 0.5;
+  }
+
+  beta = matrix (0.0, rows = ncol(X), cols = 1);
+
+  if (desired_eta != 0) 
+  {
+    if (icept_status == 1 | icept_status == 2) {
+      beta [nrow(beta), 1] = desired_eta;
+    } else {
+      # We want: avg (X %*% ssX_transform %*% beta) = desired_eta
+      # Note that "ssX_transform" is trivial here, hence ignored 
+      beta = straightenX (X, 0.000001, max_iter_CG);  
+      beta = beta * desired_eta;
+    }
+  }
+}
+
+
+glm_dist = function (Matrix[double] linear_terms, Matrix[double] Y,
+                     int dist_type, double var_power, int link_type, double 
link_power)
+return (Matrix[double] g_Y, Matrix[double] w)
+{
+  # ORIGINALLY we returned more meaningful vectors, namely:
+  # Matrix[double] y_residual    : y - y_mean, i.e. y observed - y predicted
+  # Matrix[double] link_gradient : derivative of the link function
+  # Matrix[double] var_function  : variance without dispersion, i.e. the V(mu) 
function
+  # BUT, this caused roundoff errors, so we had to compute "directly useful" 
vectors
+  # and skip over the "meaningful intermediaries".  Now we output these two 
variables:
+  #     g_Y = y_residual / (var_function * link_gradient);
+  #     w   = 1.0 / (var_function * link_gradient ^ 2);
+  zeros_r = matrix(0, nrow(linear_terms), 1);
+  ones_r = matrix(1, nrow(linear_terms), 1);
+  g_Y  = zeros_r;
+  w  = zeros_r;
+
+  # Some constants
+
+  one_over_sqrt_two_pi = 0.39894228040143267793994605993438;
+  p_one_m_one = matrix("1 -1", 1, 2);
+  m_one_p_one = matrix("-1 1", 1, 2);
+  flip_pos = matrix("0 1 1 0", 2, 2);
+  flip_neg = matrix("0 -1 1 0", 2, 2);
+  
+  if (dist_type == 1 & link_type == 1) 
+  { # POWER DISTRIBUTION
+    y_mean = zeros_r;
+    if (link_power ==  0) {
+      y_mean = exp (linear_terms);
+      y_mean_pow = y_mean ^ (1 - var_power);
+      w   = y_mean_pow * y_mean;
+      g_Y = y_mean_pow * (Y - y_mean);
+    } else if (link_power ==  1.0) {
+      y_mean = linear_terms;
+      w   = y_mean ^ (- var_power);
+      g_Y = w * (Y - y_mean);
+    } else {
+      y_mean = linear_terms ^ (1.0 / link_power);
+      c1  = (1 - var_power) / link_power - 1;
+      c2  = (2 - var_power) / link_power - 2;
+      g_Y = (linear_terms ^ c1) * (Y - y_mean) / link_power;
+      w   = (linear_terms ^ c2) / (link_power ^ 2);
+    }
+  }   
+  if (dist_type == 2 & link_type >= 1 & link_type <= 5)
+  { # BINOMIAL/BERNOULLI DISTRIBUTION
+    if (link_type == 1) { # BINOMIAL.POWER LINKS
+      if (link_power == 0)  { # Binomial.log
+        vec1 = 1 / (exp (- linear_terms) - 1);
+        g_Y = Y [, 1] - Y [, 2] * vec1;
+        w   = rowSums (Y) * vec1;
+      } 
+      else {                  # Binomial.nonlog
+        vec1 = zeros_r;
+        if (link_power == 0.5)  {
+          vec1 = 1 / (1 - linear_terms ^ 2);
+        } else if (sum (linear_terms < 0) == 0) {
+          vec1 = linear_terms ^ (- 2 + 1 / link_power) / (1 - linear_terms ^ 
(1 / link_power));
+        } else {isNaN = 1;}
+          # We want a "zero-protected" version of
+          #     vec2 = Y [, 1] / linear_terms;
+          is_y_0 = (Y [, 1] == 0);
+          vec2 = (Y [, 1] + is_y_0) / (linear_terms * (1 - is_y_0) + is_y_0) - 
is_y_0;
+          g_Y =  (vec2 - Y [, 2] * vec1 * linear_terms) / link_power;
+          w   =  rowSums (Y) * vec1 / link_power ^ 2;
+      }
+    } 
+    else {
+      is_LT_infinite = cbind(linear_terms==Inf, linear_terms==-Inf);
+      finite_linear_terms = replace (target =        linear_terms, pattern =  
Inf, replacement = 0);
+      finite_linear_terms = replace (target = finite_linear_terms, pattern = 
-Inf, replacement = 0);
+      if (link_type == 2)                           { # Binomial.logit
+        Y_prob = cbind(exp(finite_linear_terms), ones_r);
+        Y_prob = Y_prob / rowSums (Y_prob);
+        Y_prob = Y_prob * (1.0 - rowSums (is_LT_infinite)) + is_LT_infinite;
+        g_Y = rowSums (Y * (Y_prob %*% flip_neg));           ### = y_residual;
+        w   = rowSums (Y * (Y_prob %*% flip_pos) * Y_prob);  ### = y_variance;
+      }
+      else if (link_type == 3)                  { # Binomial.probit
+        is_lt_pos = (linear_terms >= 0);
+        t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 
0.231641888 = 0.3275911 / sqrt (2.0)
+        pt_gp = t_gp * ( 0.254829592 
+          + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. 
by M. Abramowitz and I.A. Stegun,
+          + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th print 
(Dec 1972), Sec. 7.1.26, p. 299
+          + t_gp * (-1.453152027 
+          + t_gp *   1.061405429))));
+        the_gauss_exp = exp (- (linear_terms ^ 2) / 2.0);
+        vec1 = 0.25 * pt_gp * (2 - the_gauss_exp * pt_gp);
+        vec2 = Y [, 1] - rowSums (Y) * is_lt_pos + the_gauss_exp * pt_gp * 
rowSums (Y) * (is_lt_pos - 0.5);
+        w   = the_gauss_exp * (one_over_sqrt_two_pi ^ 2) * rowSums (Y) / vec1;
+        g_Y = one_over_sqrt_two_pi * vec2 / vec1;
+      }
+      else if (link_type == 4)                  { # Binomial.cloglog
+        the_exp = exp (linear_terms)
+        the_exp_exp = exp (- the_exp);
+        is_too_small = ((10000000 + the_exp) == 10000000);
+        the_exp_ratio = (1 - is_too_small) * (1 - the_exp_exp) / (the_exp + 
is_too_small) + is_too_small * (1 - the_exp / 2);
+        g_Y =  (rowSums (Y) * the_exp_exp - Y [, 2]) / the_exp_ratio;
+        w   =  the_exp_exp * the_exp * rowSums (Y) / the_exp_ratio;
+      } else if (link_type == 5)                  { # Binomial.cauchit
+        Y_prob = 0.5 + (atan (finite_linear_terms) %*% p_one_m_one) / pi;
+        Y_prob = Y_prob * (1.0 - rowSums (is_LT_infinite)) + is_LT_infinite;
+        y_residual = Y [, 1] * Y_prob [, 2] - Y [, 2] * Y_prob [, 1];
+        var_function = rowSums (Y) * Y_prob [, 1] * Y_prob [, 2];
+        link_gradient_normalized = (1 + linear_terms ^ 2) * pi;
+        g_Y =  rowSums (Y) * y_residual / (var_function * 
link_gradient_normalized);
+        w   = (rowSums (Y) ^ 2) / (var_function * link_gradient_normalized ^ 
2);
+      }
+    }
+  }
+}
+
+
+glm_log_likelihood_part = function (Matrix[double] linear_terms, 
Matrix[double] Y,
+        int dist_type, double var_power, int link_type, double link_power)
+return (double log_l, int isNaN)
+{
+  isNaN = 0;
+  log_l = 0.0;
+  num_records = nrow (Y);
+  zeros_r = matrix (0.0, rows = num_records, cols = 1);
+
+  if (dist_type == 1 & link_type == 1)
+  { # POWER DISTRIBUTION
+    b_cumulant = zeros_r;
+    natural_parameters = zeros_r;
+    is_natural_parameter_log_zero = zeros_r;
+    if(var_power == 1.0 & link_power == 0)  { # Poisson.log
+      b_cumulant = exp (linear_terms);
+      is_natural_parameter_log_zero = (linear_terms == -Inf);
+      natural_parameters = replace (target = linear_terms, pattern = -Inf, 
replacement = 0);
+    } else if (var_power == 1.0 & link_power == 1.0)  { # Poisson.id
+      if (sum (linear_terms < 0) == 0)  {
+        b_cumulant = linear_terms;
+        is_natural_parameter_log_zero = (linear_terms == 0);
+        natural_parameters = log (linear_terms + 
is_natural_parameter_log_zero);
+      } else {isNaN = 1;}
+    } else if (var_power == 1.0 & link_power == 0.5)  { # Poisson.sqrt
+      if (sum (linear_terms < 0) == 0)  {
+        b_cumulant = linear_terms ^ 2;
+        is_natural_parameter_log_zero = (linear_terms == 0);
+        natural_parameters = 2.0 * log (linear_terms + 
is_natural_parameter_log_zero);
+      } else {isNaN = 1;}
+    } else if (var_power == 1.0 & link_power  > 0)  { # Poisson.power_nonlog, 
pos
+      if (sum (linear_terms < 0) == 0)  {
+        is_natural_parameter_log_zero = (linear_terms == 0);
+        b_cumulant = (linear_terms + is_natural_parameter_log_zero) ^ (1.0 / 
link_power) - is_natural_parameter_log_zero;
+        natural_parameters = log (linear_terms + 
is_natural_parameter_log_zero) / link_power;
+      } else {isNaN = 1;}
+    } else if (var_power == 1.0)                      { # 
Poisson.power_nonlog, neg
+      if (sum (linear_terms <= 0) == 0) {
+        b_cumulant = linear_terms ^ (1.0 / link_power);
+        natural_parameters = log (linear_terms) / link_power;
+      } else {isNaN = 1;}
+    } else if (var_power == 2.0 & link_power == -1.0) { # Gamma.inverse
+      if (sum (linear_terms <= 0) == 0) {
+        b_cumulant = - log (linear_terms);
+        natural_parameters = - linear_terms;
+      } else {isNaN = 1;}
+    } else if (var_power == 2.0 & link_power ==  1.0) { # Gamma.id
+      if (sum (linear_terms <= 0) == 0) {
+        b_cumulant = log (linear_terms);
+        natural_parameters = - 1.0 / linear_terms;
+      } else {isNaN = 1;}
+    } else if (var_power == 2.0 & link_power ==  0) { # Gamma.log
+      b_cumulant = linear_terms;
+      natural_parameters = - exp (- linear_terms);
+    } else if (var_power == 2.0)                      { # Gamma.power_nonlog
+      if (sum (linear_terms <= 0) == 0) {
+        b_cumulant = log (linear_terms) / link_power;
+        natural_parameters = - linear_terms ^ (- 1.0 / link_power);
+      } else {isNaN = 1;}
+    } else if                    (link_power ==  0) { # PowerDist.log
+      natural_parameters = exp (linear_terms * (1.0 - var_power)) / (1.0 - 
var_power);
+      b_cumulant = exp (linear_terms * (2.0 - var_power)) / (2.0 - var_power);
+    } 
+    else {                                              # 
PowerDist.power_nonlog
+      if (-2 * link_power == 1.0 - var_power) {
+        natural_parameters = 1.0 / (linear_terms ^ 2) / (1.0 - var_power);
+      } 
+      else if (-1 * link_power == 1.0 - var_power) {
+        natural_parameters = 1.0 / linear_terms / (1.0 - var_power);
+      } else if (     link_power == 1.0 - var_power) {
+        natural_parameters = linear_terms / (1.0 - var_power);
+      } else if ( 2 * link_power == 1.0 - var_power) {
+        natural_parameters = linear_terms ^ 2 / (1.0 - var_power);
+      } else if (sum (linear_terms <= 0) == 0) {
+        power = (1.0 - var_power) / link_power;
+        natural_parameters = (linear_terms ^ power) / (1.0 - var_power);
+      } else {
+        isNaN = 1;
+      }
+
+      if (-2 * link_power == 2.0 - var_power) {
+        b_cumulant = 1.0 / (linear_terms ^ 2) / (2.0 - var_power);
+      } else if (-1 * link_power == 2.0 - var_power) {
+        b_cumulant = 1.0 / linear_terms / (2.0 - var_power);
+      } else if (     link_power == 2.0 - var_power) {
+        b_cumulant = linear_terms / (2.0 - var_power);
+      } else if ( 2 * link_power == 2.0 - var_power) {
+        b_cumulant = linear_terms ^ 2 / (2.0 - var_power);
+      } else if (sum (linear_terms <= 0) == 0) {
+        power = (2.0 - var_power) / link_power;
+        b_cumulant = (linear_terms ^ power) / (2.0 - var_power);
+      } else {
+        isNaN = 1;
+      }
+    }
+    if (sum (is_natural_parameter_log_zero * abs (Y)) > 0) {
+      log_l = -Inf;
+      isNaN = 1;
+    }
+    if (isNaN == 0)
+    {
+      log_l = sum (Y * natural_parameters - b_cumulant);
+      if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) {
+        log_l = -Inf;
+        isNaN = 1;
+      }
+    }
+  }
+
+  if (dist_type == 2 & link_type >= 1 & link_type <= 5)
+  { # BINOMIAL/BERNOULLI DISTRIBUTION
+    [Y_prob, isNaN] = binomial_probability_two_column (linear_terms, 
link_type, link_power);
+
+    if (isNaN == 0) {
+      if( sum((Y_prob<=0) * abs(Y)) == 0 ) {
+        log_l = sum (Y * log (Y_prob * (1 - (Y_prob<=0)) + (Y_prob<=0)));
+        if (log_l != log_l | (log_l == log_l + 1.0 & log_l == log_l * 2.0)) { 
isNaN = 1; }
+      }
+      else {
+        log_l = -Inf;
+        isNaN = 1;
+      }
+    }
+  }
+
+  if (isNaN == 1) {
+    log_l = - Inf; 
+  }
+}
+
+
+binomial_probability_two_column =
+    function (Matrix[double] linear_terms, int link_type, double link_power)
+    return   (Matrix[double] Y_prob, int isNaN)
+{
+  isNaN = 0;
+
+  # Define some auxiliary matrices
+
+  ones_2 = matrix (1.0, rows = 1, cols = 2);
+  zeros_r = matrix(0, nrow(linear_terms), 1);
+  ones_r = matrix(1, nrow(linear_terms), 1);
+
+  # Begin the function body
+
+  Y_prob = zeros_r %*% ones_2;
+  if (link_type == 1) { # Binomial.power
+    if(link_power == 0) { # Binomial.log
+      Y_prob = cbind(exp(linear_terms), 1-exp(linear_terms));
+    } else if (link_power == 0.5) { # Binomial.sqrt
+      Y_prob = cbind(linear_terms^2, 1-linear_terms^2);
+    } else if (sum (linear_terms < 0) == 0) { # Binomial.power_nonlog
+      Y_prob = cbind(linear_terms^(1.0/link_power), 
1-linear_terms^(1.0/link_power));
+    } else {
+      isNaN = 1;
+    }
+  } else {              # Binomial.non_power
+    is_LT_infinite = cbind(linear_terms==Inf, linear_terms==-Inf);
+    finite_linear_terms = replace (target =        linear_terms, pattern =  
Inf, replacement = 0);
+    finite_linear_terms = replace (target = finite_linear_terms, pattern = 
-Inf, replacement = 0);
+    if (link_type == 2)             { # Binomial.logit
+      Y_prob = cbind(exp(finite_linear_terms), ones_r);
+      Y_prob = Y_prob / rowSums (Y_prob);
+    } else if (link_type == 3)    { # Binomial.probit
+      lt_pos_neg = cbind(finite_linear_terms>=0, finite_linear_terms<0);
+      t_gp = 1.0 / (1.0 + abs (finite_linear_terms) * 0.231641888);  # 
0.231641888 = 0.3275911 / sqrt (2.0)
+      pt_gp = t_gp * ( 0.254829592 
+            + t_gp * (-0.284496736 # "Handbook of Mathematical Functions", ed. 
by M. Abramowitz and I.A. Stegun,
+            + t_gp * ( 1.421413741 # U.S. Nat-l Bureau of Standards, 10th 
print (Dec 1972), Sec. 7.1.26, p. 299
+            + t_gp * (-1.453152027 
+            + t_gp *   1.061405429))));
+      the_gauss_exp = exp (- (finite_linear_terms ^ 2) / 2.0);
+      Y_prob = lt_pos_neg + (0.5 - lt_pos_neg) * the_gauss_exp * pt_gp;
+    } else if (link_type == 4)    { # Binomial.cloglog
+      the_exp = exp (finite_linear_terms);
+      the_exp_exp = exp (- the_exp);
+      is_too_small = ((10000000 + the_exp) == 10000000);
+      Y_prob [, 1] = (1 - is_too_small) * (1 - the_exp_exp) + is_too_small * 
the_exp * (1 - the_exp / 2);
+      Y_prob [, 2] = the_exp_exp;
+    } else if (link_type == 5)    { # Binomial.cauchit
+      Y_prob = 0.5 + cbind(atan(finite_linear_terms), 
-atan(finite_linear_terms)) / pi;
+    } else {
+      isNaN = 1;
+    }
+    Y_prob = Y_prob * (1.0 - rowSums (is_LT_infinite)) + is_LT_infinite;
+  }
+}
+
+
+# THE CG-STEIHAUG PROCEDURE SCRIPT
+
+# Apply Conjugate Gradient - Steihaug algorithm in order to approximately 
minimize
+# 0.5 z^T (X^T diag(w) X + diag (lambda)) z + (g + lambda * beta)^T z
+# under constraint:  ||z|| <= trust_delta.
+# See Alg. 7.2 on p. 171 of "Numerical Optimization" 2nd ed. by Nocedal and 
Wright
+# IN THE ABOVE, "X" IS UNDERSTOOD TO BE "X %*% (SHIFT/SCALE TRANSFORM)"; this 
transform
+# is given separately because sparse "X" may become dense after applying the 
transform.
+#
+get_CG_Steihaug_point =
+    function (Matrix[double] X, Matrix[double] scale_X, Matrix[double] 
shift_X, Matrix[double] w,
+    Matrix[double] g, Matrix[double] beta, Matrix[double] lambda, double 
trust_delta, int max_iter_CG)
+return (Matrix[double] z, double neg_log_l_change, int i_CG, int 
reached_trust_boundary)
+{
+  trust_delta_sq = trust_delta ^ 2;
+  size_CG = nrow (g);
+  z = matrix (0.0, rows = size_CG, cols = 1);
+  neg_log_l_change = 0.0;
+  reached_trust_boundary = 0;
+  g_reg = g + lambda * beta;
+  r_CG = g_reg;
+  p_CG = -r_CG;
+  rr_CG = sum(r_CG * r_CG);
+  eps_CG = rr_CG * min (0.25, sqrt (rr_CG));
+  converged_CG = 0;
+  if (rr_CG < eps_CG) {
+    converged_CG = 1;
+  }
+    
+  max_iteration_CG = max_iter_CG;
+  if (max_iteration_CG <= 0) {
+    max_iteration_CG = size_CG;
+  }
+  i_CG = 0;
+  while (converged_CG == 0)
+  {
+    i_CG = i_CG + 1;
+    ssX_p_CG = diag (scale_X) %*% p_CG;
+    ssX_p_CG [size_CG, ] = ssX_p_CG [size_CG, ] + t(shift_X) %*% p_CG;
+    temp_CG = t(X) %*% (w * (X %*% ssX_p_CG));
+    q_CG = (lambda * p_CG) + diag (scale_X) %*% temp_CG + shift_X %*% temp_CG 
[size_CG, ];
+    pq_CG = sum (p_CG * q_CG);
+    if (pq_CG <= 0) {
+      pp_CG = sum (p_CG * p_CG);  
+      if (pp_CG > 0) {
+        [z, neg_log_l_change] = 
+        get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, 
trust_delta_sq);
+        reached_trust_boundary = 1;
+      } else {
+        neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
+      }
+      converged_CG = 1;
+    }
+    if (converged_CG == 0) {
+      alpha_CG = rr_CG / pq_CG;
+      new_z = z + alpha_CG * p_CG;
+      if (sum(new_z * new_z) >= trust_delta_sq) {
+        pp_CG = sum (p_CG * p_CG);  
+        [z, neg_log_l_change] = 
+        get_trust_boundary_point (g_reg, z, p_CG, q_CG, r_CG, pp_CG, pq_CG, 
trust_delta_sq);
+        reached_trust_boundary = 1;
+        converged_CG = 1;
+      }
+      if (converged_CG == 0) {
+        z = new_z;
+        old_rr_CG = rr_CG;
+        r_CG = r_CG + alpha_CG * q_CG;
+        rr_CG = sum(r_CG * r_CG);
+        if (i_CG == max_iteration_CG | rr_CG < eps_CG) {
+          neg_log_l_change = 0.5 * sum (z * (r_CG + g_reg));
+          reached_trust_boundary = 0;
+          converged_CG = 1;
+        }
+        if (converged_CG == 0) {
+          p_CG = -r_CG + (rr_CG / old_rr_CG) * p_CG;
+        }
+      }
+    }
+  }
+}
+
+
+# An auxiliary function used twice inside the CG-STEIHAUG loop:
+get_trust_boundary_point = 
+    function (Matrix[double] g, Matrix[double] z, Matrix[double] p, 
+              Matrix[double] q, Matrix[double] r, double pp, double pq, 
+              double trust_delta_sq)
+return (Matrix[double] new_z, double f_change)
+{
+  zz = sum (z * z);  pz = sum (p * z);
+  sq_root_d = sqrt (pz * pz - pp * (zz - trust_delta_sq));
+  tau_1 = (- pz + sq_root_d) / pp;
+  tau_2 = (- pz - sq_root_d) / pp;
+  zq = sum (z * q);  gp = sum (g * p);
+  f_extra = 0.5 * sum (z * (r + g));
+  f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1;
+  f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2;
+  new_z = z + ifelse(f_change_1<f_change_2, tau_1, tau_2) * p;
+  f_change = min(f_change_1, f_change_2);
+}
+
+
+# Computes vector w such that  ||X %*% w - 1|| -> MIN  given  avg(X %*% w) = 1
+# We find z_LS such that ||X %*% z_LS - 1|| -> MIN unconditionally, then scale
+# it to compute  w = c * z_LS  such that  sum(X %*% w) = nrow(X).
+straightenX = function (Matrix[double] X, double eps, int max_iter_CG)
+return   (Matrix[double] w)
+{
+  w_X = t(colSums(X));
+  lambda_LS = 0.000001 * sum(X ^ 2) / ncol(X);
+  eps_LS = eps * nrow(X);
+
+  # BEGIN LEAST SQUARES
+
+  r_LS = - w_X;
+  z_LS = matrix (0.0, rows = ncol(X), cols = 1);
+  p_LS = - r_LS;
+  norm_r2_LS = sum (r_LS ^ 2);
+  i_LS = 0;
+  while (i_LS < max_iter_CG & i_LS < ncol(X) & norm_r2_LS >= eps_LS)
+  {
+    q_LS = t(X) %*% X %*% p_LS;
+    q_LS = q_LS + lambda_LS * p_LS;
+    alpha_LS = norm_r2_LS / sum (p_LS * q_LS);
+    z_LS = z_LS + alpha_LS * p_LS;
+    old_norm_r2_LS = norm_r2_LS;
+    r_LS = r_LS + alpha_LS * q_LS;
+    norm_r2_LS = sum (r_LS ^ 2);
+    p_LS = -r_LS + (norm_r2_LS / old_norm_r2_LS) * p_LS;
+    i_LS = i_LS + 1;
+  }
+  # END LEAST SQUARES
+
+  w = (nrow(X) / sum (w_X * z_LS)) * z_LS;
+}
+
+
+round_to_print = function (double x_to_truncate)
+return (double mantissa, int eee)
+{
+  mantissa = 1.0;
+  eee = 0;
+  positive_infinity = Inf;
+  x = abs (x_to_truncate);
+  if (x != x / 2.0) {
+    log_ten = log (10.0);
+    d_eee = round (log (x) / log_ten - 0.5);
+    mantissa = round (x * exp (log_ten * (4.0 - d_eee))) / 10000;
+    if (mantissa == 10.0) {
+      mantissa = 1.0;
+      d_eee = d_eee + 1;
+    }
+    if (x_to_truncate < 0) {
+      mantissa = - mantissa;
+    }
+    eee = 0;
+    pow_two = 1;
+    res_eee = abs (d_eee);
+    while (res_eee != 0) {
+      new_res_eee = round (res_eee / 2.0 - 0.3);
+      if (new_res_eee * 2.0 < res_eee) {
+        eee = eee + pow_two;
+      }
+      res_eee = new_res_eee;
+      pow_two = 2 * pow_two;
+    }
+    if (d_eee < 0) {
+      eee = - eee;
+    }
+  } else {
+    mantissa = x_to_truncate;
+  }
+}
diff --git a/src/main/java/org/apache/sysds/common/Builtins.java 
b/src/main/java/org/apache/sysds/common/Builtins.java
index 60b7c18..4c8e1ff 100644
--- a/src/main/java/org/apache/sysds/common/Builtins.java
+++ b/src/main/java/org/apache/sysds/common/Builtins.java
@@ -89,6 +89,7 @@ public enum Builtins {
        EXP("exp", false),
        EVAL("eval", false),
        FLOOR("floor", false),
+       GLM("glm", true),
        GNMF("gnmf", true),
        GRID_SEARCH("gridSearch", true),
        IFELSE("ifelse", false),
diff --git a/src/test/java/org/apache/sysds/test/TestUtils.java 
b/src/test/java/org/apache/sysds/test/TestUtils.java
index 530dccb..80e0328 100644
--- a/src/test/java/org/apache/sysds/test/TestUtils.java
+++ b/src/test/java/org/apache/sysds/test/TestUtils.java
@@ -2330,4 +2330,387 @@ public class TestUtils
        public static String federatedAddress(String host, int port, String 
input) {
                return host + ':' + port + '/' + input;
        }
+
+       public static double gaussian_probability (double point)
+       //  "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. 
Stegun,
+       //  U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, 
p. 299
+       {
+               double t_gp = 1.0 / (1.0 + Math.abs (point) * 0.231641888);  // 
0.231641888 = 0.3275911 / sqrt (2.0)
+               double erf_gp = 1.0 - t_gp * ( 0.254829592
+                               + t_gp * (-0.284496736
+                               + t_gp * ( 1.421413741
+                               + t_gp * (-1.453152027
+                               + t_gp *   1.061405429)))) * Math.exp (- point 
* point / 2.0);
+               erf_gp = erf_gp * (point > 0 ? 1.0 : -1.0);
+               return (0.5 + 0.5 * erf_gp);
+       }
+
+       public static double logFactorial (double x)
+       //  From paper: C. Lanczos "A Precision Approximation of the Gamma 
Function",
+       //  Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, 
pp. 86-96
+       {
+               final double[] cf = {1.000000000178, 76.180091729406, 
-86.505320327112,
+                               24.014098222230, -1.231739516140, 
0.001208580030, -0.000005363820};
+               double a_5 = cf[0] + cf[1] / (x + 1) + cf[2] / (x + 2) + cf[3] 
/ (x + 3)
+                               + cf[4] / (x + 4) + cf[5] / (x + 5) + cf[6] / 
(x + 6);
+               return Math.log(a_5) + (x + 0.5) * Math.log(x + 5.5) - (x + 
5.5) + 0.91893853320467; // log(sqrt(2 * PI))
+       }
+
+       public static long nextPoisson (Random r, double mu)
+       // Prob[k] = mu^k * exp(-mu) / k!
+       // The main part is from W. H"ormann "The Transformed Rejection Method
+       // for Generating Poisson Random Variables"
+       {
+               if (mu <= 0.0)
+                       return 0;
+               if (mu >= 100000.0)
+                       return Math.round (mu + Math.sqrt (mu) * r.nextGaussian 
());
+               if (mu >= 10.0)
+               {
+                       long output = 0;
+                       double c = mu + 0.445;
+                       double b = 0.931 + 2.53 * Math.sqrt (mu);
+                       double a = -0.059 + 0.02483 * b;
+                       double one_by_alpha = 1.1239 + 1.1328 / (b - 3.4);
+                       double u_r = 0.43;
+                       double v_r = 0.9277 - 3.6224 / (b - 2);
+                       while (true)
+                       {
+                               double U;
+                               double V = r.nextDouble ();
+                               if (V <= 2 * u_r * v_r)
+                               {
+                                       U = V / v_r - u_r;
+                                       output = (long) Math.floor ((2 * a / 
(0.5 - Math.abs (U)) + b) * U + c);
+                                       break;
+                               }
+                               if (V >= v_r)
+                               {
+                                       U = r.nextDouble () - 0.5;
+                               }
+                               else
+                               {
+                                       U = V / v_r - (u_r + 0.5);
+                                       U = Math.signum (U) * 0.5 - U;
+                                       V = v_r * r.nextDouble ();
+                               }
+                               double us = 0.5 - Math.abs (U);
+                               if (0.487 < Math.abs (U) && us < V)
+                                       continue;
+                               long k = (long) Math.floor ((2 * a / us + b) * 
U + c);
+                               double V_to_compare = (V * one_by_alpha) / (a / 
us / us + b);
+                               if (0 <= k &&  Math.log (V_to_compare) <= - mu 
+ k * Math.log (mu) - TestUtils.logFactorial (k))
+                               {
+                                       output = k;
+                                       break;
+                               }
+                       }
+                       return output;
+               }
+               long count = 0;
+               double res_mu = mu;
+               while (res_mu > 0.0)
+               {
+                       count ++;
+                       res_mu += Math.log (r.nextDouble ());
+               }
+               return count - 1;
+       }
+
+       public static double nextGamma (Random r, double alpha)
+       // PDF(x) = x^(alpha-1) * exp(-x) / Gamma(alpha)
+       // D.Knuth "The Art of Computer Programming", 2nd Edition, Vol. 2, Sec. 
3.4.1
+       {
+               double x;
+               if (alpha > 10000.0)
+               {
+                       x = 1.0 - 1.0 / (9.0 * alpha) + r.nextGaussian() / 
Math.sqrt (9.0 * alpha);
+                       return alpha * x * x * x;
+               }
+               else if (alpha > 5.0)
+               {
+                       x = 0.0;
+                       double the_root = Math.sqrt (2.0 * alpha - 1.0);
+                       boolean is_accepted = false;
+                       while (! is_accepted)
+                       {
+                               double y = Math.tan (Math.PI * r.nextDouble());
+                               x = the_root * y + alpha - 1.0;
+                               if (x <= 0)
+                                       continue;
+                               double z = Math.exp ((alpha - 1.0) * (1.0 + 
Math.log (x / (alpha - 1.0))) - x);
+                               is_accepted = (r.nextDouble() <= z * (1.0 + y * 
y));
+                       }
+                       return x;
+               }
+               else if (alpha > 0.0)
+               {
+                       x = 1.0;
+                       double frac_alpha = alpha;
+                       while (frac_alpha >= 1.0)
+                       {
+                               x *= r.nextDouble ();
+                               frac_alpha -= 1.0;
+                       }
+                       double output = - Math.log (x);
+                       if (frac_alpha > 0.0)  // Has to be between 0 and 1
+                       {
+                               double ceee = Math.E / (frac_alpha + Math.E);
+                               boolean is_accepted = false;
+                               while (! is_accepted)
+                               {
+                                       double u = r.nextDouble();
+                                       if (u <= ceee)
+                                       {
+                                               x = Math.pow (u / ceee, 1.0 / 
frac_alpha);
+                                               is_accepted = (r.nextDouble() 
<= Math.exp (- x));
+                                       }
+                                       else
+                                       {
+                                               x = 1.0 - Math.log ((1.0 - u) / 
(1.0 - ceee));
+                                               is_accepted = (r.nextDouble() 
<= Math.pow (x, frac_alpha - 1.0));
+                                       }
+                               }
+                               output += x;
+                       }
+                       return output;
+               }
+               else  //  alpha <= 0.0
+                       return 0.0;
+       }
+
+       public static double[] scaleWeights (double[] w_unscaled, double[][] X, 
double icept, double meanLF, double sigmaLF)
+       {
+               int rows = X.length;
+               int cols = w_unscaled.length;
+               double[] w = new double [cols];
+               for (int j = 0; j < cols; j ++)
+                       w [j] = w_unscaled [j];
+
+               double sum_wx = 0.0;
+               double sum_1x = 0.0;
+               double sum_wxwx = 0.0;
+               double sum_1x1x = 0.0;
+               double sum_wx1x = 0.0;
+
+               for (int i = 0; i < rows; i ++)
+               {
+                       double wx = 0.0;
+                       double one_x = 0.0;
+                       for (int j = 0; j < cols; j ++)
+                       {
+                               wx += w [j] * X [i][j];
+                               one_x += X [i][j];
+                       }
+                       sum_wx += wx;
+                       sum_1x += one_x;
+                       sum_wxwx += wx * wx;
+                       sum_1x1x += one_x * one_x;
+                       sum_wx1x += wx * one_x;
+               }
+
+               double a0 = (meanLF - icept) * rows * sum_wx / (sum_wx * sum_wx 
+ sum_1x * sum_1x);
+               double b0 = (meanLF - icept) * rows * sum_1x / (sum_wx * sum_wx 
+ sum_1x * sum_1x);
+               double a1 = sum_1x;
+               double b1 = - sum_wx;
+               double qA = a1 * a1 * sum_wxwx + 2 * a1 * b1 * sum_wx1x + b1 * 
b1 * sum_1x1x;
+               double qB = 2 * (a0 * a1 * sum_wxwx + a0 * b1 * sum_wx1x + a1 * 
b0 * sum_wx1x + b0 * b1 * sum_1x1x);
+               double qC_nosigmaLF = a0 * a0 * sum_wxwx + 2 * a0 * b0 * 
sum_wx1x + b0 * b0 * sum_1x1x - rows * (meanLF - icept) * (meanLF - icept);
+               double qC = qC_nosigmaLF - rows * sigmaLF * sigmaLF;
+               double qD = qB * qB - 4 * qA * qC;
+               if (qD < 0)
+               {
+                       double new_sigmaLF = Math.sqrt (qC_nosigmaLF / rows - 
qB * qB / (4 * qA * rows));
+                       String error_message = String.format ("Cannot generate 
the weights: linear form variance demand is too tight!  Try sigmaLF >%8.4f", 
new_sigmaLF);
+                       System.out.println (error_message);
+                       System.out.flush ();
+                       throw new IllegalArgumentException (error_message);
+               }
+               double t = (- qB + Math.sqrt (qD)) / (2 * qA);
+               double a = a0 + t * a1;
+               double b = b0 + t * b1;
+               for (int j = 0; j < cols; j ++)
+                       w [j] = a * w [j] + b;
+
+               double sum_eta = 0.0;
+               double sum_sq_eta = 0.0;
+               for (int i = 0; i < rows; i ++)
+               {
+                       double eta = 0.0;
+                       for (int j = 0; j < cols; j ++)
+                               eta +=  w [j] * X [i][j];
+                       sum_eta += eta;
+                       sum_sq_eta += eta * eta;
+               }
+               double mean_eta = icept + sum_eta / rows;
+               double sigma_eta = Math.sqrt ((sum_sq_eta - sum_eta * sum_eta / 
rows) / (rows - 1));
+               System.out.println (String.format ("Linear Form Mean  =%8.4f 
(Desired:%8.4f)",  mean_eta, meanLF));
+               System.out.println (String.format ("Linear Form Sigma =%8.4f 
(Desired:%8.4f)", sigma_eta, sigmaLF));
+
+               return w;
+       }
+       public static class GLMDist
+       {
+               final int dist;         // GLM distribution family type
+               final double param;     // GLM parameter, typically variance 
power of the mean
+               final int link;         // GLM link function type
+               final double link_pow;  // GLM link function as power of the 
mean
+               double dispersion = 1.0;
+               long binom_n = 1;
+
+               public GLMDist (int _dist, double _param, int _link, double 
_link_pow) {
+                       dist = _dist; param = _param; link = _link; link_pow = 
_link_pow;
+               }
+
+               public void set_dispersion (double _dispersion) {
+                       dispersion = _dispersion;
+               }
+
+               public void set_binom_n (long _n) {
+                       binom_n = _n;
+               }
+
+               public boolean is_binom_n_needed () {
+                       return (dist == 2 && param == 1.0);
+               }
+
+               public double nextGLM (Random r, double eta) {
+                       double mu = 0.0;
+                       switch (link) {
+                               case 1: // LINK: POWER
+                                       if (link_pow == 0.0)       // LINK: log
+                                               mu = Math.exp (eta);
+                                       else if (link_pow ==  1.0) // LINK: 
identity
+                                               mu = eta;
+                                       else if (link_pow == -1.0) // LINK: 
inverse
+                                               mu = 1.0 / eta;
+                                       else if (link_pow ==  0.5) // LINK: sqrt
+                                               mu = eta * eta;
+                                       else if (link_pow == -2.0) // LINK: 
1/mu^2
+                                               mu = Math.sqrt (1.0 / eta);
+                                       else
+                                               mu = Math.pow (eta, 1.0 / 
link_pow);
+                                       break;
+                               case 2: // LINK: logit
+                                       mu = 1.0 / (1.0 + Math.exp (- eta));
+                                       break;
+                               case 3: // LINK: probit
+                                       mu = TestUtils.gaussian_probability 
(eta);
+                                       break;
+                               case 4: // LINK: cloglog
+                                       mu = 1.0 - Math.exp (- Math.exp (eta));
+                                       break;
+                               case 5: // LINK: cauchit
+                                       mu = 0.5 + Math.atan (eta) / Math.PI;
+                                       break;
+                               default:
+                                       mu = 0.0;
+                       }
+
+                       double output = 0.0;
+                       if (dist == 1)  // POWER
+                       {
+                               double var_pow = param;
+                               if (var_pow == 0.0) // Gaussian, with 
dispersion = sigma^2
+                               {
+                                       output = mu + Math.sqrt (dispersion) * 
r.nextGaussian ();
+                               }
+                               else if (var_pow == 1.0) // Poisson; Negative 
Binomial if overdispersion
+                               {
+                                       double lambda = mu;
+                                       if (dispersion > 1.000000001)
+                                       {
+                                               // output = Negative Binomial 
random variable with:
+                                               //       Number of failures = 
mu / (dispersion - 1.0)
+                                               //       Probability of success 
= 1.0 - 1.0 / dispersion
+                                               lambda = (dispersion - 1.0) * 
TestUtils.nextGamma (r, mu / (dispersion - 1.0));
+                                       }
+                                       output = TestUtils.nextPoisson (r, 
lambda);
+                               }
+                               else if (var_pow == 2.0) // Gamma
+                               {
+                                       double beta = dispersion * mu;
+                                       output = beta * TestUtils.nextGamma (r, 
mu / beta);
+                               }
+                               else if (var_pow == 3.0) // Inverse Gaussian
+                               {
+                                       // From: Raj Chhikara, J.L. Folks.  The 
Inverse Gaussian Distribution:
+                                       // Theory: Methodology, and 
Applications.  CRC Press, 1988, Section 4.5
+                                       double y_Gauss = r.nextGaussian ();
+                                       double mu_y_sq = mu * y_Gauss * y_Gauss;
+                                       double x_invG = 0.5 * dispersion * mu * 
(2.0 / dispersion + mu_y_sq
+                                                       - Math.sqrt (mu_y_sq * 
(4.0 / dispersion + mu_y_sq)));
+                                       output = ((mu + x_invG) * 
r.nextDouble() < mu ? x_invG : (mu * mu / x_invG));
+                               }
+                               else
+                               {
+                                       output = mu + Math.sqrt (12.0 * 
dispersion) * (r.nextDouble () - 0.5);
+                               }
+                       }
+                       else if (dist == 2 && param != 1.0) // Binomial, 
dispersion ignored
+                       {
+                               double bernoulli_zero = param;
+                               output = (r.nextDouble () < mu ? 1.0 : 
bernoulli_zero);
+                       }
+                       else if (dist == 2) // param == 1.0, Binomial 
Two-Column, dispersion used
+                       {
+                               double alpha_plus_beta = (binom_n - dispersion) 
/ (dispersion - 1.0);
+                               double alpha = mu * alpha_plus_beta;
+                               double x = TestUtils.nextGamma (r, alpha);
+                               double y = TestUtils.nextGamma (r, 
alpha_plus_beta - alpha);
+                               double p = x / (x + y);
+                               long out = 0;
+                               for (long i = 0; i < binom_n; i++)
+                                       if (r.nextDouble() < p)
+                                               out ++;
+                               output = out;
+                       }
+                       return output;
+               }
+       }
+       
+       public static double[][] generateUnbalancedGLMInputDataX(int rows, int 
cols, double logFeatureVarianceDisbalance) {
+               double[][] X = generateTestMatrix(rows, cols, -1.0, 1.0, 1.0, 
34567);
+               double shift_X = 1.0;
+               // make the feature columns of X variance disbalanced
+               for (int j = 0; j < cols; j++) {
+                       double varFactor = Math.pow(10.0, 
logFeatureVarianceDisbalance * (-0.25 + j / (double) (2 * cols - 2)));
+                       for (int i = 0; i < rows; i++)
+                               X[i][j] = shift_X + X[i][j] * varFactor;
+               }
+               return X;
+       }
+       
+       public static double[] generateUnbalancedGLMInputDataB(double[][] X, 
int cols, double intercept, double avgLinearForm, double stdevLinearForm, 
Random r) {
+               double[] beta_unscaled = new double[cols];
+               for (int j = 0; j < cols; j++)
+                       beta_unscaled[j] = r.nextGaussian();
+               return scaleWeights(beta_unscaled, X, intercept, avgLinearForm, 
stdevLinearForm);
+       }
+       
+       public static double[][] generateUnbalancedGLMInputDataY(double[][] X, 
double[] beta, int rows, int cols, GLMDist glmdist, double intercept, double 
dispersion, Random r) {
+               double[][] y = null;
+               if (glmdist.is_binom_n_needed())
+                       y = new double[rows][2];
+               else
+                       y = new double[rows][1];
+
+               for (int i = 0; i < rows; i++) {
+                       double eta = intercept;
+                       for (int j = 0; j < cols; j++) {
+                               eta += X[i][j] * beta[j];
+                       }
+                       if (glmdist.is_binom_n_needed()) {
+                               long n = Math.round(dispersion * (1.0 + 2.0 * 
r.nextDouble()) + 1.0);
+                               glmdist.set_binom_n(n);
+                               y[i][0] = glmdist.nextGLM(r, eta);
+                               y[i][1] = n - y[i][0];
+                       }
+                       else {
+                               y[i][0] = glmdist.nextGLM(r, eta);
+                       }
+               }
+               
+               return y;
+       }
 }
diff --git a/src/test/java/org/apache/sysds/test/applications/GLMTest.java 
b/src/test/java/org/apache/sysds/test/applications/GLMTest.java
index 1cc5878..b0242e2 100644
--- a/src/test/java/org/apache/sysds/test/applications/GLMTest.java
+++ b/src/test/java/org/apache/sysds/test/applications/GLMTest.java
@@ -216,79 +216,26 @@ public class GLMTest extends AutomatedTestBase
                                dispersion +
                                "} ------------");
                
-               int rows = numRecords;                          // # of rows in 
the training data 
-               int cols = numFeatures;                         // # of 
features in the training data 
+               int rows = numRecords;  // # of rows in the training data 
+               int cols = numFeatures; // # of features in the training data
                
-               GLMDist glmdist = new GLMDist (distFamilyType, distParam, 
linkType, linkPower);
+               TestUtils.GLMDist glmdist = new TestUtils.GLMDist 
(distFamilyType, distParam, linkType, linkPower);
                glmdist.set_dispersion (dispersion);
                
                getAndLoadTestConfiguration(TEST_NAME);
 
                // prepare training data set
-                                                          
-               Random r = new Random (314159265);
-               double[][] X = getRandomMatrix (rows, cols, -1.0, 1.0, 1.0, 
34567); // 271828183);
-               double shift_X = 1.0;
-               
-               // make the feature columns of X variance disbalanced
-               
-               for (int j = 0; j < cols; j ++)
-               {
-                       double varFactor = Math.pow (10.0, 
logFeatureVarianceDisbalance * (- 0.25 + j / (double) (2 * cols - 2)));
-                       for (int i = 0; i < rows; i ++)
-                               X [i][j] = shift_X + X [i][j] * varFactor;
-               }
-               
-               double[] beta_unscaled = new double [cols];
-               for (int j = 0; j < cols; j ++)
-                       beta_unscaled [j] = r.nextGaussian ();
-               double[] beta = scaleWeights (beta_unscaled, X, intercept, 
avgLinearForm, stdevLinearForm);
+               Random r = new Random(314159265);
+               double[][] X = TestUtils.generateUnbalancedGLMInputDataX(rows, 
cols, logFeatureVarianceDisbalance);
+               double[] beta = TestUtils.generateUnbalancedGLMInputDataB(X, 
cols, intercept, avgLinearForm, stdevLinearForm, r);
+               double[][] y = TestUtils.generateUnbalancedGLMInputDataY(X, 
beta, rows, cols, glmdist, intercept, dispersion, r);
 
-               long nnz_in_X = 0;
-               long nnz_in_y = 0;
-
-               double[][] y = null;
-               if (glmdist.is_binom_n_needed())
-                       y = new double [rows][2];
-               else
-                       y = new double [rows][1];
-               
-               
-               for (int i = 0; i < rows; i ++)
-               {
-                       double eta = intercept;
-                       for (int j = 0; j < cols; j ++)
-                       {
-                               eta += X [i][j] * beta [j];
-                               if (X [i][j] != 0.0)
-                                       nnz_in_X ++;
-                       }
-                       if (glmdist.is_binom_n_needed())
-                       {
-                               long n = Math.round (dispersion * (1.0 + 2.0 * 
r.nextDouble()) + 1.0);
-                               glmdist.set_binom_n (n);
-                               y [i][0] = glmdist.nextGLM (r, eta);
-                               y [i][1] = n - y[i][0];
-                               if (y [i][0] != 0.0)
-                                       nnz_in_y ++;
-                               if (y [i][1] != 0.0)
-                                       nnz_in_y ++;
-                       }
-                       else
-                       {
-                               y [i][0] = glmdist.nextGLM (r, eta);
-                               if (y [i][0] != 0.0)
-                                       nnz_in_y ++;                            
-                       }
-                       
-               }
-               
                int defaultBlockSize = OptimizerUtils.DEFAULT_BLOCKSIZE;
 
-               MatrixCharacteristics mc_X = new MatrixCharacteristics(rows, 
cols, defaultBlockSize, nnz_in_X);
+               MatrixCharacteristics mc_X = new MatrixCharacteristics(rows, 
cols, defaultBlockSize, -1);
                writeInputMatrixWithMTD ("X", X, true, mc_X);
 
-               MatrixCharacteristics mc_y = new MatrixCharacteristics(rows, 
y[0].length, defaultBlockSize, nnz_in_y);
+               MatrixCharacteristics mc_y = new MatrixCharacteristics(rows, 
y[0].length, defaultBlockSize, -1);
                writeInputMatrixWithMTD ("Y", y, true, mc_y);
                
                List<String> proArgs = new ArrayList<>();
@@ -353,350 +300,4 @@ public class GLMTest extends AutomatedTestBase
                }
                TestUtils.compareMatrices (wR, wSYSTEMDS, eps * max_abs_beta, 
"wR", "wSYSTEMDS");
        }
-       
-       double[] scaleWeights (double[] w_unscaled, double[][] X, double icept, 
double meanLF, double sigmaLF)
-       {
-               int rows = X.length;
-               int cols = w_unscaled.length;
-               double[] w = new double [cols];
-               for (int j = 0; j < cols; j ++)
-                       w [j] = w_unscaled [j];
-               
-               double sum_wx = 0.0;
-               double sum_1x = 0.0;
-               double sum_wxwx = 0.0;
-               double sum_1x1x = 0.0;
-               double sum_wx1x = 0.0; 
-
-               for (int i = 0; i < rows; i ++)
-               {
-                       double wx = 0.0;
-                       double one_x = 0.0;
-                       for (int j = 0; j < cols; j ++)
-                       {
-                               wx += w [j] * X [i][j];
-                               one_x += X [i][j];
-                       }
-                       sum_wx += wx;
-                       sum_1x += one_x;
-                       sum_wxwx += wx * wx;
-                       sum_1x1x += one_x * one_x;
-                       sum_wx1x += wx * one_x; 
-               }
-               
-               double a0 = (meanLF - icept) * rows * sum_wx / (sum_wx * sum_wx 
+ sum_1x * sum_1x);
-               double b0 = (meanLF - icept) * rows * sum_1x / (sum_wx * sum_wx 
+ sum_1x * sum_1x);
-               double a1 = sum_1x;
-               double b1 = - sum_wx;
-               double qA = a1 * a1 * sum_wxwx + 2 * a1 * b1 * sum_wx1x + b1 * 
b1 * sum_1x1x;
-               double qB = 2 * (a0 * a1 * sum_wxwx + a0 * b1 * sum_wx1x + a1 * 
b0 * sum_wx1x + b0 * b1 * sum_1x1x);
-               double qC_nosigmaLF = a0 * a0 * sum_wxwx + 2 * a0 * b0 * 
sum_wx1x + b0 * b0 * sum_1x1x - rows * (meanLF - icept) * (meanLF - icept);
-               double qC = qC_nosigmaLF - rows * sigmaLF * sigmaLF;
-               double qD = qB * qB - 4 * qA * qC;
-               if (qD < 0)
-               {
-                       double new_sigmaLF = Math.sqrt (qC_nosigmaLF / rows - 
qB * qB / (4 * qA * rows));
-                       String error_message = String.format ("Cannot generate 
the weights: linear form variance demand is too tight!  Try sigmaLF >%8.4f", 
new_sigmaLF);
-                       System.out.println (error_message);
-                       System.out.flush ();
-                       throw new IllegalArgumentException (error_message);
-               }
-               double t = (- qB + Math.sqrt (qD)) / (2 * qA);
-               double a = a0 + t * a1;
-               double b = b0 + t * b1;
-               for (int j = 0; j < cols; j ++)
-                       w [j] = a * w [j] + b;
-               
-               double sum_eta = 0.0;
-               double sum_sq_eta = 0.0;
-               for (int i = 0; i < rows; i ++)
-               {
-                       double eta = 0.0;
-                       for (int j = 0; j < cols; j ++)
-                               eta +=  w [j] * X [i][j];
-                       sum_eta += eta;
-                       sum_sq_eta += eta * eta;
-               }
-               double mean_eta = icept + sum_eta / rows;
-               double sigma_eta = Math.sqrt ((sum_sq_eta - sum_eta * sum_eta / 
rows) / (rows - 1));
-               System.out.println (String.format ("Linear Form Mean  =%8.4f 
(Desired:%8.4f)",  mean_eta, meanLF));
-               System.out.println (String.format ("Linear Form Sigma =%8.4f 
(Desired:%8.4f)", sigma_eta, sigmaLF));
-               
-               return w;
-       }
-       
-       public class GLMDist
-       {
-               final int dist;          // GLM distribution family type
-               final double param;      // GLM parameter, typically variance 
power of the mean
-               final int link;          // GLM link function type
-               final double link_pow;  // GLM link function as power of the 
mean
-               @SuppressWarnings("hiding")
-               double dispersion = 1.0;
-               long binom_n = 1;
-               
-               GLMDist (int _dist, double _param, int _link, double _link_pow)
-               {
-                       dist = _dist; param = _param; link = _link; link_pow = 
_link_pow;
-               }
-               
-               void set_dispersion (double _dispersion)
-               {
-                       dispersion = _dispersion;
-               }
-               
-               void set_binom_n (long _n)
-               {
-                       binom_n = _n;
-               }
-               
-               boolean is_binom_n_needed ()
-               {
-                       return (dist == 2 && param == 1.0);
-               }
-               
-               double nextGLM (Random r, double eta)
-               {
-                       double mu = 0.0;
-                       switch (link)
-                       {
-                       case 1: // LINK: POWER
-                               if (link_pow == 0.0)       // LINK: log
-                                       mu = Math.exp (eta);
-                               else if (link_pow ==  1.0) // LINK: identity
-                                       mu = eta;
-                               else if (link_pow == -1.0) // LINK: inverse
-                                       mu = 1.0 / eta;
-                               else if (link_pow ==  0.5) // LINK: sqrt
-                                       mu = eta * eta;
-                               else if (link_pow == -2.0) // LINK: 1/mu^2
-                                       mu = Math.sqrt (1.0 / eta);
-                               else
-                                       mu = Math.pow (eta, 1.0 / link_pow);
-                               break;
-                       case 2: // LINK: logit
-                               mu = 1.0 / (1.0 + Math.exp (- eta));
-                               break;
-                       case 3: // LINK: probit
-                               mu = gaussian_probability (eta);
-                               break;
-                       case 4: // LINK: cloglog
-                               mu = 1.0 - Math.exp (- Math.exp (eta));
-                               break;
-                       case 5: // LINK: cauchit
-                               mu = 0.5 + Math.atan (eta) / Math.PI;
-                               break;
-                       default:
-                               mu = 0.0;
-                       }
-               
-                       double output = 0.0;
-                       if (dist == 1)  // POWER
-                       {
-                               double var_pow = param;
-                               if (var_pow == 0.0) // Gaussian, with 
dispersion = sigma^2
-                               {
-                                       output = mu + Math.sqrt (dispersion) * 
r.nextGaussian ();
-                               }
-                               else if (var_pow == 1.0) // Poisson; Negative 
Binomial if overdispersion
-                               {
-                                       double lambda = mu;
-                                       if (dispersion > 1.000000001)
-                                       {
-                                               // output = Negative Binomial 
random variable with:
-                                               //       Number of failures = 
mu / (dispersion - 1.0)
-                                               //       Probability of success 
= 1.0 - 1.0 / dispersion
-                                               lambda = (dispersion - 1.0) * 
nextGamma (r, mu / (dispersion - 1.0));
-                                       }
-                                       output = nextPoisson (r, lambda);
-                               }
-                               else if (var_pow == 2.0) // Gamma
-                               {
-                                       double beta = dispersion * mu;
-                                       output = beta * nextGamma (r, mu / 
beta);
-                               }
-                               else if (var_pow == 3.0) // Inverse Gaussian
-                               {
-                                       // From: Raj Chhikara, J.L. Folks.  The 
Inverse Gaussian Distribution: 
-                                       // Theory: Methodology, and 
Applications.  CRC Press, 1988, Section 4.5
-                                       double y_Gauss = r.nextGaussian ();
-                                       double mu_y_sq = mu * y_Gauss * y_Gauss;
-                                       double x_invG = 0.5 * dispersion * mu * 
(2.0 / dispersion + mu_y_sq 
-                                                       - Math.sqrt (mu_y_sq * 
(4.0 / dispersion + mu_y_sq)));
-                                       output = ((mu + x_invG) * 
r.nextDouble() < mu ? x_invG : (mu * mu / x_invG));
-                               }
-                               else
-                               {
-                                       output = mu + Math.sqrt (12.0 * 
dispersion) * (r.nextDouble () - 0.5);
-                               }
-                       }
-                       else if (dist == 2 && param != 1.0) // Binomial, 
dispersion ignored
-                       {
-                               double bernoulli_zero = param;
-                               output = (r.nextDouble () < mu ? 1.0 : 
bernoulli_zero);
-                       }
-                       else if (dist == 2) // param == 1.0, Binomial 
Two-Column, dispersion used
-                       {
-                               double alpha_plus_beta = (binom_n - dispersion) 
/ (dispersion - 1.0);
-                               double alpha = mu * alpha_plus_beta;
-                               double x = nextGamma (r, alpha);
-                               double y = nextGamma (r, alpha_plus_beta - 
alpha);
-                               double p = x / (x + y);
-                               long out = 0;
-                               for (long i = 0; i < binom_n; i++)
-                                       if (r.nextDouble() < p)
-                                               out ++;
-                               output = out;
-                       }
-                       return output;
-               }
-       }
-       
-       public double nextGamma (Random r, double alpha)
-       // PDF(x) = x^(alpha-1) * exp(-x) / Gamma(alpha)
-       // D.Knuth "The Art of Computer Programming", 2nd Edition, Vol. 2, Sec. 
3.4.1
-       {
-               double x;
-               if (alpha > 10000.0)
-               {
-                       x = 1.0 - 1.0 / (9.0 * alpha) + r.nextGaussian() / 
Math.sqrt (9.0 * alpha);
-                       return alpha * x * x * x;
-               }
-               else if (alpha > 5.0)
-               {
-                       x = 0.0;
-                       double the_root = Math.sqrt (2.0 * alpha - 1.0);
-                       boolean is_accepted = false;
-                       while (! is_accepted)
-                       {
-                               double y = Math.tan (Math.PI * r.nextDouble());
-                               x = the_root * y + alpha - 1.0;
-                               if (x <= 0)
-                                       continue;
-                               double z = Math.exp ((alpha - 1.0) * (1.0 + 
Math.log (x / (alpha - 1.0))) - x);
-                               is_accepted = (r.nextDouble() <= z * (1.0 + y * 
y));
-                       }
-                       return x;
-               }
-               else if (alpha > 0.0) 
-               {
-                       x = 1.0;
-                       double frac_alpha = alpha;
-                       while (frac_alpha >= 1.0)
-                       {
-                               x *= r.nextDouble ();
-                               frac_alpha -= 1.0;
-                       }
-                       double output = - Math.log (x);
-                       if (frac_alpha > 0.0)  // Has to be between 0 and 1
-                       {
-                               double ceee = Math.E / (frac_alpha + Math.E);
-                               boolean is_accepted = false;
-                               while (! is_accepted)
-                               {
-                                       double u = r.nextDouble();
-                                       if (u <= ceee)
-                                       {
-                                               x = Math.pow (u / ceee, 1.0 / 
frac_alpha);
-                                               is_accepted = (r.nextDouble() 
<= Math.exp (- x));
-                                       }
-                                       else
-                                       {
-                                               x = 1.0 - Math.log ((1.0 - u) / 
(1.0 - ceee));
-                                               is_accepted = (r.nextDouble() 
<= Math.pow (x, frac_alpha - 1.0));
-                                       }
-                               }
-                               output += x;
-                       }
-                       return output;
-               }
-               else  //  alpha <= 0.0
-                       return 0.0;
-       }
-
-       public long nextPoisson (Random r, double mu)
-       // Prob[k] = mu^k * exp(-mu) / k!
-       // The main part is from W. H"ormann "The Transformed Rejection Method
-       // for Generating Poisson Random Variables"
-       {
-               if (mu <= 0.0)
-                       return 0;
-               if (mu >= 100000.0)
-                       return Math.round (mu + Math.sqrt (mu) * r.nextGaussian 
());
-               if (mu >= 10.0)
-               {
-                       long output = 0;
-                       double c = mu + 0.445;
-                       double b = 0.931 + 2.53 * Math.sqrt (mu);
-                       double a = -0.059 + 0.02483 * b;
-                       double one_by_alpha = 1.1239 + 1.1328 / (b - 3.4);
-                       double u_r = 0.43;
-                       double v_r = 0.9277 - 3.6224 / (b - 2);
-                       while (true)
-                       {
-                               double U;
-                               double V = r.nextDouble ();
-                               if (V <= 2 * u_r * v_r)
-                               {
-                                       U = V / v_r - u_r;
-                                       output = (long) Math.floor ((2 * a / 
(0.5 - Math.abs (U)) + b) * U + c);
-                                       break;
-                               }
-                               if (V >= v_r)
-                               {
-                                       U = r.nextDouble () - 0.5;
-                               }
-                               else
-                               {
-                                       U = V / v_r - (u_r + 0.5);
-                                       U = Math.signum (U) * 0.5 - U;
-                                       V = v_r * r.nextDouble ();
-                               }
-                               double us = 0.5 - Math.abs (U);
-                               if (0.487 < Math.abs (U) && us < V)
-                                       continue;
-                               long k = (long) Math.floor ((2 * a / us + b) * 
U + c);
-                               double V_to_compare = (V * one_by_alpha) / (a / 
us / us + b);
-                               if (0 <= k &&  Math.log (V_to_compare) <= - mu 
+ k * Math.log (mu) - logFactorial (k))
-                               {
-                                       output = k;
-                                       break;
-                               }
-                       }
-                       return output;
-               }
-               long count = 0;
-               double res_mu = mu;
-               while (res_mu > 0.0)
-               {
-                       count ++;
-                       res_mu += Math.log (r.nextDouble ());
-               }
-               return count - 1;
-       }
-
-       static double logFactorial (double x)
-       //  From paper: C. Lanczos "A Precision Approximation of the Gamma 
Function",
-       //  Journal of the SIAM: Numerical Analysis, Series B, Vol. 1, 1964, 
pp. 86-96
-       {
-               final double[] cf = {1.000000000178, 76.180091729406, 
-86.505320327112,
-                       24.014098222230, -1.231739516140, 0.001208580030, 
-0.000005363820};
-               double a_5 = cf[0] + cf[1] / (x + 1) + cf[2] / (x + 2) + cf[3] 
/ (x + 3) 
-                       + cf[4] / (x + 4) + cf[5] / (x + 5) + cf[6] / (x + 6);
-               return Math.log(a_5) + (x + 0.5) * Math.log(x + 5.5) - (x + 
5.5) + 0.91893853320467; // log(sqrt(2 * PI))
-       }
-       
-       public double gaussian_probability (double point)
-       //  "Handbook of Mathematical Functions", ed. by M. Abramowitz and I.A. 
Stegun,
-       //  U.S. Nat-l Bureau of Standards, 10th print (Dec 1972), Sec. 7.1.26, 
p. 299
-       {
-               double t_gp = 1.0 / (1.0 + Math.abs (point) * 0.231641888);  // 
0.231641888 = 0.3275911 / sqrt (2.0)
-               double erf_gp = 1.0 - t_gp * ( 0.254829592 
-                       + t_gp * (-0.284496736 
-                       + t_gp * ( 1.421413741 
-                       + t_gp * (-1.453152027 
-                       + t_gp *   1.061405429)))) * Math.exp (- point * point 
/ 2.0);
-               erf_gp = erf_gp * (point > 0 ? 1.0 : -1.0);
-               return (0.5 + 0.5 * erf_gp);
-       }
 }
diff --git 
a/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGLMTest.java 
b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGLMTest.java
new file mode 100644
index 0000000..a6a3939
--- /dev/null
+++ b/src/test/java/org/apache/sysds/test/functions/builtin/BuiltinGLMTest.java
@@ -0,0 +1,269 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.functions.builtin;
+
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.Collection;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Random;
+
+import org.apache.sysds.common.Types;
+import org.apache.sysds.hops.OptimizerUtils;
+import org.apache.sysds.lops.LopProperties;
+import org.apache.sysds.runtime.matrix.data.MatrixValue;
+import org.apache.sysds.runtime.meta.MatrixCharacteristics;
+import org.apache.sysds.test.AutomatedTestBase;
+import org.apache.sysds.test.TestUtils;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+
+@RunWith(value = Parameterized.class)
[email protected]
+
+public class BuiltinGLMTest extends AutomatedTestBase
+{
+       protected final static String TEST_NAME = "glmTest";
+       protected final static String TEST_DIR = "functions/builtin/";
+       protected String TEST_CLASS_DIR = TEST_DIR + 
BuiltinGLMTest.class.getSimpleName() + "/";
+       double eps = 1e-4;
+       // SUPPORTED GLM DISTRIBUTION FAMILIES AND LINKS:
+       // -----------------------------------------------
+       // INPUT PARAMETERS:    MEANING:                        Cano-
+       // dfam vpow link lpow  Distribution.link   nical?
+       // -----------------------------------------------
+       //  1   0.0   1  -1.0   Gaussian.inverse
+       //  1   0.0   1   0.0   Gaussian.log
+       //  1   0.0   1   1.0   Gaussian.id               Yes
+       //  1   1.0   1   0.0   Poisson.log               Yes
+       //  1   1.0   1   0.5   Poisson.sqrt
+       //  1   1.0   1   1.0   Poisson.id
+       //  1   2.0   1  -1.0   Gamma.inverse           Yes
+       //  1   2.0   1   0.0   Gamma.log
+       //  1   2.0   1   1.0   Gamma.id
+       //  1   3.0   1  -2.0   InvGaussian.1/mu^2   Yes
+       //  1   3.0   1  -1.0   InvGaussian.inverse
+       //  1   3.0   1   0.0   InvGaussian.log
+       //  1   3.0   1   1.0   InvGaussian.id
+       //  1   *       1       *       AnyVariance.AnyLink
+       // -----------------------------------------------
+       //  2   *       1   0.0   Binomial.log
+       //  2   *       2       *       Binomial.logit     Yes
+       //  2   *       3       *       Binomial.probit
+       //  2   *       4       *       Binomial.cloglog
+       //  2   *       5       *       Binomial.cauchit
+       // -----------------------------------------------
+
+       protected int numRecords, numFeatures, distFamilyType, linkType, 
intercept;
+       protected double distParam, linkPower, logFeatureVarianceDisbalance, 
avgLinearForm, stdevLinearForm, dispersion;
+
+       public BuiltinGLMTest(int numRecords_, int numFeatures_, int 
distFamilyType_, double distParam_,
+                       int linkType_, double linkPower_, double 
logFeatureVarianceDisbalance_,
+                       double avgLinearForm_, double stdevLinearForm_, double 
dispersion_)
+       {
+               this.numRecords = numRecords_;
+               this.numFeatures = numFeatures_;
+               this.distFamilyType = distFamilyType_;
+               this.distParam = distParam_;
+               this.linkType = linkType_;
+               this.linkPower = linkPower_;
+               this.logFeatureVarianceDisbalance = 
logFeatureVarianceDisbalance_;
+               this.avgLinearForm = avgLinearForm_;
+               this.stdevLinearForm = stdevLinearForm_;
+               this.dispersion = dispersion_;
+       }
+
+       private void setIntercept(int intercept_)
+       {
+               intercept = intercept_/100;
+       }
+
+       @Override
+       public void setUp()
+       {
+               TestUtils.clearAssertionInformation();
+               addTestConfiguration(TEST_CLASS_DIR, TEST_NAME);
+       }
+
+       @Test
+       public void glmTestIntercept_0_CP() {
+               setIntercept(0);
+               runtestGLM();
+       }
+
+       @Test
+       public void glmTestIntercept_1_CP() {
+               setIntercept(1);
+               runtestGLM();
+       }
+
+       @Test
+       public void glmTestIntercept_2_CP() {
+               setIntercept(2);
+               runtestGLM();
+       }
+
+
+       public void runtestGLM() {
+               Types.ExecMode platformOld = 
setExecMode(LopProperties.ExecType.CP);
+               try {
+                       int rows = numRecords;                // # of rows in 
the training data
+                       int cols = numFeatures;                // # of features 
in the training data
+                       System.out.println("------------ BEGIN " + TEST_NAME + 
" TEST WITH {" + rows + ", " + cols 
+                               + ", " + distFamilyType + ", " + distParam + ", 
" + linkType + ", " + linkPower + ", " 
+                               + intercept + ", " + 
logFeatureVarianceDisbalance + ", " + avgLinearForm + ", " + stdevLinearForm 
+                               + ", " + dispersion + "} ------------");
+
+                       TestUtils.GLMDist glmdist = new 
TestUtils.GLMDist(distFamilyType, distParam, linkType, linkPower);
+                       glmdist.set_dispersion(dispersion);
+
+                       loadTestConfiguration(getTestConfiguration(TEST_NAME));
+
+                       // prepare training data set
+                       Random r = new Random(314159265);
+                       double[][] X = 
TestUtils.generateUnbalancedGLMInputDataX(rows, cols, 
logFeatureVarianceDisbalance);
+                       double[] beta = 
TestUtils.generateUnbalancedGLMInputDataB(X, cols, intercept, avgLinearForm, 
stdevLinearForm, r);
+                       double[][] y = 
TestUtils.generateUnbalancedGLMInputDataY(X, beta, rows, cols, glmdist, 
intercept, dispersion, r);
+                       
+                       int defaultBlockSize = OptimizerUtils.DEFAULT_BLOCKSIZE;
+
+                       MatrixCharacteristics mc_X = new 
MatrixCharacteristics(rows, cols, defaultBlockSize, -1);
+                       writeInputMatrixWithMTD("X", X, true, mc_X);
+
+                       MatrixCharacteristics mc_y = new 
MatrixCharacteristics(rows, y[0].length, defaultBlockSize, -1);
+                       writeInputMatrixWithMTD("Y", y, true, mc_y);
+
+                       String HOME = SCRIPT_DIR + TEST_DIR;
+                       fullDMLScriptName = HOME + TEST_NAME + ".dml";
+                       List<String> proArgs = new ArrayList<>();
+                       proArgs.add("-exec");
+                       proArgs.add(" singlenode");
+                       proArgs.add("-nvargs");
+                       proArgs.add("X=" + input("X"));
+                       proArgs.add("Y=" + input("Y"));
+                       proArgs.add("dfam=" + String.valueOf(distFamilyType));
+                       proArgs.add(((distFamilyType == 2 && distParam != 1.0) 
? "yneg=" : "vpow=") + String.valueOf(distParam));
+                       proArgs.add((distFamilyType == 2 && distParam != 1.0) ? 
"vpow=0.0" : "yneg=0.0");
+                       proArgs.add("link=" + String.valueOf(linkType));
+                       proArgs.add("lpow=" + String.valueOf(linkPower));
+                       proArgs.add("icpt=" + String.valueOf(intercept)); // 
INTERCEPT - CHANGE THIS AS NEEDED
+                       proArgs.add("disp=0.0"); // DISPERSION (0.0: ESTIMATE)
+                       proArgs.add("reg=0.0"); // LAMBDA REGULARIZER
+                       proArgs.add("tol=0.000000000001"); // TOLERANCE 
(EPSILON)
+                       proArgs.add("moi=300");
+                       proArgs.add("mii=0");
+                       proArgs.add("B=" + output("betas_SYSTEMDS"));
+                       programArgs = proArgs.toArray(new 
String[proArgs.size()]);
+
+                       fullRScriptName = HOME + TEST_NAME + ".R";
+                       rCmd = getRCmd(input("X.mtx"), input("Y.mtx"),
+                                       String.valueOf(distFamilyType),
+                                       String.valueOf(distParam),
+                                       String.valueOf(linkType),
+                                       String.valueOf(linkPower),
+                                       String.valueOf(intercept),
+                                       "0.000000000001",
+                                       expected("betas_R"));
+
+                       runTest(true, false, null, -1);
+
+                       double max_abs_beta = 0.0;
+                       HashMap<MatrixValue.CellIndex, Double> wTRUE = new 
HashMap<>();
+                       for (int j = 0; j < cols; j++) {
+                               wTRUE.put(new MatrixValue.CellIndex(j + 1, 1), 
Double.valueOf(beta[j]));
+                               max_abs_beta = (max_abs_beta >= 
Math.abs(beta[j]) ? max_abs_beta : Math.abs(beta[j]));
+                       }
+
+                       HashMap<MatrixValue.CellIndex, Double> wSYSTEMDS_raw = 
readDMLMatrixFromHDFS("betas_SYSTEMDS");
+                       HashMap<MatrixValue.CellIndex, Double> wSYSTEMDS = new 
HashMap<>();
+                       for (MatrixValue.CellIndex key : wSYSTEMDS_raw.keySet())
+                               if (key.column == 1)
+                                       wSYSTEMDS.put(key, 
wSYSTEMDS_raw.get(key));
+
+                       runRScript(true);
+
+                       HashMap<MatrixValue.CellIndex, Double> wR = 
readRMatrixFromFS("betas_R");
+
+                       if ((distParam == 0 && linkType == 1)) { // Gaussian.*
+                               //NOTE MB: Gaussian.log was the only test 
failing when we introduced multi-threaded
+                               //matrix multplications (mmchain). After 
discussions with Sasha, we decided to change the eps
+                               //because accuracy is anyway affected by 
various rewrites like binary to unary (-1*x->-x),
+                               //transpose-matrixmult, and dot product sum. 
Disabling these rewrites led to a successful
+                               //test result. Even without multi-threaded 
matrix mult this test was failing for different number
+                               //of rows if these rewrites are enabled. Users 
can turn off rewrites if high accuracy is required.
+                               //However, in the future we might also consider 
to use Kahan plus for aggregations in matrix mult
+                               //(at least for the final aggregation of 
partial results from individual threads).
+
+                               //NOTE MB: similar issues occurred with other 
tests when moving to github action tests
+                               eps *= (linkPower == -1) ? 4 : 2; 
//Gaussian.inverse vs Gaussian.*;
+                       }
+                       TestUtils.compareMatrices(wR, wSYSTEMDS, eps * 
max_abs_beta, "wR", "wSYSTEMDS");
+               }
+               finally {
+                       resetExecMode(platformOld);
+               }
+       }
+
+       @Parameterized.Parameters
+       public static Collection<Object[]> data() {
+               // SCHEMA:
+               // #RECORDS, #FEATURES, DISTRIBUTION_FAMILY, VARIANCE_POWER or 
BERNOULLI_NO, LINK_TYPE, LINK_POWER,
+               // LOG_FEATURE_VARIANCE_DISBALANCE, AVG_LINEAR_FORM, 
ST_DEV_LINEAR_FORM, DISPERSION
+               Object[][] data = new Object[][] {
+                               // #RECS  #FTRS DFM VPOW  LNK LPOW   LFVD  
AVGLT STDLT  DISP
+                               // Both DML and R work and compute close 
results:
+                               { 100000,   50,  1,  0.0,  1,  0.0,   3.0,  
10.0,  2.0,  2.5 },   // Gaussian.log
+                               //{  10000,  100,  1,  0.0,  1,  1.0,   3.0,   
0.0,  2.0,  2.5 },   // Gaussian.id
+                               //{  20000,  100,  1,  0.0,  1, -1.0,   0.0,   
0.2,  0.03, 2.5 },   // Gaussian.inverse
+                               {  10000,  100,  1,  1.0,  1,  0.0,   3.0,   
0.0,  1.0,  2.5 },   // Poisson.log
+                               //{ 100000,   10,  1,  1.0,  1,  0.0,   3.0,   
0.0, 50.0,  2.5 },   // Poisson.log              // Pr[0|x] gets near 1
+                               //{  20000,  100,  1,  1.0,  1,  0.5,  3.0,  
10.0,  2.0,  2.5 },   // Poisson.sqrt
+                               //{  20000,  100,  1,  1.0,  1,  1.0,  3.0,  
50.0, 10.0,  2.5 },   // Poisson.id
+                               { 100000,   50,  1,  2.0,  1,  0.0,  3.0,   
0.0,  2.0,  2.5 },   // Gamma.log
+                               //{ 100000,   50,  1,  2.0,  1, -1.0,  1.0,   
2.0,  0.4,  1.5 },   // Gamma.inverse
+                               //{  10000,  100,  1,  3.0,  1, -2.0,  3.0,  
50.0,  7.0,  1.7 },   // InvGaussian.1/mu^2
+                               //{  10000,  100,  1,  3.0,  1, -1.0,  3.0,  
10.0,  2.0,  2.5 },   // InvGaussian.inverse
+                               //{ 100000,   50,  1,  3.0,  1,  0.0,  2.0,  
-2.0,  1.0,  1.7 },   // InvGaussian.log
+                               //{ 100000,   50,  1,  3.0,  1,  1.0,  1.0,   
0.2,  0.04, 1.7 },   // InvGaussian.id
+
+                               { 100000,   50,  2, -1.0,  1,  0.0,  3.0,  
-5.0,  1.0,  1.0 },   // Bernoulli {-1, 1}.log     // Note: Y is sparse
+                               //{ 100000,   50,  2, -1.0,  1,  1.0,  1.0,   
0.6,  0.1,  1.0 },   // Bernoulli {-1, 1}.id
+                               //{ 100000,   50,  2, -1.0,  1,  0.5,  0.0,   
0.4,  0.05, 1.0 },   // Bernoulli {-1, 1}.sqrt
+                               {  10000,  100,  2, -1.0,  2,  0.0,  3.0,   
0.0,  2.0,  1.0 },   // Bernoulli {-1, 1}.logit
+                               //{  10000,  100,  2, -1.0,  2,  0.0,  3.0,   
0.0, 50.0,  1.0 },   // Bernoulli {-1, 1}.logit   // Pr[y|x] near 0, 1
+                               {  20000,  100,  2, -1.0,  3,  0.0,  3.0,   
0.0,  2.0,  1.0 },   // Bernoulli {-1, 1}.probit
+                               //{ 100000,   10,  2, -1.0,  3,  0.0,  3.0,   
0.0, 50.0,  1.0 },   // Bernoulli {-1, 1}.probit  // Pr[y|x] near 0, 1
+                               //{  10000,  100,  2, -1.0,  4,  0.0,  3.0,  
-2.0,  1.0,  1.0 },   // Bernoulli {-1, 1}.cloglog
+                               //{  50000,   20,  2, -1.0,  4,  0.0,  3.0,  
-2.0, 50.0,  1.0 },   // Bernoulli {-1, 1}.cloglog // Pr[y|x] near 0, 1
+                               //{  50000,  100,  2, -1.0,  5,  0.0,  3.0,   
0.0,  2.0,  1.0 },   // Bernoulli {-1, 1}.cauchit
+
+                               { 100000,   50,  2,  1.0,  1,  0.0,  3.0,  
-5.0,  1.0,  2.5 },   // Binomial two-column.log   // Note: Y is sparse
+                               //{  10000,  100,  2,  1.0,  1,  1.0,  0.0,   
0.4,  0.05, 2.5 },   // Binomial two-column.id
+                               //{ 100000,   50,  2,  1.0,  1,  0.5,  0.0,   
0.4,  0.05, 2.5 },   // Binomial two-column.sqrt
+                               {  10000,  100,  2,  1.0,  2,  0.0,  3.0,   
0.0,  2.0,  2.5 },   // Binomial two-column.logit
+                               {  20000,  100,  2,  1.0,  3,  0.0,  3.0,   
0.0,  2.0,  2.5 },   // Binomial two-column.probit
+                               //{  10000,  100,  2,  1.0,  4,  0.0,  3.0,  
-2.0,  1.0,  2.5 },   // Binomial two-column.cloglog
+                               //{  20000,  100,  2,  1.0,  5,  0.0,  3.0,   
0.0,  2.0,  2.5 },   // Binomial two-column.cauchit
+               };
+               return Arrays.asList(data);
+       }
+}
diff --git a/src/test/scripts/functions/builtin/glmTest.R 
b/src/test/scripts/functions/builtin/glmTest.R
new file mode 100644
index 0000000..5d4c525
--- /dev/null
+++ b/src/test/scripts/functions/builtin/glmTest.R
@@ -0,0 +1,139 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+# 
+#   http://www.apache.org/licenses/LICENSE-2.0
+# 
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+# 
+# Intended to solve GLM Regression using R, in order to compare against the 
DML implementation
+# INPUT 1: Matrix X [rows, columns]
+# and Matrix y [rows, 1]
+# INPUT 2-5: Distribution family and link, see below:
+# ---------------------------------------------
+#   Dst Var Lnk Lnk   Distribution       Cano-
+#   typ pow typ pow   Family.link        nical?
+# ---------------------------------------------
+#    1  0.0  1 -1.0   Gaussian.inverse
+#    1  0.0  1  0.0   Gaussian.log
+#    1  0.0  1  1.0   Gaussian.id         Yes
+#    1  1.0  1  0.0   Poisson.log         Yes
+#    1  1.0  1  0.5   Poisson.sqrt
+#    1  1.0  1  1.0   Poisson.id
+#    1  2.0  1 -1.0   Gamma.inverse       Yes
+#    1  2.0  1  0.0   Gamma.log
+#    1  2.0  1  1.0   Gamma.id
+#    1  3.0  1 -2.0   InvGaussian.1/mu^2  Yes
+#    1  3.0  1 -1.0   InvGaussian.inverse
+#    1  3.0  1  0.0   InvGaussian.log
+#    1  3.0  1  1.0   InvGaussian.id
+#    1   *   1   *    AnyVariance.AnyLink
+# ---------------------------------------------
+#    2 -1.0  *   *    Binomial {-1, 1}
+#    2  0.0  *   *    Binomial { 0, 1}
+#    2  1.0  *   *    Binomial two-column
+#    2   *   1  0.0   Binomial.log
+#    2   *   2   *    Binomial.logit      Yes
+#    2   *   3   *    Binomial.probit
+#    2   *   4   *    Binomial.cloglog
+#    2   *   5   *    Binomial.cauchit
+# ---------------------------------------------
+# INPUT 2: (int) Distribution type
+# INPUT 3: (double) For Power families: Variance power of the mean
+# INPUT 4: (int) Link function type
+# INPUT 5: (double) Link as power of the mean
+# INPUT 6: (int) Intercept: 0 = no, 1 = yes
+# INPUT 7: (double) tolerance (epsilon)
+# INPUT 8: the regression coefficients output file
+# OUTPUT : Matrix beta [columns, 1]
+#
+# Assume that $GLMR_HOME is set to the home of the R script
+# Assume input and output directories are $GLMR_HOME/in/ and 
$GLMR_HOME/expected/
+# Rscript $GLMR_HOME/GLM.R $GLMR_HOME/in/X.mtx $GLMR_HOME/in/y.mtx 2 0.0 2 0.0 
1 0.00000001 $GLMR_HOME/expected/w.mtx
+
+args <- commandArgs (TRUE);
+
+library ("Matrix");
+# library ("batch");
+
+options (warn = -1);
+
+X_here <- as.matrix(readMM(args[1]))
+y_here <- as.matrix(readMM(args[2]))
+
+num_records  <- nrow (X_here);
+num_features <- ncol (X_here);
+dist_type  <- as.integer (args[3]);
+dist_param <- as.numeric (args[4]);
+link_type  <- as.integer (args[5]);
+link_power <- as.numeric (args[6]);
+icept <- as.integer (args[7]);
+eps_n <- as.numeric (args[8]);
+
+f_ly <- gaussian ();
+var_power <- dist_param;
+
+if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power ==  1.0) { 
f_ly <- gaussian (link = "identity");         } else
+if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power == -1.0) { 
f_ly <- gaussian (link = "inverse");          } else
+if (dist_type == 1 & var_power == 0.0 & link_type == 1 & link_power ==  0.0) { 
f_ly <- gaussian (link = "log");              } else
+if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power ==  1.0) { 
f_ly <-  poisson (link = "identity");         } else
+if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power ==  0.0) { 
f_ly <-  poisson (link = "log");              } else
+if (dist_type == 1 & var_power == 1.0 & link_type == 1 & link_power ==  0.5) { 
f_ly <-  poisson (link = "sqrt");             } else
+if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power ==  1.0) { 
f_ly <-    Gamma (link = "identity");         } else
+if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power == -1.0) { 
f_ly <-    Gamma (link = "inverse");          } else
+if (dist_type == 1 & var_power == 2.0 & link_type == 1 & link_power ==  0.0) { 
f_ly <-    Gamma (link = "log");              } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power ==  1.0) { 
f_ly <- inverse.gaussian (link = "identity"); } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == -1.0) { 
f_ly <- inverse.gaussian (link = "inverse");  } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power ==  0.0) { 
f_ly <- inverse.gaussian (link = "log");      } else
+if (dist_type == 1 & var_power == 3.0 & link_type == 1 & link_power == -2.0) { 
f_ly <- inverse.gaussian (link = "1/mu^2");   } else
+if (dist_type == 2                    & link_type == 1 & link_power ==  0.0) { 
f_ly <- binomial (link = "log");              } else
+if (dist_type == 2                    & link_type == 1 & link_power ==  1.0) { 
f_ly <- binomial (link = "identity");         } else
+if (dist_type == 2                    & link_type == 1 & link_power ==  0.5) { 
f_ly <- binomial (link = "sqrt");             } else
+if (dist_type == 2                    & link_type == 2                     ) { 
f_ly <- binomial (link = "logit");            } else
+if (dist_type == 2                    & link_type == 3                     ) { 
f_ly <- binomial (link = "probit");           } else
+if (dist_type == 2                    & link_type == 4                     ) { 
f_ly <- binomial (link = "cloglog");          } else
+if (dist_type == 2                    & link_type == 5                     ) { 
f_ly <- binomial (link = "cauchit");          }
+
+# quasi(link = "identity", variance = "constant")
+# quasibinomial(link = "logit")
+# quasipoisson(link = "log")
+
+if (dist_type == 2 & dist_param != 1.0) {
+    y_here <- (y_here - dist_param) / (1.0 - dist_param);
+}
+
+# epsilon      tolerance: the iterations converge when |dev - devold|/(|dev| + 
0.1) < epsilon.
+# maxit        integer giving the maximal number of IWLS iterations.
+# trace        logical indicating if output should be produced for each 
iteration.
+#
+c_rol <- glm.control (epsilon = eps_n, maxit = 100, trace = FALSE);
+
+X_matrix = as.matrix (X_here);
+y_matrix = as.matrix (y_here);
+
+if (icept == 0) {
+    glmOut <- glm (y_matrix ~ X_matrix - 1, family = f_ly, control = c_rol); # 
zero intercept
+    betas <- coef (glmOut);
+} else {
+    glmOut <- glm (y_matrix ~ X_matrix    , family = f_ly, control = c_rol);
+    betas <- coef (glmOut);
+    beta_intercept = betas [1];
+    betas [1 : num_features] = betas [2 : (num_features + 1)];
+    betas [num_features + 1] = beta_intercept;
+}
+
+print (c("Deviance", glmOut$deviance));
+writeMM (as (betas, "CsparseMatrix"), args[9], format = "text");
\ No newline at end of file
diff --git a/src/test/scripts/functions/builtin/glmTest.dml 
b/src/test/scripts/functions/builtin/glmTest.dml
new file mode 100644
index 0000000..a5549a8
--- /dev/null
+++ b/src/test/scripts/functions/builtin/glmTest.dml
@@ -0,0 +1,25 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+X = read($X);
+Y = read($Y);
+betas= glm(X=X,  Y=Y, dfam = $dfam, vpow = $vpow, link = $link, lpow = $lpow, 
yneg = $yneg, icpt = $icpt, disp=$disp, reg = $reg, tol = $tol, moi = $moi, 
mii=$mii, verbose=TRUE)
+write(betas, $B)
\ No newline at end of file

[systemml] branch master updated: [SYSTEMDS-391] New built-in GLM function (Generalized Linear Model)

Reply via email to