Repository: systemml
Updated Branches:
  refs/heads/master 82e42957f -> 6bf3e7836


[SYSTEMML-2050] Minor simplifications of GLM, Kmeans, Linreg, MLogreg

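This patch makes some minor script-level simplifications using the new
ifelse function to ease understanding and increase the potential for
rewrites and inlining (by avoiding unnecessary DAG cuts). It also applies
the same ifelse simplification to the nn dropout layer and reworks the
dimension check for ternary cell operations in MatrixBlock so that every
non-scalar input must match the output dimensions.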


Project: http://git-wip-us.apache.org/repos/asf/systemml/repo
Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/6bf3e783
Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/6bf3e783
Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/6bf3e783

Branch: refs/heads/master
Commit: 6bf3e78364942c305018a526e80fd3dad441bd86
Parents: 82e4295
Author: Matthias Boehm <[email protected]>
Authored: Thu Jan 18 13:02:57 2018 -0800
Committer: Matthias Boehm <[email protected]>
Committed: Thu Jan 18 13:25:22 2018 -0800

----------------------------------------------------------------------
 scripts/algorithms/GLM.dml                      | 10 ++-----
 scripts/algorithms/Kmeans.dml                   |  7 ++---
 scripts/algorithms/LinearRegCG.dml              | 15 ++--------
 scripts/algorithms/LinearRegDS.dml              | 15 ++--------
 scripts/algorithms/MultiLogReg.dml              | 31 ++++++--------------
 scripts/nn/layers/dropout.dml                   |  8 ++---
 .../sysml/runtime/matrix/data/MatrixBlock.java  | 21 +++++++------
 7 files changed, 32 insertions(+), 75 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/GLM.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/GLM.dml b/scripts/algorithms/GLM.dml
index 0e31a1e..8bd822f 100644
--- a/scripts/algorithms/GLM.dml
+++ b/scripts/algorithms/GLM.dml
@@ -1062,14 +1062,8 @@ get_trust_boundary_point =
     f_extra = 0.5 * sum (z * (r + g));
     f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1;
     f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2;
-    if (f_change_1 < f_change_2) {
-        new_z = z + (tau_1 * p);
-        f_change = f_change_1;
-    }
-    else {
-        new_z = z + (tau_2 * p);
-        f_change = f_change_2;
-    }
+    new_z = z + ifelse(f_change_1<f_change_2, tau_1, tau_2) * p;
+    f_change = min(f_change_1, f_change_2);
 }
 
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/Kmeans.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/Kmeans.dml b/scripts/algorithms/Kmeans.dml
index 533a509..99ee264 100644
--- a/scripts/algorithms/Kmeans.dml
+++ b/scripts/algorithms/Kmeans.dml
@@ -118,11 +118,8 @@ for (i in 1 : num_centroids)
     # Update min_distances to preserve the loop invariant:
     distances = X_samples_sq_norms + samples_vs_runs_map %*% rowSums 
(centroids ^ 2)
               - 2 * rowSums (X_samples * (samples_vs_runs_map %*% centroids));
-    if (i == 1) {
-        min_distances = is_row_in_samples * distances;
-    } else {
-        min_distances = min (min_distances, distances);
-}   }
+    min_distances = ifelse(i==1, is_row_in_samples*distances, 
min(min_distances,distances));
+}
 
 # STEP 2: PERFORM K-MEANS ITERATIONS FOR ALL RUNS:
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/LinearRegCG.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/LinearRegCG.dml 
b/scripts/algorithms/LinearRegCG.dml
index 9ba7e89..08cbaef 100644
--- a/scripts/algorithms/LinearRegCG.dml
+++ b/scripts/algorithms/LinearRegCG.dml
@@ -224,13 +224,8 @@ ss_res = sum (y_residual ^ 2);
 ss_avg_res = ss_res - n * avg_res ^ 2;
 
 R2 = 1 - ss_res / ss_avg_tot;
-if (n > m_ext) {
-    dispersion  = ss_res / (n - m_ext);
-    adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
-} else {
-    dispersion  = 0.0 / 0.0;
-    adjusted_R2 = 0.0 / 0.0;
-}
+dispersion = ifelse(n > m_ext, ss_res / (n - m_ext), 0.0/0.0);
+adjusted_R2 = ifelse(n > m_ext, 1 - dispersion / (ss_avg_tot / (n - 1)), 
0.0/0.0);
 
 R2_nobias = 1 - ss_avg_res / ss_avg_tot;
 deg_freedom = n - m - 1;
@@ -244,11 +239,7 @@ if (deg_freedom > 0) {
 }
 
 R2_vs_0 = 1 - ss_res / ss_tot;
-if (n > m) {
-    adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
-} else {
-    adjusted_R2_vs_0 = 0.0 / 0.0;
-}
+adjusted_R2_vs_0 = ifelse(n > m, 1 - (ss_res / (n - m)) / (ss_tot / n), 
0.0/0.0);
 
 str = "AVG_TOT_Y," + avg_tot;                                    #  Average of 
the response value Y
 str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));             #  Standard 
Deviation of the response value Y

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/LinearRegDS.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/LinearRegDS.dml 
b/scripts/algorithms/LinearRegDS.dml
index 10def79..3e6a6e3 100644
--- a/scripts/algorithms/LinearRegDS.dml
+++ b/scripts/algorithms/LinearRegDS.dml
@@ -166,13 +166,8 @@ ss_res = sum (y_residual ^ 2);
 ss_avg_res = ss_res - n * avg_res ^ 2;
 
 R2 = 1 - ss_res / ss_avg_tot;
-if (n > m_ext) {
-    dispersion  = ss_res / (n - m_ext);
-    adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1));
-} else {
-    dispersion  = 0.0 / 0.0;
-    adjusted_R2 = 0.0 / 0.0;
-}
+dispersion = ifelse(n > m_ext, ss_res / (n - m_ext), 0.0/0.0);
+adjusted_R2 = ifelse(n > m_ext, 1 - dispersion / (ss_avg_tot / (n - 1)), 
0.0/0.0);
 
 R2_nobias = 1 - ss_avg_res / ss_avg_tot;
 deg_freedom = n - m - 1;
@@ -186,11 +181,7 @@ if (deg_freedom > 0) {
 }
 
 R2_vs_0 = 1 - ss_res / ss_tot;
-if (n > m) {
-    adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n);
-} else {
-    adjusted_R2_vs_0 = 0.0 / 0.0;
-}
+adjusted_R2_vs_0 = ifelse(n > m, 1 - (ss_res / (n - m)) / (ss_tot / n), 
0.0/0.0);
 
 str = "AVG_TOT_Y," + avg_tot;                                    #  Average of 
the response value Y
 str = append (str, "STDEV_TOT_Y," + sqrt (var_tot));             #  Standard 
Deviation of the response value Y

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/MultiLogReg.dml
----------------------------------------------------------------------
diff --git a/scripts/algorithms/MultiLogReg.dml 
b/scripts/algorithms/MultiLogReg.dml
index 55ef0e1..8e00d26 100644
--- a/scripts/algorithms/MultiLogReg.dml
+++ b/scripts/algorithms/MultiLogReg.dml
@@ -287,29 +287,16 @@ while (! converge)
        }
 
        alpha2 = obj_new - obj - gs;
-       if (alpha2 <= 0) {
-          alpha = sigma3;
-       } 
-       else {
-          alpha = max (sigma1, -0.5 * gs / alpha2);
-       }
+    alpha = ifelse(alpha2 <= 0, sigma3, max(sigma1, -0.5 * gs / alpha2));
        
-       if (rho < eta0) {
-               delta = min (max (alpha, sigma1) * snorm, sigma2 * delta);
-       }
-       else {
-               if (rho < eta1) {
-                       delta = max (sigma1 * delta, min (alpha * snorm, sigma2 
* delta));
-               }
-               else { 
-                       if (rho < eta2) {
-                               delta = max (sigma1 * delta, min (alpha * 
snorm, sigma3 * delta));
-                       }
-                       else {
-                               delta = max (delta, min (alpha * snorm, sigma3 
* delta));
-                       }
-               }
-       } 
+    if (rho < eta0)
+        delta = min (max (alpha, sigma1) * snorm, sigma2 * delta);
+    else if (rho < eta1)
+        delta = max (sigma1 * delta, min (alpha * snorm, sigma2 * delta));
+    else if (rho < eta2)
+        delta = max (sigma1 * delta, min (alpha * snorm, sigma3 * delta));
+    else
+        delta = max (delta, min (alpha * snorm, sigma3 * delta));
        
        if (is_trust_boundary_reached == 1)
        {

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/nn/layers/dropout.dml
----------------------------------------------------------------------
diff --git a/scripts/nn/layers/dropout.dml b/scripts/nn/layers/dropout.dml
index a36878b..a688bd9 100644
--- a/scripts/nn/layers/dropout.dml
+++ b/scripts/nn/layers/dropout.dml
@@ -46,11 +46,9 @@ forward = function(matrix[double] X, double p, int seed)
   #    `mask = rand(rows=nrow(X), cols=ncol(X), min=0, max=1, seed=seed) <= p`
   # to create a dropout mask.  Fortunately, SystemML has a `sparsity` 
parameter on
   # the `rand` function that allows use to create a mask directly.
-  if (seed == -1) {
-    mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p)
-  } else {
-    mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p, 
seed=seed)
-  }
+  mask = ifelse(seed == -1,
+    rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p),
+    rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p, seed=seed));
   out = X * mask / p
 }
 

http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
----------------------------------------------------------------------
diff --git 
a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java 
b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
index 81cd3a9..5a96bcb 100644
--- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
+++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java
@@ -2788,17 +2788,6 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
        {
                //TODO perf for special cases like ifelse
                
-               final int m = Math.max(Math.max(rlen, m2.rlen), m3.rlen);
-               final int n = Math.max(Math.max(clen, m2.clen), m3.clen);
-               
-               //error handling 
-               if( (rlen != 1 && rlen != m) || (clen != 1 && clen != n)
-                       || (m2.rlen != 1 && m2.rlen != m) || (m2.clen != 1 && 
m2.clen != n)
-                       || (m3.rlen != 1 && m3.rlen != m) || (m3.clen != 1 && 
m3.clen != n) ) {
-                       throw new DMLRuntimeException("Block sizes are not 
matched for ternary cell operations: "
-                               + rlen + "x" + clen + " vs " + m2.rlen + "x" + 
m2.clen + " vs " + m3.rlen + "x" + m3.clen);
-               }
-               
                //prepare inputs
                final boolean s1 = (rlen==1 && clen==1);
                final boolean s2 = (m2.rlen==1 && m2.clen==1);
@@ -2806,6 +2795,16 @@ public class MatrixBlock extends MatrixValue implements 
CacheBlock, Externalizab
                final double d1 = s1 ? quickGetValue(0, 0) : Double.NaN;
                final double d2 = s2 ? m2.quickGetValue(0, 0) : Double.NaN;
                final double d3 = s3 ? m3.quickGetValue(0, 0) : Double.NaN;
+               final int m = Math.max(Math.max(rlen, m2.rlen), m3.rlen);
+               final int n = Math.max(Math.max(clen, m2.clen), m3.clen);
+               
+               //error handling 
+               if( (!s1 && (rlen != m || clen != n))
+                       || (!s2 && (m2.rlen != m || m2.clen != n))
+                       || (!s3 && (m3.rlen != m || m3.clen != n)) ) {
+                       throw new DMLRuntimeException("Block sizes are not 
matched for ternary cell operations: "
+                               + rlen + "x" + clen + " vs " + m2.rlen + "x" + 
m2.clen + " vs " + m3.rlen + "x" + m3.clen);
+               }
                
                //prepare result
                ret.reset(m, n, false);

Reply via email to