Repository: systemml Updated Branches: refs/heads/master 82e42957f -> 6bf3e7836
[SYSTEMML-2050] Minor simplifications of GLM, Kmeans, Linreg, MLogreg This patch makes some minor script-level simplifications using the new ifelse function to ease understanding and increase the potential for rewrites and inlining (by avoiding unnecessary DAG cuts). Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/6bf3e783 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/6bf3e783 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/6bf3e783 Branch: refs/heads/master Commit: 6bf3e78364942c305018a526e80fd3dad441bd86 Parents: 82e4295 Author: Matthias Boehm <[email protected]> Authored: Thu Jan 18 13:02:57 2018 -0800 Committer: Matthias Boehm <[email protected]> Committed: Thu Jan 18 13:25:22 2018 -0800 ---------------------------------------------------------------------- scripts/algorithms/GLM.dml | 10 ++----- scripts/algorithms/Kmeans.dml | 7 ++--- scripts/algorithms/LinearRegCG.dml | 15 ++-------- scripts/algorithms/LinearRegDS.dml | 15 ++-------- scripts/algorithms/MultiLogReg.dml | 31 ++++++-------------- scripts/nn/layers/dropout.dml | 8 ++--- .../sysml/runtime/matrix/data/MatrixBlock.java | 21 +++++++------ 7 files changed, 32 insertions(+), 75 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/GLM.dml ---------------------------------------------------------------------- diff --git a/scripts/algorithms/GLM.dml b/scripts/algorithms/GLM.dml index 0e31a1e..8bd822f 100644 --- a/scripts/algorithms/GLM.dml +++ b/scripts/algorithms/GLM.dml @@ -1062,14 +1062,8 @@ get_trust_boundary_point = f_extra = 0.5 * sum (z * (r + g)); f_change_1 = f_extra + (0.5 * tau_1 * pq + zq + gp) * tau_1; f_change_2 = f_extra + (0.5 * tau_2 * pq + zq + gp) * tau_2; - if (f_change_1 < f_change_2) { - new_z = z + (tau_1 * p); - f_change = f_change_1; - } - else { - new_z = z + (tau_2 * p); - f_change = f_change_2; - } + new_z = z + ifelse(f_change_1<f_change_2, tau_1, tau_2) * p; + f_change = min(f_change_1, f_change_2); } http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/Kmeans.dml ---------------------------------------------------------------------- diff --git a/scripts/algorithms/Kmeans.dml b/scripts/algorithms/Kmeans.dml index 533a509..99ee264 100644 --- a/scripts/algorithms/Kmeans.dml +++ b/scripts/algorithms/Kmeans.dml @@ -118,11 +118,8 @@ for (i in 1 : num_centroids) # Update min_distances to preserve the loop invariant: distances = X_samples_sq_norms + samples_vs_runs_map %*% rowSums (centroids ^ 2) - 2 * rowSums (X_samples * (samples_vs_runs_map %*% centroids)); - if (i == 1) { - min_distances = is_row_in_samples * distances; - } else { - min_distances = min (min_distances, distances); -} } + min_distances = ifelse(i==1, is_row_in_samples*distances, min(min_distances,distances)); +} # STEP 2: PERFORM K-MEANS ITERATIONS FOR ALL RUNS: http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/LinearRegCG.dml ---------------------------------------------------------------------- diff --git a/scripts/algorithms/LinearRegCG.dml b/scripts/algorithms/LinearRegCG.dml index 9ba7e89..08cbaef 100644 --- a/scripts/algorithms/LinearRegCG.dml +++ b/scripts/algorithms/LinearRegCG.dml @@ -224,13 +224,8 @@ ss_res = sum (y_residual ^ 2); ss_avg_res = ss_res - n * avg_res ^ 2; R2 = 1 - ss_res / ss_avg_tot; -if (n > m_ext) { - dispersion = ss_res / (n - m_ext); - adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1)); -} else { - dispersion = 0.0 / 0.0; - adjusted_R2 = 0.0 / 0.0; -} +dispersion = ifelse(n > m_ext, ss_res / (n - m_ext), 0.0/0.0); +adjusted_R2 = ifelse(n > m_ext, 1 - dispersion / (ss_avg_tot / (n - 1)), 0.0/0.0); R2_nobias = 1 - ss_avg_res / ss_avg_tot; deg_freedom = n - m - 1; @@ -244,11 +239,7 @@ if (deg_freedom > 0) { } R2_vs_0 = 1 - ss_res / ss_tot; -if (n > m) { - adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n); -} else { - adjusted_R2_vs_0 = 0.0 / 0.0; -} +adjusted_R2_vs_0 = ifelse(n > m, 1 - (ss_res / (n - m)) / (ss_tot / n), 0.0/0.0); str = "AVG_TOT_Y," + avg_tot; # Average of the response value Y str = append (str, "STDEV_TOT_Y," + sqrt (var_tot)); # Standard Deviation of the response value Y http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/LinearRegDS.dml ---------------------------------------------------------------------- diff --git a/scripts/algorithms/LinearRegDS.dml b/scripts/algorithms/LinearRegDS.dml index 10def79..3e6a6e3 100644 --- a/scripts/algorithms/LinearRegDS.dml +++ b/scripts/algorithms/LinearRegDS.dml @@ -166,13 +166,8 @@ ss_res = sum (y_residual ^ 2); ss_avg_res = ss_res - n * avg_res ^ 2; R2 = 1 - ss_res / ss_avg_tot; -if (n > m_ext) { - dispersion = ss_res / (n - m_ext); - adjusted_R2 = 1 - dispersion / (ss_avg_tot / (n - 1)); -} else { - dispersion = 0.0 / 0.0; - adjusted_R2 = 0.0 / 0.0; -} +dispersion = ifelse(n > m_ext, ss_res / (n - m_ext), 0.0/0.0); +adjusted_R2 = ifelse(n > m_ext, 1 - dispersion / (ss_avg_tot / (n - 1)), 0.0/0.0); R2_nobias = 1 - ss_avg_res / ss_avg_tot; deg_freedom = n - m - 1; @@ -186,11 +181,7 @@ if (deg_freedom > 0) { } R2_vs_0 = 1 - ss_res / ss_tot; -if (n > m) { - adjusted_R2_vs_0 = 1 - (ss_res / (n - m)) / (ss_tot / n); -} else { - adjusted_R2_vs_0 = 0.0 / 0.0; -} +adjusted_R2_vs_0 = ifelse(n > m, 1 - (ss_res / (n - m)) / (ss_tot / n), 0.0/0.0); str = "AVG_TOT_Y," + avg_tot; # Average of the response value Y str = append (str, "STDEV_TOT_Y," + sqrt (var_tot)); # Standard Deviation of the response value Y http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/algorithms/MultiLogReg.dml ---------------------------------------------------------------------- diff --git a/scripts/algorithms/MultiLogReg.dml b/scripts/algorithms/MultiLogReg.dml index 55ef0e1..8e00d26 100644 --- a/scripts/algorithms/MultiLogReg.dml +++ b/scripts/algorithms/MultiLogReg.dml @@ -287,29 +287,16 @@ while (! converge) } alpha2 = obj_new - obj - gs; - if (alpha2 <= 0) { - alpha = sigma3; - } - else { - alpha = max (sigma1, -0.5 * gs / alpha2); - } + alpha = ifelse(alpha2 <= 0, sigma3, max(sigma1, -0.5 * gs / alpha2)); - if (rho < eta0) { - delta = min (max (alpha, sigma1) * snorm, sigma2 * delta); - } - else { - if (rho < eta1) { - delta = max (sigma1 * delta, min (alpha * snorm, sigma2 * delta)); - } - else { - if (rho < eta2) { - delta = max (sigma1 * delta, min (alpha * snorm, sigma3 * delta)); - } - else { - delta = max (delta, min (alpha * snorm, sigma3 * delta)); - } - } - } + if (rho < eta0) + delta = min (max (alpha, sigma1) * snorm, sigma2 * delta); + else if (rho < eta1) + delta = max (sigma1 * delta, min (alpha * snorm, sigma2 * delta)); + else if (rho < eta2) + delta = max (sigma1 * delta, min (alpha * snorm, sigma3 * delta)); + else + delta = max (delta, min (alpha * snorm, sigma3 * delta)); if (is_trust_boundary_reached == 1) { http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/scripts/nn/layers/dropout.dml ---------------------------------------------------------------------- diff --git a/scripts/nn/layers/dropout.dml b/scripts/nn/layers/dropout.dml index a36878b..a688bd9 100644 --- a/scripts/nn/layers/dropout.dml +++ b/scripts/nn/layers/dropout.dml @@ -46,11 +46,9 @@ forward = function(matrix[double] X, double p, int seed) # `mask = rand(rows=nrow(X), cols=ncol(X), min=0, max=1, seed=seed) <= p` # to create a dropout mask. Fortunately, SystemML has a `sparsity` parameter on # the `rand` function that allows use to create a mask directly. - if (seed == -1) { - mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p) - } else { - mask = rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p, seed=seed) - } + mask = ifelse(seed == -1, + rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p), + rand(rows=nrow(X), cols=ncol(X), min=1, max=1, sparsity=p, seed=seed)); out = X * mask / p } http://git-wip-us.apache.org/repos/asf/systemml/blob/6bf3e783/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index 81cd3a9..5a96bcb 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -2788,17 +2788,6 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab { //TODO perf for special cases like ifelse - final int m = Math.max(Math.max(rlen, m2.rlen), m3.rlen); - final int n = Math.max(Math.max(clen, m2.clen), m3.clen); - - //error handling - if( (rlen != 1 && rlen != m) || (clen != 1 && clen != n) - || (m2.rlen != 1 && m2.rlen != m) || (m2.clen != 1 && m2.clen != n) - || (m3.rlen != 1 && m3.rlen != m) || (m3.clen != 1 && m3.clen != n) ) { - throw new DMLRuntimeException("Block sizes are not matched for ternary cell operations: " - + rlen + "x" + clen + " vs " + m2.rlen + "x" + m2.clen + " vs " + m3.rlen + "x" + m3.clen); - } - //prepare inputs final boolean s1 = (rlen==1 && clen==1); final boolean s2 = (m2.rlen==1 && m2.clen==1); @@ -2806,6 +2795,16 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab final double d1 = s1 ? quickGetValue(0, 0) : Double.NaN; final double d2 = s2 ? m2.quickGetValue(0, 0) : Double.NaN; final double d3 = s3 ? m3.quickGetValue(0, 0) : Double.NaN; + final int m = Math.max(Math.max(rlen, m2.rlen), m3.rlen); + final int n = Math.max(Math.max(clen, m2.clen), m3.clen); + + //error handling + if( (!s1 && (rlen != m || clen != n)) + || (!s2 && (m2.rlen != m || m2.clen != n)) + || (!s3 && (m3.rlen != m || m3.clen != n)) ) { + throw new DMLRuntimeException("Block sizes are not matched for ternary cell operations: " + + rlen + "x" + clen + " vs " + m2.rlen + "x" + m2.clen + " vs " + m3.rlen + "x" + m3.clen); + } //prepare result ret.reset(m, n, false);
