Repository: systemml Updated Branches: refs/heads/master 0cff14094 -> e391e114e
[SYSTEMML-1746] Simplify Kmeans script for increased codegen potential This patch simplifies the existing Kmeans algorithm (inner loop) into a form where after rewrites, all relevant operations end up in a single HOP DAG. Since individual HOP DAGs are the granularity of optimization, this significantly increases the optimization and thus, codegen potential. On Kmeans over a 100M x 10 dense input and 20 outer iterations, this patch improved end-to-end performance w/ codegen from 426s to 128s (baseline w/o codegen: 1,972s). Project: http://git-wip-us.apache.org/repos/asf/systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/systemml/commit/e391e114 Tree: http://git-wip-us.apache.org/repos/asf/systemml/tree/e391e114 Diff: http://git-wip-us.apache.org/repos/asf/systemml/diff/e391e114 Branch: refs/heads/master Commit: e391e114eac63f536ddd0b827dae9d7e998e0674 Parents: 0cff140 Author: Matthias Boehm <[email protected]> Authored: Wed Sep 27 15:12:01 2017 -0700 Committer: Matthias Boehm <[email protected]> Committed: Wed Sep 27 15:12:01 2017 -0700 ---------------------------------------------------------------------- scripts/algorithms/Kmeans.dml | 49 +++++++++++++++++++------------------- 1 file changed, 25 insertions(+), 24 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/systemml/blob/e391e114/scripts/algorithms/Kmeans.dml ---------------------------------------------------------------------- diff --git a/scripts/algorithms/Kmeans.dml b/scripts/algorithms/Kmeans.dml index 386c85f..533a509 100644 --- a/scripts/algorithms/Kmeans.dml +++ b/scripts/algorithms/Kmeans.dml @@ -138,7 +138,7 @@ parfor (run_index in 1 : num_runs, check = 0) C_old = C; iter_count = 0; term_code = 0; - wcss = 0; + wcss = 1.0/0.0; #INF while (term_code == 0) { @@ -150,33 +150,34 @@ parfor (run_index in 1 : num_runs, check = 0) wcss_old = wcss; wcss = sumXsq + sum (minD); if (is_verbose == TRUE) { - if (iter_count == 0) { + if (iter_count == 0) print ("Run " + run_index + ", At Start-Up: Centroid WCSS = " + wcss); - } else { + else print ("Run " + run_index + ", Iteration " + iter_count + ": Centroid WCSS = " + wcss + "; Centroid change (avg.sq.dist.) = " + (sum ((C - C_old) ^ 2) / num_centroids)); - } } + } + + # Find the closest centroid for each record + P = D <= minD; + # If some records belong to multiple centroids, share them equally + P = P / rowSums (P); + # Compute the column normalization factor for P + P_denom = colSums (P); + # Compute new centroids as weighted averages over the records + C_new = (t(P) %*% X) / t(P_denom); + # Check if convergence or maximum iteration has been reached - if (wcss_old - wcss < eps * wcss & iter_count > 0) { - term_code = 1; # Convergence is reached - } else { - if (iter_count >= max_iter) { - term_code = 2; # Maximum iteration is reached - } else { - iter_count = iter_count + 1; - # Find the closest centroid for each record - P = (D <= minD); - # If some records belong to multiple centroids, share them equally - P = P / rowSums (P); - # Compute the column normalization factor for P - P_denom = colSums (P); - if (sum (P_denom <= 0) > 0) { - term_code = 3; # There is a "runaway" centroid with 0.0 denominator - } else { - C_old = C; - # Compute new centroids as weighted averages over the records - C = (t(P) %*% X) / t(P_denom); - } } } } + iter_count = iter_count + 1 + if(wcss_old - wcss < eps * wcss) + term_code = 1; # Convergence reached + else if(iter_count >= max_iter) + term_code = 2; # Max iteration reached + else if(sum (P_denom <= 0) > 0) + term_code = 3; # "Runaway" centroid (0.0 denom) + else { + C_old = C; C = C_new; + } + } print ("Run " + run_index + ", Iteration " + iter_count + ": Terminated with code = " + term_code + ", Centroid WCSS = " + wcss); All_Centroids [(num_centroids * (run_index - 1) + 1) : (num_centroids * run_index), ] = C; final_wcss [run_index, 1] = wcss;
