IMPALA-5932: Improve transitive closure computation performance in FE This patch implements the Floyd-Warshall algorithm for the transitive closure computation for the value transfer graph, replacing the existing N^4 brute force algorithm. The performance improvement depends on the size and structure of the value transfer graph. On a random graph with 800 slots and 2800 edges it is 43X faster itself. And the "Equivalence classes computed" event in the runtime profile becomes 21X faster. This computation is covered by the existing tests, which verifies the equivalency of the new and the old value transfer graphs.
Change-Id: Idb00e3c1f904e60ae25567a52b4bf0809a84c6b3 Reviewed-on: http://gerrit.cloudera.org:8080/8098 Reviewed-by: Alex Behm <[email protected]> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/741b0524 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/741b0524 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/741b0524 Branch: refs/heads/master Commit: 741b0524a43c93f2d339dad259dea510524cbdbe Parents: eecbbcb Author: Tianyi Wang <[email protected]> Authored: Thu Sep 14 12:53:42 2017 -0700 Committer: Impala Public Jenkins <[email protected]> Committed: Wed Sep 20 02:45:04 2017 +0000 ---------------------------------------------------------------------- .../org/apache/impala/analysis/Analyzer.java | 23 ++++++-------------- 1 file changed, 7 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/741b0524/fe/src/main/java/org/apache/impala/analysis/Analyzer.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java index c629bb7..df5cf97 100644 --- a/fe/src/main/java/org/apache/impala/analysis/Analyzer.java +++ b/fe/src/main/java/org/apache/impala/analysis/Analyzer.java @@ -2620,7 +2620,7 @@ public class Analyzer { * This step partitions the value transfers into disjoint sets. * 4. Compute the transitive closure of each partition from (3) in the new slot * domain separately. Hopefully, the partitions are small enough to afford - * the O(N^3) complexity of the brute-force transitive closure computation. + * the O(N^3) complexity of the Floyd-Warshall transitive closure computation. * The condensed graph is not transformed back into the original slot domain because * of the potential performance penalty. Instead, hasValueTransfer() consults * coalescedSlots_, valueTransfer_, and completeSubGraphs_ which can together @@ -2695,24 +2695,15 @@ public class Analyzer { p[numPartitionSlots++] = slotId; } // Compute the transitive closure of this graph partition. - // TODO: Since we are operating on a DAG the performance can be improved if - // necessary (e.g., topological sort + backwards propagation of the transitive - // closure). - boolean changed = false; - do { - changed = false; + for (int j = 0; j < numPartitionSlots; ++j) { for (int i = 0; i < numPartitionSlots; ++i) { - for (int j = 0; j < numPartitionSlots; ++j) { - for (int k = 0; k < numPartitionSlots; ++k) { - if (valueTransfer_[p[i]][p[j]] && valueTransfer_[p[j]][p[k]] - && !valueTransfer_[p[i]][p[k]]) { - valueTransfer_[p[i]][p[k]] = true; - changed = true; - } - } + // Our graphs are typically sparse so this filters out a lot of iterations. + if (!valueTransfer_[p[i]][p[j]]) continue; + for (int k = 0; k < numPartitionSlots; ++k) { + valueTransfer_[p[i]][p[k]] |= valueTransfer_[p[j]][p[k]]; } } - } while (changed); + } } long end = System.currentTimeMillis();
