Improve TopN merge performance
Project: http://git-wip-us.apache.org/repos/asf/kylin/repo Commit: http://git-wip-us.apache.org/repos/asf/kylin/commit/e7d31938 Tree: http://git-wip-us.apache.org/repos/asf/kylin/tree/e7d31938 Diff: http://git-wip-us.apache.org/repos/asf/kylin/diff/e7d31938 Branch: refs/heads/yang21-hbase1.x Commit: e7d31938ef9204a671c9894f02da4a9d3ab81b42 Parents: d3ecb0d Author: shaofengshi <shaofeng...@apache.org> Authored: Sun Dec 4 09:39:45 2016 +0800 Committer: shaofengshi <shaofeng...@apache.org> Committed: Sun Dec 4 09:39:45 2016 +0800 ---------------------------------------------------------------------- .../apache/kylin/measure/topn/TopNCounter.java | 49 +++++++------------- 1 file changed, 17 insertions(+), 32 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/kylin/blob/e7d31938/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java ---------------------------------------------------------------------- diff --git a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java index cf9978a..0d0726c 100644 --- a/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java +++ b/core-metadata/src/main/java/org/apache/kylin/measure/topn/TopNCounter.java @@ -26,11 +26,9 @@ import java.util.Iterator; import java.util.LinkedList; import java.util.List; import java.util.Map; -import java.util.Set; -import com.google.common.collect.Maps; import com.google.common.collect.Lists; -import com.google.common.collect.Sets; +import com.google.common.collect.Maps; /** * Modified from the StreamSummary.java in https://github.com/addthis/stream-lib @@ -157,41 +155,28 @@ public class TopNCounter<T> implements Iterable<Counter<T>> { * @return */ public TopNCounter<T> merge(TopNCounter<T> another) { - double m1 = 0.0, m2 = 0.0; - if (this.size() >= this.capacity) { - m1 = 
this.counterList.getLast().count; - } - - if (another.size() >= another.capacity) { - m2 = another.counterList.getLast().count; - } - - Set<T> duplicateItems = Sets.newHashSet(); - List<T> notDuplicateItems = Lists.newArrayList(); - - for (Map.Entry<T, Counter<T>> entry : this.counterMap.entrySet()) { - T item = entry.getKey(); - Counter<T> existing = another.counterMap.get(item); - if (existing != null) { - duplicateItems.add(item); - } else { - notDuplicateItems.add(item); + boolean thisFull = this.size() >= this.capacity; + boolean anotherFull = another.size() >= another.capacity; + double m1 = thisFull ? this.counterList.getLast().count : 0.0; + double m2 = anotherFull ? another.counterList.getLast().count : 0.0; + + if (thisFull == true) { + for (Counter<T> entry : another.counterMap.values()) { + entry.count += m1; } } - for (T item : duplicateItems) { - this.offer(item, another.counterMap.get(item).count); - } - - for (T item : notDuplicateItems) { - this.offer(item, m2); + if (anotherFull == true) { + for (Counter<T> entry : this.counterMap.values()) { + entry.count += m2; + } } for (Map.Entry<T, Counter<T>> entry : another.counterMap.entrySet()) { - T item = entry.getKey(); - if (duplicateItems.contains(item) == false) { - double counter = entry.getValue().count; - this.offer(item, counter + m1); + if (counterMap.containsKey(entry.getKey())) { + this.offer(entry.getValue().getItem(), anotherFull ? (thisFull ? entry.getValue().count - m2 - m1 : entry.getValue().count - m2) : (thisFull ? (entry.getValue().count - m1) : entry.getValue().count)); + } else { + this.offer(entry.getValue().getItem(), entry.getValue().count); } }