dbwong commented on a change in pull request #482: PHOENIX-4925 Use Segment
tree to organize Guide Post Info
URL: https://github.com/apache/phoenix/pull/482#discussion_r275092924
##########
File path:
phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfo.java
##########
@@ -17,138 +17,771 @@
*/
package org.apache.phoenix.schema.stats;
-import java.util.Collections;
-import java.util.List;
+import java.util.*;
-import org.apache.hadoop.hbase.io.ImmutableBytesWritable;
+import com.google.common.annotations.VisibleForTesting;
+import com.google.common.base.Preconditions;
+import com.google.common.collect.Lists;
import org.apache.hadoop.hbase.util.Bytes;
-import org.apache.phoenix.util.ByteUtil;
-import org.apache.phoenix.util.SizedUtil;
+import org.apache.hadoop.hbase.util.Pair;
+import org.apache.phoenix.schema.SortOrder;
+import org.apache.phoenix.util.ScanUtil;
+import org.apache.phoenix.util.ScanUtil.BytesComparator;
+import org.apache.phoenix.query.KeyRange;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
-import com.google.common.primitives.Longs;
-/**
- * A class that holds the guidePosts of a region and also allows combining
the
- * guidePosts of different regions when the GuidePostsInfo is formed for a
table.
- */
-public class GuidePostsInfo {
- public final static GuidePostsInfo NO_GUIDEPOST =
- new GuidePostsInfo(Collections.<Long> emptyList(),
- new ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY),
- Collections.<Long> emptyList(), 0, 0, Collections.<Long>
emptyList()) {
- @Override
- public int getEstimatedSize() {
- return 0;
- }
- };
-
- public final static byte[] EMPTY_GUIDEPOST_KEY = ByteUtil.EMPTY_BYTE_ARRAY;
-
+class GuidePostTreeNode {
/**
- * the total number of guidePosts for the table combining all the
guidePosts per region per cf.
+ * The key range of the guide posts that this node covers
*/
- private final ImmutableBytesWritable guidePosts;
+ private final KeyRange keyRange;
+
+ /**
+ * The accumulated estimation info of the guide posts that this node covers
+ */
+ private final GuidePostEstimation totalEstimation;
+
+ public GuidePostTreeNode(KeyRange keyRange, GuidePostEstimation
totalEstimation) {
+ this.keyRange = keyRange;
+ this.totalEstimation = totalEstimation;
+ }
+
+ public KeyRange getKeyRange() {
+ return this.keyRange;
+ }
+
+ public GuidePostEstimation getTotalEstimation() {
+ return this.totalEstimation;
+ }
+
/**
- * Maximum length of a guidePost collected
+ * Merge the two child tree nodes into a parent node which contains the merged
key range
+ * and the "sum" of the estimation info of the child nodes.
+ * @param left
+ * @param right
+ * @return the parent node of the given nodes
*/
- private final int maxLength;
+ public static GuidePostTreeNode merge(GuidePostTreeNode left,
GuidePostTreeNode right) {
+ KeyRange tempKeyRange = KeyRange.getKeyRange(
+ left.getKeyRange().getLowerRange(),
left.getKeyRange().isLowerInclusive(),
+ right.getKeyRange().getUpperRange(),
right.getKeyRange().isUpperInclusive());
+
+ GuidePostEstimation tempEstimation = GuidePostEstimation.merge(
+ left.getTotalEstimation(), right.getTotalEstimation());
+
+ return new GuidePostTreeNode(tempKeyRange, tempEstimation);
+ }
+}
+
+final class GuidePostTreeLeaf extends GuidePostTreeNode {
/**
- * Number of guidePosts
+ * The index of the guide post chunk in the chunk array.
*/
- private final int guidePostsCount;
+ private int guidePostChunkIndex;
+
+ public GuidePostTreeLeaf(int guidePostChunkIndex, KeyRange keyRange,
+ GuidePostEstimation totalEstimation) {
+ super(keyRange, totalEstimation);
+
+ this.guidePostChunkIndex = guidePostChunkIndex;
+ }
+
+ public int getGuidePostChunkIndex() {
+ return this.guidePostChunkIndex;
+ }
+}
+
+@VisibleForTesting
+final class DecodedGuidePostChunk {
/**
- * The rowCounts of each guidePost traversed
+ * The index of the guide post chunk in the chunk array.
*/
- private final long[] rowCounts;
+ private final int guidePostChunkIndex;
+
/**
- * The bytecounts of each guidePost traversed
+ * The guide posts in this chunk.
*/
- private final long[] byteCounts;
+ private final List<byte[]> guidePosts;
+
+ public DecodedGuidePostChunk(int guidePostChunkIndex, List<byte[]>
guidePosts) {
+ assert (guidePostChunkIndex !=
GuidePostChunk.INVALID_GUIDEPOST_CHUNK_INDEX);
+ assert (guidePosts.size() > 0);
+ this.guidePostChunkIndex = guidePostChunkIndex;
+ this.guidePosts = guidePosts;
+ }
+
+ public int getGuidePostChunkIndex() {
+ return guidePostChunkIndex;
+ }
+
+ public List<byte[]> getGuidePosts() {
+ return guidePosts;
+ }
+
/**
- * Estimate of byte size of this instance
+ * The guide post boundaries:
+ * gp_0, gp_1, ..., gp_i0, ..., gp_i1, ..., gp_i2, ..., gp_in, ...,
gp_n
+ * The guide post chunk boundaries:
+ * gp_i0, gp_i1, ..., gp_in, ..., gp_n
+ * The key space split by the guide post chunks:
+ * (UNBOUND, gp_i0](gp_i0, gp_i1](gp_i1, gp_i2]...(gp_in, gp_n](gp_n,
UNBOUND)
+ * The last guide post chunk is a DUMMY chunk which contains one DUMMY
guide post
Review comment:
Dislike use of dummy.
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services