Repository: phoenix Updated Branches: refs/heads/4.x-HBase-0.98 0b80eef66 -> 8f1370f55
PHOENIX-2683 store rowCount and byteCount at guidePost level(Ankit Singhal) Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/8f1370f5 Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/8f1370f5 Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/8f1370f5 Branch: refs/heads/4.x-HBase-0.98 Commit: 8f1370f55bab620abdb14321c1518076be1002d2 Parents: 0b80eef Author: Ankit Singhal <ankitsingha...@gmail.com> Authored: Tue Feb 16 22:04:39 2016 +0530 Committer: Ankit Singhal <ankitsingha...@gmail.com> Committed: Tue Feb 16 22:04:39 2016 +0530 ---------------------------------------------------------------------- .../StatsCollectorWithSplitsAndMultiCFIT.java | 66 +++- .../generated/PGuidePostsProtos.java | 336 ++++++++++++++++++- .../org/apache/phoenix/execute/ScanPlan.java | 5 +- .../org/apache/phoenix/schema/PTableImpl.java | 25 +- .../phoenix/schema/stats/GuidePostsInfo.java | 69 ++-- .../schema/stats/GuidePostsInfoBuilder.java | 61 ++-- .../schema/stats/StatisticsCollector.java | 21 +- .../phoenix/schema/stats/StatisticsWriter.java | 18 +- phoenix-protocol/src/main/PGuidePosts.proto | 2 + 9 files changed, 502 insertions(+), 101 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/phoenix/blob/8f1370f5/phoenix-core/src/it/java/org/apache/phoenix/end2end/StatsCollectorWithSplitsAndMultiCFIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/StatsCollectorWithSplitsAndMultiCFIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/StatsCollectorWithSplitsAndMultiCFIT.java index dfe8b60..13cd54c 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/StatsCollectorWithSplitsAndMultiCFIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/StatsCollectorWithSplitsAndMultiCFIT.java @@ -26,9 +26,11 @@ import 
java.sql.Connection; import java.sql.DriverManager; import java.sql.PreparedStatement; import java.sql.ResultSet; +import java.sql.SQLException; import java.util.List; import java.util.Map; import java.util.Properties; +import java.util.Random; import org.apache.hadoop.hbase.HRegionLocation; import org.apache.hadoop.hbase.util.Bytes; @@ -102,10 +104,10 @@ public class StatsCollectorWithSplitsAndMultiCFIT extends StatsCollectorAbstract TestUtil.analyzeTable(conn, STATS_TEST_TABLE_NAME_NEW); String query = "UPDATE STATISTICS " + STATS_TEST_TABLE_NAME_NEW + " SET \"" - + QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB + "\"=" + Long.toString(2000); + + QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB + "\"=" + Long.toString(250); conn.createStatement().execute(query); keyRanges = getAllSplits(conn, STATS_TEST_TABLE_NAME_NEW); - assertEquals(6, keyRanges.size()); + assertEquals(26, keyRanges.size()); rs = conn.createStatement().executeQuery( "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH),COUNT(*) from SYSTEM.STATS where PHYSICAL_NAME = '" @@ -115,27 +117,71 @@ public class StatsCollectorWithSplitsAndMultiCFIT extends StatsCollectorAbstract assertTrue(rs.next()); assertEquals("A", rs.getString(1)); assertEquals(25, rs.getInt(2)); - assertEquals(11040, rs.getInt(3)); - assertEquals(5, rs.getInt(4)); + assertEquals(12420, rs.getInt(3)); + assertEquals(25, rs.getInt(4)); assertTrue(rs.next()); assertEquals("B", rs.getString(1)); assertEquals(20, rs.getInt(2)); - assertEquals(4432, rs.getInt(3)); - assertEquals(2, rs.getInt(4)); + assertEquals(5540, rs.getInt(3)); + assertEquals(20, rs.getInt(4)); assertTrue(rs.next()); assertEquals("C", rs.getString(1)); assertEquals(25, rs.getInt(2)); - assertEquals(6652, rs.getInt(3)); - assertEquals(3, rs.getInt(4)); + assertEquals(6930, rs.getInt(3)); + assertEquals(25, rs.getInt(4)); assertTrue(rs.next()); assertEquals("D", rs.getString(1)); assertEquals(25, rs.getInt(2)); - assertEquals(6652, rs.getInt(3)); 
- assertEquals(3, rs.getInt(4)); + assertEquals(6930, rs.getInt(3)); + assertEquals(25, rs.getInt(4)); } + @Test + public void testRowCountAndByteCounts() throws SQLException { + Connection conn; + Properties props = PropertiesUtil.deepCopy(TEST_PROPERTIES); + conn = DriverManager.getConnection(getUrl(), props); + String tableName = "T"; + String ddl = "CREATE TABLE " + tableName + " (t_id VARCHAR NOT NULL,\n" + "k1 INTEGER NOT NULL,\n" + + "k2 INTEGER NOT NULL,\n" + "C3.k3 INTEGER,\n" + "C2.v1 VARCHAR,\n" + + "CONSTRAINT pk PRIMARY KEY (t_id, k1, k2)) split on ('e','j','o')"; + conn.createStatement().execute(ddl); + String[] strings = { "a", "b", "c", "d", "e", "f", "g", "h", "i", "j", "k", "l", "m", "n", "o", "p", "q", "r", + "s", "t", "u", "v", "w", "x", "y", "z" }; + for (int i = 0; i < 26; i++) { + conn.createStatement().execute("UPSERT INTO " + tableName + " values('" + strings[i] + "'," + i + "," + + (i + 1) + "," + (i + 2) + ",'" + strings[25 - i] + "')"); + } + conn.commit(); + ResultSet rs; + String query = "UPDATE STATISTICS " + tableName + " SET \"" + QueryServices.STATS_GUIDEPOST_WIDTH_BYTES_ATTRIB + + "\"=" + Long.toString(20); + conn.createStatement().execute(query); + Random r = new Random(); + int count = 0; + while (count < 4) { + int startIndex = r.nextInt(strings.length); + int endIndex = r.nextInt(strings.length - startIndex) + startIndex; + long rows = endIndex - startIndex; + long c2Bytes = rows * 35; + System.out.println(rows + ":" + startIndex + ":" + endIndex); + rs = conn.createStatement().executeQuery( + "SELECT COLUMN_FAMILY,SUM(GUIDE_POSTS_ROW_COUNT),SUM(GUIDE_POSTS_WIDTH) from SYSTEM.STATS where PHYSICAL_NAME = '" + + tableName + "' AND GUIDE_POST_KEY>= cast('" + strings[startIndex] + + "' as varbinary) AND GUIDE_POST_KEY<cast('" + strings[endIndex] + + "' as varbinary) and COLUMN_FAMILY='C2' group by COLUMN_FAMILY"); + if (startIndex < endIndex) { + assertTrue(rs.next()); + assertEquals("C2", rs.getString(1)); + assertEquals(rows, 
rs.getLong(2)); + assertEquals(c2Bytes, rs.getLong(3)); + count++; + } + } + } + } http://git-wip-us.apache.org/repos/asf/phoenix/blob/8f1370f5/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PGuidePostsProtos.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PGuidePostsProtos.java b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PGuidePostsProtos.java index d9dcb56..5026307 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PGuidePostsProtos.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/coprocessor/generated/PGuidePostsProtos.java @@ -74,6 +74,34 @@ public final class PGuidePostsProtos { * <code>optional bytes encodedGuidePosts = 6;</code> */ com.google.protobuf.ByteString getEncodedGuidePosts(); + + // repeated int64 byteCounts = 7; + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + java.util.List<java.lang.Long> getByteCountsList(); + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + int getByteCountsCount(); + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + long getByteCounts(int index); + + // repeated int64 rowCounts = 8; + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + java.util.List<java.lang.Long> getRowCountsList(); + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + int getRowCountsCount(); + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + long getRowCounts(int index); } /** * Protobuf type {@code PGuidePosts} @@ -159,6 +187,48 @@ public final class PGuidePostsProtos { encodedGuidePosts_ = input.readBytes(); break; } + case 56: { + if (!((mutable_bitField0_ & 0x00000040) == 0x00000040)) { + byteCounts_ = new java.util.ArrayList<java.lang.Long>(); + mutable_bitField0_ |= 0x00000040; + } + byteCounts_.add(input.readInt64()); + break; + } + case 58: { + int length = input.readRawVarint32(); + int limit = 
input.pushLimit(length); + if (!((mutable_bitField0_ & 0x00000040) == 0x00000040) && input.getBytesUntilLimit() > 0) { + byteCounts_ = new java.util.ArrayList<java.lang.Long>(); + mutable_bitField0_ |= 0x00000040; + } + while (input.getBytesUntilLimit() > 0) { + byteCounts_.add(input.readInt64()); + } + input.popLimit(limit); + break; + } + case 64: { + if (!((mutable_bitField0_ & 0x00000080) == 0x00000080)) { + rowCounts_ = new java.util.ArrayList<java.lang.Long>(); + mutable_bitField0_ |= 0x00000080; + } + rowCounts_.add(input.readInt64()); + break; + } + case 66: { + int length = input.readRawVarint32(); + int limit = input.pushLimit(length); + if (!((mutable_bitField0_ & 0x00000080) == 0x00000080) && input.getBytesUntilLimit() > 0) { + rowCounts_ = new java.util.ArrayList<java.lang.Long>(); + mutable_bitField0_ |= 0x00000080; + } + while (input.getBytesUntilLimit() > 0) { + rowCounts_.add(input.readInt64()); + } + input.popLimit(limit); + break; + } } } } catch (com.google.protobuf.InvalidProtocolBufferException e) { @@ -170,6 +240,12 @@ public final class PGuidePostsProtos { if (((mutable_bitField0_ & 0x00000001) == 0x00000001)) { guidePosts_ = java.util.Collections.unmodifiableList(guidePosts_); } + if (((mutable_bitField0_ & 0x00000040) == 0x00000040)) { + byteCounts_ = java.util.Collections.unmodifiableList(byteCounts_); + } + if (((mutable_bitField0_ & 0x00000080) == 0x00000080)) { + rowCounts_ = java.util.Collections.unmodifiableList(rowCounts_); + } this.unknownFields = unknownFields.build(); makeExtensionsImmutable(); } @@ -305,6 +381,52 @@ public final class PGuidePostsProtos { return encodedGuidePosts_; } + // repeated int64 byteCounts = 7; + public static final int BYTECOUNTS_FIELD_NUMBER = 7; + private java.util.List<java.lang.Long> byteCounts_; + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + public java.util.List<java.lang.Long> + getByteCountsList() { + return byteCounts_; + } + /** + * <code>repeated int64 byteCounts = 7;</code> + */ 
+ public int getByteCountsCount() { + return byteCounts_.size(); + } + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + public long getByteCounts(int index) { + return byteCounts_.get(index); + } + + // repeated int64 rowCounts = 8; + public static final int ROWCOUNTS_FIELD_NUMBER = 8; + private java.util.List<java.lang.Long> rowCounts_; + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public java.util.List<java.lang.Long> + getRowCountsList() { + return rowCounts_; + } + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public int getRowCountsCount() { + return rowCounts_.size(); + } + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public long getRowCounts(int index) { + return rowCounts_.get(index); + } + private void initFields() { guidePosts_ = java.util.Collections.emptyList(); byteCount_ = 0L; @@ -312,6 +434,8 @@ public final class PGuidePostsProtos { maxLength_ = 0; encodedGuidePostsCount_ = 0; encodedGuidePosts_ = com.google.protobuf.ByteString.EMPTY; + byteCounts_ = java.util.Collections.emptyList(); + rowCounts_ = java.util.Collections.emptyList(); } private byte memoizedIsInitialized = -1; public final boolean isInitialized() { @@ -343,6 +467,12 @@ public final class PGuidePostsProtos { if (((bitField0_ & 0x00000010) == 0x00000010)) { output.writeBytes(6, encodedGuidePosts_); } + for (int i = 0; i < byteCounts_.size(); i++) { + output.writeInt64(7, byteCounts_.get(i)); + } + for (int i = 0; i < rowCounts_.size(); i++) { + output.writeInt64(8, rowCounts_.get(i)); + } getUnknownFields().writeTo(output); } @@ -381,6 +511,24 @@ public final class PGuidePostsProtos { size += com.google.protobuf.CodedOutputStream .computeBytesSize(6, encodedGuidePosts_); } + { + int dataSize = 0; + for (int i = 0; i < byteCounts_.size(); i++) { + dataSize += com.google.protobuf.CodedOutputStream + .computeInt64SizeNoTag(byteCounts_.get(i)); + } + size += dataSize; + size += 1 * getByteCountsList().size(); + } + { + int dataSize = 0; + 
for (int i = 0; i < rowCounts_.size(); i++) { + dataSize += com.google.protobuf.CodedOutputStream + .computeInt64SizeNoTag(rowCounts_.get(i)); + } + size += dataSize; + size += 1 * getRowCountsList().size(); + } size += getUnknownFields().getSerializedSize(); memoizedSerializedSize = size; return size; @@ -431,6 +579,10 @@ public final class PGuidePostsProtos { result = result && getEncodedGuidePosts() .equals(other.getEncodedGuidePosts()); } + result = result && getByteCountsList() + .equals(other.getByteCountsList()); + result = result && getRowCountsList() + .equals(other.getRowCountsList()); result = result && getUnknownFields().equals(other.getUnknownFields()); return result; @@ -468,6 +620,14 @@ public final class PGuidePostsProtos { hash = (37 * hash) + ENCODEDGUIDEPOSTS_FIELD_NUMBER; hash = (53 * hash) + getEncodedGuidePosts().hashCode(); } + if (getByteCountsCount() > 0) { + hash = (37 * hash) + BYTECOUNTS_FIELD_NUMBER; + hash = (53 * hash) + getByteCountsList().hashCode(); + } + if (getRowCountsCount() > 0) { + hash = (37 * hash) + ROWCOUNTS_FIELD_NUMBER; + hash = (53 * hash) + getRowCountsList().hashCode(); + } hash = (29 * hash) + getUnknownFields().hashCode(); memoizedHashCode = hash; return hash; @@ -589,6 +749,10 @@ public final class PGuidePostsProtos { bitField0_ = (bitField0_ & ~0x00000010); encodedGuidePosts_ = com.google.protobuf.ByteString.EMPTY; bitField0_ = (bitField0_ & ~0x00000020); + byteCounts_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000040); + rowCounts_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000080); return this; } @@ -642,6 +806,16 @@ public final class PGuidePostsProtos { to_bitField0_ |= 0x00000010; } result.encodedGuidePosts_ = encodedGuidePosts_; + if (((bitField0_ & 0x00000040) == 0x00000040)) { + byteCounts_ = java.util.Collections.unmodifiableList(byteCounts_); + bitField0_ = (bitField0_ & ~0x00000040); + } + result.byteCounts_ = byteCounts_; + if (((bitField0_ & 
0x00000080) == 0x00000080)) { + rowCounts_ = java.util.Collections.unmodifiableList(rowCounts_); + bitField0_ = (bitField0_ & ~0x00000080); + } + result.rowCounts_ = rowCounts_; result.bitField0_ = to_bitField0_; onBuilt(); return result; @@ -683,6 +857,26 @@ public final class PGuidePostsProtos { if (other.hasEncodedGuidePosts()) { setEncodedGuidePosts(other.getEncodedGuidePosts()); } + if (!other.byteCounts_.isEmpty()) { + if (byteCounts_.isEmpty()) { + byteCounts_ = other.byteCounts_; + bitField0_ = (bitField0_ & ~0x00000040); + } else { + ensureByteCountsIsMutable(); + byteCounts_.addAll(other.byteCounts_); + } + onChanged(); + } + if (!other.rowCounts_.isEmpty()) { + if (rowCounts_.isEmpty()) { + rowCounts_ = other.rowCounts_; + bitField0_ = (bitField0_ & ~0x00000080); + } else { + ensureRowCountsIsMutable(); + rowCounts_.addAll(other.rowCounts_); + } + onChanged(); + } this.mergeUnknownFields(other.getUnknownFields()); return this; } @@ -950,6 +1144,138 @@ public final class PGuidePostsProtos { return this; } + // repeated int64 byteCounts = 7; + private java.util.List<java.lang.Long> byteCounts_ = java.util.Collections.emptyList(); + private void ensureByteCountsIsMutable() { + if (!((bitField0_ & 0x00000040) == 0x00000040)) { + byteCounts_ = new java.util.ArrayList<java.lang.Long>(byteCounts_); + bitField0_ |= 0x00000040; + } + } + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + public java.util.List<java.lang.Long> + getByteCountsList() { + return java.util.Collections.unmodifiableList(byteCounts_); + } + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + public int getByteCountsCount() { + return byteCounts_.size(); + } + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + public long getByteCounts(int index) { + return byteCounts_.get(index); + } + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + public Builder setByteCounts( + int index, long value) { + ensureByteCountsIsMutable(); + byteCounts_.set(index, value); + 
onChanged(); + return this; + } + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + public Builder addByteCounts(long value) { + ensureByteCountsIsMutable(); + byteCounts_.add(value); + onChanged(); + return this; + } + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + public Builder addAllByteCounts( + java.lang.Iterable<? extends java.lang.Long> values) { + ensureByteCountsIsMutable(); + super.addAll(values, byteCounts_); + onChanged(); + return this; + } + /** + * <code>repeated int64 byteCounts = 7;</code> + */ + public Builder clearByteCounts() { + byteCounts_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000040); + onChanged(); + return this; + } + + // repeated int64 rowCounts = 8; + private java.util.List<java.lang.Long> rowCounts_ = java.util.Collections.emptyList(); + private void ensureRowCountsIsMutable() { + if (!((bitField0_ & 0x00000080) == 0x00000080)) { + rowCounts_ = new java.util.ArrayList<java.lang.Long>(rowCounts_); + bitField0_ |= 0x00000080; + } + } + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public java.util.List<java.lang.Long> + getRowCountsList() { + return java.util.Collections.unmodifiableList(rowCounts_); + } + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public int getRowCountsCount() { + return rowCounts_.size(); + } + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public long getRowCounts(int index) { + return rowCounts_.get(index); + } + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public Builder setRowCounts( + int index, long value) { + ensureRowCountsIsMutable(); + rowCounts_.set(index, value); + onChanged(); + return this; + } + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public Builder addRowCounts(long value) { + ensureRowCountsIsMutable(); + rowCounts_.add(value); + onChanged(); + return this; + } + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public Builder addAllRowCounts( + java.lang.Iterable<? 
extends java.lang.Long> values) { + ensureRowCountsIsMutable(); + super.addAll(values, rowCounts_); + onChanged(); + return this; + } + /** + * <code>repeated int64 rowCounts = 8;</code> + */ + public Builder clearRowCounts() { + rowCounts_ = java.util.Collections.emptyList(); + bitField0_ = (bitField0_ & ~0x00000080); + onChanged(); + return this; + } + // @@protoc_insertion_point(builder_scope:PGuidePosts) } @@ -975,13 +1301,13 @@ public final class PGuidePostsProtos { descriptor; static { java.lang.String[] descriptorData = { - "\n\021PGuidePosts.proto\"\224\001\n\013PGuidePosts\022\022\n\ng" + + "\n\021PGuidePosts.proto\"\273\001\n\013PGuidePosts\022\022\n\ng" + "uidePosts\030\001 \003(\014\022\021\n\tbyteCount\030\002 \001(\003\022\020\n\010ro" + "wCount\030\003 \001(\003\022\021\n\tmaxLength\030\004 \001(\005\022\036\n\026encod" + "edGuidePostsCount\030\005 \001(\005\022\031\n\021encodedGuideP" + - "osts\030\006 \001(\014BE\n(org.apache.phoenix.coproce" + - "ssor.generatedB\021PGuidePostsProtosH\001\210\001\001\240\001" + - "\001" + "osts\030\006 \001(\014\022\022\n\nbyteCounts\030\007 \003(\003\022\021\n\trowCou" + + "nts\030\010 \003(\003BE\n(org.apache.phoenix.coproces" + + "sor.generatedB\021PGuidePostsProtosH\001\210\001\001\240\001\001" }; com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner assigner = new com.google.protobuf.Descriptors.FileDescriptor.InternalDescriptorAssigner() { @@ -993,7 +1319,7 @@ public final class PGuidePostsProtos { internal_static_PGuidePosts_fieldAccessorTable = new com.google.protobuf.GeneratedMessage.FieldAccessorTable( internal_static_PGuidePosts_descriptor, - new java.lang.String[] { "GuidePosts", "ByteCount", "RowCount", "MaxLength", "EncodedGuidePostsCount", "EncodedGuidePosts", }); + new java.lang.String[] { "GuidePosts", "ByteCount", "RowCount", "MaxLength", "EncodedGuidePostsCount", "EncodedGuidePosts", "ByteCounts", "RowCounts", }); return null; } }; 
http://git-wip-us.apache.org/repos/asf/phoenix/blob/8f1370f5/phoenix-core/src/main/java/org/apache/phoenix/execute/ScanPlan.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/execute/ScanPlan.java b/phoenix-core/src/main/java/org/apache/phoenix/execute/ScanPlan.java index efb00a8..f4c570c 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/execute/ScanPlan.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/execute/ScanPlan.java @@ -121,7 +121,10 @@ public class ScanPlan extends BaseQueryPlan { estRegionSize = StatisticsUtil.getGuidePostDepth(guidepostPerRegion, guidepostWidth, desc); } else { // Region size estimated based on total number of bytes divided by number of regions - long totByteSize = gpsInfo.getByteCount(); + long totByteSize = 0; + for (long byteCount : gpsInfo.getByteCounts()) { + totByteSize += byteCount; + } estRegionSize = totByteSize / (gpsInfo.getGuidePostsCount()+1); } // TODO: configurable number of bytes? 
http://git-wip-us.apache.org/repos/asf/phoenix/blob/8f1370f5/phoenix-core/src/main/java/org/apache/phoenix/schema/PTableImpl.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/PTableImpl.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/PTableImpl.java index abd31c0..bb90c16 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/schema/PTableImpl.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/PTableImpl.java @@ -1029,12 +1029,12 @@ public class PTableImpl implements PTable { SortedMap<byte[], GuidePostsInfo> tableGuidePosts = new TreeMap<byte[], GuidePostsInfo>(Bytes.BYTES_COMPARATOR); for (PTableProtos.PTableStats pTableStatsProto : table.getGuidePostsList()) { PGuidePosts pGuidePosts = pTableStatsProto.getPGuidePosts(); - long guidePostsByteCount = pGuidePosts.getByteCount(); - long rowCount = pGuidePosts.getRowCount(); int maxLength = pGuidePosts.getMaxLength(); int guidePostsCount = pGuidePosts.getEncodedGuidePostsCount(); - GuidePostsInfo info = new GuidePostsInfo(guidePostsByteCount, - new ImmutableBytesWritable(HBaseZeroCopyByteString.zeroCopyGetBytes(pGuidePosts.getEncodedGuidePosts())), rowCount, maxLength, guidePostsCount); + GuidePostsInfo info = new GuidePostsInfo(pGuidePosts.getByteCountsList(), + new ImmutableBytesWritable( + HBaseZeroCopyByteString.zeroCopyGetBytes(pGuidePosts.getEncodedGuidePosts())), + pGuidePosts.getRowCountsList(), maxLength, guidePostsCount); tableGuidePosts.put(pTableStatsProto.getKey().toByteArray(), info); } PTableStats stats = new PTableStatsImpl(tableGuidePosts, table.getStatsTimeStamp()); @@ -1142,17 +1142,16 @@ public class PTableImpl implements PTable { for (Map.Entry<byte[], GuidePostsInfo> entry : table.getTableStats().getGuidePosts().entrySet()) { PTableProtos.PTableStats.Builder statsBuilder = PTableProtos.PTableStats.newBuilder(); statsBuilder.setKey(ByteStringer.wrap(entry.getKey())); - 
statsBuilder.setGuidePostsByteCount(entry.getValue().getByteCount()); statsBuilder.setGuidePostsCount(entry.getValue().getGuidePostsCount()); PGuidePostsProtos.PGuidePosts.Builder guidePstsBuilder = PGuidePostsProtos.PGuidePosts.newBuilder(); - guidePstsBuilder.setEncodedGuidePosts(ByteStringer.wrap(entry.getValue().getGuidePosts().get())); - guidePstsBuilder.setByteCount(entry.getValue().getByteCount()); - guidePstsBuilder.setRowCount(entry.getValue().getRowCount()); - guidePstsBuilder.setMaxLength(entry.getValue().getMaxLength()); - guidePstsBuilder.setEncodedGuidePostsCount(entry.getValue().getGuidePostsCount()); - statsBuilder.setPGuidePosts(guidePstsBuilder); - builder.addGuidePosts(statsBuilder.build()); - } + guidePstsBuilder.setEncodedGuidePosts(ByteStringer.wrap(entry.getValue().getGuidePosts().get())); + guidePstsBuilder.addAllByteCounts(entry.getValue().getByteCounts()); + guidePstsBuilder.addAllRowCounts(entry.getValue().getRowCounts()); + guidePstsBuilder.setMaxLength(entry.getValue().getMaxLength()); + guidePstsBuilder.setEncodedGuidePostsCount(entry.getValue().getGuidePostsCount()); + statsBuilder.setPGuidePosts(guidePstsBuilder); + builder.addGuidePosts(statsBuilder.build()); + } builder.setStatsTimeStamp(table.getTableStats().getTimestamp()); if (table.getParentName() != null) { http://git-wip-us.apache.org/repos/asf/phoenix/blob/8f1370f5/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfo.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfo.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfo.java index da7d3a5..b8cc3f1 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfo.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfo.java @@ -17,6 +17,9 @@ */ package org.apache.phoenix.schema.stats; +import java.util.ArrayList; +import 
java.util.List; + import org.apache.hadoop.hbase.io.ImmutableBytesWritable; import org.apache.phoenix.util.ByteUtil; /** @@ -29,57 +32,69 @@ public class GuidePostsInfo { * the total number of guidePosts for the table combining all the guidePosts per region per cf. */ private ImmutableBytesWritable guidePosts; - /** - * The bytecount that is flattened across the total number of guide posts. - */ - private long byteCount = 0; - - /** - * The rowCount that is flattened across the total number of guide posts. - */ - private long rowCount = 0; - + /** * Maximum length of a guidePost collected */ private int maxLength; - public final static GuidePostsInfo EMPTY_GUIDEPOST = new GuidePostsInfo(0, - new ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY), 0, 0, 0); + public final static GuidePostsInfo EMPTY_GUIDEPOST = new GuidePostsInfo(new ArrayList<Long>(), + new ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY), new ArrayList<Long>(), 0, 0); public int getMaxLength() { return maxLength; } - + + /** + * Number of guidePosts + */ private int guidePostsCount; /** + * The rowCounts of each guidePost traversed + */ + private List<Long> rowCounts; + + /** + * The bytecounts of each guidePost traversed + */ + private List<Long> byteCounts; + + public List<Long> getRowCounts() { + return rowCounts; + } + + public List<Long> getByteCounts() { + return byteCounts; + } + + /** * Constructor that creates GuidePostsInfo per region - * @param byteCount + * + * @param byteCounts + * The bytecounts of each guidePost traversed * @param guidePosts - * @param rowCount + * Prefix byte encoded guidePosts + * @param rowCounts + * The rowCounts of each guidePost traversed + * @param maxLength + * Maximum length of a guidePost collected + * @param guidePostsCount + * Number of guidePosts */ - public GuidePostsInfo(long byteCount, ImmutableBytesWritable guidePosts, long rowCount, int maxLength, int guidePostsCount) { + public GuidePostsInfo(List<Long> byteCounts, ImmutableBytesWritable 
guidePosts, List<Long> rowCounts, int maxLength, + int guidePostsCount) { this.guidePosts = new ImmutableBytesWritable(guidePosts); this.maxLength = maxLength; - this.byteCount = byteCount; - this.rowCount = rowCount; this.guidePostsCount = guidePostsCount; + this.rowCounts = rowCounts; + this.byteCounts = byteCounts; } - - public long getByteCount() { - return byteCount; - } - public ImmutableBytesWritable getGuidePosts() { return guidePosts; } - public long getRowCount() { - return this.rowCount; - } - public int getGuidePostsCount() { return guidePostsCount; } http://git-wip-us.apache.org/repos/asf/phoenix/blob/8f1370f5/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfoBuilder.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfoBuilder.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfoBuilder.java index f3ada82..c85b1d6 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfoBuilder.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/GuidePostsInfoBuilder.java @@ -19,9 +19,10 @@ package org.apache.phoenix.schema.stats; import java.io.DataOutputStream; import java.io.IOException; +import java.util.LinkedList; +import java.util.List; import org.apache.hadoop.hbase.io.ImmutableBytesWritable; -import org.apache.hadoop.hbase.util.Bytes; import org.apache.phoenix.util.ByteUtil; import org.apache.phoenix.util.PrefixByteCodec; import org.apache.phoenix.util.PrefixByteEncoder; @@ -33,25 +34,31 @@ import org.apache.phoenix.util.TrustedByteArrayOutputStream; public class GuidePostsInfoBuilder { private PrefixByteEncoder encoder; - private byte[] lastRow; + private ImmutableBytesWritable lastRow; private ImmutableBytesWritable guidePosts=new ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY); - private long byteCount = 0; private int guidePostsCount; /** * The 
rowCount that is flattened across the total number of guide posts. */ private long rowCount = 0; - + /** * Maximum length of a guidePost collected */ private int maxLength; private DataOutputStream output; private TrustedByteArrayOutputStream stream; - - public final static GuidePostsInfo EMPTY_GUIDEPOST = new GuidePostsInfo(0, - new ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY), 0, 0, 0); + private List<Long> rowCounts = new LinkedList<Long>(); + private List<Long> byteCounts = new LinkedList<Long>(); + + public List<Long> getRowCounts() { + return rowCounts; + } + + public List<Long> getByteCounts() { + return byteCounts; + } public int getMaxLength() { return maxLength; @@ -60,7 +67,7 @@ public class GuidePostsInfoBuilder { this.stream = new TrustedByteArrayOutputStream(1); this.output = new DataOutputStream(stream); this.encoder=new PrefixByteEncoder(); - lastRow = ByteUtil.EMPTY_BYTE_ARRAY; + lastRow = new ImmutableBytesWritable(ByteUtil.EMPTY_BYTE_ARRAY); } /** @@ -71,14 +78,14 @@ public class GuidePostsInfoBuilder { * @return * @throws IOException */ - public boolean addGuidePosts( byte[] row, long byteCount, long rowCount) { - if (row.length != 0 && Bytes.compareTo(lastRow, row) < 0) { + public boolean addGuidePosts(ImmutableBytesWritable row, long byteCount, long rowCount) { + if (row.getLength() != 0 && lastRow.compareTo(row) < 0) { try { - encoder.encode(output, row, 0, row.length); - this.byteCount += byteCount; + encoder.encode(output, row.get(), row.getOffset(), row.getLength()); + rowCounts.add(rowCount); + byteCounts.add(byteCount); this.guidePostsCount++; this.maxLength = encoder.getMaxLength(); - this.rowCount += rowCount; lastRow = row; return true; } catch (IOException e) { @@ -87,13 +94,17 @@ public class GuidePostsInfoBuilder { } return false; } - - public boolean addGuidePosts(byte[] row){ - return addGuidePosts(row, 0, 0); + + public boolean addGuidePosts(byte[] row) { + return addGuidePosts(new ImmutableBytesWritable(row), 0, 0); + } + 
+ public boolean addGuidePosts(byte[] row, long byteCount) { + return addGuidePosts(new ImmutableBytesWritable(row), byteCount, 0); } - public boolean addGuidePosts(byte[] row, long byteCount){ - return addGuidePosts(row, byteCount, 0); + public boolean addGuidePosts(byte[] row, long byteCount, long rowCount) { + return addGuidePosts(new ImmutableBytesWritable(row), byteCount, rowCount); /* forward the caller's rowCount; passing a literal 0 here would discard the per-guidepost row count */ } private void close() { @@ -102,12 +113,22 @@ public class GuidePostsInfoBuilder { public GuidePostsInfo build() { this.guidePosts.set(stream.getBuffer(), 0, stream.size()); - GuidePostsInfo guidePostsInfo = new GuidePostsInfo(this.byteCount, this.guidePosts, this.rowCount, this.maxLength, this.guidePostsCount); + GuidePostsInfo guidePostsInfo = new GuidePostsInfo(this.byteCounts, this.guidePosts, this.rowCounts, + this.maxLength, this.guidePostsCount); this.close(); return guidePostsInfo; } + public void incrementRowCount() { - this.rowCount++; + this.rowCount++; + } + + public void resetRowCount() { + this.rowCount = 0; + } + + public long getRowCount() { + return rowCount; } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/phoenix/blob/8f1370f5/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsCollector.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsCollector.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsCollector.java index 411fdc3..97de270 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsCollector.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsCollector.java @@ -41,7 +41,6 @@ import org.apache.phoenix.query.QueryServicesOptions; import org.apache.phoenix.schema.SortOrder; import org.apache.phoenix.schema.types.PInteger; import org.apache.phoenix.schema.types.PLong; -import org.apache.phoenix.util.ByteUtil; import 
org.apache.phoenix.util.TimeKeeper; import org.slf4j.Logger; import org.slf4j.LoggerFactory; @@ -160,10 +159,6 @@ public class StatisticsCollector { */ public void collectStatistics(final List<Cell> results) { Map<ImmutableBytesPtr, Boolean> famMap = Maps.newHashMap(); - List<GuidePostsInfoBuilder> rowTracker = null; - if (cachedGps == null) { - rowTracker = new ArrayList<GuidePostsInfoBuilder>(); - } for (Cell cell : results) { KeyValue kv = KeyValueUtil.ensureKeyValue(cell); maxTimeStamp = Math.max(maxTimeStamp, kv.getTimestamp()); @@ -179,29 +174,23 @@ public class StatisticsCollector { } if (famMap.get(cfKey) == null) { famMap.put(cfKey, true); - rowTracker.add(gps.getSecond()); + gps.getSecond().incrementRowCount(); } } else { gps = cachedGps; + cachedGps.getSecond().incrementRowCount(); } int kvLength = kv.getLength(); long byteCount = gps.getFirst() + kvLength; gps.setFirst(byteCount); if (byteCount >= guidepostDepth) { - byte[] row = ByteUtil.copyKeyBytesIfNecessary( - new ImmutableBytesWritable(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength())); - if (gps.getSecond().addGuidePosts(row, byteCount)) { + ImmutableBytesWritable row = new ImmutableBytesWritable(kv.getRowArray(), kv.getRowOffset(), kv.getRowLength()); + if (gps.getSecond().addGuidePosts(row, byteCount, gps.getSecond().getRowCount())) { gps.setFirst(0l); + gps.getSecond().resetRowCount(); } } } - if (cachedGps == null) { - for (GuidePostsInfoBuilder s : rowTracker) { - s.incrementRowCount(); - } - } else { - cachedGps.getSecond().incrementRowCount(); - } } public InternalScanner createCompactionScanner(RegionCoprocessorEnvironment env, Store store, InternalScanner s, http://git-wip-us.apache.org/repos/asf/phoenix/blob/8f1370f5/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java 
b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java index 52b6062..6eb2b68 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/stats/StatisticsWriter.java @@ -140,28 +140,28 @@ public class StatisticsWriter implements Closeable { } GuidePostsInfo gps = tracker.getGuidePosts(cfKey); if (gps != null) { - boolean rowColumnAdded = false; + List<Long> byteCounts = gps.getByteCounts(); + List<Long> rowCounts = gps.getRowCounts(); ImmutableBytesWritable keys = gps.getGuidePosts(); ByteArrayInputStream stream = new ByteArrayInputStream(keys.get(), keys.getOffset(), keys.getLength()); DataInput input = new DataInputStream(stream); PrefixByteDecoder decoder = new PrefixByteDecoder(gps.getMaxLength()); + int guidePostCount = 0; try { while (true) { ImmutableBytesWritable ptr = decoder.decode(input); byte[] prefix = StatisticsUtil.getRowKey(tableName, cfKey, ptr); Put put = new Put(prefix); - if (!rowColumnAdded) { - put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH_BYTES, - timeStamp, PLong.INSTANCE.toBytes(gps.getByteCount())); - put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, - PhoenixDatabaseMetaData.GUIDE_POSTS_ROW_COUNT_BYTES, timeStamp, - PLong.INSTANCE.toBytes(gps.getRowCount())); - rowColumnAdded = true; - } + put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, PhoenixDatabaseMetaData.GUIDE_POSTS_WIDTH_BYTES, + timeStamp, PLong.INSTANCE.toBytes(byteCounts.get(guidePostCount))); + put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, + PhoenixDatabaseMetaData.GUIDE_POSTS_ROW_COUNT_BYTES, timeStamp, + PLong.INSTANCE.toBytes(rowCounts.get(guidePostCount))); // Add our empty column value so queries behave correctly put.add(QueryConstants.DEFAULT_COLUMN_FAMILY_BYTES, QueryConstants.EMPTY_COLUMN_BYTES, timeStamp, ByteUtil.EMPTY_BYTE_ARRAY); mutations.add(put); + guidePostCount++; } } catch 
(EOFException e) { // Ignore as this signifies we're done http://git-wip-us.apache.org/repos/asf/phoenix/blob/8f1370f5/phoenix-protocol/src/main/PGuidePosts.proto ---------------------------------------------------------------------- diff --git a/phoenix-protocol/src/main/PGuidePosts.proto b/phoenix-protocol/src/main/PGuidePosts.proto index 14de2eb..99d2656 100644 --- a/phoenix-protocol/src/main/PGuidePosts.proto +++ b/phoenix-protocol/src/main/PGuidePosts.proto @@ -30,4 +30,6 @@ message PGuidePosts { optional int32 maxLength = 4; optional int32 encodedGuidePostsCount = 5; optional bytes encodedGuidePosts = 6; + repeated int64 byteCounts = 7; + repeated int64 rowCounts = 8; } \ No newline at end of file