On Sun, May 13, 2018 at 9:35 PM, 一米阳光 <[email protected]> wrote:
> Hi, thanks very much for the reply.
> The table schema and partitioning info are:
> Schema
> Column ID Type Encoding Compression Read default Write default
> *key* 0 string NOT NULL PREFIX_ENCODING LZ4 - -
> metric_value 1 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_00 2 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_01 3 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_02 4 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_03 5 string NOT NULL AUTO_ENCODING LZ4 - -
> dimension_04 6 string NOT NULL AUTO_ENCODING LZ4 - -
> Partition Schema
>
> RANGE (key) (
> PARTITION VALUES < "005000",
> PARTITION "005000" <= VALUES < "010000",
> PARTITION "010000" <= VALUES < "015000",
> PARTITION "015000" <= VALUES < "020000",
> PARTITION "020000" <= VALUES < "025000",
> PARTITION "025000" <= VALUES < "030000",
> PARTITION "030000" <= VALUES < "035000",
> PARTITION "035000" <= VALUES < "040000",
> PARTITION "040000" <= VALUES < "045000",
> PARTITION "045000" <= VALUES < "050000",
> PARTITION "050000" <= VALUES < "055000",
> PARTITION "055000" <= VALUES < "060000",
> PARTITION "060000" <= VALUES < "065000",
> PARTITION "065000" <= VALUES < "070000",
> PARTITION "070000" <= VALUES < "075000",
> PARTITION "075000" <= VALUES < "080000",
> PARTITION "080000" <= VALUES < "085000",
> PARTITION "085000" <= VALUES < "090000",
> PARTITION "090000" <= VALUES < "095000",
> PARTITION "095000" <= VALUES < "100000",
> PARTITION "100000" <= VALUES < "1000000",
> PARTITION "1000000" <= VALUES < "105000",
> PARTITION "105000" <= VALUES < "110000",
> PARTITION "110000" <= VALUES < "115000",
> PARTITION "115000" <= VALUES < "120000",
> PARTITION "120000" <= VALUES < "125000",
> PARTITION "125000" <= VALUES < "130000",
> PARTITION "130000" <= VALUES < "135000",
> PARTITION "135000" <= VALUES < "140000",
> PARTITION "140000" <= VALUES < "145000",
> PARTITION "145000" <= VALUES < "150000",
> PARTITION "150000" <= VALUES < "155000",
> PARTITION "155000" <= VALUES < "160000",
> PARTITION "160000" <= VALUES < "165000",
> PARTITION "165000" <= VALUES < "170000",
> PARTITION "170000" <= VALUES < "175000",
> PARTITION "175000" <= VALUES < "180000",
> PARTITION "180000" <= VALUES < "185000",
> PARTITION "185000" <= VALUES < "190000",
> PARTITION "190000" <= VALUES < "195000",
> PARTITION "195000" <= VALUES < "200000",
> PARTITION "200000" <= VALUES < "205000",
> PARTITION "205000" <= VALUES < "210000",
> PARTITION "210000" <= VALUES < "215000",
> PARTITION "215000" <= VALUES < "220000",
> PARTITION "220000" <= VALUES < "225000",
> PARTITION "225000" <= VALUES < "230000",
> PARTITION "230000" <= VALUES < "235000",
> PARTITION "235000" <= VALUES < "240000",
> PARTITION "240000" <= VALUES < "245000",
> PARTITION "245000" <= VALUES < "250000",
> PARTITION "250000" <= VALUES < "255000",
> PARTITION "255000" <= VALUES < "260000",
> PARTITION "260000" <= VALUES < "265000",
> PARTITION "265000" <= VALUES < "270000",
> PARTITION "270000" <= VALUES < "275000",
> PARTITION "275000" <= VALUES < "280000",
> PARTITION "280000" <= VALUES < "285000",
> PARTITION "285000" <= VALUES < "290000",
> PARTITION "290000" <= VALUES < "295000",
> PARTITION "295000" <= VALUES < "300000",
> PARTITION "300000" <= VALUES < "305000",
> PARTITION "305000" <= VALUES < "310000",
> PARTITION "310000" <= VALUES < "315000",
> PARTITION "315000" <= VALUES < "320000",
> PARTITION "320000" <= VALUES < "325000",
> PARTITION "325000" <= VALUES < "330000",
> PARTITION "330000" <= VALUES < "335000",
> PARTITION "335000" <= VALUES < "340000",
> PARTITION "340000" <= VALUES < "345000",
> PARTITION "345000" <= VALUES < "350000",
> PARTITION "350000" <= VALUES < "355000",
> PARTITION "355000" <= VALUES < "360000",
> PARTITION "360000" <= VALUES < "365000",
> PARTITION "365000" <= VALUES < "370000",
> PARTITION "370000" <= VALUES < "375000",
> PARTITION "375000" <= VALUES < "380000",
> PARTITION "380000" <= VALUES < "385000",
> PARTITION "385000" <= VALUES < "390000",
> PARTITION "390000" <= VALUES < "395000",
> PARTITION "395000" <= VALUES < "400000",
> PARTITION "400000" <= VALUES < "405000",
> PARTITION "405000" <= VALUES < "410000",
> PARTITION "410000" <= VALUES < "415000",
> PARTITION "415000" <= VALUES < "420000",
> PARTITION "420000" <= VALUES < "425000",
> PARTITION "425000" <= VALUES < "430000",
> PARTITION "430000" <= VALUES < "435000",
> PARTITION "435000" <= VALUES < "440000",
> PARTITION "440000" <= VALUES < "445000",
> PARTITION "445000" <= VALUES < "450000",
> PARTITION "450000" <= VALUES < "455000",
> PARTITION "455000" <= VALUES < "460000",
> PARTITION "460000" <= VALUES < "465000",
> PARTITION "465000" <= VALUES < "470000",
> PARTITION "470000" <= VALUES < "475000",
> PARTITION "475000" <= VALUES < "480000",
> PARTITION "480000" <= VALUES < "485000",
> PARTITION "485000" <= VALUES < "490000",
> PARTITION "490000" <= VALUES < "495000",
> PARTITION "495000" <= VALUES < "500000",
> PARTITION "500000" <= VALUES < "505000",
> PARTITION "505000" <= VALUES < "510000",
> PARTITION "510000" <= VALUES < "515000",
> PARTITION "515000" <= VALUES < "520000",
> PARTITION "520000" <= VALUES < "525000",
> PARTITION "525000" <= VALUES < "530000",
> PARTITION "530000" <= VALUES < "535000",
> PARTITION "535000" <= VALUES < "540000",
> PARTITION "540000" <= VALUES < "545000",
> PARTITION "545000" <= VALUES < "550000",
> PARTITION "550000" <= VALUES < "555000",
> PARTITION "555000" <= VALUES < "560000",
> PARTITION "560000" <= VALUES < "565000",
> PARTITION "565000" <= VALUES < "570000",
> PARTITION "570000" <= VALUES < "575000",
> PARTITION "575000" <= VALUES < "580000",
> PARTITION "580000" <= VALUES < "585000",
> PARTITION "585000" <= VALUES < "590000",
> PARTITION "590000" <= VALUES < "595000",
> PARTITION "595000" <= VALUES < "600000",
> PARTITION "600000" <= VALUES < "605000",
> PARTITION "605000" <= VALUES < "610000",
> PARTITION "610000" <= VALUES < "615000",
> PARTITION "615000" <= VALUES < "620000",
> PARTITION "620000" <= VALUES < "625000",
> PARTITION "625000" <= VALUES < "630000",
> PARTITION "630000" <= VALUES < "635000",
> PARTITION "635000" <= VALUES < "640000",
> PARTITION "640000" <= VALUES < "645000",
> PARTITION "645000" <= VALUES < "650000",
> PARTITION "650000" <= VALUES < "655000",
> PARTITION "655000" <= VALUES < "660000",
> PARTITION "660000" <= VALUES < "665000",
> PARTITION "665000" <= VALUES < "670000",
> PARTITION "670000" <= VALUES < "675000",
> PARTITION "675000" <= VALUES < "680000",
> PARTITION "680000" <= VALUES < "685000",
> PARTITION "685000" <= VALUES < "690000",
> PARTITION "690000" <= VALUES < "695000",
> PARTITION "695000" <= VALUES < "700000",
> PARTITION "700000" <= VALUES < "705000",
> PARTITION "705000" <= VALUES < "710000",
> PARTITION "710000" <= VALUES < "715000",
> PARTITION "715000" <= VALUES < "720000",
> PARTITION "720000" <= VALUES < "725000",
> PARTITION "725000" <= VALUES < "730000",
> PARTITION "730000" <= VALUES < "735000",
> PARTITION "735000" <= VALUES < "740000",
> PARTITION "740000" <= VALUES < "745000",
> PARTITION "745000" <= VALUES < "750000",
> PARTITION "750000" <= VALUES < "755000",
> PARTITION "755000" <= VALUES < "760000",
> PARTITION "760000" <= VALUES < "765000",
> PARTITION "765000" <= VALUES < "770000",
> PARTITION "770000" <= VALUES < "775000",
> PARTITION "775000" <= VALUES < "780000",
> PARTITION "780000" <= VALUES < "785000",
> PARTITION "785000" <= VALUES < "790000",
> PARTITION "790000" <= VALUES < "795000",
> PARTITION "795000" <= VALUES < "800000",
> PARTITION "800000" <= VALUES < "805000",
> PARTITION "805000" <= VALUES < "810000",
> PARTITION "810000" <= VALUES < "815000",
> PARTITION "815000" <= VALUES < "820000",
> PARTITION "820000" <= VALUES < "825000",
> PARTITION "825000" <= VALUES < "830000",
> PARTITION "830000" <= VALUES < "835000",
> PARTITION "835000" <= VALUES < "840000",
> PARTITION "840000" <= VALUES < "845000",
> PARTITION "845000" <= VALUES < "850000",
> PARTITION "850000" <= VALUES < "855000",
> PARTITION "855000" <= VALUES < "860000",
> PARTITION "860000" <= VALUES < "865000",
> PARTITION "865000" <= VALUES < "870000",
> PARTITION "870000" <= VALUES < "875000",
> PARTITION "875000" <= VALUES < "880000",
> PARTITION "880000" <= VALUES < "885000",
> PARTITION "885000" <= VALUES < "890000",
> PARTITION "890000" <= VALUES < "895000",
> PARTITION "895000" <= VALUES < "900000",
> PARTITION "900000" <= VALUES < "905000",
> PARTITION "905000" <= VALUES < "910000",
> PARTITION "910000" <= VALUES < "915000",
> PARTITION "915000" <= VALUES < "920000",
> PARTITION "920000" <= VALUES < "925000",
> PARTITION "925000" <= VALUES < "930000",
> PARTITION "930000" <= VALUES < "935000",
> PARTITION "935000" <= VALUES < "940000",
> PARTITION "940000" <= VALUES < "945000",
> PARTITION "945000" <= VALUES < "950000",
> PARTITION "950000" <= VALUES < "955000",
> PARTITION "955000" <= VALUES < "960000",
> PARTITION "960000" <= VALUES < "965000",
> PARTITION "965000" <= VALUES < "970000",
> PARTITION "970000" <= VALUES < "975000",
> PARTITION "975000" <= VALUES < "980000",
> PARTITION "980000" <= VALUES < "985000",
> PARTITION "985000" <= VALUES < "990000",
> PARTITION "990000" <= VALUES < "995000",
> PARTITION VALUES >= "995000"
> )
>
>
>
So it looks like you have a numeric value being stored here in the string
column. Are you sure that you are properly zero-padding when creating your
key? For example, if you accidentally scan from "50_..." to "80_...", you
will end up scanning a huge portion of your table.
> I have never deleted any rows from this table.
>
> My scanner code is below.
> The buildKey method builds both the lower bound and the upper bound. The
> unique id is the same in both; the startRow offset (third part) is 0, and
> the endRow offset is 99999999. startRow and endRow differ only in time.
> Although the max offset is big (9999999), it is generally less than 100.
>
> private KuduScanner buildScanner(Metric startRow, Metric endRow,
> List<Integer> dimensionIds, List<DimensionFilter> dimensionFilterList) {
> KuduTable kuduTable =
> kuduService.getKuduTable(BizConfig.parseFrom(startRow.getBizId()));
>
> PartialRow lower = kuduTable.getSchema().newPartialRow();
> lower.addString("key", buildKey(startRow));
> PartialRow upper = kuduTable.getSchema().newPartialRow();
> upper.addString("key", buildKey(endRow));
>
> LOG.info("build scanner. lower = {}, upper = {}", buildKey(startRow),
> buildKey(endRow));
>
> KuduScanner.KuduScannerBuilder builder =
> kuduService.getKuduClient().newScannerBuilder(kuduTable);
> builder.setProjectedColumnNames(COLUMNS);
> builder.lowerBound(lower);
> builder.exclusiveUpperBound(upper);
> builder.prefetching(true);
> builder.batchSizeBytes(MAX_BATCH_SIZE);
>
> if (CollectionUtils.isNotEmpty(dimensionFilterList)) {
> for (int i = 0; i < dimensionIds.size() && i < MAX_DIMENSION_NUM;
> i++) {
> for (DimensionFilter dimensionFilter : dimensionFilterList) {
> if (!Objects.equals(dimensionFilter.getDimensionId(),
> dimensionIds.get(i))) {
> continue;
> }
> ColumnSchema columnSchema =
> kuduTable.getSchema().getColumn(String.format("dimension_%02d", i));
> KuduPredicate predicate = buildKuduPredicate(columnSchema,
> dimensionFilter);
> if (predicate != null) {
> builder.addPredicate(predicate);
> LOG.info("add predicate. predicate = {}",
> predicate.toString());
> }
> }
> }
> }
> return builder.build();
> }
>
>
What client version are you using? 1.7.0?
> I checked the metrics but only got the content below; it seems to have no
> relationship with my table.
>
Looks like you got the metrics from the Kudu master, not a tablet server.
You need to figure out which tablet server you are scanning and grab the
metrics from that one.
-Todd
--
Todd Lipcon
Software Engineer, Cloudera