http://git-wip-us.apache.org/repos/asf/hive/blob/96862093/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java new file mode 100644 index 0000000..f4e55ed --- /dev/null +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsExtrapolation.java @@ -0,0 +1,717 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.StatObjectConverter; +import org.apache.hadoop.hive.metastore.api.AggrStats; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.StringColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.SortedMap; +import java.util.TreeMap; + +public class TestHBaseAggregateStatsExtrapolation { + private static final Logger LOG = LoggerFactory + .getLogger(TestHBaseAggregateStatsExtrapolation.class.getName()); + + @Mock + HTableInterface htable; + private HBaseStore store; + SortedMap<String, Cell> rows = new TreeMap<>(); + + // NDV will be 3 for the bitVectors + String bitVectors = "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}"; + + @Before + public void before() throws IOException { + MockitoAnnotations.initMocks(this); + HiveConf conf = new HiveConf(); + conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true); + store = MockUtils.init(conf, htable, rows); + store.backdoor().getStatsCache().resetCounters(); + } + + private static interface Checker { + void checkStats(AggrStats aggrStats) throws Exception; + } + + @Test + public void allPartitionsHaveBitVectorStatusLong() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col1", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(10, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col1", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1009, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col1")); + statChecker.checkStats(aggrStats); + } + + @Test + public void allPartitionsHaveBitVectorStatusDecimal() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col1_decimal", "decimal", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1_decimal"); + obj.setColType("decimal"); + ColumnStatisticsData data = new ColumnStatisticsData(); + DecimalColumnStatsData dcsd = new DecimalColumnStatsData(); + dcsd.setHighValue(StatObjectConverter.createThriftDecimal("" + (1000 + i))); + dcsd.setLowValue(StatObjectConverter.createThriftDecimal("" + (-1000 - i))); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors); + data.setDecimalStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(10, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col1_decimal", cso.getColName()); + Assert.assertEquals("decimal", cso.getColType()); + DecimalColumnStatsData lcsd = cso.getStatsData().getDecimalStats(); + Assert.assertEquals(1009, HBaseUtils.getDoubleValue(lcsd.getHighValue()), 0.01); + Assert.assertEquals(-1009, HBaseUtils.getDoubleValue(lcsd.getLowValue()), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col1_decimal")); + statChecker.checkStats(aggrStats); + } + + @Test + public void allPartitionsHaveBitVectorStatusDouble() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col1_double", "double", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1_double"); + obj.setColType("double"); + ColumnStatisticsData data = new ColumnStatisticsData(); + DoubleColumnStatsData dcsd = new DoubleColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors); + data.setDoubleStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(10, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col1_double", cso.getColName()); + Assert.assertEquals("double", cso.getColType()); + DoubleColumnStatsData lcsd = cso.getStatsData().getDoubleStats(); + Assert.assertEquals(1009, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col1_double")); + statChecker.checkStats(aggrStats); + } + + @Test + public void allPartitionsHaveBitVectorStatusString() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col1_string", "string", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1_string"); + obj.setColType("string"); + ColumnStatisticsData data = new ColumnStatisticsData(); + StringColumnStatsData dcsd = new StringColumnStatsData(); + dcsd.setAvgColLen(i + 1); + dcsd.setMaxColLen(i + 10); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors); + data.setStringStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(10, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col1_string", cso.getColName()); + Assert.assertEquals("string", cso.getColType()); + StringColumnStatsData lcsd = cso.getStatsData().getStringStats(); + Assert.assertEquals(10, lcsd.getAvgColLen(), 0.01); + Assert.assertEquals(19, lcsd.getMaxColLen(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col1_string")); + statChecker.checkStats(aggrStats); + } + + @Test + public void noPartitionsHaveBitVectorStatus() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col2", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col2"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(10, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col2", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1009, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(90, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col2")); + statChecker.checkStats(aggrStats); + } + + @Test + public void TwoEndsOfPartitionsHaveBitVectorStatus() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col3", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + if (i < 2 || i > 7) { + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col3"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i); + dcsd.setBitVectors(bitVectors); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(4, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col3", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col3")); + statChecker.checkStats(aggrStats); + } + + @Test + public void MiddleOfPartitionsHaveBitVectorStatus() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col4", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + if (i > 2 && i < 7) { + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col4"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i); + dcsd.setBitVectors(bitVectors); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(4, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col4", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1006, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1006, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col4")); + statChecker.checkStats(aggrStats); + } + + @Test + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusLong() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col5", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) { + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col5"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i); + dcsd.setBitVectors(bitVectors); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(6, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col5", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); + Assert.assertEquals(40, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col5")); + statChecker.checkStats(aggrStats); + } + + @Test + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusDouble() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col5_double", "double", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) { + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col5_double"); + obj.setColType("double"); + ColumnStatisticsData data = new ColumnStatisticsData(); + DoubleColumnStatsData dcsd = new DoubleColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i); + dcsd.setBitVectors(bitVectors); + data.setDoubleStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(6, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col5_double", cso.getColName()); + Assert.assertEquals("double", cso.getColType()); + DoubleColumnStatsData lcsd = cso.getStatsData().getDoubleStats(); + Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); + Assert.assertEquals(40, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col5_double")); + statChecker.checkStats(aggrStats); + } +}
http://git-wip-us.apache.org/repos/asf/hive/blob/96862093/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java ---------------------------------------------------------------------- diff --git a/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java new file mode 100644 index 0000000..62918be --- /dev/null +++ b/metastore/src/test/org/apache/hadoop/hive/metastore/hbase/TestHBaseAggregateStatsNDVUniformDist.java @@ -0,0 +1,581 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.hadoop.hive.metastore.hbase; + +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; +import org.apache.hadoop.hbase.Cell; +import org.apache.hadoop.hbase.client.HTableInterface; +import org.apache.hadoop.hive.conf.HiveConf; +import org.apache.hadoop.hive.metastore.StatObjectConverter; +import org.apache.hadoop.hive.metastore.api.AggrStats; +import org.apache.hadoop.hive.metastore.api.BooleanColumnStatsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatistics; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsData; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsDesc; +import org.apache.hadoop.hive.metastore.api.ColumnStatisticsObj; +import org.apache.hadoop.hive.metastore.api.DecimalColumnStatsData; +import org.apache.hadoop.hive.metastore.api.DoubleColumnStatsData; +import org.apache.hadoop.hive.metastore.api.FieldSchema; +import org.apache.hadoop.hive.metastore.api.LongColumnStatsData; +import org.apache.hadoop.hive.metastore.api.Partition; +import org.apache.hadoop.hive.metastore.api.SerDeInfo; +import org.apache.hadoop.hive.metastore.api.StorageDescriptor; +import org.apache.hadoop.hive.metastore.api.Table; +import org.junit.Assert; +import org.junit.Before; +import org.junit.Test; +import org.mockito.Mock; +import org.mockito.MockitoAnnotations; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Collections; +import java.util.List; +import java.util.SortedMap; +import java.util.TreeMap; + +public class TestHBaseAggregateStatsNDVUniformDist { + private static final Logger LOG = LoggerFactory + .getLogger(TestHBaseAggregateStatsNDVUniformDist.class.getName()); + + @Mock + HTableInterface htable; + private HBaseStore store; + SortedMap<String, Cell> rows = new TreeMap<>(); + + // NDV will be 3 for bitVectors[0] and 12 for bitVectors[1] + String bitVectors[] = { + "{0, 4, 5, 7}{0, 1}{0, 1, 2}{0, 1, 4}{0}{0, 2}{0, 3}{0, 2, 3, 4}{0, 1, 4}{0, 1}{0}{0, 1, 3, 8}{0, 2}{0, 2}{0, 9}{0, 1, 4}", + "{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}{1, 2}" }; + + @Before + public void before() throws IOException { + MockitoAnnotations.initMocks(this); + HiveConf conf = new HiveConf(); + conf.setBoolean(HBaseReadWrite.NO_CACHE_CONF, true); + conf.setBoolean(HiveConf.ConfVars.HIVE_METASTORE_STATS_NDV_DENSITY_FUNCTION.varname, true); + store = MockUtils.init(conf, htable, rows); + store.backdoor().getStatsCache().resetCounters(); + } + + private static interface Checker { + void checkStats(AggrStats aggrStats) throws Exception; + } + + @Test + public void allPartitionsHaveBitVectorStatus() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col1", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col1"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors[0]); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(10, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col1", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1009, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col1")); + statChecker.checkStats(aggrStats); + } + + @Test + public void noPartitionsHaveBitVectorStatus() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col2", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col2"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(10, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col2", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1009, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1009, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(91, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col2")); + statChecker.checkStats(aggrStats); + } + + @Test + public void TwoEndsOfPartitionsHaveBitVectorStatus() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col3", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + if (i < 2 || i > 7) { + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col3"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors[i / 5]); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(4, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col3", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(12, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col3")); + statChecker.checkStats(aggrStats); + } + + @Test + public void MiddleOfPartitionsHaveBitVectorStatus() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col4", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + if (i > 2 && i < 7) { + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col4"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors[0]); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(4, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col4", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1006, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1006, lcsd.getLowValue(), 0.01); + Assert.assertEquals(45, lcsd.getNumNulls()); + Assert.assertEquals(3, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col4")); + statChecker.checkStats(aggrStats); + } + + @Test + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusLong() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col5_long", "long", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) { + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col5_long"); + obj.setColType("long"); + ColumnStatisticsData data = new ColumnStatisticsData(); + LongColumnStatsData dcsd = new LongColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors[i / 5]); + data.setLongStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(6, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col5_long", cso.getColName()); + Assert.assertEquals("long", cso.getColType()); + LongColumnStatsData lcsd = cso.getStatsData().getLongStats(); + Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); + Assert.assertEquals(40, lcsd.getNumNulls()); + Assert.assertEquals(12, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col5_long")); + statChecker.checkStats(aggrStats); + } + + @Test + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusDecimal() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col5_decimal", "decimal", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) { + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col5_decimal"); + obj.setColType("decimal"); + ColumnStatisticsData data = new ColumnStatisticsData(); + DecimalColumnStatsData dcsd = new DecimalColumnStatsData(); + dcsd.setHighValue(StatObjectConverter.createThriftDecimal("" + (1000 + i))); + dcsd.setLowValue(StatObjectConverter.createThriftDecimal("" + (-1000 - i))); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors[i / 5]); + data.setDecimalStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(6, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col5_decimal", cso.getColName()); + Assert.assertEquals("decimal", cso.getColType()); + DecimalColumnStatsData lcsd = cso.getStatsData().getDecimalStats(); + Assert.assertEquals(1010, HBaseUtils.getDoubleValue(lcsd.getHighValue()), 0.01); + Assert.assertEquals(-1010, HBaseUtils.getDoubleValue(lcsd.getLowValue()), 0.01); + Assert.assertEquals(40, lcsd.getNumNulls()); + Assert.assertEquals(12, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col5_decimal")); + statChecker.checkStats(aggrStats); + } + + @Test + public void TwoEndsAndMiddleOfPartitionsHaveBitVectorStatusDouble() throws Exception { + String dbName = "default"; + String tableName = "snp"; + long now = System.currentTimeMillis(); + List<FieldSchema> cols = new ArrayList<>(); + cols.add(new FieldSchema("col5_double", "double", "nocomment")); + SerDeInfo serde = new SerDeInfo("serde", "seriallib", null); + StorageDescriptor sd = new StorageDescriptor(cols, "file:/tmp", "input", "output", false, 0, + serde, null, null, Collections.<String, String> emptyMap()); + List<FieldSchema> partCols = new ArrayList<>(); + partCols.add(new FieldSchema("ds", "string", "")); + Table table = new Table(tableName, dbName, "me", (int) now, (int) now, 0, sd, partCols, + Collections.<String, String> emptyMap(), null, null, null); + store.createTable(table); + + List<List<String>> partVals = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + List<String> partVal = Arrays.asList("" + i); + partVals.add(partVal); + StorageDescriptor psd = new StorageDescriptor(sd); + psd.setLocation("file:/tmp/default/hit/ds=" + partVal); + Partition part = new Partition(partVal, dbName, tableName, (int) now, (int) now, psd, + Collections.<String, String> emptyMap()); + store.addPartition(part); + if (i == 0 || i == 2 || i == 3 || i == 5 || i == 6 || i == 8) { + ColumnStatistics cs = new ColumnStatistics(); + ColumnStatisticsDesc desc = new ColumnStatisticsDesc(false, dbName, tableName); + desc.setLastAnalyzed(now); + desc.setPartName("ds=" + partVal); + cs.setStatsDesc(desc); + ColumnStatisticsObj obj = new ColumnStatisticsObj(); + obj.setColName("col5_double"); + obj.setColType("double"); + ColumnStatisticsData data = new ColumnStatisticsData(); + DoubleColumnStatsData dcsd = new DoubleColumnStatsData(); + dcsd.setHighValue(1000 + i); + dcsd.setLowValue(-1000 - i); + dcsd.setNumNulls(i); + dcsd.setNumDVs(10 * i + 1); + dcsd.setBitVectors(bitVectors[i / 5]); + data.setDoubleStats(dcsd); + obj.setStatsData(data); + cs.addToStatsObj(obj); + store.updatePartitionColumnStatistics(cs, partVal); + } + } + + Checker statChecker = new Checker() { + @Override + public void checkStats(AggrStats aggrStats) throws Exception { + Assert.assertEquals(6, aggrStats.getPartsFound()); + Assert.assertEquals(1, aggrStats.getColStatsSize()); + ColumnStatisticsObj cso = aggrStats.getColStats().get(0); + Assert.assertEquals("col5_double", cso.getColName()); + Assert.assertEquals("double", cso.getColType()); + DoubleColumnStatsData lcsd = cso.getStatsData().getDoubleStats(); + Assert.assertEquals(1010, lcsd.getHighValue(), 0.01); + Assert.assertEquals(-1010, lcsd.getLowValue(), 0.01); + Assert.assertEquals(40, lcsd.getNumNulls()); + Assert.assertEquals(12, lcsd.getNumDVs()); + } + }; + List<String> partNames = new ArrayList<>(); + for (int i = 0; i < 10; i++) { + partNames.add("ds=" + i); + } + AggrStats aggrStats = store.get_aggr_stats_for(dbName, tableName, partNames, + Arrays.asList("col5_double")); + statChecker.checkStats(aggrStats); + } +} http://git-wip-us.apache.org/repos/asf/hive/blob/96862093/ql/src/test/results/clientpositive/tez/explainuser_1.q.out ---------------------------------------------------------------------- diff --git a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out index b501f97..0eb9132 100644 --- a/ql/src/test/results/clientpositive/tez/explainuser_1.q.out +++ b/ql/src/test/results/clientpositive/tez/explainuser_1.q.out @@ -426,9 +426,9 @@ Stage-0 <-Map 8 [SIMPLE_EDGE] SHUFFLE [RS_15] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_14] (rows=1 width=101) + Group By Operator [GBY_14] (rows=2 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_49] (rows=3 width=93) + Filter Operator [FIL_49] (rows=5 width=74) predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) TableScan [TS_11] (rows=20 width=83) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -446,9 +446,9 @@ Stage-0 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=1 width=101) + Group By Operator [GBY_3] (rows=2 width=101) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_48] (rows=3 width=93) + Filter Operator [FIL_48] (rows=5 width=74) predicate:((((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) TableScan [TS_0] (rows=20 width=83) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1201,11 +1201,11 @@ Stage-0 Stage-1 Reducer 3 File Output Operator [FS_19] - Select Operator [SEL_18] (rows=21 width=101) + Select Operator [SEL_18] (rows=36 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_17] (rows=21 width=101) + Filter Operator [FIL_17] (rows=36 width=101) predicate:((_col1 > 0) or (_col6 >= 0)) - Merge Join Operator [MERGEJOIN_28] (rows=21 width=101) + Merge Join Operator [MERGEJOIN_28] (rows=36 width=101) Conds:RS_14._col0=RS_15._col0(Inner),Output:["_col1","_col2","_col3","_col4","_col6"] <-Map 5 [SIMPLE_EDGE] SHUFFLE [RS_15] @@ -1219,25 +1219,25 @@ Stage-0 <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_14] PartitionCols:_col0 - Filter Operator [FIL_9] (rows=6 width=182) + Filter Operator [FIL_9] (rows=10 width=182) predicate:(((_col1 + _col4) = 2) and ((_col4 + 1) = 2)) - Merge Join Operator [MERGEJOIN_27] (rows=25 width=182) + Merge Join Operator [MERGEJOIN_27] (rows=40 width=182) Conds:RS_6._col0=RS_7._col0(Left Outer),Output:["_col0","_col1","_col2","_col3","_col4"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_6] PartitionCols:_col0 - Select Operator [SEL_2] (rows=5 width=74) + Select Operator [SEL_2] (rows=9 width=82) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_24] (rows=5 width=74) + Filter Operator [FIL_24] (rows=9 width=82) predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) TableScan [TS_0] (rows=20 width=83) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Map 4 [SIMPLE_EDGE] SHUFFLE [RS_7] PartitionCols:_col0 - Select Operator [SEL_5] (rows=5 width=71) + Select Operator [SEL_5] (rows=9 width=79) Output:["_col0","_col1"] - Filter Operator [FIL_25] (rows=5 width=74) + Filter Operator [FIL_25] (rows=9 width=82) predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) TableScan [TS_3] (rows=20 width=83) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1257,27 +1257,27 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_14] - Select Operator [SEL_13] (rows=24 width=101) + Select Operator [SEL_13] (rows=50 width=101) Output:["_col0","_col1","_col2","_col3","_col4"] - Filter Operator [FIL_12] (rows=24 width=101) + Filter Operator [FIL_12] (rows=50 width=101) predicate:(((_col1 + _col4) = 2) and ((_col1 > 0) or (_col6 >= 0)) and ((_col4 + 1) = 2)) - Merge Join Operator [MERGEJOIN_19] (rows=72 width=101) + Merge Join Operator [MERGEJOIN_19] (rows=200 width=101) Conds:RS_8._col0=RS_9._col0(Right Outer),RS_8._col0=RS_10._col0(Right Outer),Output:["_col1","_col2","_col3","_col4","_col6"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_8] PartitionCols:_col0 - Select Operator [SEL_2] (rows=6 width=77) + Select Operator [SEL_2] (rows=10 width=83) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_17] (rows=6 width=77) + Filter Operator [FIL_17] (rows=10 width=83) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) TableScan [TS_0] (rows=20 width=83) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Map 3 [SIMPLE_EDGE] SHUFFLE [RS_9] PartitionCols:_col0 - Select Operator [SEL_5] (rows=6 width=74) + Select Operator [SEL_5] (rows=10 width=80) Output:["_col0","_col1"] - Filter Operator [FIL_18] (rows=6 width=77) + Filter Operator [FIL_18] (rows=10 width=83) predicate:(((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) TableScan [TS_3] (rows=20 width=83) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1509,53 +1509,53 @@ Stage-0 Output:["_col0","_col1","_col2"] Filter Operator [FIL_31] (rows=1 width=101) predicate:((_col1 + _col4) >= 0) - Merge Join Operator [MERGEJOIN_60] (rows=1 width=101) + Merge Join Operator [MERGEJOIN_60] (rows=2 width=101) Conds:RS_28._col0=RS_29._col0(Inner),Output:["_col0","_col1","_col2","_col4"] <-Reducer 10 [SIMPLE_EDGE] SHUFFLE [RS_29] PartitionCols:_col0 - Filter Operator [FIL_26] (rows=1 width=105) + Filter Operator [FIL_26] (rows=2 width=62) predicate:_col0 is not null - Limit [LIM_24] (rows=1 width=105) + Limit [LIM_24] (rows=3 width=76) Number of rows:5 - Select Operator [SEL_23] (rows=1 width=105) + Select Operator [SEL_23] (rows=3 width=76) Output:["_col0","_col1"] <-Reducer 9 [SIMPLE_EDGE] SHUFFLE [RS_22] - Select Operator [SEL_20] (rows=1 width=105) + Select Operator [SEL_20] (rows=3 width=76) Output:["_col0","_col1","_col2","_col3"] - Group By Operator [GBY_19] (rows=1 width=101) + Group By Operator [GBY_19] (rows=3 width=70) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 8 [SIMPLE_EDGE] SHUFFLE [RS_18] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_17] (rows=1 width=101) + Group By Operator [GBY_17] (rows=3 width=70) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_58] (rows=4 width=93) + Filter Operator [FIL_58] (rows=6 width=77) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) TableScan [TS_14] (rows=20 width=83) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Reducer 3 [SIMPLE_EDGE] SHUFFLE [RS_28] PartitionCols:_col0 - Filter Operator [FIL_12] (rows=1 width=97) + Filter Operator [FIL_12] (rows=2 width=54) predicate:_col0 is not null - Limit [LIM_10] (rows=1 width=97) + Limit [LIM_10] (rows=3 width=68) Number of rows:5 - Select Operator [SEL_9] (rows=1 width=97) + Select Operator [SEL_9] (rows=3 width=68) Output:["_col0","_col1","_col2"] <-Reducer 2 [SIMPLE_EDGE] SHUFFLE [RS_8] - Select Operator [SEL_6] (rows=1 width=97) + Select Operator [SEL_6] (rows=3 width=68) Output:["_col0","_col1","_col2"] - Group By Operator [GBY_5] (rows=1 width=101) + Group By Operator [GBY_5] (rows=3 width=70) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(VALUE._col0)"],keys:KEY._col0, KEY._col1, KEY._col2 <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_4] PartitionCols:_col0, _col1, _col2 - Group By Operator [GBY_3] (rows=1 width=101) + Group By Operator [GBY_3] (rows=3 width=70) Output:["_col0","_col1","_col2","_col3"],aggregations:["sum(c_int)"],keys:key, c_int, c_float - Filter Operator [FIL_56] (rows=4 width=93) + Filter Operator [FIL_56] (rows=6 width=77) predicate:(((c_int + 1) >= 0) and ((c_int > 0) or (c_float >= 0.0))) TableScan [TS_0] (rows=20 width=83) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1575,16 +1575,16 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_12] - Select Operator [SEL_11] (rows=6 width=4) + Select Operator [SEL_11] (rows=11 width=4) Output:["_col0"] - Merge Join Operator [MERGEJOIN_17] (rows=6 width=4) + Merge Join Operator [MERGEJOIN_17] (rows=11 width=4) Conds:RS_8._col0=RS_9._col0(Left Semi),Output:["_col1"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_8] PartitionCols:_col0 - Select Operator [SEL_2] (rows=5 width=74) + Select Operator [SEL_2] (rows=9 width=82) Output:["_col0","_col1"] - Filter Operator [FIL_15] (rows=5 width=74) + Filter Operator [FIL_15] (rows=9 width=82) predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) TableScan [TS_0] (rows=20 width=83) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] @@ -1615,27 +1615,27 @@ Stage-0 Stage-1 Reducer 2 File Output Operator [FS_18] - Select Operator [SEL_17] (rows=12 width=93) + Select Operator [SEL_17] (rows=16 width=93) Output:["_col0","_col1","_col2"] - Merge Join Operator [MERGEJOIN_28] (rows=12 width=93) + Merge Join Operator [MERGEJOIN_28] (rows=16 width=93) Conds:RS_13._col0=RS_14._col0(Left Semi),RS_13._col0=RS_15._col0(Left Semi),Output:["_col0","_col1","_col2"] <-Map 1 [SIMPLE_EDGE] SHUFFLE [RS_13] PartitionCols:_col0 - Select Operator [SEL_2] (rows=5 width=74) + Select Operator [SEL_2] (rows=9 width=82) Output:["_col0","_col1","_col2"] - Filter Operator [FIL_25] (rows=5 width=74) + Filter Operator [FIL_25] (rows=9 width=82) predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) TableScan [TS_0] (rows=20 width=83) default@cbo_t1,cbo_t1,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"] <-Map 3 [SIMPLE_EDGE] SHUFFLE [RS_14] PartitionCols:_col0 - Group By Operator [GBY_10] (rows=2 width=85) + Group By Operator [GBY_10] (rows=3 width=85) Output:["_col0"],keys:_col0 - Select Operator [SEL_5] (rows=5 width=68) + Select Operator [SEL_5] (rows=9 width=75) Output:["_col0"] - Filter Operator [FIL_26] (rows=5 width=74) + Filter Operator [FIL_26] (rows=9 width=82) predicate:((((c_int + 1) = 2) and ((c_int > 0) or (c_float >= 0.0))) and key is not null) TableScan [TS_3] (rows=20 width=83) default@cbo_t2,cbo_t2,Tbl:COMPLETE,Col:COMPLETE,Output:["key","c_int","c_float"]