Repository: phoenix Updated Branches: refs/heads/5.x-HBase-2.0 754201cfb -> 0454e4211
PHOENIX-4289 UPDATE STATISTICS command does not collect stats for local indexes Project: http://git-wip-us.apache.org/repos/asf/phoenix/repo Commit: http://git-wip-us.apache.org/repos/asf/phoenix/commit/f5131945 Tree: http://git-wip-us.apache.org/repos/asf/phoenix/tree/f5131945 Diff: http://git-wip-us.apache.org/repos/asf/phoenix/diff/f5131945 Branch: refs/heads/5.x-HBase-2.0 Commit: f5131945c75d85108887c49ed5c294a01a28e095 Parents: 754201c Author: Samarth Jain <sama...@apache.org> Authored: Sun Oct 29 22:59:03 2017 -0700 Committer: James Taylor <jtay...@salesforce.com> Committed: Thu Nov 9 12:41:21 2017 -0800 ---------------------------------------------------------------------- .../end2end/ExplainPlanWithStatsEnabledIT.java | 13 ++++ .../phoenix/end2end/index/BaseLocalIndexIT.java | 3 + .../phoenix/end2end/index/LocalIndexIT.java | 46 ++++++++++- .../phoenix/iterate/BaseResultIterators.java | 4 +- .../apache/phoenix/schema/MetaDataClient.java | 80 +++++++++++++------- 5 files changed, 115 insertions(+), 31 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/phoenix/blob/f5131945/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java index cd4555c..62538af 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/ExplainPlanWithStatsEnabledIT.java @@ -32,6 +32,7 @@ import java.util.List; import org.apache.hadoop.hbase.util.Bytes; import org.apache.phoenix.jdbc.PhoenixConnection; +import org.apache.phoenix.query.BaseTest; import org.apache.phoenix.schema.PTableKey; import org.apache.phoenix.schema.TableNotFoundException; import org.apache.phoenix.util.EnvironmentEdge; @@ -306,6 +307,18 @@ public class ExplainPlanWithStatsEnabledIT extends ParallelStatsEnabledIT { final Long estimatedRows; final Long estimateInfoTs; + public Long getEstimatedBytes() { + return estimatedBytes; + } + + public Long getEstimatedRows() { + return estimatedRows; + } + + public Long getEstimateInfoTs() { + return estimateInfoTs; + } + Estimate(Long rows, Long bytes, Long ts) { this.estimatedBytes = bytes; this.estimatedRows = rows; http://git-wip-us.apache.org/repos/asf/phoenix/blob/f5131945/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java index 30baec4..1659d73 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/BaseLocalIndexIT.java @@ -59,6 +59,9 @@ public abstract class BaseLocalIndexIT extends BaseUniqueNamesOwnClusterIT { serverProps.put(QueryServices.IS_NAMESPACE_MAPPING_ENABLED, "true"); Map<String, String> clientProps = Maps.newHashMapWithExpectedSize(1); clientProps.put(QueryServices.IS_NAMESPACE_MAPPING_ENABLED, "true"); + // setting update frequency to a large value to test out that we are + // generating stats for local indexes + clientProps.put(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB, "120000"); setUpTestDriver(new ReadOnlyProps(serverProps.entrySet().iterator()), new ReadOnlyProps(clientProps.entrySet().iterator())); } http://git-wip-us.apache.org/repos/asf/phoenix/blob/f5131945/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java index 238b88e..f97ba22 100644 --- a/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java +++ b/phoenix-core/src/it/java/org/apache/phoenix/end2end/index/LocalIndexIT.java @@ -17,6 +17,7 @@ */ package org.apache.phoenix.end2end.index; +import static org.apache.phoenix.end2end.ExplainPlanWithStatsEnabledIT.getByteRowEstimates; import static org.apache.phoenix.util.MetaDataUtil.getViewIndexSequenceName; import static org.apache.phoenix.util.MetaDataUtil.getViewIndexSequenceSchemaName; import static org.junit.Assert.assertArrayEquals; @@ -56,8 +57,10 @@ import org.apache.hadoop.hbase.util.FSUtils; import org.apache.hadoop.hbase.util.Pair; import org.apache.phoenix.compile.QueryPlan; import org.apache.phoenix.coprocessor.BaseScannerRegionObserver; +import org.apache.phoenix.end2end.ExplainPlanWithStatsEnabledIT.Estimate; import org.apache.phoenix.hbase.index.IndexRegionSplitPolicy; import org.apache.phoenix.jdbc.PhoenixConnection; +import org.apache.phoenix.jdbc.PhoenixResultSet; import org.apache.phoenix.jdbc.PhoenixStatement; import org.apache.phoenix.query.QueryConstants; import org.apache.phoenix.schema.PNameFactory; @@ -68,9 +71,10 @@ import org.apache.phoenix.schema.TableNotFoundException; import org.apache.phoenix.util.QueryUtil; import org.apache.phoenix.util.SchemaUtil; import org.apache.phoenix.util.TestUtil; -import org.junit.Ignore; import org.junit.Test; +import com.google.common.collect.Lists; + public class LocalIndexIT extends BaseLocalIndexIT { public LocalIndexIT(boolean isNamespaceMapped) { super(isNamespaceMapped); @@ -722,4 +726,44 @@ public class LocalIndexIT extends BaseLocalIndexIT { } } + @Test // See https://issues.apache.org/jira/browse/PHOENIX-4289 + public void testEstimatesWithLocalIndexes() throws Exception { + String tableName = generateUniqueName(); + String indexName = "IDX_" + generateUniqueName(); + try (Connection conn = DriverManager.getConnection(getUrl())) { + int guidePostWidth = 20; + conn.createStatement() + .execute("CREATE TABLE " + tableName + + " (k INTEGER PRIMARY KEY, a bigint, b bigint)" + + " GUIDE_POSTS_WIDTH=" + guidePostWidth); + conn.createStatement().execute("upsert into " + tableName + " values (100,1,3)"); + conn.createStatement().execute("upsert into " + tableName + " values (101,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (102,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (103,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (104,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (105,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (106,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (107,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (108,2,4)"); + conn.createStatement().execute("upsert into " + tableName + " values (109,2,4)"); + conn.commit(); + conn.createStatement().execute( + "CREATE LOCAL INDEX " + indexName + " ON " + tableName + " (a) INCLUDE (b) "); + String ddl = "ALTER TABLE " + tableName + " SET USE_STATS_FOR_PARALLELIZATION = false"; + conn.createStatement().execute(ddl); + conn.createStatement().execute("UPDATE STATISTICS " + tableName + ""); + } + List<Object> binds = Lists.newArrayList(); + try (Connection conn = DriverManager.getConnection(getUrl())) { + String sql = + "SELECT COUNT(*) " + " FROM " + tableName; + ResultSet rs = conn.createStatement().executeQuery(sql); + assertTrue("Index " + indexName + " should have been used", + rs.unwrap(PhoenixResultSet.class).getStatement().getQueryPlan().getTableRef() + .getTable().getName().getString().equals(indexName)); + Estimate info = getByteRowEstimates(conn, sql, binds); + assertEquals((Long) 10l, info.getEstimatedRows()); + assertTrue(info.getEstimateInfoTs() > 0); + } + } } http://git-wip-us.apache.org/repos/asf/phoenix/blob/f5131945/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java b/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java index 46fd55c..9bf9573 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/iterate/BaseResultIterators.java @@ -491,9 +491,7 @@ public abstract class BaseResultIterators extends ExplainTable implements Result scanId = new UUID(ThreadLocalRandom.current().nextLong(), ThreadLocalRandom.current().nextLong()).toString(); initializeScan(plan, perScanLimit, offset, scan); - this.useStatsForParallelization = - context.getConnection().getQueryServices().getConfiguration().getBoolean( - USE_STATS_FOR_PARALLELIZATION, DEFAULT_USE_STATS_FOR_PARALLELIZATION); + this.useStatsForParallelization = table.useStatsForParallelization(); this.scans = getParallelScans(); List<KeyRange> splitRanges = Lists.newArrayListWithExpectedSize(scans.size() * ESTIMATED_GUIDEPOSTS_PER_REGION); for (List<Scan> scanList : scans) { http://git-wip-us.apache.org/repos/asf/phoenix/blob/f5131945/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java ---------------------------------------------------------------------- diff --git a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java index 0ce4246..701633b 100644 --- a/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java +++ b/phoenix-core/src/main/java/org/apache/phoenix/schema/MetaDataClient.java @@ -236,7 +236,6 @@ import org.slf4j.Logger; import org.slf4j.LoggerFactory; import com.google.common.base.Strings; -import com.google.common.collect.Iterators; import com.google.common.collect.ListMultimap; import com.google.common.collect.Lists; import com.google.common.collect.Maps; @@ -1088,7 +1087,7 @@ public class MetaDataClient { PTable table = resolver.getTables().get(0).getTable(); long rowCount = 0; if (updateStatisticsStmt.updateColumns()) { - rowCount += updateStatisticsInternal(table.getPhysicalName(), table, updateStatisticsStmt.getProps()); + rowCount += updateStatisticsInternal(table.getPhysicalName(), table, updateStatisticsStmt.getProps(), true); } if (updateStatisticsStmt.updateIndex()) { // TODO: If our table is a VIEW with multiple indexes or a TABLE with local indexes, @@ -1096,25 +1095,50 @@ public class MetaDataClient { // across all indexes in that case so that we don't re-calculate the same stats // multiple times. for (PTable index : table.getIndexes()) { - rowCount += updateStatisticsInternal(index.getPhysicalName(), index, updateStatisticsStmt.getProps()); + // If the table is a view, then we will end up calling update stats + // here for all the view indexes on it. We take care of local indexes later. + if (index.getIndexType() != IndexType.LOCAL) { + rowCount += updateStatisticsInternal(table.getPhysicalName(), index, updateStatisticsStmt.getProps(), true); + } + } + /* + * Update stats for local indexes. This takes care of local indexes on the the table + * as well as local indexes on any views on it. + */ + PName physicalName = table.getPhysicalName(); + List<byte[]> localCFs = MetaDataUtil.getLocalIndexColumnFamilies(connection, physicalName.getBytes()); + if (!localCFs.isEmpty()) { + /* + * We need to pass checkLastStatsUpdateTime as false here. Local indexes are on the + * same table as the physical table. So when the user has requested to update stats + * for both table and indexes on it, we need to make sure that we don't re-check + * LAST_UPDATE_STATS time. If we don't do that then we will end up *not* collecting + * stats for local indexes which would be bad. + * + * Note, that this also means we don't have a way of controlling how often update + * stats can run for local indexes. Consider the case when the user calls UPDATE STATS TABLE + * followed by UPDATE STATS TABLE INDEX. When the second statement is being executed, + * this causes us to skip the check and execute stats collection possibly a bit too frequently. + */ + rowCount += updateStatisticsInternal(physicalName, table, updateStatisticsStmt.getProps(), localCFs, false); } // If analyzing the indexes of a multi-tenant table or a table with view indexes // then analyze all of those indexes too. if (table.getType() != PTableType.VIEW) { if (table.isMultiTenant() || MetaDataUtil.hasViewIndexTable(connection, table.getPhysicalName())) { - final PName physicalName = PNameFactory.newName(MetaDataUtil.getViewIndexPhysicalName(table.getPhysicalName().getBytes())); + final PName viewIndexPhysicalTableName = PNameFactory.newName(MetaDataUtil.getViewIndexPhysicalName(table.getPhysicalName().getBytes())); PTable indexLogicalTable = new DelegateTable(table) { @Override public PName getPhysicalName() { - return physicalName; + return viewIndexPhysicalTableName; } }; - rowCount += updateStatisticsInternal(physicalName, indexLogicalTable, updateStatisticsStmt.getProps()); - } - PName physicalName = table.getPhysicalName(); - List<byte[]> localCFs = MetaDataUtil.getLocalIndexColumnFamilies(connection, physicalName.getBytes()); - if (!localCFs.isEmpty()) { - rowCount += updateStatisticsInternal(physicalName, table, updateStatisticsStmt.getProps(), localCFs); + /* + * Note for future maintainers: local indexes whether on a table or on a view, + * reside on the same physical table as the base table and not the view index + * table. So below call is collecting stats only for non-local view indexes. + */ + rowCount += updateStatisticsInternal(viewIndexPhysicalTableName, indexLogicalTable, updateStatisticsStmt.getProps(), true); } } } @@ -1127,27 +1151,29 @@ public class MetaDataClient { }; } - private long updateStatisticsInternal(PName physicalName, PTable logicalTable, Map<String, Object> statsProps) throws SQLException { - return updateStatisticsInternal(physicalName, logicalTable, statsProps, null); + private long updateStatisticsInternal(PName physicalName, PTable logicalTable, Map<String, Object> statsProps, boolean checkLastStatsUpdateTime) throws SQLException { + return updateStatisticsInternal(physicalName, logicalTable, statsProps, null, checkLastStatsUpdateTime); } - private long updateStatisticsInternal(PName physicalName, PTable logicalTable, Map<String, Object> statsProps, List<byte[]> cfs) throws SQLException { + private long updateStatisticsInternal(PName physicalName, PTable logicalTable, Map<String, Object> statsProps, List<byte[]> cfs, boolean checkLastStatsUpdateTime) throws SQLException { ReadOnlyProps props = connection.getQueryServices().getProps(); final long msMinBetweenUpdates = props .getLong(QueryServices.MIN_STATS_UPDATE_FREQ_MS_ATTRIB, props.getLong(QueryServices.STATS_UPDATE_FREQ_MS_ATTRIB, QueryServicesOptions.DEFAULT_STATS_UPDATE_FREQ_MS) / 2); - byte[] tenantIdBytes = ByteUtil.EMPTY_BYTE_ARRAY; Long scn = connection.getSCN(); // Always invalidate the cache long clientTimeStamp = connection.getSCN() == null ? HConstants.LATEST_TIMESTAMP : scn; - String query = "SELECT CURRENT_DATE()," + LAST_STATS_UPDATE_TIME + " FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME - + " WHERE " + PHYSICAL_NAME + "='" + physicalName.getString() + "' AND " + COLUMN_FAMILY - + " IS NULL AND " + LAST_STATS_UPDATE_TIME + " IS NOT NULL"; - ResultSet rs = connection.createStatement().executeQuery(query); long msSinceLastUpdate = Long.MAX_VALUE; - if (rs.next()) { - msSinceLastUpdate = rs.getLong(1) - rs.getLong(2); + if (checkLastStatsUpdateTime) { + String query = "SELECT CURRENT_DATE()," + LAST_STATS_UPDATE_TIME + " FROM " + PhoenixDatabaseMetaData.SYSTEM_STATS_NAME + + " WHERE " + PHYSICAL_NAME + "='" + physicalName.getString() + "' AND " + COLUMN_FAMILY + + " IS NULL AND " + LAST_STATS_UPDATE_TIME + " IS NOT NULL"; + ResultSet rs = connection.createStatement().executeQuery(query); + + if (rs.next()) { + msSinceLastUpdate = rs.getLong(1) - rs.getLong(2); + } } long rowCount = 0; if (msSinceLastUpdate >= msMinBetweenUpdates) { @@ -1976,14 +2002,14 @@ public class MetaDataClient { } } - boolean useStatsForParallelization = true; - Boolean useStatsForParallelizationProp = (Boolean) TableProperty.USE_STATS_FOR_PARALLELIZATION.getValue(tableProps); + boolean useStatsForParallelization = + connection.getQueryServices().getProps().getBoolean( + QueryServices.USE_STATS_FOR_PARALLELIZATION, + QueryServicesOptions.DEFAULT_USE_STATS_FOR_PARALLELIZATION); + Boolean useStatsForParallelizationProp = + (Boolean) TableProperty.USE_STATS_FOR_PARALLELIZATION.getValue(tableProps); if (useStatsForParallelizationProp != null) { useStatsForParallelization = useStatsForParallelizationProp; - } else { - useStatsForParallelization = connection.getQueryServices().getProps().getBoolean( - QueryServices.USE_STATS_FOR_PARALLELIZATION, - QueryServicesOptions.DEFAULT_USE_STATS_FOR_PARALLELIZATION); } boolean sharedTable = statement.getTableType() == PTableType.VIEW || allocateIndexId;