HIVE-18264: CachedStore: Store cached partitions/col stats within the table cache and make prewarm non-blocking (Vaibhav Gumashta reviewed by Daniel Dai, Alexander Kolbasov)
Project: http://git-wip-us.apache.org/repos/asf/hive/repo Commit: http://git-wip-us.apache.org/repos/asf/hive/commit/26c0ab6a Tree: http://git-wip-us.apache.org/repos/asf/hive/tree/26c0ab6a Diff: http://git-wip-us.apache.org/repos/asf/hive/diff/26c0ab6a Branch: refs/heads/master Commit: 26c0ab6adb48755ef2f5cff2ec9c4b0e9a431821 Parents: 79e8869 Author: Vaibhav Gumashta <vgumas...@hortonworks.com> Authored: Mon Mar 19 10:47:37 2018 -0700 Committer: Vaibhav Gumashta <vgumas...@hortonworks.com> Committed: Mon Mar 19 10:47:37 2018 -0700 ---------------------------------------------------------------------- .../listener/DummyRawStoreFailEvent.java | 9 +- .../apache/hive/service/server/HiveServer2.java | 6 +- .../hadoop/hive/metastore/HiveMetaStore.java | 4 - .../hadoop/hive/metastore/ObjectStore.java | 30 - .../apache/hadoop/hive/metastore/RawStore.java | 11 - .../hadoop/hive/metastore/cache/CacheUtils.java | 85 +- .../hive/metastore/cache/CachedStore.java | 1552 +++++------------ .../hive/metastore/cache/SharedCache.java | 1588 +++++++++++++----- .../hive/metastore/utils/MetaStoreUtils.java | 11 +- .../DummyRawStoreControlledCommit.java | 7 - .../DummyRawStoreForJdoConnection.java | 7 - .../hive/metastore/cache/TestCachedStore.java | 546 +++--- .../src/test/resources/log4j2.properties | 74 +- 13 files changed, 2043 insertions(+), 1887 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java ---------------------------------------------------------------------- diff --git a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java index 6144b61..e2244a1 100644 --- a/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java +++ b/itests/hcatalog-unit/src/test/java/org/apache/hive/hcatalog/listener/DummyRawStoreFailEvent.java @@ -976,7 +976,7 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable { public List<WMResourcePlan> getAllResourcePlans() throws MetaException { return objectStore.getAllResourcePlans(); } - + @Override public WMFullResourcePlan alterResourcePlan(String name, WMNullableResourcePlan resourcePlan, boolean canActivateDisabled, boolean canDeactivate, boolean isReplace) @@ -1069,13 +1069,6 @@ public class DummyRawStoreFailEvent implements RawStore, Configurable { objectStore.dropWMTriggerToPoolMapping(resourcePlanName, triggerName, poolPath); } - @Override - public List<ColStatsObjWithSourceInfo> getPartitionColStatsForDatabase(String dbName) - throws MetaException, NoSuchObjectException { - // TODO Auto-generated method stub - return null; - } - public void createISchema(ISchema schema) throws AlreadyExistsException, MetaException, NoSuchObjectException { objectStore.createISchema(schema); http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/service/src/java/org/apache/hive/service/server/HiveServer2.java ---------------------------------------------------------------------- diff --git a/service/src/java/org/apache/hive/service/server/HiveServer2.java b/service/src/java/org/apache/hive/service/server/HiveServer2.java index 5b792ac..bb92c44 100644 --- a/service/src/java/org/apache/hive/service/server/HiveServer2.java +++ b/service/src/java/org/apache/hive/service/server/HiveServer2.java @@ -64,7 +64,6 @@ import org.apache.hadoop.hive.llap.registry.impl.LlapRegistryService; import org.apache.hadoop.hive.metastore.api.WMFullResourcePlan; import org.apache.hadoop.hive.metastore.api.WMPool; import org.apache.hadoop.hive.metastore.api.WMResourcePlan; -import org.apache.hadoop.hive.metastore.cache.CachedStore; import org.apache.hadoop.hive.ql.cache.results.QueryResultsCache; import org.apache.hadoop.hive.ql.exec.spark.session.SparkSessionManagerImpl; import org.apache.hadoop.hive.ql.exec.tez.TezSessionPoolManager; @@ -163,9 +162,6 @@ public class HiveServer2 extends CompositeService { LOG.warn("Could not initiate the HiveServer2 Metrics system. Metrics may not be reported.", t); } - // Initialize cachedstore with background prewarm. The prewarm will only start if configured. - CachedStore.initSharedCacheAsync(hiveConf); - cliService = new CLIService(this); addService(cliService); final HiveServer2 hiveServer2 = this; @@ -570,7 +566,7 @@ public class HiveServer2 extends CompositeService { private void removeServerInstanceFromZooKeeper() throws Exception { setDeregisteredWithZooKeeper(true); - + if (znode != null) { znode.close(); } http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java index 66353e7..5285570 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java @@ -73,7 +73,6 @@ import org.apache.hadoop.fs.Path; import org.apache.hadoop.hive.common.StatsSetupConst; import org.apache.hadoop.hive.metastore.api.*; import org.apache.hadoop.hive.metastore.events.AddForeignKeyEvent; -import org.apache.hadoop.hive.metastore.cache.CachedStore; import org.apache.hadoop.hive.metastore.conf.MetastoreConf; import org.apache.hadoop.hive.metastore.conf.MetastoreConf.ConfVars; import org.apache.hadoop.hive.metastore.events.AddNotNullConstraintEvent; @@ -7962,9 +7961,6 @@ public class HiveMetaStore extends ThriftHiveMetastore { ThreadPool.shutdown(); }, 10); - // This will only initialize the cache if configured. - CachedStore.initSharedCacheAsync(conf); - //Start Metrics for Standalone (Remote) Mode if (MetastoreConf.getBoolVar(conf, ConfVars.METRICS_ENABLED)) { try { http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java index 1f75105..88d88ed 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/ObjectStore.java @@ -201,7 +201,6 @@ import org.apache.hadoop.hive.metastore.tools.SQLGenerator; import org.apache.hadoop.hive.metastore.utils.FileUtils; import org.apache.hadoop.hive.metastore.utils.JavaUtils; import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils; -import org.apache.hadoop.hive.metastore.utils.MetaStoreUtils.ColStatsObjWithSourceInfo; import org.apache.hadoop.hive.metastore.utils.ObjectPair; import org.apache.thrift.TException; import org.datanucleus.AbstractNucleusContext; @@ -7906,35 +7905,6 @@ public class ObjectStore implements RawStore, Configurable { } @Override - public List<ColStatsObjWithSourceInfo> getPartitionColStatsForDatabase(String dbName) - throws MetaException, NoSuchObjectException { - final boolean enableBitVector = - MetastoreConf.getBoolVar(getConf(), ConfVars.STATS_FETCH_BITVECTOR); - return new GetHelper<List<ColStatsObjWithSourceInfo>>(dbName, null, true, false) { - @Override - protected List<ColStatsObjWithSourceInfo> getSqlResult( - GetHelper<List<ColStatsObjWithSourceInfo>> ctx) throws MetaException { - return directSql.getColStatsForAllTablePartitions(dbName, enableBitVector); - } - - @Override - protected List<ColStatsObjWithSourceInfo> getJdoResult( - GetHelper<List<ColStatsObjWithSourceInfo>> ctx) - throws MetaException, NoSuchObjectException { - // This is fast path for query optimizations, if we can find this info - // quickly using directSql, do it. No point in failing back to slow path - // here. - throw new MetaException("Jdo path is not implemented for getPartitionColStatsForDatabase."); - } - - @Override - protected String describeResult() { - return null; - } - }.run(true); - } - - @Override public void flushCache() { // NOP as there's no caching } http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java index b079f8b..ad4af1a 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/RawStore.java @@ -596,17 +596,6 @@ public interface RawStore extends Configurable { List<String> partNames, List<String> colNames) throws MetaException, NoSuchObjectException; /** - * Get column stats for all partitions of all tables in the database - * - * @param dbName - * @return List of column stats objects for all partitions of all tables in the database - * @throws MetaException - * @throws NoSuchObjectException - */ - List<ColStatsObjWithSourceInfo> getPartitionColStatsForDatabase(String dbName) - throws MetaException, NoSuchObjectException; - - /** * Get the next notification event. * @param rqst Request containing information on the last processed notification. * @return list of notifications, sorted by eventId http://git-wip-us.apache.org/repos/asf/hive/blob/26c0ab6a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java ---------------------------------------------------------------------- diff --git a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java index f0f650d..97d8af6 100644 --- a/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java +++ b/standalone-metastore/src/main/java/org/apache/hadoop/hive/metastore/cache/CacheUtils.java @@ -17,78 +17,57 @@ */ package org.apache.hadoop.hive.metastore.cache; -import java.util.Arrays; import java.util.Collections; import java.util.List; import java.util.regex.Pattern; -import org.apache.commons.collections.CollectionUtils; import org.apache.hadoop.hive.metastore.api.Partition; import org.apache.hadoop.hive.metastore.api.SkewedInfo; import org.apache.hadoop.hive.metastore.api.StorageDescriptor; import org.apache.hadoop.hive.metastore.api.Table; -import org.apache.hadoop.hive.metastore.cache.CachedStore.PartitionWrapper; -import org.apache.hadoop.hive.metastore.cache.CachedStore.TableWrapper; +import org.apache.hadoop.hive.metastore.cache.SharedCache.PartitionWrapper; +import org.apache.hadoop.hive.metastore.cache.SharedCache.TableWrapper; import org.apache.hadoop.hive.metastore.utils.StringUtils; public class CacheUtils { private static final String delimit = "\u0001"; - public static String buildKey(String dbName) { - return dbName; - } - - public static String buildKeyWithDelimit(String dbName) { - return buildKey(dbName) + delimit; - } - - public static String buildKey(String dbName, String tableName) { + /** + * Builds a key for the table cache which is concatenation of database name and table name + * separated by a delimiter + * + * @param dbName + * @param tableName + * @return + */ + public static String buildTableCacheKey(String dbName, String tableName) { return dbName + delimit + tableName; } - public static String buildKeyWithDelimit(String dbName, String tableName) { - return buildKey(dbName, tableName) + delimit; - } - - public static String buildKey(String dbName, String tableName, List<String> partVals) { - String key = buildKey(dbName, tableName); - if (CollectionUtils.isNotEmpty(partVals)) { - key += delimit; - key += String.join(delimit, partVals); + /** + * Builds a key for the partition cache which is concatenation of partition values, each value + * separated by a delimiter + * + * @param list of partition values + * @return cache key for partitions cache + */ + public static String buildPartitionCacheKey(List<String> partVals) { + if (partVals == null || partVals.isEmpty()) { + return ""; } - return key; - } - - public static String buildKeyWithDelimit(String dbName, String tableName, List<String> partVals) { - return buildKey(dbName, tableName, partVals) + delimit; - } - - public static String buildKey(String dbName, String tableName, List<String> partVals, String colName) { - String key = buildKey(dbName, tableName, partVals); - return key + delimit + colName; - } - - public static String buildKey(String dbName, String tableName, String colName) { - String key = buildKey(dbName, tableName); - return key + delimit + colName; - } - - public static String[] splitTableColStats(String key) { - return key.split(delimit); - } - - public static Object[] splitPartitionColStats(String key) { - Object[] result = new Object[4]; - String[] comps = key.split(delimit); - result[0] = comps[0]; - result[1] = comps[1]; - result[2] = Arrays.asList((Arrays.copyOfRange(comps, 2, comps.length - 1))); - result[3] = comps[comps.length-1]; - return result; + return String.join(delimit, partVals); } - public static Object[] splitAggrColStats(String key) { - return key.split(delimit); + /** + * Builds a key for the partitions column cache which is concatenation of partition values, each + * value separated by a delimiter and the column name + * + * @param list of partition values + * @param column name + * @return cache key for partitions column stats cache + */ + public static String buildPartitonColStatsCacheKey(List<String> partVals, String colName) { + return buildPartitionCacheKey(partVals) + delimit + colName; } static Table assemble(TableWrapper wrapper, SharedCache sharedCache) {