Author: gunther
Date: Thu Jan 30 00:24:28 2014
New Revision: 1562653
URL: http://svn.apache.org/r1562653
Log:
HIVE-6157: Fetching column stats slower than the 101 during rush hour (Sergey
Shelukhin via Gunther Hagleitner)
Added:
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/PartitionsStatsRequest.java
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/PartitionsStatsResult.java
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/TableStatsRequest.java
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/TableStatsResult.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
Modified:
hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
hive/trunk/metastore/if/hive_metastore.thrift
hive/trunk/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp
hive/trunk/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h
hive/trunk/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp
hive/trunk/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
hive/trunk/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/AddPartitionsRequest.java
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/AddPartitionsResult.java
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java
hive/trunk/metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php
hive/trunk/metastore/src/gen/thrift/gen-php/metastore/Types.php
hive/trunk/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote
hive/trunk/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py
hive/trunk/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
hive/trunk/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out
Modified:
hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
URL:
http://svn.apache.org/viewvc/hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java?rev=1562653&r1=1562652&r2=1562653&view=diff
==============================================================================
---
hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
(original)
+++
hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
Thu Jan 30 00:24:28 2014
@@ -1437,34 +1437,24 @@ public abstract class TestHiveMetaStore
client.updateTableColumnStatistics(colStats);
// retrieve the stats obj that was just written
- ColumnStatistics colStats2 = client.getTableColumnStatistics(dbName,
tblName, colName[0]);
+ ColumnStatisticsObj colStats2 = client.getTableColumnStatistics(
+ dbName, tblName, Lists.newArrayList(colName[0])).get(0);
// compare stats obj to ensure what we get is what we wrote
assertNotNull(colStats2);
- assertEquals(colStats2.getStatsDesc().getDbName(), dbName);
- assertEquals(colStats2.getStatsDesc().getTableName(), tblName);
- assertEquals(colStats2.getStatsObj().get(0).getColName(), colName[0]);
-
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getLowValue(),
- lowValue);
-
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getHighValue(),
- highValue);
-
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getNumNulls(),
- numNulls);
-
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getNumDVs(),
- numDVs);
- assertEquals(colStats2.getStatsDesc().isIsTblLevel(), isTblLevel);
+ assertEquals(colStats2.getColName(), colName[0]);
+ assertEquals(colStats2.getStatsData().getDoubleStats().getLowValue(),
lowValue);
+ assertEquals(colStats2.getStatsData().getDoubleStats().getHighValue(),
highValue);
+ assertEquals(colStats2.getStatsData().getDoubleStats().getNumNulls(),
numNulls);
+ assertEquals(colStats2.getStatsData().getDoubleStats().getNumDVs(),
numDVs);
// test delete column stats; if no col name is passed, all column stats
associated with the
// table are deleted
boolean status = client.deleteTableColumnStatistics(dbName, tblName,
null);
assertTrue(status);
// try to query stats for a column for which stats don't exist
- try {
- colStats2 = client.getTableColumnStatistics(dbName, tblName,
colName[1]);
- assertTrue(true);
- } catch (NoSuchObjectException e) {
- System.out.println("Statistics for column=" + colName[1] + " not
found");
- }
+ assertTrue(client.getTableColumnStatistics(
+ dbName, tblName, Lists.newArrayList(colName[1])).isEmpty());
colStats.setStatsDesc(statsDesc);
colStats.setStatsObj(statsObjs);
@@ -1473,7 +1463,8 @@ public abstract class TestHiveMetaStore
client.updateTableColumnStatistics(colStats);
// query column stats for column whose stats were updated in the
previous call
- colStats2 = client.getTableColumnStatistics(dbName, tblName, colName[0]);
+ colStats2 = client.getTableColumnStatistics(
+ dbName, tblName, Lists.newArrayList(colName[0])).get(0);
// partition level column statistics test
// create a table with multiple partitions
@@ -1505,37 +1496,27 @@ public abstract class TestHiveMetaStore
client.updatePartitionColumnStatistics(colStats);
- colStats2 = client.getPartitionColumnStatistics(dbName, tblName,
partName, colName[1]);
+ colStats2 = client.getPartitionColumnStatistics(dbName, tblName,
+ Lists.newArrayList(partName),
Lists.newArrayList(colName[1])).get(partName).get(0);
// compare stats obj to ensure what we get is what we wrote
assertNotNull(colStats2);
- assertEquals(colStats2.getStatsDesc().getDbName(), dbName);
- assertEquals(colStats2.getStatsDesc().getTableName(), tblName);
assertEquals(colStats.getStatsDesc().getPartName(), partName);
- assertEquals(colStats2.getStatsObj().get(0).getColName(), colName[1]);
-
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getMaxColLen(),
- maxColLen);
-
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getAvgColLen(),
- avgColLen);
-
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getNumNulls(),
- numNulls);
-
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getNumDVs(),
- numDVs);
- assertEquals(colStats2.getStatsDesc().isIsTblLevel(), isTblLevel);
+ assertEquals(colStats2.getColName(), colName[1]);
+ assertEquals(colStats2.getStatsData().getStringStats().getMaxColLen(),
maxColLen);
+ assertEquals(colStats2.getStatsData().getStringStats().getAvgColLen(),
avgColLen);
+ assertEquals(colStats2.getStatsData().getStringStats().getNumNulls(),
numNulls);
+ assertEquals(colStats2.getStatsData().getStringStats().getNumDVs(),
numDVs);
// test stats deletion at partition level
client.deletePartitionColumnStatistics(dbName, tblName, partName,
colName[1]);
- colStats2 = client.getPartitionColumnStatistics(dbName, tblName,
partName, colName[0]);
+ colStats2 = client.getPartitionColumnStatistics(dbName, tblName,
+ Lists.newArrayList(partName),
Lists.newArrayList(colName[0])).get(partName).get(0);
// test get stats on a column for which stats don't exist
- try {
- colStats2 = client.getPartitionColumnStatistics(dbName, tblName,
partName, colName[1]);
- assertTrue(true);
- } catch (NoSuchObjectException e) {
- System.out.println("Statistics for column=" + colName[1] + " not
found");
- }
-
+ assertTrue(client.getPartitionColumnStatistics(dbName, tblName,
+ Lists.newArrayList(partName),
Lists.newArrayList(colName[1])).isEmpty());
} catch (Exception e) {
System.err.println(StringUtils.stringifyException(e));
System.err.println("testColumnStatistics() failed.");
Modified: hive/trunk/metastore/if/hive_metastore.thrift
URL:
http://svn.apache.org/viewvc/hive/trunk/metastore/if/hive_metastore.thrift?rev=1562653&r1=1562652&r2=1562653&view=diff
==============================================================================
--- hive/trunk/metastore/if/hive_metastore.thrift (original)
+++ hive/trunk/metastore/if/hive_metastore.thrift Thu Jan 30 00:24:28 2014
@@ -67,7 +67,7 @@ const string HIVE_FILTER_FIELD_PARAMS =
const string HIVE_FILTER_FIELD_LAST_ACCESS = "hive_filter_field_last_access__"
enum PartitionEventType {
- LOAD_DONE = 1,
+ LOAD_DONE = 1,
}
struct HiveObjectRef{
@@ -286,6 +286,27 @@ struct PartitionsByExprRequest {
5: optional i16 maxParts=-1
}
+struct TableStatsResult {
+ 1: required list<ColumnStatisticsObj> tableStats
+}
+
+struct PartitionsStatsResult {
+ 1: required map<string, list<ColumnStatisticsObj>> partStats
+}
+
+struct TableStatsRequest {
+ 1: required string dbName,
+ 2: required string tblName,
+ 3: required list<string> colNames
+}
+
+struct PartitionsStatsRequest {
+ 1: required string dbName,
+ 2: required string tblName,
+ 3: required list<string> colNames,
+ 4: required list<string> partNames
+}
+
// Return type for add_partitions_req
struct AddPartitionsResult {
1: optional list<Partition> partitions,
@@ -359,7 +380,7 @@ service ThriftHiveMetastore extends fb30
list<string> get_databases(1:string pattern) throws(1:MetaException o1)
list<string> get_all_databases() throws(1:MetaException o1)
void alter_database(1:string dbname, 2:Database db) throws(1:MetaException
o1, 2:NoSuchObjectException o2)
-
+
// returns the type with given name (make separate calls for the dependent
types if needed)
Type get_type(1:string name) throws(1:MetaException o1,
2:NoSuchObjectException o2)
bool create_type(1:Type type) throws(1:AlreadyExistsException o1,
2:InvalidObjectException o2, 3:MetaException o3)
@@ -446,7 +467,7 @@ service ThriftHiveMetastore extends fb30
throws (1:InvalidOperationException o1, 2:MetaException
o2)
void alter_table_with_environment_context(1:string dbname, 2:string tbl_name,
3:Table new_tbl, 4:EnvironmentContext environment_context)
- throws (1:InvalidOperationException o1, 2:MetaException o2)
+ throws (1:InvalidOperationException o1, 2:MetaException o2)
// the following applies to only tables that have partitions
// * See notes on DDL_TIME
Partition add_partition(1:Partition new_part)
@@ -486,7 +507,7 @@ service ThriftHiveMetastore extends fb30
throws(1:MetaException o1, 2:NoSuchObjectException o2,
3:InvalidObjectException o3,
4:InvalidInputException o4)
- Partition get_partition_with_auth(1:string db_name, 2:string tbl_name,
3:list<string> part_vals,
+ Partition get_partition_with_auth(1:string db_name, 2:string tbl_name,
3:list<string> part_vals,
4: string user_name, 5: list<string> group_names) throws(1:MetaException
o1, 2:NoSuchObjectException o2)
Partition get_partition_by_name(1:string db_name 2:string tbl_name, 3:string
part_name)
@@ -496,25 +517,25 @@ service ThriftHiveMetastore extends fb30
// If max parts is given then it will return only that many.
list<Partition> get_partitions(1:string db_name, 2:string tbl_name, 3:i16
max_parts=-1)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
- list<Partition> get_partitions_with_auth(1:string db_name, 2:string
tbl_name, 3:i16 max_parts=-1,
- 4: string user_name, 5: list<string> group_names)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
+ list<Partition> get_partitions_with_auth(1:string db_name, 2:string
tbl_name, 3:i16 max_parts=-1,
+ 4: string user_name, 5: list<string> group_names)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
list<string> get_partition_names(1:string db_name, 2:string tbl_name, 3:i16
max_parts=-1)
throws(1:MetaException o2)
-
- // get_partition*_ps methods allow filtering by a partial partition
specification,
- // as needed for dynamic partitions. The values that are not restricted
should
- // be empty strings. Nulls were considered (instead of "") but caused errors
in
+
+ // get_partition*_ps methods allow filtering by a partial partition
specification,
+ // as needed for dynamic partitions. The values that are not restricted
should
+ // be empty strings. Nulls were considered (instead of "") but caused errors
in
// generated Python code. The size of part_vals may be smaller than the
// number of partition columns - the unspecified values are considered the
same
// as "".
- list<Partition> get_partitions_ps(1:string db_name 2:string tbl_name
+ list<Partition> get_partitions_ps(1:string db_name 2:string tbl_name
3:list<string> part_vals, 4:i16 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
- list<Partition> get_partitions_ps_with_auth(1:string db_name, 2:string
tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1,
- 5: string user_name, 6: list<string> group_names)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
-
- list<string> get_partition_names_ps(1:string db_name,
+ list<Partition> get_partitions_ps_with_auth(1:string db_name, 2:string
tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1,
+ 5: string user_name, 6: list<string> group_names)
throws(1:NoSuchObjectException o1, 2:MetaException o2)
+
+ list<string> get_partition_names_ps(1:string db_name,
2:string tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1)
throws(1:MetaException o1, 2:NoSuchObjectException
o2)
@@ -538,8 +559,8 @@ service ThriftHiveMetastore extends fb30
// * See notes on DDL_TIME
void alter_partition(1:string db_name, 2:string tbl_name, 3:Partition
new_part)
throws (1:InvalidOperationException o1, 2:MetaException
o2)
-
- // change a list of partitions. All partitions are altered atomically and
all
+
+ // change a list of partitions. All partitions are altered atomically and all
// prehooks are fired together followed by all post hooks
void alter_partitions(1:string db_name, 2:string tbl_name, 3:list<Partition>
new_parts)
throws (1:InvalidOperationException o1, 2:MetaException
o2)
@@ -566,7 +587,7 @@ service ThriftHiveMetastore extends fb30
// thrown.
string get_config_value(1:string name, 2:string defaultValue)
throws(1:ConfigValSecurityException o1)
-
+
// converts a partition name into a partition values array
list<string> partition_name_to_vals(1: string part_name)
throws(1: MetaException o1)
@@ -574,23 +595,23 @@ service ThriftHiveMetastore extends fb30
// the partition cols to the values)
map<string, string> partition_name_to_spec(1: string part_name)
throws(1: MetaException o1)
-
+
void markPartitionForEvent(1:string db_name, 2:string tbl_name,
3:map<string,string> part_vals,
- 4:PartitionEventType eventType) throws (1: MetaException o1,
2: NoSuchObjectException o2,
+ 4:PartitionEventType eventType) throws (1: MetaException o1,
2: NoSuchObjectException o2,
3: UnknownDBException o3, 4: UnknownTableException o4, 5:
UnknownPartitionException o5,
- 6: InvalidPartitionException o6)
- bool isPartitionMarkedForEvent(1:string db_name, 2:string tbl_name,
3:map<string,string> part_vals,
+ 6: InvalidPartitionException o6)
+ bool isPartitionMarkedForEvent(1:string db_name, 2:string tbl_name,
3:map<string,string> part_vals,
4: PartitionEventType eventType) throws (1: MetaException
o1, 2:NoSuchObjectException o2,
3: UnknownDBException o3, 4: UnknownTableException o4, 5:
UnknownPartitionException o5,
- 6: InvalidPartitionException o6)
-
+ 6: InvalidPartitionException o6)
+
//index
Index add_index(1:Index new_index, 2: Table index_table)
throws(1:InvalidObjectException o1,
2:AlreadyExistsException o2, 3:MetaException o3)
void alter_index(1:string dbname, 2:string base_tbl_name, 3:string idx_name,
4:Index new_idx)
throws (1:InvalidOperationException o1, 2:MetaException
o2)
bool drop_index_by_name(1:string db_name, 2:string tbl_name, 3:string
index_name, 4:bool deleteData)
- throws(1:NoSuchObjectException o1, 2:MetaException o2)
+ throws(1:NoSuchObjectException o1, 2:MetaException o2)
Index get_index_by_name(1:string db_name 2:string tbl_name, 3:string
index_name)
throws(1:MetaException o1, 2:NoSuchObjectException o2)
@@ -619,6 +640,10 @@ service ThriftHiveMetastore extends fb30
ColumnStatistics get_partition_column_statistics(1:string db_name, 2:string
tbl_name, 3:string part_name,
4:string col_name) throws (1:NoSuchObjectException o1,
2:MetaException o2,
3:InvalidInputException o3, 4:InvalidObjectException o4)
+ TableStatsResult get_table_statistics_req(1:TableStatsRequest request) throws
+ (1:NoSuchObjectException o1, 2:MetaException o2)
+ PartitionsStatsResult get_partitions_statistics_req(1:PartitionsStatsRequest
request) throws
+ (1:NoSuchObjectException o1, 2:MetaException o2)
// delete APIs attempt to delete column statistics, if found, associated
with a given db_name, tbl_name, [part_name]
// and col_name. If the delete API doesn't find the statistics record in the
metastore, throws NoSuchObjectException
@@ -631,30 +656,30 @@ service ThriftHiveMetastore extends fb30
4:InvalidInputException o4)
//authorization privileges
-
+
bool create_role(1:Role role) throws(1:MetaException o1)
bool drop_role(1:string role_name) throws(1:MetaException o1)
list<string> get_role_names() throws(1:MetaException o1)
- bool grant_role(1:string role_name, 2:string principal_name, 3:PrincipalType
principal_type,
+ bool grant_role(1:string role_name, 2:string principal_name, 3:PrincipalType
principal_type,
4:string grantor, 5:PrincipalType grantorType, 6:bool grant_option)
throws(1:MetaException o1)
- bool revoke_role(1:string role_name, 2:string principal_name,
3:PrincipalType principal_type)
+ bool revoke_role(1:string role_name, 2:string principal_name,
3:PrincipalType principal_type)
throws(1:MetaException o1)
list<Role> list_roles(1:string principal_name, 2:PrincipalType
principal_type) throws(1:MetaException o1)
- PrincipalPrivilegeSet get_privilege_set(1:HiveObjectRef hiveObject, 2:string
user_name,
+ PrincipalPrivilegeSet get_privilege_set(1:HiveObjectRef hiveObject, 2:string
user_name,
3: list<string> group_names) throws(1:MetaException o1)
- list<HiveObjectPrivilege> list_privileges(1:string principal_name,
2:PrincipalType principal_type,
+ list<HiveObjectPrivilege> list_privileges(1:string principal_name,
2:PrincipalType principal_type,
3: HiveObjectRef hiveObject) throws(1:MetaException o1)
-
+
bool grant_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
bool revoke_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
-
+
// this is used by metastore client to send UGI information to metastore
server immediately
- // after setting up a connection.
+ // after setting up a connection.
list<string> set_ugi(1:string user_name, 2:list<string> group_names) throws
(1:MetaException o1)
//Authentication (delegation token) interfaces
-
+
// get metastore server delegation token for use from the map/reduce tasks
to authenticate
// to metastore server
string get_delegation_token(1:string token_owner, 2:string
renewer_kerberos_principal_name)