Author: gunther
Date: Thu Jan 30 00:24:28 2014
New Revision: 1562653

URL: http://svn.apache.org/r1562653
Log:
HIVE-6157: Fetching column stats slower than the 101 during rush hour (Sergey 
Shelukhin via Gunther Hagleitner)

Added:
    
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/PartitionsStatsRequest.java
    
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/PartitionsStatsResult.java
    
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/TableStatsRequest.java
    
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/TableStatsResult.java
    
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/StatObjectConverter.java
Modified:
    
hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
    hive/trunk/metastore/if/hive_metastore.thrift
    hive/trunk/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.cpp
    hive/trunk/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore.h
    
hive/trunk/metastore/src/gen/thrift/gen-cpp/ThriftHiveMetastore_server.skeleton.cpp
    hive/trunk/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.cpp
    hive/trunk/metastore/src/gen/thrift/gen-cpp/hive_metastore_types.h
    
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/AddPartitionsRequest.java
    
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/AddPartitionsResult.java
    
hive/trunk/metastore/src/gen/thrift/gen-javabean/org/apache/hadoop/hive/metastore/api/ThriftHiveMetastore.java
    
hive/trunk/metastore/src/gen/thrift/gen-php/metastore/ThriftHiveMetastore.php
    hive/trunk/metastore/src/gen/thrift/gen-php/metastore/Types.php
    
hive/trunk/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore-remote
    
hive/trunk/metastore/src/gen/thrift/gen-py/hive_metastore/ThriftHiveMetastore.py
    hive/trunk/metastore/src/gen/thrift/gen-py/hive_metastore/ttypes.py
    hive/trunk/metastore/src/gen/thrift/gen-rb/hive_metastore_types.rb
    hive/trunk/metastore/src/gen/thrift/gen-rb/thrift_hive_metastore.rb
    
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStore.java
    
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/HiveMetaStoreClient.java
    
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/IMetaStoreClient.java
    
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/MetaStoreDirectSql.java
    
hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/ObjectStore.java
    hive/trunk/metastore/src/java/org/apache/hadoop/hive/metastore/RawStore.java
    
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreControlledCommit.java
    
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/DummyRawStoreForJdoConnection.java
    
hive/trunk/metastore/src/test/org/apache/hadoop/hive/metastore/VerifyingObjectStore.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/metadata/Hive.java
    
hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/optimizer/StatsOptimizer.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
    hive/trunk/ql/src/java/org/apache/hadoop/hive/ql/stats/StatsUtils.java
    hive/trunk/ql/src/test/results/clientpositive/metadataonly1.q.out

Modified: 
hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
URL: 
http://svn.apache.org/viewvc/hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java?rev=1562653&r1=1562652&r2=1562653&view=diff
==============================================================================
--- 
hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
 (original)
+++ 
hive/trunk/itests/hive-unit/src/test/java/org/apache/hadoop/hive/metastore/TestHiveMetaStore.java
 Thu Jan 30 00:24:28 2014
@@ -1437,34 +1437,24 @@ public abstract class TestHiveMetaStore 
       client.updateTableColumnStatistics(colStats);
 
       // retrieve the stats obj that was just written
-      ColumnStatistics colStats2 = client.getTableColumnStatistics(dbName, 
tblName, colName[0]);
+      ColumnStatisticsObj colStats2 = client.getTableColumnStatistics(
+          dbName, tblName, Lists.newArrayList(colName[0])).get(0);
 
      // compare stats obj to ensure what we get is what we wrote
       assertNotNull(colStats2);
-      assertEquals(colStats2.getStatsDesc().getDbName(), dbName);
-      assertEquals(colStats2.getStatsDesc().getTableName(), tblName);
-      assertEquals(colStats2.getStatsObj().get(0).getColName(), colName[0]);
-      
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getLowValue(),
-        lowValue);
-      
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getHighValue(),
-        highValue);
-      
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getNumNulls(),
-        numNulls);
-      
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getDoubleStats().getNumDVs(),
-        numDVs);
-      assertEquals(colStats2.getStatsDesc().isIsTblLevel(), isTblLevel);
+      assertEquals(colStats2.getColName(), colName[0]);
+      assertEquals(colStats2.getStatsData().getDoubleStats().getLowValue(), 
lowValue);
+      assertEquals(colStats2.getStatsData().getDoubleStats().getHighValue(), 
highValue);
+      assertEquals(colStats2.getStatsData().getDoubleStats().getNumNulls(), 
numNulls);
+      assertEquals(colStats2.getStatsData().getDoubleStats().getNumDVs(), 
numDVs);
 
       // test delete column stats; if no col name is passed all column stats 
associated with the
       // table is deleted
       boolean status = client.deleteTableColumnStatistics(dbName, tblName, 
null);
       assertTrue(status);
       // try to query stats for a column for which stats doesn't exist
-      try {
-        colStats2 = client.getTableColumnStatistics(dbName, tblName, 
colName[1]);
-        assertTrue(true);
-      } catch (NoSuchObjectException e) {
-        System.out.println("Statistics for column=" + colName[1] + " not 
found");
-      }
+      assertTrue(client.getTableColumnStatistics(
+          dbName, tblName, Lists.newArrayList(colName[1])).isEmpty());
 
       colStats.setStatsDesc(statsDesc);
       colStats.setStatsObj(statsObjs);
@@ -1473,7 +1463,8 @@ public abstract class TestHiveMetaStore 
       client.updateTableColumnStatistics(colStats);
 
       // query column stats for column whose stats were updated in the 
previous call
-      colStats2 = client.getTableColumnStatistics(dbName, tblName, colName[0]);
+      colStats2 = client.getTableColumnStatistics(
+          dbName, tblName, Lists.newArrayList(colName[0])).get(0);
 
       // partition level column statistics test
       // create a table with multiple partitions
@@ -1505,37 +1496,27 @@ public abstract class TestHiveMetaStore 
 
      client.updatePartitionColumnStatistics(colStats);
 
-     colStats2 = client.getPartitionColumnStatistics(dbName, tblName, 
partName, colName[1]);
+     colStats2 = client.getPartitionColumnStatistics(dbName, tblName,
+         Lists.newArrayList(partName), 
Lists.newArrayList(colName[1])).get(partName).get(0);
 
      // compare stats obj to ensure what we get is what we wrote
      assertNotNull(colStats2);
-     assertEquals(colStats2.getStatsDesc().getDbName(), dbName);
-     assertEquals(colStats2.getStatsDesc().getTableName(), tblName);
      assertEquals(colStats.getStatsDesc().getPartName(), partName);
-     assertEquals(colStats2.getStatsObj().get(0).getColName(), colName[1]);
-     
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getMaxColLen(),
-       maxColLen);
-     
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getAvgColLen(),
-       avgColLen);
-     
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getNumNulls(),
-       numNulls);
-     
assertEquals(colStats2.getStatsObj().get(0).getStatsData().getStringStats().getNumDVs(),
-       numDVs);
-     assertEquals(colStats2.getStatsDesc().isIsTblLevel(), isTblLevel);
+     assertEquals(colStats2.getColName(), colName[1]);
+     assertEquals(colStats2.getStatsData().getStringStats().getMaxColLen(), 
maxColLen);
+     assertEquals(colStats2.getStatsData().getStringStats().getAvgColLen(), 
avgColLen);
+     assertEquals(colStats2.getStatsData().getStringStats().getNumNulls(), 
numNulls);
+     assertEquals(colStats2.getStatsData().getStringStats().getNumDVs(), 
numDVs);
 
      // test stats deletion at partition level
      client.deletePartitionColumnStatistics(dbName, tblName, partName, 
colName[1]);
 
-     colStats2 = client.getPartitionColumnStatistics(dbName, tblName, 
partName, colName[0]);
+     colStats2 = client.getPartitionColumnStatistics(dbName, tblName,
+         Lists.newArrayList(partName), 
Lists.newArrayList(colName[0])).get(partName).get(0);
 
      // test get stats on a column for which stats doesn't exist
-     try {
-       colStats2 = client.getPartitionColumnStatistics(dbName, tblName, 
partName, colName[1]);
-       assertTrue(true);
-     } catch (NoSuchObjectException e) {
-       System.out.println("Statistics for column=" + colName[1] + " not 
found");
-     }
-
+     assertTrue(client.getPartitionColumnStatistics(dbName, tblName,
+           Lists.newArrayList(partName), 
Lists.newArrayList(colName[1])).isEmpty());
     } catch (Exception e) {
       System.err.println(StringUtils.stringifyException(e));
       System.err.println("testColumnStatistics() failed.");

Modified: hive/trunk/metastore/if/hive_metastore.thrift
URL: 
http://svn.apache.org/viewvc/hive/trunk/metastore/if/hive_metastore.thrift?rev=1562653&r1=1562652&r2=1562653&view=diff
==============================================================================
--- hive/trunk/metastore/if/hive_metastore.thrift (original)
+++ hive/trunk/metastore/if/hive_metastore.thrift Thu Jan 30 00:24:28 2014
@@ -67,7 +67,7 @@ const string HIVE_FILTER_FIELD_PARAMS = 
 const string HIVE_FILTER_FIELD_LAST_ACCESS = "hive_filter_field_last_access__"
 
 enum PartitionEventType {
-  LOAD_DONE = 1,  
+  LOAD_DONE = 1,
 }
 
 struct HiveObjectRef{
@@ -286,6 +286,27 @@ struct PartitionsByExprRequest {
   5: optional i16 maxParts=-1
 }
 
+struct TableStatsResult {
+  1: required list<ColumnStatisticsObj> tableStats
+}
+
+struct PartitionsStatsResult {
+  1: required map<string, list<ColumnStatisticsObj>> partStats
+}
+
+struct TableStatsRequest {
+ 1: required string dbName,
+ 2: required string tblName,
+ 3: required list<string> colNames
+}
+
+struct PartitionsStatsRequest {
+ 1: required string dbName,
+ 2: required string tblName,
+ 3: required list<string> colNames,
+ 4: required list<string> partNames
+}
+
 // Return type for add_partitions_req
 struct AddPartitionsResult {
   1: optional list<Partition> partitions,
@@ -359,7 +380,7 @@ service ThriftHiveMetastore extends fb30
   list<string> get_databases(1:string pattern) throws(1:MetaException o1)
   list<string> get_all_databases() throws(1:MetaException o1)
   void alter_database(1:string dbname, 2:Database db) throws(1:MetaException 
o1, 2:NoSuchObjectException o2)
-  
+
   // returns the type with given name (make seperate calls for the dependent 
types if needed)
   Type get_type(1:string name)  throws(1:MetaException o1, 
2:NoSuchObjectException o2)
   bool create_type(1:Type type) throws(1:AlreadyExistsException o1, 
2:InvalidObjectException o2, 3:MetaException o3)
@@ -446,7 +467,7 @@ service ThriftHiveMetastore extends fb30
                        throws (1:InvalidOperationException o1, 2:MetaException 
o2)
   void alter_table_with_environment_context(1:string dbname, 2:string tbl_name,
       3:Table new_tbl, 4:EnvironmentContext environment_context)
-      throws (1:InvalidOperationException o1, 2:MetaException o2) 
+      throws (1:InvalidOperationException o1, 2:MetaException o2)
   // the following applies to only tables that have partitions
   // * See notes on DDL_TIME
   Partition add_partition(1:Partition new_part)
@@ -486,7 +507,7 @@ service ThriftHiveMetastore extends fb30
       throws(1:MetaException o1, 2:NoSuchObjectException o2, 
3:InvalidObjectException o3,
       4:InvalidInputException o4)
 
-  Partition get_partition_with_auth(1:string db_name, 2:string tbl_name, 
3:list<string> part_vals, 
+  Partition get_partition_with_auth(1:string db_name, 2:string tbl_name, 
3:list<string> part_vals,
       4: string user_name, 5: list<string> group_names) throws(1:MetaException 
o1, 2:NoSuchObjectException o2)
 
   Partition get_partition_by_name(1:string db_name 2:string tbl_name, 3:string 
part_name)
@@ -496,25 +517,25 @@ service ThriftHiveMetastore extends fb30
   // If max parts is given then it will return only that many.
   list<Partition> get_partitions(1:string db_name, 2:string tbl_name, 3:i16 
max_parts=-1)
                        throws(1:NoSuchObjectException o1, 2:MetaException o2)
-  list<Partition> get_partitions_with_auth(1:string db_name, 2:string 
tbl_name, 3:i16 max_parts=-1, 
-     4: string user_name, 5: list<string> group_names) 
throws(1:NoSuchObjectException o1, 2:MetaException o2)                       
+  list<Partition> get_partitions_with_auth(1:string db_name, 2:string 
tbl_name, 3:i16 max_parts=-1,
+     4: string user_name, 5: list<string> group_names) 
throws(1:NoSuchObjectException o1, 2:MetaException o2)
 
   list<string> get_partition_names(1:string db_name, 2:string tbl_name, 3:i16 
max_parts=-1)
                        throws(1:MetaException o2)
-                       
-  // get_partition*_ps methods allow filtering by a partial partition 
specification, 
-  // as needed for dynamic partitions. The values that are not restricted 
should 
-  // be empty strings. Nulls were considered (instead of "") but caused errors 
in 
+
+  // get_partition*_ps methods allow filtering by a partial partition 
specification,
+  // as needed for dynamic partitions. The values that are not restricted 
should
+  // be empty strings. Nulls were considered (instead of "") but caused errors 
in
   // generated Python code. The size of part_vals may be smaller than the
   // number of partition columns - the unspecified values are considered the 
same
   // as "".
-  list<Partition> get_partitions_ps(1:string db_name 2:string tbl_name 
+  list<Partition> get_partitions_ps(1:string db_name 2:string tbl_name
        3:list<string> part_vals, 4:i16 max_parts=-1)
                        throws(1:MetaException o1, 2:NoSuchObjectException o2)
-  list<Partition> get_partitions_ps_with_auth(1:string db_name, 2:string 
tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1, 
-     5: string user_name, 6: list<string> group_names) 
throws(1:NoSuchObjectException o1, 2:MetaException o2)                       
-  
-  list<string> get_partition_names_ps(1:string db_name, 
+  list<Partition> get_partitions_ps_with_auth(1:string db_name, 2:string 
tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1,
+     5: string user_name, 6: list<string> group_names) 
throws(1:NoSuchObjectException o1, 2:MetaException o2)
+
+  list<string> get_partition_names_ps(1:string db_name,
        2:string tbl_name, 3:list<string> part_vals, 4:i16 max_parts=-1)
                           throws(1:MetaException o1, 2:NoSuchObjectException 
o2)
 
@@ -538,8 +559,8 @@ service ThriftHiveMetastore extends fb30
   // * See notes on DDL_TIME
   void alter_partition(1:string db_name, 2:string tbl_name, 3:Partition 
new_part)
                        throws (1:InvalidOperationException o1, 2:MetaException 
o2)
-                       
-  // change a list of partitions. All partitions are altered atomically and 
all 
+
+  // change a list of partitions. All partitions are altered atomically and all
   // prehooks are fired together followed by all post hooks
   void alter_partitions(1:string db_name, 2:string tbl_name, 3:list<Partition> 
new_parts)
                        throws (1:InvalidOperationException o1, 2:MetaException 
o2)
@@ -566,7 +587,7 @@ service ThriftHiveMetastore extends fb30
   // thrown.
   string get_config_value(1:string name, 2:string defaultValue)
                           throws(1:ConfigValSecurityException o1)
-                          
+
   // converts a partition name into a partition values array
   list<string> partition_name_to_vals(1: string part_name)
                           throws(1: MetaException o1)
@@ -574,23 +595,23 @@ service ThriftHiveMetastore extends fb30
   // the partition cols to the values)
   map<string, string> partition_name_to_spec(1: string part_name)
                           throws(1: MetaException o1)
-  
+
   void markPartitionForEvent(1:string db_name, 2:string tbl_name, 
3:map<string,string> part_vals,
-                  4:PartitionEventType eventType) throws (1: MetaException o1, 
2: NoSuchObjectException o2, 
+                  4:PartitionEventType eventType) throws (1: MetaException o1, 
2: NoSuchObjectException o2,
                   3: UnknownDBException o3, 4: UnknownTableException o4, 5: 
UnknownPartitionException o5,
-                  6: InvalidPartitionException o6) 
-  bool isPartitionMarkedForEvent(1:string db_name, 2:string tbl_name, 
3:map<string,string> part_vals, 
+                  6: InvalidPartitionException o6)
+  bool isPartitionMarkedForEvent(1:string db_name, 2:string tbl_name, 
3:map<string,string> part_vals,
                   4: PartitionEventType eventType) throws (1: MetaException 
o1, 2:NoSuchObjectException o2,
                   3: UnknownDBException o3, 4: UnknownTableException o4, 5: 
UnknownPartitionException o5,
-                  6: InvalidPartitionException o6) 
-                         
+                  6: InvalidPartitionException o6)
+
   //index
   Index add_index(1:Index new_index, 2: Table index_table)
                        throws(1:InvalidObjectException o1, 
2:AlreadyExistsException o2, 3:MetaException o3)
   void alter_index(1:string dbname, 2:string base_tbl_name, 3:string idx_name, 
4:Index new_idx)
                        throws (1:InvalidOperationException o1, 2:MetaException 
o2)
   bool drop_index_by_name(1:string db_name, 2:string tbl_name, 3:string 
index_name, 4:bool deleteData)
-                       throws(1:NoSuchObjectException o1, 2:MetaException o2) 
+                       throws(1:NoSuchObjectException o1, 2:MetaException o2)
   Index get_index_by_name(1:string db_name 2:string tbl_name, 3:string 
index_name)
                        throws(1:MetaException o1, 2:NoSuchObjectException o2)
 
@@ -619,6 +640,10 @@ service ThriftHiveMetastore extends fb30
   ColumnStatistics get_partition_column_statistics(1:string db_name, 2:string 
tbl_name, 3:string part_name,
                4:string col_name) throws (1:NoSuchObjectException o1, 
2:MetaException o2,
                3:InvalidInputException o3, 4:InvalidObjectException o4)
+  TableStatsResult get_table_statistics_req(1:TableStatsRequest request) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2)
+  PartitionsStatsResult get_partitions_statistics_req(1:PartitionsStatsRequest 
request) throws
+              (1:NoSuchObjectException o1, 2:MetaException o2)
 
   // delete APIs attempt to delete column statistics, if found, associated 
with a given db_name, tbl_name, [part_name]
   // and col_name. If the delete API doesn't find the statistics record in the 
metastore, throws NoSuchObjectException
@@ -631,30 +656,30 @@ service ThriftHiveMetastore extends fb30
                4:InvalidInputException o4)
 
   //authorization privileges
-                       
+
   bool create_role(1:Role role) throws(1:MetaException o1)
   bool drop_role(1:string role_name) throws(1:MetaException o1)
   list<string> get_role_names() throws(1:MetaException o1)
-  bool grant_role(1:string role_name, 2:string principal_name, 3:PrincipalType 
principal_type, 
+  bool grant_role(1:string role_name, 2:string principal_name, 3:PrincipalType 
principal_type,
     4:string grantor, 5:PrincipalType grantorType, 6:bool grant_option) 
throws(1:MetaException o1)
-  bool revoke_role(1:string role_name, 2:string principal_name, 
3:PrincipalType principal_type) 
+  bool revoke_role(1:string role_name, 2:string principal_name, 
3:PrincipalType principal_type)
                         throws(1:MetaException o1)
   list<Role> list_roles(1:string principal_name, 2:PrincipalType 
principal_type) throws(1:MetaException o1)
 
-  PrincipalPrivilegeSet get_privilege_set(1:HiveObjectRef hiveObject, 2:string 
user_name, 
+  PrincipalPrivilegeSet get_privilege_set(1:HiveObjectRef hiveObject, 2:string 
user_name,
     3: list<string> group_names) throws(1:MetaException o1)
-  list<HiveObjectPrivilege> list_privileges(1:string principal_name, 
2:PrincipalType principal_type, 
+  list<HiveObjectPrivilege> list_privileges(1:string principal_name, 
2:PrincipalType principal_type,
     3: HiveObjectRef hiveObject) throws(1:MetaException o1)
-  
+
   bool grant_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
   bool revoke_privileges(1:PrivilegeBag privileges) throws(1:MetaException o1)
-  
+
   // this is used by metastore client to send UGI information to metastore 
server immediately
-  // after setting up a connection. 
+  // after setting up a connection.
   list<string> set_ugi(1:string user_name, 2:list<string> group_names) throws 
(1:MetaException o1)
 
   //Authentication (delegation token) interfaces
-  
+
   // get metastore server delegation token for use from the map/reduce tasks 
to authenticate
   // to metastore server
   string get_delegation_token(1:string token_owner, 2:string 
renewer_kerberos_principal_name)


Reply via email to