soumyakanti3578 commented on code in PR #5721:
URL: https://github.com/apache/hive/pull/5721#discussion_r2034131783


##########
ql/src/test/results/clientpositive/llap/drop_histogram_stats_for_columns.q.out:
##########
@@ -0,0 +1,307 @@
+PREHOOK: query: CREATE TABLE test_stats (a string, b int, c double) STORED AS ORC
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: CREATE TABLE test_stats (a string, b int, c double) STORED AS ORC
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@test_stats
+PREHOOK: query: insert into test_stats (a, b, c) values ("a", 2, 1.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: insert into test_stats (a, b, c) values ("a", 2, 1.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_stats
+POSTHOOK: Lineage: test_stats.a SCRIPT []
+POSTHOOK: Lineage: test_stats.b SCRIPT []
+POSTHOOK: Lineage: test_stats.c SCRIPT []
+PREHOOK: query: insert into test_stats (a, b, c) values ("b", 2, 2.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: insert into test_stats (a, b, c) values ("b", 2, 2.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_stats
+POSTHOOK: Lineage: test_stats.a SCRIPT []
+POSTHOOK: Lineage: test_stats.b SCRIPT []
+POSTHOOK: Lineage: test_stats.c SCRIPT []
+PREHOOK: query: insert into test_stats (a, b, c) values ("c", 2, 2.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: insert into test_stats (a, b, c) values ("c", 2, 2.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_stats
+POSTHOOK: Lineage: test_stats.a SCRIPT []
+POSTHOOK: Lineage: test_stats.b SCRIPT []
+POSTHOOK: Lineage: test_stats.c SCRIPT []
+PREHOOK: query: insert into test_stats (a, b, c) values ("d", 2, 3.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: insert into test_stats (a, b, c) values ("d", 2, 3.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_stats
+POSTHOOK: Lineage: test_stats.a SCRIPT []
+POSTHOOK: Lineage: test_stats.b SCRIPT []
+POSTHOOK: Lineage: test_stats.c SCRIPT []
+PREHOOK: query: insert into test_stats (a, b, c) values ("e", 2, 3.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: insert into test_stats (a, b, c) values ("e", 2, 3.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_stats
+POSTHOOK: Lineage: test_stats.a SCRIPT []
+POSTHOOK: Lineage: test_stats.b SCRIPT []
+POSTHOOK: Lineage: test_stats.c SCRIPT []
+PREHOOK: query: insert into test_stats (a, b, c) values ("f", 2, 4.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: insert into test_stats (a, b, c) values ("f", 2, 4.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_stats
+POSTHOOK: Lineage: test_stats.a SCRIPT []
+POSTHOOK: Lineage: test_stats.b SCRIPT []
+POSTHOOK: Lineage: test_stats.c SCRIPT []
+PREHOOK: query: insert into test_stats (a, b, c) values ("g", 2, 5.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: insert into test_stats (a, b, c) values ("g", 2, 5.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_stats
+POSTHOOK: Lineage: test_stats.a SCRIPT []
+POSTHOOK: Lineage: test_stats.b SCRIPT []
+POSTHOOK: Lineage: test_stats.c SCRIPT []
+PREHOOK: query: insert into test_stats (a, b, c) values ("h", 2, 6.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: insert into test_stats (a, b, c) values ("h", 2, 6.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_stats
+POSTHOOK: Lineage: test_stats.a SCRIPT []
+POSTHOOK: Lineage: test_stats.b SCRIPT []
+POSTHOOK: Lineage: test_stats.c SCRIPT []
+PREHOOK: query: insert into test_stats (a, b, c) values ("i", 3, 6.1)
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: insert into test_stats (a, b, c) values ("i", 3, 6.1)
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@test_stats
+POSTHOOK: Lineage: test_stats.a SCRIPT []
+POSTHOOK: Lineage: test_stats.b SCRIPT []
+POSTHOOK: Lineage: test_stats.c SCRIPT []
+PREHOOK: query: describe formatted test_stats
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_stats
+POSTHOOK: query: describe formatted test_stats
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_stats
+# col_name             data_type               comment             
+a                      string                                      
+b                      int                                         
+c                      double                                      
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            MANAGED_TABLE            
+Table Parameters:               
+       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+       bucketing_version       2                   
+       numFiles                9                   
+       numRows                 9                   
+       rawDataSize             873                 
+       totalSize               #Masked#
+#### A masked pattern was here ####
+                
+# Storage Information           
+SerDe Library:         org.apache.hadoop.hive.ql.io.orc.OrcSerde        
+InputFormat:           org.apache.hadoop.hive.ql.io.orc.OrcInputFormat  
+OutputFormat:          org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat        
 
+Compressed:            No                       
+Num Buckets:           -1                       
+Bucket Columns:        []                       
+Sort Columns:          []                       
+Storage Desc Params:            
+       serialization.format    1                   
+PREHOOK: query: describe formatted test_stats a
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_stats
+POSTHOOK: query: describe formatted test_stats a
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_stats
+col_name               a                   
+data_type              string              
+min                                        
+max                                        
+num_nulls              0                   
+distinct_count         9                   
+avg_col_len            1.0                 
+max_col_len            1                   
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram                                  
+comment                from deserializer   
+COLUMN_STATS_ACCURATE  {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+PREHOOK: query: describe formatted test_stats b
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_stats
+POSTHOOK: query: describe formatted test_stats b
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_stats
+col_name               b                   
+data_type              int                 
+min                    2                   
+max                    3                   
+num_nulls              0                   
+distinct_count         2                   
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram              Q1: 2, Q2: 2, Q3: 2 
+comment                from deserializer   
+COLUMN_STATS_ACCURATE  {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+PREHOOK: query: describe formatted test_stats c
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_stats
+POSTHOOK: query: describe formatted test_stats c
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_stats
+col_name               c                   
+data_type              double              
+min                    1.1                 
+max                    6.1                 
+num_nulls              0                   
+distinct_count         6                   
+avg_col_len                                
+max_col_len                                
+num_trues                                  
+num_falses                                 
+bit_vector             HL                  
+histogram              Q1: 2.1, Q2: 3.1, Q3: 5.1
+comment                from deserializer   
+COLUMN_STATS_ACCURATE  {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}
+PREHOOK: query: alter table test_stats drop statistics for columns a, c
+PREHOOK: type: ALTERTABLE_DROP_COL_STATS
+PREHOOK: Input: default@test_stats
+PREHOOK: Output: default@test_stats
+POSTHOOK: query: alter table test_stats drop statistics for columns a, c
+POSTHOOK: type: ALTERTABLE_DROP_COL_STATS
+POSTHOOK: Input: default@test_stats
+POSTHOOK: Output: default@test_stats
+PREHOOK: query: describe formatted test_stats
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@test_stats
+POSTHOOK: query: describe formatted test_stats
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@test_stats
+# col_name             data_type               comment             
+a                      string                                      
+b                      int                                         
+c                      double                                      
+                
+# Detailed Table Information            
+Database:              default                  
+#### A masked pattern was here ####
+Retention:             0                        
+#### A masked pattern was here ####
+Table Type:            MANAGED_TABLE            
+Table Parameters:               
+       COLUMN_STATS_ACCURATE   {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}}

Review Comment:
   If I understand correctly, we need to do this (i.e., update 
`COLUMN_STATS_ACCURATE` for the dropped columns) in HS2, after 
`getMSC().deleteColumnStatistics(request);` returns successfully in 
`Hive#deleteColumnStatistics()`.
   
   I think we can probably use 
`StatsSetupConst#removeColumnStatsState(Map<String, String> params, 
List<String> colNames)` to clear the column stats state for the columns listed 
in the ALTER statement. However, for partitioned tables, when no partition info 
is provided in the ALTER statement, we would need to fetch ALL partitions (using 
`List<Partition> getPartitions(Table tbl)` in Hive.java) and then call 
`StatsSetupConst#removeColumnStatsState` for each Partition.
   
   Do you think there is a better and more efficient way to do this?



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org

For queries about this service, please contact Infrastructure at:
us...@infra.apache.org


---------------------------------------------------------------------
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org
For additional commands, e-mail: gitbox-h...@hive.apache.org

Reply via email to