soumyakanti3578 commented on code in PR #5721: URL: https://github.com/apache/hive/pull/5721#discussion_r2034131783
########## ql/src/test/results/clientpositive/llap/drop_histogram_stats_for_columns.q.out: ########## @@ -0,0 +1,307 @@ +PREHOOK: query: CREATE TABLE test_stats (a string, b int, c double) STORED AS ORC +PREHOOK: type: CREATETABLE +PREHOOK: Output: database:default +PREHOOK: Output: default@test_stats +POSTHOOK: query: CREATE TABLE test_stats (a string, b int, c double) STORED AS ORC +POSTHOOK: type: CREATETABLE +POSTHOOK: Output: database:default +POSTHOOK: Output: default@test_stats +PREHOOK: query: insert into test_stats (a, b, c) values ("a", 2, 1.1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats +POSTHOOK: query: insert into test_stats (a, b, c) values ("a", 2, 1.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats +POSTHOOK: Lineage: test_stats.a SCRIPT [] +POSTHOOK: Lineage: test_stats.b SCRIPT [] +POSTHOOK: Lineage: test_stats.c SCRIPT [] +PREHOOK: query: insert into test_stats (a, b, c) values ("b", 2, 2.1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats +POSTHOOK: query: insert into test_stats (a, b, c) values ("b", 2, 2.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats +POSTHOOK: Lineage: test_stats.a SCRIPT [] +POSTHOOK: Lineage: test_stats.b SCRIPT [] +POSTHOOK: Lineage: test_stats.c SCRIPT [] +PREHOOK: query: insert into test_stats (a, b, c) values ("c", 2, 2.1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats +POSTHOOK: query: insert into test_stats (a, b, c) values ("c", 2, 2.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats +POSTHOOK: Lineage: test_stats.a SCRIPT [] +POSTHOOK: Lineage: test_stats.b SCRIPT [] +POSTHOOK: Lineage: test_stats.c SCRIPT [] +PREHOOK: query: insert into test_stats (a, b, c) 
values ("d", 2, 3.1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats +POSTHOOK: query: insert into test_stats (a, b, c) values ("d", 2, 3.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats +POSTHOOK: Lineage: test_stats.a SCRIPT [] +POSTHOOK: Lineage: test_stats.b SCRIPT [] +POSTHOOK: Lineage: test_stats.c SCRIPT [] +PREHOOK: query: insert into test_stats (a, b, c) values ("e", 2, 3.1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats +POSTHOOK: query: insert into test_stats (a, b, c) values ("e", 2, 3.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats +POSTHOOK: Lineage: test_stats.a SCRIPT [] +POSTHOOK: Lineage: test_stats.b SCRIPT [] +POSTHOOK: Lineage: test_stats.c SCRIPT [] +PREHOOK: query: insert into test_stats (a, b, c) values ("f", 2, 4.1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats +POSTHOOK: query: insert into test_stats (a, b, c) values ("f", 2, 4.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats +POSTHOOK: Lineage: test_stats.a SCRIPT [] +POSTHOOK: Lineage: test_stats.b SCRIPT [] +POSTHOOK: Lineage: test_stats.c SCRIPT [] +PREHOOK: query: insert into test_stats (a, b, c) values ("g", 2, 5.1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats +POSTHOOK: query: insert into test_stats (a, b, c) values ("g", 2, 5.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats +POSTHOOK: Lineage: test_stats.a SCRIPT [] +POSTHOOK: Lineage: test_stats.b SCRIPT [] +POSTHOOK: Lineage: test_stats.c SCRIPT [] +PREHOOK: query: insert into test_stats (a, b, c) values ("h", 2, 6.1) +PREHOOK: type: QUERY +PREHOOK: Input: 
_dummy_database@_dummy_table +PREHOOK: Output: default@test_stats +POSTHOOK: query: insert into test_stats (a, b, c) values ("h", 2, 6.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats +POSTHOOK: Lineage: test_stats.a SCRIPT [] +POSTHOOK: Lineage: test_stats.b SCRIPT [] +POSTHOOK: Lineage: test_stats.c SCRIPT [] +PREHOOK: query: insert into test_stats (a, b, c) values ("i", 3, 6.1) +PREHOOK: type: QUERY +PREHOOK: Input: _dummy_database@_dummy_table +PREHOOK: Output: default@test_stats +POSTHOOK: query: insert into test_stats (a, b, c) values ("i", 3, 6.1) +POSTHOOK: type: QUERY +POSTHOOK: Input: _dummy_database@_dummy_table +POSTHOOK: Output: default@test_stats +POSTHOOK: Lineage: test_stats.a SCRIPT [] +POSTHOOK: Lineage: test_stats.b SCRIPT [] +POSTHOOK: Lineage: test_stats.c SCRIPT [] +PREHOOK: query: describe formatted test_stats +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_stats +POSTHOOK: query: describe formatted test_stats +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_stats +# col_name data_type comment +a string +b int +c double + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} + bucketing_version 2 + numFiles 9 + numRows 9 + rawDataSize 873 + totalSize #Masked# +#### A masked pattern was here #### + +# Storage Information +SerDe Library: org.apache.hadoop.hive.ql.io.orc.OrcSerde +InputFormat: org.apache.hadoop.hive.ql.io.orc.OrcInputFormat +OutputFormat: org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat +Compressed: No +Num Buckets: -1 +Bucket Columns: [] +Sort Columns: [] +Storage Desc Params: + serialization.format 1 +PREHOOK: query: describe formatted test_stats a +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_stats 
+POSTHOOK: query: describe formatted test_stats a +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_stats +col_name a +data_type string +min +max +num_nulls 0 +distinct_count 9 +avg_col_len 1.0 +max_col_len 1 +num_trues +num_falses +bit_vector HL +histogram +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +PREHOOK: query: describe formatted test_stats b +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_stats +POSTHOOK: query: describe formatted test_stats b +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_stats +col_name b +data_type int +min 2 +max 3 +num_nulls 0 +distinct_count 2 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +histogram Q1: 2, Q2: 2, Q3: 2 +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +PREHOOK: query: describe formatted test_stats c +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_stats +POSTHOOK: query: describe formatted test_stats c +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_stats +col_name c +data_type double +min 1.1 +max 6.1 +num_nulls 0 +distinct_count 6 +avg_col_len +max_col_len +num_trues +num_falses +bit_vector HL +histogram Q1: 2.1, Q2: 3.1, Q3: 5.1 +comment from deserializer +COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} +PREHOOK: query: alter table test_stats drop statistics for columns a, c +PREHOOK: type: ALTERTABLE_DROP_COL_STATS +PREHOOK: Input: default@test_stats +PREHOOK: Output: default@test_stats +POSTHOOK: query: alter table test_stats drop statistics for columns a, c +POSTHOOK: type: ALTERTABLE_DROP_COL_STATS +POSTHOOK: Input: default@test_stats +POSTHOOK: Output: default@test_stats +PREHOOK: query: describe formatted test_stats +PREHOOK: type: DESCTABLE +PREHOOK: Input: default@test_stats +POSTHOOK: query: 
describe formatted test_stats +POSTHOOK: type: DESCTABLE +POSTHOOK: Input: default@test_stats +# col_name data_type comment +a string +b int +c double + +# Detailed Table Information +Database: default +#### A masked pattern was here #### +Retention: 0 +#### A masked pattern was here #### +Table Type: MANAGED_TABLE +Table Parameters: + COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"a\":\"true\",\"b\":\"true\",\"c\":\"true\"}} Review Comment: If I understand correctly, we need to update `COLUMN_STATS_ACCURATE` in HS2, after `getMSC().deleteColumnStatistics(request);` returns successfully in `Hive#deleteColumnStatistics()`. I think we can probably use `StatsSetupConst#removeColumnStatsState(Map<String, String> params, List<String> colNames)` to clear the column stats state for the columns listed in the ALTER statement. However, for partitioned tables, when partition info is not provided in the ALTER statement, we need to fetch ALL partitions (using `List<Partition> getPartitions(Table tbl)` in Hive.java) and then call `StatsSetupConst#removeColumnStatsState` for each Partition. Do you think there is a better, more efficient way to do this? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org