deniskuzZ commented on code in PR #5540: URL: https://github.com/apache/hive/pull/5540#discussion_r1932671128
########## iceberg/iceberg-handler/src/test/queries/positive/iceberg_minor_compaction_partition_evolution.q: ########## @@ -0,0 +1,65 @@ +-- Mask neededVirtualColumns due to non-strict order +--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ +-- Mask a random snapshot id +--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/ +-- Mask added file size +--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ +-- Mask total file size +--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ +-- Mask current-snapshot-timestamp-ms +--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/ +--! qt:replace:/(MINOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ +--! qt:replace:/(MINOR\s+refused\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ +-- Mask compaction id as they will be allocated in parallel threads +--! qt:replace:/^[0-9]/#Masked#/ +-- Mask removed file size +--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ +-- Mask iceberg version +--! 
qt:replace:/(\S\"iceberg-version\\\":\\\")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))(\\\")/$1#Masked#$3/ + +set hive.llap.io.enabled=true; +set hive.vectorized.execution.enabled=true; +set hive.optimize.shared.work.merge.ts.schema=true; +set hive.merge.tezfiles=true; + +create database ice_comp with dbproperties('hive.compactor.worker.pool'='iceberg'); +use ice_comp; + +create table ice_orc ( + first_name string, + last_name string, + dept_id bigint + ) +stored by iceberg stored as orc +tblproperties ('format-version'='2'); + +insert into ice_orc VALUES ('fn2','ln2', 1), ('fn1','ln1', 1); +insert into ice_orc VALUES ('fn4','ln4', 1), ('fn3','ln3', 1); + +delete from ice_orc where last_name in ('ln4'); + +alter table ice_orc set partition spec(dept_id); + +insert into ice_orc PARTITION(dept_id=2) VALUES ('fn6','ln6'), ('fn5','ln5'); +insert into ice_orc PARTITION(dept_id=2) VALUES ('fn8','ln8'), ('fn7','ln7'); + +delete from ice_orc where last_name in ('ln8'); + +describe formatted ice_orc; +show compactions order by 'partition'; + +-- Size of every data file in bytes is 463, therefore nothing to compact +alter table ice_orc COMPACT 'minor' and wait file_size_threshold = '400bytes' pool 'iceberg'; + +describe formatted ice_orc; +show compactions order by 'partition'; + +-- Now data files' sizes are below file_size_threshold, therefore compaction is needed +alter table ice_orc COMPACT 'minor' and wait file_size_threshold = '500bytes' pool 'iceberg' where fist_name in ('fn1','fn2','fn5','fn6') order by first_name;; Review Comment: Threshold properties should be set at the table level, as table properties whose names are COMPACTOR_THRESHOLD_PREFIX ('compactorthreshold') followed by one of: (1) 'min.file.size.bytes', (2) 'min.input.files'. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org