difin commented on code in PR #5028: URL: https://github.com/apache/hive/pull/5028#discussion_r1484417442
########## iceberg/iceberg-handler/src/test/queries/positive/iceberg_optimize_table_unpartitioned.q: ########## @@ -0,0 +1,58 @@ +-- SORT_QUERY_RESULTS +-- Mask neededVirtualColumns due to non-strict order +--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/ +-- Mask the totalSize value as it can have slight variability, causing test flakiness +--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/ +-- Mask random uuid +--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/ +-- Mask a random snapshot id +--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/ +-- Mask added file size +--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ +-- Mask total file size +--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/ +-- Mask current-snapshot-timestamp-ms +--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/ +-- Mask the enqueue time which is based on current time +--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/ +-- Mask compaction id as they will be allocated in parallel threads +--! 
qt:replace:/^[0-9]/#Masked#/ + +set hive.llap.io.enabled=true; +set hive.vectorized.execution.enabled=true; +set hive.optimize.shared.work.merge.ts.schema=true; + +create table ice_orc ( + first_name string, + last_name string + ) +stored by iceberg stored as orc +tblproperties ('format-version'='2'); + +insert into ice_orc VALUES ('fn1','ln1'); +insert into ice_orc VALUES ('fn2','ln2'); +insert into ice_orc VALUES ('fn3','ln3'); +insert into ice_orc VALUES ('fn4','ln4'); +insert into ice_orc VALUES ('fn5','ln5'); +insert into ice_orc VALUES ('fn6','ln6'); +insert into ice_orc VALUES ('fn7','ln7'); + +update ice_orc set last_name = 'ln1a' where first_name='fn1'; +update ice_orc set last_name = 'ln2a' where first_name='fn2'; +update ice_orc set last_name = 'ln3a' where first_name='fn3'; +update ice_orc set last_name = 'ln4a' where first_name='fn4'; +update ice_orc set last_name = 'ln5a' where first_name='fn5'; +update ice_orc set last_name = 'ln6a' where first_name='fn6'; +update ice_orc set last_name = 'ln7a' where first_name='fn7'; + +delete from ice_orc where last_name in ('ln5a', 'ln6a', 'ln7a'); + +select * from ice_orc; +describe formatted ice_orc; + +explain optimize table ice_orc rewrite data; +optimize table ice_orc rewrite data; + Review Comment: It is checked using the DESCRIBE FORMATTED command. The iceberg compaction q-files contain that command before and after compaction, and the .q.out files compare the number of data and delete files: `current-snapshot-summary {\"replace-partitions\":\"true\",\"added-data-files\":\"1\",\"added-records\":\"4\",\"added-files-size\":\"#Masked#\",\"changed-partition-count\":\"1\",\"total-records\":\"4\",\"total-files-size\":\"#Masked#\",\"total-data-files\":\"1\",\"total-delete-files\":\"0\",\"total-position-deletes\":\"0\",\"total-equality-deletes\":\"0\"}` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. 
To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org --------------------------------------------------------------------- To unsubscribe, e-mail: gitbox-unsubscr...@hive.apache.org For additional commands, e-mail: gitbox-h...@hive.apache.org