This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new a45102243f6 HIVE-29288: Iceberg: Support Alter table command for Z-ordering (#6184)
a45102243f6 is described below
commit a45102243f61ad1b94ef5d51528ddbe2f3bc82db
Author: kokila-19 <[email protected]>
AuthorDate: Fri Dec 12 23:39:06 2025 +0530
HIVE-29288: Iceberg: Support Alter table command for Z-ordering (#6184)
---
.../iceberg_alter_locally_zordered_table.q | 38 +++
.../iceberg_alter_locally_zordered_table.q.out | 370 +++++++++++++++++++++
.../test/resources/testconfiguration.properties | 2 +
.../hadoop/hive/ql/parse/AlterClauseParser.g | 8 +
.../org/apache/hadoop/hive/ql/parse/HiveParser.g | 1 +
.../order/AlterTableSetWriteOrderAnalyzer.java | 87 +++++
.../storage/order/AlterTableSetWriteOrderDesc.java | 46 +++
.../order/AlterTableSetWriteOrderOperation.java | 48 +++
.../apache/hadoop/hive/ql/plan/HiveOperation.java | 2 +
.../authorization/plugin/HiveOperationType.java | 1 +
.../plugin/sqlstd/Operation2Privilege.java | 2 +
11 files changed, 605 insertions(+)
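
For context, the new DDL surface this commit adds looks like the following (a minimal sketch in HiveQL; the table and columns are illustrative, but the statement form matches the q-test below):

    create table ice_t (id int, name string, age int) stored by iceberg stored as orc;
    -- After this, inserts into ice_t are locally sorted by the Z-order value of (id, age):
    alter table ice_t set write ordered by zorder (id, age);
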
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_alter_locally_zordered_table.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_alter_locally_zordered_table.q
new file mode 100644
index 00000000000..4de93b7f24e
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_alter_locally_zordered_table.q
@@ -0,0 +1,38 @@
+-- Mask neededVirtualColumns due to non-strict order
+--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/
+-- Mask the totalSize value as it can have slight variability, causing test flakiness
+--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
+-- Mask random uuid
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+-- Mask a random snapshot id
+--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/
+-- Mask added file size
+--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
+-- Mask total file size
+--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
+-- Mask removed file size
+--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
+-- Mask current-snapshot-timestamp-ms
+--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
+-- Mask iceberg version
+--! qt:replace:/(\S\"iceberg-version\\\":\\\")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))(\\\")/$1#Masked#$3/
+set hive.vectorized.execution.enabled=true;
+
+-- Test ALTER TABLE SET WRITE [LOCALLY] ORDERED BY ZORDER
+create table ice_orc_zorder (id int, name string, age int, city string) stored by iceberg stored as orc;
+
+describe formatted ice_orc_zorder;
+
+explain insert into ice_orc_zorder values (1, 'Alice', 30, 'NYC'),(2, 'Bob', 25, 'LA'),(3, 'Charlie', 35, 'SF');
+
+-- Add Z-order via ALTER TABLE command
+alter table ice_orc_zorder set WRITE ORDERED BY ZORDER (id, age);
+
+describe formatted ice_orc_zorder;
+
+explain insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver');
+insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver');
+select * from ice_orc_zorder;
+
+drop table ice_orc_zorder;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out
new file mode 100644
index 00000000000..8896cf716ce
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out
@@ -0,0 +1,370 @@
+PREHOOK: query: create table ice_orc_zorder (id int, name string, age int, city string) stored by iceberg stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: create table ice_orc_zorder (id int, name string, age int, city string) stored by iceberg stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_orc_zorder
+PREHOOK: query: describe formatted ice_orc_zorder
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@ice_orc_zorder
+POSTHOOK: query: describe formatted ice_orc_zorder
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@ice_orc_zorder
+# col_name data_type comment
+id int
+name string
+age int
+city string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: EXTERNAL_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"age\":\"true\",\"city\":\"true\",\"id\":\"true\",\"name\":\"true\"}}
+ EXTERNAL TRUE
+ bucketing_version 2
+ current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"age\",\"required\":false,\"type\":\"int\"},{\"id\":4,\"name\":\"city\",\"required\":false,\"type\":\"string\"}]}
+ format-version 2
+ iceberg.orc.files.only true
+#### A masked pattern was here ####
+ numFiles 0
+ numRows 0
+ parquet.compression zstd
+ rawDataSize 0
+ serialization.format 1
+ snapshot-count 0
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize #Masked#
+#### A masked pattern was here ####
+ uuid #Masked#
+ write.delete.mode merge-on-read
+ write.format.default orc
+ write.merge.mode merge-on-read
+ write.update.mode merge-on-read
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+PREHOOK: query: explain insert into ice_orc_zorder values (1, 'Alice', 30, 'NYC'),(2, 'Bob', 25, 'LA'),(3, 'Charlie', 35, 'SF')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: explain insert into ice_orc_zorder values (1, 'Alice', 30, 'NYC'),(2, 'Bob', 25, 'LA'),(3, 'Charlie', 35, 'SF')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc_zorder
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: array(const struct(1,'Alice',30,'NYC'),const struct(2,'Bob',25,'LA'),const struct(3,'Charlie',35,'SF')) (type: array<struct<col1:int,col2:string,col3:int,col4:string>>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ function name: inline
+ Select Operator
+ expressions: col1 (type: int), col2 (type: string), col3 (type: int), col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.ice_orc_zorder
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: id, name, age, city
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(id), max(id), count(1), count(id), compute_bit_vector_hll(id), max(length(name)), avg(COALESCE(length(name),0)), count(name), compute_bit_vector_hll(name), min(age), max(age), count(age), compute_bit_vector_hll(age), max(length(city)), avg(COALESCE(length(city),0)), count(city), compute_bit_vector_hll(city)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct<count:bigint,sum:double,input:int>), _col15 (type: bigint), _col16 (type: binary)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (typ [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.ice_orc_zorder
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: id, name, age, city
+ Column Types: int, string, int, string
+ Table: default.ice_orc_zorder
+
+PREHOOK: query: alter table ice_orc_zorder set WRITE ORDERED BY ZORDER (id, age)
+PREHOOK: type: ALTERTABLE_SET_WRITE_ORDER
+PREHOOK: Input: default@ice_orc_zorder
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: alter table ice_orc_zorder set WRITE ORDERED BY ZORDER (id, age)
+POSTHOOK: type: ALTERTABLE_SET_WRITE_ORDER
+POSTHOOK: Input: default@ice_orc_zorder
+POSTHOOK: Output: default@ice_orc_zorder
+PREHOOK: query: describe formatted ice_orc_zorder
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@ice_orc_zorder
+POSTHOOK: query: describe formatted ice_orc_zorder
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@ice_orc_zorder
+# col_name data_type comment
+id int
+name string
+age int
+city string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: EXTERNAL_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"age\":\"true\",\"city\":\"true\",\"id\":\"true\",\"name\":\"true\"}}
+ EXTERNAL TRUE
+ bucketing_version 2
+ current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"age\",\"required\":false,\"type\":\"int\"},{\"id\":4,\"name\":\"city\",\"required\":false,\"type\":\"string\"}]}
+ format-version 2
+ iceberg.orc.files.only true
+#### A masked pattern was here ####
+ numFiles 0
+ numRows 0
+ parquet.compression zstd
+#### A masked pattern was here ####
+ rawDataSize 0
+ serialization.format 1
+ snapshot-count 0
+ sort.columns id,age
+ sort.order ZORDER
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize #Masked#
+#### A masked pattern was here ####
+ uuid #Masked#
+ write.delete.mode merge-on-read
+ write.format.default orc
+ write.merge.mode merge-on-read
+ write.update.mode merge-on-read
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+PREHOOK: query: explain insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: explain insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc_zorder
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: array(const struct(4,'David',28,'Seattle'),const struct(5,'Eve',32,'Boston'),const struct(6,'Frank',29,'Austin'),const struct(7,'Grace',32,'Denver')) (type: array<struct<col1:int,col2:string,col3:int,col4:string>>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ function name: inline
+ Select Operator
+ expressions: col1 (type: int), col2 (type: string), col3 (type: int), col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: iceberg_zorder(_col0, _col2) (type: binary)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: iceberg_zorder(_col0, _col2) (type: binary)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: id, name, age, city
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(id), max(id), count(1), count(id), compute_bit_vector_hll(id), max(length(name)), avg(COALESCE(length(name),0)), count(name), compute_bit_vector_hll(name), min(age), max(age), count(age), compute_bit_vector_hll(age), max(length(city)), avg(COALESCE(length(city),0)), count(city), compute_bit_vector_hll(city)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct<count:bigint,sum:double,input:int>), _col15 (type: bigint), _col16 (type: binary)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), KEY.iceberg_zorder(_col0, _col2) (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3, iceberg_zorder(_col0, _col2)
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.ice_orc_zorder
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (typ [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.ice_orc_zorder
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: id, name, age, city
+ Column Types: int, string, int, string
+ Table: default.ice_orc_zorder
+
+PREHOOK: query: insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc_zorder
+PREHOOK: query: select * from ice_orc_zorder
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc_zorder
+#### A masked pattern was here ####
+POSTHOOK: query: select * from ice_orc_zorder
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc_zorder
+#### A masked pattern was here ####
+4 David 28 Seattle
+6 Frank 29 Austin
+5 Eve 32 Boston
+7 Grace 32 Denver
+PREHOOK: query: drop table ice_orc_zorder
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@ice_orc_zorder
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: drop table ice_orc_zorder
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@ice_orc_zorder
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_orc_zorder
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index a6b04b23fa2..d294c1bd26c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -417,6 +417,7 @@ erasurecoding.only.query.files=\
iceberg.llap.query.files=\
hadoop_catalog_create_table.q,\
+ iceberg_alter_locally_zordered_table.q,\
iceberg_bucket_map_join_1.q,\
iceberg_bucket_map_join_2.q,\
iceberg_bucket_map_join_3.q,\
@@ -465,6 +466,7 @@ iceberg.llap.query.rest.gravitino.files=\
iceberg.llap.only.query.files=\
hadoop_catalog_create_table.q,\
+ iceberg_alter_locally_zordered_table.q,\
iceberg_bucket_map_join_1.q,\
iceberg_bucket_map_join_2.q,\
iceberg_bucket_map_join_3.q,\
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
index ed98392d5a5..4c632307375 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
@@ -85,6 +85,7 @@ alterTableStatementSuffix
| alterStatementSuffixRenameBranch
| alterStatementSuffixReplaceBranch
| alterStatementSuffixReplaceTag
+ | alterStatementSuffixSetWriteOrder
;
alterTblPartitionStatementSuffix[boolean partition]
@@ -686,6 +687,13 @@ alterStatementSuffixCreateOrReplaceTag
-> ^(TOK_ALTERTABLE_CREATE_TAG $tagName KW_REPLACE snapshotIdOfRef? refRetain?)
;
+alterStatementSuffixSetWriteOrder
+@init { gParent.pushMsg("alter table set write order", state); }
+@after { gParent.popMsg(state); }
+ : KW_SET tableWriteLocallyOrderedBy
+ -> ^(TOK_ALTERTABLE_SET_WRITE_ORDER tableWriteLocallyOrderedBy)
+ ;
+
fileFormat
@init { gParent.pushMsg("file format specification", state); }
@after { gParent.popMsg(state); }
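
Note that the new suffix delegates to the existing tableWriteLocallyOrderedBy production, so ALTER TABLE accepts the same write-order clause already available at CREATE TABLE time. A sketch of the statement forms this should parse (hedged -- only the ZORDER variant is accepted downstream; see AlterTableSetWriteOrderAnalyzer below):

    alter table t set write ordered by zorder (c1, c2);
    alter table t set write locally ordered by zorder (c1, c2);
    -- parses via the same production, but is rejected by the analyzer for now:
    alter table t set write ordered by c1;
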
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index e12b2e016a2..a5a2d7fc697 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -231,6 +231,7 @@ TOK_ALTERTABLE_REPLACE_SNAPSHOTREF;
TOK_RETAIN;
TOK_WITH_SNAPSHOT_RETENTION;
TOK_ALTERTABLE_CONVERT;
+TOK_ALTERTABLE_SET_WRITE_ORDER;
TOK_MSCK;
TOK_SHOWCATALOGS;
TOK_SHOWDATABASES;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderAnalyzer.java
new file mode 100644
index 00000000000..1aade613e77
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderAnalyzer.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.table.storage.order;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.ddl.DDLUtils;
+import org.apache.hadoop.hive.ql.ddl.DDLWork;
+import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory.DDLType;
+import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableAnalyzer;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Analyzer for ALTER TABLE ... SET WRITE [LOCALLY] ORDERED BY commands.
+ * Currently supports Z-ORDER only. Regular ORDERED BY support will be added in a future commit.
+ */
+@DDLType(types = HiveParser.TOK_ALTERTABLE_SET_WRITE_ORDER)
+public class AlterTableSetWriteOrderAnalyzer extends AbstractAlterTableAnalyzer {
+
+  public AlterTableSetWriteOrderAnalyzer(QueryState queryState) throws SemanticException {
+ super(queryState);
+ }
+
+ @Override
+  protected void analyzeCommand(TableName tableName, Map<String, String> partitionSpec, ASTNode command)
+ throws SemanticException {
+
+ // Validate if this is an Iceberg table
+ Table table = getTable(tableName);
+ DDLUtils.validateTableIsIceberg(table);
+
+ ASTNode orderNode = (ASTNode) command.getChild(0);
+    if (orderNode.getType() == HiveParser.TOK_WRITE_LOCALLY_ORDERED_BY_ZORDER) {
+ // Handle Z-ORDER
+ ASTNode columnListNode = (ASTNode) orderNode.getChild(0);
+ List<String> columnNames = new ArrayList<>();
+ for (int i = 0; i < columnListNode.getChildCount(); i++) {
+ ASTNode child = (ASTNode) columnListNode.getChild(i);
+ columnNames.add(unescapeIdentifier(child.getText()).toLowerCase());
+ }
+
+ if (columnNames.isEmpty()) {
+ throw new SemanticException("Z-order requires at least one column");
+ }
+
+      // Set Z-order properties in table props sort.order=ZORDER and sort.columns=col1,col2,...
+ Map<String, String> props = Map.of(
+ "sort.order", "ZORDER",
+ "sort.columns", String.join(",", columnNames)
+ );
+
+      AlterTableSetWriteOrderDesc desc = new AlterTableSetWriteOrderDesc(tableName, partitionSpec, props);
+      addInputsOutputsAlterTable(tableName, partitionSpec, desc, desc.getType(), false);
+
+      rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
+ } else if (orderNode.getType() == HiveParser.TOK_WRITE_LOCALLY_ORDERED) {
+ // Regular ORDERED BY - to be implemented in future commit
+ throw new SemanticException("Regular ORDERED BY is not yet supported.
Only ZORDER is supported.");
+ } else {
+ throw new SemanticException("Unexpected token type: " +
orderNode.getType());
+ }
+ }
+}
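
Two failure modes fall out of the analyzer above (a hedged summary; hive_t is a hypothetical non-Iceberg table used only for illustration):

    -- rejected by DDLUtils.validateTableIsIceberg, since write order is Iceberg-only:
    alter table hive_t set write ordered by zorder (id);
    -- rejected with "Regular ORDERED BY is not yet supported. Only ZORDER is supported.":
    alter table ice_t set write ordered by id;
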
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderDesc.java
new file mode 100644
index 00000000000..2b46feacf84
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderDesc.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.table.storage.order;
+
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableDesc;
+import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.Explain;
+import org.apache.hadoop.hive.ql.plan.Explain.Level;
+
+/**
+ * DDL task description for ALTER TABLE ... SET WRITE [LOCALLY] ORDERED BY commands.
+ */
+@Explain(displayName = "Set Write Order", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+public class AlterTableSetWriteOrderDesc extends AbstractAlterTableDesc {
+ private static final long serialVersionUID = 1L;
+
+  public AlterTableSetWriteOrderDesc(TableName tableName, Map<String, String> partitionSpec,
+      Map<String, String> props) throws SemanticException {
+    super(AlterTableType.ADDPROPS, tableName, partitionSpec, null, false, false, props);
+ }
+
+ @Override
+ public boolean mayNeedWriteId() {
+ return false;
+ }
+}
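
Since the desc extends AbstractAlterTableDesc with AlterTableType.ADDPROPS and never needs a write id, the command executes as a metadata-only property update. Its net effect should be roughly equivalent to setting the two properties by hand (a sketch for intuition, not a recommended alternative):

    alter table ice_orc_zorder set tblproperties ('sort.order'='ZORDER', 'sort.columns'='id,age');
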
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderOperation.java
new file mode 100644
index 00000000000..a3226459f04
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderOperation.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.table.storage.order;
+
+import org.apache.hadoop.hive.ql.ddl.DDLOperationContext;
+import org.apache.hadoop.hive.ql.ddl.DDLUtils;
+import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableOperation;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Operation process of setting write order properties of a table.
+ */
+public class AlterTableSetWriteOrderOperation extends AbstractAlterTableOperation<AlterTableSetWriteOrderDesc> {
+  public AlterTableSetWriteOrderOperation(DDLOperationContext context, AlterTableSetWriteOrderDesc desc) {
+ super(context, desc);
+ }
+
+ @Override
+  protected void doAlteration(Table table, Partition partition) throws HiveException {
+ // Validate that this is an Iceberg table
+ try {
+ DDLUtils.validateTableIsIceberg(table);
+ } catch (SemanticException e) {
+ throw new HiveException(e);
+ }
+
+ table.getTTable().getParameters().putAll(desc.getProps());
+ }
+}
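
The operation itself only merges desc.getProps() into the table parameters; the surrounding AbstractAlterTableOperation machinery persists the altered table to the metastore. The result can then be inspected from SQL (a sketch; the sort.* parameters also appear in the q.out above):

    describe formatted ice_orc_zorder;   -- Table Parameters now include sort.columns id,age and sort.order ZORDER
    show tblproperties ice_orc_zorder;   -- should list the same keys
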
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
index 0fcc7e98d14..b2e2e51815a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
@@ -78,6 +78,8 @@ public enum HiveOperation {
ALTERTABLE_PROPERTIES("ALTERTABLE_PROPERTIES",
new int[] {HiveParser.TOK_ALTERTABLE_PROPERTIES, HiveParser.TOK_ALTERTABLE_DROPPROPERTIES},
new Privilege[]{Privilege.ALTER_METADATA}, null),
+ ALTERTABLE_SET_WRITE_ORDER("ALTERTABLE_SET_WRITE_ORDER", HiveParser.TOK_ALTERTABLE_SET_WRITE_ORDER,
+ new Privilege[]{Privilege.ALTER_METADATA}, null),
ALTERTABLE_OWNER("ALTERTABLE_OWNER", HiveParser.TOK_ALTERTABLE_OWNER, null, null),
ALTERTABLE_SETPARTSPEC("ALTERTABLE_SETPARTSPEC", HiveParser.TOK_ALTERTABLE_SETPARTSPEC, null, null),
ALTERTABLE_EXECUTE("ALTERTABLE_EXECUTE", HiveParser.TOK_ALTERTABLE_EXECUTE, null, null),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
index 8a71a606808..41ee6c85c7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
@@ -68,6 +68,7 @@ public enum HiveOperationType {
ALTERTABLE_PARTCOLTYPE,
ALTERTABLE_DROPCONSTRAINT,
ALTERTABLE_ADDCONSTRAINT,
+ ALTERTABLE_SET_WRITE_ORDER,
ALTERPARTITION_SERIALIZER,
ALTERTABLE_SERDEPROPERTIES,
ALTERPARTITION_SERDEPROPERTIES,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
index c828d044f95..fd8e573c0a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
@@ -254,6 +254,8 @@ public HivePrivilegeObjectType getObjectType() {
PrivRequirement.newIOPrivRequirement(OWNER_PRIV_AR, OWNER_PRIV_AR));
op2Priv.put(HiveOperationType.ALTERTABLE_DROPTAG,
PrivRequirement.newIOPrivRequirement(OWNER_PRIV_AR, OWNER_PRIV_AR));
+ op2Priv.put(HiveOperationType.ALTERTABLE_SET_WRITE_ORDER,
+ PrivRequirement.newIOPrivRequirement(OWNER_PRIV_AR, OWNER_PRIV_AR));
// require view ownership for alter/drop view
op2Priv.put(HiveOperationType.ALTERVIEW_PROPERTIES,