This is an automated email from the ASF dual-hosted git repository.
dkuzmenko pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new a45102243f6 HIVE-29288: Iceberg: Support Alter table command for Z-ordering (#6184)
a45102243f6 is described below
commit a45102243f61ad1b94ef5d51528ddbe2f3bc82db
Author: kokila-19 <[email protected]>
AuthorDate: Fri Dec 12 23:39:06 2025 +0530
HIVE-29288: Iceberg: Support Alter table command for Z-ordering (#6184)
---
.../iceberg_alter_locally_zordered_table.q | 38 +++
.../iceberg_alter_locally_zordered_table.q.out | 370 +++++++++++++++++++++
.../test/resources/testconfiguration.properties | 2 +
.../hadoop/hive/ql/parse/AlterClauseParser.g | 8 +
.../org/apache/hadoop/hive/ql/parse/HiveParser.g | 1 +
.../order/AlterTableSetWriteOrderAnalyzer.java | 87 +++++
.../storage/order/AlterTableSetWriteOrderDesc.java | 46 +++
.../order/AlterTableSetWriteOrderOperation.java | 48 +++
.../apache/hadoop/hive/ql/plan/HiveOperation.java | 2 +
.../authorization/plugin/HiveOperationType.java | 1 +
.../plugin/sqlstd/Operation2Privilege.java | 2 +
11 files changed, 605 insertions(+)
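
For context, the new DDL surface this commit adds looks like the following (a minimal sketch in HiveQL; the table and columns are illustrative, but the statement form matches the q-test below):

    create table ice_t (id int, name string, age int) stored by iceberg stored as orc;
    -- After this, inserts into ice_t are locally sorted by the Z-order value of (id, age):
    alter table ice_t set write ordered by zorder (id, age);
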
diff --git a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_alter_locally_zordered_table.q b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_alter_locally_zordered_table.q
new file mode 100644
index 00000000000..4de93b7f24e
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_alter_locally_zordered_table.q
@@ -0,0 +1,38 @@
+-- Mask neededVirtualColumns due to non-strict order
+--! qt:replace:/(\s+neededVirtualColumns:\s)(.*)/$1#Masked#/
+-- Mask the totalSize value as it can have slight variability, causing test flakiness
+--! qt:replace:/(\s+totalSize\s+)\S+(\s+)/$1#Masked#$2/
+-- Mask random uuid
+--! qt:replace:/(\s+uuid\s+)\S+(\s*)/$1#Masked#$2/
+-- Mask a random snapshot id
+--! qt:replace:/(\s+current-snapshot-id\s+)\S+(\s*)/$1#Masked#/
+-- Mask added file size
+--! qt:replace:/(\S\"added-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
+-- Mask total file size
+--! qt:replace:/(\S\"total-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
+-- Mask removed file size
+--! qt:replace:/(\S\"removed-files-size\\\":\\\")(\d+)(\\\")/$1#Masked#$3/
+-- Mask current-snapshot-timestamp-ms
+--! qt:replace:/(\s+current-snapshot-timestamp-ms\s+)\S+(\s*)/$1#Masked#$2/
+--! qt:replace:/(MAJOR\s+succeeded\s+)[a-zA-Z0-9\-\.\s+]+(\s+manual)/$1#Masked#$2/
+-- Mask iceberg version
+--! qt:replace:/(\S\"iceberg-version\\\":\\\")(\w+\s\w+\s\d+\.\d+\.\d+\s\(\w+\s\w+\))(\\\")/$1#Masked#$3/
+set hive.vectorized.execution.enabled=true;
+
+-- Test ALTER TABLE SET WRITE [LOCALLY] ORDERED BY ZORDER
+create table ice_orc_zorder (id int, name string, age int, city string) stored by iceberg stored as orc;
+
+describe formatted ice_orc_zorder;
+
+explain insert into ice_orc_zorder values (1, 'Alice', 30, 'NYC'),(2, 'Bob', 25, 'LA'),(3, 'Charlie', 35, 'SF');
+
+-- Add Z-order via ALTER TABLE command
+alter table ice_orc_zorder set WRITE ORDERED BY ZORDER (id, age);
+
+describe formatted ice_orc_zorder;
+
+explain insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver');
+insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver');
+select * from ice_orc_zorder;
+
+drop table ice_orc_zorder;
diff --git a/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out
new file mode 100644
index 00000000000..8896cf716ce
--- /dev/null
+++ b/iceberg/iceberg-handler/src/test/results/positive/llap/iceberg_alter_locally_zordered_table.q.out
@@ -0,0 +1,370 @@
+PREHOOK: query: create table ice_orc_zorder (id int, name string, age int, city string) stored by iceberg stored as orc
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: create table ice_orc_zorder (id int, name string, age int, city string) stored by iceberg stored as orc
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_orc_zorder
+PREHOOK: query: describe formatted ice_orc_zorder
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@ice_orc_zorder
+POSTHOOK: query: describe formatted ice_orc_zorder
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@ice_orc_zorder
+# col_name data_type comment
+id int
+name string
+age int
+city string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: EXTERNAL_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"age\":\"true\",\"city\":\"true\",\"id\":\"true\",\"name\":\"true\"}}
+ EXTERNAL TRUE
+ bucketing_version 2
+ current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"age\",\"required\":false,\"type\":\"int\"},{\"id\":4,\"name\":\"city\",\"required\":false,\"type\":\"string\"}]}
+ format-version 2
+ iceberg.orc.files.only true
+#### A masked pattern was here ####
+ numFiles 0
+ numRows 0
+ parquet.compression zstd
+ rawDataSize 0
+ serialization.format 1
+ snapshot-count 0
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize #Masked#
+#### A masked pattern was here ####
+ uuid #Masked#
+ write.delete.mode merge-on-read
+ write.format.default orc
+ write.merge.mode merge-on-read
+ write.update.mode merge-on-read
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+PREHOOK: query: explain insert into ice_orc_zorder values (1, 'Alice', 30, 'NYC'),(2, 'Bob', 25, 'LA'),(3, 'Charlie', 35, 'SF')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: explain insert into ice_orc_zorder values (1, 'Alice', 30, 'NYC'),(2, 'Bob', 25, 'LA'),(3, 'Charlie', 35, 'SF')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc_zorder
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: array(const struct(1,'Alice',30,'NYC'),const struct(2,'Bob',25,'LA'),const struct(3,'Charlie',35,'SF')) (type: array<struct<col1:int,col2:string,col3:int,col4:string>>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 64 Basic stats: COMPLETE Column stats: COMPLETE
+ function name: inline
+ Select Operator
+ expressions: col1 (type: int), col2 (type: string), col3 (type: int), col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.ice_orc_zorder
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: id, name, age, city
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(id), max(id), count(1), count(id), compute_bit_vector_hll(id), max(length(name)), avg(COALESCE(length(name),0)), count(name), compute_bit_vector_hll(name), min(age), max(age), count(age), compute_bit_vector_hll(age), max(length(city)), avg(COALESCE(length(city),0)), count(city), compute_bit_vector_hll(city)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct<count:bigint,sum:double,input:int>), _col15 (type: bigint), _col16 (type: binary)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (typ [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.ice_orc_zorder
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: id, name, age, city
+ Column Types: int, string, int, string
+ Table: default.ice_orc_zorder
+
+PREHOOK: query: alter table ice_orc_zorder set WRITE ORDERED BY ZORDER (id, age)
+PREHOOK: type: ALTERTABLE_SET_WRITE_ORDER
+PREHOOK: Input: default@ice_orc_zorder
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: alter table ice_orc_zorder set WRITE ORDERED BY ZORDER (id, age)
+POSTHOOK: type: ALTERTABLE_SET_WRITE_ORDER
+POSTHOOK: Input: default@ice_orc_zorder
+POSTHOOK: Output: default@ice_orc_zorder
+PREHOOK: query: describe formatted ice_orc_zorder
+PREHOOK: type: DESCTABLE
+PREHOOK: Input: default@ice_orc_zorder
+POSTHOOK: query: describe formatted ice_orc_zorder
+POSTHOOK: type: DESCTABLE
+POSTHOOK: Input: default@ice_orc_zorder
+# col_name data_type comment
+id int
+name string
+age int
+city string
+
+# Detailed Table Information
+Database: default
+#### A masked pattern was here ####
+Retention: 0
+#### A masked pattern was here ####
+Table Type: EXTERNAL_TABLE
+Table Parameters:
+ COLUMN_STATS_ACCURATE {\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"age\":\"true\",\"city\":\"true\",\"id\":\"true\",\"name\":\"true\"}}
+ EXTERNAL TRUE
+ bucketing_version 2
+ current-schema {\"type\":\"struct\",\"schema-id\":0,\"fields\":[{\"id\":1,\"name\":\"id\",\"required\":false,\"type\":\"int\"},{\"id\":2,\"name\":\"name\",\"required\":false,\"type\":\"string\"},{\"id\":3,\"name\":\"age\",\"required\":false,\"type\":\"int\"},{\"id\":4,\"name\":\"city\",\"required\":false,\"type\":\"string\"}]}
+ format-version 2
+ iceberg.orc.files.only true
+#### A masked pattern was here ####
+ numFiles 0
+ numRows 0
+ parquet.compression zstd
+#### A masked pattern was here ####
+ rawDataSize 0
+ serialization.format 1
+ snapshot-count 0
+ sort.columns id,age
+ sort.order ZORDER
+ storage_handler org.apache.iceberg.mr.hive.HiveIcebergStorageHandler
+ table_type ICEBERG
+ totalSize #Masked#
+#### A masked pattern was here ####
+ uuid #Masked#
+ write.delete.mode merge-on-read
+ write.format.default orc
+ write.merge.mode merge-on-read
+ write.update.mode merge-on-read
+
+# Storage Information
+SerDe Library: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+InputFormat: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+OutputFormat: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+Compressed: No
+Sort Columns: []
+PREHOOK: query: explain insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: explain insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc_zorder
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-2 depends on stages: Stage-1
+ Stage-0 depends on stages: Stage-2
+ Stage-3 depends on stages: Stage-0
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (SIMPLE_EDGE)
+ Reducer 3 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: _dummy_table
+ Row Limit Per Split: 1
+ Statistics: Num rows: 1 Data size: 10 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: array(const struct(4,'David',28,'Seattle'),const struct(5,'Eve',32,'Boston'),const struct(6,'Frank',29,'Austin'),const struct(7,'Grace',32,'Denver')) (type: array<struct<col1:int,col2:string,col3:int,col4:string>>)
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ UDTF Operator
+ Statistics: Num rows: 1 Data size: 72 Basic stats: COMPLETE Column stats: COMPLETE
+ function name: inline
+ Select Operator
+ expressions: col1 (type: int), col2 (type: string), col3 (type: int), col4 (type: string)
+ outputColumnNames: _col0, _col1, _col2, _col3
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ key expressions: iceberg_zorder(_col0, _col2) (type: binary)
+ null sort order: z
+ sort order: +
+ Map-reduce partition columns: iceberg_zorder(_col0, _col2) (type: binary)
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ Select Operator
+ expressions: _col0 (type: int), _col1 (type: string), _col2 (type: int), _col3 (type: string)
+ outputColumnNames: id, name, age, city
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: min(id), max(id), count(1), count(id), compute_bit_vector_hll(id), max(length(name)), avg(COALESCE(length(name),0)), count(name), compute_bit_vector_hll(name), min(age), max(age), count(age), compute_bit_vector_hll(age), max(length(city)), avg(COALESCE(length(city),0)), count(city), compute_bit_vector_hll(city)
+ minReductionHashAggr: 0.4
+ mode: hash
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 792 Basic stats: COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: int), _col1 (type: int), _col2 (type: bigint), _col3 (type: bigint), _col4 (type: binary), _col5 (type: int), _col6 (type: struct<count:bigint,sum:double,input:int>), _col7 (type: bigint), _col8 (type: binary), _col9 (type: int), _col10 (type: int), _col11 (type: bigint), _col12 (type: binary), _col13 (type: int), _col14 (type: struct<count:bigint,sum:double,input:int>), _col15 (type: bigint), _col16 (type: binary)
+ Execution mode: llap
+ LLAP IO: no inputs
+ Reducer 2
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Select Operator
+ expressions: VALUE._col0 (type: int), VALUE._col1 (type: string), VALUE._col2 (type: int), VALUE._col3 (type: string), KEY.iceberg_zorder(_col0, _col2) (type: binary)
+ outputColumnNames: _col0, _col1, _col2, _col3, iceberg_zorder(_col0, _col2)
+ File Output Operator
+ compressed: false
+ Dp Sort State: PARTITION_SORTED
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.ice_orc_zorder
+ Reducer 3
+ Execution mode: vectorized, llap
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: min(VALUE._col0), max(VALUE._col1), count(VALUE._col2), count(VALUE._col3), compute_bit_vector_hll(VALUE._col4), max(VALUE._col5), avg(VALUE._col6), count(VALUE._col7), compute_bit_vector_hll(VALUE._col8), min(VALUE._col9), max(VALUE._col10), count(VALUE._col11), compute_bit_vector_hll(VALUE._col12), max(VALUE._col13), avg(VALUE._col14), count(VALUE._col15), compute_bit_vector_hll(VALUE._col16)
+ mode: mergepartial
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16
+ Statistics: Num rows: 1 Data size: 656 Basic stats: COMPLETE Column stats: COMPLETE
+ Select Operator
+ expressions: 'LONG' (type: string), UDFToLong(_col0) (type: bigint), UDFToLong(_col1) (type: bigint), (_col2 - _col3) (type: bigint), COALESCE(ndv_compute_bit_vector(_col4),0) (type: bigint), _col4 (type: binary), 'STRING' (type: string), UDFToLong(COALESCE(_col5,0)) (type: bigint), COALESCE(_col6,0) (type: double), (_col2 - _col7) (type: bigint), COALESCE(ndv_compute_bit_vector(_col8),0) (type: bigint), _col8 (type: binary), 'LONG' (type: string), UDFToLong(_col9) (typ [...]
+ outputColumnNames: _col0, _col1, _col2, _col3, _col4, _col5, _col6, _col7, _col8, _col9, _col10, _col11, _col12, _col13, _col14, _col15, _col16, _col17, _col18, _col19, _col20, _col21, _col22, _col23
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 1060 Basic stats: COMPLETE Column stats: COMPLETE
+ table:
+ input format: org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format: org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+
+ Stage: Stage-2
+ Dependency Collection
+
+ Stage: Stage-0
+ Move Operator
+ tables:
+ replace: false
+ table:
+ input format: org.apache.iceberg.mr.hive.HiveIcebergInputFormat
+ output format: org.apache.iceberg.mr.hive.HiveIcebergOutputFormat
+ serde: org.apache.iceberg.mr.hive.HiveIcebergSerDe
+ name: default.ice_orc_zorder
+
+ Stage: Stage-3
+ Stats Work
+ Basic Stats Work:
+ Column Stats Desc:
+ Columns: id, name, age, city
+ Column Types: int, string, int, string
+ Table: default.ice_orc_zorder
+
+PREHOOK: query: insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: insert into ice_orc_zorder values (4, 'David', 28, 'Seattle'),(5, 'Eve', 32, 'Boston'),(6, 'Frank', 29, 'Austin'),(7, 'Grace', 32, 'Denver')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@ice_orc_zorder
+PREHOOK: query: select * from ice_orc_zorder
+PREHOOK: type: QUERY
+PREHOOK: Input: default@ice_orc_zorder
+#### A masked pattern was here ####
+POSTHOOK: query: select * from ice_orc_zorder
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@ice_orc_zorder
+#### A masked pattern was here ####
+4 David 28 Seattle
+6 Frank 29 Austin
+5 Eve 32 Boston
+7 Grace 32 Denver
+PREHOOK: query: drop table ice_orc_zorder
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@ice_orc_zorder
+PREHOOK: Output: database:default
+PREHOOK: Output: default@ice_orc_zorder
+POSTHOOK: query: drop table ice_orc_zorder
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@ice_orc_zorder
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@ice_orc_zorder
diff --git a/itests/src/test/resources/testconfiguration.properties b/itests/src/test/resources/testconfiguration.properties
index a6b04b23fa2..d294c1bd26c 100644
--- a/itests/src/test/resources/testconfiguration.properties
+++ b/itests/src/test/resources/testconfiguration.properties
@@ -417,6 +417,7 @@ erasurecoding.only.query.files=\
iceberg.llap.query.files=\
hadoop_catalog_create_table.q,\
+ iceberg_alter_locally_zordered_table.q,\
iceberg_bucket_map_join_1.q,\
iceberg_bucket_map_join_2.q,\
iceberg_bucket_map_join_3.q,\
@@ -465,6 +466,7 @@ iceberg.llap.query.rest.gravitino.files=\
iceberg.llap.only.query.files=\
hadoop_catalog_create_table.q,\
+ iceberg_alter_locally_zordered_table.q,\
iceberg_bucket_map_join_1.q,\
iceberg_bucket_map_join_2.q,\
iceberg_bucket_map_join_3.q,\
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
index ed98392d5a5..4c632307375 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/AlterClauseParser.g
@@ -85,6 +85,7 @@ alterTableStatementSuffix
| alterStatementSuffixRenameBranch
| alterStatementSuffixReplaceBranch
| alterStatementSuffixReplaceTag
+ | alterStatementSuffixSetWriteOrder
;
alterTblPartitionStatementSuffix[boolean partition]
@@ -686,6 +687,13 @@ alterStatementSuffixCreateOrReplaceTag
-> ^(TOK_ALTERTABLE_CREATE_TAG $tagName KW_REPLACE snapshotIdOfRef? refRetain?)
;
+alterStatementSuffixSetWriteOrder
+@init { gParent.pushMsg("alter table set write order", state); }
+@after { gParent.popMsg(state); }
+ : KW_SET tableWriteLocallyOrderedBy
+ -> ^(TOK_ALTERTABLE_SET_WRITE_ORDER tableWriteLocallyOrderedBy)
+ ;
+
fileFormat
@init { gParent.pushMsg("file format specification", state); }
@after { gParent.popMsg(state); }
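
Note that the new suffix delegates to the existing tableWriteLocallyOrderedBy production, so ALTER TABLE accepts the same write-order clause already available at CREATE TABLE time. A sketch of the statement forms this should parse (hedged -- only the ZORDER variant is accepted downstream; see AlterTableSetWriteOrderAnalyzer below):

    alter table t set write ordered by zorder (c1, c2);
    alter table t set write locally ordered by zorder (c1, c2);
    -- parses via the same production, but is rejected by the analyzer for now:
    alter table t set write ordered by c1;
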
diff --git a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
index e12b2e016a2..a5a2d7fc697 100644
--- a/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
+++ b/parser/src/java/org/apache/hadoop/hive/ql/parse/HiveParser.g
@@ -231,6 +231,7 @@ TOK_ALTERTABLE_REPLACE_SNAPSHOTREF;
TOK_RETAIN;
TOK_WITH_SNAPSHOT_RETENTION;
TOK_ALTERTABLE_CONVERT;
+TOK_ALTERTABLE_SET_WRITE_ORDER;
TOK_MSCK;
TOK_SHOWCATALOGS;
TOK_SHOWDATABASES;
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderAnalyzer.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderAnalyzer.java
new file mode 100644
index 00000000000..1aade613e77
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderAnalyzer.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.table.storage.order;
+
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.ql.QueryState;
+import org.apache.hadoop.hive.ql.ddl.DDLUtils;
+import org.apache.hadoop.hive.ql.ddl.DDLWork;
+import org.apache.hadoop.hive.ql.ddl.DDLSemanticAnalyzerFactory.DDLType;
+import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableAnalyzer;
+import org.apache.hadoop.hive.ql.exec.TaskFactory;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.ASTNode;
+import org.apache.hadoop.hive.ql.parse.HiveParser;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Analyzer for ALTER TABLE ... SET WRITE [LOCALLY] ORDERED BY commands.
+ * Currently supports Z-ORDER only. Regular ORDERED BY support will be added in a future commit.
+ */
+@DDLType(types = HiveParser.TOK_ALTERTABLE_SET_WRITE_ORDER)
+public class AlterTableSetWriteOrderAnalyzer extends AbstractAlterTableAnalyzer {
+
+  public AlterTableSetWriteOrderAnalyzer(QueryState queryState) throws SemanticException {
+ super(queryState);
+ }
+
+ @Override
+  protected void analyzeCommand(TableName tableName, Map<String, String> partitionSpec, ASTNode command)
+ throws SemanticException {
+
+ // Validate if this is an Iceberg table
+ Table table = getTable(tableName);
+ DDLUtils.validateTableIsIceberg(table);
+
+ ASTNode orderNode = (ASTNode) command.getChild(0);
+    if (orderNode.getType() == HiveParser.TOK_WRITE_LOCALLY_ORDERED_BY_ZORDER) {
+ // Handle Z-ORDER
+ ASTNode columnListNode = (ASTNode) orderNode.getChild(0);
+ List<String> columnNames = new ArrayList<>();
+ for (int i = 0; i < columnListNode.getChildCount(); i++) {
+ ASTNode child = (ASTNode) columnListNode.getChild(i);
+ columnNames.add(unescapeIdentifier(child.getText()).toLowerCase());
+ }
+
+ if (columnNames.isEmpty()) {
+ throw new SemanticException("Z-order requires at least one column");
+ }
+
+      // Set Z-order properties in table props sort.order=ZORDER and sort.columns=col1,col2,...
+ Map<String, String> props = Map.of(
+ "sort.order", "ZORDER",
+ "sort.columns", String.join(",", columnNames)
+ );
+
+      AlterTableSetWriteOrderDesc desc = new AlterTableSetWriteOrderDesc(tableName, partitionSpec, props);
+      addInputsOutputsAlterTable(tableName, partitionSpec, desc, desc.getType(), false);
+
+      rootTasks.add(TaskFactory.get(new DDLWork(getInputs(), getOutputs(), desc)));
+ } else if (orderNode.getType() == HiveParser.TOK_WRITE_LOCALLY_ORDERED) {
+ // Regular ORDERED BY - to be implemented in future commit
+ throw new SemanticException("Regular ORDERED BY is not yet supported.
Only ZORDER is supported.");
+ } else {
+ throw new SemanticException("Unexpected token type: " +
orderNode.getType());
+ }
+ }
+}
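
Two failure modes fall out of the analyzer above (a hedged summary; hive_t is a hypothetical non-Iceberg table used only for illustration):

    -- rejected by DDLUtils.validateTableIsIceberg, since write order is Iceberg-only:
    alter table hive_t set write ordered by zorder (id);
    -- rejected with "Regular ORDERED BY is not yet supported. Only ZORDER is supported.":
    alter table ice_t set write ordered by id;
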
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderDesc.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderDesc.java
new file mode 100644
index 00000000000..2b46feacf84
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderDesc.java
@@ -0,0 +1,46 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.table.storage.order;
+
+import java.util.Map;
+
+import org.apache.hadoop.hive.common.TableName;
+import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableDesc;
+import org.apache.hadoop.hive.ql.ddl.table.AlterTableType;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+import org.apache.hadoop.hive.ql.plan.Explain;
+import org.apache.hadoop.hive.ql.plan.Explain.Level;
+
+/**
+ * DDL task description for ALTER TABLE ... SET WRITE [LOCALLY] ORDERED BY commands.
+ */
+@Explain(displayName = "Set Write Order", explainLevels = { Level.USER, Level.DEFAULT, Level.EXTENDED })
+public class AlterTableSetWriteOrderDesc extends AbstractAlterTableDesc {
+ private static final long serialVersionUID = 1L;
+
+  public AlterTableSetWriteOrderDesc(TableName tableName, Map<String, String> partitionSpec,
+      Map<String, String> props) throws SemanticException {
+    super(AlterTableType.ADDPROPS, tableName, partitionSpec, null, false, false, props);
+ }
+
+ @Override
+ public boolean mayNeedWriteId() {
+ return false;
+ }
+}
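
Since the desc extends AbstractAlterTableDesc with AlterTableType.ADDPROPS and never needs a write id, the command executes as a metadata-only property update. Its net effect should be roughly equivalent to setting the two properties by hand (a sketch for intuition, not a recommended alternative):

    alter table ice_orc_zorder set tblproperties ('sort.order'='ZORDER', 'sort.columns'='id,age');
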
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderOperation.java
new file mode 100644
index 00000000000..a3226459f04
--- /dev/null
+++ b/ql/src/java/org/apache/hadoop/hive/ql/ddl/table/storage/order/AlterTableSetWriteOrderOperation.java
@@ -0,0 +1,48 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hadoop.hive.ql.ddl.table.storage.order;
+
+import org.apache.hadoop.hive.ql.ddl.DDLOperationContext;
+import org.apache.hadoop.hive.ql.ddl.DDLUtils;
+import org.apache.hadoop.hive.ql.ddl.table.AbstractAlterTableOperation;
+import org.apache.hadoop.hive.ql.metadata.HiveException;
+import org.apache.hadoop.hive.ql.metadata.Partition;
+import org.apache.hadoop.hive.ql.metadata.Table;
+import org.apache.hadoop.hive.ql.parse.SemanticException;
+
+/**
+ * Operation process of setting write order properties of a table.
+ */
+public class AlterTableSetWriteOrderOperation extends AbstractAlterTableOperation<AlterTableSetWriteOrderDesc> {
+  public AlterTableSetWriteOrderOperation(DDLOperationContext context, AlterTableSetWriteOrderDesc desc) {
+ super(context, desc);
+ }
+
+ @Override
+  protected void doAlteration(Table table, Partition partition) throws HiveException {
+ // Validate that this is an Iceberg table
+ try {
+ DDLUtils.validateTableIsIceberg(table);
+ } catch (SemanticException e) {
+ throw new HiveException(e);
+ }
+
+ table.getTTable().getParameters().putAll(desc.getProps());
+ }
+}
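
The operation itself only merges desc.getProps() into the table parameters; the surrounding AbstractAlterTableOperation machinery persists the altered table to the metastore. The result can then be inspected from SQL (a sketch; the sort.* parameters also appear in the q.out above):

    describe formatted ice_orc_zorder;   -- Table Parameters now include sort.columns id,age and sort.order ZORDER
    show tblproperties ice_orc_zorder;   -- should list the same keys
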
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
index 0fcc7e98d14..b2e2e51815a 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/plan/HiveOperation.java
@@ -78,6 +78,8 @@ public enum HiveOperation {
ALTERTABLE_PROPERTIES("ALTERTABLE_PROPERTIES",
new int[] {HiveParser.TOK_ALTERTABLE_PROPERTIES, HiveParser.TOK_ALTERTABLE_DROPPROPERTIES},
new Privilege[]{Privilege.ALTER_METADATA}, null),
+ ALTERTABLE_SET_WRITE_ORDER("ALTERTABLE_SET_WRITE_ORDER", HiveParser.TOK_ALTERTABLE_SET_WRITE_ORDER,
+ new Privilege[]{Privilege.ALTER_METADATA}, null),
ALTERTABLE_OWNER("ALTERTABLE_OWNER", HiveParser.TOK_ALTERTABLE_OWNER, null, null),
ALTERTABLE_SETPARTSPEC("ALTERTABLE_SETPARTSPEC", HiveParser.TOK_ALTERTABLE_SETPARTSPEC, null, null),
ALTERTABLE_EXECUTE("ALTERTABLE_EXECUTE", HiveParser.TOK_ALTERTABLE_EXECUTE, null, null),
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
index 8a71a606808..41ee6c85c7c 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/HiveOperationType.java
@@ -68,6 +68,7 @@ public enum HiveOperationType {
ALTERTABLE_PARTCOLTYPE,
ALTERTABLE_DROPCONSTRAINT,
ALTERTABLE_ADDCONSTRAINT,
+ ALTERTABLE_SET_WRITE_ORDER,
ALTERPARTITION_SERIALIZER,
ALTERTABLE_SERDEPROPERTIES,
ALTERPARTITION_SERDEPROPERTIES,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
index c828d044f95..fd8e573c0a3 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/security/authorization/plugin/sqlstd/Operation2Privilege.java
@@ -254,6 +254,8 @@ public HivePrivilegeObjectType getObjectType() {
PrivRequirement.newIOPrivRequirement(OWNER_PRIV_AR, OWNER_PRIV_AR));
op2Priv.put(HiveOperationType.ALTERTABLE_DROPTAG,
PrivRequirement.newIOPrivRequirement(OWNER_PRIV_AR, OWNER_PRIV_AR));
+ op2Priv.put(HiveOperationType.ALTERTABLE_SET_WRITE_ORDER,
+ PrivRequirement.newIOPrivRequirement(OWNER_PRIV_AR, OWNER_PRIV_AR));
// require view ownership for alter/drop view
op2Priv.put(HiveOperationType.ALTERVIEW_PROPERTIES,