yongzhi.shao created HIVE-28277:
-----------------------------------
Summary: Hive does not support UPDATE operations on Iceberg tables of
type location_based_table.
Key: HIVE-28277
URL: https://issues.apache.org/jira/browse/HIVE-28277
Project: Hive
Issue Type: Improvement
Components: Iceberg integration
Affects Versions: 4.0.0
Environment: ICEBERG:1.5.2
HIVE 4.0.0
Reporter: yongzhi.shao
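Currently, when I run an UPDATE on a location_based_table Iceberg table using Hive, the
commit fails with a NoSuchTableException and Hive incorrectly removes the table's data and
metadata directories, leaving only temporary -tmp.HIVE_UNION_SUBDIR_* directories behind.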
{code:java}
--spark:
CREATE TABLE IF NOT EXISTS datacenter.default.test_data_04 (
id string,name string
)
using iceberg
PARTITIONED BY (name)
TBLPROPERTIES
('read.orc.vectorization.enabled'='true','write.format.default'='orc','write.orc.bloom.filter.columns'='id','write.orc.compression-codec'='zstd','write.metadata.previous-versions-max'='3','write.metadata.delete-after-commit.enabled'='true');
insert into datacenter.default.test_data_04(id,name) values('1','a'),('2','b');
--hive4:
CREATE EXTERNAL TABLE default.test_data_04
STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
LOCATION 'hdfs://xxxx/iceberg-catalog/warehouse/default/test_data_04'
TBLPROPERTIES
('iceberg.catalog'='location_based_table','engine.hive.enabled'='true');
select distinct id,name from (select id,name from default.test_data_04 limit
10) s1; --2 row
update test_data_04 set name = 'adasd' where id = '1';
ERROR:
2024-05-23T10:26:32,028 ERROR [HiveServer2-Background-Pool: Thread-297]
hive.HiveIcebergStorageHandler: Error while trying to commit job:
job_17061635207991_169536, job_17061635207990_169536,
job_17061635207992_169536, starting rollback changes for table:
default.test_data_04
org.apache.iceberg.exceptions.NoSuchTableException: Table does not exist at
location: /iceberg-catalog/warehouse/default/test_data_04
BEFORE UPDATE:
ICEBERG TABLE DIR:
[root@xxxx ~]# hdfs dfs -ls /iceberg-catalog/warehouse/default/test_data_04
Found 2 items
drwxr-xr-x - hive hdfs 0 2024-05-23 09:26
/iceberg-catalog/warehouse/default/test_data_04/data
drwxr-xr-x - hive hdfs 0 2024-05-23 09:26
/iceberg-catalog/warehouse/default/test_data_04/metadata
AFTER UPDATE:
ICEBERG TABLE DIR:
[root@XXX ~]# hdfs dfs -ls /iceberg-catalog/warehouse/default/test_data_04
Found 3 items
drwxr-xr-x - hive hdfs 0 2024-05-23 10:26
/iceberg-catalog/warehouse/default/test_data_04/-tmp.HIVE_UNION_SUBDIR_1
drwxr-xr-x - hive hdfs 0 2024-05-23 10:26
/iceberg-catalog/warehouse/default/test_data_04/-tmp.HIVE_UNION_SUBDIR_2
drwxr-xr-x - hive hdfs 0 2024-05-23 10:26
/iceberg-catalog/warehouse/default/test_data_04/-tmp.HIVE_UNION_SUBDIR_3
{code}
--
This message was sent by Atlassian Jira
(v8.20.10#820010)