[
https://issues.apache.org/jira/browse/HIVE-29522?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
]
Venugopal Reddy K updated HIVE-29522:
-------------------------------------
Description:
*[Description]*
When the configuration property {{hive.acid.createtable.softdelete}} is set to
true, deletion of ACID table data is performed asynchronously by a background
cleaner thread.
As part of this process, the cleaner thread removes the corresponding entry
from the {{COMPACTION_QUEUE}} and records a completion entry in the
{{COMPLETED_COMPACTIONS}} table using
{{org.apache.hadoop.hive.metastore.txn.TxnStore#markCleaned()}}.
However, by the time this operation is executed, the associated ACID table has
already been deleted (soft-deleted). As a result, a {{COMPLETED_COMPACTIONS}}
entry is created for a table that no longer exists.
*[Steps to reproduce]*
0: jdbc:hive2://localhost:10000> create database mydb;
0: jdbc:hive2://localhost:10000> use mydb;
0: jdbc:hive2://localhost:10000> set hive.acid.createtable.softdelete=true;
0: jdbc:hive2://localhost:10000> create table t1 (i int);
0: jdbc:hive2://localhost:10000> desc formatted t1;
+-------------------------------+---------------------------------------------------+----------------------------------------------------+
| col_name | data_type
| comment |
+-------------------------------+---------------------------------------------------+----------------------------------------------------+
|i |int
| |
| |NULL
|NULL |
| # Detailed Table Information |NULL
|NULL |
|Database: |mydb
|NULL |
|OwnerType: |USER
|NULL |
|Owner: |hive
|NULL |
|CreateTime: |Tue Mar 24 01:16:56 IST 2026
|NULL |
|LastAccessTime: |UNKNOWN
|NULL |
|Retention: |0
|NULL |
|Location: |file:/tmp/warehouse/managed/mydb.db/t1.v0000001
|NULL |
|Table Type: |MANAGED_TABLE
|NULL |
|Table Parameters: |NULL
|NULL |
| |COLUMN_STATS_ACCURATE
|{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\"}}|
| |bucketing_version
|2 |
| |numFiles
|0 |
| |numRows
|0 |
| |rawDataSize
|0 |
| |soft_delete
|true |
| |totalSize
|0 |
| |transactional
|true |
| |transactional_properties
|default |
| |transient_lastDdlTime
|1774295216 |
| |NULL
|NULL |
| # Storage Information |NULL
|NULL |
|SerDe Library: |org.apache.hadoop.hive.ql.io.orc.OrcSerde
|NULL |
|InputFormat: |org.apache.hadoop.hive.ql.io.orc.OrcInputFormat
|NULL |
|OutputFormat: |org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat
|NULL |
|Compressed: |No
|NULL |
|Num Buckets: |-1
|NULL |
|Bucket Columns: |[]
|NULL |
|Sort Columns: |[]
|NULL |
+-------------------------------+---------------------------------------------------+----------------------------------------------------+
0: jdbc:hive2://localhost:10000> insert into t1 values (1);
0: jdbc:hive2://localhost:10000> drop table t1;
0: jdbc:hive2://localhost:10000> show compactions; # entry shows that the data
deletion is still pending (state: ready for cleaning)
+---------------+---------+----------+-----------+--------+---------------------+-------------+-----------+----------------+------------+-----------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
|compactionid |dbname |tabname |partname | type | state
|workerhost |workerid | enqueuetime |starttime |duration |hadoopjobid
|errormessage |initiatorhost |initiatorid |poolname |txnid |nexttxnid |
committime |hightestwriteid |
+---------------+---------+----------+-----------+--------+---------------------+-------------+-----------+----------------+------------+-----------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
|1 |mydb |t1 | --- |MAJOR |ready for cleaning | ---
| --- |1774296131593 | --- | --- |None | --- |
--- | --- |default |4 |4 |1774296131612 | ---
|
+---------------+---------+----------+-----------+--------+---------------------+-------------+-----------+----------------+------------+-----------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
0: jdbc:hive2://localhost:10000> show compactions; # wait for the cleaner to
finish deleting the data, then run show compactions again: a "succeeded" entry
now exists for a table that has already been deleted.
+---------------+---------+----------+-----------+--------+------------+-------------+-----------+----------------+------------+----------------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
|compactionid |dbname |tabname |partname | type | state |workerhost
|workerid | enqueuetime |starttime | duration |hadoopjobid |errormessage
|initiatorhost |initiatorid |poolname |txnid |nexttxnid | committime
|hightestwriteid |
+---------------+---------+----------+-----------+--------+------------+-------------+-----------+----------------+------------+----------------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
|1 |mydb |t1 | --- |MAJOR |succeeded | --- | ---
|1774296131593 | --- |1774296168263 |None | --- | ---
| --- |default |4 |4 |1774296131612 | --- |
+---------------+---------+----------+-----------+--------+------------+-------------+-----------+----------------+------------+----------------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
was:
*[Description]*
When the configuration property {{hive.acid.createtable.softdelete}} is set to
{{true, }}deletion of ACID table data is performed asynchronously by a
background cleaner thread.
As part of this process, the cleaner thread removes the corresponding entry
from the {{COMPACTION_QUEUE}} and records a completion entry in the
{{COMPLETED_COMPACTIONS}} table using
{{{}org.apache.hadoop.hive.metastore.txn.TxnStore#markCleaned(){}}}.
However, by the time this operation is executed, the associated ACID table has
already been deleted (soft-deleted). As a result, a {{COMPLETED_COMPACTIONS}}
entry is created for a table that no longer exists.
*[Steps to reproduce]*
0: jdbc:hive2://localhost:10000> create database mydb;
0: jdbc:hive2://localhost:10000> use mydb;
0: jdbc:hive2://localhost:10000> set hive.acid.createtable.softdelete=true;
0: jdbc:hive2://localhost:10000> create table t1 (i int);
0: jdbc:hive2://localhost:10000> desc formatted t1;
+-------------------------------+---------------------------------------------------+----------------------------------------------------+
| col_name | data_type
| comment |
+-------------------------------+---------------------------------------------------+----------------------------------------------------+
| i | int
| |
| | NULL
| NULL |
| # Detailed Table Information | NULL
| NULL |
| Database: | mydb
| NULL |
| OwnerType: | USER
| NULL |
| Owner: | hive
| NULL |
| CreateTime: | Tue Mar 24 01:16:56 IST 2026
| NULL |
| LastAccessTime: | UNKNOWN
| NULL |
| Retention: | 0
| NULL |
| Location: |
file:/tmp/warehouse/managed/mydb.db/t1.v0000001 | NULL
|
| Table Type: | MANAGED_TABLE
| NULL |
| Table Parameters: | NULL
| NULL |
| | COLUMN_STATS_ACCURATE
| \{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\"}} |
| | bucketing_version
| 2 |
| | numFiles
| 0 |
| | numRows
| 0 |
| | rawDataSize
| 0 |
| | soft_delete
| true |
| | totalSize
| 0 |
| | transactional
| true |
| | transactional_properties
| default |
| | transient_lastDdlTime
| 1774295216 |
| | NULL
| NULL |
| # Storage Information | NULL
| NULL |
| SerDe Library: | org.apache.hadoop.hive.ql.io.orc.OrcSerde
| NULL |
| InputFormat: |
org.apache.hadoop.hive.ql.io.orc.OrcInputFormat | NULL
|
| OutputFormat: |
org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat | NULL
|
| Compressed: | No
| NULL |
| Num Buckets: | -1
| NULL |
| Bucket Columns: | []
| NULL |
| Sort Columns: | []
| NULL |
+-------------------------------+---------------------------------------------------+----------------------------------------------------+
0: jdbc:hive2://localhost:10000> insert into t1 values (1);
0: jdbc:hive2://localhost:10000> drop table t1;
0: jdbc:hive2://localhost:10000> show compactions; # entry shows data deletion
to happen(ready for cleaning)
+---------------+---------+----------+-----------+--------+---------------------+-------------+-----------+----------------+------------+-----------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
| compactionid | dbname | tabname | partname | type | state
| workerhost | workerid | enqueuetime | starttime | duration |
hadoopjobid | errormessage | initiatorhost | initiatorid | poolname |
txnid | nexttxnid | committime | hightestwriteid |
+---------------+---------+----------+-----------+--------+---------------------+-------------+-----------+----------------+------------+-----------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
| 1 | mydb | t1 | --- | MAJOR | ready for cleaning
| --- | --- | 1774296131593 | --- | --- | None
| --- | --- | --- | default | 4 | 4
| 1774296131612 | --- |
+---------------+---------+----------+-----------+--------+---------------------+-------------+-----------+----------------+------------+-----------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
0: jdbc:hive2://localhost:10000> show compactions; #wait for data deletion to
complete in cleaner. And then check show compactions again. Entry exists for
table that has already been deleted.
+---------------+---------+----------+-----------+--------+------------+-------------+-----------+----------------+------------+----------------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
| compactionid | dbname | tabname | partname | type | state |
workerhost | workerid | enqueuetime | starttime | duration |
hadoopjobid | errormessage | initiatorhost | initiatorid | poolname |
txnid | nexttxnid | committime | hightestwriteid |
+---------------+---------+----------+-----------+--------+------------+-------------+-----------+----------------+------------+----------------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
| 1 | mydb | t1 | --- | MAJOR | succeeded | ---
| --- | 1774296131593 | --- | 1774296168263 | None
| --- | --- | --- | default | 4 | 4
| 1774296131612 | --- |
+---------------+---------+----------+-----------+--------+------------+-------------+-----------+----------------+------------+----------------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
> Cleaner thread creates stale COMPLETED_COMPACTIONS entries for soft-deleted
> ACID tables
> ---------------------------------------------------------------------------------------
>
> Key: HIVE-29522
> URL: https://issues.apache.org/jira/browse/HIVE-29522
> Project: Hive
> Issue Type: Bug
> Components: Metastore, Standalone Metastore
> Reporter: Venugopal Reddy K
> Assignee: Venugopal Reddy K
> Priority: Major
>
> *[Description]*
> When the configuration property {{hive.acid.createtable.softdelete}} is set
> to true, deletion of ACID table data is performed asynchronously by a
> background cleaner thread.
> As part of this process, the cleaner thread removes the corresponding entry
> from the {{COMPACTION_QUEUE}} and records a completion entry in the
> {{COMPLETED_COMPACTIONS}} table using
> {{org.apache.hadoop.hive.metastore.txn.TxnStore#markCleaned()}}.
> However, by the time this operation is executed, the associated ACID table
> has already been deleted (soft-deleted). As a result, a
> {{COMPLETED_COMPACTIONS}} entry is created for a table that no longer exists.
> *[Steps to reproduce]*
> 0: jdbc:hive2://localhost:10000> create database mydb;
> 0: jdbc:hive2://localhost:10000> use mydb;
> 0: jdbc:hive2://localhost:10000> set hive.acid.createtable.softdelete=true;
> 0: jdbc:hive2://localhost:10000> create table t1 (i int);
> 0: jdbc:hive2://localhost:10000> desc formatted t1;
> +-------------------------------+---------------------------------------------------+----------------------------------------------------+
> | col_name | data_type
> | comment |
> +-------------------------------+---------------------------------------------------+----------------------------------------------------+
> |i |int
> | |
> | |NULL
> |NULL |
> | # Detailed Table Information |NULL
> |NULL |
> |Database: |mydb
> |NULL |
> |OwnerType: |USER
> |NULL |
> |Owner: |hive
> |NULL |
> |CreateTime: |Tue Mar 24 01:16:56 IST 2026
> |NULL |
> |LastAccessTime: |UNKNOWN
> |NULL |
> |Retention: |0
> |NULL |
> |Location: |file:/tmp/warehouse/managed/mydb.db/t1.v0000001
> |NULL |
> |Table Type: |MANAGED_TABLE
> |NULL |
> |Table Parameters: |NULL
> |NULL |
> | |COLUMN_STATS_ACCURATE
> |{\"BASIC_STATS\":\"true\",\"COLUMN_STATS\":{\"i\":\"true\"}}|
> | |bucketing_version
> |2 |
> | |numFiles
> |0 |
> | |numRows
> |0 |
> | |rawDataSize
> |0 |
> | |soft_delete
> |true |
> | |totalSize
> |0 |
> | |transactional
> |true |
> | |transactional_properties
> |default |
> | |transient_lastDdlTime
> |1774295216 |
> | |NULL
> |NULL |
> | # Storage Information |NULL
> |NULL |
> |SerDe Library: |org.apache.hadoop.hive.ql.io.orc.OrcSerde
> |NULL |
> |InputFormat:
> |org.apache.hadoop.hive.ql.io.orc.OrcInputFormat |NULL
> |
> |OutputFormat:
> |org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat |NULL
> |
> |Compressed: |No
> |NULL |
> |Num Buckets: |-1
> |NULL |
> |Bucket Columns: |[]
> |NULL |
> |Sort Columns: |[]
> |NULL |
> +-------------------------------+---------------------------------------------------+----------------------------------------------------+
> 0: jdbc:hive2://localhost:10000> insert into t1 values (1);
> 0: jdbc:hive2://localhost:10000> drop table t1;
> 0: jdbc:hive2://localhost:10000> show compactions; # entry shows that the
> data deletion is still pending (state: ready for cleaning)
> +---------------+---------+----------+-----------+--------+---------------------+-------------+-----------+----------------+------------+-----------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
> |compactionid |dbname |tabname |partname | type | state
> |workerhost |workerid | enqueuetime |starttime |duration |hadoopjobid
> |errormessage |initiatorhost |initiatorid |poolname |txnid |nexttxnid |
> committime |hightestwriteid |
> +---------------+---------+----------+-----------+--------+---------------------+-------------+-----------+----------------+------------+-----------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
> |1 |mydb |t1 | --- |MAJOR |ready for cleaning | ---
> | --- |1774296131593 | --- | --- |None | ---
> | --- | --- |default |4 |4 |1774296131612 | ---
> |
> +---------------+---------+----------+-----------+--------+---------------------+-------------+-----------+----------------+------------+-----------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
> 0: jdbc:hive2://localhost:10000> show compactions; # wait for the cleaner to
> finish deleting the data, then run show compactions again: a "succeeded" entry
> now exists for a table that has already been deleted.
> +---------------+---------+----------+-----------+--------+------------+-------------+-----------+----------------+------------+----------------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
> |compactionid |dbname |tabname |partname | type | state |workerhost
> |workerid | enqueuetime |starttime | duration |hadoopjobid
> |errormessage |initiatorhost |initiatorid |poolname |txnid |nexttxnid |
> committime |hightestwriteid |
> +---------------+---------+----------+-----------+--------+------------+-------------+-----------+----------------+------------+----------------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
> |1 |mydb |t1 | --- |MAJOR |succeeded | --- |
> --- |1774296131593 | --- |1774296168263 |None | --- |
> --- | --- |default |4 |4 |1774296131612 | ---
> |
> +---------------+---------+----------+-----------+--------+------------+-------------+-----------+----------------+------------+----------------+--------------+---------------+----------------+--------------+-----------+--------+------------+----------------+------------------+
--
This message was sent by Atlassian Jira
(v8.20.10#820010)