This is an automated email from the ASF dual-hosted git repository.
deniskuzZ pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/hive.git
The following commit(s) were added to refs/heads/master by this push:
new d2d7dd2274c HIVE-27190: Fix cache-key collisions for time-travel
queries on Iceberg (#6380)
d2d7dd2274c is described below
commit d2d7dd2274c9ae08d7cca5905d5f875929fd7bec
Author: Denys Kuzmenko <[email protected]>
AuthorDate: Tue May 26 08:29:48 2026 +0300
HIVE-27190: Fix cache-key collisions for time-travel queries on Iceberg
(#6380)
---
.../positive/iceberg_partition_pruner_cache_key.q | 50 +++
.../positive/iceberg_metadata_table_alias.q.out | 2 +-
.../iceberg_partition_pruner_cache_key.q.out | 386 +++++++++++++++++++++
.../org/apache/hadoop/hive/ql/metadata/Table.java | 7 +
.../hive/ql/optimizer/SharedWorkOptimizer.java | 10 +-
.../hive/ql/optimizer/calcite/RelOptHiveTable.java | 67 ++--
.../hive/ql/optimizer/ppr/PartitionPruner.java | 10 +-
.../hadoop/hive/ql/parse/CalcitePlanner.java | 7 +-
.../apache/hadoop/hive/ql/parse/ParseContext.java | 9 +-
.../hadoop/hive/ql/parse/PrunedPartitionList.java | 10 +-
10 files changed, 499 insertions(+), 59 deletions(-)
diff --git
a/iceberg/iceberg-handler/src/test/queries/positive/iceberg_partition_pruner_cache_key.q
b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_partition_pruner_cache_key.q
new file mode 100644
index 00000000000..4ec645616b1
--- /dev/null
+++
b/iceberg/iceberg-handler/src/test/queries/positive/iceberg_partition_pruner_cache_key.q
@@ -0,0 +1,50 @@
+set hive.fetch.task.conversion=none;
+set hive.explain.user=false;
+
+create external table tbl_ice_pp_key(a int, b string)
+ partitioned by spec (a) stored by iceberg;
+
+insert into tbl_ice_pp_key values (1, 'one'), (2, 'two');
+alter table tbl_ice_pp_key create tag s1;
+
+insert into tbl_ice_pp_key values
+ (3, 'three'), (4, 'four'), (5, 'five'),
+ (6, 'six'), (7, 'seven'),(8, 'eight'),
+ (9, 'nine'), (10, 'ten');
+
+explain select count(*) from tbl_ice_pp_key;
+select count(*) from tbl_ice_pp_key;
+
+explain select count(*) from tbl_ice_pp_key for system_version as of 's1';
+select count(*) from tbl_ice_pp_key for system_version as of 's1';
+
+explain
+select cur.cnt as cur_cnt, snap.cnt as snap_cnt
+from (
+ select count(*) as cnt from tbl_ice_pp_key
+) cur
+cross join (
+ select count(*) as cnt from tbl_ice_pp_key for system_version as of 's1'
+) snap;
+
+select cur.cnt as cur_cnt, snap.cnt as snap_cnt
+from (
+ select count(*) as cnt from tbl_ice_pp_key
+) cur
+cross join (
+ select count(*) as cnt from tbl_ice_pp_key for system_version as of 's1'
+) snap;
+
+-- with a partition predicate
+explain
+select 'current' as ver, count(*) as cnt from tbl_ice_pp_key where a > 2
+union all
+select 'asof_s1' as ver, count(*) as cnt from tbl_ice_pp_key for system_version as of 's1'
+where a > 2;
+
+select 'current' as ver, count(*) as cnt from tbl_ice_pp_key where a > 2
+union all
+select 'asof_s1' as ver, count(*) as cnt from tbl_ice_pp_key for system_version as of 's1'
+where a > 2;
+
+drop table tbl_ice_pp_key;
\ No newline at end of file
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_metadata_table_alias.q.out
b/iceberg/iceberg-handler/src/test/results/positive/iceberg_metadata_table_alias.q.out
index b14d10551b0..f574825d2bd 100644
---
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_metadata_table_alias.q.out
+++
b/iceberg/iceberg-handler/src/test/results/positive/iceberg_metadata_table_alias.q.out
@@ -23,7 +23,7 @@ POSTHOOK: type: QUERY
POSTHOOK: Input: default@ice_t
POSTHOOK: Output: hdfs://### HDFS PATH ###
OPTIMIZED SQL: SELECT `committed_at` AS `ice_t.snapshots.committed_at`,
`snapshot_id` AS `ice_t.snapshots.snapshot_id`, `parent_id` AS
`ice_t.snapshots.parent_id`, `operation` AS `ice_t.snapshots.operation`,
`manifest_list` AS `ice_t.snapshots.manifest_list`, `summary` AS
`ice_t.snapshots.summary`
-FROM `default`.`ice_t`
+FROM `default`.`ice_t`.`snapshots`
STAGE DEPENDENCIES:
Stage-0 is a root stage
diff --git
a/iceberg/iceberg-handler/src/test/results/positive/iceberg_partition_pruner_cache_key.q.out
b/iceberg/iceberg-handler/src/test/results/positive/iceberg_partition_pruner_cache_key.q.out
new file mode 100644
index 00000000000..b87b1a62286
--- /dev/null
+++
b/iceberg/iceberg-handler/src/test/results/positive/iceberg_partition_pruner_cache_key.q.out
@@ -0,0 +1,386 @@
+PREHOOK: query: create external table tbl_ice_pp_key(a int, b string)
+ partitioned by spec (a) stored by iceberg
+PREHOOK: type: CREATETABLE
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_pp_key
+POSTHOOK: query: create external table tbl_ice_pp_key(a int, b string)
+ partitioned by spec (a) stored by iceberg
+POSTHOOK: type: CREATETABLE
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_pp_key
+PREHOOK: query: insert into tbl_ice_pp_key values (1, 'one'), (2, 'two')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_pp_key
+POSTHOOK: query: insert into tbl_ice_pp_key values (1, 'one'), (2, 'two')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_pp_key
+PREHOOK: query: alter table tbl_ice_pp_key create tag s1
+PREHOOK: type: ALTERTABLE_CREATETAG
+PREHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: query: alter table tbl_ice_pp_key create tag s1
+POSTHOOK: type: ALTERTABLE_CREATETAG
+POSTHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: query: insert into tbl_ice_pp_key values
+ (3, 'three'), (4, 'four'), (5, 'five'),
+ (6, 'six'), (7, 'seven'),(8, 'eight'),
+ (9, 'nine'), (10, 'ten')
+PREHOOK: type: QUERY
+PREHOOK: Input: _dummy_database@_dummy_table
+PREHOOK: Output: default@tbl_ice_pp_key
+POSTHOOK: query: insert into tbl_ice_pp_key values
+ (3, 'three'), (4, 'four'), (5, 'five'),
+ (6, 'six'), (7, 'seven'),(8, 'eight'),
+ (9, 'nine'), (10, 'ten')
+POSTHOOK: type: QUERY
+POSTHOOK: Input: _dummy_database@_dummy_table
+POSTHOOK: Output: default@tbl_ice_pp_key
+PREHOOK: query: explain select count(*) from tbl_ice_pp_key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select count(*) from tbl_ice_pp_key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tbl_ice_pp_key
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice_pp_key
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+10
+PREHOOK: query: explain select count(*) from tbl_ice_pp_key for system_version as of 's1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain select count(*) from tbl_ice_pp_key for system_version as of 's1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-0 is a root stage
+
+STAGE PLANS:
+ Stage: Stage-0
+ Fetch Operator
+ limit: 1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select count(*) from tbl_ice_pp_key for system_version as of 's1'
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select count(*) from tbl_ice_pp_key for system_version as of 's1'
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+2
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage
'Reducer 3' is a cross product
+PREHOOK: query: explain
+select cur.cnt as cur_cnt, snap.cnt as snap_cnt
+from (
+ select count(*) as cnt from tbl_ice_pp_key
+) cur
+cross join (
+ select count(*) as cnt from tbl_ice_pp_key for system_version as of 's1'
+) snap
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain
+select cur.cnt as cur_cnt, snap.cnt as snap_cnt
+from (
+ select count(*) as cnt from tbl_ice_pp_key
+) cur
+cross join (
+ select count(*) as cnt from tbl_ice_pp_key for system_version as of 's1'
+) snap
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE)
+ Reducer 3 <- Reducer 2 (XPROD_EDGE), Reducer 5 (XPROD_EDGE)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl_ice_pp_key
+ Statistics: Num rows: 10 Data size: 63810 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 10 Data size: 63810 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.9
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: tbl_ice_pp_key
+ As of version: s1
+ Statistics: Num rows: 2 Data size: 12636 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 2 Data size: 12636 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.5
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Reducer 3
+ Reduce Operator Tree:
+ Merge Join Operator
+ condition map:
+ Inner Join 0 to 1
+ keys:
+ 0
+ 1
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 1 Data size: 16 Basic stats: COMPLETE
Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde: org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+Warning: Shuffle Join MERGEJOIN[17][tables = [$hdt$_0, $hdt$_1]] in Stage
'Reducer 3' is a cross product
+PREHOOK: query: select cur.cnt as cur_cnt, snap.cnt as snap_cnt
+from (
+ select count(*) as cnt from tbl_ice_pp_key
+) cur
+cross join (
+ select count(*) as cnt from tbl_ice_pp_key for system_version as of 's1'
+) snap
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select cur.cnt as cur_cnt, snap.cnt as snap_cnt
+from (
+ select count(*) as cnt from tbl_ice_pp_key
+) cur
+cross join (
+ select count(*) as cnt from tbl_ice_pp_key for system_version as of 's1'
+) snap
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+10 2
+PREHOOK: query: explain
+select 'current' as ver, count(*) as cnt from tbl_ice_pp_key where a > 2
+union all
+select 'asof_s1' as ver, count(*) as cnt from tbl_ice_pp_key for
system_version as of 's1'
+where a > 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: explain
+select 'current' as ver, count(*) as cnt from tbl_ice_pp_key where a > 2
+union all
+select 'asof_s1' as ver, count(*) as cnt from tbl_ice_pp_key for
system_version as of 's1'
+where a > 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+STAGE DEPENDENCIES:
+ Stage-1 is a root stage
+ Stage-0 depends on stages: Stage-1
+
+STAGE PLANS:
+ Stage: Stage-1
+ Tez
+#### A masked pattern was here ####
+ Edges:
+ Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
+ Reducer 5 <- Map 4 (CUSTOM_SIMPLE_EDGE), Union 3 (CONTAINS)
+#### A masked pattern was here ####
+ Vertices:
+ Map 1
+ Map Operator Tree:
+ TableScan
+ alias: tbl_ice_pp_key
+ filterExpr: (a > 2) (type: boolean)
+ Statistics: Num rows: 8 Data size: 51174 Basic stats:
COMPLETE Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 8 Data size: 51174 Basic stats:
COMPLETE Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.875
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
COMPLETE Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Map 4
+ Map Operator Tree:
+ TableScan
+ alias: tbl_ice_pp_key
+ As of version: s1
+ filterExpr: (a > 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: COMPLETE
+ Filter Operator
+ predicate: (a > 2) (type: boolean)
+ Statistics: Num rows: 1 Data size: 0 Basic stats: PARTIAL
Column stats: COMPLETE
+ Select Operator
+ Statistics: Num rows: 1 Data size: 0 Basic stats:
PARTIAL Column stats: COMPLETE
+ Group By Operator
+ aggregations: count()
+ minReductionHashAggr: 0.99
+ mode: hash
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
PARTIAL Column stats: COMPLETE
+ Reduce Output Operator
+ null sort order:
+ sort order:
+ Statistics: Num rows: 1 Data size: 8 Basic stats:
PARTIAL Column stats: COMPLETE
+ value expressions: _col0 (type: bigint)
+ Execution mode: vectorized
+ Reducer 2
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: COMPLETE
Column stats: COMPLETE
+ Select Operator
+ expressions: 'current' (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 99 Basic stats: COMPLETE
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 107 Basic stats:
PARTIAL Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Reducer 5
+ Execution mode: vectorized
+ Reduce Operator Tree:
+ Group By Operator
+ aggregations: count(VALUE._col0)
+ mode: mergepartial
+ outputColumnNames: _col0
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL
Column stats: COMPLETE
+ Select Operator
+ expressions: 'asof_s1' (type: string), _col0 (type: bigint)
+ outputColumnNames: _col0, _col1
+ Statistics: Num rows: 1 Data size: 8 Basic stats: PARTIAL
Column stats: COMPLETE
+ File Output Operator
+ compressed: false
+ Statistics: Num rows: 2 Data size: 107 Basic stats:
PARTIAL Column stats: COMPLETE
+ table:
+ input format:
org.apache.hadoop.mapred.SequenceFileInputFormat
+ output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat
+ serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe
+ Union 3
+ Vertex: Union 3
+
+ Stage: Stage-0
+ Fetch Operator
+ limit: -1
+ Processor Tree:
+ ListSink
+
+PREHOOK: query: select 'current' as ver, count(*) as cnt from tbl_ice_pp_key
where a > 2
+union all
+select 'asof_s1' as ver, count(*) as cnt from tbl_ice_pp_key for
system_version as of 's1'
+where a > 2
+PREHOOK: type: QUERY
+PREHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: Output: hdfs://### HDFS PATH ###
+POSTHOOK: query: select 'current' as ver, count(*) as cnt from tbl_ice_pp_key
where a > 2
+union all
+select 'asof_s1' as ver, count(*) as cnt from tbl_ice_pp_key for
system_version as of 's1'
+where a > 2
+POSTHOOK: type: QUERY
+POSTHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: Output: hdfs://### HDFS PATH ###
+current 8
+asof_s1 0
+PREHOOK: query: drop table tbl_ice_pp_key
+PREHOOK: type: DROPTABLE
+PREHOOK: Input: default@tbl_ice_pp_key
+PREHOOK: Output: database:default
+PREHOOK: Output: default@tbl_ice_pp_key
+POSTHOOK: query: drop table tbl_ice_pp_key
+POSTHOOK: type: DROPTABLE
+POSTHOOK: Input: default@tbl_ice_pp_key
+POSTHOOK: Output: database:default
+POSTHOOK: Output: default@tbl_ice_pp_key
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
index f857e7d505f..05faa38d7ca 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/metadata/Table.java
@@ -32,6 +32,7 @@
import java.util.Properties;
import java.util.Set;
import java.util.stream.Collectors;
+import java.util.stream.Stream;
import org.apache.commons.collections4.CollectionUtils;
import org.apache.commons.lang3.StringUtils;
@@ -1355,6 +1356,12 @@ public void setSnapshotRef(String snapshotRef) {
this.snapshotRef = snapshotRef;
}
+ public String getQualifier() {
+ return Stream.of(metaTable, snapshotRef, asOfVersion, asOfTimestamp)
+ .filter(Objects::nonNull).findFirst()
+ .orElse("");
+ }
+
public SourceTable createSourceTable() {
SourceTable sourceTable = new SourceTable();
sourceTable.setTable(this.tTable);
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
index 5a038b4e718..26a5a85b174 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/SharedWorkOptimizer.java
@@ -646,9 +646,12 @@ protected boolean areMergeable(ParseContext pctx,
TableScanOperator tsOp1, Table
return false;
}
- // HIVE-29509: Include snapshotRef to ensure different Iceberg
branches/tags are treated as distinct tables
- if (!Objects.equals(tsOp1.getConf().getSnapshotRef(),
tsOp2.getConf().getSnapshotRef())) {
- LOG.debug("Snapshot Ref differ {} ~ {}",
tsOp1.getConf().getSnapshotRef(), tsOp2.getConf().getSnapshotRef());
+ // Time-travel qualifier (snapshotRef, asOfVersion, asOfTimestamp) must match,
+ // otherwise the two scans address different snapshots of the same table.
+ String qualifier1 = tsOp1.getConf().getTableMetadata().getQualifier();
+ String qualifier2 = tsOp2.getConf().getTableMetadata().getQualifier();
+ if (!Objects.equals(qualifier1, qualifier2)) {
+ LOG.debug("Qualifiers differ {} ~ {}", qualifier1, qualifier2);
return false;
}
// If partitions do not match, we currently do not merge
@@ -1861,6 +1864,7 @@ private static boolean compareOperator(ParseContext pctx,
Operator<?> op1, Opera
Table tableMeta1 = op1Conf.getTableMetadata();
Table tableMeta2 = op2Conf.getTableMetadata();
if (StringUtils.equals(tableMeta1.getFullyQualifiedName(),
tableMeta2.getFullyQualifiedName())
+ && StringUtils.equals(tableMeta1.getQualifier(),
tableMeta2.getQualifier())
&& op1Conf.getNeededColumns().equals(op2Conf.getNeededColumns())
&& StringUtils.equals(op1Conf.getFilterExprString(),
op2Conf.getFilterExprString())
&& pctx.getPrunedPartitions(tsOp1).getPartitions().equals(
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
index b3e0249b778..f44d8967195 100644
---
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
+++
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/calcite/RelOptHiveTable.java
@@ -24,7 +24,6 @@
import java.util.Map;
import java.util.Set;
import java.util.concurrent.atomic.AtomicInteger;
-import java.util.stream.Collectors;
import org.apache.calcite.linq4j.tree.Expression;
import org.apache.calcite.plan.RelOptSchema;
@@ -82,7 +81,6 @@
import com.google.common.collect.ImmutableList;
import com.google.common.collect.ImmutableMap;
import com.google.common.collect.Lists;
-import com.google.common.collect.Sets;
public class RelOptHiveTable implements RelOptTable {
@@ -125,7 +123,7 @@ public RelOptHiveTable(RelOptSchema calciteSchema,
RelDataTypeFactory typeFactor
this.schema = calciteSchema;
this.typeFactory = typeFactory;
this.qualifiedTblName = ImmutableList.copyOf(qualifiedTblName);
- this.name =
this.qualifiedTblName.stream().collect(Collectors.joining("."));
+ this.name = String.join(".", this.qualifiedTblName);
this.rowType = rowType;
this.hiveTblMetadata = hiveTblMetadata;
this.hiveColStatsMap = new HashMap<>();
@@ -192,15 +190,15 @@ public List<ColumnStrategy> getColumnStrategies() {
public RelOptHiveTable copy(RelDataType newRowType) {
// 1. Build map of column name to col index of original schema
// Assumption: Hive Table can not contain duplicate column names
- Map<String, Integer> nameToColIndxMap = new HashMap<String, Integer>();
+ Map<String, Integer> nameToColIndxMap = new HashMap<>();
for (RelDataTypeField f : this.rowType.getFieldList()) {
nameToColIndxMap.put(f.getName(), f.getIndex());
}
// 2. Build nonPart/Part/Virtual column info for new RowSchema
- List<ColumnInfo> newHiveNonPartitionCols = new ArrayList<ColumnInfo>();
- List<ColumnInfo> newHivePartitionCols = new ArrayList<ColumnInfo>();
- List<VirtualColumn> newHiveVirtualCols = new ArrayList<VirtualColumn>();
+ List<ColumnInfo> newHiveNonPartitionCols = new ArrayList<>();
+ List<ColumnInfo> newHivePartitionCols = new ArrayList<>();
+ List<VirtualColumn> newHiveVirtualCols = new ArrayList<>();
Map<Integer, VirtualColumn> virtualColInfoMap =
HiveCalciteUtil.getVColsMap(this.hiveVirtualCols,
this.noOfNonVirtualCols);
Integer originalColIndx;
@@ -329,8 +327,8 @@ private List<RelReferentialConstraint>
generateReferentialConstraints() {
ImmutableList.Builder<RelReferentialConstraint> builder =
ImmutableList.builder();
if (foreignKeyInfo != null && !foreignKeyInfo.getForeignKeys().isEmpty()) {
for (List<ForeignKeyCol> fkCols :
foreignKeyInfo.getForeignKeys().values()) {
- String parentDatabaseName = fkCols.get(0).parentDatabaseName;
- String parentTableName = fkCols.get(0).parentTableName;
+ String parentDatabaseName = fkCols.getFirst().parentDatabaseName;
+ String parentTableName = fkCols.getFirst().parentTableName;
String qualifiedName;
List<String> parentTableQualifiedName = new ArrayList<>();
if (parentDatabaseName != null && !parentDatabaseName.isEmpty()) {
@@ -390,7 +388,7 @@ public <T> T unwrap(Class<T> arg0) {
@Override
public List<RelCollation> getCollationList() {
- ImmutableList.Builder<RelFieldCollation> collationList = new
ImmutableList.Builder<RelFieldCollation>();
+ ImmutableList.Builder<RelFieldCollation> collationList = new
ImmutableList.Builder<>();
for (Order sortColumn : this.hiveTblMetadata.getSortCols()) {
for (int i=0; i<this.hiveTblMetadata.getSd().getCols().size(); i++) {
FieldSchema field = this.hiveTblMetadata.getSd().getCols().get(i);
@@ -411,7 +409,7 @@ public List<RelCollation> getCollationList() {
@Override
public RelDistribution getDistribution() {
- ImmutableList.Builder<Integer> columnPositions = new
ImmutableList.Builder<Integer>();
+ ImmutableList.Builder<Integer> columnPositions = new
ImmutableList.Builder<>();
for (String bucketColumn : this.hiveTblMetadata.getBucketCols()) {
for (int i=0; i<this.hiveTblMetadata.getSd().getCols().size(); i++) {
FieldSchema field = this.hiveTblMetadata.getSd().getCols().get(i);
@@ -435,7 +433,7 @@ public double getRowCount() {
if (null == partitionList) {
// we are here either unpartitioned table or partitioned table with no
// predicates
- computePartitionList(hiveConf, null, new HashSet<Integer>());
+ computePartitionList(hiveConf, null, new HashSet<>());
}
rowCount = StatsUtils.getNumRows(hiveConf, getNonPartColumns(),
hiveTblMetadata,
partitionList, noColsMissingStats);
@@ -465,7 +463,7 @@ private String getColNamesForLogging(Set<String> colLst) {
public void computePartitionList(HiveConf conf, RexNode pruneNode,
Set<Integer> partOrVirtualCols) {
try {
if (!hiveTblMetadata.isPartitioned() || pruneNode == null
- || InputFinder.bits(pruneNode).length() == 0) {
+ || InputFinder.bits(pruneNode).isEmpty()) {
// there is no predicate on partitioning column, we need all partitions
// in this case.
partitionList = PartitionPruner.prune(hiveTblMetadata, null, conf,
getName(),
@@ -485,11 +483,11 @@ public void computePartitionList(HiveConf conf, RexNode
pruneNode, Set<Integer>
}
private void updateColStats(Set<Integer> projIndxLst, boolean
allowMissingStats) {
- List<String> nonPartColNamesThatRqrStats = new ArrayList<String>();
- List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<Integer>();
- List<String> partColNamesThatRqrStats = new ArrayList<String>();
- List<Integer> partColIndxsThatRqrStats = new ArrayList<Integer>();
- Set<String> colNamesFailedStats = new HashSet<String>();
+ List<String> nonPartColNamesThatRqrStats = new ArrayList<>();
+ List<Integer> nonPartColIndxsThatRqrStats = new ArrayList<>();
+ List<String> partColNamesThatRqrStats = new ArrayList<>();
+ List<Integer> partColIndxsThatRqrStats = new ArrayList<>();
+ Set<String> colNamesFailedStats = new HashSet<>();
// 1. Separate required columns to Non Partition and Partition Cols
ColumnInfo tmp;
@@ -514,19 +512,19 @@ private void updateColStats(Set<Integer> projIndxLst,
boolean allowMissingStats)
if (null == partitionList) {
// We could be here either because its an unpartitioned table or because
// there are no pruning predicates on a partitioned table.
- computePartitionList(hiveConf, null, new HashSet<Integer>());
+ computePartitionList(hiveConf, null, new HashSet<>());
}
- String partitionListKey = partitionList.getKey().orElse(null);
- ColumnStatsList colStatsCached = colStatsCache.get(partitionListKey);
- if (colStatsCached == null) {
- colStatsCached = new ColumnStatsList();
- colStatsCache.put(partitionListKey, colStatsCached);
- }
+ String partitionListKey = partitionList.getKey();
+
+ ColumnStatsList colStatsCached = colStatsCache.computeIfAbsent(
+ partitionListKey,
+ k -> new ColumnStatsList()
+ );
// 2. Obtain Col Stats for Non Partition Cols
- if (nonPartColNamesThatRqrStats.size() > 0) {
- List<ColStatistics> hiveColStats = new ArrayList<ColStatistics>();
+ if (!nonPartColNamesThatRqrStats.isEmpty()) {
+ List<ColStatistics> hiveColStats = new ArrayList<>();
if (!hiveTblMetadata.isPartitioned()) {
// 2.1 Handle the case for unpartitioned table.
@@ -547,9 +545,9 @@ private void updateColStats(Set<Integer> projIndxLst,
boolean allowMissingStats)
if (hiveColStats.isEmpty()) {
colNamesFailedStats.addAll(nonPartColNamesThatRqrStats);
} else if (hiveColStats.size() !=
nonPartColNamesThatRqrStats.size()) {
- Set<String> setOfFiledCols = new
HashSet<String>(nonPartColNamesThatRqrStats);
+ Set<String> setOfFiledCols = new
HashSet<>(nonPartColNamesThatRqrStats);
- Set<String> setOfObtainedColStats = new HashSet<String>();
+ Set<String> setOfObtainedColStats = new HashSet<>();
for (ColStatistics cs : hiveColStats) {
setOfObtainedColStats.add(cs.getColumnName());
}
@@ -561,7 +559,7 @@ private void updateColStats(Set<Integer> projIndxLst,
boolean allowMissingStats)
// nonPartColNamesThatRqrStats. reorder hiveColStats so we can
build hiveColStatsMap
// using nonPartColIndxsThatRqrStats as below
Map<String, ColStatistics> columnStatsMap =
- new HashMap<String, ColStatistics>(hiveColStats.size());
+ new HashMap<>(hiveColStats.size());
for (ColStatistics cs : hiveColStats) {
columnStatsMap.put(cs.getColumnName(), cs);
// even though the stats were estimated we need to warn user that
@@ -586,7 +584,7 @@ private void updateColStats(Set<Integer> projIndxLst,
boolean allowMissingStats)
if (partitionList.getNotDeniedPartns().isEmpty()) {
// no need to make a metastore call
rowCount = 0;
- hiveColStats = new ArrayList<ColStatistics>();
+ hiveColStats = new ArrayList<>();
for (int i = 0; i < nonPartColNamesThatRqrStats.size(); i++) {
// add empty stats object for each column
hiveColStats.add(
@@ -594,14 +592,13 @@ private void updateColStats(Set<Integer> projIndxLst,
boolean allowMissingStats)
nonPartColNamesThatRqrStats.get(i),
hiveNonPartitionColsMap.get(nonPartColIndxsThatRqrStats.get(i)).getTypeName()));
}
- colNamesFailedStats.clear();
colStatsCached.updateState(State.COMPLETE);
} else {
Statistics stats = StatsUtils.collectStatistics(hiveConf,
partitionList,
hiveTblMetadata, hiveNonPartitionCols,
nonPartColNamesThatRqrStats, colStatsCached,
nonPartColNamesThatRqrStats, true);
rowCount = stats.getNumRows();
- hiveColStats = new ArrayList<ColStatistics>();
+ hiveColStats = new ArrayList<>();
for (String c : nonPartColNamesThatRqrStats) {
ColStatistics cs = stats.getColumnStatisticsFromColName(c);
if (cs != null) {
@@ -622,7 +619,7 @@ private void updateColStats(Set<Integer> projIndxLst,
boolean allowMissingStats)
}
}
- if (hiveColStats != null && hiveColStats.size() ==
nonPartColNamesThatRqrStats.size()) {
+ if (hiveColStats.size() == nonPartColNamesThatRqrStats.size()) {
for (int i = 0; i < hiveColStats.size(); i++) {
// the columns in
nonPartColIndxsThatRqrStats/nonPartColNamesThatRqrStats/hiveColStats
// are in same order
@@ -754,7 +751,7 @@ public int hashCode() {
}
public String getPartitionListKey() {
- return partitionList != null ? partitionList.getKey().orElse(null) : null;
+ return partitionList != null ? partitionList.getKey() : null;
}
}
diff --git
a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
index b1bc9eaf0a7..7574ad5f6d2 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/optimizer/ppr/PartitionPruner.java
@@ -181,12 +181,8 @@ public static PrunedPartitionList prune(Table tab,
ExprNodeDesc prunerExpr,
LOG.trace("prune Expression = " + (prunerExpr == null ? "" :
prunerExpr));
}
- String key = tab.getFullyQualifiedName() + ";";
- if (tab.getMetaTable() != null) {
- key = tab.getFullyQualifiedName() + "." + tab.getMetaTable() + ";";
- } else if (tab.getSnapshotRef() != null) {
- key = tab.getFullyQualifiedName() + "." + tab.getSnapshotRef() + ";";
- }
+ String qualifier = tab.getQualifier();
+ String key = tab.getFullyQualifiedName() + (qualifier.isEmpty() ? "" : "." + qualifier) + ";";
if (!tab.isPartitioned()) {
// If the table is not partitioned, return empty list.
@@ -441,7 +437,7 @@ static private boolean hasUserFunctions(ExprNodeDesc expr) {
return false;
}
- private static PrunedPartitionList getPartitionsFromServer(Table tab, String key, ExprNodeDesc compactExpr,
+ private static PrunedPartitionList getPartitionsFromServer(Table tab, String key, ExprNodeDesc compactExpr,
HiveConf conf, Set<String> partColsUsedInFilter, boolean isPruningByExactFilter)
throws SemanticException {
try {
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
index 00cc988eb0a..ffc1673f367 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/CalcitePlanner.java
@@ -3170,8 +3170,11 @@ private RelNode genTableLogicalPlan(String tableAlias, QB qb) throws SemanticExc
fullyQualifiedTabName.add(tabMetaData.getDbName());
}
fullyQualifiedTabName.add(tabMetaData.getTableName());
- if (tabMetaData.getSnapshotRef() != null) {
- fullyQualifiedTabName.add(tabMetaData.getSnapshotRef());
+ // Include time-travel qualifier (snapshotRef / asOfVersion / asOfTimestamp)
+ // in the table identity so two scans at different snapshots stay distinct.
+ String qualifier = tabMetaData.getQualifier();
+ if (!qualifier.isEmpty()) {
+ fullyQualifiedTabName.add(qualifier);
}
optTable = new RelOptHiveTable(relOptSchema, relOptSchema.getTypeFactory(), fullyQualifiedTabName,
rowType, tabMetaData, nonPartitionColumns, partitionColumns, virtualCols, conf,
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
index b2627bf3bf2..6ea3b7cc1c7 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/ParseContext.java
@@ -128,7 +128,7 @@ public class ParseContext {
private boolean needViewColumnAuthorization;
private Map<ReduceSinkOperator, RuntimeValuesInfo> rsToRuntimeValuesInfo =
- new LinkedHashMap<ReduceSinkOperator, RuntimeValuesInfo>();
+ new LinkedHashMap<>();
/**
* Mapping holding information about semijoins.
*
@@ -451,7 +451,7 @@ public Map<String, ColumnStatsList> getColStatsCache() {
* @return col stats
*/
public ColumnStatsList getColStatsCached(PrunedPartitionList partList) {
- return ctx.getOpContext().getColStatsCache().get(partList.getKey().orElse(null));
+ return ctx.getOpContext().getColStatsCache().get(partList.getKey());
}
/**
@@ -515,8 +515,7 @@ public Set<ReadEntity> getSemanticInputs() {
return semanticInputs;
}
- public void replaceRootTask(Task<?> rootTask,
- List<? extends Task<?>> tasks) {
+ public void replaceRootTask(Task<?> rootTask, List<? extends Task<?>> tasks) {
this.rootTasks.remove(rootTask);
this.rootTasks.addAll(tasks);
}
@@ -663,7 +662,7 @@ public void setColumnStatsAutoGatherContexts(
public Collection<Operator> getAllOps() {
List<Operator> ops = new ArrayList<>();
- Set<Operator> visited = new HashSet<Operator>();
+ Set<Operator> visited = new HashSet<>();
for (Operator<?> op : getTopOps().values()) {
getAllOps(ops, visited, op);
}
diff --git a/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java b/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
index 398dbf555de..3329d4ae749 100644
--- a/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
+++ b/ql/src/java/org/apache/hadoop/hive/ql/parse/PrunedPartitionList.java
@@ -18,11 +18,9 @@
package org.apache.hadoop.hive.ql.parse;
-import java.util.ArrayList;
import java.util.Collections;
import java.util.List;
import java.util.Objects;
-import java.util.Optional;
import java.util.Set;
import org.apache.hadoop.hive.ql.metadata.Partition;
@@ -37,7 +35,7 @@ public class PrunedPartitionList {
private final Table source;
/** Key to identify this partition list. */
- private final Optional<String> ppListKey;
+ private final String ppListKey;
/** Partitions that either satisfy the partition criteria, or may satisfy it. */
private final Set<Partition> partitions;
@@ -56,7 +54,7 @@ public PrunedPartitionList(Table source, Set<Partition> partitions,
public PrunedPartitionList(Table source, String key, Set<Partition> partitions,
List<String> referred, boolean hasUnknowns) {
this.source = Objects.requireNonNull(source);
- this.ppListKey = Optional.ofNullable(key);
+ this.ppListKey = key;
this.referred = Objects.requireNonNull(referred);
this.partitions = Objects.requireNonNull(partitions);
this.hasUnknowns = hasUnknowns;
@@ -66,7 +64,7 @@ public Table getSourceTable() {
return source;
}
- public Optional<String> getKey() {
+ public String getKey() {
return ppListKey;
}
@@ -82,7 +80,7 @@ public Set<Partition> getPartitions() {
* @return all partitions.
*/
public List<Partition> getNotDeniedPartns() {
- return Collections.unmodifiableList(new ArrayList<>(partitions));
+ return List.copyOf(partitions);
}
/**