[
https://issues.apache.org/jira/browse/HIVE-27898?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17819491#comment-17819491
]
Butao Zhang commented on HIVE-27898:
------------------------------------
*After disable cbo, the query is also fine.*
// set conf in beeline:
set hive.fetch.task.conversion=none; //enforce launching tez task
set hive.explain.user=false;
set hive.cbo.enable=false;
{code:java}
0: jdbc:hive2://127.0.0.1:10000/default> show create table testdb.test_data_02;
INFO : Compiling
command(queryId=hive_20240222143047_1bb04d39-fc35-40e3-a897-d4211d3f5e9e): show
create table testdb.test_data_02
INFO : Semantic Analysis Completed (retrial = false)
INFO : Created Hive schema:
Schema(fieldSchemas:[FieldSchema(name:createtab_stmt, type:string, comment:from
deserializer)], properties:null)
INFO : Completed compiling
command(queryId=hive_20240222143047_1bb04d39-fc35-40e3-a897-d4211d3f5e9e); Time
taken: 0.045 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing
command(queryId=hive_20240222143047_1bb04d39-fc35-40e3-a897-d4211d3f5e9e): show
create table testdb.test_data_02
INFO : Starting task [Stage-0:DDL] in serial mode
INFO : Completed executing
command(queryId=hive_20240222143047_1bb04d39-fc35-40e3-a897-d4211d3f5e9e); Time
taken: 0.057 seconds
+----------------------------------------------------+
| createtab_stmt |
+----------------------------------------------------+
| CREATE EXTERNAL TABLE `testdb`.`test_data_02`( |
| `id` string, |
| `name` string) |
| PARTITIONED BY SPEC ( |
| `name`) |
| ROW FORMAT SERDE |
| 'org.apache.iceberg.mr.hive.HiveIcebergSerDe' |
| STORED BY |
| 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler' |
| WITH SERDEPROPERTIES ( |
| 'serialization.format'='1') |
| LOCATION |
| 'hdfs://127.0.0.1:8028/tmp/testiceberg/default/test_data_02' |
| TBLPROPERTIES ( |
| 'bucketing_version'='2', |
| 'current-snapshot-id'='6130266132019241842', |
| 'engine.hive.enabled'='true', |
| 'format-version'='2', |
| 'iceberg.catalog'='location_based_table', |
| 'table_type'='ICEBERG', |
| 'transient_lastDdlTime'='1708508832') |
+----------------------------------------------------+
21 rows selected (0.482 seconds)
0: jdbc:hive2://127.0.0.1:10000/default>
0: jdbc:hive2://127.0.0.1:10000/default>
0: jdbc:hive2://127.0.0.1:10000/default> explain select * from (select * from
testdb.test_data_02 limit 10) s1;
INFO : Compiling
command(queryId=hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97):
explain select * from (select * from testdb.test_data_02 limit 10) s1
INFO : Semantic Analysis Completed (retrial = false)
INFO : Created Hive schema: Schema(fieldSchemas:[FieldSchema(name:Explain,
type:string, comment:null)], properties:null)
INFO : Completed compiling
command(queryId=hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97); Time
taken: 0.09 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing
command(queryId=hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97):
explain select * from (select * from testdb.test_data_02 limit 10) s1
INFO : Starting task [Stage-3:EXPLAIN] in serial mode
INFO : Completed executing
command(queryId=hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97); Time
taken: 0.008 seconds
+----------------------------------------------------+
| Explain |
+----------------------------------------------------+
| STAGE DEPENDENCIES: |
| Stage-1 is a root stage |
| Stage-0 depends on stages: Stage-1 |
| |
| STAGE PLANS: |
| Stage: Stage-1 |
| Tez |
| DagId: hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97:18 |
| Edges: |
| Reducer 2 <- Map 1 (CUSTOM_SIMPLE_EDGE) |
| DagName: hive_20240222143051_e69515c5-6c8d-4bd7-8973-faa3f9ed6d97:18 |
| Vertices: |
| Map 1 |
| Map Operator Tree: |
| TableScan |
| alias: test_data_02 |
| Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE |
| Limit |
| Number of rows: 10 |
| Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE |
| Select Operator |
| expressions: id (type: string), name (type: string) |
| outputColumnNames: _col0, _col1 |
| Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE |
| Reduce Output Operator |
| null sort order: |
| sort order: |
| Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE |
| TopN Hash Memory Usage: 0.1 |
| value expressions: _col0 (type: string), _col1 (type:
string) |
| Execution mode: vectorized |
| Reducer 2 |
| Execution mode: vectorized |
| Reduce Operator Tree: |
| Limit |
| Number of rows: 10 |
| Statistics: Num rows: 2 Data size: 736 Basic stats: COMPLETE
Column stats: NONE |
| Select Operator |
| expressions: VALUE._col0 (type: string), VALUE._col1 (type:
string) |
| outputColumnNames: _col0, _col1 |
| Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE |
| File Output Operator |
| compressed: false |
| Statistics: Num rows: 2 Data size: 736 Basic stats:
COMPLETE Column stats: NONE |
| table: |
| input format:
org.apache.hadoop.mapred.SequenceFileInputFormat |
| output format:
org.apache.hadoop.hive.ql.io.HiveSequenceFileOutputFormat |
| serde:
org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe |
| |
| Stage: Stage-0 |
| Fetch Operator |
| limit: -1 |
| Processor Tree: |
| ListSink |
| |
+----------------------------------------------------+
55 rows selected (0.171 seconds)
0: jdbc:hive2://127.0.0.1:10000/default> select * from (select * from
testdb.test_data_02 limit 10) s1;
INFO : Compiling
command(queryId=hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd):
select * from (select * from testdb.test_data_02 limit 10) s1
INFO : Semantic Analysis Completed (retrial = false)
INFO : Created Hive schema: Schema(fieldSchemas:[FieldSchema(name:s1.id,
type:string, comment:null), FieldSchema(name:s1.name, type:string,
comment:null)], properties:null)
INFO : Completed compiling
command(queryId=hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd); Time
taken: 0.119 seconds
INFO : Concurrency mode is disabled, not creating a lock manager
INFO : Executing
command(queryId=hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd):
select * from (select * from testdb.test_data_02 limit 10) s1
INFO : Query ID = hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd
INFO : Total jobs = 1
INFO : Launching Job 1 out of 1
INFO : Starting task [Stage-1:MAPRED] in serial mode
INFO : Subscribed to counters: [] for queryId:
hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd
INFO : Session is already open
INFO : Dag name: select * from (select......a_04 limit 10) s1 (Stage-1)
INFO : HS2 Host: [D01-303-D6-10], Query ID:
[hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd], Dag ID:
[dag_1706773972635_0009_7], DAG Session ID: [application_1706773972635_0009]
INFO : Status: Running (Executing on YARN cluster with App id
application_1706773972635_0009)----------------------------------------------------------------------------------------------
VERTICES MODE STATUS TOTAL COMPLETED RUNNING PENDING
FAILED KILLED
----------------------------------------------------------------------------------------------
Map 1 .......... container SUCCEEDED 1 1 0 0
0 0
Reducer 2 ...... container SUCCEEDED 1 1 0 0
0 0
----------------------------------------------------------------------------------------------
VERTICES: 02/02 [==========================>>] 100% ELAPSED TIME: 21.09 s
----------------------------------------------------------------------------------------------
INFO : Completed executing
command(queryId=hive_20240222143056_4ab33c8c-6eff-4b03-9022-c6355985f0fd); Time
taken: 5.694 seconds
+--------+----------+
| s1.id | s1.name |
+--------+----------+
| 1 | a |
| 2 | b |
+--------+----------+
2 rows selected (5.885 seconds)
{code}
> When CBO is disabled, HIVE cannot query ICEBERG tables in nested subqueries
> properly
> ------------------------------------------------------------------------------------
>
> Key: HIVE-27898
> URL: https://issues.apache.org/jira/browse/HIVE-27898
> Project: Hive
> Issue Type: Bug
> Components: Iceberg integration
> Affects Versions: 4.0.0, 4.0.0-beta-1
> Reporter: yongzhi.shao
> Priority: Critical
> Attachments: hive-site.xml
>
>
> Currently, we found that when using HIVE4-BETA1 version, if we use ICEBERG
> table in the subquery, we can't get any data in the end.
> I have used HIVE3-TEZ for cross validation and HIVE3 does not have this
> problem when querying ICEBERG.
> {code:java}
> --spark3.4.1+iceberg 1.4.2
> CREATE TABLE datacenter.dwd.b_std_trade (
> uni_order_id STRING,
> data_from BIGINT,
> partner STRING,
> plat_code STRING,
> order_id STRING,
> uni_shop_id STRING,
> uni_id STRING,
> guide_id STRING,
> shop_id STRING,
> plat_account STRING,
> total_fee DOUBLE,
> item_discount_fee DOUBLE,
> trade_discount_fee DOUBLE,
> adjust_fee DOUBLE,
> post_fee DOUBLE,
> discount_rate DOUBLE,
> payment_no_postfee DOUBLE,
> payment DOUBLE,
> pay_time STRING,
> product_num BIGINT,
> order_status STRING,
> is_refund STRING,
> refund_fee DOUBLE,
> insert_time STRING,
> created STRING,
> endtime STRING,
> modified STRING,
> trade_type STRING,
> receiver_name STRING,
> receiver_country STRING,
> receiver_state STRING,
> receiver_city STRING,
> receiver_district STRING,
> receiver_town STRING,
> receiver_address STRING,
> receiver_mobile STRING,
> trade_source STRING,
> delivery_type STRING,
> consign_time STRING,
> orders_num BIGINT,
> is_presale BIGINT,
> presale_status STRING,
> first_fee_paytime STRING,
> last_fee_paytime STRING,
> first_paid_fee DOUBLE,
> tenant STRING,
> tidb_modified STRING,
> step_paid_fee DOUBLE,
> seller_flag STRING,
> is_used_store_card BIGINT,
> store_card_used DOUBLE,
> store_card_basic_used DOUBLE,
> store_card_expand_used DOUBLE,
> order_promotion_num BIGINT,
> item_promotion_num BIGINT,
> buyer_remark STRING,
> seller_remark STRING,
> trade_business_type STRING)
> USING iceberg
> PARTITIONED BY (uni_shop_id, truncate(4, created))
> LOCATION '/iceberg-catalog/warehouse/dwd/b_std_trade'
> TBLPROPERTIES (
> 'current-snapshot-id' = '7217819472703702905',
> 'format' = 'iceberg/orc',
> 'format-version' = '1',
> 'hive.stored-as' = 'iceberg',
> 'read.orc.vectorization.enabled' = 'true',
> 'sort-order' = 'uni_shop_id ASC NULLS FIRST, created ASC NULLS FIRST',
> 'write.distribution-mode' = 'hash',
> 'write.format.default' = 'orc',
> 'write.metadata.delete-after-commit.enabled' = 'true',
> 'write.metadata.previous-versions-max' = '3',
> 'write.orc.bloom.filter.columns' = 'order_id',
> 'write.orc.compression-codec' = 'zstd')
> --hive-iceberg
> CREATE EXTERNAL TABLE iceberg_dwd.b_std_trade
> STORED BY 'org.apache.iceberg.mr.hive.HiveIcebergStorageHandler'
> LOCATION 'hdfs://xxxx/iceberg-catalog/warehouse/dwd/b_std_trade'
> TBLPROPERTIES
> ('iceberg.catalog'='location_based_table','engine.hive.enabled'='true');
> select * from iceberg_dwd.b_std_trade
> where uni_shop_id = 'TEST|11111' limit 10 --10 rows
> select *
> from (
> select * from iceberg_dwd.b_std_trade
> where uni_shop_id = 'TEST|11111' limit 10
> ) t1; --10 rows
> select uni_shop_id
> from (
> select * from iceberg_dwd.b_std_trade
> where uni_shop_id = 'TEST|11111' limit 10
> ) t1; --0 rows
> select uni_shop_id
> from (
> select uni_shop_id as uni_shop_id from iceberg_dwd.b_std_trade
> where uni_shop_id = 'TEST|11111' limit 10
> ) t1; --0 rows
> --hive-orc
> select uni_shop_id
> from (
> select * from iceberg_dwd.trade_test
> where uni_shop_id = 'TEST|11111' limit 10
> ) t1; --10 ROWS{code}
>
--
This message was sent by Atlassian Jira
(v8.20.10#820010)