hudi-bot opened a new issue, #16264:
URL: https://github.com/apache/hudi/issues/16264
version: hudi-0.14.0-rc1,hudi-0.14.0-rc2
-- Reproduction script.
-- Table: merge-on-read Hudi table keyed on `id`, pre-combined on `dt`,
-- using the BUCKET index (512 buckets hashed on `id`) and partitioned by
-- (`day`, `hour`). Hive-style partition paths and meta/hive sync are disabled.
CREATE TABLE `hudi_test`.`tmp_hudi_test_1` (
    `id`   STRING,
    `name` STRING,
    `dt`   BIGINT,
    `day`  STRING COMMENT '日期分区',  -- column comment text: "date partition"
    `hour` INT COMMENT '小时分区'      -- column comment text: "hour partition"
)
USING hudi
OPTIONS (
    'hoodie.datasource.write.hive_style_partitioning' 'false',
    'hoodie.datasource.meta.sync.enable' 'false',
    'hoodie.datasource.hive_sync.enable' 'false'
)
TBLPROPERTIES (
    'primaryKey' = 'id',
    'type' = 'mor',
    'preCombineField' = 'dt',
    'hoodie.index.type' = 'BUCKET',
    'hoodie.bucket.index.hash.field' = 'id',
    'hoodie.bucket.index.num.buckets' = 512
)
PARTITIONED BY (`day`, `hour`);

-- Write the same row into three different hour partitions of one day
-- (hour = 10, 11, 12), one INSERT statement per partition.
INSERT INTO `hudi_test`.`tmp_hudi_test_1`
SELECT
    '1' AS id,
    'aa' AS name,
    123 AS dt,
    '2023-10-12' AS `day`,
    10 AS `hour`;

INSERT INTO `hudi_test`.`tmp_hudi_test_1`
SELECT
    '1' AS id,
    'aa' AS name,
    123 AS dt,
    '2023-10-12' AS `day`,
    11 AS `hour`;

INSERT INTO `hudi_test`.`tmp_hudi_test_1`
SELECT
    '1' AS id,
    'aa' AS name,
    123 AS dt,
    '2023-10-12' AS `day`,
    12 AS `hour`;

-- Read back a single partition by filtering on both partition columns.
SELECT *
FROM `hudi_test`.`tmp_hudi_test_1`
WHERE day = '2023-10-12'
  AND hour = 11;
Expected: the correct number of tasks for the stage is 1.
If the table has a large number of files, this can cause a driver OOM or long full-GC pauses.
## JIRA info
- Link: https://issues.apache.org/jira/browse/HUDI-6941
- Type: Bug
- Fix version(s):
- 1.1.0
- Attachment(s):
- 14/Oct/23
03:23;xuzifu;dump_filestatus.jpg;https://issues.apache.org/jira/secure/attachment/13063550/dump_filestatus.jpg
- 13/Oct/23
15:35;xuzifu;expected_and_right_stages.jpg;https://issues.apache.org/jira/secure/attachment/13063545/expected_and_right_stages.jpg
- 13/Oct/23
14:11;xuzifu;paritions_not_pushdown.jpg;https://issues.apache.org/jira/secure/attachment/13063544/paritions_not_pushdown.jpg
- 13/Oct/23 09:19;xuzifu;test
(1).html;https://issues.apache.org/jira/secure/attachment/13063542/test+%281%29.html
---
## Comments
17/Oct/23 04:38;danny0405;Fixed via master branch:
d7d321544644b9e599004beddd9a3c202bc05e7d;;;
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]