[ 
https://issues.apache.org/jira/browse/HUDI-6941?page=com.atlassian.jira.plugin.system.issuetabpanels:all-tabpanel
 ]

xy updated HUDI-6941:
---------------------
    Description: 
version: hudi-0.14.0-rc1,hudi-0.14.0-rc2

 

CREATE TABLE `hudi_test`.`tmp_hudi_test_1` (
  `id` string,
  `name` string,
  `dt` bigint,
  `day` STRING COMMENT '日期分区',
  `hour` INT COMMENT '小时分区'
)using hudi
OPTIONS ('hoodie.datasource.write.hive_style_partitioning' 'false', 
'hoodie.datasource.meta.sync.enable' 'false', 
'hoodie.datasource.hive_sync.enable' 'false')
tblproperties (
  'primaryKey' = 'id',
  'type' = 'mor',
  'preCombineField'='dt',
  'hoodie.index.type' = 'BUCKET',
  'hoodie.bucket.index.hash.field' = 'id',
  'hoodie.bucket.index.num.buckets'=512
)
PARTITIONED BY (`day`,`hour`);

insert into `hudi_test`.`tmp_hudi_test_1` select '1' as id, 'aa' as name, 123 
as dt, '2023-10-12' as `day`, 10 as `hour`;
insert into `hudi_test`.`tmp_hudi_test_1` select '1' as id, 'aa' as name, 123 
as dt, '2023-10-12' as `day`, 11 as `hour`;
insert into `hudi_test`.`tmp_hudi_test_1` select '1' as id, 'aa' as name, 123 
as dt, '2023-10-12' as `day`, 12 as `hour`;

select * from `hudi_test`.`tmp_hudi_test_1` where day='2023-10-12' and hour=11;

 

 

The correct number of tasks in the stage should be 1.

  was:
version: hudi-0.14.0-rc1,hudi-0.14.0-rc2

 

CREATE TABLE `hudi_test`.`tmp_hudi_test_1` (
  `id` string,
  `name` string,
  `dt` bigint,
  `day` STRING COMMENT '日期分区',
  `hour` INT COMMENT '小时分区'
)using hudi
OPTIONS ('hoodie.datasource.write.hive_style_partitioning' 'false', 
'hoodie.datasource.meta.sync.enable' 'false', 
'hoodie.datasource.hive_sync.enable' 'false')
tblproperties (
  'primaryKey' = 'id',
  'type' = 'mor',
  'preCombineField'='dt',
  'hoodie.index.type' = 'BUCKET',
  'hoodie.bucket.index.hash.field' = 'id',
  'hoodie.bucket.index.num.buckets'=512
)
PARTITIONED BY (`day`,`hour`);

insert into `hudi_test`.`tmp_hudi_test_1` select '1' as id, 'aa' as name, 123 
as dt, '2023-10-12' as `day`, 10 as `hour`;
insert into `hudi_test`.`tmp_hudi_test_1` select '1' as id, 'aa' as name, 123 
as dt, '2023-10-12' as `day`, 11 as `hour`;
insert into `hudi_test`.`tmp_hudi_test_1` select '1' as id, 'aa' as name, 123 
as dt, '2023-10-12' as `day`, 12 as `hour`;

select * from `hudi_test`.`tmp_hudi_test_1` where day='2023-10-12' and hour=11;


> sparksql query performance cost more in hudi 0.14-release
> ---------------------------------------------------------
>
>                 Key: HUDI-6941
>                 URL: https://issues.apache.org/jira/browse/HUDI-6941
>             Project: Apache Hudi
>          Issue Type: Bug
>          Components: spark-sql
>            Reporter: xy
>            Priority: Major
>         Attachments: paritions_not_pushdown.jpg, test (1).html
>
>
> version: hudi-0.14.0-rc1,hudi-0.14.0-rc2
>  
> CREATE TABLE `hudi_test`.`tmp_hudi_test_1` (
>   `id` string,
>   `name` string,
>   `dt` bigint,
>   `day` STRING COMMENT '日期分区',
>   `hour` INT COMMENT '小时分区'
> )using hudi
> OPTIONS ('hoodie.datasource.write.hive_style_partitioning' 'false', 
> 'hoodie.datasource.meta.sync.enable' 'false', 
> 'hoodie.datasource.hive_sync.enable' 'false')
> tblproperties (
>   'primaryKey' = 'id',
>   'type' = 'mor',
>   'preCombineField'='dt',
>   'hoodie.index.type' = 'BUCKET',
>   'hoodie.bucket.index.hash.field' = 'id',
>   'hoodie.bucket.index.num.buckets'=512
> )
> PARTITIONED BY (`day`,`hour`);
> insert into `hudi_test`.`tmp_hudi_test_1` select '1' as id, 'aa' as name, 123 
> as dt, '2023-10-12' as `day`, 10 as `hour`;
> insert into `hudi_test`.`tmp_hudi_test_1` select '1' as id, 'aa' as name, 123 
> as dt, '2023-10-12' as `day`, 11 as `hour`;
> insert into `hudi_test`.`tmp_hudi_test_1` select '1' as id, 'aa' as name, 123 
> as dt, '2023-10-12' as `day`, 12 as `hour`;
> select * from `hudi_test`.`tmp_hudi_test_1` where day='2023-10-12' and 
> hour=11;
>  
>  
> The correct number of tasks in the stage should be 1.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to