li-ang-666 commented on issue #10299:
URL: https://github.com/apache/hudi/issues/10299#issuecomment-1849854345
Here is the SQL:
`CREATE TABLE ods (
    -- Source table: reads MySQL table company_base.company_index through the
    -- mysql-cdc connector (see the WITH options below).
    id DECIMAL(20, 0),
    company_id DECIMAL(20, 0),
    company_type TINYINT,
    company_name STRING,
    company_name_english STRING,
    establish_date TIMESTAMP(3),
    register_addr STRING,
    register_capital_amt DECIMAL(20, 0),
    register_capital_currency INT,
    actual_capital_amt DECIMAL(20, 0),
    actual_capital_currency INT,
    company_registation_status STRING,
    company_logo_url STRING,
    company_shortname STRING,
    company_phone STRING,
    company_email STRING,
    company_website STRING,
    social_security_headcount BIGINT,
    business_type_id DECIMAL(20, 0),
    original_type STRING,
    tyc_credit_rating INT,
    longitude_by_baidu_api DECIMAL(20, 15),
    latitude_by_baidu_api DECIMAL(20, 15),
    unified_social_credit_code STRING,
    org_type STRING,
    is_micro_enterprise TINYINT,
    industry_national_std_lv1_code STRING,
    industry_national_std_lv2_code STRING,
    industry_national_std_lv3_code STRING,
    business_areacode_00 STRING,
    business_areacode_01 STRING,
    business_areacode_02 STRING,
    crawl_time TIMESTAMP(3),
    province_base_by_register_institute STRING,
    create_time TIMESTAMP(3),
    update_time TIMESTAMP(3),
    -- Not a physical column: populated from the connector's 'op_ts' metadata key.
    op_ts TIMESTAMP(3) METADATA FROM 'op_ts' VIRTUAL,
    PRIMARY KEY (id) NOT ENFORCED
) WITH (
    'connector' = 'mysql-cdc',
    'hostname' = 'ee59dd05fc0f4bb9a2497c8d9146a53cin01.internal.cn-north-4.mysql.rds.myhuaweicloud.com',
    'port' = '3306',
    -- NOTE(review): username and password are embedded in plain text here;
    -- rotate them and load them from a secret store instead of the SQL text.
    'username' = 'jdhw_d_data_dml',
    'password' = '2s0^tFa4SLrp72',
    'database-name' = 'company_base',
    'table-name' = 'company_index',
    'server-id' = '6128',
    -- Start reading from the earliest available offset.
    'scan.startup.mode' = 'earliest-offset'
)
2023-12-05 16:39:28.061 [INFO] [flink-akka.actor.default-dispatcher-4]
(HudiJob.java:38) - sql:
CREATE TABLE dwd (
    -- Sink table: Hudi MERGE_ON_READ table stored at the 'path' given below.
    -- Column list mirrors ods, except op_ts is an ordinary column here.
    id DECIMAL(20, 0),
    company_id DECIMAL(20, 0),
    company_type TINYINT,
    company_name STRING,
    company_name_english STRING,
    establish_date TIMESTAMP(3),
    register_addr STRING,
    register_capital_amt DECIMAL(20, 0),
    register_capital_currency INT,
    actual_capital_amt DECIMAL(20, 0),
    actual_capital_currency INT,
    company_registation_status STRING,
    company_logo_url STRING,
    company_shortname STRING,
    company_phone STRING,
    company_email STRING,
    company_website STRING,
    social_security_headcount BIGINT,
    business_type_id DECIMAL(20, 0),
    original_type STRING,
    tyc_credit_rating INT,
    longitude_by_baidu_api DECIMAL(20, 15),
    latitude_by_baidu_api DECIMAL(20, 15),
    unified_social_credit_code STRING,
    org_type STRING,
    is_micro_enterprise TINYINT,
    industry_national_std_lv1_code STRING,
    industry_national_std_lv2_code STRING,
    industry_national_std_lv3_code STRING,
    business_areacode_00 STRING,
    business_areacode_01 STRING,
    business_areacode_02 STRING,
    crawl_time TIMESTAMP(3),
    province_base_by_register_institute STRING,
    create_time TIMESTAMP(3),
    update_time TIMESTAMP(3),
    op_ts TIMESTAMP(3),
    PRIMARY KEY (id) NOT ENFORCED
) WITH (
    'connector' = 'hudi',
    'path' = 'obs://hadoop-obs/hudi_ods/company_index',
    'table.type' = 'MERGE_ON_READ',
    -- index: bucket index with 32 buckets
    'index.type' = 'BUCKET',
    'hoodie.bucket.index.num.buckets' = '32',
    -- write: 2 write tasks; precombine is enabled and keyed on op_ts
    'write.tasks' = '2',
    'write.task.max.size' = '512',
    'write.merge.max_memory' = '0',
    'write.precombine' = 'true',
    'write.precombine.field' = 'op_ts',
    -- compaction: async compaction disabled, scheduling enabled;
    -- trigger strategy 'num_or_time' with 3 delta commits / 3600 delta seconds
    'compaction.async.enabled' = 'false',
    'compaction.schedule.enabled' = 'true',
    'compaction.trigger.strategy' = 'num_or_time',
    'compaction.delta_commits' = '3',
    'compaction.delta_seconds' = '3600',
    -- clean & archive: async cleaning on, retain 10 commits;
    -- archive bounds are min 20 / max 30 commits
    'clean.async.enabled' = 'true',
    'clean.retain_commits' = '10',
    'archive.min_commits' = '20',
    'archive.max_commits' = '30',
    -- hive: sync to hudi_ods.company_index via the metastore (hms mode)
    'hive_sync.enabled' = 'true',
    'hive_sync.mode' = 'hms',
    'hive_sync.metastore.uris' = 'thrift://10.99.202.153:9083',
    'hive_sync.db' = 'hudi_ods',
    'hive_sync.table' = 'company_index',
    'hive_sync.table.strategy' = 'RO',
    'hive_sync.skip_ro_suffix' = 'true'
)`
`INSERT INTO dwd
-- Copies every row of ods into dwd, column for column and in dwd's declared
-- column order. Each TIMESTAMP(3) column is cast to STRING, passed through
-- CONVERT_TZ from 'Asia/Shanghai' to 'UTC', and cast back to TIMESTAMP(3).
-- Fix: the identifier update_time had been split across two lines as
-- "up" / "date_time", which made the statement unparsable.
SELECT
    id,
    company_id,
    company_type,
    company_name,
    company_name_english,
    CAST(CONVERT_TZ(CAST(establish_date AS STRING), 'Asia/Shanghai', 'UTC') AS TIMESTAMP(3)) AS establish_date,
    register_addr,
    register_capital_amt,
    register_capital_currency,
    actual_capital_amt,
    actual_capital_currency,
    company_registation_status,
    company_logo_url,
    company_shortname,
    company_phone,
    company_email,
    company_website,
    social_security_headcount,
    business_type_id,
    original_type,
    tyc_credit_rating,
    longitude_by_baidu_api,
    latitude_by_baidu_api,
    unified_social_credit_code,
    org_type,
    is_micro_enterprise,
    industry_national_std_lv1_code,
    industry_national_std_lv2_code,
    industry_national_std_lv3_code,
    business_areacode_00,
    business_areacode_01,
    business_areacode_02,
    CAST(CONVERT_TZ(CAST(crawl_time AS STRING), 'Asia/Shanghai', 'UTC') AS TIMESTAMP(3)) AS crawl_time,
    province_base_by_register_institute,
    CAST(CONVERT_TZ(CAST(create_time AS STRING), 'Asia/Shanghai', 'UTC') AS TIMESTAMP(3)) AS create_time,
    CAST(CONVERT_TZ(CAST(update_time AS STRING), 'Asia/Shanghai', 'UTC') AS TIMESTAMP(3)) AS update_time,
    CAST(CONVERT_TZ(CAST(op_ts AS STRING), 'Asia/Shanghai', 'UTC') AS TIMESTAMP(3)) AS op_ts
FROM ods`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]