xiangyuf commented on PR #4208:
URL: https://github.com/apache/paimon/pull/4208#issuecomment-2831903621
Reproduce procedure:
DDL:
`
use catalog paimon;
use paimon_test;
CREATE TABLE dim_orders (
`order_id` INT,
`order_name` STRING,
`order_product_id` INT,
`order_customer_id` INT,
`order_status` STRING,
`create_date` TIMESTAMP,
`create_ts` INT,
PRIMARY key (`order_id`) NOT ENFORCED
)
WITH (
'bucket' = '20',
'bucket-key' = 'order_id'
);
`
Dim update job:
`
CREATE TABLE datagen_source (
`order_id` INT,
`order_name` STRING,
`order_product_id` INT,
`order_customer_id` INT,
`order_status` STRING
)
WITH (
'connector' = 'datagen',
'rows-per-second' = '50000',
'fields.order_name.length' = '10',
'fields.order_status.length' = '5',
'fields.order_id.min' = '1',
'fields.order_id.max' = '100000000',
'fields.order_product_id.min' = '1',
'fields.order_product_id.max' = '1000000',
'fields.order_customer_id.min' = '1',
'fields.order_customer_id.max' = '1000000'
);
CREATE VIEW temp_source AS
SELECT order_id,
order_name,
order_product_id,
order_customer_id,
order_status,
CURRENT_TIMESTAMP AS create_date,
CAST(UNIX_TIMESTAMP() AS INT) AS create_ts
FROM datagen_source;
INSERT INTO `paimon`.`paimon_test`.`dim_orders`
SELECT *
FROM temp_source;
`
Dim join job:
`
CREATE TABLE `datagen_source` (
`product_id` INTEGER,
`product_name` STRING,
`product_category_id` INTEGER,
`product_order_id` INTEGER,
`product_status` STRING,
`create_date` AS `proctime`()
)
WITH (
'fields.product_category_id.min' = '1',
'connector' = 'datagen',
'fields.product_category_id.max' = '1000000',
'fields.product_name.length' = '10',
'fields.product_status.length' = '5',
'rows-per-second' = '48000',
'fields.product_order_id.max' = '200000000',
'fields.product_order_id.min' = '1'
);
CREATE TABLE `print_sink` (
`product_id` INTEGER,
`product_name` STRING,
`product_category_id` INTEGER,
`product_order_id` INTEGER,
`product_status` STRING,
`create_date` TIMESTAMP,
`order_name` STRING,
`orders_customer_id` INTEGER,
`order_status` STRING
)
WITH ('connector' = 'print');
CREATE VIEW `trade_orde_view` AS
SELECT `gen`.`product_id`,
`gen`.`product_name`,
`gen`.`product_category_id`,
`gen`.`product_order_id`,
`gen`.`product_status`,
`gen`.`create_date`,
`orders`.`order_name`,
`orders`.`order_customer_id`,
`order_status`
FROM `datagen_source` AS `gen`
INNER JOIN
`paimon`.`paimon_test`.`dim_orders`
/*+ `OPTIONS`('lookup.cache' = 'AUTO', 'lookup.refresh.async' =
'true', 'lookup.cache-max-disk-size' = '40 gb') */
FOR SYSTEM_TIME AS OF `gen`.`create_date` AS `orders`
ON `gen`.`product_order_id` = `orders`.`order_id`;
INSERT INTO `print_sink` (
SELECT *
FROM `trade_orde_view`
WHERE `order_name` IS NOT NULL
);
`
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]