cnwyc commented on issue #17038:
URL: https://github.com/apache/doris/issues/17038#issuecomment-1441169194
Dear yongkang.zhong:
This is a table creation statement:
CREATE TABLE `dwd_jd_detail` (
`item_id` varchar(255) NOT NULL COMMENT '商品id',
`category` text NULL COMMENT '品类',
`brand_name_cn` text NULL COMMENT '品牌中文名',
`brand_name_en` text NULL COMMENT '品牌英文名',
`model` text NULL COMMENT '型号',
`sku_name` text NULL COMMENT 'sku名称',
`title` text NULL COMMENT '商品标题',
`price` decimal(10, 2) NULL COMMENT '商品价格',
`params` text NULL COMMENT '品参内容',
`store_id` text NULL COMMENT '店铺id',
`store_name` text NULL COMMENT '商店名称',
`tabs` text NULL COMMENT '店铺标签',
`store_flag` text NULL COMMENT '店铺标记',
`source` text NULL COMMENT '数据源平台',
`item_url` text NULL COMMENT '商品网站链接',
`create_time` datetime NULL COMMENT '创建时间'
) ENGINE=OLAP
DUPLICATE KEY(`item_id`)
COMMENT '燃气灶单品数据'
DISTRIBUTED BY HASH(`item_id`) BUCKETS 1
PROPERTIES (
"replication_allocation" = "tag.location.default: 3",
"in_memory" = "false",
"storage_format" = "V2",
"disable_auto_compaction" = "false"
);
This is the statement I executed:
DROP TABLE IF EXISTS temp.sun_pt0;
CREATE TABLE temp.sun_pt0 AS
SELECT item_id
AS item_id
, 'range_hood'
AS category
, brand_name_cn
AS brand_name_cn
, brand_name_en
AS brand_name_en
, get_json_string(params, '$.the_article_number')
AS model
, get_json_string(params, '$.machine_design')
AS product_type
, get_json_string(params, '$.goods_origin')
AS product_origin
, get_json_string(params, '$.product_name')
AS product_name
, NULL
AS time_to_market
, NULL
AS price
, regexp_extract(get_json_string(params, '$.net_weight'), '[0-9.]+', 0)
AS net_weight
, regexp_extract(get_json_string(params, '$.goods_gross_weight'),
'[0-9.]+', 0) AS gross_weight
, IF(color != '', color, NULL)
AS color
, IF(ext_dim != '', regexp_extract(ext_dim, '[1-9]\\d+', 0), NULL)
AS external_dimensions_long
, IF(ext_dim != '', regexp_extract(ext_dim, '[1-9]\\d+.+([1-9]\\d+)',
1), NULL) AS external_dimensions_width
, IF(ext_dim != '', regexp_extract(ext_dim,
'[1-9]\\d+.+[1-9]\\d+.+([1-9]\\d+)', 1), NULL) AS external_dimensions_hight
, NULL
AS internal_dimensions_long
, NULL
AS internal_dimensions_width
, NULL
AS internal_dimensions_hight
, REPLACE(get_json_string(params, '$.panel_material'), '+', ';')
AS panel_material
, REPLACE(get_json_string(params, '$.close_the_smoke_cavity_material'),
'+', ';') AS close_the_smoke_cavity_material
, NULL
AS oil_net_material
, REPLACE(get_json_string(params, '$.material_science'), '+', ';')
AS material_science
, IF(style != '', style, NULL)
AS style
, IF(workmanship = '烤漆', '烤漆', NULL)
AS workmanship -- 使用材料/面板材料只获取烤漆值
, NULL
AS pattern_description
, get_json_string(params, '$.machine_design')
AS material_design
, get_json_string(params, '$.energy_efficiency_grade')
AS energy_efficiency_grade
, NULL
AS power_consumption
, IF(constant_frequency_conversion != '',
constant_frequency_conversion, NULL) AS
constant_frequency_conversion
, IF(rated_power != '', rated_power, NULL)
AS rated_power
, NULL
AS shutdown_power
, IF(lighting_power != '', lighting_power, NULL)
AS lighting_power
, NULL
AS sound_noise
, IF(sound_level_min != '', regexp_extract(sound_level_min, '\\d+', 0),
NULL) AS sound_pressure_level_noise_min
, IF(sound_level_max != '', regexp_extract(sound_level_max, '\\d+', 0),
NULL) AS sound_pressure_level_noise_max
, IF(exhaust_air_min != '', regexp_extract(exhaust_air_min, '\\d+', 0),
NULL) AS exhaust_air_min
, IF(exhaust_air_max != '', regexp_extract(exhaust_air_max, '\\d+', 0),
NULL) AS exhaust_air_max
, IF(wind_pressure_min != '', regexp_extract(wind_pressure_min, '\\d+',
0), NULL) AS wind_pressure_min
, IF(wind_pressure_max != '', regexp_extract(wind_pressure_max, '\\d+',
0), NULL) AS wind_pressure_max
, IF(motor_output_power != '', motor_output_power, NULL)
AS motor_output_power
, NULL
AS oil_separation
, NULL
AS net_flavor_rate
, NULL
AS total_pressure_efficiency
, IF(maximum_static_pressure_value != '',
maximum_static_pressure_value, NULL) AS
maximum_static_pressure_value
, NULL
AS smoke_area
, NULL
AS appointment_function
, IF(child_lock IS NULL, NULL, IF(child_lock REGEXP '^支持', '有', '无'))
AS child_lock
, NULL
AS light_source
, get_json_string(params, '$.way_into_the_wind')
AS way_into_the_wind
, IF(air_outlet_diameter != '', air_outlet_diameter, NULL)
AS air_outlet_diameter
, IF(adjust_gears_num != '', adjust_gears_num, NULL)
AS adjust_gears_num
-- '(自|免|热|自动|自动的|不用|变频|免拆|一键|热炫|光波|双模|智|无|触摸|触控|免拆|体感)清洗'
, IF(cleaning_method != '', cleaning_method, NULL)
AS cleaning_method
, NULL
AS number_of_fans
, NULL
AS intelligent_lifting
, NULL
AS double_folding_and_double_cavity
, NULL
AS smoking_port_under_set
, NULL
AS delay_shutdown
, NULL
AS easy_to_disassemble
, NULL
AS easy_to_clean
, NULL
timing_function
, NULL
anti_backfilling
, REPLACE(get_json_string(params, '$.function'), ',', ';')
AS function
, REPLACE(get_json_string(params, '$.control_mode'), ';', ';')
AS control_mode
, NULL
AS interconnection_version_name
, NULL
AS support_ecology
, NULL
AS agreement
, NULL
AS voice_interaction
, NULL
AS large_screen_interaction
, NULL
AS allhouse_zhilian
, NULL
AS intelligent_smoke_sensing_cruise
, NULL
AS installation_dimensions_long
, NULL
AS installation_dimensions_width
, NULL
AS installation_dimensions_hight
, NULL
AS opening_size
, REPLACE(get_json_string(params, '$.installation_method'),
'(不提供安装服务)', '') AS installation_method
, NULL
AS installation_service_fee
, NULL
AS warranty_period
, NULL
AS other_warranty_period
, NULL
AS target_customer_group
, NULL
AS scenario_description
, NULL
AS feature_selling_points
, NULL
AS technical_selling_points
, NULL
AS selling_points_appearance_design
, NULL
AS service_selling_points
, NULL
AS endorsement
, NULL
AS mechanism
, NULL
AS honor
, NULL
AS patent
FROM (SELECT *
, regexp_extract(get_json_string(params, '$.the_machine_size'),
'[1-9]\\d+.+[1-9]\\d+.+[1-9]\\d+', 0) AS ext_dim
, regexp_extract(params, '([银浅草紫乳]?.|不锈钢|咖啡|卡其)色', 0)
AS color
, regexp_extract(params, '复古|简约|轻奢', 0)
AS style
, NVL(get_json_string(params, '$.material_science'),
get_json_string(params, '$.panel_material')) AS workmanship
, regexp_extract(params, '(定|双?变)频', 0)
AS constant_frequency_conversion
, regexp_extract(UPPER(get_json_string(params, '$.rated_power')),
'([1-9]\\d+)W', 1) AS rated_power
, regexp_extract(REGEXP_REPLACE(get_json_string(params,
'$.lighting_power'), '^0+W', ''), '([0-9.]+)', 0) AS lighting_power
, regexp_extract(get_json_string(params,
'$.sound_pressure_level_noise'), '>\\d+|\\d+-', 0) AS
sound_level_min
, regexp_extract(get_json_string(params,
'$.sound_pressure_level_noise'), '<\\d+|-\\d+', 0) AS
sound_level_max
, regexp_extract(get_json_string(params, '$.exhaust_air'),
'≥\\d+|\\d+-', 0) AS exhaust_air_min
, regexp_extract(get_json_string(params, '$.exhaust_air'),
'≤\\d+|-\\d+', 0) AS exhaust_air_max
, regexp_extract(get_json_string(params, '$.wind_pressure'),
'≥\\d+|\\d+-', 0) AS wind_pressure_min
, regexp_extract(get_json_string(params, '$.wind_pressure'),
'≤\\d+|-\\d+', 0) AS wind_pressure_max
, regexp_extract(get_json_string(params, '$.motor_input_power'),
'[1-9]\\d+', 0) AS motor_output_power
, regexp_extract(get_json_string(params,
'$.maximum_static_pressure_value'), '[1-9]\\d+', 0) AS
maximum_static_pressure_value
, get_json_string(params, '$.bh_function')
AS child_lock
, regexp_extract(get_json_string(params,
'$.the_outlet_diameter'), '\\d+', 0) AS
air_outlet_diameter
, REGEXP_REPLACE(regexp_extract(params, '(\\d+|三)档', 0), '三',
'3') AS adjust_gears_num
, regexp_extract(params, '(自|免|热|自动|一键|热炫|光波)清洗', 0)
AS cleaning_method
FROM dwd_jd.dwd_jd_detail
WHERE category = 'range_hood') t;
Attached is the sample data of the original table
At 2023-02-23 11:01:09, "yongkang.zhong" ***@***.***> wrote:
Could you please provide your SQL statement to help us reproduce the problem
—
Reply to this email directly, view it on GitHub, or unsubscribe.
You are receiving this because you authored the thread.Message ID:
***@***.***>
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]