cnwyc commented on issue #17038:
URL: https://github.com/apache/doris/issues/17038#issuecomment-1441169194

   Dear yongkang.zhong:
      This is a table creation statement:
      CREATE TABLE `dwd_jd_detail` (
     `item_id` varchar(255) NOT NULL COMMENT '商品id',
     `category` text NULL COMMENT '品类',
     `brand_name_cn` text NULL COMMENT '品牌中文名',
     `brand_name_en` text NULL COMMENT '品牌英文名',
     `model` text NULL COMMENT '型号',
     `sku_name` text NULL COMMENT 'sku名称',
     `title` text NULL COMMENT '商品标题',
     `price` decimal(10, 2) NULL COMMENT '商品价格',
     `params` text NULL COMMENT '品参内容',
     `store_id` text NULL COMMENT '店铺id',
     `store_name` text NULL COMMENT '商店名称',
     `tabs` text NULL COMMENT '店铺标签',
     `store_flag` text NULL COMMENT '店铺标记',
     `source` text NULL COMMENT '数据源平台',
     `item_url` text NULL COMMENT '商品网站链接',
     `create_time` datetime NULL COMMENT '创建时间'
   ) ENGINE=OLAP
   DUPLICATE KEY(`item_id`)
   COMMENT '燃气灶单品数据'
   DISTRIBUTED BY HASH(`item_id`) BUCKETS 1
   PROPERTIES (
   "replication_allocation" = "tag.location.default: 3",
   "in_memory" = "false",
   "storage_format" = "V2",
   "disable_auto_compaction" = "false"
   );
   
   
   This is the statement I executed:
   DROP TABLE IF EXISTS temp.sun_pt0;
   CREATE TABLE temp.sun_pt0 AS
   SELECT item_id                                                               
                   AS item_id
        , 'range_hood'                                                          
                   AS category
        , brand_name_cn                                                         
                   AS brand_name_cn
        , brand_name_en                                                         
                   AS brand_name_en
        , get_json_string(params, '$.the_article_number')                       
                   AS model
        , get_json_string(params, '$.machine_design')                           
                   AS product_type
        , get_json_string(params, '$.goods_origin')                             
                   AS product_origin
        , get_json_string(params, '$.product_name')                             
                   AS product_name
        , NULL                                                                  
                   AS time_to_market
        , NULL                                                                  
                   AS price
        , regexp_extract(get_json_string(params, '$.net_weight'), '[0-9.]+', 0) 
                   AS net_weight
        , regexp_extract(get_json_string(params, '$.goods_gross_weight'), 
'[0-9.]+', 0)            AS gross_weight
        , IF(color != '', color, NULL)                                          
                   AS color
        , IF(ext_dim != '', regexp_extract(ext_dim, '[1-9]\\d+', 0), NULL)      
                   AS external_dimensions_long
        , IF(ext_dim != '', regexp_extract(ext_dim, '[1-9]\\d+.+([1-9]\\d+)', 
1), NULL)            AS external_dimensions_width
        , IF(ext_dim != '', regexp_extract(ext_dim, 
'[1-9]\\d+.+[1-9]\\d+.+([1-9]\\d+)', 1), NULL) AS external_dimensions_hight
        , NULL                                                                  
                   AS internal_dimensions_long
        , NULL                                                                  
                   AS internal_dimensions_width
        , NULL                                                                  
                   AS internal_dimensions_hight
        , REPLACE(get_json_string(params, '$.panel_material'), '+', ';')        
                   AS panel_material
        , REPLACE(get_json_string(params, '$.close_the_smoke_cavity_material'), 
'+', ';')          AS close_the_smoke_cavity_material
        , NULL                                                                  
                   AS oil_net_material
        , REPLACE(get_json_string(params, '$.material_science'), '+', ';')      
                   AS material_science
        , IF(style != '', style, NULL)                                          
                   AS style
        , IF(workmanship = '烤漆', '烤漆', NULL)                                    
               AS workmanship -- 使用材料/面板材料只获取烤漆值
        , NULL                                                                  
                   AS pattern_description
        , get_json_string(params, '$.machine_design')                           
                   AS material_design
        , get_json_string(params, '$.energy_efficiency_grade')                  
                   AS energy_efficiency_grade
        , NULL                                                                  
                   AS power_consumption
        , IF(constant_frequency_conversion != '', 
constant_frequency_conversion, NULL)             AS 
constant_frequency_conversion
        , IF(rated_power != '', rated_power, NULL)                              
                   AS rated_power
        , NULL                                                                  
                   AS shutdown_power
        , IF(lighting_power != '', lighting_power, NULL)                        
                   AS lighting_power
        , NULL                                                                  
                   AS sound_noise
        , IF(sound_level_min != '', regexp_extract(sound_level_min, '\\d+', 0), 
NULL)              AS sound_pressure_level_noise_min
        , IF(sound_level_max != '', regexp_extract(sound_level_max, '\\d+', 0), 
NULL)              AS sound_pressure_level_noise_max
        , IF(exhaust_air_min != '', regexp_extract(exhaust_air_min, '\\d+', 0), 
NULL)              AS exhaust_air_min
        , IF(exhaust_air_max != '', regexp_extract(exhaust_air_max, '\\d+', 0), 
NULL)              AS exhaust_air_max
        , IF(wind_pressure_min != '', regexp_extract(wind_pressure_min, '\\d+', 
0), NULL)          AS wind_pressure_min
        , IF(wind_pressure_max != '', regexp_extract(wind_pressure_max, '\\d+', 
0), NULL)          AS wind_pressure_max
        , IF(motor_output_power != '', motor_output_power, NULL)                
                   AS motor_output_power
        , NULL                                                                  
                   AS oil_separation
        , NULL                                                                  
                   AS net_flavor_rate
        , NULL                                                                  
                   AS total_pressure_efficiency
        , IF(maximum_static_pressure_value != '', 
maximum_static_pressure_value, NULL)             AS 
maximum_static_pressure_value
        , NULL                                                                  
                   AS smoke_area
        , NULL                                                                  
                   AS appointment_function
        , IF(child_lock IS NULL, NULL, IF(child_lock REGEXP '^支持', '有', '无'))   
               AS child_lock
        , NULL                                                                  
                   AS light_source
        , get_json_string(params, '$.way_into_the_wind')                        
                   AS way_into_the_wind
        , IF(air_outlet_diameter != '', air_outlet_diameter, NULL)              
                   AS air_outlet_diameter
        , IF(adjust_gears_num != '', adjust_gears_num, NULL)                    
                   AS adjust_gears_num
        -- '(自|免|热|自动|自动的|不用|变频|免拆|一键|热炫|光波|双模|智|无|触摸|触控|免拆|体感)清洗'
        , IF(cleaning_method != '', cleaning_method, NULL)                      
                   AS cleaning_method
        , NULL                                                                  
                   AS number_of_fans
        , NULL                                                                  
                   AS intelligent_lifting
        , NULL                                                                  
                   AS double_folding_and_double_cavity
        , NULL                                                                  
                   AS smoking_port_under_set
        , NULL                                                                  
                   AS delay_shutdown
        , NULL                                                                  
                   AS easy_to_disassemble
        , NULL                                                                  
                   AS easy_to_clean
        , NULL                                                                  
                      timing_function
        , NULL                                                                  
                      anti_backfilling
        , REPLACE(get_json_string(params, '$.function'), ',', ';')              
                   AS function
        , REPLACE(get_json_string(params, '$.control_mode'), ';', ';')          
                   AS control_mode
        , NULL                                                                  
                   AS interconnection_version_name
        , NULL                                                                  
                   AS support_ecology
        , NULL                                                                  
                   AS agreement
        , NULL                                                                  
                   AS voice_interaction
        , NULL                                                                  
                   AS large_screen_interaction
        , NULL                                                                  
                   AS allhouse_zhilian
        , NULL                                                                  
                   AS intelligent_smoke_sensing_cruise
        , NULL                                                                  
                   AS installation_dimensions_long
        , NULL                                                                  
                   AS installation_dimensions_width
        , NULL                                                                  
                   AS installation_dimensions_hight
        , NULL                                                                  
                   AS opening_size
        , REPLACE(get_json_string(params, '$.installation_method'), 
'(不提供安装服务)', '')        AS installation_method
        , NULL                                                                  
                   AS installation_service_fee
        , NULL                                                                  
                   AS warranty_period
        , NULL                                                                  
                   AS other_warranty_period
        , NULL                                                                  
                   AS target_customer_group
        , NULL                                                                  
                   AS scenario_description
        , NULL                                                                  
                   AS feature_selling_points
        , NULL                                                                  
                   AS technical_selling_points
        , NULL                                                                  
                   AS selling_points_appearance_design
        , NULL                                                                  
                   AS service_selling_points
        , NULL                                                                  
                   AS endorsement
        , NULL                                                                  
                   AS mechanism
        , NULL                                                                  
                   AS honor
        , NULL                                                                  
                   AS patent
   FROM (SELECT *
              , regexp_extract(get_json_string(params, '$.the_machine_size'), 
'[1-9]\\d+.+[1-9]\\d+.+[1-9]\\d+', 0)     AS ext_dim
              , regexp_extract(params, '([银浅草紫乳]?.|不锈钢|咖啡|卡其)色', 0)             
                           AS color
              , regexp_extract(params, '复古|简约|轻奢', 0)                           
                                  AS style
              , NVL(get_json_string(params, '$.material_science'), 
get_json_string(params, '$.panel_material'))         AS workmanship
              , regexp_extract(params, '(定|双?变)频', 0)                           
                                    AS constant_frequency_conversion
              , regexp_extract(UPPER(get_json_string(params, '$.rated_power')), 
'([1-9]\\d+)W', 1)                      AS rated_power
              , regexp_extract(REGEXP_REPLACE(get_json_string(params, 
'$.lighting_power'), '^0+W', ''), '([0-9.]+)', 0) AS lighting_power
              , regexp_extract(get_json_string(params, 
'$.sound_pressure_level_noise'), '>\\d+|\\d+-', 0)               AS 
sound_level_min
              , regexp_extract(get_json_string(params, 
'$.sound_pressure_level_noise'), '<\\d+|-\\d+', 0)               AS 
sound_level_max
              , regexp_extract(get_json_string(params, '$.exhaust_air'), 
'≥\\d+|\\d+-', 0)                              AS exhaust_air_min
              , regexp_extract(get_json_string(params, '$.exhaust_air'), 
'≤\\d+|-\\d+', 0)                              AS exhaust_air_max
              , regexp_extract(get_json_string(params, '$.wind_pressure'), 
'≥\\d+|\\d+-', 0)                            AS wind_pressure_min
              , regexp_extract(get_json_string(params, '$.wind_pressure'), 
'≤\\d+|-\\d+', 0)                            AS wind_pressure_max
              , regexp_extract(get_json_string(params, '$.motor_input_power'), 
'[1-9]\\d+', 0)                          AS motor_output_power
              , regexp_extract(get_json_string(params, 
'$.maximum_static_pressure_value'), '[1-9]\\d+', 0)              AS 
maximum_static_pressure_value
              , get_json_string(params, '$.bh_function')                        
                                        AS child_lock
              , regexp_extract(get_json_string(params, 
'$.the_outlet_diameter'), '\\d+', 0)                             AS 
air_outlet_diameter
              , REGEXP_REPLACE(regexp_extract(params, '(\\d+|三)档', 0), '三', 
'3')                                     AS adjust_gears_num
              , regexp_extract(params, '(自|免|热|自动|一键|热炫|光波)清洗', 0)              
                           AS cleaning_method
         FROM dwd_jd.dwd_jd_detail
         WHERE category = 'range_hood') t;
   
   
   Attached is the sample data of the original table
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   
   At 2023-02-23 11:01:09, "yongkang.zhong" ***@***.***> wrote:
   
   Could you please provide your SQL statement to help us reproduce the problem
   
   —
   Reply to this email directly, view it on GitHub, or unsubscribe.
   You are receiving this because you authored the thread.Message ID: 
***@***.***>


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to