github-actions[bot] commented on issue #12971: URL: https://github.com/apache/dolphinscheduler/issues/12971#issuecomment-1323743932
### Search before asking - [X] I had searched in the [issues](https://github.com/apache/dolphinscheduler/issues?q=is%3Aissue) and found no similar feature requirement. ### Description Use standard sql statements to build the scheduling DAG. At present, there are mainstream methods in the market: 1. airlfow gives python DAG construction, and defines tasks and relations through python syntax. This method has a relatively big advantage in the minimalist syntax of python, and many codes can be repeated. It can be done by simply copying and modifying. 2. The drag-and-drop method of dolphinscheduler, this method seems minimalist and foolish, but it is very troublesome for the 8000-1w rows of SQL statements at the enterprise level. You need to sort out the logic clearly and drag and drop one by one , is very cumbersome. The definition of DAG through sql is simple and efficient ### Use case set cron ***** set description "this is demo" CREATE CATALOG python_script WITH ( type="python", script="#_*_coding:utf-8_*_ import partition_sensor.hive_partition_sensor as partition_sensor partition_sensor.check_for_partition('dwd', 'dwd_cosmos_bis_product_version_city_f_d', '${partition}=\'${format}\'')" failedRetries=5 ) CREATE CATALOG hive_sql WITH( type="sql", table.sql-dialect= "hive" sql="INSERT overwrite TABLE bl_ugc.sr_living_expenses_group_table partition(dt='$[yyyyMMdd-1]') select case_code, classfication_type_name, classfication_one_name, classfication_two_name, classfication_three_name, classfication_four_name, a_type_name, b_type_name, create_time, case_end_time, process_name, process_start_time, process_end_time, handler_city_name, handler_warzone_name, handler_region_name, handler_big_group_name, handler_group_name, handler_sgroup_name, handler_minigourp_name, handler_name, handler_code, evaluation, sum(process_urge) as process_urge, sum(transfer) as transfer, city_name, warzone_name, region_name, gourp_name, rating_address, resblock_name, resblock_id, house_source_code, product_line_name, product_version_name, summary_case, income_identity, living_expenses_item, payment_method, finish_duration, t_evaluate_date from (select a. case_code, a. classfication_type_name, a. classfication_one_name, a.classfication_two_name, a.classfication_three_name, a. classfication_four_name, b.a_type_name, b.b_type_name, a. create_time, a. case_end_time, a. process_name, a. process_start_time, a. process_end_time, a.handler_city_name, a.handler_warzone_name, a.handler_region_name, a.handler_big_group_name, a.handler_group_name, a.handler_sgroup_name, a.handler_minigourp_name, a.handler_name, a.handler_code, null as evaluation, count(DISTINCT if((a.agent_process_urge_times+a.user_process_urge_times)>0,a.case_code,NULL)) AS process_urge, count(DISTINCT if(a.handler_name<>a.the_next_handler_name and a.the_next_process_name not regexp'evaluation|end',a.case_code,NULL)) AS transfer, b. city_name, b. warzone_name, b. region_name, b. gourp_name, b. rating_address, b. resblock_name, b. resblock_id, b. house_source_code, b. product_line_name, b. product_version_name, b. summary_case, b. income_identity, b. living_expenses_item, b. payment_method, b. finish_duration, b.t_evaluate_date from bl_ugc.sr_case_process_info as a left join bl_ugc.sr_living_expenses_table as b on a.case_code=b.sr_case_code and b.dt='$[yyyyMMdd-1]' where a.dt='$[yyyyMMdd-1]' and a.process_name not regexp 'x|x' AND a. classfication_one_name IN ('xxxx', 'xxxx') AND a.classfication_two_name='xx' AND a.classfication_four_name IN('xxxx/x', 'x/xx', 'x/x', 'xx/x', 'x/x', 'x', 'x') group by a.case_code, a. classfication_type_name, a. classfication_one_name, a.classfication_two_name, a.classfication_three_name, a. classfication_four_name, b.a_type_name, b.b_type_name, a. create_time, a. case_end_time, a. process_name, a. process_start_time, a. process_end_time, a.handler_city_name, a.handler_warzone_name, a.handler_region_name, a.handler_big_group_name, a.handler_group_name, a.handler_sgroup_name, a.handler_minigourp_name, a.handler_name, a.handler_code, b. city_name, b. warzone_name, b. region_name, b. gourp_name, b. rating_address, b. resblock_name, b. resblock_id, b. house_source_code, b. product_line_name, b. product_version_name, b. summary_case, b. income_identity, b. living_expenses_item, b. payment_method, b. finish_duration, b.t_evaluate_date union all select sr_case_code as case_code, type_name as classfication_type_name, classfication_one_name, classfication_two_name, classfication_three_name, classfication_four_name, a_type_name, b_type_name, create_time, case_finish_time as case_end_time, null as process_name, null as process_start_time, null as process_end_time, evaluate_city_name as handler_city_name, evaluator_warzonename as handler_warzone_name, evaluator_region_name as handler_region_name, evaluator_branch_department as handler_big_group_name, evaluator_area_name as handler_group_name, evaluator_group_name as handler_sgroup_name, evaluator_small_group_name as handler_minigourp_name, staff_name as handler_name, t_be_evaluator_id as handler_code, evaluation, 0 as process_urge, 0 as transfer, city_name, warzone_name, region_name, gourp_name, rating_address, resblock_name, resblock_id, house_source_code, product_line_name, product_version_name, summary_case, income_identity, living_expenses_item, payment_method, finish_duration, t_evaluate_date from bl_ugc.sr_living_expenses_table as t where t.dt='$[yyyyMMdd-1]' and t.data_type='detailed dimension') as z group by case_code, classfication_type_name, classfication_one_name, classfication_two_name, classfication_three_name, classfication_four_name, a_type_name, b_type_name, create_time, case_end_time, process_name, process_start_time, process_end_time, handler_city_name, handler_warzone_name, handler_region_name, handler_big_group_name, handler_group_name, handler_sgroup_name, handler_minigourp_name, handler_name, handler_code, process_urge, transfer, city_name, warzone_name, region_name, gourp_name, rating_address, resblock_name, resblock_id, house_source_code, product_line_name, product_version_name, summary_case, income_identity, living_expenses_item, payment_method, finish_duration, t_evaluate_date, evaluation " ) insert into hive_sql select * from python_script ### Related issues _No response_ ### Are you willing to submit a PR? - [X] Yes I am willing to submit a PR! ### Code of Conduct - [X] I agree to follow this project's [Code of Conduct](https://www.apache.org/foundation/policies/conduct) -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
