Hi Varun, This patch is a step in the right direction, but I think we're not quite there yet.
The patch adds logic about semi-join materialization into the filesort code This makes things really messy. I think, some isolation would be beneficial. The first suggestion: In Sort_param (as this structure is visible in find_all_keys), add members that control the behavior inside find_all_keys: - unpack_function (default is NULL) - bool set_all_read_bits (default is FALSE) Now, Sort_param is allocated in filesort(), so we will have to add the same members in "class Filesort" and have filesort() copy them to Sort_param (necessary because find_all_keys() doesn't see the "Filesort" object). Then, these members can be set in the SQL layer e.g. in JOIN::add_sorting_to_table which creates the Filesort structure. (It would be ideal if semi-join code set them, but this is probably not possible to achieve as Filesort is set up after the semi-join code is set up). If after all these changes "JOIN_TAB::unpacking_to_base_table_fields()" is still there, please rename it to "unpack_..." so that it follows the established convention. That's all the input. On Sat, Sep 28, 2019 at 02:12:01PM +0530, Varun wrote: > revision-id: d48294d15c4895215d5facef97fc80c03cd6b4b0 > (mariadb-10.4.4-341-gd48294d15c4) > parent(s): a340af922361e3958e5d6653c8b840771db282f2 > author: Varun Gupta > committer: Varun Gupta > timestamp: 2019-09-28 13:06:44 +0530 > message: > > MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on > > For the case when the SJM scan table is the first table in the join order, > then if we want to do the sorting on the SJM scan table, then we need to > make sure that we unpack the values to base table fields in two cases: > 1) Reading the SJM table and writing the sort-keys inside the sort-buffer > 2) Reading the sorted data from the sort file > > --- > mysql-test/main/order_by.result | 138 > +++++++++++++++++++++++++++++++++++++++- > mysql-test/main/order_by.test | 34 ++++++++++ > sql/filesort.cc | 17 +++++ > sql/opt_subselect.cc | 10 ++- > sql/records.cc | 13 ++++ > sql/records.h | 1 + > sql/sql_select.cc | 89 ++++++++++---------------- > sql/sql_select.h | 4 +- > sql/table.h | 1 + > 9 files changed, 246 insertions(+), 61 deletions(-) > > diff --git a/mysql-test/main/order_by.result b/mysql-test/main/order_by.result > index b059cc686cd..e74583670fc 100644 > --- a/mysql-test/main/order_by.result > +++ b/mysql-test/main/order_by.result > @@ -3322,7 +3322,7 @@ WHERE books.library_id = 8663 AND > books.scheduled_for_removal=0 ) > ORDER BY wings.id; > id select_type table type possible_keys key key_len ref > rows filtered Extra > -1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL > 2 100.00 Using temporary; Using filesort > +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL > 2 100.00 Using filesort > 1 PRIMARY wings eq_ref PRIMARY PRIMARY 4 test.books.wings_id > 1 100.00 > 2 MATERIALIZED books ref library_idx library_idx 4 > const 2 100.00 Using where > Warnings: > @@ -3436,3 +3436,139 @@ Note 1003 select `test`.`t4`.`a` AS > `a`,`test`.`t4`.`b` AS `b`,`test`.`t4`.`c` A > set histogram_size=@tmp_h, histogram_type=@tmp_ht, use_stat_tables=@tmp_u, > optimizer_use_condition_selectivity=@tmp_o; > drop table t1,t2,t3,t4; > +# > +# MDEV-13694: Wrong result upon GROUP BY with orderby_uses_equalities=on > +# > +CREATE TABLE t1 (a INT, b int, primary key(a)); > +CREATE TABLE t2 (a INT, b INT); > +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6), > +(2354,7),(321421,3),(535,2),(4535,3); > +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3); > +# Join order should have the SJM scan table as the first table for both > +# the queries with GROUP BY and ORDER BY clause. > +EXPLAIN SELECT t1.a > +FROM t1 > +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) > +ORDER BY t1.a DESC; > +id select_type table type possible_keys key key_len ref > rows Extra > +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL > 3 Using filesort > +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 > Using index > +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 > Using where > +EXPLAIN FORMAT=JSON SELECT t1.a > +FROM t1 > +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) > +ORDER BY t1.a DESC; > +EXPLAIN > +{ > + "query_block": { > + "select_id": 1, > + "read_sorted_file": { > + "filesort": { > + "sort_key": "t1.a desc", > + "table": { > + "table_name": "<subquery2>", > + "access_type": "ALL", > + "possible_keys": ["distinct_key"], > + "rows": 3, > + "filtered": 100, > + "materialized": { > + "unique": 1, > + "query_block": { > + "select_id": 2, > + "table": { > + "table_name": "t2", > + "access_type": "ALL", > + "rows": 3, > + "filtered": 100, > + "attached_condition": "t2.b = 3 and t2.a is not null" > + } > + } > + } > + } > + } > + }, > + "table": { > + "table_name": "t1", > + "access_type": "eq_ref", > + "possible_keys": ["PRIMARY"], > + "key": "PRIMARY", > + "key_length": "4", > + "used_key_parts": ["a"], > + "ref": ["test.t2.a"], > + "rows": 1, > + "filtered": 100, > + "using_index": true > + } > + } > +} > +SELECT t1.a > +FROM t1 > +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) > +ORDER BY t1.a DESC; > +a > +273 > +96 > +58 > +EXPLAIN SELECT t1.a, group_concat(t1.b) > +FROM t1 > +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) > +GROUP BY t1.a DESC; > +id select_type table type possible_keys key key_len ref > rows Extra > +1 PRIMARY <subquery2> ALL distinct_key NULL NULL NULL > 3 Using filesort > +1 PRIMARY t1 eq_ref PRIMARY PRIMARY 4 test.t2.a 1 > +2 MATERIALIZED t2 ALL NULL NULL NULL NULL 3 > Using where > +EXPLAIN FORMAT=JSON SELECT t1.a, group_concat(t1.b) > +FROM t1 > +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) > +GROUP BY t1.a DESC; > +EXPLAIN > +{ > + "query_block": { > + "select_id": 1, > + "read_sorted_file": { > + "filesort": { > + "sort_key": "t1.a desc", > + "table": { > + "table_name": "<subquery2>", > + "access_type": "ALL", > + "possible_keys": ["distinct_key"], > + "rows": 3, > + "filtered": 100, > + "materialized": { > + "unique": 1, > + "query_block": { > + "select_id": 2, > + "table": { > + "table_name": "t2", > + "access_type": "ALL", > + "rows": 3, > + "filtered": 100, > + "attached_condition": "t2.b = 3 and t2.a is not null" > + } > + } > + } > + } > + } > + }, > + "table": { > + "table_name": "t1", > + "access_type": "eq_ref", > + "possible_keys": ["PRIMARY"], > + "key": "PRIMARY", > + "key_length": "4", > + "used_key_parts": ["a"], > + "ref": ["test.t2.a"], > + "rows": 1, > + "filtered": 100 > + } > + } > +} > +SELECT t1.a, group_concat(t1.b) > +FROM t1 > +WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) > +GROUP BY t1.a DESC; > +a group_concat(t1.b) > +273 3 > +96 2 > +58 1 > +DROP TABLE t1, t2; > diff --git a/mysql-test/main/order_by.test b/mysql-test/main/order_by.test > index 934c503302f..b3e43d27e2f 100644 > --- a/mysql-test/main/order_by.test > +++ b/mysql-test/main/order_by.test > @@ -2276,3 +2276,37 @@ set histogram_size=@tmp_h, histogram_type=@tmp_ht, > use_stat_tables=@tmp_u, > optimizer_use_condition_selectivity=@tmp_o; > > drop table t1,t2,t3,t4; > + > + > +--echo # > +--echo # MDEV-13694: Wrong result upon GROUP BY with > orderby_uses_equalities=on > +--echo # > + > +CREATE TABLE t1 (a INT, b int, primary key(a)); > +CREATE TABLE t2 (a INT, b INT); > + > +INSERT INTO t1 (a,b) VALUES (58,1),(96,2),(273,3),(23,4),(231,5),(525,6), > + (2354,7),(321421,3),(535,2),(4535,3); > +INSERT INTO t2 (a,b) VALUES (58,3),(96,3),(273,3); > + > +--echo # Join order should have the SJM scan table as the first table for > both > +--echo # the queries with GROUP BY and ORDER BY clause. > + > +let $query= SELECT t1.a > + FROM t1 > + WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) > + ORDER BY t1.a DESC; > + > +eval EXPLAIN $query; > +eval EXPLAIN FORMAT=JSON $query; > +eval $query; > + > +let $query= SELECT t1.a, group_concat(t1.b) > + FROM t1 > + WHERE t1.a IN (SELECT a FROM t2 WHERE b=3) > + GROUP BY t1.a DESC; > + > +eval EXPLAIN $query; > +eval EXPLAIN FORMAT=JSON $query; > +eval $query; > +DROP TABLE t1, t2; > diff --git a/sql/filesort.cc b/sql/filesort.cc > index 3f4291cfb1f..e5c83293e9f 100644 > --- a/sql/filesort.cc > +++ b/sql/filesort.cc > @@ -716,11 +716,21 @@ static ha_rows find_all_keys(THD *thd, Sort_param > *param, SQL_SELECT *select, > *found_rows= 0; > ref_pos= &file->ref[0]; > next_pos=ref_pos; > + JOIN_TAB *tab= sort_form->reginfo.join_tab; > + JOIN *join= tab ? tab->join : NULL; > + bool first_is_in_sjm_nest= FALSE; > > DBUG_EXECUTE_IF("show_explain_in_find_all_keys", > dbug_serve_apcs(thd, 1); > ); > > + if (join && join->table_count != join->const_tables && > + (join->join_tab + join->const_tables == tab)) > + { > + TABLE_LIST *tbl_for_first= sort_form->pos_in_table_list; > + first_is_in_sjm_nest= tbl_for_first && > tbl_for_first->is_sjm_scan_table(); > + } > + > if (!quick_select) > { > next_pos=(uchar*) 0; /* Find records in sequence */ > @@ -756,13 +766,20 @@ static ha_rows find_all_keys(THD *thd, Sort_param > *param, SQL_SELECT *select, > goto err; > } > > + if (first_is_in_sjm_nest) > + sort_form->column_bitmaps_set(save_read_set, save_write_set); > + > DEBUG_SYNC(thd, "after_index_merge_phase1"); > for (;;) > { > if (quick_select) > error= select->quick->get_next(); > else /* Not quick-select */ > + { > error= file->ha_rnd_next(sort_form->record[0]); > + if (first_is_in_sjm_nest) > + tab->unpacking_to_base_table_fields(); > + } > if (unlikely(error)) > break; > file->position(sort_form->record[0]); > diff --git a/sql/opt_subselect.cc b/sql/opt_subselect.cc > index 87458357865..f837a6394af 100644 > --- a/sql/opt_subselect.cc > +++ b/sql/opt_subselect.cc > @@ -4252,11 +4252,11 @@ bool setup_sj_materialization_part2(JOIN_TAB *sjm_tab) > sjm_tab->type= JT_ALL; > > /* Initialize full scan */ > - sjm_tab->read_first_record= join_read_record_no_init; > + sjm_tab->read_first_record= join_init_read_record; > sjm_tab->read_record.copy_field= sjm->copy_field; > sjm_tab->read_record.copy_field_end= sjm->copy_field + > sjm->sjm_table_cols.elements; > - sjm_tab->read_record.read_record_func= rr_sequential_and_unpack; > + sjm_tab->read_record.read_record_func= > read_record_func_for_rr_and_unpack; > } > > sjm_tab->bush_children->end[-1].next_select= end_sj_materialize; > @@ -7105,3 +7105,9 @@ bool > Item_in_subselect::pushdown_cond_for_in_subquery(THD *thd, Item *cond) > thd->lex->current_select= save_curr_select; > DBUG_RETURN(FALSE); > } > + > + > +bool TABLE_LIST::is_sjm_scan_table() > +{ > + return is_active_sjm() && sj_mat_info->is_sj_scan; > +} > diff --git a/sql/records.cc b/sql/records.cc > index 3d709182a4e..f6885f773d5 100644 > --- a/sql/records.cc > +++ b/sql/records.cc > @@ -709,3 +709,16 @@ static int rr_cmp(uchar *a,uchar *b) > return (int) a[7] - (int) b[7]; > #endif > } > + > + > +int read_record_func_for_rr_and_unpack(READ_RECORD *info) > +{ > + int error; > + if ((error= info->read_record_func_and_unpack_calls(info))) > + return error; > + > + for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) > + (*cp->do_copy)(cp); > + > + return error; > +} > diff --git a/sql/records.h b/sql/records.h > index faf0d13c9a9..037a06b9d34 100644 > --- a/sql/records.h > +++ b/sql/records.h > @@ -55,6 +55,7 @@ struct READ_RECORD > TABLE *table; /* Head-form */ > Unlock_row_func unlock_row; > Read_func read_record_func; > + Read_func read_record_func_and_unpack_calls; > THD *thd; > SQL_SELECT *select; > uint ref_length, reclength, rec_cache_size, error_offset; > diff --git a/sql/sql_select.cc b/sql/sql_select.cc > index 36d9eda3383..28bc57c692f 100644 > --- a/sql/sql_select.cc > +++ b/sql/sql_select.cc > @@ -14015,37 +14015,8 @@ remove_const(JOIN *join,ORDER *first_order, COND > *cond, > can be used without tmp. table. > */ > bool can_subst_to_first_table= false; > - bool first_is_in_sjm_nest= false; > - if (first_is_base_table) > - { > - TABLE_LIST *tbl_for_first= > - join->join_tab[join->const_tables].table->pos_in_table_list; > - first_is_in_sjm_nest= tbl_for_first->sj_mat_info && > - tbl_for_first->sj_mat_info->is_used; > - } > - /* > - Currently we do not employ the optimization that uses multiple > - equalities for ORDER BY to remove tmp table in the case when > - the first table happens to be the result of materialization of > - a semi-join nest ( <=> first_is_in_sjm_nest == true). > - > - When a semi-join nest is materialized and scanned to look for > - possible matches in the remaining tables for every its row > - the fields from the result of materialization are copied > - into the record buffers of tables from the semi-join nest. > - So these copies are used to access the remaining tables rather > - than the fields from the result of materialization. > - > - Unfortunately now this so-called 'copy back' technique is > - supported only if the rows are scanned with the rr_sequential > - function, but not with other rr_* functions that are employed > - when the result of materialization is required to be sorted. > - > - TODO: either to support 'copy back' technique for the above case, > - or to get rid of this technique altogether. > - */ > if (optimizer_flag(join->thd, OPTIMIZER_SWITCH_ORDERBY_EQ_PROP) && > - first_is_base_table && !first_is_in_sjm_nest && > + first_is_base_table && > order->item[0]->real_item()->type() == Item::FIELD_ITEM && > join->cond_equal) > { > @@ -19922,19 +19893,6 @@ do_select(JOIN *join, Procedure *procedure) > } > > > -int rr_sequential_and_unpack(READ_RECORD *info) > -{ > - int error; > - if (unlikely((error= rr_sequential(info)))) > - return error; > - > - for (Copy_field *cp= info->copy_field; cp != info->copy_field_end; cp++) > - (*cp->do_copy)(cp); > - > - return error; > -} > - > - > /** > @brief > Instantiates temporary table > @@ -21223,6 +21181,8 @@ bool test_if_use_dynamic_range_scan(JOIN_TAB > *join_tab) > > int join_init_read_record(JOIN_TAB *tab) > { > + bool need_unpacking= FALSE; > + JOIN *join= tab->join; > /* > Note: the query plan tree for the below operations is constructed in > save_agg_explain_data. > @@ -21232,6 +21192,12 @@ int join_init_read_record(JOIN_TAB *tab) > if (tab->filesort && tab->sort_table()) // Sort table. > return 1; > > + if (join->top_join_tab_count != join->const_tables) > + { > + TABLE_LIST *tbl= tab->table->pos_in_table_list; > + need_unpacking= tbl ? tbl->is_sjm_scan_table() : FALSE; > + } > + > tab->build_range_rowid_filter_if_needed(); > > DBUG_EXECUTE_IF("kill_join_init_read_record", > @@ -21249,16 +21215,6 @@ int join_init_read_record(JOIN_TAB *tab) > if (!tab->preread_init_done && tab->preread_init()) > return 1; > > - > - if (init_read_record(&tab->read_record, tab->join->thd, tab->table, > - tab->select, tab->filesort_result, 1,1, FALSE)) > - return 1; > - return tab->read_record.read_record(); > -} > - > -int > -join_read_record_no_init(JOIN_TAB *tab) > -{ > Copy_field *save_copy, *save_copy_end; > > /* > @@ -21268,12 +21224,20 @@ join_read_record_no_init(JOIN_TAB *tab) > save_copy= tab->read_record.copy_field; > save_copy_end= tab->read_record.copy_field_end; > > - init_read_record(&tab->read_record, tab->join->thd, tab->table, > - tab->select, tab->filesort_result, 1, 1, FALSE); > + > + if (init_read_record(&tab->read_record, tab->join->thd, tab->table, > + tab->select, tab->filesort_result, 1, 1, FALSE)) > + return 1; > > tab->read_record.copy_field= save_copy; > tab->read_record.copy_field_end= save_copy_end; > - tab->read_record.read_record_func= rr_sequential_and_unpack; > + > + if (need_unpacking) > + { > + tab->read_record.read_record_func_and_unpack_calls= > + > tab->read_record.read_record_func; > + tab->read_record.read_record_func = read_record_func_for_rr_and_unpack; > + } > > return tab->read_record.read_record(); > } > @@ -28981,6 +28945,19 @@ void > build_notnull_conds_for_inner_nest_of_outer_join(JOIN *join, > } > > > +/* > + @brief > + Unpacking temp table fields to base table fields. > +*/ > + > +void JOIN_TAB::unpacking_to_base_table_fields() > +{ > + for (Copy_field *cp= read_record.copy_field; > + cp != read_record.copy_field_end; cp++) > + (*cp->do_copy)(cp); > +} > + > + > /** > @} (end of group Query_Optimizer) > */ > diff --git a/sql/sql_select.h b/sql/sql_select.h > index 4f7bf49f635..545d4a788cc 100644 > --- a/sql/sql_select.h > +++ b/sql/sql_select.h > @@ -223,7 +223,7 @@ typedef enum_nested_loop_state > (*Next_select_func)(JOIN *, struct st_join_table *, bool); > Next_select_func setup_end_select_func(JOIN *join, JOIN_TAB *tab); > int rr_sequential(READ_RECORD *info); > -int rr_sequential_and_unpack(READ_RECORD *info); > +int read_record_func_for_rr_and_unpack(READ_RECORD *info); > Item *remove_pushed_top_conjuncts(THD *thd, Item *cond); > Item *and_new_conditions_to_optimized_cond(THD *thd, Item *cond, > COND_EQUAL **cond_eq, > @@ -676,6 +676,7 @@ typedef struct st_join_table { > table_map remaining_tables); > bool fix_splitting(SplM_plan_info *spl_plan, table_map remaining_tables, > bool is_const_table); > + void unpacking_to_base_table_fields(); > } JOIN_TAB; > > > @@ -2352,7 +2353,6 @@ create_virtual_tmp_table(THD *thd, Field *field) > > int test_if_item_cache_changed(List<Cached_item> &list); > int join_init_read_record(JOIN_TAB *tab); > -int join_read_record_no_init(JOIN_TAB *tab); > void set_position(JOIN *join,uint idx,JOIN_TAB *table,KEYUSE *key); > inline Item * and_items(THD *thd, Item* cond, Item *item) > { > diff --git a/sql/table.h b/sql/table.h > index 1a7e5fbd4dc..35ba9bbb95d 100644 > --- a/sql/table.h > +++ b/sql/table.h > @@ -2622,6 +2622,7 @@ struct TABLE_LIST > */ > const char *get_table_name() const { return view != NULL ? view_name.str : > table_name.str; } > bool is_active_sjm(); > + bool is_sjm_scan_table(); > bool is_jtbm() { return MY_TEST(jtbm_subselect != NULL); } > st_select_lex_unit *get_unit(); > st_select_lex *get_single_select(); > _______________________________________________ > commits mailing list > comm...@mariadb.org > https://lists.askmonty.org/cgi-bin/mailman/listinfo/commits -- BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp