Hi Sanja, Please find review comments below.
On Thu, Apr 02, 2015 at 06:19:41PM +0200, [email protected] wrote: > revision-id: fc31f6d95720b4b946b8b68c816026d65831f347 > parent(s): 01d7da6785284383b2c04f2d4474feccebb0bb6f > committer: Oleksandr Byelkin > branch nick: server > timestamp: 2015-04-02 18:19:33 +0200 > message: > > MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record > > --- > mysql-test/r/analyze_format_json.result | 62 +++++++++++++++++++++++++++++ > mysql-test/t/analyze_format_json.test | 30 ++++++++++++++ > sql/sql_explain.cc | 26 +++++++++++-- > sql/sql_explain.h | 12 +++++- > sql/sql_select.cc | 69 > ++++++++++++++++++++++++++++++--- > sql/sql_select.h | 7 +++- > 6 files changed, 195 insertions(+), 11 deletions(-) > > diff --git a/mysql-test/r/analyze_format_json.result > b/mysql-test/r/analyze_format_json.result > index 9022d8c..b3ef075 100644 > --- a/mysql-test/r/analyze_format_json.result > +++ b/mysql-test/r/analyze_format_json.result > @@ -264,3 +264,65 @@ ANALYZE > } > } > drop table t1; > +# > +# MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record > +# > +create table t3(a int); > +insert into t3 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); > +create table t4(a int); > +insert into t4 select A.a + B.a* 10 + C.a * 100 from t3 A, t3 B, t3 C; > +create table t1 (lb1 int, rb1 int, lb2 int, rb2 int, c1 int, c2 int); > +insert into t1 values (1,2,10,20,15,15); > +insert into t1 values (3,5,10,20,15,15); > +insert into t1 values (10,20,10,20,15,15); > +insert into t1 values (10,20,1,2,15,15); > +insert into t1 values (10,20,10,20,1,3); > +create table t2 (key1 int, key2 int, key3 int, key4 int, col1 int, > +key(key1), key(key2), key(key3), key(key4)); > +insert into t2 select a,a,a,a,a from t3; > +insert into t2 select 15,15,15,15,15 from t4; > +analyze format=json > +select * from t1, t2 where (t2.key1 between t1.lb1 and t1.rb1) and > +(t2.key2 between t1.lb2 and t1.rb2) and > +(t2.key3=t1.c1 OR t2.key4=t1.c2); > +ANALYZE > +{ > + "query_block": { > + "select_id": 1, > + "r_loops": 1, > + "r_total_time_ms": "REPLACED", > + "table": { > + "table_name": "t1", > + "access_type": "ALL", > + "r_loops": 1, > + "rows": 5, > + "r_rows": 5, > + "r_total_time_ms": "REPLACED", > + "filtered": 100, > + "r_filtered": 100 > + }, > + "range-checked-for-each-record": { > + "keys": ["key1", "key2", "key3", "key4"], > + "r_keys_none": 1, > + "r_keys_index_merge": 1, > + "r_keys": { > + "key1": 2, > + "key2": 1, > + "key3": 0, > + "key4": 0 > + }, It seems wrong to have multiple r_XXX elements. I think, having one will be better. How about: r_keys: { "none": 1, ## Maybe, change to "full_table_scan" ? (btw can it be a full index scan instead I wonder?) "index_merge": nnn, "range" : { "key1" : nnn, "key2" : nnn, "key3" : nnn } } Let's discuss it. > + "table": { > + "table_name": "t2", > + "access_type": "ALL", > + "possible_keys": ["key1", "key2", "key3", "key4"], > + "r_loops": 5, > + "rows": 1010, > + "r_rows": 203.8, > + "r_total_time_ms": "REPLACED", > + "filtered": 100, > + "r_filtered": 98.135 > + } > + } > + } > +} > +drop table t1,t2,t3,t4; > diff --git a/mysql-test/t/analyze_format_json.test > b/mysql-test/t/analyze_format_json.test > index 64bafd7..0ae9d4f 100644 > --- a/mysql-test/t/analyze_format_json.test > +++ b/mysql-test/t/analyze_format_json.test > @@ -75,3 +75,33 @@ disconnect con1; > connection default; > drop table t1; > > + > +--echo # > +--echo # MDEV-7833:ANALYZE FORMAT=JSON and Range checked for each record > +--echo # > +create table t3(a int); > +insert into t3 values (0),(1),(2),(3),(4),(5),(6),(7),(8),(9); > + > +create table t4(a int); > +insert into t4 select A.a + B.a* 10 + C.a * 100 from t3 A, t3 B, t3 C; > + > +create table t1 (lb1 int, rb1 int, lb2 int, rb2 int, c1 int, c2 int); > + > +insert into t1 values (1,2,10,20,15,15); > +insert into t1 values (3,5,10,20,15,15); > +insert into t1 values (10,20,10,20,15,15); > +insert into t1 values (10,20,1,2,15,15); > +insert into t1 values (10,20,10,20,1,3); > + > +create table t2 (key1 int, key2 int, key3 int, key4 int, col1 int, > + key(key1), key(key2), key(key3), key(key4)); > +insert into t2 select a,a,a,a,a from t3; > +insert into t2 select 15,15,15,15,15 from t4; > + > +--replace_regex /"r_total_time_ms": [0-9]*[.]?[0-9]*/"r_total_time_ms": > "REPLACED"/ > +analyze format=json > +select * from t1, t2 where (t2.key1 between t1.lb1 and t1.rb1) and > + (t2.key2 between t1.lb2 and t1.rb2) and > + (t2.key3=t1.c1 OR t2.key4=t1.c2); > + > +drop table t1,t2,t3,t4; > diff --git a/sql/sql_explain.cc b/sql/sql_explain.cc > index 9d82f4f..edbc2bf 100644 > --- a/sql/sql_explain.cc > +++ b/sql/sql_explain.cc > @@ -1076,15 +1076,15 @@ int > Explain_table_access::print_explain(select_result_sink *output, uint8 explai > } > > Please add a comment: @return ptr - pointer to the added string NULL - out of memory error Does it make sense to change the return type to 'const char*' ? > -bool String_list::append_str(MEM_ROOT *mem_root, const char *str) > +char *String_list::append_str(MEM_ROOT *mem_root, const char *str) > { > size_t len= strlen(str); > char *cp; > if (!(cp = (char*)alloc_root(mem_root, len+1))) > - return 1; > + return NULL; > memcpy(cp, str, len+1); > push_back(cp); > - return 0; > + return cp; > } > > > @@ -1209,6 +1209,26 @@ void > Explain_table_access::print_explain_json(Explain_query *query, > { > writer->add_member("range-checked-for-each-record").start_object(); > add_json_keyset(writer, "keys", &possible_keys); > + if (is_analyze) > + { > + writer->add_member("r_keys_none").add_ll(range_checked_fer->none); > + writer->add_member("r_keys_index_merge").add_ll(range_checked_fer-> > + index_merge); > + if (range_checked_fer->keys_stat) > + { > + writer->add_member("r_keys").start_object(); > + for (uint i= 0; i < range_checked_fer->keys; i++) > + { > + if (range_checked_fer->keys_stat_names[i]) > + { > + writer->add_member(range_checked_fer-> > + keys_stat_names[i]).add_ll(range_checked_fer-> > + keys_stat[i]); > + } > + } > + writer->end_object(); > + } > + } If we move this code into Range_checked_fer::print_json(), will it be possible to make its members private? > } > > if (full_scan_on_null_key) > diff --git a/sql/sql_explain.h b/sql/sql_explain.h > index e3b41ee..f86886d 100644 > --- a/sql/sql_explain.h > +++ b/sql/sql_explain.h > @@ -54,7 +54,7 @@ it into the slow query log. > class String_list: public List<char> > { > public: > - bool append_str(MEM_ROOT *mem_root, const char *str); > + char *append_str(MEM_ROOT *mem_root, const char *str); > }; > > class Json_writer; > @@ -627,6 +627,16 @@ class Explain_range_checked_fer : public Sql_alloc > public: > String_list key_set; > key_map keys_map; > + > + ha_rows none, index_merge; 'none' looks weird. Would a 'full_scan' be better. > + ha_rows *keys_stat; > + char **keys_stat_names; > + uint keys; This needs to be documented. > + > + Explain_range_checked_fer() > + :Sql_alloc(), none(0), index_merge(0), > + keys_stat(0), keys_stat_names(0), keys(0) > + {} > }; > > /* > diff --git a/sql/sql_select.cc b/sql/sql_select.cc > index 5c3e116..4eb3d81 100644 > --- a/sql/sql_select.cc > +++ b/sql/sql_select.cc > @@ -11398,6 +11398,7 @@ void JOIN_TAB::cleanup() > table->reginfo.join_tab= 0; > } > end_read_record(&read_record); > + explain_plan= 0; It's a pointer, right? Please change s/0/NULL/ > DBUG_VOID_RETURN; > } > > @@ -18762,9 +18763,30 @@ test_if_quick_select(JOIN_TAB *tab) > > delete tab->select->quick; > tab->select->quick=0; > - return tab->select->test_quick_select(tab->join->thd, tab->keys, > - (table_map) 0, HA_POS_ERROR, 0, > - FALSE, /*remove where parts*/FALSE); > + int res= tab->select->test_quick_select(tab->join->thd, tab->keys, > + (table_map) 0, HA_POS_ERROR, 0, > + FALSE, /*remove where > parts*/FALSE); > + if (tab->explain_plan && tab->explain_plan->range_checked_fer) > + { > + if (tab->select->quick) > + { > + QUICK_SELECT_I *quick= tab->select->quick; > + if (quick->index == MAX_KEY) > + tab->explain_plan->range_checked_fer->index_merge++; > + else > + { > + DBUG_ASSERT(quick->index < > tab->explain_plan->range_checked_fer->keys); > + DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat); > + DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat_names); > + DBUG_ASSERT(tab->explain_plan->range_checked_fer->keys_stat_names[ > + quick->index]); > + tab->explain_plan->range_checked_fer->keys_stat[quick->index]++; > + } > + } > + else > + tab->explain_plan->range_checked_fer->none++; > + } > + return res; I think it's bad that join execution code becomes littered with details about how things are stored in the Explain data structures. Can you add a set of methods to range_checked_fer: count_full_scan(); count_index_merge(); count_range_scan(); and then only call these methods from here? > } > > > @@ -23360,6 +23382,32 @@ int append_possible_keys(MEM_ROOT *alloc, > String_list &list, TABLE *table, > return 0; > } > What does this function do? AFAIU it does two things: - copies indexes from possible_keys into a String_list. - allocates an empty array of ha_rows(). The set of data structures used by Range_checked_fer is quite peculiar. This function is only useful for Range_checked_fer. Do you think it would make sense to made this a method of Range_checked_fer? > +int append_possible_keys_stat(MEM_ROOT *alloc, String_list &list, > + ha_rows **stat, char ***names, > + TABLE *table, key_map possible_keys) > +{ > + uint j; > + multi_alloc_root(alloc, stat, sizeof(ha_rows) * table->s->keys, > + names, sizeof(char *) * table->s->keys, NULL); > + if ((!(*stat)) || (!(*names))) > + { > + (*stat)= NULL; (*names)= NULL; > + return 1; > + } > + bzero(*stat, sizeof(ha_rows) * table->s->keys); > + for (j= 0; j < table->s->keys; j++) > + { > + if (possible_keys.is_set(j)) > + { > + char *nm= list.append_str(alloc, table->key_info[j].name); > + (*names)[j]= nm; > + } > + else > + (*names)[j]= NULL; > + } > + return 0; > +} > + > /* > TODO: this function is only applicable for the first non-const optimization > join tab. > @@ -23400,6 +23448,8 @@ void JOIN_TAB::save_explain_data(Explain_table_access > *eta, table_map prefix_tab > quick_type= -1; > QUICK_SELECT_I *quick= NULL; > > + > + explain_plan= eta; > eta->key.clear(); > eta->quick_info= NULL; > > @@ -23663,9 +23713,16 @@ void > JOIN_TAB::save_explain_data(Explain_table_access *eta, table_map prefix_tab > { > eta->push_extra(ET_RANGE_CHECKED_FOR_EACH_RECORD); > eta->range_checked_fer= new (thd->mem_root) > Explain_range_checked_fer; > - eta->range_checked_fer->keys_map= tab->keys; > - append_possible_keys(thd->mem_root, eta->range_checked_fer->key_set, > - table, tab->keys); > + if (eta->range_checked_fer) > + { > + eta->range_checked_fer->keys_map= tab->keys; > + eta->range_checked_fer->keys= table->s->keys; > + append_possible_keys_stat(thd->mem_root, > + eta->range_checked_fer->key_set, > + &eta->range_checked_fer->keys_stat, > + &eta->range_checked_fer->keys_stat_names, > + table, tab->keys); > + } > } > else if (tab->select->cond || > (tab->cache_select && tab->cache_select->cond)) > diff --git a/sql/sql_select.h b/sql/sql_select.h > index 0557e32..ef92690 100644 > --- a/sql/sql_select.h > +++ b/sql/sql_select.h > @@ -362,7 +362,12 @@ typedef struct st_join_table { > SJ_TMP_TABLE *check_weed_out_table; > /* for EXPLAIN only: */ > SJ_TMP_TABLE *first_weedout_table; > - > + > + /** > + reference to saved plan and execution statistics > + */ > + Explain_table_access *explain_plan; > + > /* > If set, means we should stop join enumeration after we've got the first > match and return to the specified join tab. May point to BR Sergei -- Sergei Petrunia, Software Developer MariaDB Corporation | Skype: sergefp | Blog: http://s.petrunia.net/blog _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : [email protected] Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp

