Hi! Here is the third and last part of the review.
>>>>> "Sergey" == Sergey Petrunya <[email protected]> writes: <cut> > @@ -7433,8 +7718,18 @@ > > table_map used_tables2= (join->const_table_map | > OUTER_REF_TABLE_BIT | RAND_TABLE_BIT); > - for (tab= join->join_tab+join->const_tables; tab <= last_tab ; tab++) > + for (JOIN_TAB *tab= first_linear_tab(join, TRUE); > + tab; > + tab= next_linear_tab(join, tab, TRUE)) > { > + if (!tab->table) > + { > + /* > + psergey-todo: this is probably incorrect, fix this when we get > + correct processing for outer joins + semi joins > + */ > + continue; > + } When is the above fix going to happen ? (Which worklog). What is the effect if we execute the continue? a) Wrong result b) Crash 3) Slower code ? Please add a comment about this if it's 3) If it's 1 or 2 we should fix it before pushing to trunk. > @@ -7480,37 +7775,12 @@ > (*sel_cond_ref)->update_used_tables(); > if (cond_tab->select) > cond_tab->select->cond= cond_tab->select_cond; > - } > + } > + if (tab == last_tab) > + break; Wouldn't it be better to fix the 'for' loop, as otherwise you may get a crash if tab->table is not true for the tab == last_tab. How about this for a better for loop for (JOIN_TAB *tab= first_linear_tab(join, TRUE), prev_tab= 0; prev_tab != last_tab; prev_tab= tab, tab = next_linear_tab(join, tab, TRUE)) Note that we don't have to test for tab == 0 as we will always find last_tab first. > @@ -7968,8 +8240,8 @@ > Don't use join buffering if we're dictated not to by no_jbuf_after (this > ...) > */ > - if (!(i <= no_jbuf_after) || tab->loosescan_match_tab || > - sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) > + if ((!tab->bush_root_tab? !(i <= no_jbuf_after) : FALSE) || > + tab->loosescan_match_tab || tab->bush_children) > goto no_join_cache; The above was a bit complex if. How about: if (((!tab->bush_root_tab && !(i <= no_jbuf_after)) || tab->loosescan_match_tab || tab->bush_children) goto no_join_cache; Don't really understand how we could use join_cache with SMJ_NEST child (ie, a tab with tab->bush_root_tab set). > for (JOIN_TAB *first_inner= tab->first_inner; first_inner; > @@ -8122,16 +8394,24 @@ > void check_join_cache_usage_for_tables(JOIN *join, ulonglong options, > uint no_jbuf_after) > { > - JOIN_TAB *first_sjm_table= NULL; > - JOIN_TAB *last_sjm_table= NULL; > + //JOIN_TAB *first_sjm_table= NULL; > + //JOIN_TAB *last_sjm_table= NULL; Remove comments. (Please remove all these before you request a review as the deleted lines will show up nicely in the diff anyway). > + JOIN_TAB *tab; > > - for (uint i= join->const_tables; i < join->tables; i++) > - join->join_tab[i].used_join_cache_level= > join->max_allowed_join_cache_level; > - > - for (uint i= join->const_tables; i < join->tables; i++) > - { > - JOIN_TAB *tab= join->join_tab+i; > + //for (uint i= join->const_tables; i < join->tables; i++) Remove comment > + for (tab= first_linear_tab(join, TRUE); > + tab; > + tab= next_linear_tab(join, tab, TRUE)) > + { > + tab->used_join_cache_level= join->max_allowed_join_cache_level; > + } > > + //for (uint i= join->const_tables; i < join->tables; i++) Remove comment > + for (tab= first_linear_tab(join, TRUE); > + tab; > + tab= next_linear_tab(join, tab, TRUE)) > + { > +#if 0 > if (sj_is_materialize_strategy(join->best_positions[i].sj_strategy)) > { > first_sjm_table= tab; > @@ -8142,9 +8422,16 @@ > } > if (!(tab >= first_sjm_table && tab < last_sjm_table)) > tab->first_sjm_sibling= NULL; > - > +#endif Remove above ifdefed block > + JOIN_TAB *prev_tab; > +restart: > tab->icp_other_tables_ok= TRUE; > tab->idx_cond_fact_out= TRUE; > + > + prev_tab= tab - 1; Isn't it more clear to set prev_tab as part of the for loop? (This will make it independent of how next_linear_tab() is implemented) > @@ -8154,12 +8441,22 @@ > + /* > if (join->return_tab) > i= join->return_tab-join->join_tab-1; // always >= 0 > + */ Remove commented code > @@ -8204,10 +8501,17 @@ > setup_semijoin_dups_elimination(join, options, no_jbuf_after)) > DBUG_RETURN(TRUE); /* purecov: inspected */ > > - for (i= 0; i < join->const_tables; i++) > - join->join_tab[i].partial_join_cardinality= 1; > + for (tab= first_linear_tab(join, TRUE); > + tab; > + tab= next_linear_tab(join, tab, TRUE)) > + { > + tab->partial_join_cardinality= 1; > + } Isn't the above loop wrong ? It was supposed to only loop over const tables, not skip const tables. > - for (i=join->const_tables ; i < join->tables ; i++) > + //for (i=join->const_tables ; i < join->tables ; i++) Remove comment > + for (tab= first_linear_tab(join, TRUE), i= join->const_tables; > + tab; > + tab= next_linear_tab(join, tab, TRUE)) > { > /* > The approximation below for partial join cardinality is not good > because > @@ -8215,24 +8519,45 @@ > - it does not differentiate between inner joins, outer joins and > semi-joins. > Later it should be improved. > */ > - JOIN_TAB *tab=join->join_tab+i; > + JOIN_TAB *prev_tab= tab - 1; Set prev_tab in the for loop. This allows us to get automaticly the second part of the following if done: > + if ((tab->bush_root_tab && tab->bush_root_tab->bush_children->start == > tab) || > + (tab == join->join_tab + join->const_tables)) > + prev_tab= NULL; In other words use: for (tab= first_linear_tab(join, TRUE), i= join->const_tables, prev_tab=0; tab; prev_tab= tab; tab= next_linear_tab(join, tab, TRUE)) > + DBUG_ASSERT(tab->bush_children || tab->table == > join->best_positions[i].table->table); > tab->partial_join_cardinality= join->best_positions[i].records_read * > - (i ? (tab-1)->partial_join_cardinality : > 1); > + (prev_tab? > prev_tab->partial_join_cardinality : 1); > + if (!tab->bush_children) > + i++; Why do you assign and increment i? I can't see it beeing used anymore in the code. > - for (i=join->const_tables ; i < join->tables ; i++) > + //for (i=join->const_tables ; i < join->tables ; i++) Remove comment. > + for (tab= first_linear_tab(join, TRUE), i= join->const_tables; > + tab; > + tab= next_linear_tab(join, tab, TRUE), i++) > { > - JOIN_TAB *tab=join->join_tab+i; > + //JOIN_TAB *tab=join->join_tab+i; Remove comment. > + if (tab->bush_children) > + { > + if (setup_sj_materialization(tab)) > + return TRUE; > + } > + > TABLE *table=tab->table; > uint jcl= tab->used_join_cache_level; > tab->read_record.table= table; > tab->read_record.file=table->file; > tab->read_record.unlock_row= rr_unlock_row; > - tab->next_select=sub_select; /* normal select */ > tab->sorted= sorted; > sorted= 0; // only first must be sorted > + > + if (!(tab->bush_root_tab && > + tab->bush_root_tab->bush_children->end == tab + 1)) > + { > + tab->next_select=sub_select; /* normal select */ > + } The above needs a comment like: /* We should not set tab->next_select for the last table in the SMJ-nest, as this is already set in setup_sj_materialization(). */ (It would be nicer to have next_select set to 0 by default and here only set it if it was not set before). > @@ -8395,13 +8707,16 @@ > abort(); /* purecov: deadcode */ > } > } > - join->join_tab[join->tables-1].next_select=0; /* Set by do_select */ > + uint n_top_tables= join->join_tab_ranges.head()->end - > + join->join_tab_ranges.head()->start; > + join->join_tab[n_top_tables - 1].next_select=0; /* Set by do_select */ > Isn't the above same as (join->join_tab_ranges.head()->end -1).next_select= 0 ? > -/* > + /* > If a join buffer is used to join a table the ordering by an index > for the first non-constant table cannot be employed anymore. > */ > - for (i=join->const_tables ; i < join->tables ; i++) > + //for (i=join->const_tables ; i < join->tables ; i++) > + for (i=join->const_tables ; i < n_top_tables ; i++) > { Wouldn't it be better to also here use the normal for loop for JOIN_TAB's to hide the implementation a bit ? for (tab= first_linear_tab(join, TRUE), tab; tab= next_linear_tab(join, tab, FALSE)) > @@ -8481,6 +8799,12 @@ > { > table->disable_keyread(); > table->file->ha_index_or_rnd_end(); > + > + if (table->pos_in_table_list && > + table->pos_in_table_list->jtbm_subselect) > + { > + table->pos_in_table_list->jtbm_subselect->cleanup(); > + } > /* > We need to reset this for next select > (Tested in part_of_refkey) > @@ -8675,7 +8999,7 @@ > > if (table) > { > - JOIN_TAB *tab,*end; > + JOIN_TAB *tab; > /* > Only a sorted table may be cached. This sorted table is always the > first non const table in join->table > @@ -8688,13 +9012,15 @@ > > if (full) > { > - for (tab= join_tab, end= tab+tables; tab != end; tab++) > + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= > next_linear_tab(this, tab, TRUE)) > tab->cleanup(); > + //psergey4: how is the above supposed to work when > + //top_jtrange_tables==FALSE? It will crash right away! Don't understand the comment. If top_jtrange_tables is 0 then tab is 0 and the for loop will never execute. anyway, it would be better to use our standard loop: for (tab= first_linear_tab(join, FALSE), tab; tab= next_linear_tab(join, tab, TRUE)) instead of having yet another way to do the loop over JOIN_TAB's > table= 0; > } > else > { > - for (tab= join_tab, end= tab+tables; tab != end; tab++) > + for (tab= top_jtrange_tables?join_tab:NULL; tab; tab= > next_linear_tab(this, tab, TRUE)) Same here; Change to a basic for loop > @@ -9982,7 +10343,8 @@ > > /* > Pick the "head" item: the constant one or the first in the join order > - that's not inside some SJM nest. > + that's not inside some SJM nest. psergey2: out-of-date comment. It is ok > + inside SJM, too. Please update comment to be correct. > */ > if (item_const) > head= item_const; > @@ -11084,6 +11446,9 @@ > for (i= first_tab; i <= last_tab; i++) > reopt_remaining_tables |= join->positions[i].table->table->map; > > + table_map save_cur_sj_inner_tables= join->cur_sj_inner_tables; > + join->cur_sj_inner_tables= 0; > + Add a comment why the above has to be saved, reset & restored. (Didn't se anything in the loop that would obviously need this) > for (i= first_tab; i <= last_tab; i++) > { > JOIN_TAB *rs= join->positions[i].table; > @@ -11109,6 +11474,7 @@ > if (!rs->emb_sj_nest) > *outer_rec_count *= pos.records_read; > } > + join->cur_sj_inner_tables= save_cur_sj_inner_tables; > *reopt_cost= cost; > } > > @@ -12391,6 +12757,8 @@ > share->keys=1; > share->uniques= test(using_unique_constraint); > table->key_info= table->s->key_info= keyinfo; > + table->keys_in_use_for_query.set_bit(0); > + share->keys_in_use.set_bit(0); > keyinfo->key_part=key_part_info; > keyinfo->flags=HA_NOSAME; > keyinfo->usable_key_parts=keyinfo->key_parts= param->group_parts; > @@ -12406,6 +12774,8 @@ > bool maybe_null=(*cur_group->item)->maybe_null; > key_part_info->null_bit=0; > key_part_info->field= field; > + if (cur_group == group) > + field->key_start.set_bit(0); > key_part_info->offset= field->offset(table->record[0]); > key_part_info->length= (uint16) field->key_length(); > key_part_info->type= (uint8) field->key_type(); > @@ -12475,6 +12845,8 @@ > keyinfo->key_parts * sizeof(KEY_PART_INFO)))) > goto err; > bzero((void*) key_part_info, keyinfo->key_parts * sizeof(KEY_PART_INFO)); > + table->keys_in_use_for_query.set_bit(0); > + share->keys_in_use.set_bit(0); > table->key_info= table->s->key_info= keyinfo; > keyinfo->key_part=key_part_info; > keyinfo->flags=HA_NOSAME | HA_NULL_ARE_EQUAL; > @@ -12513,6 +12885,13 @@ > { > key_part_info->null_bit=0; Remove above as you are setting it below. > key_part_info->field= *reg_field; > + (*reg_field)->flags |= PART_KEY_FLAG; > + if (key_part_info == keyinfo->key_part) > + (*reg_field)->key_start.set_bit(0); > + key_part_info->null_bit= (*reg_field)->null_bit; > + key_part_info->null_offset= (uint) ((*reg_field)->null_ptr - > + (uchar*) table->record[0]); > + You should only set null_offset if the field has a null_ptr. It's strange that the old code worked without setting null_bit.... Found the issue; in ha_heap.cc we corrected a wrongly set null bit value. However we should not count in this behavior (which the above fix ensures) > @@ -14603,13 +14848,24 @@ > return (*tab->read_record.read_record)(&tab->read_record); > } > > -static int > +int > join_read_record_no_init(JOIN_TAB *tab) > { > + Copy_field *save_copy, *save_copy_end; > + Add comment: /* init_read_record resets all elements of tab->read_record(). Remember things that we don't want to have reset. */ I think it would be better to move copy_field and copy_field_end to JOIN_TAB as these don't have anything directly to do with the read_record functions. > + save_copy= tab->read_record.copy_field; > + save_copy_end= tab->read_record.copy_field_end; > + > + init_read_record(&tab->read_record, tab->join->thd, tab->table, > + tab->select,1,1, FALSE); > + > + tab->read_record.copy_field= save_copy; > + tab->read_record.copy_field_end= save_copy_end; > + tab->read_record.read_record= rr_sequential_and_unpack; > + > return (*tab->read_record.read_record)(&tab->read_record); > } <cut> > @@ -19032,16 +19322,21 @@ > { > table_map used_tables=0; > > - uchar sjm_nests[MAX_TABLES]; > - uint sjm_nests_cur=0; > - uint sjm_nests_end= 0; > - uint end_table= join->tables; > bool printing_materialize_nest= FALSE; > uint select_id= join->select_lex->select_number; > > - for (uint i=0 ; i < end_table ; i++) > + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); > + JOIN_TAB_RANGE *jt_range; > + while ((jt_range= it++)) Please replace with a function that goes over all ranges, so that we don't get an extra indentation level here. (Part of suggestion of review 1) > @@ -19321,12 +19550,16 @@ > examined_rows= tab->limit; > else > { > - tab->table->file->info(HA_STATUS_VARIABLE); > - examined_rows= tab->table->file->stats.records; > + //tab->table->file->info(HA_STATUS_VARIABLE); Remove comment > + if (!tab->table->pos_in_table_list || > + tab->table->is_filled_at_execution()) // temporary, > is_filled_at_execution > + examined_rows= tab->records; > + else > + examined_rows= tab->table->file->stats.records; Shouldn't you call tab->table->file->info(HA_STATUS_VARIABLE) in the last case ? If not, where is it called now? > @@ -19347,7 +19580,7 @@ > */ > float f= 0.0; > if (examined_rows) > - f= (float) (100.0 * join->best_positions[i].records_read / > + f= (float) (100.0 * > tab->records_read/*join->best_positions[i].records_read*/ / Removed comment as it's not accurate anymore <cut> > diff -urN --exclude='.*' 5.3-noc/sql/sql_show.cc > maria-5.3-subqueries-r36-noc/sql/sql_show.cc > --- 5.3-noc/sql/sql_show.cc 2011-01-27 21:39:33.000000000 +0300 > +JOIN_TAB *first_linear_tab(JOIN *join, bool after_const_tables); > +JOIN_TAB *next_linear_tab(JOIN* join, JOIN_TAB* tab, bool > include_bush_roots); Please add the prototypes in sql_select.h and include this instead of having prototypes to functions in many places. > @@ -6602,14 +6605,17 @@ > bool get_schema_tables_result(JOIN *join, > enum enum_schema_table_state executed_place) > { > - JOIN_TAB *tmp_join_tab= join->join_tab+join->tables; > + //JOIN_TAB *tmp_join_tab= join->join_tab+join->tables; Remove comment > THD *thd= join->thd; > LEX *lex= thd->lex; > bool result= 0; > DBUG_ENTER("get_schema_tables_result"); > > thd->no_warnings_for_error= 1; > - for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++) > + //for (JOIN_TAB *tab= join->join_tab; tab < tmp_join_tab; tab++) Remove comment > + for (JOIN_TAB *tab= first_linear_tab(join, FALSE); > + tab; > + tab= next_linear_tab(join, tab, FALSE)) > { > if (!tab->table || !tab->table->pos_in_table_list) > break; > +++ maria-5.3-subqueries-r36-noc/sql/sql_test.cc 2011-02-16 > 14:42:52.000000000 +0300 > @@ -166,58 +166,66 @@ > void > TEST_join(JOIN *join) > { > - uint i,ref; > + uint ref; > + int i; > + List_iterator<JOIN_TAB_RANGE> it(join->join_tab_ranges); > + JOIN_TAB_RANGE *jt_range; > DBUG_ENTER("TEST_join"); > > - /* > - Assemble results of all the calls to full_name() first, > - in order not to garble the tabular output below. > - */ > - String ref_key_parts[MAX_TABLES]; > - for (i= 0; i < join->tables; i++) > - { > - JOIN_TAB *tab= join->join_tab + i; > - for (ref= 0; ref < tab->ref.key_parts; ref++) > - { > - ref_key_parts[i].append(tab->ref.items[ref]->full_name()); > - ref_key_parts[i].append(" "); > - } > - } > - > DBUG_LOCK_FILE; > VOID(fputs("\nInfo about JOIN\n",DBUG_FILE)); > + > + while ((jt_range= it++)) > { > + /* > + Assemble results of all the calls to full_name() first, > + in order not to garble the tabular output below. > + */ > + String ref_key_parts[MAX_TABLES]; > + for (i= 0; i < (jt_range->end - jt_range->start); i++) Store (jt_range->end - jt_range->start) in a variable and use it above and below. <cut> > + for (i= 0; i < (jt_range->end - jt_range->start); i++) <cut> > +++ maria-5.3-subqueries-r36-noc/sql/table.cc 2011-02-16 14:42:52.000000000 > +0300 > @@ -5337,6 +5337,12 @@ > (parent && parent->children_attached)); > } > Add also a comment here what this functions means (copy from table.h) > + > +bool st_table::is_filled_at_execution() > +{ > + return test(pos_in_table_list->jtbm_subselect); > +} > + Regards, Monty _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : [email protected] Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp

