Hello Igor, Please find some minor comments below. Ok to push after they are addressed.
On Wed, Oct 06, 2010 at 05:14:08AM -0700, Igor Babaev wrote: > At file:///home/igor/maria/maria-5.3-mwl128-bug57024/ > > ------------------------------------------------------------ > revno: 2827 > revision-id: [email protected] > parent: [email protected] > committer: Igor Babaev <[email protected]> > branch nick: maria-5.3-mwl128-bug57024 > timestamp: Wed 2010-10-06 05:14:08 -0700 > message: > Ported the fix for bug #57024 (a performance issue for outer joins). > Employed the same kind of optimization as in the fix for the cases > when join buffer is used. > The optimization performs early evaluation of the conditions from > on expression with table references to only outer tables of > an outer join. ... > === modified file 'sql/sql_join_cache.cc' > @@ -1604,6 +1625,12 @@ > uchar *init_pos= pos; > CACHE_FIELD *copy= field_descr; > CACHE_FIELD *copy_end= copy+flag_fields; > + if (with_match_flag) > + { > + copy->str[0]= test((Match_flag) pos[0] == MATCH_FOUND); > + pos+= copy->length; > + copy++; > + } > for ( ; copy < copy_end; copy++) > { > memcpy(copy->str, pos, copy->length); TODO where does the above copy to? > @@ -1754,30 +1781,68 @@ > > > /* > - Skip record from join buffer if its match flag is on: default > implementation > - > - SYNOPSIS > - skip_recurrent_match() > - > - DESCRIPTION > - This default implementation of the virtual function skip_record_if_match > - skips the next record from the join buffer if its match flag is set on. > - If the record is skipped the value of 'pos' is set to points to the > position > - right after the record. > - > - RETURN VALUE > - TRUE the match flag is on and the record has been skipped > - FALSE the match flag is off > -*/ > - > -bool JOIN_CACHE::skip_recurrent_match() > -{ > - DBUG_ASSERT(with_length); > - uint offset= size_of_rec_len; > - if (prev_cache) > - offset+= prev_cache->get_size_of_rec_offset(); > - /* Check whether the match flag is on */ > - if (get_match_flag_by_pos(pos+offset)) > + Skip record from join buffer if's already matched: default implementation > + > + SYNOPSIS > + skip_as_matched() > + > + DESCRIPTION > + This default implementation of the virtual function skip_as_matched > + skips the next record from the join buffer if its match flag is set to > 1. > + If the record is skipped the value of 'pos' is set to points to the > position > + right after the record. > + > + RETURN VALUE > + TRUE the match flag is set to 1 and the record has been skipped > + FALSE the match flag is not 1 Please s/1/MATCH_FOUND/ > +*/ > + > +bool JOIN_CACHE::skip_as_matched() Please rename the function to skip_if_matched(), because that what it actually does: checks *if* the records is matched and skips it if it is so. > +{ > + DBUG_ASSERT(with_length); > + uint offset= size_of_rec_len; > + if (prev_cache) > + offset+= prev_cache->get_size_of_rec_offset(); > + /* Check whether the match flag is MATCH_FOUND */ > + if (get_match_flag_by_pos(pos+offset) == MATCH_FOUND) > + { > + pos+= size_of_rec_len + get_rec_length(pos); > + return TRUE; > + } > + return FALSE; > +} > + > + > +/* > + Skip record from join buffer if the match isn't needed: default > implementation > + > + SYNOPSIS > + skip_as_unneeded_match() > + > + DESCRIPTION > + This default implementation of the virtual function > skip_as_unneeded_match > + skips the next record from the join buffer if its match flag is not > + MATCH_NOT_FOUND, and, either its value is MATCH_FOUND and join_tab is the > + first inner table of an inner join, or, its value is MATCH_IMPOSSIBLE > + and join_tab is the first inner table of an outer join. > + If the record is skipped the value of 'pos' is set to points to the > position Typo, s/points/point/ > + right after the record. > + > + RETURN VALUE > + TRUE the record has to be been skipped ^^^^^^^^^^^ wrong grammar. > + FALSE otherwise > +*/ > + > +bool JOIN_CACHE::skip_as_unneeded_match() > +{ > + DBUG_ASSERT(with_length); > + enum Match_flag match_fl; > + uint offset= size_of_rec_len; > + if (prev_cache) > + offset+= prev_cache->get_size_of_rec_offset(); > + > + if ((match_fl= get_match_flag_by_pos(pos+offset)) != MATCH_NOT_FOUND && > + (join_tab->check_only_first_match() == (match_fl == MATCH_FOUND)) ) > { > pos+= size_of_rec_len + get_rec_length(pos); > return TRUE; > @@ -1990,9 +2055,9 @@ > { > int error; > enum_nested_loop_state rc= NESTED_LOOP_OK; > + join_tab->table->null_row= 0; > bool check_only_first_match= join_tab->check_only_first_match(); > - > - join_tab->table->null_row= 0; > + bool outer_join_first_inner= join_tab->is_first_inner_for_outer_join(); > > /* Return at once if there are no records in the join buffer */ > if (!records) > @@ -2042,9 +2107,11 @@ > /* > If only the first match is needed, and, it has been already found for > the next record read from the join buffer, then the record is > skipped. > + Also those records that must be null complemented are not considered > + as candidates for matches. > */ > - if (!(check_only_first_match && > - skip_recurrent_candidate_for_match(rec_ptr))) > + if ((!check_only_first_match && !outer_join_first_inner) || > + !skip_next_candidate_for_match(rec_ptr)) > { > read_next_candidate_for_match(rec_ptr); > rc= generate_full_extensions(rec_ptr); > @@ -2268,7 +2335,6 @@ > uint cnt; > enum_nested_loop_state rc= NESTED_LOOP_OK; > bool is_first_inner= join_tab == join_tab->first_unmatched; > - bool is_last_inner= join_tab == join_tab->first_unmatched->last_inner; > > /* Return at once if there are no records in the join buffer */ > if (!records) > @@ -2289,7 +2355,7 @@ > goto finish; > } > /* Just skip the whole record if a match for it has been already found */ > - if (!is_first_inner || !skip_recurrent_match()) > + if (!is_first_inner || !skip_as_matched()) > { > get_record(); > /* The outer row is complemented by nulls for each inner table */ > @@ -2527,6 +2593,8 @@ > is attached to the key entry. The key value is either placed in the hash > element added for the key or, if the use_emb_key flag is set, remains in > the record from the partial join. > + If the match flag field of a record contains MATCH_IMPOSSIBLE the key is > + not created for this record. > > RETURN VALUE > TRUE if it has been decided that it should be the last record > @@ -2550,6 +2618,9 @@ > link= prev_cache->get_curr_rec_link(); > write_record_data(link, &is_full); > > + if (last_written_is_null_compl) > + return is_full; > + > if (use_emb_key) > key= get_curr_emb_key(); > else > @@ -2634,26 +2705,55 @@ > > > /* > - Skip record from a hashed join buffer if its match flag is on > - > - SYNOPSIS > - skip_recurrent_match() > - > - DESCRIPTION > - This implementation of the virtual function skip_recurrent_match does > - the same as the default implementation does, but it takes into account > - the link element used to connect the records with the same key into a > chain. > - > - RETURN VALUE > - TRUE the match flag is on and the record has been skipped > + Skip record from a hashed join buffer if its match flag is set to 1 > + > + SYNOPSIS > + skip_as_matched() > + > + DESCRIPTION > + This implementation of the virtual function skip_as_matched does > + the same as the default implementation does, but it takes into account > + the link element used to connect the records with the same key into a > chain. > + > + RETURN VALUE > + TRUE the match flag is MATCH_FOUND and the record has been skipped > + FALSE the match flag is not MATCH_FOUND > +*/ > + > +bool JOIN_CACHE_HASHED::skip_as_matched() > +{ > + uchar *save_pos= pos; > + pos+= get_size_of_rec_offset(); > + if (!this->JOIN_CACHE::skip_as_matched()) > + { > + pos= save_pos; > + return FALSE; > + } > + return TRUE; > +} > + > + > +/* > + Skip record from a hashed join buffer if its match flag dictates to do so > + > + SYNOPSIS > + skip_as_uneeded_match() > + > + DESCRIPTION > + This implementation of the virtual function skip_as_uneeded_match does > + the same as the default implementation does, but it takes into account > + the link element used to connect the records with the same key into a > chain. > + > + RETURN VALUE > + TRUE the match flag dictates to skip the record > FALSE the match flag is off > */ > > -bool JOIN_CACHE_HASHED::skip_recurrent_match() > +bool JOIN_CACHE_HASHED::skip_as_unneeded_match() > { > uchar *save_pos= pos; > pos+= get_size_of_rec_offset(); > - if (!this->JOIN_CACHE::skip_recurrent_match()) > + if (!this->JOIN_CACHE::skip_as_unneeded_match()) > { > pos= save_pos; > return FALSE; > @@ -2780,7 +2880,7 @@ > last element of this chain. > > RETURN VALUE > - TRUE if each retrieved record has its match flag set on > + TRUE if each retrieved record has its match flag set to MATCH_FOUND > FALSE otherwise > */ > > @@ -2792,7 +2892,7 @@ > { > next_rec_ref_ptr= get_next_rec_ref(next_rec_ref_ptr); > uchar *rec_ptr= next_rec_ref_ptr+rec_fields_offset; > - if (!get_match_flag_by_pos(rec_ptr)) > + if (get_match_flag_by_pos(rec_ptr) != MATCH_FOUND) > return FALSE; > } > while (next_rec_ref_ptr != last_rec_ref_ptr); > @@ -3000,25 +3100,28 @@ > Check whether the matching record from the BNL cache is to be skipped > > SYNOPSIS > - skip_recurrent_candidate_for_match > + skip_next_candidate_for_match > rec_ptr pointer to the position in the join buffer right after the > prefix > of the current record > > DESCRIPTION > This implementation of the virtual function just calls the > - method skip_recurrent_match to check whether the record referenced by > - ref_ptr has its match flag set on and, if so, just skips this record > - setting the value of the cursor 'pos' to the position right after it. > + method skip_as_unneded_match to check whether the record referenced by > + ref_ptr has its match flag set either to MATCH_FOUND and join_tab is the > + first inner table of a semi-join, or it's set to MATCH_IMPOSSIBLE and > + join_tab is the first inner table of an outer join. > + If so, the function just skips this record setting the value of the > + cursor 'pos' to the position right after it. > > RETURN VALUE > TRUE the record referenced by rec_ptr has been skipped > FALSE otherwise > */ > > -bool JOIN_CACHE_BNL::skip_recurrent_candidate_for_match(uchar *rec_ptr) > +bool JOIN_CACHE_BNL::skip_next_candidate_for_match(uchar *rec_ptr) > { > pos= rec_ptr-base_prefix_length; > - return skip_recurrent_match(); > + return skip_as_unneeded_match(); > } > > > @@ -3162,7 +3265,7 @@ > The methods performs the necessary preparations to read the next record > from the join buffer into the record buffer by the method > read_next_candidate_for_match, or, to skip the next record from the join > - buffer by the method skip_recurrent_candidate_for_match. > + buffer by the method skip_next_candidate_for_match. > This implementation of the virtual method moves to the next record > in the chain of all records from the join buffer that are to be > equi-joined with the current record from join_tab. > @@ -3187,23 +3290,25 @@ > Check whether the matching record from the BNLH cache is to be skipped > > SYNOPSIS > - skip_recurrent_candidate_for_match > + skip_next_candidate_for_match > rec_ptr pointer to the position in the join buffer right after > the previous record > > DESCRIPTION > This implementation of the virtual function just calls the > method get_match_flag_by_pos to check whether the record referenced > - by ref_ptr has its match flag set on. > + by ref_ptr has its match flag set to MATCH_FOUND. > > RETURN VALUE > - TRUE the record referenced by rec_ptr has its match flag set on > + TRUE the record referenced by rec_ptr has its match flag set to > + MATCH_FOUND > FALSE otherwise > */ > > -bool JOIN_CACHE_BNLH::skip_recurrent_candidate_for_match(uchar *rec_ptr) > +bool JOIN_CACHE_BNLH::skip_next_candidate_for_match(uchar *rec_ptr) > { > - return get_match_flag_by_pos(rec_ptr); > + return join_tab->check_only_first_match() && > + (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); > } > > > @@ -3364,7 +3469,7 @@ > int JOIN_TAB_SCAN_MRR::next() > { > char **ptr= (char **) cache->get_curr_association_ptr(); > - uint rc= join_tab->table->file->multi_range_read_next(ptr) ? -1 : 0; > + int rc= join_tab->table->file->multi_range_read_next(ptr) ? -1 : 0; > if (!rc) > { > /* > @@ -3482,7 +3587,8 @@ > { > DBUG_ENTER("bka_range_seq_skip_record"); > JOIN_CACHE_BKA *cache= (JOIN_CACHE_BKA *) rseq; > - bool res= cache->get_match_flag_by_pos((uchar *) range_info); > + bool res= cache->get_match_flag_by_pos((uchar *) range_info) == > + JOIN_CACHE::MATCH_FOUND; > DBUG_RETURN(res); > } > > @@ -3560,7 +3666,7 @@ > The method performs the necessary preparations to read the next record > from the join buffer into the record buffer by the method > read_next_candidate_for_match, or, to skip the next record from the join > - buffer by the method skip_recurrent_match. > + buffer by the method skip_as_unneeded_match. > This implementation of the virtual method get_next_candidate_for_match > just decrements the counter of the records that are to be iterated over > and returns the value of curr_association as a reference to the position > @@ -3584,23 +3690,25 @@ > Check whether the matching record from the BKA cache is to be skipped > > SYNOPSIS > - skip_recurrent_candidate_for_match > + skip_next_candidate_for_match > rec_ptr pointer to the position in the join buffer right after > the previous record > > DESCRIPTION > This implementation of the virtual function just calls the > method get_match_flag_by_pos to check whether the record referenced > - by ref_ptr has its match flag set on. > + by ref_ptr has its match flag set to MATCH_FOUND. > > RETURN VALUE > - TRUE the record referenced by rec_ptr has its match flag set on > + TRUE the record referenced by rec_ptr has its match flag set to > + MATCH_FOUND > FALSE otherwise > */ > > -bool JOIN_CACHE_BKA::skip_recurrent_candidate_for_match(uchar *rec_ptr) > +bool JOIN_CACHE_BKA::skip_next_candidate_for_match(uchar *rec_ptr) > { > - return get_match_flag_by_pos(rec_ptr); > + return join_tab->check_only_first_match() && > + (get_match_flag_by_pos(rec_ptr) == MATCH_FOUND); > } > > > @@ -3691,7 +3799,9 @@ > If is assumed that the functions starts reading at the position of > the record length which is provided for each records in a BKA cache. > After the key is built the 'pos' value points to the first position after > - the current record. > + the current record. > + The function just skips the records with MATCH_IMPOSSIBLE in the > + match flag field if there is any. > The function returns 0 if the initial position is after the beginning > of the record fields for last record from the join buffer. > > @@ -3708,6 +3818,8 @@ > uchar *init_pos; > JOIN_CACHE *cache; > > +start: > + > /* Any record in a BKA cache is prepended with its length */ > DBUG_ASSERT(with_length); > > @@ -3727,6 +3839,13 @@ > > /* Read all flag fields of the record */ > read_flag_fields(); > + > + if (with_match_flag && > + (Match_flag) curr_rec_pos[0] == MATCH_IMPOSSIBLE ) > + { > + pos= init_pos+rec_len; > + goto start; > + } > > if (use_emb_key) > { > > === modified file 'sql/sql_select.cc' > --- a/sql/sql_select.cc 2010-10-01 18:05:27 +0000 > +++ b/sql/sql_select.cc 2010-10-06 12:14:08 +0000 > @@ -7090,9 +7090,13 @@ > used_tables2|= current_map; > COND *tmp_cond= make_cond_for_table(on_expr, used_tables2, > current_map, FALSE); > + add_cond_and_fix(&tmp_cond, tab->on_precond); > if (tmp_cond) > { > JOIN_TAB *cond_tab= tab < first_inner_tab ? first_inner_tab : > tab; > + Item **sel_cond_ref= tab < first_inner_tab ? > + &first_inner_tab->on_precond : > + &tab->select_cond; > /* > First add the guards for match variables of > all embedding outer join operations. > @@ -7115,15 +7119,15 @@ > tmp_cond->quick_fix_field(); > /* Add the predicate to other pushed down predicates */ > DBUG_PRINT("info", ("Item_cond_and")); > - cond_tab->select_cond= !cond_tab->select_cond ? tmp_cond : > - new Item_cond_and(cond_tab->select_cond, > - tmp_cond); > + *sel_cond_ref= !(*sel_cond_ref) ? > + tmp_cond : > + new Item_cond_and(*sel_cond_ref, tmp_cond); > DBUG_PRINT("info", ("Item_cond_and 0x%lx", > - (ulong)cond_tab->select_cond)); > - if (!cond_tab->select_cond) > - DBUG_RETURN(1); > - cond_tab->select_cond->update_used_tables(); > - cond_tab->select_cond->quick_fix_field(); > + (ulong)(*sel_cond_ref))); > + if (!(*sel_cond_ref)) > + DBUG_RETURN(1); > + (*sel_cond_ref)->quick_fix_field(); > + (*sel_cond_ref)->update_used_tables(); > if (cond_tab->select) > cond_tab->select->cond= cond_tab->select_cond; > } > @@ -13246,7 +13250,7 @@ > DBUG_RETURN(nls); > } > int error; > - enum_nested_loop_state rc; > + enum_nested_loop_state rc= NESTED_LOOP_OK; > READ_RECORD *info= &join_tab->read_record; > > if (join_tab->flush_weedout_table) > @@ -13279,18 +13283,21 @@ > > /* Set first_unmatched for the last inner table of this group */ > join_tab->last_inner->first_unmatched= join_tab; > - } > + if (join_tab->on_precond && !join_tab->on_precond->val_int()) > + rc= NESTED_LOOP_NO_MORE_ROWS; > + } > join->thd->row_count= 0; > > if (join_tab->loosescan_match_tab) > join_tab->loosescan_match_tab->found_match= FALSE; > > - error= (*join_tab->read_first_record)(join_tab); > - > - if (join_tab->keep_current_rowid) > - join_tab->table->file->position(join_tab->table->record[0]); > - > - rc= evaluate_join_record(join, join_tab, error); > + if (rc != NESTED_LOOP_NO_MORE_ROWS) > + { > + error= (*join_tab->read_first_record)(join_tab); > + if (join_tab->keep_current_rowid) > + join_tab->table->file->position(join_tab->table->record[0]); > + rc= evaluate_join_record(join, join_tab, error); > + } > } > > /* > > === modified file 'sql/sql_select.h' > --- a/sql/sql_select.h 2010-10-01 18:05:27 +0000 > +++ b/sql/sql_select.h 2010-10-06 12:14:08 +0000 > @@ -161,6 +161,8 @@ > KEYUSE *keyuse; /**< pointer to first used key > */ > SQL_SELECT *select; > COND *select_cond; > + COND *on_precond; /**< part of on condition to check before > + accessing the first inner table > */ > QUICK_SELECT_I *quick; > /* > The value of select_cond before we've attempted to do Index Condition > @@ -678,6 +680,14 @@ > */ > uchar *curr_rec_link; > > + /* > + This flag is set to TRUE if join_tab is the first inner table of an outer > + join and the latest record written to the join buffer is detected to be > + null complemented after checking on conditions over the outer tables for > + this outer join operation > + */ > + bool last_written_is_null_compl; > + > /* > The number of fields put in the join buffer of the join cache that are > used in building keys to access the table join_tab > @@ -792,8 +802,11 @@ > /* Read a referenced field from the join buffer */ > bool read_referenced_field(CACHE_FIELD *copy, uchar *rec_ptr, uint *len); > > - /* Shall skip record from the join buffer if its match flag is on */ > - virtual bool skip_recurrent_match(); > + /* Shall skip record from the join buffer if its match flag is set to 1 */ > + virtual bool skip_as_matched(); Please s/1/MATCH_FOUND/; > + > + /* Shall skip record from the join buffer if its match flag is not-zero */ > + virtual bool skip_as_unneeded_match(); > > /* > True if rec_ptr points to the record whose blob data stay in > @@ -836,11 +849,11 @@ > virtual uchar *get_next_candidate_for_match()= 0; > /* > Shall check whether the given record from the join buffer has its match > - flag set on and, if so, skip the record in the buffer. > + flag settings commands to skip the record in the buffer. > */ > - virtual bool skip_recurrent_candidate_for_match(uchar *rec_ptr)= 0; > + virtual bool skip_next_candidate_for_match(uchar *rec_ptr)= 0; > /* > - Shall read the given record from the join buffer into the > + Shall read the given record from the join buffer intTCo the Is the above change a typo ^^ > the corresponding record buffer > */ > virtual void read_next_candidate_for_match(uchar *rec_ptr)= 0; > @@ -868,6 +881,21 @@ > > public: > > + /* > + The enumeration type Match_flag describes possible states of the match > flag > + field stored for the records of the first inner tables of outer joins > and > + semi-joins in the cases when the first match strategy is used for them. > + When a record with match flag field is written into the join buffer the > + state of the field usually is MATCH_NOT_FOUND unless this is a record of > the > + first inner table of the outer join for which the on precondition (the > + condition from on expression over outer tables) has turned out not to > be > + true. In the last case the state of the match flag is MATCH_IMPOSSIBLE. > + The state of the match flag field is changed to MATCH_FOUND as soon as > + the first full matching combination of inner tables of the outer join or > + the semi-join is discovered. > + */ > + enum Match_flag { MATCH_NOT_FOUND, MATCH_FOUND, MATCH_IMPOSSIBLE }; > + > /* Table to be joined with the partial join records from the cache */ > JOIN_TAB *join_tab; > > @@ -920,7 +948,7 @@ > virtual void get_record_by_pos(uchar *rec_ptr); > > /* Shall return the value of the match flag for the positioned record */ > - virtual bool get_match_flag_by_pos(uchar *rec_ptr); > + virtual enum Match_flag get_match_flag_by_pos(uchar *rec_ptr); > > /* Shall return the position of the current record */ > virtual uchar *get_curr_rec() { return curr_rec_pos; } BR Sergey -- Sergey Petrunia, Software Developer Monty Program AB, http://askmonty.org Blog: http://s.petrunia.net/blog _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : [email protected] Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp

