Hello Sergei! On Tue, 4 Oct 2022 at 21:02, Sergei Golubchik <s...@mariadb.org> wrote:
> Hi, Nikita, > > This review applies to the combined diff e2f8dff^..52f489e > > On Oct 04, Nikita Malyavin wrote: > > > diff --git a/sql/log_event.h b/sql/log_event.h > > index 91a9eeee10f..8452e665c98 100644 > > --- a/sql/log_event.h > > +++ b/sql/log_event.h > > @@ -5275,8 +5278,11 @@ class Rows_log_event : public Log_event > > uchar *m_key; /* Buffer to keep key value during searches */ > > KEY *m_key_info; /* Pointer to KEY info for m_key_nr */ > > uint m_key_nr; /* Key number */ > > + uint m_key_parts_suit; /* A number of key_parts suited to lookup > */ > > that's very confusing name. Took me a while to understand what you meant. > Better call it m_usable_key_parts > > > bool master_had_triggers; /* set after tables opening */ > > > > + uint key_parts_suit_event(const KEY *key) const; > > and this could be "get_usable_key_parts()" > Okay, no problem. Both sound suitable, or usable, for me:) > > > + > > int find_key(); // Find a best key to use in find_row() > > int find_row(rpl_group_info *); > > int write_row(rpl_group_info *, const bool); > > diff --git a/sql/log_event_server.cc b/sql/log_event_server.cc > > index 422d496d5c3..25705d13641 100644 > > --- a/sql/log_event_server.cc > > +++ b/sql/log_event_server.cc > > @@ -6062,7 +6065,7 @@ int Rows_log_event::do_apply_event(rpl_group_info > *rgi) > > */ > > sql_mode_t saved_sql_mode= thd->variables.sql_mode; > > if (!is_auto_inc_in_extra_columns()) > > - thd->variables.sql_mode= (rpl_data.is_online_alter() ? > saved_sql_mode :0) > > + thd->variables.sql_mode= (m_online_alter ? saved_sql_mode :0) > > wouldn't it be more correct to use saved_sql_mode also in normal > replication? > using MODE_NO_AUTO_VALUE_ON_ZERO was a fix to Bug #56662 Assertion failed: next_insert_id == 0, file .\handler.cc. See commit d5bf6b8aa8bb > > | MODE_NO_AUTO_VALUE_ON_ZERO; > > > > // row processing loop > > @@ -7949,14 +7952,35 @@ uint8 Write_rows_log_event::get_trg_event_map() > > > **************************************************************************/ > > > > #if defined(HAVE_REPLICATION) > > -/* > > - Compares table->record[0] and table->record[1] > > +/** > > + @brief Compares table->record[0] and table->record[1] > > + > > + @param masrter_columns a number of columns on the source replica, > > + 0 if ONLINE ALTER TABLE > > > > - Returns TRUE if different. > > + @returns true if different. > > */ > > -static bool record_compare(TABLE *table) > > +static bool record_compare(TABLE *table, uint master_columns) > > { > > - bool result= FALSE; > > + bool result= false; > > + > > + /* > > + Determine if the optimized check is possible (and we can > > + goto record_compare_exit). > > + We should ensure that all extra columns (i.e. fieldnr > > master_columns) > > + have explicit values. If not, we will exclude them from comparison, > > + as they can contain non-deterministic values. > > + > > + master_columns == 0 case is optimization for ONLINE ALTER to check > > + all columns fast. > > + */ > > + bool all_values_set= master_columns == 0 > > + && bitmap_is_set_all(&table->has_value_set); > > + for (uint i = master_columns; all_values_set && i < table->s->fields; > i++) > > + { > > + all_values_set= bitmap_is_set(&table->has_value_set, i); > > + } > > wait, this doesn't make any sense. You only do the loop if > all_values_set is true. > But it is true only if bitmap_is_set_all(&table->has_value_set). > There is no point in doing bitmap_is_set() for individual bits, > when you already know that all bits are set :) > > shouldn't it simply be > > bool all_values_set= bitmap_is_set_all(&table->has_value_set); > > ? > but see below, perhaps you don't need all_values_set at all > > Yes, you are correct here. We've been throught this on a previous e-mail, it should have been fixed already to the moment of your review. Perhaps, some part is not included here. > + > > /** > > Compare full record only if: > > - there are no blob fields (otherwise we would also need > > @@ -7969,40 +7993,43 @@ static bool record_compare(TABLE *table) > > */ > > if ((table->s->blob_fields + > > table->s->varchar_fields + > > - table->s->null_fields) == 0) > > + table->s->null_fields) == 0 > > + && all_values_set) > > { > > result= cmp_record(table,record[1]); > > goto record_compare_exit; > > } > > > > /* Compare null bits */ > > - if (memcmp(table->null_flags, > > - table->null_flags+table->s->rec_buff_length, > > - table->s->null_bytes)) > > + if (all_values_set && memcmp(table->null_flags, > > + > table->null_flags+table->s->rec_buff_length, > > + table->s->null_bytes)) > > { > > - result= TRUE; // Diff in NULL value > > + result= true; // Diff in NULL value > > goto record_compare_exit; > > } > > > > /* Compare fields */ > > for (Field **ptr=table->field ; *ptr ; ptr++) > > { > > + Field *f= *ptr; > > if (table->versioned() && (*ptr)->vers_sys_field()) > > { > > continue; > > } > > - /** > > - We only compare field contents that are not null. > > - NULL fields (i.e., their null bits) were compared > > - earlier. > > - */ > > - if (!(*(ptr))->is_null()) > > + /* > > + We only compare fields that exist on the source (or in ONLINE > > + ALTER case, that were in the original table). For reference, > > + replica tables can also contain extra fields. > > + */ > > + if (f->field_index > master_columns && !f->has_explicit_value()) > > + continue; > > same question as in a previous review. Do you mean that when > f->field_index < master_columns, it's ok to compare values below > even when !f->has_explicit_value() ? > Note, however, that it is an important condition in find_key(). See the commits of the next review:) > > + > > + if (f->is_null() != f->is_null(table->s->rec_buff_length) > > + || f->cmp_binary_offset(table->s->rec_buff_length)) > > may be it's better to create a fast copy of this function, like > > if (bitmap_is_set_all(&table->has_value_set)) > { > ... old record_compare() code starting from > if ((table->s->blob_fields + > table->s->varchar_fields + > table->s->null_fields) == 0) > } > else > { > ... your code, only the loop over fields. > } > > in this case you won't even need all_values_set or master_columns > I don't see a demand on refactoring this. The function looks readable enough for me, and it's less than two screens, while there are functions begging to be refactored by someone:) I guess you might not like all_values_set checked two times in a row... Well, in this particular case i'm sure it will be perfectly optimized out. > > > { > > - if ((*ptr)->cmp_binary_offset(table->s->rec_buff_length)) > > - { > > - result= TRUE; > > - goto record_compare_exit; > > - } > > + result= true; > > + goto record_compare_exit; > > } > > } > > > > @@ -8010,6 +8037,32 @@ static bool record_compare(TABLE *table) > > return result; > > } > > > > +/** > > + Newly added fields with non-deterministic defaults (i.e. > DEFAULT(RANDOM()), > > + CURRENT_TIMESTAMP, AUTO_INCREMENT) should be excluded from key search. > > + Basically we exclude all the default-filled fields based on > > + has_explicit_value bitmap. > > + */ > > +uint Rows_log_event::key_parts_suit_event(const KEY *key) const > > +{ > > + uint master_columns= m_online_alter ? 0 : m_cols.n_bits; > > get_master_cols() > 👍 > > > + uint p= 0; > > + for (;p < key->ext_key_parts; p++) > > + { > > + Field *f= key->key_part[p].field; > > + uint non_deterministic_default= f->default_value && > > + f->default_value->flags | > VCOL_NOT_STRICTLY_DETERMINISTIC; > > + > > + int next_number_field= f->table->next_number_field ? > > + f->table->next_number_field->field_index : > -1; > > eh? > 1. why it's *in the loop* ? > 2. why not simply > > || f == f->table->next_number_field > > or, to be extra safe > > || f->table->field[f->field_index] == f->table->next_number_field > Thanks! Turns out there's no need in extra safety, it just works. > > > + > > + if (f->field_index >= master_columns > > I don't think this condition ^^^ is needed. Same as elsewhere, > if f->field_index < master_columns but !bitmap_is_set, > does it mean the keyseg is ok? I don't think so. > It *is* needed. An extra field with default can be used in the key lookup. A usual field should have an explicit value. If it's not, it means it's not sent (MINIMAL/NOBLOB) and we don't know what real value is > > > + && !bitmap_is_set(&m_table->has_value_set, f->field_index) > > + && (non_deterministic_default || next_number_field == > f->field_index)) > > + break; > > + } > > + return p; > > +} > > > > /** > > Find the best key to use when locating the row in @c find_row(). > > @@ -8024,13 +8077,26 @@ static bool record_compare(TABLE *table) > > */ > > int Rows_log_event::find_key() > > { > > - uint i, best_key_nr, last_part; > > + uint i, best_key_nr; > > KEY *key, *UNINIT_VAR(best_key); > > ulong UNINIT_VAR(best_rec_per_key), tmp; > > DBUG_ENTER("Rows_log_event::find_key"); > > DBUG_ASSERT(m_table); > > > > + #ifdef DBUG_TRACE > > #ifndef DBUG_OFF > Pity we have a double negation in similar places, I tried to avoid it and came across that macro > > > + auto _ = make_scope_exit([this]() { > > + DBUG_EXECUTE_IF("rpl_report_chosen_key", > > + push_warning_printf(m_table->in_use, > > + Sql_condition::WARN_LEVEL_NOTE, > > + ER_UNKNOWN_ERROR, "Key chosen: > %d", > > + m_key_nr == MAX_KEY ? > > + -1 : m_key_nr);); > > + }); > > + #endif > > + > > + m_key_nr= MAX_KEY; > > best_key_nr= MAX_KEY; > > + uint parts_suit= 0; > > > > /* > > Keys are sorted so that any primary key is first, followed by > unique keys, > > @@ -8041,33 +8107,38 @@ int Rows_log_event::find_key() > > { > > if (!m_table->s->keys_in_use.is_set(i)) > > continue; > > + parts_suit= key_parts_suit_event(key); > > + if (!parts_suit) > > + continue; > > /* > > We cannot use a unique key with NULL-able columns to uniquely > identify > > a row (but we can still select it for range scan below if nothing > better > > is available). > > */ > > - if ((key->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME) > > + if ((key->flags & (HA_NOSAME | HA_NULL_PART_KEY)) == HA_NOSAME && > > + parts_suit == key->user_defined_key_parts) > > this doesn't make use of ext key parts. Should be something like > > why not just use ext_key_parts (which I'm using now btw)? if (parts_suit == key->ext_key_parts || > (parts_suit >= key->user_defined_key_parts && (key->flags & ....))) > > > { > > best_key_nr= i; > > best_key= key; > > + m_key_parts_suit= parts_suit; > > break; > > } > > /* > > We can only use a non-unique key if it allows range scans (ie. > skip > > FULLTEXT indexes and such). > > */ > > - last_part= key->user_defined_key_parts - 1; > > DBUG_PRINT("info", ("Index %s rec_per_key[%u]= %lu", > > - key->name.str, last_part, > key->rec_per_key[last_part])); > > - if (!(m_table->file->index_flags(i, last_part, 1) & HA_READ_NEXT)) > > + key->name.str, parts_suit, > key->rec_per_key[parts_suit])); > > + if (!(m_table->file->index_flags(i, parts_suit, 1) & HA_READ_NEXT)) > > continue; > > > > - tmp= key->rec_per_key[last_part]; > > + tmp= key->rec_per_key[parts_suit - 1]; > > if (best_key_nr == MAX_KEY || (tmp > 0 && tmp < best_rec_per_key)) > > { > > best_key_nr= i; > > best_key= key; > > best_rec_per_key= tmp; > > + m_key_parts_suit= parts_suit; > > } > > } > > > -- Yours truly, Nikita Malyavin
_______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp