Hi, Michael! On May 12, Michael Widenius wrote: > revision-id: c80f867b0df (mariadb-10.6.0-78-gc80f867b0df) > parent(s): eebebe8ef75 > author: Michael Widenius <michael.widen...@gmail.com> > committer: Michael Widenius <michael.widen...@gmail.com> > timestamp: 2021-05-04 21:09:11 +0300 > message: > > MDEV-23844 Atomic DROP TABLE ... > diff --git a/sql/sql_view.cc b/sql/sql_view.cc > index e6d726b30d7..ff8aa55adef 100644 > --- a/sql/sql_view.cc > +++ b/sql/sql_view.cc > @@ -1861,8 +1868,18 @@ bool mysql_drop_view(THD *thd, TABLE_LIST *views, > enum_drop_mode drop_mode) > not_exists_count++; > continue; > } > + if (!first_table++)
yuck. so your "first_table" actually means "not_first_view", because when it's FALSE, it's the first view. why not to rename it to "not_first_view" ? > + { > + if (ddl_log_drop_view_init(thd, &ddl_log_state)) > + DBUG_RETURN(TRUE); what about a crash here? the first entry is written, the second is not. > + } > + if (ddl_log_drop_view(thd, &ddl_log_state, &cpath, &view->db, > + &view->table_name)) > + DBUG_RETURN(TRUE); > + debug_crash_here("ddl_log_drop_before_delete_view"); > if (unlikely(mysql_file_delete(key_file_frm, path, MYF(MY_WME)))) > delete_error= TRUE; > + debug_crash_here("ddl_log_drop_after_delete_view"); > > some_views_deleted= TRUE; > > diff --git a/sql/log.cc b/sql/log.cc > index 6e0a9706ec8..dba999054e4 100644 > --- a/sql/log.cc > +++ b/sql/log.cc > @@ -10471,16 +10483,16 @@ int TC_LOG_BINLOG::recover(LOG_INFO *linfo, const > char *last_log_name, > bool last_gtid_standalone= false; > bool last_gtid_valid= false; > #endif > + DBUG_ENTER("TC_LOG_BINLOG::recover"); > > if (! fdle->is_valid() || > - (do_xa && my_hash_init(key_memory_binlog_recover_exec, &xids, > - &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, > - sizeof(my_xid), 0, 0, MYF(0)))) > + (my_hash_init(key_memory_binlog_recover_exec, &xids, > + &my_charset_bin, TC_LOG_PAGE_SIZE/3, 0, > + sizeof(my_xid), 0, 0, MYF(0)))) > goto err1; > > - if (do_xa) > - init_alloc_root(key_memory_binlog_recover_exec, &mem_root, > - TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0)); > + init_alloc_root(key_memory_binlog_recover_exec, &mem_root, > + TC_LOG_PAGE_SIZE, TC_LOG_PAGE_SIZE, MYF(0)); I don't think these two changes are correct. do_xa is TRUE on crash recovery and FALSE on gtid state recovery on 5.5->10.0 upgade or if gtid state file was deleted. There is no point to initialize the xid hash or mem_root, as it's not a crash recovery and there can be no incomplete transactions or ddl statements. Yes, I've seen the commit comment: > - TC_LOG_BINLOG::recover() was changed to always collect Xid's from the > binary log and always call ddl_log_close_binlogged_events(). This was > needed to be able to collect DROP TABLE events with embedded Xid's > (used by ddl log). but I cannot understand how it applies here. > fdle->flags&= ~LOG_EVENT_BINLOG_IN_USE_F; // abort on the first error > > diff --git a/sql/sql_table.cc b/sql/sql_table.cc > index 7a88ffdfd1b..0a83396ae14 100644 > --- a/sql/sql_table.cc > +++ b/sql/sql_table.cc > @@ -1363,6 +1373,17 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST > *tables, bool if_exists, > > thd->replication_flags= 0; > was_view= table_type == TABLE_TYPE_VIEW; > + > + if (!first_table++) first_table -> not_first_table > + { > + LEX_CSTRING comment= {comment_start, (size_t) comment_len}; > + if (ddl_log_drop_table_init(thd, &ddl_log_state, &comment)) > + { > + error= 1; > + goto err; > + } > + } > + > if ((table_type == TABLE_TYPE_UNKNOWN) || (was_view && !drop_view) || > (table_type != TABLE_TYPE_SEQUENCE && drop_sequence)) > { > @@ -1565,6 +1611,7 @@ int mysql_rm_table_no_locks(THD *thd, TABLE_LIST > *tables, bool if_exists, > table_name.str, (uint)table_name.length); > mysql_audit_drop_table(thd, table); > } > + ddl_log_update_phase(&ddl_log_state, DDL_DROP_PHASE_COLLECT); better call it DDL_DROP_PHASE_BINLOG. It would've saved me ~10 minutes reading the code back and forth, as ""DDL_DROP_PHASE_BINLOG" immediately explains what this phase is for. > > if (!dont_log_query && > (!error || table_dropped || non_existing_table_error(error))) > diff --git a/sql/ddl_log.h b/sql/ddl_log.h > index 316e6708f22..b843404fc3c 100644 > --- a/sql/ddl_log.h > +++ b/sql/ddl_log.h > @@ -97,6 +101,14 @@ enum enum_ddl_log_rename_table_phase { > DDL_RENAME_PHASE_TABLE, > }; > > +enum enum_ddl_log_drop_table_phase { > + DDL_DROP_PHASE_TABLE=0, > + DDL_DROP_PHASE_TRIGGER, no phase for deleting data from stat tables? > + DDL_DROP_PHASE_COLLECT, > + DDL_DROP_PHASE_RESET, /* Reset found list of dropped tables */ this one is unused (also unused in further patches, I've checked) > + DDL_DROP_PHASE_END > +}; > + > /* > Setting ddl_log_entry.phase to this has the same effect as setting > action_type to DDL_IGNORE_LOG_ENTRY_CODE > diff --git a/sql/ddl_log.cc b/sql/ddl_log.cc > index 777dfdddbc7..6145b2f7318 100644 > --- a/sql/ddl_log.cc > +++ b/sql/ddl_log.cc > @@ -109,6 +112,7 @@ struct st_global_ddl_log > }; > > st_global_ddl_log global_ddl_log; > +String ddl_drop_query; // Used during startup recovery static > > mysql_mutex_t LOCK_gdl; > > @@ -1132,6 +1149,122 @@ static int ddl_log_execute_action(THD *thd, MEM_ROOT > *mem_root, > (void) update_phase(entry_pos, DDL_LOG_FINAL_PHASE); > } > break; > + case DDL_LOG_DROP_TABLE_INIT_ACTION: > + { > + LEX_CSTRING *comment= &ddl_log_entry->tmp_name; > + ddl_drop_query.length(0); > + ddl_drop_query.set_charset(system_charset_info); > + ddl_drop_query.append(STRING_WITH_LEN("DROP TABLE IF EXISTS ")); > + if (comment->length) > + { > + ddl_drop_query.append(comment); > + ddl_drop_query.append(' '); > + } > + /* We don't increment phase as we want to retry this in case of crash */ > + break; > + } > + case DDL_LOG_DROP_TABLE_ACTION: > + { > + LEX_CSTRING db, table, path; > + db= ddl_log_entry->db; > + table= ddl_log_entry->name; > + /* Note that path is without .frm extension */ > + path= ddl_log_entry->tmp_name; > + > + switch (ddl_log_entry->phase) { > + case DDL_DROP_PHASE_TABLE: > + if (hton) > + { > + if ((error= hton->drop_table(hton, path.str))) > + { > + if (!non_existing_table_error(error)) > + break; > + error= -1; > + } > + } > + else > + error= ha_delete_table_force(thd, path.str, &db, &table); > + if (error <= 0) > + { > + /* Not found or already deleted. Delete .frm if it exists */ > + strxnmov(to_path, sizeof(to_path)-1, path.str, reg_ext, NullS); > + mysql_file_delete(key_file_frm, to_path, > MYF(MY_WME|MY_IGNORE_ENOENT)); this should rather be inside the previous if(). no need to do it for ha_delete_table_force(), as it doesn't leave any traces and removes all remnants of a table. > + } > + if (ddl_log_increment_phase_no_lock(entry_pos)) > + break; > + (void) ddl_log_sync_no_lock(); No need to, ddl_log_increment_phase_no_lock() syncs internally > + /* Fall through */ > + case DDL_DROP_PHASE_TRIGGER: > + Table_triggers_list::drop_all_triggers(thd, &db, &table, > + MYF(MY_WME | MY_IGNORE_ENOENT)); > + if (ddl_log_increment_phase_no_lock(entry_pos)) > + break; > + (void) ddl_log_sync_no_lock(); same. and in other places too, I'd expect. > + /* Fall through */ > + > + case DDL_DROP_PHASE_COLLECT: indentation? > + append_identifier(thd, &ddl_drop_query, &db); > + ddl_drop_query.append('.'); > + append_identifier(thd, &ddl_drop_query, &table); > + ddl_drop_query.append(','); > + /* We don't increment phase as we want to retry this in case of crash > */ > + > + if (!ddl_log_entry->next_entry && mysql_bin_log.is_open()) > + { > + /* Last drop table. Write query to binlog */ > + LEX_CSTRING end_comment= > + { STRING_WITH_LEN(" /* generated by ddl log */")}; "generated by ddl log" ? a "log" cannot "generate" a statement, may be "generated during ddl recovery" ? > + ddl_drop_query.length(ddl_drop_query.length()-1); > + ddl_drop_query.append(&end_comment); > + > + mysql_mutex_unlock(&LOCK_gdl); > + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, ddl_drop_query.ptr(), > + ddl_drop_query.length(), TRUE, FALSE, > + FALSE, 0); > + mysql_mutex_lock(&LOCK_gdl); > + } > + break; > + } > + break; > + } > + case DDL_LOG_DROP_VIEW_INIT_ACTION: > + { > + ddl_drop_query.length(0); > + ddl_drop_query.set_charset(system_charset_info); > + ddl_drop_query.append(STRING_WITH_LEN("DROP VIEW IF EXISTS ")); > + /* We don't increment phase as we want to retry this in case of crash */ > + break; > + } > + case DDL_LOG_DROP_VIEW_ACTION: > + { > + LEX_CSTRING db, table, path; > + db= ddl_log_entry->db; > + table= ddl_log_entry->name; > + /* Note that for views path is WITH .frm extension */ > + path= ddl_log_entry->tmp_name; > + > + mysql_file_delete(key_file_frm, path.str, MYF(MY_WME|MY_IGNORE_ENOENT)); > + append_identifier(thd, &ddl_drop_query, &db); > + ddl_drop_query.append('.'); > + append_identifier(thd, &ddl_drop_query, &table); > + ddl_drop_query.append(','); > + > + if (!ddl_log_entry->next_entry) > + { > + /* Last drop view. Write query to binlog */ > + LEX_CSTRING end_comment= > + { STRING_WITH_LEN(" /* generated by ddl log */")}; > + ddl_drop_query.length(ddl_drop_query.length()-1); > + ddl_drop_query.append(&end_comment); > + > + mysql_mutex_unlock(&LOCK_gdl); > + (void) thd->binlog_query(THD::STMT_QUERY_TYPE, ddl_drop_query.ptr(), > + ddl_drop_query.length(), TRUE, FALSE, > + FALSE, 0); > + mysql_mutex_lock(&LOCK_gdl); > + } > + break; > + } > default: > DBUG_ASSERT(0); > break; > @@ -1682,6 +1823,7 @@ void ddl_log_release_entries(DDL_LOG_STATE > *ddl_log_state) > next= log_entry->next_active_log_entry; > ddl_log_release_memory_entry(log_entry); > } > + ddl_log_state->list= 0; may be you should also clear ddl_log_state->execute_entry below? > > if (ddl_log_state->execute_entry) > ddl_log_release_memory_entry(ddl_log_state->execute_entry); > @@ -1776,6 +1918,33 @@ bool ddl_log_update_xid(DDL_LOG_STATE *state, > ulonglong xid) > } > > > +/* > + Write ddl_log_entry and write or update ddl_execute_entry > +*/ > + > +static bool ddl_log_write(DDL_LOG_STATE *ddl_state, > + DDL_LOG_ENTRY *ddl_log_entry) could you use a more descriptive name, please? > +{ > + int error; > + DDL_LOG_MEMORY_ENTRY *log_entry; > + DBUG_ENTER("ddl_log_write"); > + > + mysql_mutex_lock(&LOCK_gdl); > + error= ((ddl_log_write_entry(ddl_log_entry, &log_entry)) || > + ddl_log_write_execute_entry(log_entry->entry_pos, > + &ddl_state->execute_entry)); > + mysql_mutex_unlock(&LOCK_gdl); > + if (error) > + { > + if (log_entry) > + ddl_log_release_memory_entry(log_entry); > + DBUG_RETURN(1); > + } > + add_log_entry(ddl_state, log_entry); > + DBUG_RETURN(0); > +} > + > + > /** > Logging of rename table > */ Regards, Sergei VP of MariaDB Server Engineering and secur...@mariadb.org _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp