Hi, Nikita, this is a review of the combined diff of your branch. Of `git diff github/10.8...github/bb-10.8-online-alter`, that is of
cf7cc376bac fix savepoints in myisam 4a128ca5458 fix main.delayed 7300151cc84 fix skipping rocksdb 19504dfdbcc add binlog/standalone combinations 55e993595aa add rocksdb combination 06645b17450 savepoints 9ffcd13b9fe rename tests 730926ae962 MDEV-16329 [5/5] ALTER ONLINE TABLE cf52ea33a70 MDEV-16329 [4/5] Refactor MYSQL_BIN_LOG: extract Event_log ancestor 0445c8f1ee9 MDEV-16329 [3/5] use binlog_cache_data directly in most places 3f3da0f65a3 MDEV-16329 [2/5] refactor binlog and cache_mngr dc79667ca73 MDEV-16329 [1/5] add THD::binlog_get_cache_mngr 6f9dd00ce2d rpl: repack table_def faf8680df8d Copy_field: add const to arguments It was mostly fine the last time already, so only few comments below: > diff --git a/mysql-test/include/have_log_bin_off.require > b/mysql-test/include/have_log_bin_off.require > new file mode 100644 > index 00000000000..979dbe75f80 > --- /dev/null > +++ b/mysql-test/include/have_log_bin_off.require > @@ -0,0 +1,2 @@ > +Variable_name Value > +log_bin OFF better use `if (...) { skip }` pattern *.require is an obsolete 20-year old technique from before if- and skip-times > diff --git a/mysql-test/main/alter_table.test > b/mysql-test/main/alter_table.test > index 31c69783248..91423ee8c1f 100644 > --- a/mysql-test/main/alter_table.test > +++ b/mysql-test/main/alter_table.test > @@ -1532,8 +1532,12 @@ ALTER TABLE t1 DROP INDEX i1, DROP INDEX i2, DROP > INDEX i3, DROP INDEX i4; > ALTER TABLE t1 ADD INDEX i1(b), ALGORITHM= INPLACE, LOCK= NONE; > ALTER TABLE t1 ADD INDEX i2(b), ALGORITHM= INPLACE, LOCK= SHARED; > ALTER TABLE t1 ADD INDEX i3(b), ALGORITHM= INPLACE, LOCK= EXCLUSIVE; > ---error ER_ALTER_OPERATION_NOT_SUPPORTED_REASON > +--disable_info > +--disable_warnings > +--error 0,ER_ALTER_OPERATION_NOT_SUPPORTED_REASON why, because of embedded? 
> ALTER TABLE t1 ADD INDEX i4(b), ALGORITHM= COPY, LOCK= NONE; > +--enable_warnings > +--enable_info > ALTER TABLE t1 ADD INDEX i5(b), ALGORITHM= COPY, LOCK= SHARED; > ALTER TABLE t1 ADD INDEX i6(b), ALGORITHM= COPY, LOCK= EXCLUSIVE; > > diff --git a/mysql-test/main/alter_table_locknone.result > b/mysql-test/main/alter_table_locknone.result > new file mode 100644 > index 00000000000..ea040925efe > --- /dev/null > +++ b/mysql-test/main/alter_table_locknone.result > @@ -0,0 +1,270 @@ > +create table t1 (a int not null primary key, b int, c varchar(80), e > enum('a','b')) engine=myisam; > +insert into t1 (a) values (1),(2),(3); > +alter online table t1 modify b int default 5, alter c set default 'X'; > +alter online table t1 change b new_name int; > +alter online table t1 modify e enum('a','b','c'); > +alter online table t1 comment "new comment"; > +alter table t1 add constraint q check (a > 0); > +alter online table t1 drop constraint q; > +alter online table t1 algorithm=INPLACE, lock=NONE; > +alter online table t1; > +alter table t1 algorithm=INPLACE; > +alter table t1 lock=NONE; > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `a` int(11) NOT NULL, > + `new_name` int(11) DEFAULT NULL, > + `c` varchar(80) DEFAULT 'X', > + `e` enum('a','b','c') DEFAULT NULL, > + PRIMARY KEY (`a`) > +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COMMENT='new comment' > +drop table t1; > +create temporary table t1 (a int not null primary key, b int, c varchar(80), > e enum('a','b')); > +insert into t1 (a) values (1),(2),(3); > +alter online table t1 modify b int default 5, alter c set default 'X'; > +alter online table t1 change b new_name int; > +alter online table t1 modify e enum('a','b','c'); > +alter online table t1 comment "new comment"; > +alter online table t1 rename to t2; > +show create table t2; > +Table Create Table > +t2 CREATE TEMPORARY TABLE `t2` ( > + `a` int(11) NOT NULL, > + `new_name` int(11) DEFAULT NULL, > + `c` varchar(80) DEFAULT 'X', > + 
`e` enum('a','b','c') DEFAULT NULL, > + PRIMARY KEY (`a`) > +) ENGINE=MyISAM DEFAULT CHARSET=latin1 COMMENT='new comment' > +drop table t2; > +create table t1 (a int not null primary key, b int, c varchar(80), e > enum('a','b')) engine=aria; > +insert into t1 (a) values (1),(2),(3); > +alter online table t1 modify b int default 5; > +alter online table t1 change b new_name int; > +alter online table t1 modify e enum('a','b','c'); > +alter online table t1 comment "new comment"; > +show create table t1; > +Table Create Table > +t1 CREATE TABLE `t1` ( > + `a` int(11) NOT NULL, > + `new_name` int(11) DEFAULT NULL, > + `c` varchar(80) DEFAULT NULL, > + `e` enum('a','b','c') DEFAULT NULL, > + PRIMARY KEY (`a`) > +) ENGINE=Aria DEFAULT CHARSET=latin1 PAGE_CHECKSUM=1 COMMENT='new comment' > +alter online table t1 page_checksum=1; > +alter online table t1 page_checksum=0; > +ERROR 0A000: LOCK=NONE is not supported for this operation. Try LOCK=SHARED Why would LOCK=NONE not be supported now? Shouldn't your new feature allow just everything with LOCK=NONE? > +drop table t1; > +create table t1 (a int not null primary key, b int, c varchar(80), e > enum('a','b')); > +insert into t1 (a) values (1),(2),(3); > +alter online table t1 drop column b, add b int; > +ERROR 0A000: LOCK=NONE is not supported for this operation. Try LOCK=SHARED ... > diff --git a/mysql-test/main/alter_table_online.combinations > b/mysql-test/main/alter_table_online.combinations > new file mode 100644 > index 00000000000..ae144432c68 > --- /dev/null > +++ b/mysql-test/main/alter_table_online.combinations > @@ -0,0 +1,2 @@ > +[innodb] > +[rocksdb] what about non-trans tables? 
> diff --git a/mysql-test/main/alter_table_online.test > b/mysql-test/main/alter_table_online.test > index 6ef9661c43b..815483d252d 100644 > --- a/mysql-test/main/alter_table_online.test > +++ b/mysql-test/main/alter_table_online.test > @@ -1,335 +1,596 @@ > -# > -# Test of ALTER ONLINE TABLE syntax > -# > - > ---source include/have_innodb.inc > ---source include/have_partition.inc > -# > -# Test of things that can be done online > -# > - > -create table t1 (a int not null primary key, b int, c varchar(80), e > enum('a','b')) engine=myisam; > -insert into t1 (a) values (1),(2),(3); > - > -alter online table t1 modify b int default 5, alter c set default 'X'; > -alter online table t1 change b new_name int; > -alter online table t1 modify e enum('a','b','c'); > -alter online table t1 comment "new comment"; > -alter table t1 add constraint q check (a > 0); > -alter online table t1 drop constraint q; > - > -# No OPs > - > -alter online table t1 algorithm=INPLACE, lock=NONE; > -alter online table t1; > -alter table t1 algorithm=INPLACE; > -alter table t1 lock=NONE; > -show create table t1; > -drop table t1; > +-- source include/have_debug_sync.inc > +-- source include/not_embedded.inc > +-- source alter_table_online_binlog.inc > > -# > -# everything with temporary tables is "online", i.e. 
without locks > -# > -create temporary table t1 (a int not null primary key, b int, c varchar(80), > e enum('a','b')); > -insert into t1 (a) values (1),(2),(3); > - > -alter online table t1 modify b int default 5, alter c set default 'X'; > -alter online table t1 change b new_name int; > -alter online table t1 modify e enum('a','b','c'); > -alter online table t1 comment "new comment"; > -alter online table t1 rename to t2; > -show create table t2; > -drop table t2; > +-- disable_query_log > +-- if ($MTR_COMBINATION_INNODB) { > +-- source include/have_innodb.inc > +set default_storage_engine= innodb; > +-- } > +-- if ($MTR_COMBINATION_ROCKSDB) { > +-- source include/have_rocksdb.inc > +set default_storage_engine= rocksdb; > +-- } > +-- enable_query_log > + > +-- let $default_engine= `select @@default_storage_engine` > + > +--connect (con2, localhost, root,,) > +--connection default > + > + > +--echo # > +--echo # Test insert > +--echo # > + > +--echo # Insert and add column > +create or replace table t1 (a int) engine=innodb; why do you specify the engine explicitly if you run it in two engine combinations? > +insert t1 values (5); > + > +--connection con2 > +--send > +set debug_sync= 'now WAIT_FOR ended'; > + > +--connection default > +set debug_sync= 'alter_table_copy_end SIGNAL ended WAIT_FOR end'; > + ... > + > +--connection default > +--reap > +--sorted_result > +select * from t1; > + > +--echo # > +--echo # MYISAM. Only Inserts can be tested. why? > +--echo # > + > +create or replace table t1 (a int) engine=myisam; > +insert t1 values (5); > + > +--connection con2 > +--send > +set debug_sync= 'now WAIT_FOR ended'; > + > +--connection default > +set debug_sync= 'alter_table_copy_end SIGNAL ended WAIT_FOR end'; > + > +--send > +alter table t1 add b int NULL, algorithm= copy, lock= none; > + ... 
> diff --git a/mysql-test/main/mdl_sync.test b/mysql-test/main/mdl_sync.test > index 3df19aca806..8c6bc76f5ca 100644 > --- a/mysql-test/main/mdl_sync.test > +++ b/mysql-test/main/mdl_sync.test > @@ -42,7 +42,7 @@ connection con1; > set debug_sync='mdl_upgrade_lock SIGNAL parked WAIT_FOR go'; > --send alter table t1 rename t3 > > -connection default; > + connection default; eh? (here and many other changes in this file) > --echo connection: default > set debug_sync= 'now WAIT_FOR parked'; > > diff --git a/mysql-test/suite/gcol/inc/gcol_select.inc > b/mysql-test/suite/gcol/inc/gcol_select.inc > index 2386c55fdbc..8f6314233bd 100644 > --- a/mysql-test/suite/gcol/inc/gcol_select.inc > +++ b/mysql-test/suite/gcol/inc/gcol_select.inc > @@ -872,7 +872,9 @@ CREATE TABLE t1(a INT); > INSERT INTO t1 VALUES(2147483647); > ALTER TABLE t1 ADD COLUMN h INT AS (a) VIRTUAL; > ALTER TABLE t1 CHANGE h i INT AS (a) VIRTUAL, ALGORITHM=COPY; > +--error ER_WARN_DATA_OUT_OF_RANGE,ER_ALTER_OPERATION_NOT_SUPPORTED_REASON why two? embedded, again? > ALTER TABLE t1 ADD COLUMN b SMALLINT AS (a) VIRTUAL, ALGORITHM=COPY, > LOCK=NONE; > +--error ER_WARN_DATA_OUT_OF_RANGE,ER_ALTER_OPERATION_NOT_SUPPORTED_REASON > ALTER TABLE t1 ADD COLUMN e SMALLINT AS (a) VIRTUAL, ALGORITHM=COPY, > LOCK=NONE; > ALTER TABLE t1 ADD COLUMN f SMALLINT AS (a) VIRTUAL, ALGORITHM=COPY, > LOCK=SHARED; > ALTER TABLE t1 ADD COLUMN g SMALLINT AS (a) VIRTUAL, ALGORITHM=COPY, > LOCK=EXCLUSIVE; > diff --git a/sql/handler.h b/sql/handler.h > index fe61666bf20..5e46fa59d7e 100644 > --- a/sql/handler.h > +++ b/sql/handler.h > @@ -3538,6 +3543,7 @@ class handler :public Sql_alloc > /** to be actually called to get 'check()' functionality*/ > int ha_check(THD *thd, HA_CHECK_OPT *check_opt); > int ha_repair(THD* thd, HA_CHECK_OPT* check_opt); > + virtual void open_read_view(){} why did you change to that from start_consistent_snapshot()? 
> void ha_start_bulk_insert(ha_rows rows, uint flags= 0) > { > DBUG_ENTER("handler::ha_start_bulk_insert"); > diff --git a/sql/log.cc b/sql/log.cc > index 70ceecc66f8..9beab80773c 100644 > --- a/sql/log.cc > +++ b/sql/log.cc > @@ -101,6 +102,9 @@ static int binlog_flush_cache(THD *thd, binlog_cache_mngr > *cache_mngr, > Log_event *end_ev, bool all, bool using_stmt, > bool using_trx, bool is_ro_1pc); > > +int binlog_online_alter_commit(THD *thd, bool all); > +void binlog_online_alter_rollback(THD *thd, bool all); static? > + > static const LEX_CSTRING write_error_msg= > { STRING_WITH_LEN("error writing to the binary log") }; > > @@ -3565,6 +3613,91 @@ bool MYSQL_BIN_LOG::open_index_file(const char > *index_file_name_arg, > } > > > +bool Event_log::open(const char *log_name, > + const char *new_name, ulong next_file_number, > + enum cache_type io_cache_type_arg) > +{ > + bool error= false; > + if (log_name || new_name) > + { > + error= MYSQL_LOG::open( > +#ifdef HAVE_PSI_INTERFACE > + 0, > +#endif > + log_name, LOG_NORMAL, new_name, next_file_number, > io_cache_type_arg); > + } > + else > + { > +#ifdef HAVE_PSI_INTERFACE > + /* Keep the key for reopen */ > + m_log_file_key= 0; > +#endif > + error= init_io_cache(&log_file, -1, LOG_BIN_IO_SIZE, > + io_cache_type_arg, 0, 0, > + MYF(MY_WME | MY_NABP | MY_WAIT_IF_FULL)); > + > + log_state= LOG_OPENED; > + inited= true; > + } > + if (error) > + return error; > + > + longlong bytes_written= write_description_event( > + > (enum_binlog_checksum_alg)binlog_checksum_options, > + encrypt_binlog, false); > + error= bytes_written < 0; > + return error; > +} > + > +longlong > +Event_log::write_description_event(enum_binlog_checksum_alg checksum_alg, > + bool encrypt, bool dont_set_created) > +{ > + Format_description_log_event s(BINLOG_VERSION); > + /* > + don't set LOG_EVENT_BINLOG_IN_USE_F for SEQ_READ_APPEND io_cache > + as we won't be able to reset it later > + */ > + if (io_cache_type == WRITE_CACHE) this was incorrectly 
rebased. in the previous patch you *moved* this block of code from MYSQL_BIN_LOG::open into a separate method. here you copied it. you forgot to delete it from MYSQL_BIN_LOG::open(). > + s.flags |= LOG_EVENT_BINLOG_IN_USE_F; > + s.checksum_alg= checksum_alg; > + > + crypto.scheme = 0; > + DBUG_ASSERT(s.checksum_alg != BINLOG_CHECKSUM_ALG_UNDEF); > + if (!s.is_valid()) > + return -1; > + s.dont_set_created= dont_set_created; > + if (write_event(&s, 0, &log_file)) > + return -1; > + > + if (encrypt) > + { > + uint key_version= > encryption_key_get_latest_version(ENCRYPTION_KEY_SYSTEM_DATA); > + if (key_version == ENCRYPTION_KEY_VERSION_INVALID) > + { > + sql_print_error("Failed to enable encryption of binary logs"); > + return -1; > + } > + > + if (key_version != ENCRYPTION_KEY_NOT_ENCRYPTED) > + { > + if (my_random_bytes(crypto.nonce, sizeof(crypto.nonce))) > + return -1; > + > + Start_encryption_log_event sele(1, key_version, crypto.nonce); > + sele.checksum_alg= s.checksum_alg; > + if (write_event(&sele, 0, &log_file)) > + return -1; > + > + // Start_encryption_log_event is written, enable the encryption > + if (crypto.init(sele.crypto_scheme, key_version)) > + return -1; > + } > + } > + return (longlong)s.data_written; > +} > + > + > /** > Open a (new) binlog file. 
> > @@ -7252,6 +7477,160 @@ class CacheWriter: public Log_event_writer > bool first; > }; > > +int cache_copy(IO_CACHE *to, IO_CACHE *from) > +{ > + DBUG_ENTER("cache_copy"); > + if (reinit_io_cache(from, READ_CACHE, 0, 0, 0)) > + DBUG_RETURN(ER_ERROR_ON_WRITE); > + size_t bytes_in_cache= my_b_bytes_in_cache(from); > + > + do > + { > + my_b_write(to, from->read_pos, bytes_in_cache); > + > + from->read_pos += bytes_in_cache; > + bytes_in_cache= my_b_fill(from); > + if (from->error || to->error) > + DBUG_RETURN(ER_ERROR_ON_WRITE); > + } while (bytes_in_cache); > + > + DBUG_RETURN(0); > +} > + > +int binlog_online_alter_commit(THD *thd, bool all) > +{ > + DBUG_ENTER("online_alter_commit"); > + int error= 0; > +#ifdef HAVE_REPLICATION > + > + if (thd->online_alter_cache_list.empty()) > + DBUG_RETURN(0); > + > + bool is_ending_transaction= ending_trans(thd, all); > + > + for (auto &cache_mngr: thd->online_alter_cache_list) > + { > + auto *binlog= cache_mngr.share->online_alter_binlog; > + DBUG_ASSERT(binlog); > + > + error= binlog_flush_pending_rows_event(thd, > + /* > + do not set STMT_END for last > event > + to leave table open in altering > thd > + */ > + false, > + true, > + binlog, > + is_ending_transaction > + ? &cache_mngr.trx_cache > + : &cache_mngr.stmt_cache); > + if (is_ending_transaction) > + { > + mysql_mutex_lock(binlog->get_log_lock()); > + error= binlog->write_cache(thd, &cache_mngr.trx_cache.cache_log); > + > + mysql_mutex_unlock(binlog->get_log_lock()); > + } > + else > + { > + error= cache_copy(&cache_mngr.trx_cache.cache_log, > + &cache_mngr.stmt_cache.cache_log); Why do you need two caches here? It seems you could have just one trx_cache and if the statement fails you simply truncate it to the beginning of the statement. 
> + } > + > + if (error) > + { > + my_error(ER_ERROR_ON_WRITE, MYF(ME_ERROR_LOG), > + binlog->get_name(), errno); > + binlog_online_alter_cleanup(thd->online_alter_cache_list, > + is_ending_transaction); > + DBUG_RETURN(error); > + } > + } > + > + binlog_online_alter_cleanup(thd->online_alter_cache_list, > + is_ending_transaction); > + > + for (TABLE *table= thd->open_tables; table; table= table->next) > + { > + table->online_alter_cache= NULL; Why? > + } > +#endif // HAVE_REPLICATION > + DBUG_RETURN(error); > +} > + > +void binlog_online_alter_rollback(THD *thd, bool all) > +{ > +#ifdef HAVE_REPLICATION > + bool is_ending_trans= ending_trans(thd, all); > + > + /* > + This is a crucial moment that we are running through > + thd->online_alter_cache_list, and not through thd->open_tables to cleanup > + stmt cache, though both have it. The reason is that the tables can be > closed > + to that moment in case of an error. > + The same reason applies to the fact we don't store cache_mngr in the > table > + itself -- because it can happen to be not existing. > + Still in case if tables are left opened > + */ > + binlog_online_alter_cleanup(thd->online_alter_cache_list, is_ending_trans); > +#endif // HAVE_REPLICATION > +} > + > +SAVEPOINT** find_savepoint_in_list(THD *thd, LEX_CSTRING name, > + SAVEPOINT ** const list); > + > +SAVEPOINT* savepoint_add(THD *thd, LEX_CSTRING name, SAVEPOINT **list, > + int (*release_old)(THD*, SAVEPOINT*)); maybe these declarations belong in transaction.h? 
> + > +int online_alter_savepoint_set(THD *thd, LEX_CSTRING name) > +{ > + > + DBUG_ENTER("binlog_online_alter_savepoint"); > +#ifdef HAVE_REPLICATION > + if (thd->online_alter_cache_list.empty()) > + DBUG_RETURN(0); > + > + if (savepoint_alloc_size < sizeof (SAVEPOINT) + sizeof(my_off_t)) > + savepoint_alloc_size= sizeof (SAVEPOINT) + sizeof(my_off_t); > + > + for (auto &cache: thd->online_alter_cache_list) > + { > + if (cache.share->db_type()->savepoint_set == NULL) > + continue; > + > + SAVEPOINT *sv= savepoint_add(thd, name, &cache.sv_list, NULL); wouldn't it be simpler to log SAVEPOINT and ROLLBACK/RELEASE as Query_log_event to the cache? Then you won't need to do any maintenance. > + if(unlikely(sv == NULL)) > + DBUG_RETURN(1); > + my_off_t *pos= (my_off_t*)(sv+1); > + *pos= cache.trx_cache.get_byte_position(); > + > + sv->prev= cache.sv_list; > + cache.sv_list= sv; > + } > +#endif > + DBUG_RETURN(0); > +} > + > +int online_alter_savepoint_rollback(THD *thd, LEX_CSTRING name) > +{ > + DBUG_ENTER("online_alter_savepoint_rollback"); > +#ifdef HAVE_REPLICATION > + for (auto &cache: thd->online_alter_cache_list) > + { > + if (cache.share->db_type()->savepoint_set == NULL) > + continue; > + > + SAVEPOINT **sv= find_savepoint_in_list(thd, name, &cache.sv_list); > + // sv is null if savepoint was set up before online table was modified > + my_off_t pos= *sv ? *(my_off_t*)(*sv+1) : 0; > + > + cache.trx_cache.restore_savepoint(pos); > + } > + > +#endif > + DBUG_RETURN(0); > +} > + > /* > Write the contents of a cache to the binary log. 
> > diff --git a/sql/rpl_rli.h b/sql/rpl_rli.h > index cc807852bf2..57757ecf3dd 100644 > --- a/sql/rpl_rli.h > +++ b/sql/rpl_rli.h > @@ -900,14 +900,20 @@ struct rpl_group_info > } > } > > - bool get_table_data(TABLE *table_arg, table_def **tabledef_var, TABLE > **conv_table_var) const > + bool get_table_data(TABLE *table_arg, table_def **tabledef_var, > + TABLE **conv_table_var, > + const Copy_field *copy[], const Copy_field **copy_end) const > { > DBUG_ASSERT(tabledef_var && conv_table_var); > for (TABLE_LIST *ptr= tables_to_lock ; ptr != NULL ; ptr= > ptr->next_global) > if (ptr->table == table_arg) > { > - *tabledef_var= &static_cast<RPL_TABLE_LIST*>(ptr)->m_tabledef; > - *conv_table_var= static_cast<RPL_TABLE_LIST*>(ptr)->m_conv_table; > + auto *rpl_table_list= static_cast<RPL_TABLE_LIST*>(ptr); > + if (rpl_table_list->m_tabledef_valid) when can it be false? > + *tabledef_var= &rpl_table_list->m_tabledef; > + *conv_table_var= rpl_table_list->m_conv_table; > + *copy= rpl_table_list->m_online_alter_copy_fields; > + *copy_end= rpl_table_list->m_online_alter_copy_fields_end; > DBUG_PRINT("debug", ("Fetching table data for table %s.%s:" > " tabledef: %p, conv_table: %p", > table_arg->s->db.str, > table_arg->s->table_name.str, > diff --git a/sql/sql_table.cc b/sql/sql_table.cc > index b07efb29bba..9dfe2178667 100644 > --- a/sql/sql_table.cc > +++ b/sql/sql_table.cc > @@ -9533,6 +9539,19 @@ bool mysql_alter_table(THD *thd, const LEX_CSTRING > *new_db, > has been already processed. > */ > table_list->required_type= TABLE_TYPE_NORMAL; > + > + > + if (alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_SHARED > + || alter_info->requested_lock == Alter_info::ALTER_TABLE_LOCK_EXCLUSIVE > + || thd->locked_tables_mode == LTM_LOCK_TABLES > + || thd->lex->sql_command == SQLCOM_OPTIMIZE > + || alter_info->algorithm(thd) == > Alter_info::ALTER_TABLE_ALGORITHM_NOCOPY) why only NOCOPY? What about INPLACE and INSTANT? the rest of the condition looks fine. 
> + online= false; > + > + if (online) > + { > + table_list->lock_type= TL_READ; > + } > > DEBUG_SYNC(thd, "alter_table_before_open_tables"); > > @@ -10961,6 +10983,58 @@ bool mysql_trans_commit_alter_copy_data(THD *thd) > DBUG_RETURN(error); > } > > +#ifdef HAVE_REPLICATION > +static int online_alter_read_from_binlog(THD *thd, rpl_group_info *rgi, > + Cache_flip_event_log *log) > +{ > + MEM_ROOT event_mem_root; > + Query_arena backup_arena; > + Query_arena event_arena(&event_mem_root, Query_arena::STMT_INITIALIZED); > + init_sql_alloc(key_memory_gdl, &event_mem_root, > + MEM_ROOT_BLOCK_SIZE, 0, MYF(0)); > + > + int error= 0; > + > + IO_CACHE *log_file= log->flip(); > + > + thd_progress_report(thd, 0, my_b_write_tell(log_file)); > + > + Abort_on_warning_instant_set old_abort_on_warning(thd, 0); > + do > + { > + const auto *descr_event= rgi->rli->relay_log.description_event_for_exec; > + auto *ev= Log_event::read_log_event(log_file, descr_event, false); > + if (!ev) > + break; > + > + ev->thd= thd; > + thd->set_n_backup_active_arena(&event_arena, &backup_arena); > + error= ev->apply_event(rgi); > + thd->restore_active_arena(&event_arena, &backup_arena); > + > + event_arena.free_items(); > + free_root(&event_mem_root, MYF(MY_KEEP_PREALLOC)); > + if (ev != rgi->rli->relay_log.description_event_for_exec) > + delete ev; > + thd_progress_report(thd, my_b_tell(log_file), thd->progress.max_counter); > + DEBUG_SYNC(thd, "alter_table_online_progress"); > + } while(!error); > + > + return error; > +} > +#endif // HAVE_REPLICATION > + > +static void online_alter_cleanup_binlog(THD *thd, TABLE_SHARE *s) > +{ > +#ifdef HAVE_REPLICATION > + if (!s->online_alter_binlog) > + return; > + // s->online_alter_binlog->reset_logs(thd, false, NULL, 0, 0); forgot to delete? 
> + s->online_alter_binlog->cleanup(); > + s->online_alter_binlog->~Cache_flip_event_log(); > + s->online_alter_binlog= NULL; > +#endif > +} > > static int > copy_data_between_tables(THD *thd, TABLE *from, TABLE *to, > @@ -11306,6 +11431,76 @@ copy_data_between_tables(THD *thd, TABLE *from, > TABLE *to, > cleanup_done= 1; > to->file->extra(HA_EXTRA_NO_IGNORE_DUP_KEY); > > +#ifdef HAVE_REPLICATION > + if (likely(online && error < 0)) > + { > + Ha_trx_info *trx_info_save= thd->transaction->all.ha_list; > + thd->transaction->all.ha_list = NULL; why? > + thd_progress_next_stage(thd); > + Table_map_log_event table_event(thd, from, from->s->table_map_id, > + from->file->has_transactions()); > + Relay_log_info rli(false); > + rpl_group_info rgi(&rli); > + RPL_TABLE_LIST rpl_table(to, TL_WRITE, from, table_event.get_table_def(), > + copy, copy_end); > + Cache_flip_event_log *binlog= from->s->online_alter_binlog; > + rgi.thd= thd; > + rgi.tables_to_lock= &rpl_table; > + > + rgi.m_table_map.set_table(from->s->table_map_id, to); > + > + DBUG_ASSERT(binlog->is_open()); > + > + rli.relay_log.description_event_for_exec= > + new > Format_description_log_event(4); > + > + // We restore bitmaps, because update event is going to mess up with > them. > + to->default_column_bitmaps(); > + > + error= online_alter_read_from_binlog(thd, &rgi, binlog); > + > + DEBUG_SYNC(thd, "alter_table_online_before_lock"); > + > + int lock_error= > + thd->mdl_context.upgrade_shared_lock(from->mdl_ticket, MDL_EXCLUSIVE, > + > (double)thd->variables.lock_wait_timeout); > + if (!error) > + error= lock_error; > + > + if (!error) > + { > + thd_progress_next_stage(thd); > + error= online_alter_read_from_binlog(thd, &rgi, binlog); > + } > + > + thd->transaction->all.ha_list = trx_info_save; > + } > + else if (unlikely(online)) // error was on copy stage > + { > + /* > + We need to issue a barrier to clean up gracefully. 
> + Without this, following possible: > + T1: ALTER TABLE starts > + T2: INSERT starts > + T1: ALTER TABLE fails with error (i.e. ER_DUP_KEY) > + T1: from->s->online_alter_binlog sets to NULL > + T2: INSERT committs > + T2: thd->online_alter_cache_list is not empty > + T2: binlog_commit: DBUG_ASSERT(binlog); is issued. 1. do you have a test for that? 2. can it be fixed so that, while iterating thd->online_alter_cache_list, the thd notices that from->s->online_alter_binlog is NULL and simply discards the cache? Then you won't need MDL_SHARED_NO_WRITE > + */ > + // Ignore the return result. We already have an error. > + thd->mdl_context.upgrade_shared_lock(from->mdl_ticket, > + MDL_SHARED_NO_WRITE, > + thd->variables.lock_wait_timeout); > + } > +#endif > + > + if (error > 0 && !from->s->tmp_table) > + { > + /* We are going to drop the temporary table */ > + to->file->extra(HA_EXTRA_PREPARE_FOR_DROP); > + } > + > DEBUG_SYNC(thd, "copy_data_between_tables_before_reset_backup_lock"); > if (backup_reset_alter_copy_lock(thd)) > error= 1; Regards, Sergei VP of MariaDB Server Engineering and secur...@mariadb.org _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : maria-developers@lists.launchpad.net Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp