Hi Nirbhay, I'm not expert on this code area but some questions, comments below.
On Tue, May 3, 2016 at 1:26 AM, Nirbhay Choubey <[email protected]> wrote: > revision-id: a6fb98fd74ab6f14fab0c1ef4630b010ff28880f > (mariadb-10.1.13-6-ga6fb98f) > parent(s): f8cdb9e7e8fb692f8eb3b9bb4792cd60653e0eb8 > author: Nirbhay Choubey > committer: Nirbhay Choubey > timestamp: 2016-05-02 18:26:58 -0400 > message: > > MDEV-6368: assertion xid_seqno > trx_sys_cur_xid_seqno > > - Validate the specified wsrep_start_position value by also > checking the return status of wsrep->sst_received. This also > ensures that changes in wsrep_start_position is not allowed > when the node is not in JOINING state. > - Do not allow decrease in seqno within same UUID. > - The initial checkpoint in SEs should be [0...:-1]. > > > diff --git a/sql/wsrep_sst.cc b/sql/wsrep_sst.cc > index 4f6cc51..44fdd03 100644 > --- a/sql/wsrep_sst.cc > +++ b/sql/wsrep_sst.cc > @@ -264,42 +264,91 @@ void wsrep_sst_complete (const wsrep_uuid_t* > sst_uuid, > mysql_mutex_unlock (&LOCK_wsrep_sst); > } > > -void wsrep_sst_received (wsrep_t* const wsrep, > +/* > + If wsrep provider is loaded, inform that the new state snapshot > + has been received. Also update the local checkpoint. > + > + @param wsrep [IN] wsrep handle > + @param uuid [IN] Initial state UUID > + @param seqno [IN] Initial state sequence number > + @param state [IN] Always NULL, also ignored by wsrep > provider (?) > + @param state_len [IN] Always 0, also ignored by wsrep > provider (?) > + @param implicit [IN] Whether invoked implicitly due to > SST > + (true) or explicitly because if > change > + in wsrep_start_position by user > (false). > + @return false Success > + true Error > + > +*/ > +bool wsrep_sst_received (wsrep_t* const wsrep, > const wsrep_uuid_t& uuid, > wsrep_seqno_t const seqno, > const void* const state, > - size_t const state_len) > Why not const size_t state_len ? Use of const here is confusing. > + size_t const state_len, > + bool const implicit) > { > - wsrep_get_SE_checkpoint(local_uuid, local_seqno); > + /* > + To keep track of whether the local uuid:seqno should be updated. > Also, note > + that local state (uuid:seqno) is updated/checkpointed only after we > get an > + OK from wsrep provider. By doing so, the values remain consistent > across > + the server & wsrep provider. > + */ > + bool do_update= false; > + > + // Get the locally stored uuid:seqno. > + wsrep_get_SE_checkpoint(local_uuid, local_seqno); > What if this fails, no error handling ? > > - if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) || > - local_seqno < seqno || seqno < 0) > + if (memcmp(&local_uuid, &uuid, sizeof(wsrep_uuid_t)) || > + local_seqno < seqno) > + { > + do_update= true; > + } > + else if (local_seqno > seqno) > + { > + WSREP_WARN("SST position can't be set in past. Requested: %lld, > Current: " > + " %lld.", (long long)seqno, (long long)local_seqno); > + /* > + If we are here because of SET command, simply return true (error) > instead of > + aborting. > + */ > + if (implicit) > { > - wsrep_set_SE_checkpoint(uuid, seqno); > - local_uuid = uuid; > - local_seqno = seqno; > + WSREP_WARN("Can't continue."); > + unireg_abort(1); > } > - else if (local_seqno > seqno) > + else > { > - WSREP_WARN("SST postion is in the past: %lld, current: %lld. " > - "Can't continue.", > - (long long)seqno, (long long)local_seqno); > - unireg_abort(1); > + return true; > } > + } > > #ifdef GTID_SUPPORT > - wsrep_init_sidno(uuid); > + wsrep_init_sidno(uuid); > #endif /* GTID_SUPPORT */ > > - if (wsrep) > - { > - int const rcode(seqno < 0 ? seqno : 0); > - wsrep_gtid_t const state_id = { > - uuid, (rcode ? WSREP_SEQNO_UNDEFINED : seqno) > - }; > + if (wsrep) > + { > + int const rcode(seqno < 0 ? seqno : 0); > When seqno can be < 0 ? > + wsrep_gtid_t const state_id= {uuid, > + (rcode ? WSREP_SEQNO_UNDEFINED : seqno)}; > + > + wsrep_status_t ret= wsrep->sst_received(wsrep, &state_id, state, > + state_len, rcode); > > - wsrep->sst_received(wsrep, &state_id, state, state_len, rcode); > + if (ret != WSREP_OK) > + { > + return true; > } > + } > + > + // Now is the good time to update the local state and checkpoint. > + if (do_update) { > + wsrep_set_SE_checkpoint(uuid, seqno); > Again, I would add error handling. > + local_uuid= uuid; > + local_seqno= seqno; > + } > + > + return false; > } > > // Let applier threads to continue > @@ -308,7 +357,7 @@ void wsrep_sst_continue () > if (sst_needed) > { > WSREP_INFO("Signalling provider to continue."); > - wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0); > + wsrep_sst_received (wsrep, local_uuid, local_seqno, NULL, 0, true); > You should check the return code. > } > } > > diff --git a/sql/wsrep_var.cc b/sql/wsrep_var.cc > index 9c01e54..bd94190 100644 > > > bool wsrep_start_position_update (sys_var *self, THD* thd, enum_var_type > type) > { > - WSREP_INFO ("wsrep_start_position var submitted: '%s'", > - wsrep_start_position); > - // since this value passed wsrep_start_position_check, don't check > anything > - // here > - wsrep_set_local_position (wsrep_start_position, true); > - return 0; > + // Print a confirmation that wsrep_start_position has been updated. > + WSREP_INFO ("wsrep_start_position set to '%s'", wsrep_start_position); > + return false; > Hmm, normally I thought that system variable set would go like if (variable_check()) variable_update() but now update does not do nothing ? > } > void wsrep_start_position_init (const char* val) > @@ -164,7 +194,7 @@ void wsrep_start_position_init (const char* val) > return; > } > > - wsrep_set_local_position (val, false); > + wsrep_set_local_position (val, strlen(val), false); > Add error handling. > } > > static bool refresh_provider_options() > diff --git a/storage/innobase/trx/trx0sys.cc > b/storage/innobase/trx/trx0sys.cc > index 76c8613..1625e8d 100644 > --- a/storage/innobase/trx/trx0sys.cc > +++ b/storage/innobase/trx/trx0sys.cc > @@ -349,10 +349,10 @@ void read_wsrep_xid_uuid(const XID* xid, unsigned > char* buf) > unsigned char xid_uuid[16]; > long long xid_seqno = read_wsrep_xid_seqno(xid); > read_wsrep_xid_uuid(xid, xid_uuid); > - if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 8)) > + if (!memcmp(xid_uuid, trx_sys_cur_xid_uuid, 16) && > + (xid_seqno > -1)) > Does this mean that you ignore uuid if it is exactly the same, or is there error handling on else? R: Jan
_______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : [email protected] Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp

