Hi! >>>>> "sanja" == sanja <[email protected]> writes:
sanja> At file:///Users/bell/maria/bzr/work-maria-5.2-groupcommit/ sanja> ------------------------------------------------------------ sanja> revno: 2740 sanja> revision-id: [email protected] sanja> parent: [email protected] sanja> committer: [email protected] sanja> branch nick: work-maria-5.2-groupcommit sanja> timestamp: Tue 2010-02-09 10:32:59 +0200 sanja> message: sanja> Group commit for maria storage engine. > === modified file 'storage/maria/ha_maria.cc' > --- a/storage/maria/ha_maria.cc 2009-12-03 11:34:11 +0000 > +++ b/storage/maria/ha_maria.cc 2010-02-09 08:32:59 +0000 <cut> > +static MYSQL_SYSVAR_ULONG(group_commit_interval, maria_group_commit_interval, > + PLUGIN_VAR_RQCMDARG, > + "Interval between commite in microseconds (1/1000000c)." > + " 0 stands for no waiting" > + "for other threads to come and do a commit in \"hard\" mode and no" > + " sync()/commit at all in \"soft\" mode. Option has only an effect" > + "if maria_group_commit is used", Fix so that you have a space last for the continous lines. (The above has 2 text errors: watingfor and effectif) > === modified file 'storage/maria/ma_loghandler.c' > --- a/storage/maria/ma_loghandler.c 2010-01-06 21:27:53 +0000 > +++ b/storage/maria/ma_loghandler.c 2010-02-09 08:32:59 +0000 > @@ -2997,7 +3068,25 @@ > } > DBUG_ASSERT(LSN_FILE_NO(addr) == > LSN_FILE_NO(curr_buffer->offset)); > from= curr_buffer->buffer + (addr - curr_buffer->offset); > - memcpy(buffer, from, TRANSLOG_PAGE_SIZE); > + if (skipped_data > (addr - curr_buffer->offset)) > + { > + /* > + We read page part of which is not present in buffer, > + so we should read absent part from file (page cache actually) > + */ > + file= get_logfile_by_number(file_no); > + DBUG_ASSERT(file != NULL); > + buffer= pagecache_read(log_descriptor.pagecache, &file->handler, > + LSN_OFFSET(addr) / TRANSLOG_PAGE_SIZE, > + 3, buffer, > + PAGECACHE_PLAIN_PAGE, > + PAGECACHE_LOCK_LEFT_UNLOCKED, > + NULL); > + } > + else > + skipped_data= 0; /* Read after skipped in buffer data */ > + memcpy(buffer + skipped_data, from + skipped_data, > + TRANSLOG_PAGE_SIZE - skipped_data); Above you don't handle the unlikely (but possible) error that buffer == 0 <cut> > + pthread_mutex_lock(&log_descriptor.log_flush_lock); > + DBUG_PRINT("info", ("Everything is flushed up to (%lu,0x%lx)", > + LSN_IN_PARTS(log_descriptor.flushed))); > + if (cmp_translog_addr(log_descriptor.flushed, lsn) >= 0) > + > + Remove the two empty lines above. cut> > + for (;;) > + { > + /* Following function flushes buffers and makes translog_unlock() */ > + translog_flush_buffers(&lsn, &sent_to_disk, &flush_horizon); > + > + if (!hgroup_commit_at_start) > + break; /* flush pass is ended */ > + > +retest: > + if (flush_interval != 0 && > + (my_micro_time() - flush_start) >= flush_interval) > + break; /* flush pass is ended */ > + > + pthread_mutex_lock(&log_descriptor.log_flush_lock); > + if (log_descriptor.next_pass_max_lsn != LSN_IMPOSSIBLE) > + { > + /* take next goal */ > + lsn= log_descriptor.next_pass_max_lsn; > + log_descriptor.next_pass_max_lsn= LSN_IMPOSSIBLE; > + /* prevent other thread from continue */ > + log_descriptor.max_lsn_requester= pthread_self(); > + DBUG_PRINT("info", ("flush took next goal: (%lu,0x%lx)", > + LSN_IN_PARTS(lsn))); > + } > + else > + { > + if (flush_interval == 0 || > + (time_spent= (my_micro_time() - flush_start)) >= flush_interval) > { > + pthread_mutex_unlock(&log_descriptor.log_flush_lock); > + break; > } > + DBUG_PRINT("info", ("flush waits: %llu interval: %llu spent: %llu", > + flush_interval - time_spent, > + flush_interval, time_spent)); > + /* wait time or next goal */ > + set_timespec_nsec(abstime, flush_interval - time_spent); > + pthread_cond_timedwait(&log_descriptor.new_goal_cond, > + &log_descriptor.log_flush_lock, > + &abstime); > + pthread_mutex_unlock(&log_descriptor.log_flush_lock); > + DBUG_PRINT("info", ("retest conditions")); > + goto retest; > + } > + pthread_mutex_unlock(&log_descriptor.log_flush_lock); > + > + /* next flush pass */ > + DBUG_PRINT("info", ("next flush pass")); > + translog_lock(); > + } What about the idea of setting flush_start to the last call of my_micro_time() ? This would mean that on a busy system we would wait, but on a non busy system (with little syncs) we would not do any waits at all. And we would win some calls to my_micro_time() too (which may not be a very cheap call) The code change would be very simple: - Remove old call of testing flush_start. - In the above calls, replace my_micro_time() with 'cached_micro_time= my_micro_time()' and after end of loop, set flush_start= cached_micro_time Hm.... maybe this wouldn't work as we are calling sync after this, which is causing some delays. > + > + /* > + sync() files from previous flush till current one > + */ > + if (!soft_sync || hgroup_commit_at_start) > + { > + if ((rc= > + translog_sync_files(LSN_FILE_NO(log_descriptor.flushed), > + LSN_FILE_NO(lsn), > + sync_log_dir >= TRANSLOG_SYNC_DIR_ALWAYS && > + (LSN_FILE_NO(log_descriptor. > + previous_flush_horizon) != > + LSN_FILE_NO(flush_horizon) || > + (LSN_OFFSET(log_descriptor. > + previous_flush_horizon) / > + TRANSLOG_PAGE_SIZE) != > + (LSN_OFFSET(flush_horizon) / > + TRANSLOG_PAGE_SIZE))))) > + { > + sent_to_disk= LSN_IMPOSSIBLE; > + pthread_mutex_lock(&log_descriptor.log_flush_lock); > + goto out; > + } > + /* keep values for soft sync() and forced sync() actual */ > + { > + uint32 fileno= LSN_FILE_NO(lsn); > + my_atomic_rwlock_wrlock(&soft_sync_rwl); > + my_atomic_store32(&soft_sync_min, fileno); > + my_atomic_store32(&soft_sync_max, fileno); > + my_atomic_rwlock_wrunlock(&soft_sync_rwl); Don't understand why my_atomic_rwlock_wrlock is enough protection here. Why use my_atomic_store32 ? > + } > + } > + else > + { > + my_atomic_rwlock_wrlock(&soft_sync_rwl); > + my_atomic_store32(&soft_sync_max, LSN_FILE_NO(lsn)); > + my_atomic_rwlock_wrunlock(&soft_sync_rwl); Do we really need a lock to store one variable? what is the problem with just doing: soft_sync_max= LSN_FILE_NO(lsn); As after all, only one thread can be here at once. > + } > @@ -8113,6 +8427,8 @@ > my_bool translog_purge(TRANSLOG_ADDRESS low) > { > uint32 last_need_file= LSN_FILE_NO(low); > + uint32 min_unsync; > + int soft; > TRANSLOG_ADDRESS horizon= translog_get_horizon(); > int rc= 0; > DBUG_ENTER("translog_purge"); > @@ -8120,12 +8436,23 @@ > DBUG_ASSERT(translog_status == TRANSLOG_OK || > translog_status == TRANSLOG_READONLY); > > + soft= soft_sync; > + DBUG_PRINT("info", ("min_unsync: %lu", (ulong) min_unsync)); > + if (soft && min_unsync < last_need_file) > + { > + last_need_file= min_unsync; > + DBUG_PRINT("info", ("last_need_file set to %lu", (ulong)last_need_file)); > + } The above must be a bug as you are never giving min_unsync a value. > > +void translog_sync() > +{ > + uint32 max= get_current_logfile()->number; > + uint32 min; > + DBUG_ENTER("ma_translog_sync"); > + > + my_atomic_rwlock_rdlock(&soft_sync_rwl); > + min= my_atomic_load32(&soft_sync_min); > + my_atomic_rwlock_rdunlock(&soft_sync_rwl); Don't understand why you need to do atomic operations above: - You are only reading on value - get_current_logfile() is read without a mutex, so the values are already independent. > +/** > + @brief syncing service thread > +*/ > + > +static pthread_handler_t > +ma_soft_sync_background( void *arg __attribute__((unused))) > +{ > + > + my_thread_init(); > + { > + DBUG_ENTER("ma_soft_sync_background"); > + for(;;) > + { > + ulonglong prev_loop= my_micro_time(); > + ulonglong time, sleep; > + uint32 min, max; > + my_atomic_rwlock_rdlock(&soft_sync_rwl); > + min= my_atomic_load32(&soft_sync_min); > + max= my_atomic_load32(&soft_sync_max); > + my_atomic_store32(&soft_sync_min, max); > + my_atomic_rwlock_rdunlock(&soft_sync_rwl); Don't still understand what ensures that the above operations are safe from other threads as my_atomic_rwlock_rdlock() and my_atomic_rwlock_rdunlock() may be dummy operations. > + > + sleep= group_commit_wait; > + translog_sync_files(min, max, FALSE); It would be good to have a test above that if there is nothing to sync, we don't call translog_sync_files(). Regards, Monty _______________________________________________ Mailing list: https://launchpad.net/~maria-developers Post to : [email protected] Unsubscribe : https://launchpad.net/~maria-developers More help : https://help.launchpad.net/ListHelp

