The current time caching implementation in util_time.c and mod_log_config.c is not guaranteed to work with all compilers and cpu architectures. I have some proposed code I've written, that I want to get input from the mailing list on, before continuing on to compile and test it.
The old implementation tries to do a wait-free algorithm, but incorrectly, because the algorithm relies on total memory ordering, and the implementation does not guarantee total memory ordering from the compiler or CPU. My proposal is to use the standard and portable apr_thread_mutex_trylock for enforcing the memory barriers. For APR's Linux x86_64 implementation, this basically turns into a lock: cmpxchgl instruction, and a lock: decl instruction for the unlock. Because only trylock is used, not lock, the futex is never spun and arbitrated on by the kernel, it's all done in userspace (if there's contention, each thread just calculates the value itself instead of reading from the cache). So the replacement is also a wait-free algorithm, using the standard and portable apr_thread_mutex calls. Also, the old algorithm does two memcpy operations when reading from the cache, while the new algorithm just does one. For APR_HAS_THREADS==0 or a non-threaded mpm in use, no locking is done. The first part of the code is generic time caching code I've written in util_time_caching.h. I use these macros to create five different time caches: excerpt w/o boilerplate: ----------------------------------- #define TIME_CACHE(CACHE_T, CACHE_PTR, CACHE_SIZE_POWER)\ static CACHE_T CACHE_PTR[1<<CACHE_SIZE_POWER]; #define TIME_CACHE_FUNC_PTR(FUNC_PTR, VALUE_T, CALC_FUNC)\ static apr_status_t (*FUNC_PTR)(VALUE_T *, apr_time_t) = &CALC_FUNC; /* TIME_CACHE_FUNCTION macro: * The cache is implemented as a ring buffer. The key in the * cache element indicates which second the cache value is for. * We use a wait-free algorithm: if we cannot access the cache, * we just calculate the value, doing useful work, instead of * spinning trying to access the cache. * We only update the cache with newer times, because older times * should have a lower cache hit ratio, and we want to keep the * caches small to fit in the CPU L1/L2 caches. */ #define TIME_CACHE_FUNCTION(FUNC_NAME, VALUE_T, VALUE_SIZE, \ CACHE_T, CACHE_PTR, CACHE_SIZE_POWER, \ CALC_FUNC, TRY_LOCK, RELEASE_LOCK, AFTER_READ_WORK)\ static apr_status_t FUNC_NAME(VALUE_T *value, apr_time_t t)\ {\ const apr_int64_t seconds = apr_time_sec(t);\ apr_status_t status;\ CACHE_T * const cache_element = \ &(CACHE_PTR[seconds & ((1<<CACHE_SIZE_POWER)-1)]);\ /* seconds==0 can be confused with unitialized cache; don't use cache */\ if (seconds==0) return CALC_FUNC(value, t);\ if (TRY_LOCK) {\ if (seconds == cache_element->key) {\ memcpy(value, &cache_element->value, VALUE_SIZE);\ RELEASE_LOCK;\ AFTER_READ_WORK;\ return APR_SUCCESS;\ }\ if (seconds < cache_element->key) {\ RELEASE_LOCK;\ return CALC_FUNC(value, t);\ }\ RELEASE_LOCK;\ }\ status = CALC_FUNC(value, t);\ if (status == APR_SUCCESS) {\ if (TRY_LOCK) {\ if (seconds > cache_element->key) {\ cache_element->key = seconds;\ memcpy(&cache_element->value, value, VALUE_SIZE);\ }\ RELEASE_LOCK;\ }\ }\ return status;\ } ---------------------------------------------- Here is an example of implementing one of the caches in util_time.c: ---------------------------------------------- /* Have small sized caches, to stay in CPU's L1/L2 caches. * There should be few requests older than 3 seconds, so set * cache size power to 2. */ #define TIME_CACHE_SIZE_POWER 2 typedef struct { apr_int64_t key; apr_time_exp_t value; } explode_time_cache_t; TIME_CACHE(explode_time_cache_t, explode_time_lt_cache, TIME_CACHE_SIZE_POWER) TIME_CACHE_FUNC_PTR(explode_time_lt_ptr, apr_time_exp_t, apr_time_exp_lt) TIME_CACHE_FUNCTION( explode_time_lt_nolocking, apr_time_exp_t, sizeof(apr_time_exp_t), explode_time_cache_t, explode_time_lt_cache, TIME_CACHE_SIZE_POWER, apr_time_exp_lt, 1,, value->tm_usec = (apr_int32_t) apr_time_usec(t)) #if APR_HAS_THREADS static apr_thread_mutex_t *explode_time_lt_lock; TIME_CACHE_FUNCTION( explode_time_lt_locking, apr_time_exp_t, sizeof(apr_time_exp_t), explode_time_cache_t, explode_time_lt_cache, TIME_CACHE_SIZE_POWER, apr_time_exp_lt, apr_thread_mutex_trylock(explode_time_lt_lock)==APR_SUCCESS, apr_thread_mutex_unlock(explode_time_lt_lock), value->tm_usec = (apr_int32_t) apr_time_usec(t) #endif AP_DECLARE(apr_status_t) ap_explode_recent_localtime(apr_time_exp_t * tm, apr_time_t t) { return explode_recent_lt_ptr(tm, t); } ------------------------------------------------- The function pointer initially points to the uncached CALC_FUNC; only after child_init is run is the function pointer replaced with the function using the cache. Using the function pointer, the API and ABI of ap_explode_recent_localtime stays the same. I've implemented five caches: explode local/gmt, rfc822 date, and common log format local/gmt. For the rfc822 cache, it will cache the formatted string, while in the old implementation, it'd only cache the exploded time structure, so the old implementation formatted the string on every request. Here is the child init function in util_time.c that will be referenced in server/core.c: ------------------------------------------------- void ap_util_time_child_init(apr_pool_t *p, server_rec *s) { #if APR_HAS_THREADS int threaded_mpm; if (ap_mpm_query(AP_MPMQ_IS_THREADED, &threaded_mpm) == APR_SUCCESS && threaded_mpm) { #define init_cache_func(mutex, func_ptr, func_name)\ apr_thread_mutex_create(&mutex, APR_THREAD_MUTEX_DEFAULT, p);\ func_ptr = &func_name; init_cache_func(explode_time_lt_lock, explode_time_lt_ptr, explode_time_lt_locking) init_cache_func(explode_time_gmt_lock, explode_time_gmt_ptr, explode_time_gmt_locking) init_cache_func(rfc822_date_lock, rfc822_date_ptr, rfc822_date_locking) init_cache_func(clf_time_local_lock, clf_time_local_ptr, clf_time_local_locking) init_cache_func(clf_time_gmt_lock, clf_time_gmt_ptr, clf_time_gmt_locking) } else { #endif explode_time_lt_ptr = &explode_time_lt_nolocking; explode_time_gmt_ptr = &explode_time_gmt_nolocking; rfc822_date_ptr = &rfc822_date_nolocking; clf_time_local_ptr = &clf_time_local_nolocking; clf_time_gmt_ptr = &clf_time_gmt_nolocking; #if APR_HAS_THREADS } #endif } ------------------------------------------------------------- Comments?