This is a multi-part message in MIME format. --------------000000040604020102000407 Content-Type: text/plain; charset=windows-1251; format=flowed Content-Transfer-Encoding: 7bit
The attached files are derived from OpenLDAP Software. All of the modifications to OpenLDAP Software represented in the following patch(es) were developed by Peter-Service LLC, Moscow, Russia. Peter-Service LLC has not assigned rights and/or interest in this work to any party. I, Leonid Yuriev, am authorized by Peter-Service LLC, my employer, to release this work under the following terms. Peter-Service LLC hereby places the following modifications to OpenLDAP Software (and only these modifications) into the public domain. Hence, these modifications may be freely used and/or redistributed for any purpose with or without attribution and/or other notice. --------------000000040604020102000407 Content-Type: text/x-patch; name="0001-lmdb-ITS-7974-oomkiller-feature.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="0001-lmdb-ITS-7974-oomkiller-feature.patch" >From 85fce95eaa0e71ee43625ccc202c173f7d4acb4a Mon Sep 17 00:00:00 2001 From: Leo Yuriev <[email protected]> Date: Tue, 21 Oct 2014 19:25:32 +0400 Subject: [PATCH 1/2] lmdb: ITS#7974 oomkiller feature. --- libraries/liblmdb/lmdb.h | 34 +++++++++++++++++ libraries/liblmdb/mdb.c | 95 ++++++++++++++++++++++++++++++++++++++++++++++-- 2 files changed, 126 insertions(+), 3 deletions(-) diff --git a/libraries/liblmdb/lmdb.h b/libraries/liblmdb/lmdb.h index bdbb0b9..a3ca62e 100644 --- a/libraries/liblmdb/lmdb.h +++ b/libraries/liblmdb/lmdb.h @@ -1537,6 +1537,40 @@ int mdb_reader_list(MDB_env *env, MDB_msg_func *func, void *ctx); * @return 0 on success, non-zero on failure. */ int mdb_reader_check(MDB_env *env, int *dead); + + /** @brief A callback function for killing a laggard readers, + * called in case of MDB_MAP_FULL error. + * + * @param[in] env An environment handle returned by #mdb_env_create(). + * @param[in] pid pid of the reader process. + * @param[in] thread_id thread_id of the reader thread. + * @param[in] txn Transaction number on which stalled. 
+ * @return -1 on failure (reader is not killed), + * 0 on a race condition (no such reader), + * 1 on success (reader was killed), + * >1 on success (reader was SURE killed). + */ +typedef int (MDB_oomkiller_func)(MDB_env *env, int pid, void* thread_id, size_t txn); + + /** @brief Set the oomkiller callback. + * + * Callback will be called only on out-of-pages case for killing + * a laggard readers to allowing reclaiming of freeDB. + * + * @param[in] env An environment handle returned by #mdb_env_create(). + * @param[in] oomkiller A #MDB_oomkiller_func function or NULL to disable. + */ +void mdb_env_set_oomkiller(MDB_env *env, MDB_oomkiller_func *oomkiller); + + /** @brief Get the current oomkiller callback. + * + * Callback will be called only on out-of-pages case for killing + * a laggard readers to allowing reclaiming of freeDB. + * + * @param[in] env An environment handle returned by #mdb_env_create(). + * @return A #MDB_oomkiller_func function or NULL if disabled. + */ +MDB_oomkiller_func* mdb_env_get_oomkiller(MDB_env *env); /** @} */ #ifdef __cplusplus diff --git a/libraries/liblmdb/mdb.c b/libraries/liblmdb/mdb.c index 6cc3433..e60d83d 100644 --- a/libraries/liblmdb/mdb.c +++ b/libraries/liblmdb/mdb.c @@ -1145,6 +1145,7 @@ struct MDB_env { #endif void *me_userctx; /**< User-settable context */ MDB_assert_func *me_assert_func; /**< Callback for assertion failures */ + MDB_oomkiller_func *me_oomkiller; /**< Callback for killing laggard readers */ }; /** Nested transaction */ @@ -1900,6 +1901,77 @@ mdb_find_oldest(MDB_txn *txn) return oldest; } +static txnid_t +mdb_laggard_reader(MDB_env *env, int *laggard) +{ + txnid_t tail = 0; + if (laggard) + *laggard = -1; + if (env->me_txns->mti_txnid > 1) { + int i; + MDB_reader *r = env->me_txns->mti_readers; + + tail = env->me_txns->mti_txnid - 1; + for (i = env->me_txns->mti_numreaders; --i >= 0; ) { + if (r[i].mr_pid) { + txnid_t mr = r[i].mr_txnid; + if (tail > mr) { + tail = mr; + if (laggard) + *laggard = i; + } 
+ } + } + } + + return tail; +} + +static int +mdb_oomkill_laggard(MDB_env *env) +{ + int dead, idx; + txnid_t tail = mdb_laggard_reader(env, &idx); + if (idx < 0) + return 0; + + for(;;) { + MDB_reader *r; + MDB_THR_T tid; + pid_t pid; + int rc; + + if (mdb_reader_check(env, &dead)) + break; + + if (dead && tail < mdb_laggard_reader(env, NULL)) + return 1; + + if (!env->me_oomkiller) + break; + + r = &env->me_txns->mti_readers[ idx ]; + pid = r->mr_pid; + tid = r->mr_tid; + if (r->mr_txnid != tail || pid <= 0) + continue; + + rc = env->me_oomkiller(env, pid, (void*) tid, tail); + if (rc < 0) + break; + + if (rc) { + r->mr_txnid = (txnid_t)-1; + if (rc > 1) { + r->mr_tid = 0; + r->mr_pid = 0; + } + } + } + + return tail < mdb_laggard_reader(env, NULL); +} + /** Add a page to the txn's dirty list */ static void mdb_page_dirty(MDB_txn *txn, MDB_page *mp) @@ -1978,6 +2050,7 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) goto fail; } +oomkill_retry:; for (op = MDB_FIRST;; op = MDB_NEXT) { MDB_val key, data; MDB_node *leaf; @@ -2073,9 +2146,11 @@ mdb_page_alloc(MDB_cursor *mc, int num, MDB_page **mp) i = 0; pgno = txn->mt_next_pgno; if (pgno + num >= env->me_maxpg) { - DPUTS("DB size maxed out"); - rc = MDB_MAP_FULL; - goto fail; + DPUTS("DB size maxed out"); + if (mdb_oomkill_laggard(env)) + goto oomkill_retry; + rc = MDB_MAP_FULL; + goto fail; } search_done: @@ -9403,4 +9478,18 @@ mdb_reader_check(MDB_env *env, int *dead) *dead = count; return MDB_SUCCESS; } + +void +mdb_env_set_oomkiller(MDB_env *env, MDB_oomkiller_func *oomkiller) +{ + if (env) + env->me_oomkiller = oomkiller; +} + +MDB_oomkiller_func* +mdb_env_get_oomkiller(MDB_env *env) +{ + return env ? 
env->me_oomkiller : NULL; +} + /** @} */ -- 2.1.0 --------------000000040604020102000407 Content-Type: text/x-patch; name="0002-slapd-ITS-7974-oomkiller-feature.patch" Content-Transfer-Encoding: 7bit Content-Disposition: attachment; filename="0002-slapd-ITS-7974-oomkiller-feature.patch" >From caf076698e093077fa44e490797847c3187d485b Mon Sep 17 00:00:00 2001 From: Leo Yuriev <[email protected]> Date: Tue, 21 Oct 2014 19:49:05 +0400 Subject: [PATCH 2/2] slapd: ITS#7974 oomkiller feature. --- servers/slapd/back-mdb/back-mdb.h | 3 +++ servers/slapd/back-mdb/config.c | 40 ++++++++++++++++++++++++++++++++++---- servers/slapd/back-mdb/init.c | 2 ++ servers/slapd/back-mdb/proto-mdb.h | 2 ++ 4 files changed, 43 insertions(+), 4 deletions(-) diff --git a/servers/slapd/back-mdb/back-mdb.h b/servers/slapd/back-mdb/back-mdb.h index 9d5d4b1..be82da8 100644 --- a/servers/slapd/back-mdb/back-mdb.h +++ b/servers/slapd/back-mdb/back-mdb.h @@ -81,6 +81,9 @@ struct mdb_info { uint32_t mi_txn_cp_kbyte; struct re_s *mi_txn_cp_task; struct re_s *mi_index_task; + uint32_t mi_renew_lag; + uint32_t mi_renew_percent; + int mi_oomkill; mdb_monitor_t mi_monitor; diff --git a/servers/slapd/back-mdb/config.c b/servers/slapd/back-mdb/config.c index 5b402c5..b54da49 100644 --- a/servers/slapd/back-mdb/config.c +++ b/servers/slapd/back-mdb/config.c @@ -106,6 +106,8 @@ static slap_verbmasks mdb_envflags[] = { { BER_BVC("writemap"), MDB_WRITEMAP }, { BER_BVC("mapasync"), MDB_MAPASYNC }, { BER_BVC("nordahead"), MDB_NORDAHEAD }, +#define MDB_OOMKILL (MDB_NOMEMINIT << 4) + { BER_BVC("oomkill"), MDB_OOMKILL }, { BER_BVNULL, 0 } }; @@ -123,6 +125,23 @@ mdb_checkpoint( void *ctx, void *arg ) return NULL; } +/* perform killing a laggard readers */ +int +mdb_oomkiller(MDB_env *env, int pid, void* thread_id, size_t txn) +{ + if ( pid != getpid() ) { + if ( kill( pid, SIGKILL ) == 0 ) { + Debug( LDAP_DEBUG_ANY, "oomkiller: SIGKILL to pid %i\n", pid, 0, 0 ); + sched_yield(); + return 2; + } + if ( errno == 
ESRCH ) + return 0; + Debug( LDAP_DEBUG_ANY, "oomkiller: SIGKILL to pid %i: %s\n", pid, strerror(errno), 0 ); + } + return -1; +} + /* reindex entries on the fly */ static void * mdb_online_index( void *ctx, void *arg ) @@ -313,12 +332,15 @@ mdb_cf_gen( ConfigArgs *c ) c->value_int = 1; break; - case MDB_ENVFLAGS: - if ( mdb->mi_dbenv_flags ) { - mask_to_verbs( mdb_envflags, mdb->mi_dbenv_flags, &c->rvalue_vals ); - } + case MDB_ENVFLAGS: { + long flags = mdb->mi_dbenv_flags; + if ( mdb->mi_oomkill ) + flags |= MDB_OOMKILL; + if ( flags ) + mask_to_verbs( mdb_envflags, flags, &c->rvalue_vals ); if ( !c->rvalue_vals ) rc = 1; break; + } case MDB_INDEX: mdb_attr_index_unparse( mdb, &c->rvalue_vals ); @@ -380,6 +402,8 @@ mdb_cf_gen( ConfigArgs *c ) break; case MDB_ENVFLAGS: + mdb->mi_oomkill = 0; + mdb_env_set_oomkiller( mdb->mi_dbenv, NULL ); if ( c->valx == -1 ) { int i; for ( i=0; mdb_envflags[i].mask; i++) { @@ -596,6 +620,14 @@ mdb_cf_gen( ConfigArgs *c ) for ( i=1; i<c->argc; i++ ) { j = verb_to_mask( c->argv[i], mdb_envflags ); if ( mdb_envflags[j].mask ) { + + if ( MDB_OOMKILL == mdb_envflags[j].mask ) { + mdb->mi_oomkill = 1; + if ( mdb->mi_flags & MDB_IS_OPEN ) + mdb_env_set_oomkiller( mdb->mi_dbenv, mdb_oomkiller ); + break; + } + if ( mdb->mi_flags & MDB_IS_OPEN ) rc = mdb_env_set_flags( mdb->mi_dbenv, mdb_envflags[j].mask, 1 ); else diff --git a/servers/slapd/back-mdb/init.c b/servers/slapd/back-mdb/init.c index 1c5ab83..c7c09d4 100644 --- a/servers/slapd/back-mdb/init.c +++ b/servers/slapd/back-mdb/init.c @@ -150,6 +150,8 @@ mdb_db_open( BackendDB *be, ConfigReply *cr ) goto fail; } + mdb_env_set_oomkiller( mdb->mi_dbenv, mdb->mi_oomkill ? 
mdb_oomkiller : NULL); + #ifdef HAVE_EBCDIC strcpy( path, mdb->mi_dbenv_home ); __atoe( path ); diff --git a/servers/slapd/back-mdb/proto-mdb.h b/servers/slapd/back-mdb/proto-mdb.h index b6b8d7c..3ec986e 100644 --- a/servers/slapd/back-mdb/proto-mdb.h +++ b/servers/slapd/back-mdb/proto-mdb.h @@ -20,6 +20,8 @@ LDAP_BEGIN_DECL #define MDB_UCTYPE "MDB" +MDB_oomkiller_func mdb_oomkiller; + /* * attr.c */ -- 2.1.0 --------------000000040604020102000407--
