The branch, master has been updated
       via  55b431f s3:dbwrap: Use milliseconds for "Held tdb lock" message
       via  32f232d s3:dbwrap: Store warning thresholds in db_ctdb_ctx
       via  ef23930 s3:dbwrap include the hashchain in the logs
       via  63a8f7e s3:dbwrap report time for chainlock and CTDB migrate
       via  b64abab dbwrap_ctdb: Instrument chainunlock timing
       via  34bfd0b ctdb_conn: Log long fetch_lock calls
      from  a92fd11 ctdb-daemon: Remove ctdb_fork_with_logging()

http://gitweb.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 55b431fad575b0ea4bf2b7b2e940f5fcf66751df
Author: Christof Schmitt <[email protected]>
Date:   Tue Jan 14 14:23:04 2014 -0700

    s3:dbwrap: Use milliseconds for "Held tdb lock" message
    
    This is consistent with the parameter using milliseconds and the other
    warnings in the same file also using milliseconds.
    
    Signed-off-by: Christof Schmitt <[email protected]>
    Reviewed-by: Volker Lendecke <[email protected]>
    
    Autobuild-User(master): Volker Lendecke <[email protected]>
    Autobuild-Date(master): Thu Jan 16 11:19:39 CET 2014 on sn-devel-104

commit 32f232d9ad05d476933641a72cc5899d49cbb220
Author: Christof Schmitt <[email protected]>
Date:   Tue Jan 14 14:17:32 2014 -0700

    s3:dbwrap: Store warning thresholds in db_ctdb_ctx
    
    Avoid the parameter lookup for the warning thresholds in the hot code
    path by reading them in db_open_ctdb and storing them in the
    db_ctdb_ctx.
    
    Signed-off-by: Christof Schmitt <[email protected]>
    Reviewed-by: Volker Lendecke <[email protected]>

commit ef239302ed8e190fe8cb5cee1a4791f7a9959980
Author: Christian Ambach <[email protected]>
Date:   Mon May 13 18:20:43 2013 +0200

    s3:dbwrap include the hashchain in the logs
    
    Signed-off-by: Christian Ambach <[email protected]>
    Reviewed-by: Christof Schmitt <[email protected]>
    Reviewed-by: Volker Lendecke <[email protected]>

commit 63a8f7e4419dd2bb80675f9e11ed2f473317db01
Author: Christian Ambach <[email protected]>
Date:   Mon May 13 17:02:17 2013 +0200

    s3:dbwrap report time for chainlock and CTDB migrate
    
    Signed-off-by: Christian Ambach <[email protected]>
    Reviewed-by: Christof Schmitt <[email protected]>
    Reviewed-by: Volker Lendecke <[email protected]>

commit b64abab807655114285be578564fd5d3a5f5e2e1
Author: Volker Lendecke <[email protected]>
Date:   Mon May 6 10:56:12 2013 +0200

    dbwrap_ctdb: Instrument chainunlock timing
    
    We need an indication whether we run into the fcntl thundering
    herd. fcntl unlock should be blindingly fast in the normal case. If it
    takes longer than 5 milliseconds, warn. The timeout can be adapted by
    setting
    
    ctdb:unlock_warn_threshold = <number-of-milliseconds>
    
    Reviewed-by: Christof Schmitt <[email protected]>
    Signed-off-by: Volker Lendecke <[email protected]>

commit 34bfd0b6d210e9f617ace69dbf80a55f0988d81d
Author: Volker Lendecke <[email protected]>
Date:   Fri Feb 1 12:49:52 2013 +0100

    ctdb_conn: Log long fetch_lock calls
    
    With this patch, the number of fetch_lock attempts before dbwrap_ctdb
    logs that it took x attempts to get a record is configurable with
    
    net conf setparm global ctdb:migrate_attempts 10
    
    This patch also adds
    
    net conf setparm global ctdb:migrate_duration 5000
    
    to trigger the same log message if it took longer than x milliseconds
    to retrieve a record.
    
    Reviewed-by: Christof Schmitt <[email protected]>
    Signed-off-by: Volker Lendecke <[email protected]>

-----------------------------------------------------------------------

Summary of changes:
 source3/lib/dbwrap/dbwrap_ctdb.c |  103 +++++++++++++++++++++++++++++++++----
 1 files changed, 91 insertions(+), 12 deletions(-)


Changeset truncated at 500 lines:

diff --git a/source3/lib/dbwrap/dbwrap_ctdb.c b/source3/lib/dbwrap/dbwrap_ctdb.c
index 5a473f9..98a9138 100644
--- a/source3/lib/dbwrap/dbwrap_ctdb.c
+++ b/source3/lib/dbwrap/dbwrap_ctdb.c
@@ -73,6 +73,12 @@ struct db_ctdb_ctx {
        uint32_t db_id;
        struct db_ctdb_transaction_handle *transaction;
        struct g_lock_ctx *lock_ctx;
+
+       /* thresholds for warning messages */
+       int warn_unlock_msecs;
+       int warn_migrate_msecs;
+       int warn_migrate_attempts;
+       int warn_locktime_msecs;
 };
 
 struct db_ctdb_rec {
@@ -944,6 +950,9 @@ static int db_ctdb_record_destr(struct db_record* data)
        struct db_ctdb_rec *crec = talloc_get_type_abort(
                data->private_data, struct db_ctdb_rec);
        int threshold;
+       int ret;
+       struct timeval before;
+       double timediff;
 
        DEBUG(10, (DEBUGLEVEL > 10
                   ? "Unlocking db %u key %s\n"
@@ -952,20 +961,42 @@ static int db_ctdb_record_destr(struct db_record* data)
                   hex_encode_talloc(data, (unsigned char *)data->key.dptr,
                              data->key.dsize)));
 
-       tdb_chainunlock(crec->ctdb_ctx->wtdb->tdb, data->key);
+       before = timeval_current();
+
+       ret = tdb_chainunlock(crec->ctdb_ctx->wtdb->tdb, data->key);
+
+       timediff = timeval_elapsed(&before);
+       timediff *= 1000;       /* get us milliseconds */
+
+       if (timediff > crec->ctdb_ctx->warn_unlock_msecs) {
+               char *key;
+               key = hex_encode_talloc(talloc_tos(),
+                                       (unsigned char *)data->key.dptr,
+                                       data->key.dsize);
+               DEBUG(0, ("tdb_chainunlock on db %s, key %s took %f 
milliseconds\n",
+                         tdb_name(crec->ctdb_ctx->wtdb->tdb), key,
+                         timediff));
+               TALLOC_FREE(key);
+       }
+
+       if (ret != 0) {
+               DEBUG(0, ("tdb_chainunlock failed\n"));
+               return -1;
+       }
 
-       threshold = lp_ctdb_locktime_warn_threshold();
+       threshold = crec->ctdb_ctx->warn_locktime_msecs;
        if (threshold != 0) {
-               double timediff = timeval_elapsed(&crec->lock_time);
-               if ((timediff * 1000) > threshold) {
+               timediff = timeval_elapsed(&crec->lock_time) * 1000;
+               if (timediff > threshold) {
                        const char *key;
 
                        key = hex_encode_talloc(data,
                                                (unsigned char *)data->key.dptr,
                                                data->key.dsize);
-                       DEBUG(0, ("Held tdb lock on db %s, key %s %f seconds\n",
-                                 tdb_name(crec->ctdb_ctx->wtdb->tdb), key,
-                                 timediff));
+                       DEBUG(0, ("Held tdb lock on db %s, key %s "
+                                 "%f milliseconds\n",
+                                 tdb_name(crec->ctdb_ctx->wtdb->tdb),
+                                 key, timediff));
                }
        }
 
@@ -1017,7 +1048,13 @@ static struct db_record *fetch_locked_internal(struct 
db_ctdb_ctx *ctx,
        struct db_ctdb_rec *crec;
        NTSTATUS status;
        TDB_DATA ctdb_data;
-       int migrate_attempts = 0;
+       int migrate_attempts;
+       struct timeval migrate_start;
+       struct timeval chainlock_start;
+       struct timeval ctdb_start_time;
+       double chainlock_time = 0;
+       double ctdb_time = 0;
+       int duration_msecs;
        int lockret;
 
        if (!(result = talloc(mem_ctx, struct db_record))) {
@@ -1044,6 +1081,9 @@ static struct db_record *fetch_locked_internal(struct 
db_ctdb_ctx *ctx,
                return NULL;
        }
 
+       migrate_attempts = 0;
+       GetTimeOfDay(&migrate_start);
+
        /*
         * Do a blocking lock on the record
         */
@@ -1058,9 +1098,12 @@ again:
                TALLOC_FREE(keystr);
        }
 
+       GetTimeOfDay(&chainlock_start);
        lockret = tryonly
                ? tdb_chainlock_nonblock(ctx->wtdb->tdb, key)
                : tdb_chainlock(ctx->wtdb->tdb, key);
+       chainlock_time += timeval_elapsed(&chainlock_start);
+
        if (lockret != 0) {
                DEBUG(3, ("tdb_chainlock failed\n"));
                TALLOC_FREE(result);
@@ -1098,8 +1141,11 @@ again:
                           ctdb_data.dptr ?
                           ((struct ctdb_ltdb_header *)ctdb_data.dptr)->flags : 
0));
 
+               GetTimeOfDay(&ctdb_start_time);
                status = ctdbd_migrate(messaging_ctdbd_connection(), ctx->db_id,
                                       key);
+               ctdb_time += timeval_elapsed(&ctdb_start_time);
+
                if (!NT_STATUS_IS_OK(status)) {
                        DEBUG(5, ("ctdb_migrate failed: %s\n",
                                  nt_errstr(status)));
@@ -1110,13 +1156,38 @@ again:
                goto again;
        }
 
-       if (migrate_attempts > 10) {
-               DEBUG(0, ("db_ctdb_fetch_locked for %s key %s needed %d "
-                         "attempts\n", tdb_name(ctx->wtdb->tdb),
+       {
+               double duration;
+               duration = timeval_elapsed(&migrate_start);
+
+               /*
+                * Convert the duration to milliseconds to avoid a
+                * floating-point division of
+                * lp_parm_int("migrate_duration") by 1000.
+                */
+               duration_msecs = duration * 1000;
+       }
+
+       if ((migrate_attempts > ctx->warn_migrate_attempts) ||
+           (duration_msecs > ctx->warn_migrate_msecs)) {
+               int chain = 0;
+
+               if (tdb_get_flags(ctx->wtdb->tdb) & TDB_INCOMPATIBLE_HASH) {
+                       chain = tdb_jenkins_hash(&key) %
+                               tdb_hash_size(ctx->wtdb->tdb);
+               }
+
+               DEBUG(0, ("db_ctdb_fetch_locked for %s key %s, chain %d "
+                         "needed %d attempts, %d milliseconds, "
+                         "chainlock: %d ms, CTDB %d ms\n",
+                         tdb_name(ctx->wtdb->tdb),
                          hex_encode_talloc(talloc_tos(),
                                            (unsigned char *)key.dptr,
                                            key.dsize),
-                         migrate_attempts));
+                         chain,
+                         migrate_attempts, duration_msecs,
+                         (int) chainlock_time * 1000,
+                         (int) ctdb_time * 1000));
        }
 
        GetTimeOfDay(&crec->lock_time);
@@ -1599,6 +1670,14 @@ struct db_context *db_open_ctdb(TALLOC_CTX *mem_ctx,
                }
        }
 
+       db_ctdb->warn_unlock_msecs = lp_parm_int(-1, "ctdb",
+                                                "unlock_warn_threshold", 5);
+       db_ctdb->warn_migrate_attempts = lp_parm_int(-1, "ctdb",
+                                                    "migrate_attempts", 10);
+       db_ctdb->warn_migrate_msecs = lp_parm_int(-1, "ctdb",
+                                                 "migrate_duration", 5000);
+       db_ctdb->warn_locktime_msecs = lp_ctdb_locktime_warn_threshold();
+
        result->private_data = (void *)db_ctdb;
        result->fetch_locked = db_ctdb_fetch_locked;
        result->try_fetch_locked = db_ctdb_try_fetch_locked;


-- 
Samba Shared Repository

Reply via email to