The branch, master has been updated
       via  7794497 ctdb-tests: Add robust mutex test
       via  3a56a16 ctdb-locking: Explicitly unlock record/db in lock helper
       via  5b1076d ctdb-locking: Remove support for locking multiple databases
      from  b4f40e4 s4:tests/sec_descriptor: use more unique oid values

https://git.samba.org/?p=samba.git;a=shortlog;h=master


- Log -----------------------------------------------------------------
commit 7794497bc909fa7b02da9d9ce1fc496a8fa2a9ae
Author: Amitay Isaacs <[email protected]>
Date:   Fri Dec 2 15:11:20 2016 +1100

    ctdb-tests: Add robust mutex test
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469
    
    This demonstrates a robust mutex bug on Linux/glibc systems.
    
    Signed-off-by: Amitay Isaacs <[email protected]>
    Reviewed-by: Martin Schwenke <[email protected]>
    
    Autobuild-User(master): Amitay Isaacs <[email protected]>
    Autobuild-Date(master): Thu Jan 12 07:59:34 CET 2017 on sn-devel-144

commit 3a56a16b06cf6d1cce613ec29f5ea46630902072
Author: Amitay Isaacs <[email protected]>
Date:   Tue Nov 29 17:20:45 2016 +1100

    ctdb-locking: Explicitly unlock record/db in lock helper
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469
    
    Instead of killing lock helper processes with SIGKILL, send SIGTERM so
    lock helper processes can explicitly unlock record/db.
    
    Signed-off-by: Amitay Isaacs <[email protected]>
    Reviewed-by: Martin Schwenke <[email protected]>

commit 5b1076dc61f5e3f006c1b8cef98e7d2d3cc1bfba
Author: Amitay Isaacs <[email protected]>
Date:   Tue Nov 29 17:13:41 2016 +1100

    ctdb-locking: Remove support for locking multiple databases
    
    BUG: https://bugzilla.samba.org/show_bug.cgi?id=12469
    
    The code to lock multiple databases has been dropped from ctdb_lock.c.
    
    Signed-off-by: Amitay Isaacs <[email protected]>
    Reviewed-by: Martin Schwenke <[email protected]>

-----------------------------------------------------------------------

Summary of changes:
 ctdb/server/ctdb_lock.c         |   6 +-
 ctdb/server/ctdb_lock_helper.c  | 194 ++++++++++++++++++++++++-----
 ctdb/tests/src/test_mutex_raw.c | 261 ++++++++++++++++++++++++++++++++++++++++
 ctdb/wscript                    |   8 +-
 4 files changed, 435 insertions(+), 34 deletions(-)
 create mode 100644 ctdb/tests/src/test_mutex_raw.c


Changeset truncated at 500 lines:

diff --git a/ctdb/server/ctdb_lock.c b/ctdb/server/ctdb_lock.c
index 0203369..b2c6d15 100644
--- a/ctdb/server/ctdb_lock.c
+++ b/ctdb/server/ctdb_lock.c
@@ -192,7 +192,7 @@ static int ctdb_lock_context_destructor(struct lock_context *lock_ctx)
                lock_ctx->request->lctx = NULL;
        }
        if (lock_ctx->child > 0) {
-               ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGKILL);
+               ctdb_kill(lock_ctx->ctdb, lock_ctx->child, SIGTERM);
                if (lock_ctx->type == LOCK_RECORD) {
                        DLIST_REMOVE(lock_ctx->ctdb_db->lock_current, lock_ctx);
                } else {
@@ -683,7 +683,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
                                            ctdb_lock_timeout_handler,
                                            (void *)lock_ctx);
        if (lock_ctx->ttimer == NULL) {
-               ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+               ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
                lock_ctx->child = -1;
                close(lock_ctx->fd[0]);
                return;
@@ -698,7 +698,7 @@ static void ctdb_lock_schedule(struct ctdb_context *ctdb)
                                      (void *)lock_ctx);
        if (lock_ctx->tfd == NULL) {
                TALLOC_FREE(lock_ctx->ttimer);
-               ctdb_kill(ctdb, lock_ctx->child, SIGKILL);
+               ctdb_kill(ctdb, lock_ctx->child, SIGTERM);
                lock_ctx->child = -1;
                close(lock_ctx->fd[0]);
                return;
diff --git a/ctdb/server/ctdb_lock_helper.c b/ctdb/server/ctdb_lock_helper.c
index 34823fb..8b9f5aa 100644
--- a/ctdb/server/ctdb_lock_helper.c
+++ b/ctdb/server/ctdb_lock_helper.c
@@ -20,11 +20,14 @@
 #include "replace.h"
 #include "system/filesys.h"
 #include "system/network.h"
+#include "system/wait.h"
 
 #include <talloc.h>
+#include <tevent.h>
 #include <tdb.h>
 
 #include "lib/util/sys_rw.h"
+#include "lib/util/tevent_unix.h"
 
 #include "protocol/protocol.h"
 
@@ -33,6 +36,11 @@
 static char *progname = NULL;
 static bool realtime = true;
 
+struct lock_state {
+       struct tdb_context *tdb;
+       TDB_DATA key;
+};
+
 static void set_priority(void)
 {
        const char *ptr;
@@ -73,7 +81,7 @@ static void usage(void)
 {
        fprintf(stderr, "\n");
        fprintf(stderr, "Usage: %s <ctdbd-pid> <output-fd> RECORD <db-path> <db-flags> <db-key>\n", progname);
-       fprintf(stderr, "       %s <ctdbd-pid> <output-fd> DB <db1-path> <db1-flags> [<db2-path> <db2-flags>...]\n", progname);
+       fprintf(stderr, "       %s <ctdbd-pid> <output-fd> DB <db-path> <db-flags>\n", progname);
 }
 
 static uint8_t *hex_decode_talloc(TALLOC_CTX *mem_ctx,
@@ -93,10 +101,9 @@ static uint8_t *hex_decode_talloc(TALLOC_CTX *mem_ctx,
        return buffer;
 }
 
-static int lock_record(const char *dbpath, const char *dbflags, const char *dbkey)
+static int lock_record(const char *dbpath, const char *dbflags,
+                      const char *dbkey, struct lock_state *state)
 {
-       TDB_DATA key;
-       struct tdb_context *tdb;
        int tdb_flags;
 
        /* No error checking since CTDB always passes sane values */
@@ -104,23 +111,24 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke
 
        /* Convert hex key to key */
        if (strcmp(dbkey, "NULL") == 0) {
-               key.dptr = NULL;
-               key.dsize = 0;
+               state->key.dptr = NULL;
+               state->key.dsize = 0;
        } else {
-               key.dptr = hex_decode_talloc(NULL, dbkey, &key.dsize);
+               state->key.dptr = hex_decode_talloc(NULL, dbkey,
+                                                   &state->key.dsize);
        }
 
-       tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
-       if (tdb == NULL) {
+       state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
+       if (state->tdb == NULL) {
                fprintf(stderr, "locking: Error opening database %s\n", dbpath);
                return 1;
        }
 
        set_priority();
 
-       if (tdb_chainlock(tdb, key) < 0) {
+       if (tdb_chainlock(state->tdb, state->key) < 0) {
                fprintf(stderr, "locking: Error getting record lock (%s)\n",
-                       tdb_errorstr(tdb));
+                       tdb_errorstr(state->tdb));
                return 1;
        }
 
@@ -130,26 +138,25 @@ static int lock_record(const char *dbpath, const char *dbflags, const char *dbke
 
 }
 
-
-static int lock_db(const char *dbpath, const char *dbflags)
+static int lock_db(const char *dbpath, const char *dbflags,
+                  struct lock_state *state)
 {
-       struct tdb_context *tdb;
        int tdb_flags;
 
        /* No error checking since CTDB always passes sane values */
        tdb_flags = strtol(dbflags, NULL, 0);
 
-       tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
-       if (tdb == NULL) {
+       state->tdb = tdb_open(dbpath, 0, tdb_flags, O_RDWR, 0600);
+       if (state->tdb == NULL) {
                fprintf(stderr, "locking: Error opening database %s\n", dbpath);
                return 1;
        }
 
        set_priority();
 
-       if (tdb_lockall(tdb) < 0) {
+       if (tdb_lockall(state->tdb) < 0) {
                fprintf(stderr, "locking: Error getting db lock (%s)\n",
-                       tdb_errorstr(tdb));
+                       tdb_errorstr(state->tdb));
                return 1;
        }
 
@@ -158,13 +165,114 @@ static int lock_db(const char *dbpath, const char *dbflags)
        return 0;
 }
 
+struct wait_for_parent_state {
+       struct tevent_context *ev;
+       pid_t ppid;
+};
+
+static void wait_for_parent_check(struct tevent_req *subreq);
+
+static struct tevent_req *wait_for_parent_send(TALLOC_CTX *mem_ctx,
+                                              struct tevent_context *ev,
+                                              pid_t ppid)
+{
+       struct tevent_req *req, *subreq;
+       struct wait_for_parent_state *state;
+
+       req = tevent_req_create(mem_ctx, &state, struct wait_for_parent_state);
+       if (req == NULL) {
+               return NULL;
+       }
+
+       state->ev = ev;
+       state->ppid = ppid;
+
+       if (ppid == 1) {
+               tevent_req_done(req);
+               return tevent_req_post(req, ev);
+       }
+
+       subreq = tevent_wakeup_send(state, ev,
+                                   tevent_timeval_current_ofs(5,0));
+       if (tevent_req_nomem(subreq, req)) {
+               return tevent_req_post(req, ev);
+       }
+       tevent_req_set_callback(subreq, wait_for_parent_check, req);
+
+       return req;
+}
+
+static void wait_for_parent_check(struct tevent_req *subreq)
+{
+       struct tevent_req *req = tevent_req_callback_data(
+               subreq, struct tevent_req);
+       struct wait_for_parent_state *state = tevent_req_data(
+               req, struct wait_for_parent_state);
+       bool status;
+
+       status = tevent_wakeup_recv(subreq);
+       TALLOC_FREE(subreq);
+       if (! status) {
+               /* Ignore error */
+               fprintf(stderr, "locking: tevent_wakeup_recv() failed\n");
+       }
+
+       if (kill(state->ppid, 0) == -1 && errno == ESRCH) {
+               tevent_req_done(req);
+               return;
+       }
+
+       subreq = tevent_wakeup_send(state, state->ev,
+                                   tevent_timeval_current_ofs(5,0));
+       if (tevent_req_nomem(subreq, req)) {
+               return;
+       }
+       tevent_req_set_callback(subreq, wait_for_parent_check, req);
+}
+
+static bool wait_for_parent_recv(struct tevent_req *req)
+{
+       if (tevent_req_is_unix_error(req, NULL)) {
+               return false;
+       }
+
+       return true;
+}
+
+static void cleanup(struct lock_state *state)
+{
+       if (state->tdb != NULL) {
+               if (state->key.dsize == 0) {
+                       tdb_unlockall(state->tdb);
+               } else {
+                       tdb_chainunlock(state->tdb, state->key);
+               }
+               tdb_close(state->tdb);
+       }
+}
+
+static void signal_handler(struct tevent_context *ev,
+                          struct tevent_signal *se,
+                          int signum, int count, void *siginfo,
+                          void *private_data)
+{
+       struct lock_state *state = (struct lock_state *)private_data;
+
+       cleanup(state);
+       exit(0);
+}
 
 int main(int argc, char *argv[])
 {
+       struct tevent_context *ev;
+       struct tevent_signal *se;
+       struct tevent_req *req;
+       struct lock_state state = { 0 };
        int write_fd;
        char result = 0;
        int ppid;
        const char *lock_type;
+       bool status;
 
        reset_scheduler();
 
@@ -179,6 +287,20 @@ int main(int argc, char *argv[])
        write_fd = atoi(argv[2]);
        lock_type = argv[3];
 
+       ev = tevent_context_init(NULL);
+       if (ev == NULL) {
+               fprintf(stderr, "locking: tevent_context_init() failed\n");
+               exit(1);
+       }
+
+       se = tevent_add_signal(ev, ev, SIGTERM, 0,
+                              signal_handler, &state);
+       if (se == NULL) {
+               fprintf(stderr, "locking: tevent_add_signal() failed\n");
+               talloc_free(ev);
+               exit(1);
+       }
+
        if (strcmp(lock_type, "RECORD") == 0) {
                if (argc != 7) {
                        fprintf(stderr,
@@ -187,20 +309,17 @@ int main(int argc, char *argv[])
                        usage();
                        exit(1);
                }
-               result = lock_record(argv[4], argv[5], argv[6]);
+               result = lock_record(argv[4], argv[5], argv[6], &state);
 
        } else if (strcmp(lock_type, "DB") == 0) {
-               int n;
-
-               /* If there are no databases specified, no need for lock */
-               if (argc > 4) {
-                       for (n=4; n+1<argc; n+=2) {
-                               result = lock_db(argv[n], argv[n+1]);
-                               if (result != 0) {
-                                       break;
-                               }
-                       }
+               if (argc != 6) {
+                       fprintf(stderr,
+                               "locking: Invalid number of arguments (%d)\n",
+                               argc);
+                       usage();
+                       exit(1);
                }
+               result = lock_db(argv[4], argv[5], &state);
 
        } else {
                fprintf(stderr, "locking: Invalid lock-type '%s'\n", lock_type);
@@ -210,6 +329,21 @@ int main(int argc, char *argv[])
 
        send_result(write_fd, result);
 
-       ctdb_wait_for_process_to_exit(ppid);
+       req = wait_for_parent_send(ev, ev, ppid);
+       if (req == NULL) {
+               fprintf(stderr, "locking: wait_for_parent_send() failed\n");
+               cleanup(&state);
+               exit(1);
+       }
+
+       tevent_req_poll(req, ev);
+
+       status = wait_for_parent_recv(req);
+       if (! status) {
+               fprintf(stderr, "locking: wait_for_parent_recv() failed\n");
+       }
+
+       talloc_free(ev);
+       cleanup(&state);
        return 0;
 }
diff --git a/ctdb/tests/src/test_mutex_raw.c b/ctdb/tests/src/test_mutex_raw.c
new file mode 100644
index 0000000..8e3cae3
--- /dev/null
+++ b/ctdb/tests/src/test_mutex_raw.c
@@ -0,0 +1,261 @@
+/*
+   Robust mutex test
+
+   Copyright (C) Amitay Isaacs  2016
+
+   This program is free software; you can redistribute it and/or modify
+   it under the terms of the GNU General Public License as published by
+   the Free Software Foundation; either version 3 of the License, or
+   (at your option) any later version.
+
+   This program is distributed in the hope that it will be useful,
+   but WITHOUT ANY WARRANTY; without even the implied warranty of
+   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+   GNU General Public License for more details.
+
+   You should have received a copy of the GNU General Public License
+   along with this program; if not, see <http://www.gnu.org/licenses/>.
+*/
+
+/*
+ * Run this test as follows:
+ *
+ * 1. Running all processes at normal priority
+ *
+ *  $ while true ; do ./bin/test_mutex_raw /tmp/foo 10 0 ; done
+ *
+ * 2. Running all processes at real-time priority
+ *
+ *  # while true ; do ./bin/test_mutex_raw /tmp/foo 10 1 ; done
+ *
+ * The test will block after a few iterations.  At this time none of the
+ * child processes is holding the mutex.
+ *
+ * To check which process is holding a lock:
+ *
+ *  $ ./bin/test_mutex_raw /tmp/foo debug
+ *
+ *  If no pid is printed, then no process is holding the mutex.
+ */
+
+#include <stdio.h>
+#include <unistd.h>
+#include <inttypes.h>
+#include <sys/types.h>
+#include <sys/fcntl.h>
+#include <stdlib.h>
+#include <string.h>
+#include <sys/wait.h>
+#include <sched.h>
+#include <sys/mman.h>
+#include <pthread.h>
+#include <errno.h>
+#include <stdbool.h>
+
+int pthread_mutex_consistent_np(pthread_mutex_t *);
+
+static void set_realtime(void)
+{
+       struct sched_param p;
+       int ret;
+
+       p.sched_priority = 1;
+
+       ret = sched_setscheduler(0, SCHED_FIFO, &p);
+       if (ret == -1) {
+               fprintf(stderr, "Failed to set scheduler to SCHED_FIFO\n");
+       }
+}
+
+static void high_priority(void)
+{
+       int ret;
+
+       ret = nice(-20);
+       if (ret == -1) {
+               fprintf(stderr, "Failed to set high priority\n");
+       }
+}
+
+static void run_child(const char *filename)
+{
+       pthread_mutex_t *mutex;
+       void *addr;
+       int ret, fd;
+
+       fd = open(filename, O_RDWR, 0600);
+       if (fd == -1) {
+               exit(1);
+       }
+
+       addr = mmap(NULL, sizeof(pthread_mutex_t), PROT_READ|PROT_WRITE,
+                   MAP_SHARED|MAP_FILE, fd, 0);
+       if (addr == NULL) {
+               exit(2);
+       }
+
+       mutex = (pthread_mutex_t *)addr;
+
+again:
+       ret = pthread_mutex_lock(mutex);
+       if (ret == EOWNERDEAD) {
+               ret = pthread_mutex_consistent_np(mutex);
+       } else if (ret == EAGAIN) {
+               goto again;
+       }
+       if (ret != 0) {
+               fprintf(stderr, "pid %u lock failed, ret=%d\n", getpid(), ret);
+               exit(3);
+       }
+
+       fprintf(stderr, "pid %u locked\n", getpid());
+       kill(getpid(), SIGKILL);
+}
+
+#define PRIO_NORMAL    0
+#define PRIO_REALTIME  1
+#define PRIO_NICE_20   2
+
+int main(int argc, const char **argv)
+{
+       pthread_mutexattr_t ma;
+       pthread_mutex_t *mutex;
+       int fd, ret, i;
+       pid_t pid;
+       void *addr;
+       int num_children;
+       int priority = PRIO_NORMAL;
+
+       if (argc < 3 || argc > 4) {
+               fprintf(stderr, "Usage: %s <file> <n> [0|1|2]\n", argv[0]);
+               fprintf(stderr, "       %s <file> debug\n", argv[0]);
+               exit(1);
+       }
+
+       if (argc == 4) {
+               priority = atoi(argv[3]);
+       }
+
+       if (priority == PRIO_REALTIME) {
+               set_realtime();
+       } else if (priority == PRIO_NICE_20) {
+               high_priority();
+       }
+
+       fd = open(argv[1], O_CREAT|O_RDWR, 0600);
+       if (fd == -1) {
+               fprintf(stderr, "open failed\n");
+               exit(1);
+       }
+
+       ret = lseek(fd, 0, SEEK_SET);
+       if (ret != 0) {
+               fprintf(stderr, "lseek failed\n");
+               exit(1);
+       }
+
+       ret = ftruncate(fd, sizeof(pthread_mutex_t));


-- 
Samba Shared Repository

Reply via email to