Here's my idea for fixing this in appliance-head, without reworking the mutex reference count.
Basically it tries to - avoid undefined behaviour in the case where we fail to acquire the mutex - avoid leaking locks in the case where we fail to connect to the server - avoid releasing the mutex more times than it has been acquired, because this causes a panic I haven't tested this in place yet, but I thought I'd send it in the hope that jra could tell me if I'm on the right track. Index: winbindd_cm.c =================================================================== RCS file: /data/cvs/samba/source/nsswitch/winbindd_cm.c,v retrieving revision 1.33.2.19 diff -u -u -p -r1.33.2.19 winbindd_cm.c --- winbindd_cm.c 10 Dec 2002 00:50:28 -0000 1.33.2.19 +++ winbindd_cm.c 10 Jan 2003 06:27:09 -0000 @@ -45,6 +45,22 @@ */ /* + The per-server mutex on opening server connections is required to + work around a suspected bug in NT, which causes failures if the same + client host tries to authenticate on two connections at the same + time. + + In addition, the mutex is still held after opening the connection + when trying to do a NetLogon. + + If we fail to acquire the mutex because somebody else is hogging it, + then we can still proceed to open the connection and we take our + chances with NT. However we must then be careful not to release it. + + This whole mechanism is quite different in HEAD. +*/ + +/* TODO: - I'm pretty annoyed by all the make_nmb_name() stuff. It should be @@ -68,7 +84,12 @@ struct winbindd_cm_conn { fstring domain; fstring controller; fstring pipe_name; + + /** Tells how many callers inside this process are using the + * lock on connections to this server. When 0, the + * system-wide mutex in the tdb is released. **/ size_t mutex_ref_count; + struct cli_state *cli; POLICY_HND pol; }; @@ -163,10 +184,16 @@ static void add_failed_connection_entry( -/* Open a connction to the remote server, cache failures for 30 seconds */ - +/** + * Open a connection to the remote server, cache failures for 30 seconds + * + * @param keep_mutex If true, a reservation on the server mutex is + * still held on successful return, so that the caller can use it and + * release it later. + **/ static NTSTATUS cm_open_connection(const char *domain, const int pipe_index, - struct winbindd_cm_conn *new_conn, BOOL keep_mutex) + struct winbindd_cm_conn *new_conn, + BOOL keep_mutex) { struct failed_connection_cache *fcc; NTSTATUS result; @@ -228,13 +255,15 @@ static NTSTATUS cm_open_connection(const DEBUG(5, ("connecting to %s from %s with username [%s]\\[%s]\n", new_conn->controller, global_myname_unix(), ipc_domain, ipc_username)); + if (!secrets_named_mutex(new_conn->controller, WINBIND_SERVER_MUTEX_WAIT_TIME, + &new_conn->mutex_ref_count)) { + DEBUG(0,("cm_open_connection: mutex grab failed for %s\n", + new_conn->controller)); + /* continue anyway; note that the mutex may not actually be + * held during the rest of this function. */ + } + for (i = 0; retry && (i < NUM_CLI_AUTH_CONNECT_RETRIES); i++) { - - if (!secrets_named_mutex(new_conn->controller, WINBIND_SERVER_MUTEX_WAIT_TIME, &new_conn->mutex_ref_count)) { - DEBUG(0,("cm_open_connection: mutex grab failed for %s\n", new_conn->controller)); - continue; - } - result = cli_full_connection(&new_conn->cli, global_myname_unix(), new_conn->controller, &dc_ip, 0, CLI_AUTH_TIMEOUT, "IPC$", "IPC", ipc_username, ipc_domain, @@ -249,7 +278,8 @@ static NTSTATUS cm_open_connection(const SAFE_FREE(ipc_password); if (!NT_STATUS_IS_OK(result)) { - secrets_named_mutex_release(new_conn->controller, &new_conn->mutex_ref_count); + if (new_conn->mutex_ref_count > 0) + secrets_named_mutex_release(new_conn->controller, +&new_conn->mutex_ref_count); add_failed_connection_entry(new_conn, result); return result; } @@ -264,15 +294,19 @@ static NTSTATUS cm_open_connection(const * if the PDC is an NT4 box. but since there is only one 2k * specific UUID right now, i'm not going to bother. --jerry */ - secrets_named_mutex_release(new_conn->controller, &new_conn->mutex_ref_count); + if (new_conn->mutex_ref_count > 0) + secrets_named_mutex_release(new_conn->controller, +&new_conn->mutex_ref_count); if ( !is_win2k_pipe(pipe_index) ) add_failed_connection_entry(new_conn, result); cli_shutdown(new_conn->cli); return result; } - if (!keep_mutex) + if (!keep_mutex && new_conn->mutex_ref_count > 0) { + /* Need to release it if we hold it */ secrets_named_mutex_release(new_conn->controller, &new_conn->mutex_ref_count); + } + return NT_STATUS_OK; } @@ -308,8 +342,15 @@ static BOOL connection_ok(struct winbind return True; } -/* Get a connection to the remote DC and open the pipe. If there is already a connection, use that */ - +/** + * Get a connection to the remote DC and open the pipe. If there is + * already a connection, use that. + * + * @param keep_mutex If true, we attempt to hold a reservation on the server + * mutex on successful return, so that the caller can use it and release it + * later. The caller needs to check the reference count to see whether it's + * actually held or not. + **/ static NTSTATUS get_connection_from_cache(const char *domain, const char *pipe_name, struct winbindd_cm_conn **conn_out, BOOL keep_mutex) { struct winbindd_cm_conn *conn, conn_temp; -- Martin