On Tue, Apr 24, 2012 at 03:52:00PM +0200, Petr Spacek wrote:
> On 04/24/2012 03:21 PM, Petr Spacek wrote:
> >Hello,
> >
> >this patch adds deadlock detection (based on simple timeout) to current code.
> >If (probable) deadlock is detected, current action is stopped with proper 
> >error.
> >
> >It properly detects "Simo's deadlock" with 'connections' parameter == 1.
> >(Described in https://fedorahosted.org/bind-dyndb-ldap/ticket/66)
> >
> >Deadlock itself will be fixed by separate patch.
> >
> >Petr^2 Spacek
> 
> Self-NACK :-)
> 
> Second version of the patch is attached. ldap_pool_getconnection()
> and ldap_pool_putconnection() now has same interface and more
> consistent behaviour.
> 
> Overall functionality is same.

Hi,

although I'm not fully happy with current design of the detection logic, we can
include it before we create something better, for example based on thread IDs
(one thread can acquire semaphore only one time).

Please check my comments inside the patch.

Regards, Adam

> From ed7596a9410269c0c29418247971f262b7fa77f3 Mon Sep 17 00:00:00 2001
> From: Petr Spacek <pspa...@redhat.com>
> Date: Tue, 24 Apr 2012 15:09:32 +0200
> Subject: [PATCH] Add simple semaphore deadlock detection logic.
>  Signed-off-by: Petr Spacek <pspa...@redhat.com>
> 
> ---
>  src/ldap_helper.c |   78 
> ++++++++++++++++++++++++++++++++---------------------
>  src/semaphore.c   |   26 +++++++++++++++---
>  src/semaphore.h   |    6 +++-
>  3 files changed, 74 insertions(+), 36 deletions(-)
> 
> diff --git a/src/ldap_helper.c b/src/ldap_helper.c
> index 47c0559..f4c4d02 100644
> --- a/src/ldap_helper.c
> +++ b/src/ldap_helper.c
> @@ -296,9 +296,10 @@ ldap_delete_zone2(ldap_instance_t *inst, dns_name_t 
> *name, isc_boolean_t lock);
>  static isc_result_t ldap_pool_create(isc_mem_t *mctx, unsigned int 
> connections,
>               ldap_pool_t **poolp);
>  static void ldap_pool_destroy(ldap_pool_t **poolp);
> -static ldap_connection_t * ldap_pool_getconnection(ldap_pool_t *pool);
> +static isc_result_t ldap_pool_getconnection(ldap_pool_t *pool,
> +             ldap_connection_t ** conn);
>  static void ldap_pool_putconnection(ldap_pool_t *pool,
> -             ldap_connection_t *ldap_conn);
> +             ldap_connection_t ** conn);
>  static isc_result_t ldap_pool_connect(ldap_pool_t *pool,
>               ldap_instance_t *ldap_inst);
>  
> @@ -402,6 +403,10 @@ new_ldap_instance(isc_mem_t *mctx, const char *db_name,
>       ldap_settings[i++].target = &ldap_inst->dyn_update;
>       CHECK(set_settings(ldap_settings, argv));
>  
> +     /* Set timer for deadlock detection inside semaphore_wait_timed . */
> +     if (semaphore_wait_timeout.seconds < 
> ldap_inst->timeout+SEM_WAIT_TIMEOUT_ADD)
> +             semaphore_wait_timeout.seconds = 
> ldap_inst->timeout+SEM_WAIT_TIMEOUT_ADD;

I'm affraid such low timeout (12 sec by default) will cause too many false
positives. I recommend to set semaphore timeout to at least 60 seconds.

>       /* Validate and check settings. */
>       str_toupper(ldap_inst->sasl_mech);
>       if (ldap_inst->connections < 1) {
> @@ -1089,7 +1094,7 @@ isc_result_t
>  refresh_zones_from_ldap(ldap_instance_t *ldap_inst)
>  {
>       isc_result_t result = ISC_R_SUCCESS;
> -     ldap_connection_t *ldap_conn;
> +     ldap_connection_t *ldap_conn = NULL;
>       int zone_count = 0;
>       ldap_entry_t *entry;
>       dns_rbt_t *rbt = NULL;
> @@ -1114,7 +1119,7 @@ refresh_zones_from_ldap(ldap_instance_t *ldap_inst)
>  
>       log_debug(2, "refreshing list of zones for %s", ldap_inst->db_name);
>  
> -     ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
> +     CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
>  
>       CHECK(ldap_query(ldap_inst, ldap_conn, str_buf(ldap_inst->base),
>                        LDAP_SCOPE_SUBTREE, config_attrs, 0,
> @@ -1227,7 +1232,7 @@ cleanup:
>       if (invalidate_nodechain)
>               dns_rbtnodechain_invalidate(&chain);
>  
> -     ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
> +     ldap_pool_putconnection(ldap_inst->pool, &ldap_conn);
>  
>       log_debug(2, "finished refreshing list of zones");
>  
> @@ -1381,7 +1386,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t 
> *ldap_inst, dns_name_t *nam
>                    dns_name_t *origin, ldapdb_nodelist_t *nodelist)
>  {
>       isc_result_t result;
> -     ldap_connection_t *ldap_conn;
> +     ldap_connection_t *ldap_conn = NULL;
>       ldap_entry_t *entry;
>       ld_string_t *string = NULL;
>       ldapdb_node_t *node;
> @@ -1392,7 +1397,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t 
> *ldap_inst, dns_name_t *nam
>       REQUIRE(nodelist != NULL);
>  
>       /* RRs aren't in the cache, perform ordinary LDAP query */
> -     ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
> +     CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
>  
>       INIT_LIST(*nodelist);
>       CHECK(str_new(mctx, &string));
> @@ -1439,7 +1444,7 @@ ldapdb_nodelist_get(isc_mem_t *mctx, ldap_instance_t 
> *ldap_inst, dns_name_t *nam
>       result = ISC_R_SUCCESS;
>  
>  cleanup:
> -     ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
> +     ldap_pool_putconnection(ldap_inst->pool, &ldap_conn);
>       str_destroy(&string);
>  
>       return result;
> @@ -1450,7 +1455,7 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t 
> *ldap_inst, dns_name_t *na
>                    dns_name_t *origin, ldapdb_rdatalist_t *rdatalist)
>  {
>       isc_result_t result;
> -     ldap_connection_t *ldap_conn;
> +     ldap_connection_t *ldap_conn = NULL;
>       ldap_entry_t *entry;
>       ld_string_t *string = NULL;
>       ldap_cache_t *cache;
> @@ -1468,12 +1473,11 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t 
> *ldap_inst, dns_name_t *na
>               return result;
>  
>       /* RRs aren't in the cache, perform ordinary LDAP query */
> -     ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
> -
>       INIT_LIST(*rdatalist);
>       CHECK(str_new(mctx, &string));
>       CHECK(dnsname_to_dn(ldap_inst->zone_register, name, string));
>  
> +     CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
>       CHECK(ldap_query(ldap_inst, ldap_conn, str_buf(string),
>                        LDAP_SCOPE_BASE, NULL, 0, "(objectClass=idnsRecord)"));
>  
> @@ -1500,7 +1504,7 @@ ldapdb_rdatalist_get(isc_mem_t *mctx, ldap_instance_t 
> *ldap_inst, dns_name_t *na
>               result = ISC_R_NOTFOUND;
>  
>  cleanup:
> -     ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
> +     ldap_pool_putconnection(ldap_inst->pool, &ldap_conn);
>       str_destroy(&string);
>  
>       if (result != ISC_R_SUCCESS)
> @@ -2250,7 +2254,7 @@ modify_ldap_common(dns_name_t *owner, ldap_instance_t 
> *ldap_inst,
>               zone_dn += 1; /* skip whitespace */
>       }
>  
> -     ldap_conn = ldap_pool_getconnection(ldap_inst->pool);
> +     CHECK(ldap_pool_getconnection(ldap_inst->pool, &ldap_conn));
>       CHECK(ldap_query(ldap_inst, ldap_conn, zone_dn,
>                                        LDAP_SCOPE_BASE, attrs, 0,
>                                        
> "(&(objectClass=idnsZone)(idnsZoneActive=TRUE))"));
> @@ -2481,9 +2485,7 @@ modify_ldap_common(dns_name_t *owner, ldap_instance_t 
> *ldap_inst,
>       }
>       
>  cleanup:
> -     if (ldap_conn != NULL)
> -             ldap_pool_putconnection(ldap_inst->pool, ldap_conn);
> -
> +     ldap_pool_putconnection(ldap_inst->pool, &ldap_conn);
>       str_destroy(&owner_dn_ptr);
>       str_destroy(&owner_dn);
>       free_ldapmod(mctx, &change[0]);
> @@ -2565,15 +2567,18 @@ ldap_pool_destroy(ldap_pool_t **poolp)
>       MEM_PUT_AND_DETACH(pool);
>  }
>  
> -static ldap_connection_t *
> -ldap_pool_getconnection(ldap_pool_t *pool)
> +static isc_result_t
> +ldap_pool_getconnection(ldap_pool_t *pool, ldap_connection_t ** conn)
>  {
>       ldap_connection_t *ldap_conn = NULL;
>       unsigned int i;
> +     isc_result_t result;
>  
>       REQUIRE(pool != NULL);
> +     REQUIRE(conn != NULL && *conn == NULL);
> +     ldap_conn = *conn;
>  
> -     semaphore_wait(&pool->conn_semaphore);
> +     CHECK(semaphore_wait_timed(&pool->conn_semaphore));
>       for (i = 0; i < pool->connections; i++) {
>               ldap_conn = pool->conns[i];
>               if (isc_mutex_trylock(&ldap_conn->lock) == ISC_R_SUCCESS)
> @@ -2583,16 +2588,30 @@ ldap_pool_getconnection(ldap_pool_t *pool)
>       RUNTIME_CHECK(ldap_conn != NULL);
>  
>       INIT_LIST(ldap_conn->ldap_entries);
> +     *conn = ldap_conn;
>  
> -     return ldap_conn;
> +cleanup:
> +     if (result != ISC_R_SUCCESS) {
> +             log_error("timeout in ldap_pool_getconnection(): try to raise "
> +                             "'connections' parameter; potential deadlock?");
> +     }
> +     return result;
>  }
>  
>  static void
> -ldap_pool_putconnection(ldap_pool_t *pool, ldap_connection_t *ldap_conn)
> +ldap_pool_putconnection(ldap_pool_t *pool, ldap_connection_t **conn)
>  {
> +     REQUIRE(conn);

Although this is correct, please use REQUIRE(conn != NULL);

> +     ldap_connection_t *ldap_conn = *conn;
> +
> +     if (ldap_conn == NULL)
> +             return;
> +
>       ldap_query_free(ldap_conn);
>       UNLOCK(&ldap_conn->lock);
>       semaphore_signal(&pool->conn_semaphore);
> +
> +     *conn = NULL;
>  }
>  
>  static isc_result_t
> @@ -2719,7 +2738,7 @@ update_action(isc_task_t *task, isc_event_t *event)
>       ldap_psearchevent_t *pevent = (ldap_psearchevent_t *)event;
>       isc_result_t result ;
>       ldap_instance_t *inst = NULL;
> -     ldap_connection_t *conn;
> +     ldap_connection_t *conn = NULL;
>       ldap_entry_t *entry;
>       isc_boolean_t delete = ISC_TRUE;
>       isc_mem_t *mctx;
> @@ -2736,7 +2755,7 @@ update_action(isc_task_t *task, isc_event_t *event)
>       if (result != ISC_R_SUCCESS)
>               goto cleanup;
>  
> -     conn = ldap_pool_getconnection(inst->pool);
> +     CHECK(ldap_pool_getconnection(inst->pool, &conn));
>  
>       CHECK(ldap_query(inst, conn, pevent->dn,
>                        LDAP_SCOPE_BASE, attrs, 0,
> @@ -2754,14 +2773,13 @@ update_action(isc_task_t *task, isc_event_t *event)
>       if (delete)
>               CHECK(ldap_delete_zone(inst, pevent->dn, ISC_TRUE));
>  
> -        ldap_pool_putconnection(inst->pool, conn);
> -
>  cleanup:
>       if (result != ISC_R_SUCCESS)
>               log_error("update_action (psearch) failed for %s. "
>                         "Zones can be outdated, run `rndc reload`",
>                         pevent->dn);
>  
> +     ldap_pool_putconnection(inst->pool, &conn);
>       isc_mem_free(mctx, pevent->dbname);
>       isc_mem_free(mctx, pevent->dn);
>       isc_mem_detach(&mctx);
> @@ -2790,7 +2808,7 @@ update_config(isc_task_t *task, isc_event_t *event)
>       if (result != ISC_R_SUCCESS)
>               goto cleanup;
>  
> -     conn = ldap_pool_getconnection(inst->pool);
> +     CHECK(ldap_pool_getconnection(inst->pool, &conn));
>  
>       CHECK(ldap_query(inst, conn, pevent->dn,
>                        LDAP_SCOPE_BASE, attrs, 0,
> @@ -2809,14 +2827,12 @@ update_config(isc_task_t *task, isc_event_t *event)
>  
>  
>  cleanup:
> -     if (conn != NULL)
> -             ldap_pool_putconnection(inst->pool, conn);
> -
>       if (result != ISC_R_SUCCESS)
>               log_error("update_config (psearch) failed for %s. "
>                         "Configuration can be outdated, run `rndc reload`",
>                         pevent->dn);
>  
> +     ldap_pool_putconnection(inst->pool, &conn);
>       isc_mem_free(mctx, pevent->dbname);
>       isc_mem_free(mctx, pevent->dn);
>       isc_mem_detach(&mctx);
> @@ -3079,7 +3095,7 @@ ldap_psearch_watcher(isc_threadarg_t arg)
>       tv.tv_usec = 0;
>  
>       /* Pick connection, one is reserved purely for this thread */
> -     conn = ldap_pool_getconnection(inst->pool);
> +     CHECK(ldap_pool_getconnection(inst->pool, &conn));
>  
>       /* Try to connect. */
>       while (conn->handle == NULL) {
> @@ -3189,7 +3205,7 @@ soft_err:
>       log_debug(1, "Ending ldap_psearch_watcher");
>  
>  cleanup:
> -     ldap_pool_putconnection(inst->pool, conn);
> +     ldap_pool_putconnection(inst->pool, &conn);
>  
>       return (isc_threadresult_t)0;
>  }
> diff --git a/src/semaphore.c b/src/semaphore.c
> index 41d6a30..76e6aa2 100644
> --- a/src/semaphore.c
> +++ b/src/semaphore.c
> @@ -27,8 +27,19 @@
>  #include <isc/condition.h>
>  #include <isc/result.h>
>  #include <isc/util.h>
> +#include <isc/time.h>
>  
>  #include "semaphore.h"
> +#include "util.h"
> +
> +/*
> + * Timer setting for deadlock detection. Format: seconds, nanoseconds.
> + * These values will be overwriten during initialization
> + * from set_settings() with max(setting+SEM_WAIT_TIMEOUT_ADD, curr_value).
> + *
> + * Initial value can be useful in early phases of initialization.
> + */
> +isc_interval_t semaphore_wait_timeout = { 3, 0 };
>  
>  /*
>   * Initialize a semaphore.
> @@ -74,20 +85,27 @@ semaphore_destroy(semaphore_t *sem)
>  /*
>   * Wait on semaphore. This operation will try to acquire a lock on the
>   * semaphore. If the semaphore is already acquired as many times at it 
> allows,
> - * the function will block until someone releases the lock.
> + * the function will block until someone releases the lock OR timeout expire.
> + *
> + * @return ISC_R_SUCCESS or ISC_R_TIMEDOUT or other errors from ISC libs
>   */
> -void
> -semaphore_wait(semaphore_t *sem)
> +isc_result_t
> +semaphore_wait_timed(semaphore_t *sem)
>  {
> +     isc_result_t result;
> +     isc_time_t abs_timeout;
>       REQUIRE(sem != NULL);
>  
>       LOCK(&sem->mutex);
> +     CHECK(isc_time_nowplusinterval(&abs_timeout, &semaphore_wait_timeout));

The isc_time_nowplusinterval doesn't modify shared data, please call it before
LOCK().

>       while (sem->value <= 0)
> -             WAIT(&sem->cond, &sem->mutex);
> +             CHECK(WAITUNTIL(&sem->cond, &sem->mutex, &abs_timeout));
>       sem->value--;
>  
> +cleanup:
>       UNLOCK(&sem->mutex);
> +     return result;
>  }
>  
>  /*
> diff --git a/src/semaphore.h b/src/semaphore.h
> index 4ca4f65..5aef5d2 100644
> --- a/src/semaphore.h
> +++ b/src/semaphore.h
> @@ -24,6 +24,10 @@
>  #include <isc/condition.h>
>  #include <isc/mutex.h>
>  
> +/* How many seconds will be added to user-defined connection parameter 
> 'timeout'. */
> +#define SEM_WAIT_TIMEOUT_ADD 2 /* seconds */
> +extern isc_interval_t semaphore_wait_timeout;
> +
>  /*
>   * Semaphore can be "acquired" multiple times. However, it has a maximum
>   * number of times someone can acquire him. If a semaphore is already 
> acquired
> @@ -40,7 +44,7 @@ typedef struct semaphore    semaphore_t;
>  /* Public functions. */
>  isc_result_t semaphore_init(semaphore_t *sem, int value);
>  void         semaphore_destroy(semaphore_t *sem);
> -void         semaphore_wait(semaphore_t *sem);
> +isc_result_t semaphore_wait_timed(semaphore_t *sem);
>  void         semaphore_signal(semaphore_t *sem);
>  
>  #endif /* !_LD_SEMAPHORE_H_ */
> -- 
> 1.7.7.6
> 


-- 
Adam Tkac, Red Hat, Inc.

_______________________________________________
Freeipa-devel mailing list
Freeipa-devel@redhat.com
https://www.redhat.com/mailman/listinfo/freeipa-devel

Reply via email to