Hi Praveen,

What I understand about deadlock as below:

When other API and saNtfFinalize() called in parallel, both take the
ntfhandle by ncshm_take_hdl(), the cell->use_ct increase 
(and > 1). The saNtfFinalize() call ncshm_destroy_hdl() to destroy
ntfhandle, but ncshm_destroy_hdl() will check cell->use_ct greater than
1 or not. If it greater than one, the thread will be blocked by creating a
semaphore, and wait until cell->use_ct is decrease and equal 1. 
This mean that waiting other thread give handle by ncshm_give_hdl().

If both another API and saNtfFinalize() use locking on ntfa_cb.cb_lock and
the locking in saNtfFinalize() cover ncshm_destroy_hdl(),
the deadlock may be happen. Because the thread run saNtfFinalize() will stop
in ncshm_destroy_hdl() to wait for other thread given handle, and don't
unlock on ntfa_cb.cb_lock while another API in other thread need to lock on
ntfa_cb.cb_lock and it cannot. This makes the deadlock. 

But if the locking in in saNtfFinalize() does not cover ncshm_destroy_hdl(),
the deadlock won't happen. Because in locking in saNtfFinalize(),
it does not need to wait for any resources.

The fixing of issue need to remove item in the list. Other API use the same
list to do removing or adding action. So we need to use mutex to protect
here.

Do I misunderstand or be wrong here?

Thanks,
Canh.

-----Original Message-----
From: praveen malviya [mailto:praveen.malv...@oracle.com] 
Sent: Wednesday, October 05, 2016 6:42 PM
To: Canh Van Truong
Cc: minh.c...@dektech.com.au; opensaf-devel@lists.sourceforge.net
Subject: Re: [PATCH 1 of 1] ntf: fix ntfa does not remove subscriber in
subscriberNoList at finalize [#1978]

Hi Canh,

Please find one comment inline with [Praveen].

Thanks,
Praveen

On 28-Sep-16 9:05 AM, Canh Van Truong wrote:
>  osaf/libs/agents/saf/ntfa/ntfa_api.c         |   2 +
>  osaf/libs/agents/saf/ntfa/ntfa_util.c        |  68
+++++++++++++++++++--------
>  tests/ntfsv/tet_saNtfNotificationSubscribe.c |  32 +++++++++++++
>  3 files changed, 81 insertions(+), 21 deletions(-)
>
>
> In finalize(), ntfa deletes client and does not remove the subscriber 
> that relate to this client from subscriber list. The subscriber with 
> subscriptionId keep in list, then we cannot subscribe with this
subscriptionId.
>
> The patch does:
>       - Deleting all subscribers that relate to finalized client.
>       - Checking pointer before using in subscriberListItemRemove()
>
> diff --git a/osaf/libs/agents/saf/ntfa/ntfa_api.c 
> b/osaf/libs/agents/saf/ntfa/ntfa_api.c
> --- a/osaf/libs/agents/saf/ntfa/ntfa_api.c
> +++ b/osaf/libs/agents/saf/ntfa/ntfa_api.c
> @@ -2059,6 +2059,7 @@ static void subscriberListItemRemove(SaN
>       ntfa_subscriber_list_t *listPtr = NULL;
>       osafassert(pthread_mutex_lock(&ntfa_cb.cb_lock) == 0);
>       listPtr = listItemGet(subscriptionId);
> +     if (listPtr == NULL) goto done;
>       if (listPtr->next != NULL) {
>               listPtr->next->prev = listPtr->prev;
>       }
> @@ -2074,6 +2075,7 @@ static void subscriberListItemRemove(SaN
>       TRACE_1("REMOVE: listPtr->SubscriptionId %d",
listPtr->subscriberListSubscriptionId);
>       ntfa_del_ntf_filter_ptrs(&listPtr->filters);
>       free(listPtr);
> +done:
>       osafassert(pthread_mutex_unlock(&ntfa_cb.cb_lock) == 0);  }
>
> diff --git a/osaf/libs/agents/saf/ntfa/ntfa_util.c 
> b/osaf/libs/agents/saf/ntfa/ntfa_util.c
> --- a/osaf/libs/agents/saf/ntfa/ntfa_util.c
> +++ b/osaf/libs/agents/saf/ntfa/ntfa_util.c
> @@ -743,6 +743,46 @@ void ntfa_subscriber_list_del()
>       }
>       subscriberNoList = NULL;
>  }
> +
>
+/**************************************************************************
**
> +  Name          : subscriber_removed_by_handle
> +
> +  Description   : This routine delete subscribers of this client out of
the
> +               subcriberNoList
> +
> +  Arguments     : SaNtfHandleT ntfHandle
> +
> +  Return Values : None
> +
> +  Notes         :
> +*********************************************************************
> +*********/ void ntfa_subscriber_del_by_handle(SaNtfHandleT ntfHandle) 
> +{
> +     TRACE_ENTER();
> +     ntfa_subscriber_list_t* subscriber_hdl = subscriberNoList;
> +     while (subscriber_hdl != NULL) {
> +             ntfa_subscriber_list_t *rm_subscriber = subscriber_hdl;
> +             subscriber_hdl = subscriber_hdl->next;
> +             if (ntfHandle == rm_subscriber->subscriberListNtfHandle) {
> +                     if (rm_subscriber->next != NULL) {
> +                             rm_subscriber->next->prev =
rm_subscriber->prev;
> +                     }
> +
> +                     if (rm_subscriber->prev != NULL) {
> +                             rm_subscriber->prev->next =
rm_subscriber->next;
> +                     } else {
> +                             if (rm_subscriber->next != NULL)
> +                                     subscriberNoList =
rm_subscriber->next;
> +                             else
> +                                     subscriberNoList = NULL;
> +                     }
> +                     ntfa_del_ntf_filter_ptrs(&rm_subscriber->filters);
> +                     free(rm_subscriber);
> +             }
> +     }
> +     TRACE_LEAVE();
> +}
> +
>
/***************************************************************************
*
>    Name          : ntfa_hdl_list_del
>
> @@ -910,27 +950,7 @@ void ntfa_hdl_rec_force_del(ntfa_client_
>               ntfa_msg_destroy(cbk_msg);
>       }
>       /* delete subscriber of this client out of the subcriberNoList*/
> -     ntfa_subscriber_list_t* subscriber_hdl = subscriberNoList;
> -     while (subscriber_hdl != NULL) {
> -             ntfa_subscriber_list_t *rm_subscriber = subscriber_hdl;
> -             subscriber_hdl = subscriber_hdl->next;
> -             if (rm_node->local_hdl ==
rm_subscriber->subscriberListNtfHandle) {
> -                     if (rm_subscriber->next != NULL) {
> -                             rm_subscriber->next->prev =
rm_subscriber->prev;
> -                     }
> -
> -                     if (rm_subscriber->prev != NULL) {
> -                             rm_subscriber->prev->next =
rm_subscriber->next;
> -                     } else {
> -                             if (rm_subscriber->next != NULL)
> -                                     subscriberNoList =
rm_subscriber->next;
> -                             else
> -                                     subscriberNoList = NULL;
> -                     }
> -                     ntfa_del_ntf_filter_ptrs(&rm_subscriber->filters);
> -                     free(rm_subscriber);
> -             }
> -     }
> +     ntfa_subscriber_del_by_handle(rm_node->local_hdl);
>       /* Now delete client */
>       m_NCS_IPC_DETACH(&rm_node->mbx, ntfa_clear_mbx, NULL);
>       m_NCS_IPC_RELEASE(&rm_node->mbx, NULL); @@ -946,6 +966,7 @@ void 
> ntfa_hdl_rec_force_del(ntfa_client_
>
>       TRACE_LEAVE();
>  }
> +
>
/***************************************************************************
*
>    Name          : ntfa_hdl_rec_del
>
> @@ -970,6 +991,11 @@ uint32_t ntfa_hdl_rec_del(ntfa_client_hd
>       TRACE_ENTER();
>  /* TODO: free all resources allocated by the client */
>
> +     /* Remove subscribers of this client if there are any in
subcriberNoList */
> +     pthread_mutex_lock(&ntfa_cb.cb_lock);
> +     ntfa_subscriber_del_by_handle(rm_node->local_hdl);
> +     pthread_mutex_unlock(&ntfa_cb.cb_lock);
> +
[Praveen] These is a note function header of saNtfFinalize() API which
states that taking lock in this API needs to be avoided as it causes
deadlock situaion. This function ntfa_hdl_rec_del() is called from the
saNtfFinalize() API.
Can't we fix this issue by avoiding mutex?


>       /* If the to be removed record is the first record */
>       if (list_iter == rm_node) {
>               *list_head = rm_node->next;
> diff --git a/tests/ntfsv/tet_saNtfNotificationSubscribe.c 
> b/tests/ntfsv/tet_saNtfNotificationSubscribe.c
> --- a/tests/ntfsv/tet_saNtfNotificationSubscribe.c
> +++ b/tests/ntfsv/tet_saNtfNotificationSubscribe.c
> @@ -157,6 +157,37 @@ void saNtfNotificationSubscribe_04(void)
>      test_validate(rc, SA_AIS_ERR_EXIST);  }
>
> +SaAisErrorT saNtfSubscribe(SaNtfSubscriptionIdT subscriptionId) {
> +    SaAisErrorT ret;
> +    SaNtfObjectCreateDeleteNotificationFilterT obcf;
> +    SaNtfNotificationTypeFilterHandlesT FilterHandles;
> +    memset(&FilterHandles, 0, sizeof(FilterHandles));
> +
> +    ret = saNtfObjectCreateDeleteNotificationFilterAllocate(ntfHandle,
&obcf,0,0,0,1,0);
> +    obcf.notificationFilterHeader.notificationClassIds->vendorId =
SA_NTF_VENDOR_ID_SAF;
> +    obcf.notificationFilterHeader.notificationClassIds->majorId = 222;
> +    obcf.notificationFilterHeader.notificationClassIds->minorId = 222;
> +    FilterHandles.objectCreateDeleteFilterHandle =
obcf.notificationFilterHandle;
> +    ret = saNtfNotificationSubscribe(&FilterHandles, subscriptionId);
> +    return ret;
> +}
> +
> +void saNtfNotificationSubscribe_05(void)
> +{
> +    SaNtfHandleT ntfHandle1 = 0;
> +    safassert(saNtfInitialize(&ntfHandle1, &ntfCallbacks, &ntfVersion),
SA_AIS_OK);
> +    safassert(saNtfInitialize(&ntfHandle, &ntfCallbacks, &ntfVersion),
SA_AIS_OK);
> +    safassert(saNtfSubscribe(666), SA_AIS_OK);
> +    safassert(saNtfFinalize(ntfHandle), SA_AIS_OK);
> +
> +    safassert(saNtfInitialize(&ntfHandle, &ntfCallbacks, &ntfVersion),
SA_AIS_OK);
> +    rc = saNtfSubscribe(666);
> +    safassert(saNtfFinalize(ntfHandle), SA_AIS_OK);
> +    safassert(saNtfFinalize(ntfHandle1), SA_AIS_OK);
> +    test_validate(rc, SA_AIS_OK);
> +}
> +
>  __attribute__ ((constructor)) static void 
> saNtfNotificationSubscribe_constructor(void)
>  {
>      test_suite_add(10, "Consumer operations - subscribe"); @@ -164,6 
> +195,7 @@ void saNtfNotificationSubscribe_04(void)
>      test_case_add(10, saNtfNotificationSubscribe_02,
"saNtfNotificationSubscribe null handle SA_AIS_ERR_INVALID_PARAM");
>      test_case_add(10, saNtfNotificationSubscribe_03,
"saNtfNotificationSubscribe All filter handles null
SA_AIS_ERR_INVALID_PARAM");
>      test_case_add(10, saNtfNotificationSubscribe_04, 
> "saNtfNotificationSubscribe subscriptionId exist SA_AIS_ERR_EXIST");
> +    test_case_add(10, saNtfNotificationSubscribe_05, 
> + "saNtfNotificationSubscribe subscriptionId if old handle has 
> + subscriptionId had been finalized SA_AIS_OK");
>  }
>
>
>


------------------------------------------------------------------------------
Check out the vibrant tech community on one of the world's most 
engaging tech sites, SlashDot.org! http://sdm.link/slashdot
_______________________________________________
Opensaf-devel mailing list
Opensaf-devel@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/opensaf-devel

Reply via email to