If an application exits while asynchronous accept processing is queued, it's possible for the async processing to access the IbCmId after it has been freed. A similar problem to this was fixed that dealt with accessing the verbs QP handle.
A simpler, more generic solution to this problem is to handle application exit in the same manner as device removal, and lock the winverb provider lookup lists with exclusive access. Asynchronous operations that are in process will run to completion, and future operations will be blocked until the provider cleanup thread has completed. Once they run, they will fail to acquire a reference on the desired object, which should result in a graceful failure. This avoids more complicated locking to use handles belonging to the lower level code. If a reference on an object can be acquired, the handle will be available for use until the reference is released. To handle IB CM callbacks, additional state checking is required to avoid processing CM events when we're trying to destroy the endpoint. Signed-off-by: Sean Hefty <[email protected]> --- This should be added to WinOF 2.1 Index: core/winverbs/kernel/wv_ep.c =================================================================== --- core/winverbs/kernel/wv_ep.c (revision 2406) +++ core/winverbs/kernel/wv_ep.c (working copy) @@ -57,7 +57,7 @@ KeAcquireGuardedMutex(&pProvider->Lock); WvProviderDisableRemove(pProvider); ep = IndexListAt(&pProvider->EpIndex, (SIZE_T) Id); - if (ep != NULL) { + if (ep != NULL && ep->State != WvEpDestroying) { WvEpGet(ep); } else { ep = NULL; @@ -185,14 +185,14 @@ pEndpoint->State = WvEpDestroying; WdfObjectReleaseLock(pEndpoint->Queue); - if (pEndpoint->pIbCmId != NULL) { - IbCmInterface.CM.destroy_id(pEndpoint->pIbCmId); - } - if (InterlockedDecrement(&pEndpoint->Ref) > 0) { KeWaitForSingleObject(&pEndpoint->Event, Executive, KernelMode, FALSE, NULL); } + if (pEndpoint->pIbCmId != NULL) { + IbCmInterface.CM.destroy_id(pEndpoint->pIbCmId); + } + WdfIoQueuePurgeSynchronously(pEndpoint->Queue); WdfObjectDelete(pEndpoint->Queue); ExFreePoolWithTag(pEndpoint, 'pevw'); @@ -373,10 +373,6 @@ pEndpoint->Attributes.Param.Connect.DataLength = len; } -/* - * The QP transition to error may be done from an async worker thread. - * Synchronize against application exit. - */ static NTSTATUS WvEpModifyQpErr(WV_QUEUE_PAIR *pQp, UINT8 *pVerbsData, UINT32 VerbsSize) { @@ -384,12 +380,6 @@ ib_api_status_t ib_status; NTSTATUS status; - KeAcquireGuardedMutex(&pQp->Lock); - if (pQp->hVerbsQp == NULL) { - status = STATUS_NOT_FOUND; - goto out; - } - attr.req_state = IB_QPS_ERROR; ib_status = pQp->pVerbs->ndi_modify_qp(pQp->hVerbsQp, &attr, NULL, VerbsSize, pVerbsData); @@ -399,8 +389,6 @@ status = STATUS_UNSUCCESSFUL; } -out: - KeReleaseGuardedMutex(&pQp->Lock); return status; } @@ -505,10 +493,19 @@ ep = pId->context; switch (pEvent->type) { case iba_cm_req_error: + WdfObjectAcquireLock(ep->Queue); + if (ep->State == WvEpActiveConnect) { + ep->State = WvEpDisconnected; + WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT); + } + WdfObjectReleaseLock(ep->Queue); + break; case iba_cm_rep_error: WdfObjectAcquireLock(ep->Queue); - ep->State = WvEpDisconnected; - WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT); + if (ep->State == WvEpPassiveConnect) { + ep->State = WvEpDisconnected; + WvCompleteRequests(ep->Queue, STATUS_IO_TIMEOUT); + } WdfObjectReleaseLock(ep->Queue); break; case iba_cm_dreq_error: @@ -557,8 +554,10 @@ break; default: WdfObjectAcquireLock(ep->Queue); - ep->State = WvEpDisconnected; - WvCompleteRequests(ep->Queue, STATUS_NOT_IMPLEMENTED); + if (ep->State != WvEpDestroying) { + ep->State = WvEpDisconnected; + WvCompleteRequests(ep->Queue, STATUS_NOT_IMPLEMENTED); + } WdfObjectReleaseLock(ep->Queue); break; } Index: core/winverbs/kernel/wv_provider.c =================================================================== --- core/winverbs/kernel/wv_provider.c (revision 2388) +++ core/winverbs/kernel/wv_provider.c (working copy) @@ -53,6 +53,38 @@ } } +// See comment above WvProviderRemoveHandler. +static void WvProviderLockRemove(WV_PROVIDER *pProvider) +{ + KeAcquireGuardedMutex(&pProvider->Lock); + pProvider->Exclusive++; + KeClearEvent(&pProvider->SharedEvent); + while (pProvider->Active > 0) { + KeReleaseGuardedMutex(&pProvider->Lock); +DbgPrint("WvProviderLockRemove - someone is active - waiting\n"); + KeWaitForSingleObject(&pProvider->ExclusiveEvent, Executive, KernelMode, + FALSE, NULL); +DbgPrint("WvProviderLockRemove - signaled - continuing\n"); + KeAcquireGuardedMutex(&pProvider->Lock); + } + pProvider->Active++; + KeReleaseGuardedMutex(&pProvider->Lock); +} + +// See comment above WvProviderRemoveHandler. +static void WvProviderUnlockRemove(WV_PROVIDER *pProvider) +{ + KeAcquireGuardedMutex(&pProvider->Lock); + pProvider->Exclusive--; + pProvider->Active--; + if (pProvider->Exclusive > 0) { + KeSetEvent(&pProvider->ExclusiveEvent, 0, FALSE); + } else if (pProvider->Pending > 0) { + KeSetEvent(&pProvider->SharedEvent, 0, FALSE); + } + KeReleaseGuardedMutex(&pProvider->Lock); +} + NTSTATUS WvProviderInit(WDFDEVICE Device, WV_PROVIDER *pProvider) { NTSTATUS status; @@ -83,6 +115,15 @@ return STATUS_SUCCESS; } +/* + * We could be processing asynchronous requests or have them queued + * when cleaning up. To synchronize with async request processing, + * acquire the provider lock with exclusive access until we're done + * destroying all resoureces. This will allow active requests to + * complete their processing, but prevent queued requests from + * running until cleanup is done. At that point, queue requests will + * be unable to acquire any winverbs resources. + */ void WvProviderCleanup(WV_PROVIDER *pProvider) { WV_DEVICE *dev; @@ -95,6 +136,7 @@ WV_ENDPOINT *ep; SIZE_T i; + WvProviderLockRemove(pProvider); IndexListForEach(&pProvider->EpIndex, i) { ep = (WV_ENDPOINT *) IndexListAt(&pProvider->EpIndex, i); if (ep->State == WvEpListening) { @@ -131,6 +173,7 @@ while ((dev = IndexListRemoveHead(&pProvider->DevIndex)) != NULL) { WvDeviceFree(dev); } + WvProviderUnlockRemove(pProvider); if (InterlockedDecrement(&pProvider->Ref) > 0) { KeWaitForSingleObject(&pProvider->Event, Executive, KernelMode, FALSE, NULL); @@ -147,36 +190,6 @@ WorkQueueDestroy(&pProvider->WorkQueue); } -// See comment above WvProviderRemoveHandler. -static void WvProviderLockRemove(WV_PROVIDER *pProvider) -{ - KeAcquireGuardedMutex(&pProvider->Lock); - pProvider->Exclusive++; - KeClearEvent(&pProvider->SharedEvent); - while (pProvider->Active > 0) { - KeReleaseGuardedMutex(&pProvider->Lock); - KeWaitForSingleObject(&pProvider->ExclusiveEvent, Executive, KernelMode, - FALSE, NULL); - KeAcquireGuardedMutex(&pProvider->Lock); - } - pProvider->Active++; - KeReleaseGuardedMutex(&pProvider->Lock); -} - -// See comment above WvProviderRemoveHandler. -static void WvProviderUnlockRemove(WV_PROVIDER *pProvider) -{ - KeAcquireGuardedMutex(&pProvider->Lock); - pProvider->Exclusive--; - pProvider->Active--; - if (pProvider->Exclusive > 0) { - KeSetEvent(&pProvider->ExclusiveEvent, 0, FALSE); - } else if (pProvider->Pending > 0) { - KeSetEvent(&pProvider->SharedEvent, 0, FALSE); - } - KeReleaseGuardedMutex(&pProvider->Lock); -} - /* * Must hold pProvider->Lock. Function may release and re-acquire. * See comment above WvProviderRemoveHandler. Index: core/winverbs/kernel/wv_qp.c =================================================================== --- core/winverbs/kernel/wv_qp.c (revision 2406) +++ core/winverbs/kernel/wv_qp.c (working copy) @@ -519,21 +519,11 @@ WdfRequestComplete(Request, status); } -/* - * Synchronize freeing the QP due to application exit with asynchronous - * disconnect processing transitioning the QP into the error state. - */ void WvQpFree(WV_QUEUE_PAIR *pQp) { WV_MULTICAST *mc; LIST_ENTRY *entry; - ib_qp_handle_t hqp; - KeAcquireGuardedMutex(&pQp->Lock); - hqp = pQp->hVerbsQp; - pQp->hVerbsQp = NULL; - KeReleaseGuardedMutex(&pQp->Lock); - while ((entry = RemoveHeadList(&pQp->McList)) != &pQp->McList) { mc = CONTAINING_RECORD(entry, WV_MULTICAST, Entry); if (mc->hVerbsMc != NULL) { @@ -546,8 +536,8 @@ KeWaitForSingleObject(&pQp->Event, Executive, KernelMode, FALSE, NULL); } - if (hqp != NULL) { - pQp->pVerbs->destroy_qp(hqp, 0); + if (pQp->hVerbsQp != NULL) { + pQp->pVerbs->destroy_qp(pQp->hVerbsQp, 0); } if (pQp->pSrq != NULL) {
wv-async-free-race.diff
Description: Binary data
_______________________________________________ ofw mailing list [email protected] http://lists.openfabrics.org/cgi-bin/mailman/listinfo/ofw
