Print this page
NEX-4592 race condition allows FC initiator LUN offline hotplug handler deadlocks with FMA faulting leaf vdev between LDI and ZFS
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>

*** 3317,3327 **** */ int ldi_invoke_notify(dev_info_t *dip, dev_t dev, int spec_type, char *event, void *ev_data) { ! ldi_ev_callback_impl_t *lecp; list_t *listp; int ret; char *lec_event; ASSERT(dip); --- 3317,3328 ---- */ int ldi_invoke_notify(dev_info_t *dip, dev_t dev, int spec_type, char *event, void *ev_data) { ! ldi_ev_callback_impl_t *lecp, ! *saved_lecp; list_t *listp; int ret; char *lec_event; ASSERT(dip);
*** 3338,3347 **** --- 3339,3349 ---- ret = LDI_EV_NONE; ldi_ev_lock(); VERIFY(ldi_ev_callback_list.le_walker_next == NULL); + restart_loop: listp = &ldi_ev_callback_list.le_head; for (lecp = list_head(listp); lecp; lecp = ldi_ev_callback_list.le_walker_next) { ldi_ev_callback_list.le_walker_next = list_next(listp, lecp);
*** 3373,3408 **** LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): Not matching" " event {%s,%s}. skipping", event, lec_event)); continue; } ! lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY; ! if (lecp->lec_notify(lecp->lec_lhp, lecp->lec_cookie, ! lecp->lec_arg, ev_data) != LDI_EV_SUCCESS) { ret = LDI_EV_FAILURE; LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): notify" " FAILURE")); break; } /* We have a matching callback that allows the event to occur */ - ret = LDI_EV_SUCCESS; - LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): 1 consumer success")); } if (ret != LDI_EV_FAILURE) goto out; LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): undoing notify")); /* * Undo notifies already sent */ - lecp = list_prev(listp, lecp); VERIFY(ldi_ev_callback_list.le_walker_prev == NULL); ! for (; lecp; lecp = ldi_ev_callback_list.le_walker_prev) { ldi_ev_callback_list.le_walker_prev = list_prev(listp, lecp); /* * Check if matching device */ --- 3375,3438 ---- LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): Not matching" " event {%s,%s}. skipping", event, lec_event)); continue; } ! /* ! * To keep the sematics of only calling the notify callback ! * once ignore this entry if during a previous loop start this ! * bit was set. This bit will be cleared from all entries ! * at the end of this routine. ! */ ! if (lecp->lec_lhp->lh_flags & LH_FLAGS_NOTIFY_NOTIFY) ! continue; ! ! lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY | ! LH_FLAGS_NOTIFY_NOTIFY; ! ! /* ! * Need to drop the lock here before starting the notify ! * callback. A problem was found where the callback is ! * for ZFS which attempts to grab an internal lock of it's own. ! * Unfortunately there's another thread which starts with ! * ZFS that grabs the internal lock and then calls into the ! * LDI layer which attempts to grab this lock. Deadlock. ! * So, drop and reaquire later which means the loop must ! * be restarted since there's no telling what happened to ! * the linked list. ! */ ! ldi_ev_unlock(); ! ret = lecp->lec_notify(lecp->lec_lhp, lecp->lec_cookie, ! lecp->lec_arg, ev_data); ! ldi_ev_lock(); ! ! if (ret != LDI_EV_SUCCESS) { ret = LDI_EV_FAILURE; LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): notify" " FAILURE")); break; } /* We have a matching callback that allows the event to occur */ LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): 1 consumer success")); + goto restart_loop; } if (ret != LDI_EV_FAILURE) goto out; + saved_lecp = list_prev(listp, lecp); + LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): undoing notify")); /* * Undo notifies already sent */ VERIFY(ldi_ev_callback_list.le_walker_prev == NULL); ! restart_2nd_loop: ! for (lecp = saved_lecp; lecp; ! lecp = ldi_ev_callback_list.le_walker_prev) { ldi_ev_callback_list.le_walker_prev = list_prev(listp, lecp); /* * Check if matching device */
*** 3432,3443 **** --- 3462,3479 ---- continue; } LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): calling finalize")); + if (lecp->lec_lhp->lh_flags & LH_FLAGS_NOTIFY_FINALIZE) + continue; + lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY_FINALIZE; + + ldi_ev_unlock(); lecp->lec_finalize(lecp->lec_lhp, lecp->lec_cookie, LDI_EV_FAILURE, lecp->lec_arg, ev_data); + ldi_ev_lock(); /* * If LDI native event and LDI handle closed in context * of notify, NULL out the finalize callback as we have * already called the 1 finalize above allowed in this situation
*** 3447,3459 **** LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): NULL-ing finalize after " "calling 1 finalize following ldi_close")); lecp->lec_finalize = NULL; } } - out: ldi_ev_callback_list.le_walker_next = NULL; ldi_ev_callback_list.le_walker_prev = NULL; ldi_ev_unlock(); if (ret == LDI_EV_NONE) { --- 3483,3504 ---- LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): NULL-ing finalize after " "calling 1 finalize following ldi_close")); lecp->lec_finalize = NULL; } + goto restart_2nd_loop; } out: + /* + * Clear out the temporary flag for future runs. + */ + for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) { + if (lecp->lec_lhp == NULL) + continue; + lecp->lec_lhp->lh_flags &= ~(LH_FLAGS_NOTIFY_NOTIFY | + LH_FLAGS_NOTIFY_FINALIZE); + } ldi_ev_callback_list.le_walker_next = NULL; ldi_ev_callback_list.le_walker_prev = NULL; ldi_ev_unlock(); if (ret == LDI_EV_NONE) {