Print this page
NEX-4592 race condition allows FC initiator LUN offline hotplug handler deadlocks with FMA faulting leaf vdev between LDI and ZFS
Reviewed by: Rob Gittins <rob.gittins@nexenta.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/os/driver_lyr.c
          +++ new/usr/src/uts/common/os/driver_lyr.c
↓ open down ↓ 3311 lines elided ↑ open up ↑
3312 3312   *              LDI_EV_NONE    - No matching LDI callbacks
3313 3313   *
3314 3314   * This function is *not* to be called by layered drivers. It is for I/O
3315 3315   * framework code in Solaris, such as the I/O retire code and DR code
3316 3316   * to call while servicing a device event such as offline or degraded.
3317 3317   */
3318 3318  int
3319 3319  ldi_invoke_notify(dev_info_t *dip, dev_t dev, int spec_type, char *event,
3320 3320      void *ev_data)
3321 3321  {
3322      -        ldi_ev_callback_impl_t *lecp;
     3322 +        ldi_ev_callback_impl_t  *lecp,
     3323 +                                *saved_lecp;
3323 3324          list_t  *listp;
3324 3325          int     ret;
3325 3326          char    *lec_event;
3326 3327  
3327 3328          ASSERT(dip);
3328 3329          ASSERT(dev != DDI_DEV_T_NONE);
3329 3330          ASSERT(dev != NODEV);
3330 3331          ASSERT((dev == DDI_DEV_T_ANY && spec_type == 0) ||
3331 3332              (spec_type == S_IFCHR || spec_type == S_IFBLK));
3332 3333          ASSERT(event);
3333 3334          ASSERT(ldi_native_event(event));
3334 3335          ASSERT(ldi_ev_sync_event(event));
3335 3336  
3336 3337          LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): entered: dip=%p, ev=%s",
3337 3338              (void *)dip, event));
3338 3339  
3339 3340          ret = LDI_EV_NONE;
3340 3341          ldi_ev_lock();
3341 3342  
3342 3343          VERIFY(ldi_ev_callback_list.le_walker_next == NULL);
     3344 +restart_loop:
3343 3345          listp = &ldi_ev_callback_list.le_head;
3344 3346          for (lecp = list_head(listp); lecp; lecp =
3345 3347              ldi_ev_callback_list.le_walker_next) {
3346 3348                  ldi_ev_callback_list.le_walker_next = list_next(listp, lecp);
3347 3349  
3348 3350                  /* Check if matching device */
3349 3351                  if (!ldi_ev_device_match(lecp, dip, dev, spec_type))
3350 3352                          continue;
3351 3353  
3352 3354                  if (lecp->lec_lhp == NULL) {
↓ open down ↓ 15 lines elided ↑ open up ↑
3368 3370                  /*
3369 3371                   * Check if matching event
3370 3372                   */
3371 3373                  lec_event = ldi_ev_get_type(lecp->lec_cookie);
3372 3374                  if (strcmp(event, lec_event) != 0) {
3373 3375                          LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): Not matching"
3374 3376                              " event {%s,%s}. skipping", event, lec_event));
3375 3377                          continue;
3376 3378                  }
3377 3379  
3378      -                lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY;
3379      -                if (lecp->lec_notify(lecp->lec_lhp, lecp->lec_cookie,
3380      -                    lecp->lec_arg, ev_data) != LDI_EV_SUCCESS) {
     3380 +                /*
     3381 +                 * To keep the sematics of only calling the notify callback
     3382 +                 * once ignore this entry if during a previous loop start this
     3383 +                 * bit was set. This bit will be cleared from all entries
     3384 +                 * at the end of this routine.
     3385 +                 */
     3386 +                if (lecp->lec_lhp->lh_flags & LH_FLAGS_NOTIFY_NOTIFY)
     3387 +                        continue;
     3388 +
     3389 +                lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY |
     3390 +                    LH_FLAGS_NOTIFY_NOTIFY;
     3391 +
     3392 +                /*
     3393 +                 * Need to drop the lock here before starting the notify
     3394 +                 * callback. A problem was found where the callback is
     3395 +                 * for ZFS which attempts to grab an internal lock of it's own.
     3396 +                 * Unfortunately there's another thread which starts with
     3397 +                 * ZFS that grabs the internal lock and then calls into the
     3398 +                 * LDI layer which attempts to grab this lock. Deadlock.
     3399 +                 * So, drop and reaquire later which means the loop must
     3400 +                 * be restarted since there's no telling what happened to
     3401 +                 * the linked list.
     3402 +                 */
     3403 +                ldi_ev_unlock();
     3404 +                ret = lecp->lec_notify(lecp->lec_lhp, lecp->lec_cookie,
     3405 +                    lecp->lec_arg, ev_data);
     3406 +                ldi_ev_lock();
     3407 +
     3408 +                if (ret != LDI_EV_SUCCESS) {
3381 3409                          ret = LDI_EV_FAILURE;
3382 3410                          LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): notify"
3383 3411                              " FAILURE"));
3384 3412                          break;
3385 3413                  }
3386 3414  
3387 3415                  /* We have a matching callback that allows the event to occur */
3388      -                ret = LDI_EV_SUCCESS;
3389      -
3390 3416                  LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): 1 consumer success"));
     3417 +                goto restart_loop;
3391 3418          }
3392 3419  
3393 3420          if (ret != LDI_EV_FAILURE)
3394 3421                  goto out;
3395 3422  
     3423 +        saved_lecp = list_prev(listp, lecp);
     3424 +
3396 3425          LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): undoing notify"));
3397 3426  
3398 3427          /*
3399 3428           * Undo notifies already sent
3400 3429           */
3401      -        lecp = list_prev(listp, lecp);
3402 3430          VERIFY(ldi_ev_callback_list.le_walker_prev == NULL);
3403      -        for (; lecp; lecp = ldi_ev_callback_list.le_walker_prev) {
     3431 +restart_2nd_loop:
     3432 +        for (lecp = saved_lecp; lecp;
     3433 +            lecp = ldi_ev_callback_list.le_walker_prev) {
3404 3434                  ldi_ev_callback_list.le_walker_prev = list_prev(listp, lecp);
3405 3435  
3406 3436                  /*
3407 3437                   * Check if matching device
3408 3438                   */
3409 3439                  if (!ldi_ev_device_match(lecp, dip, dev, spec_type))
3410 3440                          continue;
3411 3441  
3412 3442  
3413 3443                  if (lecp->lec_finalize == NULL) {
↓ open down ↓ 13 lines elided ↑ open up ↑
3427 3457                  /* Check if matching event */
3428 3458                  lec_event = ldi_ev_get_type(lecp->lec_cookie);
3429 3459                  if (strcmp(event, lec_event) != 0) {
3430 3460                          LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): not matching "
3431 3461                              "event: %s,%s, skipping", event, lec_event));
3432 3462                          continue;
3433 3463                  }
3434 3464  
3435 3465                  LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): calling finalize"));
3436 3466  
     3467 +                if (lecp->lec_lhp->lh_flags & LH_FLAGS_NOTIFY_FINALIZE)
     3468 +                        continue;
     3469 +                lecp->lec_lhp->lh_flags |= LH_FLAGS_NOTIFY_FINALIZE;
     3470 +
     3471 +                ldi_ev_unlock();
3437 3472                  lecp->lec_finalize(lecp->lec_lhp, lecp->lec_cookie,
3438 3473                      LDI_EV_FAILURE, lecp->lec_arg, ev_data);
     3474 +                ldi_ev_lock();
3439 3475  
3440 3476                  /*
3441 3477                   * If LDI native event and LDI handle closed in context
3442 3478                   * of notify, NULL out the finalize callback as we have
3443 3479                   * already called the 1 finalize above allowed in this situation
3444 3480                   */
3445 3481                  if (lecp->lec_lhp == NULL &&
3446 3482                      ldi_native_cookie(lecp->lec_cookie)) {
3447 3483                          LDI_EVDBG((CE_NOTE,
3448 3484                              "ldi_invoke_notify(): NULL-ing finalize after "
3449 3485                              "calling 1 finalize following ldi_close"));
3450 3486                          lecp->lec_finalize = NULL;
3451 3487                  }
     3488 +                goto restart_2nd_loop;
3452 3489          }
3453      -
3454 3490  out:
     3491 +        /*
     3492 +         * Clear out the temporary flag for future runs.
     3493 +         */
     3494 +        for (lecp = list_head(listp); lecp; lecp = list_next(listp, lecp)) {
     3495 +                if (lecp->lec_lhp == NULL)
     3496 +                        continue;
     3497 +                lecp->lec_lhp->lh_flags &= ~(LH_FLAGS_NOTIFY_NOTIFY |
     3498 +                                             LH_FLAGS_NOTIFY_FINALIZE);
     3499 +        }
3455 3500          ldi_ev_callback_list.le_walker_next = NULL;
3456 3501          ldi_ev_callback_list.le_walker_prev = NULL;
3457 3502          ldi_ev_unlock();
3458 3503  
3459 3504          if (ret == LDI_EV_NONE) {
3460 3505                  LDI_EVDBG((CE_NOTE, "ldi_invoke_notify(): no matching "
3461 3506                      "LDI callbacks"));
3462 3507          }
3463 3508  
3464 3509          return (ret);
↓ open down ↓ 293 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX