Print this page
NEX-6353 The "DKIOCSOLIDSTATE failed, assuming non-SSD media" messages don't provide any useful information
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
NEX-5736 implement autoreplace matching based on FRU slot number
NEX-6200 hot spares are not reactivated after reinserting into enclosure
NEX-9403 need to update FRU for spare and l2cache devices
NEX-9404 remove lofi autoreplace support from syseventd
NEX-9409 hotsparing doesn't work for vdevs without FRU
NEX-9424 zfs`vdev_online() needs better notification about state changes
Portions contributed by: Alek Pinchuk <alek@nexenta.com>
Portions contributed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Steve Peng <steve.peng@nexenta.com>
Reviewed by: Sanjay Nadkarni <sanjay.nadkarni@nexenta.com>
NEX-8065 ZFS doesn't notice when disk vdevs have no write cache
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
NEX-2846 Enable Automatic/Intelligent Hot Sparing capability
Reviewed by: Jeffry Molanus <jeffry.molanus@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
Reviewed by: Saso Kiselkov <saso.kiselkov@nexenta.com>
6494 ASSERT supported zio_types for file and disk vdevs
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Approved by: Albert Lee <trisk@omniti.com>
NEX-3984 On-demand TRIM
Reviewed by: Alek Pinchuk <alek@nexenta.com>
Reviewed by: Josef 'Jeff' Sipek <josef.sipek@nexenta.com>
Conflicts:
        usr/src/common/zfs/zpool_prop.c
        usr/src/uts/common/sys/fs/zfs.h
NEX-3508 CLONE - Port NEX-2946 Add UNMAP/TRIM functionality to ZFS and illumos
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Conflicts:
    usr/src/uts/common/io/scsi/targets/sd.c
    usr/src/uts/common/sys/scsi/targets/sddef.h
NEX-2933 tip of nza-kernel hangs during zpool offline
Reviewed by: Alek Pinchuk <alek.pinchuk@nexenta.com>
Reviewed by: Josef Sipek <josef.sipek@nexenta.com>
NEX-1142 move rwlock to vdev to protect vdev_tsd
not just ldi handle.
This way we serialize open/close, yet allow parallel I/O.
4370 avoid transmitting holes during zfs send
4371 DMU code clean up
Reviewed by: Matthew Ahrens <mahrens@delphix.com>
Reviewed by: George Wilson <george.wilson@delphix.com>
Reviewed by: Christopher Siden <christopher.siden@delphix.com>
Reviewed by: Josef 'Jeff' Sipek <jeffpc@josefsipek.net>
Approved by: Garrett D'Amore <garrett@damore.org>
NEX-1065 Added serialization to avoid race (fix lint)
NEX-1065 Added serialization to avoid race
between ldi notification and I/O path.
Also fixes OS-124, NEX-1051, NEX-1062.

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/common/fs/zfs/vdev_disk.c
          +++ new/usr/src/uts/common/fs/zfs/vdev_disk.c
↓ open down ↓ 10 lines elided ↑ open up ↑
  11   11   * and limitations under the License.
  12   12   *
  13   13   * When distributing Covered Code, include this CDDL HEADER in each
  14   14   * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15   15   * If applicable, add the following below this CDDL HEADER, with the
  16   16   * fields enclosed by brackets "[]" replaced with your own identifying
  17   17   * information: Portions Copyright [yyyy] [name of copyright owner]
  18   18   *
  19   19   * CDDL HEADER END
  20   20   */
       21 +
  21   22  /*
  22   23   * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
  23      - * Copyright (c) 2012, 2016 by Delphix. All rights reserved.
  24      - * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
       24 + * Copyright (c) 2012, 2015 by Delphix. All rights reserved.
  25   25   * Copyright (c) 2013 Joyent, Inc.  All rights reserved.
       26 + * Copyright 2017 Nexenta Systems, Inc.
  26   27   */
  27   28  
  28   29  #include <sys/zfs_context.h>
  29   30  #include <sys/spa_impl.h>
  30   31  #include <sys/refcount.h>
  31   32  #include <sys/vdev_disk.h>
  32   33  #include <sys/vdev_impl.h>
  33   34  #include <sys/abd.h>
  34   35  #include <sys/fs/zfs.h>
  35   36  #include <sys/zio.h>
  36   37  #include <sys/sunldi.h>
  37   38  #include <sys/efi_partition.h>
  38   39  #include <sys/fm/fs/zfs.h>
  39   40  
  40   41  /*
  41   42   * Virtual device vector for disks.
  42   43   */
  43   44  
  44   45  extern ldi_ident_t zfs_li;
  45   46  
  46      -static void vdev_disk_close(vdev_t *);
       47 +static void vdev_disk_close_impl(vdev_t *, boolean_t);
  47   48  
  48   49  typedef struct vdev_disk_ldi_cb {
  49   50          list_node_t             lcb_next;
  50   51          ldi_callback_id_t       lcb_id;
  51   52  } vdev_disk_ldi_cb_t;
  52   53  
  53      -static void
  54      -vdev_disk_alloc(vdev_t *vd)
       54 +static vdev_disk_t *
       55 +vdev_disk_alloc(void)
  55   56  {
  56   57          vdev_disk_t *dvd;
  57   58  
  58      -        dvd = vd->vdev_tsd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
       59 +        dvd = kmem_zalloc(sizeof (vdev_disk_t), KM_SLEEP);
  59   60          /*
  60   61           * Create the LDI event callback list.
  61   62           */
  62   63          list_create(&dvd->vd_ldi_cbs, sizeof (vdev_disk_ldi_cb_t),
  63   64              offsetof(vdev_disk_ldi_cb_t, lcb_next));
       65 +        return (dvd);
  64   66  }
  65   67  
  66   68  static void
  67      -vdev_disk_free(vdev_t *vd)
       69 +vdev_disk_free_locked(vdev_t *vd)
  68   70  {
  69      -        vdev_disk_t *dvd = vd->vdev_tsd;
  70   71          vdev_disk_ldi_cb_t *lcb;
       72 +        vdev_disk_t *dvd = vd->vdev_tsd;
  71   73  
       74 +        ASSERT(rw_lock_held(&vd->vdev_tsd_lock));
       75 +
  72   76          if (dvd == NULL)
  73   77                  return;
  74   78  
  75   79          /*
  76   80           * We have already closed the LDI handle. Clean up the LDI event
  77   81           * callbacks and free vd->vdev_tsd.
  78   82           */
       83 +        vd->vdev_tsd = NULL;
  79   84          while ((lcb = list_head(&dvd->vd_ldi_cbs)) != NULL) {
  80   85                  list_remove(&dvd->vd_ldi_cbs, lcb);
  81   86                  (void) ldi_ev_remove_callbacks(lcb->lcb_id);
  82   87                  kmem_free(lcb, sizeof (vdev_disk_ldi_cb_t));
  83   88          }
  84   89          list_destroy(&dvd->vd_ldi_cbs);
  85   90          kmem_free(dvd, sizeof (vdev_disk_t));
  86      -        vd->vdev_tsd = NULL;
  87   91  }
  88   92  
       93 +static void
       94 +vdev_disk_free(vdev_t *vd)
       95 +{
       96 +        rw_enter(&vd->vdev_tsd_lock, RW_WRITER);
       97 +        vdev_disk_free_locked(vd);
       98 +        rw_exit(&vd->vdev_tsd_lock);
       99 +}
      100 +
  89  101  /* ARGSUSED */
  90  102  static int
  91  103  vdev_disk_off_notify(ldi_handle_t lh, ldi_ev_cookie_t ecookie, void *arg,
  92  104      void *ev_data)
  93  105  {
  94  106          vdev_t *vd = (vdev_t *)arg;
  95      -        vdev_disk_t *dvd = vd->vdev_tsd;
  96  107  
  97  108          /*
  98  109           * Ignore events other than offline.
  99  110           */
 100  111          if (strcmp(ldi_ev_get_type(ecookie), LDI_EV_OFFLINE) != 0)
 101  112                  return (LDI_EV_SUCCESS);
 102  113  
 103  114          /*
 104  115           * All LDI handles must be closed for the state change to succeed, so
 105  116           * call on vdev_disk_close() to do this.
 106  117           *
 107  118           * We inform vdev_disk_close that it is being called from offline
 108  119           * notify context so it will defer cleanup of LDI event callbacks and
 109  120           * freeing of vd->vdev_tsd to the offline finalize or a reopen.
 110  121           */
 111      -        dvd->vd_ldi_offline = B_TRUE;
 112      -        vdev_disk_close(vd);
      122 +        vdev_disk_close_impl(vd, B_TRUE);
 113  123  
 114  124          /*
 115  125           * Now that the device is closed, request that the spa_async_thread
 116  126           * mark the device as REMOVED and notify FMA of the removal.
 117  127           */
 118  128          zfs_post_remove(vd->vdev_spa, vd);
 119  129          vd->vdev_remove_wanted = B_TRUE;
 120  130          spa_async_request(vd->vdev_spa, SPA_ASYNC_REMOVE);
 121  131  
 122  132          return (LDI_EV_SUCCESS);
↓ open down ↓ 112 lines elided ↑ open up ↑
 235  245                      dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
 236  246                  vd->vdev_name_vp = NULL;
 237  247          }
 238  248          if (vd->vdev_devid_vp) {
 239  249                  VN_RELE_ASYNC(vd->vdev_devid_vp,
 240  250                      dsl_pool_vnrele_taskq(vd->vdev_spa->spa_dsl_pool));
 241  251                  vd->vdev_devid_vp = NULL;
 242  252          }
 243  253  }
 244  254  
 245      -/*
 246      - * We want to be loud in DEBUG kernels when DKIOCGMEDIAINFOEXT fails, or when
 247      - * even a fallback to DKIOCGMEDIAINFO fails.
 248      - */
 249      -#ifdef DEBUG
 250      -#define VDEV_DEBUG(...) cmn_err(CE_NOTE, __VA_ARGS__)
 251      -#else
 252      -#define VDEV_DEBUG(...) /* Nothing... */
 253      -#endif
 254      -
 255  255  static int
 256  256  vdev_disk_open(vdev_t *vd, uint64_t *psize, uint64_t *max_psize,
 257  257      uint64_t *ashift)
 258  258  {
 259  259          spa_t *spa = vd->vdev_spa;
 260      -        vdev_disk_t *dvd = vd->vdev_tsd;
      260 +        vdev_disk_t *dvd;
 261  261          ldi_ev_cookie_t ecookie;
 262  262          vdev_disk_ldi_cb_t *lcb;
 263  263          union {
 264  264                  struct dk_minfo_ext ude;
 265  265                  struct dk_minfo ud;
 266  266          } dks;
 267  267          struct dk_minfo_ext *dkmext = &dks.ude;
 268  268          struct dk_minfo *dkm = &dks.ud;
 269  269          int error;
 270  270          dev_t dev;
 271      -        int otyp;
      271 +        int otyp, vdev_ssd;
 272  272          boolean_t validate_devid = B_FALSE;
 273  273          ddi_devid_t devid;
 274  274          uint64_t capacity = 0, blksz = 0, pbsize;
 275  275  
 276  276          /*
 277  277           * We must have a pathname, and it must be absolute.
 278  278           */
 279  279          if (vd->vdev_path == NULL || vd->vdev_path[0] != '/') {
 280  280                  vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
 281  281                  return (SET_ERROR(EINVAL));
 282  282          }
 283      -
      283 +        rw_enter(&vd->vdev_tsd_lock, RW_WRITER);
      284 +        dvd = vd->vdev_tsd;
 284  285          /*
 285  286           * Reopen the device if it's not currently open. Otherwise,
 286  287           * just update the physical size of the device.
 287  288           */
 288  289          if (dvd != NULL) {
 289      -                if (dvd->vd_ldi_offline && dvd->vd_lh == NULL) {
 290      -                        /*
 291      -                         * If we are opening a device in its offline notify
 292      -                         * context, the LDI handle was just closed. Clean
 293      -                         * up the LDI event callbacks and free vd->vdev_tsd.
 294      -                         */
 295      -                        vdev_disk_free(vd);
 296      -                } else {
 297      -                        ASSERT(vd->vdev_reopening);
 298      -                        goto skip_open;
      290 +                ASSERT(vd->vdev_reopening);
      291 +                /*
      292 +                 * Here vd_lh is protected by vdev_tsd_lock
      293 +                 */
      294 +                ASSERT(dvd->vd_lh != NULL);
      295 +                /* This should not happen, but let's be safe */
      296 +                if (dvd->vd_lh == NULL) {
      297 +                        /* What are we going to do here??? */
      298 +                        rw_exit(&vd->vdev_tsd_lock);
      299 +                        return (SET_ERROR(ENXIO));
 299  300                  }
      301 +                goto skip_open;
 300  302          }
 301      -
 302  303          /*
 303      -         * Create vd->vdev_tsd.
      304 +         * Create dvd to be used as vd->vdev_tsd.
 304  305           */
 305      -        vdev_disk_alloc(vd);
 306      -        dvd = vd->vdev_tsd;
      306 +        vd->vdev_tsd = dvd = vdev_disk_alloc();
 307  307  
 308  308          /*
 309  309           * When opening a disk device, we want to preserve the user's original
 310  310           * intent.  We always want to open the device by the path the user gave
 311  311           * us, even if it is one of multiple paths to the same device.  But we
 312  312           * also want to be able to survive disks being removed/recabled.
 313  313           * Therefore the sequence of opening devices is:
 314  314           *
 315  315           * 1. Try opening the device by path.  For legacy pools without the
 316  316           *    'whole_disk' property, attempt to fix the path by appending 's0'.
↓ open down ↓ 1 lines elided ↑ open up ↑
 318  318           * 2. If the devid of the device matches the stored value, return
 319  319           *    success.
 320  320           *
 321  321           * 3. Otherwise, the device may have moved.  Try opening the device
 322  322           *    by the devid instead.
 323  323           */
 324  324          if (vd->vdev_devid != NULL) {
 325  325                  if (ddi_devid_str_decode(vd->vdev_devid, &dvd->vd_devid,
 326  326                      &dvd->vd_minor) != 0) {
 327  327                          vd->vdev_stat.vs_aux = VDEV_AUX_BAD_LABEL;
 328      -                        vdev_dbgmsg(vd, "vdev_disk_open: invalid "
 329      -                            "vdev_devid '%s'", vd->vdev_devid);
      328 +                        vdev_disk_free_locked(vd);
      329 +                        rw_exit(&vd->vdev_tsd_lock);
 330  330                          return (SET_ERROR(EINVAL));
 331  331                  }
 332  332          }
 333  333  
 334  334          error = EINVAL;         /* presume failure */
 335  335  
 336  336          if (vd->vdev_path != NULL) {
 337  337  
 338  338                  if (vd->vdev_wholedisk == -1ULL) {
 339  339                          size_t len = strlen(vd->vdev_path) + 3;
↓ open down ↓ 72 lines elided ↑ open up ↑
 412  412                   * as above.  This hasn't been used in a very long time and we
 413  413                   * don't need to propagate its oddities to this edge condition.
 414  414                   */
 415  415                  if (error && vd->vdev_path != NULL)
 416  416                          error = ldi_open_by_name(vd->vdev_path, spa_mode(spa),
 417  417                              kcred, &dvd->vd_lh, zfs_li);
 418  418          }
 419  419  
 420  420          if (error) {
 421  421                  vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 422      -                vdev_dbgmsg(vd, "vdev_disk_open: failed to open [error=%d]",
 423      -                    error);
      422 +                vdev_disk_free_locked(vd);
      423 +                rw_exit(&vd->vdev_tsd_lock);
 424  424                  return (error);
 425  425          }
 426  426  
 427  427          /*
 428  428           * Now that the device has been successfully opened, update the devid
 429  429           * if necessary.
 430  430           */
 431  431          if (validate_devid && spa_writeable(spa) &&
 432  432              ldi_get_devid(dvd->vd_lh, &devid) == 0) {
 433  433                  if (ddi_devid_compare(devid, dvd->vd_devid) != 0) {
 434  434                          char *vd_devid;
 435  435  
 436  436                          vd_devid = ddi_devid_str_encode(devid, dvd->vd_minor);
 437      -                        vdev_dbgmsg(vd, "vdev_disk_open: update devid from "
 438      -                            "'%s' to '%s'", vd->vdev_devid, vd_devid);
      437 +                        zfs_dbgmsg("vdev %s: update devid from %s, "
      438 +                            "to %s", vd->vdev_path, vd->vdev_devid, vd_devid);
 439  439                          spa_strfree(vd->vdev_devid);
 440  440                          vd->vdev_devid = spa_strdup(vd_devid);
 441  441                          ddi_devid_str_free(vd_devid);
 442  442                  }
 443  443                  ddi_devid_free(devid);
 444  444          }
 445  445  
 446  446          /*
 447  447           * Once a device is opened, verify that the physical device path (if
 448  448           * available) is up to date.
↓ open down ↓ 33 lines elided ↑ open up ↑
 482  482          /*
 483  483           * Register callbacks for the LDI degrade event.
 484  484           */
 485  485          if (ldi_ev_get_cookie(dvd->vd_lh, LDI_EV_DEGRADE, &ecookie) ==
 486  486              LDI_EV_SUCCESS) {
 487  487                  lcb = kmem_zalloc(sizeof (vdev_disk_ldi_cb_t), KM_SLEEP);
 488  488                  list_insert_tail(&dvd->vd_ldi_cbs, lcb);
 489  489                  (void) ldi_ev_register_callbacks(dvd->vd_lh, ecookie,
 490  490                      &vdev_disk_dgrd_callb, (void *) vd, &lcb->lcb_id);
 491  491          }
      492 +
      493 +        /* Reset TRIM flag, as underlying device support may have changed */
      494 +        vd->vdev_notrim = B_FALSE;
      495 +
 492  496  skip_open:
      497 +        ASSERT(dvd != NULL);
 493  498          /*
 494  499           * Determine the actual size of the device.
 495  500           */
 496  501          if (ldi_get_size(dvd->vd_lh, psize) != 0) {
 497  502                  vd->vdev_stat.vs_aux = VDEV_AUX_OPEN_FAILED;
 498      -                vdev_dbgmsg(vd, "vdev_disk_open: failed to get size");
      503 +                vdev_disk_free_locked(vd);
      504 +                rw_exit(&vd->vdev_tsd_lock);
 499  505                  return (SET_ERROR(EINVAL));
 500  506          }
 501  507  
 502  508          *max_psize = *psize;
 503  509  
 504  510          /*
 505  511           * Determine the device's minimum transfer size.
 506  512           * If the ioctl isn't supported, assume DEV_BSIZE.
 507  513           */
 508  514          if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFOEXT,
 509  515              (intptr_t)dkmext, FKIOCTL, kcred, NULL)) == 0) {
 510  516                  capacity = dkmext->dki_capacity - 1;
 511  517                  blksz = dkmext->dki_lbsize;
 512  518                  pbsize = dkmext->dki_pbsize;
 513  519          } else if ((error = ldi_ioctl(dvd->vd_lh, DKIOCGMEDIAINFO,
 514  520              (intptr_t)dkm, FKIOCTL, kcred, NULL)) == 0) {
 515      -                VDEV_DEBUG(
 516      -                    "vdev_disk_open(\"%s\"): fallback to DKIOCGMEDIAINFO\n",
 517      -                    vd->vdev_path);
 518  521                  capacity = dkm->dki_capacity - 1;
 519  522                  blksz = dkm->dki_lbsize;
 520  523                  pbsize = blksz;
 521  524          } else {
 522      -                VDEV_DEBUG("vdev_disk_open(\"%s\"): "
 523      -                    "both DKIOCGMEDIAINFO{,EXT} calls failed, %d\n",
 524      -                    vd->vdev_path, error);
 525  525                  pbsize = DEV_BSIZE;
 526  526          }
 527  527  
 528  528          *ashift = highbit64(MAX(pbsize, SPA_MINBLOCKSIZE)) - 1;
 529  529  
 530  530          if (vd->vdev_wholedisk == 1) {
 531  531                  int wce = 1;
 532  532  
 533  533                  if (error == 0) {
 534  534                          /*
↓ open down ↓ 6 lines elided ↑ open up ↑
 541  541                  }
 542  542  
 543  543                  /*
 544  544                   * Since we own the whole disk, try to enable disk write
 545  545                   * caching.  We ignore errors because it's OK if we can't do it.
 546  546                   */
 547  547                  (void) ldi_ioctl(dvd->vd_lh, DKIOCSETWCE, (intptr_t)&wce,
 548  548                      FKIOCTL, kcred, NULL);
 549  549          }
 550  550  
      551 +        if (ldi_ioctl(dvd->vd_lh, DKIOCSOLIDSTATE, (intptr_t)&vdev_ssd,
      552 +            FKIOCTL, kcred, NULL) != 0)
      553 +                vd->vdev_is_ssd = B_FALSE;
      554 +        else
      555 +                vd->vdev_is_ssd = vdev_ssd ? B_TRUE : B_FALSE;
      556 +
 551  557          /*
      558 +         * We are done with vd_lh and vdev_tsd, release the vdev_tsd_lock
      559 +         */
      560 +        rw_exit(&vd->vdev_tsd_lock);
      561 +
      562 +        /*
 552  563           * Clear the nowritecache bit, so that on a vdev_reopen() we will
 553  564           * try again.
 554  565           */
 555  566          vd->vdev_nowritecache = B_FALSE;
 556  567  
      568 +        /*
      569 +         * vdev open has succeeded - reset fault flags if last fault was due
      570 +         * to a failed open since the open fault looks to have been transient
      571 +         */
      572 +        if (vd->vdev_removed || (vd->vdev_faulted &&
      573 +            vd->vdev_label_aux == VDEV_AUX_OPEN_FAILED)) {
      574 +                vd->vdev_faulted = vd->vdev_removed = 0ULL;
      575 +                vd->vdev_label_aux = VDEV_AUX_NONE;
      576 +        }
      577 +
 557  578          return (0);
 558  579  }
 559  580  
 560  581  static void
 561      -vdev_disk_close(vdev_t *vd)
      582 +vdev_disk_close_impl(vdev_t *vd, boolean_t ldi_offline)
 562  583  {
 563      -        vdev_disk_t *dvd = vd->vdev_tsd;
      584 +        vdev_disk_t *dvd;
 564  585  
      586 +        rw_enter(&vd->vdev_tsd_lock, RW_WRITER);
      587 +        dvd = vd->vdev_tsd;
      588 +
 565  589          if (vd->vdev_reopening || dvd == NULL)
 566      -                return;
      590 +                goto out;
 567  591  
 568  592          if (dvd->vd_minor != NULL) {
 569  593                  ddi_devid_str_free(dvd->vd_minor);
 570  594                  dvd->vd_minor = NULL;
 571  595          }
 572  596  
 573  597          if (dvd->vd_devid != NULL) {
 574  598                  ddi_devid_free(dvd->vd_devid);
 575  599                  dvd->vd_devid = NULL;
 576  600          }
↓ open down ↓ 2 lines elided ↑ open up ↑
 579  603                  (void) ldi_close(dvd->vd_lh, spa_mode(vd->vdev_spa), kcred);
 580  604                  dvd->vd_lh = NULL;
 581  605          }
 582  606  
 583  607          vd->vdev_delayed_close = B_FALSE;
 584  608          /*
 585  609           * If we closed the LDI handle due to an offline notify from LDI,
 586  610           * don't free vd->vdev_tsd or unregister the callbacks here;
 587  611           * the offline finalize callback or a reopen will take care of it.
 588  612           */
 589      -        if (dvd->vd_ldi_offline)
 590      -                return;
      613 +        if (!ldi_offline)
      614 +                vdev_disk_free_locked(vd);
      615 +out:
      616 +        rw_exit(&vd->vdev_tsd_lock);
      617 +}
 591  618  
 592      -        vdev_disk_free(vd);
      619 +static void
      620 +vdev_disk_close(vdev_t *vd)
      621 +{
      622 +        vdev_disk_close_impl(vd, B_FALSE);
 593  623  }
 594  624  
 595  625  int
 596  626  vdev_disk_physio(vdev_t *vd, caddr_t data,
 597  627      size_t size, uint64_t offset, int flags, boolean_t isdump)
 598  628  {
 599      -        vdev_disk_t *dvd = vd->vdev_tsd;
      629 +        int rc = EIO;
      630 +        vdev_disk_t *dvd;
 600  631  
      632 +        rw_enter(&vd->vdev_tsd_lock, RW_READER);
      633 +        dvd = vd->vdev_tsd;
 601  634          /*
 602  635           * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
 603  636           * Nothing to be done here but return failure.
 604  637           */
 605      -        if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL))
 606      -                return (EIO);
      638 +        if (dvd == NULL || dvd->vd_lh == NULL)
      639 +                goto out;
 607  640  
 608  641          ASSERT(vd->vdev_ops == &vdev_disk_ops);
 609  642  
 610  643          /*
 611  644           * If in the context of an active crash dump, use the ldi_dump(9F)
 612  645           * call instead of ldi_strategy(9F) as usual.
 613  646           */
 614  647          if (isdump) {
 615  648                  ASSERT3P(dvd, !=, NULL);
 616      -                return (ldi_dump(dvd->vd_lh, data, lbtodb(offset),
 617      -                    lbtodb(size)));
      649 +                rc = ldi_dump(dvd->vd_lh, data, lbtodb(offset), lbtodb(size));
      650 +                goto out;
 618  651          }
 619  652  
 620      -        return (vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags));
      653 +        rc = vdev_disk_ldi_physio(dvd->vd_lh, data, size, offset, flags);
      654 +out:
      655 +        rw_exit(&vd->vdev_tsd_lock);
      656 +        return (rc);
 621  657  }
 622  658  
 623  659  int
 624  660  vdev_disk_ldi_physio(ldi_handle_t vd_lh, caddr_t data,
 625  661      size_t size, uint64_t offset, int flags)
 626  662  {
 627  663          buf_t *bp;
 628  664          int error = 0;
 629  665  
 630  666          if (vd_lh == NULL)
↓ open down ↓ 62 lines elided ↑ open up ↑
 693  729  
 694  730          zio->io_error = error;
 695  731  
 696  732          zio_interrupt(zio);
 697  733  }
 698  734  
 699  735  static void
 700  736  vdev_disk_io_start(zio_t *zio)
 701  737  {
 702  738          vdev_t *vd = zio->io_vd;
 703      -        vdev_disk_t *dvd = vd->vdev_tsd;
      739 +        vdev_disk_t *dvd;
 704  740          vdev_buf_t *vb;
 705  741          struct dk_callback *dkc;
 706  742          buf_t *bp;
 707  743          int error;
 708  744  
      745 +        rw_enter(&vd->vdev_tsd_lock, RW_READER);
      746 +        dvd = vd->vdev_tsd;
 709  747          /*
 710  748           * If the vdev is closed, it's likely in the REMOVED or FAULTED state.
 711  749           * Nothing to be done here but return failure.
 712  750           */
 713      -        if (dvd == NULL || (dvd->vd_ldi_offline && dvd->vd_lh == NULL)) {
      751 +        if (dvd == NULL || dvd->vd_lh == NULL) {
 714  752                  zio->io_error = ENXIO;
      753 +                rw_exit(&vd->vdev_tsd_lock);
 715  754                  zio_interrupt(zio);
 716  755                  return;
 717  756          }
 718  757  
 719  758          if (zio->io_type == ZIO_TYPE_IOCTL) {
 720  759                  /* XXPOLICY */
 721  760                  if (!vdev_readable(vd)) {
 722  761                          zio->io_error = SET_ERROR(ENXIO);
      762 +                        rw_exit(&vd->vdev_tsd_lock);
 723  763                          zio_interrupt(zio);
 724  764                          return;
 725  765                  }
 726  766  
 727  767                  switch (zio->io_cmd) {
 728  768  
 729  769                  case DKIOCFLUSHWRITECACHE:
 730  770  
 731  771                          if (zfs_nocacheflush)
 732  772                                  break;
↓ open down ↓ 12 lines elided ↑ open up ↑
 745  785  
 746  786                          error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
 747  787                              (uintptr_t)dkc, FKIOCTL, kcred, NULL);
 748  788  
 749  789                          if (error == 0) {
 750  790                                  /*
 751  791                                   * The ioctl will be done asynchronously,
 752  792                                   * and will call vdev_disk_ioctl_done()
 753  793                                   * upon completion.
 754  794                                   */
      795 +                                rw_exit(&vd->vdev_tsd_lock);
 755  796                                  return;
 756  797                          }
 757  798  
 758  799                          zio->io_error = error;
 759  800  
 760  801                          break;
 761  802  
      803 +                case DKIOCFREE:
      804 +                        /*
      805 +                         * We perform device support checks here instead of
      806 +                         * in zio_trim(), as zio_trim() might be invoked on
      807 +                         * top of a top-level vdev, whereas vdev_disk_io_start
      808 +                         * is guaranteed to be operating on a leaf vdev.
      809 +                         */
      810 +                        if (vd->vdev_notrim &&
      811 +                            spa_get_force_trim(vd->vdev_spa) !=
      812 +                            SPA_FORCE_TRIM_ON) {
      813 +                                zio->io_error = SET_ERROR(ENOTSUP);
      814 +                                break;
      815 +                        }
      816 +
      817 +                        /*
      818 +                         * zio->io_private contains a dkioc_free_list_t
      819 +                         * specifying which offsets are to be freed
      820 +                         */
      821 +                        ASSERT(zio->io_private != NULL);
      822 +                        error = ldi_ioctl(dvd->vd_lh, zio->io_cmd,
      823 +                            (uintptr_t)zio->io_private, FKIOCTL, kcred, NULL);
      824 +
      825 +                        if (error == ENOTSUP || error == ENOTTY)
      826 +                                vd->vdev_notrim = B_TRUE;
      827 +
      828 +                        zio->io_error = error;
      829 +
      830 +                        break;
      831 +
 762  832                  default:
 763  833                          zio->io_error = SET_ERROR(ENOTSUP);
 764  834                  }
 765  835  
      836 +                rw_exit(&vd->vdev_tsd_lock);
 766  837                  zio_execute(zio);
 767  838                  return;
 768  839          }
 769  840  
 770  841          ASSERT(zio->io_type == ZIO_TYPE_READ || zio->io_type == ZIO_TYPE_WRITE);
 771  842          zio->io_target_timestamp = zio_handle_io_delay(zio);
 772  843  
 773  844          vb = kmem_alloc(sizeof (vdev_buf_t), KM_SLEEP);
 774  845  
 775  846          vb->vb_io = zio;
↓ open down ↓ 13 lines elided ↑ open up ↑
 789  860                  bp->b_un.b_addr =
 790  861                      abd_borrow_buf_copy(zio->io_abd, zio->io_size);
 791  862          }
 792  863  
 793  864          bp->b_lblkno = lbtodb(zio->io_offset);
 794  865          bp->b_bufsize = zio->io_size;
 795  866          bp->b_iodone = (int (*)())vdev_disk_io_intr;
 796  867  
 797  868          /* ldi_strategy() will return non-zero only on programming errors */
 798  869          VERIFY(ldi_strategy(dvd->vd_lh, bp) == 0);
      870 +        rw_exit(&vd->vdev_tsd_lock);
 799  871  }
 800  872  
 801  873  static void
 802  874  vdev_disk_io_done(zio_t *zio)
 803  875  {
 804  876          vdev_t *vd = zio->io_vd;
 805  877  
 806  878          /*
 807  879           * If the device returned EIO, then attempt a DKIOCSTATE ioctl to see if
 808  880           * the device has been removed.  If this is the case, then we trigger an
 809  881           * asynchronous removal of the device. Otherwise, probe the device and
 810  882           * make sure it's still accessible.
 811  883           */
 812  884          if (zio->io_error == EIO && !vd->vdev_remove_wanted) {
 813      -                vdev_disk_t *dvd = vd->vdev_tsd;
 814      -                int state = DKIO_NONE;
      885 +                vdev_disk_t *dvd;
      886 +                int rc = EIO, state = DKIO_NONE;
 815  887  
 816      -                if (ldi_ioctl(dvd->vd_lh, DKIOCSTATE, (intptr_t)&state,
 817      -                    FKIOCTL, kcred, NULL) == 0 && state != DKIO_INSERTED) {
      888 +                rw_enter(&vd->vdev_tsd_lock, RW_READER);
      889 +                dvd = vd->vdev_tsd;
      890 +                if (dvd != NULL && dvd->vd_lh != NULL)
      891 +                        rc = ldi_ioctl(dvd->vd_lh, DKIOCSTATE,
      892 +                            (intptr_t)&state, FKIOCTL, kcred, NULL);
      893 +                rw_exit(&vd->vdev_tsd_lock);
      894 +                if (rc == 0 && state != DKIO_INSERTED) {
 818  895                          /*
 819  896                           * We post the resource as soon as possible, instead of
 820  897                           * when the async removal actually happens, because the
 821  898                           * DE is using this information to discard previous I/O
 822  899                           * errors.
 823  900                           */
 824  901                          zfs_post_remove(zio->io_spa, vd);
 825  902                          vd->vdev_remove_wanted = B_TRUE;
 826  903                          spa_async_request(zio->io_spa, SPA_ASYNC_REMOVE);
 827  904                  } else if (!vd->vdev_delayed_close) {
↓ open down ↓ 97 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX