1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright (c) 2009, 2010, Oracle and/or its affiliates. All rights reserved.
  23  * Copyright 2012 Garrett D'Amore <garrett@damore.org>.  All rights reserved.
  24  * Copyright 2012 Alexey Zaytsev <alexey.zaytsev@gmail.com> All rights reserved.
  25  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
  26  * Copyright 2017 The MathWorks, Inc.  All rights reserved.
  27  */
  28 
  29 #include <sys/types.h>
  30 #include <sys/ksynch.h>
  31 #include <sys/kmem.h>
  32 #include <sys/file.h>
  33 #include <sys/errno.h>
  34 #include <sys/open.h>
  35 #include <sys/buf.h>
  36 #include <sys/uio.h>
  37 #include <sys/aio_req.h>
  38 #include <sys/cred.h>
  39 #include <sys/modctl.h>
  40 #include <sys/cmlb.h>
  41 #include <sys/conf.h>
  42 #include <sys/devops.h>
  43 #include <sys/list.h>
  44 #include <sys/sysmacros.h>
  45 #include <sys/dkio.h>
  46 #include <sys/vtoc.h>
  47 #include <sys/scsi/scsi.h>        /* for DTYPE_DIRECT */
  48 #include <sys/kstat.h>
  49 #include <sys/fs/dv_node.h>
  50 #include <sys/ddi.h>
  51 #include <sys/sunddi.h>
  52 #include <sys/note.h>
  53 #include <sys/mhd.h>
  54 #include <sys/blkdev.h>
  55 #include <sys/scsi/impl/inquiry.h>
  56 
  57 #define BD_MAXPART      64
  58 #define BDINST(dev)     (getminor(dev) / BD_MAXPART)
  59 #define BDPART(dev)     (getminor(dev) % BD_MAXPART)
  60 
  61 typedef struct bd bd_t;
  62 typedef struct bd_xfer_impl bd_xfer_impl_t;
  63 
/*
 * Per-instance soft state for a blkdev child device.  Allocated (zeroed)
 * in bd_attach() and freed in bd_detach().
 */
struct bd {
	void		*d_private;	/* parent driver's private data (hdl->h_private) */
	dev_info_t	*d_dip;		/* our devinfo node */
	kmutex_t	d_ocmutex;	/* open/close serialization */
	kmutex_t	d_iomutex;	/* I/O queue lock; also ks_lock for d_ksp */
	kmutex_t	*d_errmutex;	/* guards d_kerr; aliases d_errstats->ks_lock */
	kmutex_t	d_statemutex;	/* guards d_state / d_statecv */
	kcondvar_t	d_statecv;	/* signaled on media state changes */
	enum dkio_state	d_state;	/* current DKIO_* media state */
	cmlb_handle_t	d_cmlbh;	/* common label (cmlb) handle */
	unsigned	d_open_lyr[BD_MAXPART]; /* open count */
	uint64_t	d_open_excl;	/* bit mask indexed by partition */
	uint64_t	d_open_reg[OTYPCNT];		/* bit mask */

	uint32_t	d_qsize;	/* queue depth reported by the drive */
	uint32_t	d_qactive;	/* transfers currently on d_runq */
	uint32_t	d_maxxfer;	/* max transfer size, bytes (DEV_BSIZE aligned) */
	uint32_t	d_blkshift;	/* log2 of logical block size (9 initially) */
	uint32_t	d_pblkshift;	/* log2 of physical block size */
	uint64_t	d_numblks;	/* device capacity in logical blocks */
	ddi_devid_t	d_devid;	/* device id, if o_devid_init supplied one */

	kmem_cache_t	*d_cache;	/* cache of bd_xfer_impl_t contexts */
	list_t		d_runq;		/* transfers submitted to the driver */
	list_t		d_waitq;	/* transfers waiting for queue space */
	kstat_t		*d_ksp;		/* "disk" I/O kstat (may be NULL) */
	kstat_io_t	*d_kiop;	/* I/O stats; scratch copy if d_ksp is NULL */
	kstat_t		*d_errstats;	/* "device_error" kstat (may be NULL) */
	struct bd_errstats *d_kerr;	/* error stats; scratch copy if no kstat */

	boolean_t	d_rdonly;	/* drive is read-only */
	boolean_t	d_ssd;		/* drive is solid-state */
	boolean_t	d_removable;	/* media is removable */
	boolean_t	d_hotpluggable;	/* device is hotpluggable */
	boolean_t	d_use_dma;	/* parent supplied DMA attributes */

	ddi_dma_attr_t	d_dma;		/* copy of parent's DMA attributes */
	bd_ops_t	d_ops;		/* parent driver's entry points */
	bd_handle_t	d_handle;	/* back pointer to the parent's handle */
};
 104 
/*
 * Handle linking a parent driver to its blkdev child.  bd_attach()
 * retrieves it via ddi_get_parent_data() and records the soft state in
 * h_bd.
 */
struct bd_handle {
	bd_ops_t	h_ops;		/* parent driver's entry points */
	ddi_dma_attr_t	*h_dma;		/* parent's DMA attributes (NULL = PIO) */
	dev_info_t	*h_parent;	/* parent devinfo node */
	dev_info_t	*h_child;	/* child (blkdev) devinfo node */
	void		*h_private;	/* parent's per-device private data */
	bd_t		*h_bd;		/* child soft state, set at attach */
	char		*h_name;	/* node name */
	char		h_addr[30];	/* enough for w%0.16x,%X */
};
 115 
/*
 * Internal transfer context wrapping the public bd_xfer_t handed to the
 * parent driver.  i_public must remain the first member: the i_dmah,
 * i_dmac, etc. macros below alias directly into it.
 */
struct bd_xfer_impl {
	bd_xfer_t	i_public;	/* portion visible to the parent driver */
	list_node_t	i_linkage;	/* linkage on d_waitq / d_runq */
	bd_t		*i_bd;		/* owning device */
	buf_t		*i_bp;		/* originating buf(9S) */
	uint_t		i_num_win;	/* number of DMA windows (or PIO chunks) */
	uint_t		i_cur_win;	/* window currently in flight */
	off_t		i_offset;	/* byte offset of current window in bp */
	int		(*i_func)(void *, bd_xfer_t *);	/* driver op to invoke */
	uint32_t	i_blkshift;	/* log2 block size at time of setup */
	size_t		i_len;		/* length of current window, bytes */
	size_t		i_resid;	/* bytes not yet completed */
};
 129 
 130 #define i_dmah          i_public.x_dmah
 131 #define i_dmac          i_public.x_dmac
 132 #define i_ndmac         i_public.x_ndmac
 133 #define i_kaddr         i_public.x_kaddr
 134 #define i_nblks         i_public.x_nblks
 135 #define i_blkno         i_public.x_blkno
 136 #define i_flags         i_public.x_flags
 137 
 138 
 139 /*
 140  * Private prototypes.
 141  */
 142 
 143 static void bd_prop_update_inqstring(dev_info_t *, char *, char *, size_t);
 144 static void bd_create_inquiry_props(dev_info_t *, bd_drive_t *);
 145 static void bd_create_errstats(bd_t *, int, bd_drive_t *);
 146 static void bd_errstats_setstr(kstat_named_t *, char *, size_t, char *);
 147 static void bd_init_errstats(bd_t *, bd_drive_t *);
 148 
 149 static int bd_getinfo(dev_info_t *, ddi_info_cmd_t, void *, void **);
 150 static int bd_attach(dev_info_t *, ddi_attach_cmd_t);
 151 static int bd_detach(dev_info_t *, ddi_detach_cmd_t);
 152 
 153 static int bd_open(dev_t *, int, int, cred_t *);
 154 static int bd_close(dev_t, int, int, cred_t *);
 155 static int bd_strategy(struct buf *);
 156 static int bd_ioctl(dev_t, int, intptr_t, int, cred_t *, int *);
 157 static int bd_dump(dev_t, caddr_t, daddr_t, int);
 158 static int bd_read(dev_t, struct uio *, cred_t *);
 159 static int bd_write(dev_t, struct uio *, cred_t *);
 160 static int bd_aread(dev_t, struct aio_req *, cred_t *);
 161 static int bd_awrite(dev_t, struct aio_req *, cred_t *);
 162 static int bd_prop_op(dev_t, dev_info_t *, ddi_prop_op_t, int, char *,
 163     caddr_t, int *);
 164 
 165 static int bd_tg_rdwr(dev_info_t *, uchar_t, void *, diskaddr_t, size_t,
 166     void *);
 167 static int bd_tg_getinfo(dev_info_t *, int, void *, void *);
 168 static int bd_xfer_ctor(void *, void *, int);
 169 static void bd_xfer_dtor(void *, void *);
 170 static void bd_sched(bd_t *);
 171 static void bd_submit(bd_t *, bd_xfer_impl_t *);
 172 static void bd_runq_exit(bd_xfer_impl_t *, int);
 173 static void bd_update_state(bd_t *);
 174 static int bd_check_state(bd_t *, enum dkio_state *);
 175 static int bd_flush_write_cache(bd_t *, struct dk_callback *);
 176 static int bd_reserve(bd_t *bd, int);
 177 static int bd_check_uio(dev_t, struct uio *);
 178 
/*
 * Target-disk ops vector handed to cmlb_attach() so the common label
 * code can read/write blocks and query geometry through us.
 * NOTE(review): not declared static, unlike the other file-scope ops
 * tables here — confirm no external references before adding static.
 */
struct cmlb_tg_ops bd_tg_ops = {
	TG_DK_OPS_VERSION_1,
	bd_tg_rdwr,
	bd_tg_getinfo,
};
 184 
/*
 * Character/block entry points (cb_ops) for the blkdev driver.
 */
static struct cb_ops bd_cb_ops = {
	bd_open,		/* open */
	bd_close,		/* close */
	bd_strategy,		/* strategy */
	nodev,			/* print */
	bd_dump,		/* dump */
	bd_read,		/* read */
	bd_write,		/* write */
	bd_ioctl,		/* ioctl */
	nodev,			/* devmap */
	nodev,			/* mmap */
	nodev,			/* segmap */
	nochpoll,		/* poll */
	bd_prop_op,		/* cb_prop_op */
	0,			/* streamtab  */
	D_64BIT | D_MP,		/* Driver compatibility flag */
	CB_REV,			/* cb_rev */
	bd_aread,		/* async read */
	bd_awrite		/* async write */
};
 205 
/*
 * Device operations (dev_ops) for the blkdev driver.  No quiesce is
 * needed; the parent driver handles that.
 */
struct dev_ops bd_dev_ops = {
	DEVO_REV,		/* devo_rev, */
	0,			/* refcnt  */
	bd_getinfo,		/* getinfo */
	nulldev,		/* identify */
	nulldev,		/* probe */
	bd_attach,		/* attach */
	bd_detach,		/* detach */
	nodev,			/* reset */
	&bd_cb_ops,		/* driver operations */
	NULL,			/* bus operations */
	NULL,			/* power */
	ddi_quiesce_not_needed,	/* quiesce */
};
 220 
/* Loadable-module linkage for this driver. */
static struct modldrv modldrv = {
	&mod_driverops,
	"Generic Block Device",
	&bd_dev_ops,
};

static struct modlinkage modlinkage = {
	MODREV_1, { &modldrv, NULL }
};
 230 
/* Soft-state anchor for all bd instances; initialized in _init(). */
static void *bd_state;
/* Module-wide reader/writer lock; initialized in _init(). */
static krwlock_t bd_lock;
 233 
 234 int
 235 _init(void)
 236 {
 237         int     rv;
 238 
 239         rv = ddi_soft_state_init(&bd_state, sizeof (struct bd), 2);
 240         if (rv != DDI_SUCCESS) {
 241                 return (rv);
 242         }
 243         rw_init(&bd_lock, NULL, RW_DRIVER, NULL);
 244         rv = mod_install(&modlinkage);
 245         if (rv != DDI_SUCCESS) {
 246                 rw_destroy(&bd_lock);
 247                 ddi_soft_state_fini(&bd_state);
 248         }
 249         return (rv);
 250 }
 251 
 252 int
 253 _fini(void)
 254 {
 255         int     rv;
 256 
 257         rv = mod_remove(&modlinkage);
 258         if (rv == DDI_SUCCESS) {
 259                 rw_destroy(&bd_lock);
 260                 ddi_soft_state_fini(&bd_state);
 261         }
 262         return (rv);
 263 }
 264 
/* Module information entry point. */
int
_info(struct modinfo *modinfop)
{
	return (mod_info(&modlinkage, modinfop));
}
 270 
 271 static int
 272 bd_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **resultp)
 273 {
 274         bd_t    *bd;
 275         minor_t inst;
 276 
 277         _NOTE(ARGUNUSED(dip));
 278 
 279         inst = BDINST((dev_t)arg);
 280 
 281         switch (cmd) {
 282         case DDI_INFO_DEVT2DEVINFO:
 283                 bd = ddi_get_soft_state(bd_state, inst);
 284                 if (bd == NULL) {
 285                         return (DDI_FAILURE);
 286                 }
 287                 *resultp = (void *)bd->d_dip;
 288                 break;
 289 
 290         case DDI_INFO_DEVT2INSTANCE:
 291                 *resultp = (void *)(intptr_t)inst;
 292                 break;
 293 
 294         default:
 295                 return (DDI_FAILURE);
 296         }
 297         return (DDI_SUCCESS);
 298 }
 299 
/*
 * Publish a (possibly unterminated, space-padded) SCSI inquiry string
 * as a devinfo string property.  No-op if the string is empty after
 * trimming.
 */
static void
bd_prop_update_inqstring(dev_info_t *dip, char *name, char *data, size_t len)
{
	int	ilen;
	char	*data_string;

	/* Trimmed length of the printable ASCII portion. */
	ilen = scsi_ascii_inquiry_len(data, len);
	ASSERT3U(ilen, <=, len);
	if (ilen <= 0)
		return;
	/* ensure null termination */
	data_string = kmem_zalloc(ilen + 1, KM_SLEEP);
	bcopy(data, data_string, ilen);
	(void) ndi_prop_update_string(DDI_DEV_T_NONE, dip, name, data_string);
	/* ndi_prop_update_string copies the value, so free our scratch. */
	kmem_free(data_string, ilen + 1);
}
 316 
 317 static void
 318 bd_create_inquiry_props(dev_info_t *dip, bd_drive_t *drive)
 319 {
 320         if (drive->d_vendor_len > 0)
 321                 bd_prop_update_inqstring(dip, INQUIRY_VENDOR_ID,
 322                     drive->d_vendor, drive->d_vendor_len);
 323 
 324         if (drive->d_product_len > 0)
 325                 bd_prop_update_inqstring(dip, INQUIRY_PRODUCT_ID,
 326                     drive->d_product, drive->d_product_len);
 327 
 328         if (drive->d_serial_len > 0)
 329                 bd_prop_update_inqstring(dip, INQUIRY_SERIAL_NO,
 330                     drive->d_serial, drive->d_serial_len);
 331 
 332         if (drive->d_revision_len > 0)
 333                 bd_prop_update_inqstring(dip, INQUIRY_REVISION_ID,
 334                     drive->d_revision, drive->d_revision_len);
 335 }
 336 
/*
 * Create the per-device "device_error" named kstat and point
 * bd->d_kerr/d_errmutex at its data and lock.  If kstat creation fails
 * we fall back to a private scratch bd_errstats and mutex so the error
 * update paths can run unconditionally.  Idempotent: returns early if
 * the kstat already exists.
 */
static void
bd_create_errstats(bd_t *bd, int inst, bd_drive_t *drive)
{
	char	ks_module[KSTAT_STRLEN];
	char	ks_name[KSTAT_STRLEN];
	int	ndata = sizeof (struct bd_errstats) / sizeof (kstat_named_t);

	if (bd->d_errstats != NULL)
		return;

	/* e.g. "blkdeverr" / "blkdev0,err" naming convention. */
	(void) snprintf(ks_module, sizeof (ks_module), "%serr",
	    ddi_driver_name(bd->d_dip));
	(void) snprintf(ks_name, sizeof (ks_name), "%s%d,err",
	    ddi_driver_name(bd->d_dip), inst);

	bd->d_errstats = kstat_create(ks_module, inst, ks_name, "device_error",
	    KSTAT_TYPE_NAMED, ndata, KSTAT_FLAG_PERSISTENT);

	if (bd->d_errstats == NULL) {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kerr = kmem_zalloc(sizeof (struct bd_errstats),
		    KM_SLEEP);
		bd->d_errmutex = kmem_zalloc(sizeof (kmutex_t), KM_SLEEP);
		mutex_init(bd->d_errmutex, NULL, MUTEX_DRIVER, NULL);
	} else {
		/* Supply a lock for the kstat if the framework didn't. */
		if (bd->d_errstats->ks_lock == NULL) {
			bd->d_errstats->ks_lock = kmem_zalloc(sizeof (kmutex_t),
			    KM_SLEEP);
			mutex_init(bd->d_errstats->ks_lock, NULL, MUTEX_DRIVER,
			    NULL);
		}

		bd->d_errmutex = bd->d_errstats->ks_lock;
		bd->d_kerr = (struct bd_errstats *)bd->d_errstats->ks_data;
	}

	/* Initialize the named entries within the kstat data area. */
	kstat_named_init(&bd->d_kerr->bd_softerrs,	"Soft Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_harderrs,	"Hard Errors",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_transerrs,	"Transport Errors",
	    KSTAT_DATA_UINT32);

	/* Model is preferred; fall back to separate vendor/product. */
	if (drive->d_model_len > 0) {
		kstat_named_init(&bd->d_kerr->bd_model,	"Model",
		    KSTAT_DATA_STRING);
	} else {
		kstat_named_init(&bd->d_kerr->bd_vid,	"Vendor",
		    KSTAT_DATA_STRING);
		kstat_named_init(&bd->d_kerr->bd_pid,	"Product",
		    KSTAT_DATA_STRING);
	}

	kstat_named_init(&bd->d_kerr->bd_revision,	"Revision",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_serial,	"Serial No",
	    KSTAT_DATA_STRING);
	kstat_named_init(&bd->d_kerr->bd_capacity,	"Size",
	    KSTAT_DATA_ULONGLONG);
	kstat_named_init(&bd->d_kerr->bd_rq_media_err,	"Media Error",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_ntrdy_err,	"Device Not Ready",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_nodev_err,	"No Device",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_recov_err,	"Recoverable",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_illrq_err,	"Illegal Request",
	    KSTAT_DATA_UINT32);
	kstat_named_init(&bd->d_kerr->bd_rq_pfa_err,
	    "Predictive Failure Analysis", KSTAT_DATA_UINT32);

	bd->d_errstats->ks_private = bd;

	kstat_install(bd->d_errstats);
}
 418 
/*
 * Set a string-valued kstat entry, but only once: existing values are
 * never overwritten.  A copy of str (len bytes, NUL-terminated) is
 * allocated when len > 0; otherwise the caller-supplied default alt is
 * used directly.
 * NOTE(review): the allocated copy is handed to kstat_named_setstr()
 * and is not freed here — confirm the teardown path accounts for it.
 */
static void
bd_errstats_setstr(kstat_named_t *k, char *str, size_t len, char *alt)
{
	char	*tmp;

	if (KSTAT_NAMED_STR_PTR(k) == NULL) {
		if (len > 0) {
			tmp = kmem_alloc(len + 1, KM_SLEEP);
			(void) strlcpy(tmp, str, len + 1);
		} else {
			tmp = alt;
		}

		kstat_named_setstr(k, tmp);
	}
}
 435 
/*
 * Populate the string members of the error kstat from the drive info,
 * substituting (space-padded) defaults where the parent driver supplied
 * nothing.  d_errmutex is held while the kstat strings are written.
 */
static void
bd_init_errstats(bd_t *bd, bd_drive_t *drive)
{
	struct bd_errstats	*est = bd->d_kerr;

	mutex_enter(bd->d_errmutex);

	/* Prefer the model string; otherwise fill vendor + product. */
	if (drive->d_model_len > 0 &&
	    KSTAT_NAMED_STR_PTR(&est->bd_model) == NULL) {
		bd_errstats_setstr(&est->bd_model, drive->d_model,
		    drive->d_model_len, NULL);
	} else {
		bd_errstats_setstr(&est->bd_vid, drive->d_vendor,
		    drive->d_vendor_len, "Unknown ");
		bd_errstats_setstr(&est->bd_pid, drive->d_product,
		    drive->d_product_len, "Unknown         ");
	}

	bd_errstats_setstr(&est->bd_revision, drive->d_revision,
	    drive->d_revision_len, "0001");
	bd_errstats_setstr(&est->bd_serial, drive->d_serial,
	    drive->d_serial_len, "0               ");

	mutex_exit(bd->d_errmutex);
}
 461 
/*
 * attach(9E) entry point.  Builds the soft state from the parent's
 * handle (DMA attributes, ops vector, private data), sets up transfer
 * caches, kstats, cmlb labeling, devid registration, and the
 * properties layered consumers expect.  Teardown on failure is done
 * inline, in reverse order of setup.
 */
static int
bd_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
{
	int		inst;
	bd_handle_t	hdl;
	bd_t		*bd;
	bd_drive_t	drive;
	int		rv;
	char		name[16];
	char		kcache[32];

	switch (cmd) {
	case DDI_ATTACH:
		break;
	case DDI_RESUME:
		/* We don't do anything native for suspend/resume */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}

	inst = ddi_get_instance(dip);
	/* The parent driver stashed its bd_handle on our node. */
	hdl = ddi_get_parent_data(dip);

	(void) snprintf(name, sizeof (name), "%s%d",
	    ddi_driver_name(dip), ddi_get_instance(dip));
	(void) snprintf(kcache, sizeof (kcache), "%s_xfer", name);

	if (hdl == NULL) {
		cmn_err(CE_WARN, "%s: missing parent data!", name);
		return (DDI_FAILURE);
	}

	if (ddi_soft_state_zalloc(bd_state, inst) != DDI_SUCCESS) {
		cmn_err(CE_WARN, "%s: unable to zalloc soft state!", name);
		return (DDI_FAILURE);
	}
	/* Soft state is zero-filled at this point. */
	bd = ddi_get_soft_state(bd_state, inst);

	if (hdl->h_dma) {
		/* DMA-capable parent: copy and sanitize its attributes. */
		bd->d_dma = *(hdl->h_dma);
		bd->d_dma.dma_attr_granular =
		    max(DEV_BSIZE, bd->d_dma.dma_attr_granular);
		bd->d_use_dma = B_TRUE;

		/*
		 * NOTE(review): d_maxxfer is still 0 here (zalloc'd
		 * above), so this branch appears unreachable; both arms
		 * assign the same value regardless.
		 */
		if (bd->d_maxxfer &&
		    (bd->d_maxxfer != bd->d_dma.dma_attr_maxxfer)) {
			cmn_err(CE_WARN,
			    "%s: inconsistent maximum transfer size!",
			    name);
			/* We force it */
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		} else {
			bd->d_maxxfer = bd->d_dma.dma_attr_maxxfer;
		}
	} else {
		/* PIO parent: default the transfer limit to 1 MiB. */
		bd->d_use_dma = B_FALSE;
		if (bd->d_maxxfer == 0) {
			bd->d_maxxfer = 1024 * 1024;
		}
	}
	bd->d_ops = hdl->h_ops;
	bd->d_private = hdl->h_private;
	bd->d_blkshift = 9;  /* 512 bytes, to start */

	/* Transfer limit must be a non-zero multiple of DEV_BSIZE. */
	if (bd->d_maxxfer % DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer misaligned!", name);
		bd->d_maxxfer &= ~(DEV_BSIZE - 1);
	}
	if (bd->d_maxxfer < DEV_BSIZE) {
		cmn_err(CE_WARN, "%s: maximum transfer size too small!", name);
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	bd->d_dip = dip;
	bd->d_handle = hdl;
	hdl->h_bd = bd;
	ddi_set_driver_private(dip, bd);

	mutex_init(&bd->d_iomutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_ocmutex, NULL, MUTEX_DRIVER, NULL);
	mutex_init(&bd->d_statemutex, NULL, MUTEX_DRIVER, NULL);
	cv_init(&bd->d_statecv, NULL, CV_DRIVER, NULL);

	list_create(&bd->d_waitq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));
	list_create(&bd->d_runq, sizeof (bd_xfer_impl_t),
	    offsetof(struct bd_xfer_impl, i_linkage));

	/* Per-instance cache of transfer contexts (pre-binds DMA handles). */
	bd->d_cache = kmem_cache_create(kcache, sizeof (bd_xfer_impl_t), 8,
	    bd_xfer_ctor, bd_xfer_dtor, NULL, bd, NULL, 0);

	bd->d_ksp = kstat_create(ddi_driver_name(dip), inst, NULL, "disk",
	    KSTAT_TYPE_IO, 1, KSTAT_FLAG_PERSISTENT);
	if (bd->d_ksp != NULL) {
		bd->d_ksp->ks_lock = &bd->d_iomutex;
		kstat_install(bd->d_ksp);
		bd->d_kiop = bd->d_ksp->ks_data;
	} else {
		/*
		 * Even if we cannot create the kstat, we create a
		 * scratch kstat.  The reason for this is to ensure
		 * that we can update the kstat all of the time,
		 * without adding an extra branch instruction.
		 */
		bd->d_kiop = kmem_zalloc(sizeof (kstat_io_t), KM_SLEEP);
	}

	cmlb_alloc_handle(&bd->d_cmlbh);

	bd->d_state = DKIO_NONE;

	/* Ask the parent driver to describe the drive. */
	bzero(&drive, sizeof (drive));
	bd->d_ops.o_drive_info(bd->d_private, &drive);
	bd->d_qsize = drive.d_qsize;
	bd->d_removable = drive.d_removable;
	bd->d_hotpluggable = drive.d_hotpluggable;

	/* The drive may further restrict the transfer size. */
	if (drive.d_maxxfer && drive.d_maxxfer < bd->d_maxxfer)
		bd->d_maxxfer = drive.d_maxxfer;

	bd_create_inquiry_props(dip, &drive);

	bd_create_errstats(bd, inst, &drive);
	bd_init_errstats(bd, &drive);
	bd_update_state(bd);

	/*
	 * Node type: EUI64-identified devices get DDI_NT_BLOCK_BLKDEV,
	 * LUN-addressed devices DDI_NT_BLOCK_CHAN, else DDI_NT_BLOCK.
	 */
	rv = cmlb_attach(dip, &bd_tg_ops, DTYPE_DIRECT,
	    bd->d_removable, bd->d_hotpluggable,
	    /*LINTED: E_BAD_PTR_CAST_ALIGN*/
	    *(uint64_t *)drive.d_eui64 != 0 ? DDI_NT_BLOCK_BLKDEV :
	    drive.d_lun >= 0 ? DDI_NT_BLOCK_CHAN : DDI_NT_BLOCK,
	    CMLB_FAKE_LABEL_ONE_PARTITION, bd->d_cmlbh, 0);
	if (rv != 0) {
		/* Unwind everything set up above, in reverse order. */
		cmlb_free_handle(&bd->d_cmlbh);
		kmem_cache_destroy(bd->d_cache);
		mutex_destroy(&bd->d_iomutex);
		mutex_destroy(&bd->d_ocmutex);
		mutex_destroy(&bd->d_statemutex);
		cv_destroy(&bd->d_statecv);
		list_destroy(&bd->d_waitq);
		list_destroy(&bd->d_runq);
		if (bd->d_ksp != NULL) {
			kstat_delete(bd->d_ksp);
			bd->d_ksp = NULL;
		} else {
			kmem_free(bd->d_kiop, sizeof (kstat_io_t));
		}
		ddi_soft_state_free(bd_state, inst);
		return (DDI_FAILURE);
	}

	/* Register a devid if the parent driver can provide one. */
	if (bd->d_ops.o_devid_init != NULL) {
		rv = bd->d_ops.o_devid_init(bd->d_private, dip, &bd->d_devid);
		if (rv == DDI_SUCCESS) {
			if (ddi_devid_register(dip, bd->d_devid) !=
			    DDI_SUCCESS) {
				cmn_err(CE_WARN,
				    "%s: unable to register devid", name);
			}
		}
	}

	/*
	 * Add a zero-length attribute to tell the world we support
	 * kernel ioctls (for layered drivers).  Also set up properties
	 * used by HAL to identify removable media.
	 */
	(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
	    DDI_KERNEL_IOCTL, NULL, 0);
	if (bd->d_removable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "removable-media", NULL, 0);
	}
	if (bd->d_hotpluggable) {
		(void) ddi_prop_create(DDI_DEV_T_NONE, dip, DDI_PROP_CANSLEEP,
		    "hotpluggable", NULL, 0);
	}

	ddi_report_dev(dip);

	return (DDI_SUCCESS);
}
 646 
/*
 * detach(9E) entry point: release everything bd_attach() created —
 * kstats (or their scratch fallbacks), the cmlb handle, devid, xfer
 * cache, locks, queues, and finally the soft state.  DDI_SUSPEND is a
 * no-op because the parent driver owns suspend/resume.
 */
static int
bd_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
{
	bd_t	*bd;

	bd = ddi_get_driver_private(dip);

	switch (cmd) {
	case DDI_DETACH:
		break;
	case DDI_SUSPEND:
		/* We don't suspend, but our parent does */
		return (DDI_SUCCESS);
	default:
		return (DDI_FAILURE);
	}
	if (bd->d_ksp != NULL) {
		kstat_delete(bd->d_ksp);
		bd->d_ksp = NULL;
	} else {
		/* No kstat was created; free the scratch copy instead. */
		kmem_free(bd->d_kiop, sizeof (kstat_io_t));
	}

	if (bd->d_errstats != NULL) {
		kstat_delete(bd->d_errstats);
		bd->d_errstats = NULL;
	} else {
		/* Scratch error stats: d_errmutex was kmem_zalloc'd too. */
		kmem_free(bd->d_kerr, sizeof (struct bd_errstats));
		mutex_destroy(bd->d_errmutex);
	}

	cmlb_detach(bd->d_cmlbh, 0);
	cmlb_free_handle(&bd->d_cmlbh);
	if (bd->d_devid)
		ddi_devid_free(bd->d_devid);
	kmem_cache_destroy(bd->d_cache);
	mutex_destroy(&bd->d_iomutex);
	mutex_destroy(&bd->d_ocmutex);
	mutex_destroy(&bd->d_statemutex);
	cv_destroy(&bd->d_statecv);
	list_destroy(&bd->d_waitq);
	list_destroy(&bd->d_runq);
	ddi_soft_state_free(bd_state, ddi_get_instance(dip));
	return (DDI_SUCCESS);
}
 692 
 693 static int
 694 bd_xfer_ctor(void *buf, void *arg, int kmflag)
 695 {
 696         bd_xfer_impl_t  *xi;
 697         bd_t            *bd = arg;
 698         int             (*dcb)(caddr_t);
 699 
 700         if (kmflag == KM_PUSHPAGE || kmflag == KM_SLEEP) {
 701                 dcb = DDI_DMA_SLEEP;
 702         } else {
 703                 dcb = DDI_DMA_DONTWAIT;
 704         }
 705 
 706         xi = buf;
 707         bzero(xi, sizeof (*xi));
 708         xi->i_bd = bd;
 709 
 710         if (bd->d_use_dma) {
 711                 if (ddi_dma_alloc_handle(bd->d_dip, &bd->d_dma, dcb, NULL,
 712                     &xi->i_dmah) != DDI_SUCCESS) {
 713                         return (-1);
 714                 }
 715         }
 716 
 717         return (0);
 718 }
 719 
 720 static void
 721 bd_xfer_dtor(void *buf, void *arg)
 722 {
 723         bd_xfer_impl_t  *xi = buf;
 724 
 725         _NOTE(ARGUNUSED(arg));
 726 
 727         if (xi->i_dmah)
 728                 ddi_dma_free_handle(&xi->i_dmah);
 729         xi->i_dmah = NULL;
 730 }
 731 
/*
 * Allocate and initialize a transfer context for bp.  For PIO devices
 * the buffer is mapped in and carved into d_maxxfer-sized chunks; for
 * DMA devices the buffer is bound to the pre-allocated handle, possibly
 * as multiple DMA windows.  On failure the buf is bioerror()'d and NULL
 * is returned.  func is the driver op to invoke for non-read/write
 * transfers (reads/writes override it below).
 */
static bd_xfer_impl_t *
bd_xfer_alloc(bd_t *bd, struct buf *bp, int (*func)(void *, bd_xfer_t *),
    int kmflag)
{
	bd_xfer_impl_t		*xi;
	int			rv = 0;
	int			status;
	unsigned		dir;
	int			(*cb)(caddr_t);
	size_t			len;
	uint32_t		shift;

	if (kmflag == KM_SLEEP) {
		cb = DDI_DMA_SLEEP;
	} else {
		cb = DDI_DMA_DONTWAIT;
	}

	xi = kmem_cache_alloc(bd->d_cache, kmflag);
	if (xi == NULL) {
		bioerror(bp, ENOMEM);
		return (NULL);
	}

	ASSERT(bp);

	xi->i_bp = bp;
	xi->i_func = func;
	/* Convert the 512-byte-unit b_lblkno into device blocks. */
	xi->i_blkno = bp->b_lblkno >> (bd->d_blkshift - DEV_BSHIFT);

	if (bp->b_bcount == 0) {
		/* Zero-length transfer (e.g. flush): no data setup needed. */
		xi->i_len = 0;
		xi->i_nblks = 0;
		xi->i_kaddr = NULL;
		xi->i_resid = 0;
		xi->i_num_win = 0;
		goto done;
	}

	/* Data transfers always use the drive's read/write ops. */
	if (bp->b_flags & B_READ) {
		dir = DDI_DMA_READ;
		xi->i_func = bd->d_ops.o_read;
	} else {
		dir = DDI_DMA_WRITE;
		xi->i_func = bd->d_ops.o_write;
	}

	shift = bd->d_blkshift;
	xi->i_blkshift = shift;

	if (!bd->d_use_dma) {
		/* PIO: map the buffer and chunk it by d_maxxfer. */
		bp_mapin(bp);
		rv = 0;
		xi->i_offset = 0;
		xi->i_num_win =
		    (bp->b_bcount + (bd->d_maxxfer - 1)) / bd->d_maxxfer;
		xi->i_cur_win = 0;
		xi->i_len = min(bp->b_bcount, bd->d_maxxfer);
		xi->i_nblks = xi->i_len >> shift;
		xi->i_kaddr = bp->b_un.b_addr;
		xi->i_resid = bp->b_bcount;
	} else {

		/*
		 * We have to use consistent DMA if the address is misaligned.
		 */
		if (((bp->b_flags & (B_PAGEIO | B_REMAPPED)) != B_PAGEIO) &&
		    ((uintptr_t)bp->b_un.b_addr & 0x7)) {
			dir |= DDI_DMA_CONSISTENT | DDI_DMA_PARTIAL;
		} else {
			dir |= DDI_DMA_STREAMING | DDI_DMA_PARTIAL;
		}

		status = ddi_dma_buf_bind_handle(xi->i_dmah, bp, dir, cb,
		    NULL, &xi->i_dmac, &xi->i_ndmac);
		switch (status) {
		case DDI_DMA_MAPPED:
			/* Whole buffer bound in a single window. */
			xi->i_num_win = 1;
			xi->i_cur_win = 0;
			xi->i_offset = 0;
			xi->i_len = bp->b_bcount;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_PARTIAL_MAP:
			xi->i_cur_win = 0;

			/*
			 * Activate the first window; a window length
			 * that isn't block-aligned is unusable.
			 */
			if ((ddi_dma_numwin(xi->i_dmah, &xi->i_num_win) !=
			    DDI_SUCCESS) ||
			    (ddi_dma_getwin(xi->i_dmah, 0, &xi->i_offset,
			    &len, &xi->i_dmac, &xi->i_ndmac) !=
			    DDI_SUCCESS) ||
			    (P2PHASE(len, (1U << shift)) != 0)) {
				(void) ddi_dma_unbind_handle(xi->i_dmah);
				rv = EFAULT;
				goto done;
			}
			xi->i_len = len;
			xi->i_nblks = xi->i_len >> shift;
			xi->i_resid = bp->b_bcount;
			rv = 0;
			break;
		case DDI_DMA_NORESOURCES:
			rv = EAGAIN;
			goto done;
		case DDI_DMA_TOOBIG:
			rv = EINVAL;
			goto done;
		case DDI_DMA_NOMAPPING:
		case DDI_DMA_INUSE:
		default:
			rv = EFAULT;
			goto done;
		}
	}

done:
	if (rv != 0) {
		kmem_cache_free(bd->d_cache, xi);
		bioerror(bp, rv);
		return (NULL);
	}

	return (xi);
}
 858 
 859 static void
 860 bd_xfer_free(bd_xfer_impl_t *xi)
 861 {
 862         if (xi->i_dmah) {
 863                 (void) ddi_dma_unbind_handle(xi->i_dmah);
 864         }
 865         kmem_cache_free(xi->i_bd->d_cache, xi);
 866 }
 867 
 868 static int
 869 bd_open(dev_t *devp, int flag, int otyp, cred_t *credp)
 870 {
 871         dev_t           dev = *devp;
 872         bd_t            *bd;
 873         minor_t         part;
 874         minor_t         inst;
 875         uint64_t        mask;
 876         boolean_t       ndelay;
 877         int             rv;
 878         diskaddr_t      nblks;
 879         diskaddr_t      lba;
 880         int             i;
 881 
 882         _NOTE(ARGUNUSED(credp));
 883 
 884         part = BDPART(dev);
 885         inst = BDINST(dev);
 886 
 887         if (otyp >= OTYPCNT)
 888                 return (EINVAL);
 889 
 890         ndelay = (flag & (FNDELAY | FNONBLOCK)) ? B_TRUE : B_FALSE;
 891 
 892         /*
 893          * Block any DR events from changing the set of registered
 894          * devices while we function.
 895          */
 896         rw_enter(&bd_lock, RW_READER);
 897         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
 898                 rw_exit(&bd_lock);
 899                 return (ENXIO);
 900         }
 901 
 902         mutex_enter(&bd->d_ocmutex);
 903 
 904         ASSERT(part < 64);
 905         mask = (1U << part);
 906 
 907         bd_update_state(bd);
 908 
 909         if (cmlb_validate(bd->d_cmlbh, 0, 0) != 0) {
 910 
 911                 /* non-blocking opens are allowed to succeed */
 912                 if (!ndelay) {
 913                         rv = ENXIO;
 914                         goto done;
 915                 }
 916         } else if (cmlb_partinfo(bd->d_cmlbh, part, &nblks, &lba,
 917             NULL, NULL, 0) == 0) {
 918 
 919                 /*
 920                  * We read the partinfo, verify valid ranges.  If the
 921                  * partition is invalid, and we aren't blocking or
 922                  * doing a raw access, then fail. (Non-blocking and
 923                  * raw accesses can still succeed to allow a disk with
 924                  * bad partition data to opened by format and fdisk.)
 925                  */
 926                 if ((!nblks) && ((!ndelay) || (otyp != OTYP_CHR))) {
 927                         rv = ENXIO;
 928                         goto done;
 929                 }
 930         } else if (!ndelay) {
 931                 /*
 932                  * cmlb_partinfo failed -- invalid partition or no
 933                  * disk label.
 934                  */
 935                 rv = ENXIO;
 936                 goto done;
 937         }
 938 
 939         if ((flag & FWRITE) && bd->d_rdonly) {
 940                 rv = EROFS;
 941                 goto done;
 942         }
 943 
 944         if ((bd->d_open_excl) & (mask)) {
 945                 rv = EBUSY;
 946                 goto done;
 947         }
 948         if (flag & FEXCL) {
 949                 if (bd->d_open_lyr[part]) {
 950                         rv = EBUSY;
 951                         goto done;
 952                 }
 953                 for (i = 0; i < OTYP_LYR; i++) {
 954                         if (bd->d_open_reg[i] & mask) {
 955                                 rv = EBUSY;
 956                                 goto done;
 957                         }
 958                 }
 959         }
 960 
 961         if (otyp == OTYP_LYR) {
 962                 bd->d_open_lyr[part]++;
 963         } else {
 964                 bd->d_open_reg[otyp] |= mask;
 965         }
 966         if (flag & FEXCL) {
 967                 bd->d_open_excl |= mask;
 968         }
 969 
 970         rv = 0;
 971 done:
 972         mutex_exit(&bd->d_ocmutex);
 973         rw_exit(&bd_lock);
 974 
 975         return (rv);
 976 }
 977 
 978 static int
 979 bd_close(dev_t dev, int flag, int otyp, cred_t *credp)
 980 {
 981         bd_t            *bd;
 982         minor_t         inst;
 983         minor_t         part;
 984         uint64_t        mask;
 985         boolean_t       last = B_TRUE;
 986         int             i;
 987 
 988         _NOTE(ARGUNUSED(flag));
 989         _NOTE(ARGUNUSED(credp));
 990 
 991         part = BDPART(dev);
 992         inst = BDINST(dev);
 993 
 994         ASSERT(part < 64);
 995         mask = (1U << part);
 996 
 997         rw_enter(&bd_lock, RW_READER);
 998 
 999         if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
1000                 rw_exit(&bd_lock);
1001                 return (ENXIO);
1002         }
1003 
1004         mutex_enter(&bd->d_ocmutex);
1005         if (bd->d_open_excl & mask) {
1006                 bd->d_open_excl &= ~mask;
1007         }
1008         if (otyp == OTYP_LYR) {
1009                 bd->d_open_lyr[part]--;
1010         } else {
1011                 bd->d_open_reg[otyp] &= ~mask;
1012         }
1013         for (i = 0; i < 64; i++) {
1014                 if (bd->d_open_lyr[part]) {
1015                         last = B_FALSE;
1016                 }
1017         }
1018         for (i = 0; last && (i < OTYP_LYR); i++) {
1019                 if (bd->d_open_reg[i]) {
1020                         last = B_FALSE;
1021                 }
1022         }
1023         mutex_exit(&bd->d_ocmutex);
1024 
1025         if (last) {
1026                 cmlb_invalidate(bd->d_cmlbh, 0);
1027         }
1028         rw_exit(&bd_lock);
1029 
1030         return (0);
1031 }
1032 
/*
 * dump(9e) entry point: write nblk DEV_BSIZE blocks from caddr to the
 * dump device starting at DEV_BSIZE block blkno.  This runs in panic
 * context, so the transfer is issued in polled mode (BD_XFER_POLL)
 * and no allocation may sleep.
 */
static int
bd_dump(dev_t dev, caddr_t caddr, daddr_t blkno, int nblk)
{
	minor_t		inst;
	minor_t		part;
	diskaddr_t	pstart;
	diskaddr_t	psize;
	bd_t		*bd;
	bd_xfer_impl_t	*xi;
	buf_t		*bp;
	int		rv;
	uint32_t	shift;
	daddr_t		d_blkno;
	int	d_nblk;

	rw_enter(&bd_lock, RW_READER);

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}
	/*
	 * Convert DEV_BSIZE units into device-block units.  NOTE(review):
	 * this assumes d_blkshift >= DEV_BSHIFT (device blocks at least
	 * 512 bytes), which bd_update_state's 512-byte minimum enforces.
	 */
	shift = bd->d_blkshift;
	d_blkno = blkno >> (shift - DEV_BSHIFT);
	d_nblk = nblk >> (shift - DEV_BSHIFT);
	/*
	 * do cmlb, but do it synchronously unless we already have the
	 * partition (which we probably should.)
	 */
	if (cmlb_partinfo(bd->d_cmlbh, part, &psize, &pstart, NULL, NULL,
	    (void *)1)) {
		rw_exit(&bd_lock);
		return (ENXIO);
	}

	/* Refuse writes extending past the end of the partition. */
	if ((d_blkno + d_nblk) > psize) {
		rw_exit(&bd_lock);
		return (EINVAL);
	}
	bp = getrbuf(KM_NOSLEEP);
	if (bp == NULL) {
		rw_exit(&bd_lock);
		return (ENOMEM);
	}

	bp->b_bcount = nblk << DEV_BSHIFT;
	bp->b_resid = bp->b_bcount;
	bp->b_lblkno = blkno;
	bp->b_un.b_addr = caddr;

	xi = bd_xfer_alloc(bd, bp,  bd->d_ops.o_write, KM_NOSLEEP);
	if (xi == NULL) {
		rw_exit(&bd_lock);
		freerbuf(bp);
		return (ENOMEM);
	}
	/* Absolute device block: partition-relative offset plus start. */
	xi->i_blkno = d_blkno + pstart;
	xi->i_flags = BD_XFER_POLL;
	bd_submit(bd, xi);
	rw_exit(&bd_lock);

	/*
	 * Generally, we should have run this entirely synchronously
	 * at this point and the biowait call should be a no-op.  If
	 * it didn't happen this way, it's a bug in the underlying
	 * driver not honoring BD_XFER_POLL.
	 */
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);
	return (rv);
}
1107 
1108 void
1109 bd_minphys(struct buf *bp)
1110 {
1111         minor_t inst;
1112         bd_t    *bd;
1113         inst = BDINST(bp->b_edev);
1114 
1115         bd = ddi_get_soft_state(bd_state, inst);
1116 
1117         /*
1118          * In a non-debug kernel, bd_strategy will catch !bd as
1119          * well, and will fail nicely.
1120          */
1121         ASSERT(bd);
1122 
1123         if (bp->b_bcount > bd->d_maxxfer)
1124                 bp->b_bcount = bd->d_maxxfer;
1125 }
1126 
1127 static int
1128 bd_check_uio(dev_t dev, struct uio *uio)
1129 {
1130         bd_t            *bd;
1131         uint32_t        shift;
1132 
1133         if ((bd = ddi_get_soft_state(bd_state, BDINST(dev))) == NULL) {
1134                 return (ENXIO);
1135         }
1136 
1137         shift = bd->d_blkshift;
1138         if ((P2PHASE(uio->uio_loffset, (1U << shift)) != 0) ||
1139             (P2PHASE(uio->uio_iov->iov_len, (1U << shift)) != 0)) {
1140                 return (EINVAL);
1141         }
1142 
1143         return (0);
1144 }
1145 
1146 static int
1147 bd_read(dev_t dev, struct uio *uio, cred_t *credp)
1148 {
1149         _NOTE(ARGUNUSED(credp));
1150         int     ret = bd_check_uio(dev, uio);
1151         if (ret != 0) {
1152                 return (ret);
1153         }
1154         return (physio(bd_strategy, NULL, dev, B_READ, bd_minphys, uio));
1155 }
1156 
1157 static int
1158 bd_write(dev_t dev, struct uio *uio, cred_t *credp)
1159 {
1160         _NOTE(ARGUNUSED(credp));
1161         int     ret = bd_check_uio(dev, uio);
1162         if (ret != 0) {
1163                 return (ret);
1164         }
1165         return (physio(bd_strategy, NULL, dev, B_WRITE, bd_minphys, uio));
1166 }
1167 
1168 static int
1169 bd_aread(dev_t dev, struct aio_req *aio, cred_t *credp)
1170 {
1171         _NOTE(ARGUNUSED(credp));
1172         int     ret = bd_check_uio(dev, aio->aio_uio);
1173         if (ret != 0) {
1174                 return (ret);
1175         }
1176         return (aphysio(bd_strategy, anocancel, dev, B_READ, bd_minphys, aio));
1177 }
1178 
1179 static int
1180 bd_awrite(dev_t dev, struct aio_req *aio, cred_t *credp)
1181 {
1182         _NOTE(ARGUNUSED(credp));
1183         int     ret = bd_check_uio(dev, aio->aio_uio);
1184         if (ret != 0) {
1185                 return (ret);
1186         }
1187         return (aphysio(bd_strategy, anocancel, dev, B_WRITE, bd_minphys, aio));
1188 }
1189 
/*
 * strategy(9e) entry point.  Validates block alignment and partition
 * bounds, clips transfers that run past the end of the partition
 * (reporting the clipped portion in b_resid), and submits the request
 * to the underlying driver.  Always returns 0; errors are delivered
 * via bioerror()/biodone().
 */
static int
bd_strategy(struct buf *bp)
{
	minor_t		inst;
	minor_t		part;
	bd_t		*bd;
	diskaddr_t	p_lba;
	diskaddr_t	p_nblks;
	diskaddr_t	b_nblks;
	bd_xfer_impl_t	*xi;
	uint32_t	shift;
	int		(*func)(void *, bd_xfer_t *);
	diskaddr_t	lblkno;

	part = BDPART(bp->b_edev);
	inst = BDINST(bp->b_edev);

	ASSERT(bp);

	bp->b_resid = bp->b_bcount;

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	if (cmlb_partinfo(bd->d_cmlbh, part, &p_nblks, &p_lba,
	    NULL, NULL, 0)) {
		bioerror(bp, ENXIO);
		biodone(bp);
		return (0);
	}

	shift = bd->d_blkshift;
	/* Convert the DEV_BSIZE-based b_lblkno into device blocks. */
	lblkno = bp->b_lblkno >> (shift - DEV_BSHIFT);
	if ((P2PHASE(bp->b_lblkno, (1U << (shift - DEV_BSHIFT))) != 0) ||
	    (P2PHASE(bp->b_bcount, (1U << shift)) != 0) ||
	    (lblkno > p_nblks)) {
		bioerror(bp, EINVAL);
		biodone(bp);
		return (0);
	}
	b_nblks = bp->b_bcount >> shift;
	/* A zero-length transfer, or one starting exactly at EOF, is done. */
	if ((lblkno == p_nblks) || (bp->b_bcount == 0)) {
		biodone(bp);
		return (0);
	}

	/* Clip the transfer to the end of the partition. */
	if ((b_nblks + lblkno) > p_nblks) {
		bp->b_resid = ((lblkno + b_nblks - p_nblks) << shift);
		bp->b_bcount -= bp->b_resid;
	} else {
		bp->b_resid = 0;
	}
	func = (bp->b_flags & B_READ) ? bd->d_ops.o_read : bd->d_ops.o_write;

	xi = bd_xfer_alloc(bd, bp, func, KM_NOSLEEP);
	if (xi == NULL) {
		/* Retry with a flag that may dip into the page reserve. */
		xi = bd_xfer_alloc(bd, bp, func, KM_PUSHPAGE);
	}
	if (xi == NULL) {
		/* bd_xfer_alloc will have done bioerror */
		biodone(bp);
		return (0);
	}
	/* Bias by the partition start to form the absolute block number. */
	xi->i_blkno = lblkno + p_lba;

	bd_submit(bd, xi);

	return (0);
}
1262 
/*
 * ioctl(9e) entry point.  Label and partition ioctls are first offered
 * to cmlb; anything cmlb declines (ENOTTY) is handled here: media
 * info, controller info, simple boolean properties, media state
 * waiting, cache flush, and multi-host reservation pass-throughs.
 * Returns 0 on success, or an errno value.
 */
static int
bd_ioctl(dev_t dev, int cmd, intptr_t arg, int flag, cred_t *credp, int *rvalp)
{
	minor_t		inst;
	uint16_t	part;
	bd_t		*bd;
	void		*ptr = (void *)arg;
	int		rv;

	part = BDPART(dev);
	inst = BDINST(dev);

	if ((bd = ddi_get_soft_state(bd_state, inst)) == NULL) {
		return (ENXIO);
	}

	/* Give cmlb first crack at the command. */
	rv = cmlb_ioctl(bd->d_cmlbh, dev, cmd, arg, flag, credp, rvalp, 0);
	if (rv != ENOTTY)
		return (rv);

	if (rvalp != NULL) {
		/* the return value of the ioctl is 0 by default */
		*rvalp = 0;
	}

	switch (cmd) {
	case DKIOCGMEDIAINFO: {
		struct dk_minfo minfo;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&minfo, sizeof (minfo));
		minfo.dki_media_type = DK_FIXED_DISK;
		minfo.dki_lbsize = (1U << bd->d_blkshift);
		minfo.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&minfo, ptr, sizeof (minfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCGMEDIAINFOEXT: {
		struct dk_minfo_ext miext;

		/* make sure our state information is current */
		bd_update_state(bd);
		bzero(&miext, sizeof (miext));
		miext.dki_media_type = DK_FIXED_DISK;
		miext.dki_lbsize = (1U << bd->d_blkshift);
		miext.dki_pbsize = (1U << bd->d_pblkshift);
		miext.dki_capacity = bd->d_numblks;
		if (ddi_copyout(&miext, ptr, sizeof (miext), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCINFO: {
		/* Controller info is derived from our parent nexus node. */
		struct dk_cinfo cinfo;
		bzero(&cinfo, sizeof (cinfo));
		cinfo.dki_ctype = DKC_BLKDEV;
		cinfo.dki_cnum = ddi_get_instance(ddi_get_parent(bd->d_dip));
		(void) snprintf(cinfo.dki_cname, sizeof (cinfo.dki_cname),
		    "%s", ddi_driver_name(ddi_get_parent(bd->d_dip)));
		(void) snprintf(cinfo.dki_dname, sizeof (cinfo.dki_dname),
		    "%s", ddi_driver_name(bd->d_dip));
		cinfo.dki_unit = inst;
		cinfo.dki_flags = DKI_FMTVOL;
		cinfo.dki_partition = part;
		cinfo.dki_maxtransfer = bd->d_maxxfer / DEV_BSIZE;
		cinfo.dki_addr = 0;
		cinfo.dki_slave = 0;
		cinfo.dki_space = 0;
		cinfo.dki_prio = 0;
		cinfo.dki_vec = 0;
		if (ddi_copyout(&cinfo, ptr, sizeof (cinfo), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREMOVABLE: {
		int i;
		i = bd->d_removable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCHOTPLUGGABLE: {
		int i;
		i = bd->d_hotpluggable ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCREADONLY: {
		int i;
		i = bd->d_rdonly ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSOLIDSTATE: {
		int i;
		i = bd->d_ssd ? 1 : 0;
		if (ddi_copyout(&i, ptr, sizeof (i), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCSTATE: {
		/* Block until the media state differs from the one passed in. */
		enum dkio_state state;
		if (ddi_copyin(ptr, &state, sizeof (state), flag)) {
			return (EFAULT);
		}
		if ((rv = bd_check_state(bd, &state)) != 0) {
			return (rv);
		}
		if (ddi_copyout(&state, ptr, sizeof (state), flag)) {
			return (EFAULT);
		}
		return (0);
	}
	case DKIOCFLUSHWRITECACHE: {
		struct dk_callback *dkc = NULL;

		/* A callback pointer is only trustworthy from kernel callers. */
		if (flag & FKIOCTL)
			dkc = (void *)arg;

		rv = bd_flush_write_cache(bd, dkc);
		return (rv);
	}

	case MHIOCTKOWN:
	{
		return (bd_reserve(bd, BD_XFER_MHD_TKOWN));
	}

	case MHIOCRELEASE:
	{
		return (bd_reserve(bd, BD_XFER_MHD_RELEASE));
	}

	case MHIOCSTATUS:
	{
		/* Reservation conflict is reported via *rvalp, not errno. */
		rv = bd_reserve(bd, BD_XFER_MHD_STATUS);
		if (rvalp != NULL)
			*rvalp = rv == 0 ? 0: 1;
		return (0);
	}

	case MHIOCQRESERVE:
	{
		return (bd_reserve(bd, BD_XFER_MHD_QRESERVE));
	}

	case MHIOCENFAILFAST:
	{
		return (bd_reserve(bd, BD_XFER_MHD_ENFAILFAST));
	}

	default:
		break;

	}
	return (ENOTTY);
}
1430 
1431 static int
1432 bd_prop_op(dev_t dev, dev_info_t *dip, ddi_prop_op_t prop_op, int mod_flags,
1433     char *name, caddr_t valuep, int *lengthp)
1434 {
1435         bd_t    *bd;
1436 
1437         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1438         if (bd == NULL)
1439                 return (ddi_prop_op(dev, dip, prop_op, mod_flags,
1440                     name, valuep, lengthp));
1441 
1442         return (cmlb_prop_op(bd->d_cmlbh, dev, dip, prop_op, mod_flags, name,
1443             valuep, lengthp, BDPART(dev), 0));
1444 }
1445 
1446 
/*
 * cmlb tg_rdwr callback: read or write "length" bytes of label data at
 * device block "start".  When tg_cookie is non-NULL we are in polled
 * context (e.g. dump(9e)) and must not sleep for allocations; the
 * transfer is then issued with BD_XFER_POLL.
 */
static int
bd_tg_rdwr(dev_info_t *dip, uchar_t cmd, void *bufaddr, diskaddr_t start,
    size_t length, void *tg_cookie)
{
	bd_t		*bd;
	buf_t		*bp;
	bd_xfer_impl_t	*xi;
	int		rv;
	int		(*func)(void *, bd_xfer_t *);
	int		kmflag;

	/*
	 * If we are running in polled mode (such as during dump(9e)
	 * execution), then we cannot sleep for kernel allocations.
	 */
	kmflag = tg_cookie ? KM_NOSLEEP : KM_SLEEP;

	/*
	 * NOTE(review): bd is not checked for NULL here; this relies on
	 * cmlb only invoking the callback while the instance is attached.
	 */
	bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));

	if (P2PHASE(length, (1U << bd->d_blkshift)) != 0) {
		/* We can only transfer whole blocks at a time! */
		return (EINVAL);
	}

	if ((bp = getrbuf(kmflag)) == NULL) {
		return (ENOMEM);
	}

	switch (cmd) {
	case TG_READ:
		bp->b_flags = B_READ;
		func = bd->d_ops.o_read;
		break;
	case TG_WRITE:
		bp->b_flags = B_WRITE;
		func = bd->d_ops.o_write;
		break;
	default:
		freerbuf(bp);
		return (EINVAL);
	}

	bp->b_un.b_addr = bufaddr;
	bp->b_bcount = length;
	xi = bd_xfer_alloc(bd, bp, func, kmflag);
	if (xi == NULL) {
		/* bd_xfer_alloc recorded the error on the buf. */
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}
	xi->i_flags = tg_cookie ? BD_XFER_POLL : 0;
	xi->i_blkno = start;
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
1506 
1507 static int
1508 bd_tg_getinfo(dev_info_t *dip, int cmd, void *arg, void *tg_cookie)
1509 {
1510         bd_t            *bd;
1511 
1512         _NOTE(ARGUNUSED(tg_cookie));
1513         bd = ddi_get_soft_state(bd_state, ddi_get_instance(dip));
1514 
1515         switch (cmd) {
1516         case TG_GETPHYGEOM:
1517         case TG_GETVIRTGEOM:
1518                 /*
1519                  * We don't have any "geometry" as such, let cmlb
1520                  * fabricate something.
1521                  */
1522                 return (ENOTTY);
1523 
1524         case TG_GETCAPACITY:
1525                 bd_update_state(bd);
1526                 *(diskaddr_t *)arg = bd->d_numblks;
1527                 return (0);
1528 
1529         case TG_GETBLOCKSIZE:
1530                 *(uint32_t *)arg = (1U << bd->d_blkshift);
1531                 return (0);
1532 
1533         case TG_GETATTR:
1534                 /*
1535                  * It turns out that cmlb really doesn't do much for
1536                  * non-writable media, but lets make the information
1537                  * available for it in case it does more in the
1538                  * future.  (The value is currently used for
1539                  * triggering special behavior for CD-ROMs.)
1540                  */
1541                 bd_update_state(bd);
1542                 ((tg_attribute_t *)arg)->media_is_writable =
1543                     bd->d_rdonly ? B_FALSE : B_TRUE;
1544                 ((tg_attribute_t *)arg)->media_is_solid_state = bd->d_ssd;
1545                 ((tg_attribute_t *)arg)->media_is_rotational = B_FALSE;
1546                 return (0);
1547 
1548         default:
1549                 return (EINVAL);
1550         }
1551 }
1552 
1553 
/*
 * Dispatch queued transfers to the underlying driver, up to the
 * driver's advertised queue depth (d_qsize).  Each job moves from the
 * wait queue to the run queue before submission; if the driver's
 * submit function fails synchronously, the job is completed with an
 * error and removed from the run queue here.
 */
static void
bd_sched(bd_t *bd)
{
	bd_xfer_impl_t	*xi;
	struct buf	*bp;
	int		rv;

	mutex_enter(&bd->d_iomutex);

	while ((bd->d_qactive < bd->d_qsize) &&
	    ((xi = list_remove_head(&bd->d_waitq)) != NULL)) {
		bd->d_qactive++;
		kstat_waitq_to_runq(bd->d_kiop);
		list_insert_tail(&bd->d_runq, xi);

		/*
		 * Submit the job to the driver.  We drop the I/O mutex
		 * so that we can deal with the case where the driver
		 * completion routine calls back into us synchronously.
		 */

		mutex_exit(&bd->d_iomutex);

		rv = xi->i_func(bd->d_private, &xi->i_public);
		if (rv != 0) {
			/* Synchronous failure: fail the buf ourselves. */
			bp = xi->i_bp;
			bioerror(bp, rv);
			biodone(bp);

			atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

			/* Undo the run-queue accounting done above. */
			mutex_enter(&bd->d_iomutex);
			bd->d_qactive--;
			kstat_runq_exit(bd->d_kiop);
			list_remove(&bd->d_runq, xi);
			bd_xfer_free(xi);
		} else {
			mutex_enter(&bd->d_iomutex);
		}
	}

	mutex_exit(&bd->d_iomutex);
}
1597 
/*
 * Queue a transfer for execution and kick the scheduler.  The job is
 * appended to the wait queue under d_iomutex; bd_sched() decides when
 * it is actually handed to the driver.
 */
static void
bd_submit(bd_t *bd, bd_xfer_impl_t *xi)
{
	mutex_enter(&bd->d_iomutex);
	list_insert_tail(&bd->d_waitq, xi);
	kstat_waitq_enter(bd->d_kiop);
	mutex_exit(&bd->d_iomutex);

	bd_sched(bd);
}
1608 
/*
 * Take a completed transfer off the run queue, account for it in the
 * I/O kstats (byte counts are net of any residual), and restart the
 * scheduler so a waiting job can use the freed queue slot.
 */
static void
bd_runq_exit(bd_xfer_impl_t *xi, int err)
{
	bd_t	*bd = xi->i_bd;
	buf_t	*bp = xi->i_bp;

	mutex_enter(&bd->d_iomutex);
	bd->d_qactive--;
	kstat_runq_exit(bd->d_kiop);
	list_remove(&bd->d_runq, xi);
	mutex_exit(&bd->d_iomutex);

	/* Only successful transfers count toward the I/O statistics. */
	if (err == 0) {
		if (bp->b_flags & B_READ) {
			bd->d_kiop->reads++;
			bd->d_kiop->nread += (bp->b_bcount - xi->i_resid);
		} else {
			bd->d_kiop->writes++;
			bd->d_kiop->nwritten += (bp->b_bcount - xi->i_resid);
		}
	}
	bd_sched(bd);
}
1632 
/*
 * Refresh the cached media state (block size, capacity, writability,
 * solid-state flag) from the underlying driver's o_media_info().  If
 * the media is absent or reports an unusable block size, the state
 * becomes DKIO_EJECTED with zero capacity.  Any state change wakes
 * waiters in bd_check_state(); a state or geometry change also
 * revalidates (or invalidates) the cmlb label.
 */
static void
bd_update_state(bd_t *bd)
{
	enum	dkio_state	state = DKIO_INSERTED;
	boolean_t		docmlb = B_FALSE;
	bd_media_t		media;

	bzero(&media, sizeof (media));

	mutex_enter(&bd->d_statemutex);
	if (bd->d_ops.o_media_info(bd->d_private, &media) != 0) {
		/* Driver cannot describe the media: treat as ejected. */
		bd->d_numblks = 0;
		state = DKIO_EJECTED;
		goto done;
	}

	/*
	 * Block size must be a power of two, at least 512 bytes, and
	 * must evenly divide the maximum transfer size.
	 */
	if ((media.m_blksize < 512) ||
	    (!ISP2(media.m_blksize)) ||
	    (P2PHASE(bd->d_maxxfer, media.m_blksize))) {
		cmn_err(CE_WARN, "%s%d: Invalid media block size (%d)",
		    ddi_driver_name(bd->d_dip), ddi_get_instance(bd->d_dip),
		    media.m_blksize);
		/*
		 * We can't use the media, treat it as not present.
		 */
		state = DKIO_EJECTED;
		bd->d_numblks = 0;
		goto done;
	}

	if (((1U << bd->d_blkshift) != media.m_blksize) ||
	    (bd->d_numblks != media.m_nblks)) {
		/* Device size changed */
		docmlb = B_TRUE;
	}

	/* m_blksize is a power of two, so ffs-1 yields its log2. */
	bd->d_blkshift = ddi_ffs(media.m_blksize) - 1;
	bd->d_pblkshift = bd->d_blkshift;
	bd->d_numblks = media.m_nblks;
	bd->d_rdonly = media.m_readonly;
	bd->d_ssd = media.m_solidstate;

	/*
	 * Only use the supplied physical block size if it is non-zero,
	 * greater or equal to the block size, and a power of 2. Ignore it
	 * if not, it's just informational and we can still use the media.
	 */
	if ((media.m_pblksize != 0) &&
	    (media.m_pblksize >= media.m_blksize) &&
	    (ISP2(media.m_pblksize)))
		bd->d_pblkshift = ddi_ffs(media.m_pblksize) - 1;

done:
	if (state != bd->d_state) {
		bd->d_state = state;
		/* Wake any DKIOCSTATE waiters in bd_check_state(). */
		cv_broadcast(&bd->d_statecv);
		docmlb = B_TRUE;
	}
	mutex_exit(&bd->d_statemutex);

	bd->d_kerr->bd_capacity.value.ui64 = bd->d_numblks << bd->d_blkshift;

	if (docmlb) {
		if (state == DKIO_INSERTED) {
			(void) cmlb_validate(bd->d_cmlbh, 0, 0);
		} else {
			cmlb_invalidate(bd->d_cmlbh, 0);
		}
	}
}
1703 
/*
 * Implementation of DKIOCSTATE: block until the media state differs
 * from *state, then return the new value through *state.  Re-polls
 * the driver roughly once per second.  Returns EINTR if the wait is
 * interrupted by a signal (cv_reltimedwait_sig returns 0 in that
 * case), otherwise 0.
 */
static int
bd_check_state(bd_t *bd, enum dkio_state *state)
{
	clock_t		when;

	for (;;) {

		/* Re-query the driver so d_state is current. */
		bd_update_state(bd);

		mutex_enter(&bd->d_statemutex);

		if (bd->d_state != *state) {
			*state = bd->d_state;
			mutex_exit(&bd->d_statemutex);
			break;
		}

		/* Wait up to one second for a state-change broadcast. */
		when = drv_usectohz(1000000);
		if (cv_reltimedwait_sig(&bd->d_statecv, &bd->d_statemutex,
		    when, TR_CLOCK_TICK) == 0) {
			mutex_exit(&bd->d_statemutex);
			return (EINTR);
		}

		mutex_exit(&bd->d_statemutex);
	}

	return (0);
}
1733 
1734 static int
1735 bd_flush_write_cache_done(struct buf *bp)
1736 {
1737         struct dk_callback *dc = (void *)bp->b_private;
1738 
1739         (*dc->dkc_callback)(dc->dkc_cookie, geterror(bp));
1740         kmem_free(dc, sizeof (*dc));
1741         freerbuf(bp);
1742         return (0);
1743 }
1744 
/*
 * Issue a write-cache flush to the driver.  If the caller supplied a
 * dk_callback, the flush is asynchronous: a private copy of the
 * callback is attached to the buf and invoked (then freed) by
 * bd_flush_write_cache_done().  Otherwise the flush is performed
 * synchronously.  Returns ENOTSUP if the driver has no o_sync_cache
 * entry point.
 */
static int
bd_flush_write_cache(bd_t *bd, struct dk_callback *dkc)
{
	buf_t			*bp;
	struct dk_callback	*dc;
	bd_xfer_impl_t		*xi;
	int			rv;

	if (bd->d_ops.o_sync_cache == NULL) {
		return (ENOTSUP);
	}
	if ((bp = getrbuf(KM_SLEEP)) == NULL) {
		return (ENOMEM);
	}
	/* A flush carries no data. */
	bp->b_resid = 0;
	bp->b_bcount = 0;

	xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_sync_cache, KM_SLEEP);
	if (xi == NULL) {
		rv = geterror(bp);
		freerbuf(bp);
		return (rv);
	}

	/* Make an asynchronous flush, but only if there is a callback */
	if (dkc != NULL && dkc->dkc_callback != NULL) {
		/* Make a private copy of the callback structure */
		dc = kmem_alloc(sizeof (*dc), KM_SLEEP);
		*dc = *dkc;
		bp->b_private = dc;
		bp->b_iodone = bd_flush_write_cache_done;

		bd_submit(bd, xi);
		return (0);
	}

	/* In case there is no callback, perform a synchronous flush */
	bd_submit(bd, xi);
	(void) biowait(bp);
	rv = geterror(bp);
	freerbuf(bp);

	return (rv);
}
1789 
1790 static int
1791 bd_reserve(bd_t *bd, int flag)
1792 {
1793         buf_t                   *bp;
1794         bd_xfer_impl_t          *xi;
1795         int                     rv;
1796 
1797         if (bd->d_ops.o_reserve == NULL) {
1798                 return (ENOTSUP);
1799         }
1800         if ((bp = getrbuf(KM_SLEEP)) == NULL) {
1801                 return (ENOMEM);
1802         }
1803         bp->b_resid = 0;
1804         bp->b_bcount = 0;
1805 
1806         xi = bd_xfer_alloc(bd, bp, bd->d_ops.o_reserve, KM_SLEEP);
1807         if (xi == NULL) {
1808                 rv = geterror(bp);
1809                 freerbuf(bp);
1810                 return (rv);
1811         }
1812 
1813         xi->i_flags = flag;
1814 
1815         bd_submit(bd, xi);
1816 
1817         /* wait synchronously */
1818         (void) biowait(bp);
1819         rv = geterror(bp);
1820         freerbuf(bp);
1821 
1822         return (rv);
1823 }
1824 
1825 /*
1826  * Nexus support.
1827  */
1828 int
1829 bd_bus_ctl(dev_info_t *dip, dev_info_t *rdip, ddi_ctl_enum_t ctlop,
1830     void *arg, void *result)
1831 {
1832         bd_handle_t     hdl;
1833 
1834         switch (ctlop) {
1835         case DDI_CTLOPS_REPORTDEV:
1836                 cmn_err(CE_CONT, "?Block device: %s@%s, %s%d\n",
1837                     ddi_node_name(rdip), ddi_get_name_addr(rdip),
1838                     ddi_driver_name(rdip), ddi_get_instance(rdip));
1839                 return (DDI_SUCCESS);
1840 
1841         case DDI_CTLOPS_INITCHILD:
1842                 hdl = ddi_get_parent_data((dev_info_t *)arg);
1843                 if (hdl == NULL) {
1844                         return (DDI_NOT_WELL_FORMED);
1845                 }
1846                 ddi_set_name_addr((dev_info_t *)arg, hdl->h_addr);
1847                 return (DDI_SUCCESS);
1848 
1849         case DDI_CTLOPS_UNINITCHILD:
1850                 ddi_set_name_addr((dev_info_t *)arg, NULL);
1851                 ndi_prop_remove_all((dev_info_t *)arg);
1852                 return (DDI_SUCCESS);
1853 
1854         default:
1855                 return (ddi_ctlops(dip, rdip, ctlop, arg, result));
1856         }
1857 }
1858 
1859 /*
1860  * Functions for device drivers.
1861  */
1862 bd_handle_t
1863 bd_alloc_handle(void *private, bd_ops_t *ops, ddi_dma_attr_t *dma, int kmflag)
1864 {
1865         bd_handle_t     hdl;
1866 
1867         hdl = kmem_zalloc(sizeof (*hdl), kmflag);
1868         if (hdl != NULL) {
1869                 hdl->h_ops = *ops;
1870                 hdl->h_dma = dma;
1871                 hdl->h_private = private;
1872         }
1873 
1874         return (hdl);
1875 }
1876 
/*
 * Release a handle obtained from bd_alloc_handle().  No teardown is
 * performed here beyond freeing the memory.
 */
void
bd_free_handle(bd_handle_t hdl)
{
	kmem_free(hdl, sizeof (*hdl));
}
1882 
1883 int
1884 bd_attach_handle(dev_info_t *dip, bd_handle_t hdl)
1885 {
1886         dev_info_t      *child;
1887         bd_drive_t      drive = { 0 };
1888 
1889         /*
1890          * It's not an error if bd_attach_handle() is called on a handle that
1891          * already is attached. We just ignore the request to attach and return.
1892          * This way drivers using blkdev don't have to keep track about blkdev
1893          * state, they can just call this function to make sure it attached.
1894          */
1895         if (hdl->h_child != NULL) {
1896                 return (DDI_SUCCESS);
1897         }
1898 
1899         /* if drivers don't override this, make it assume none */
1900         drive.d_lun = -1;
1901         hdl->h_ops.o_drive_info(hdl->h_private, &drive);
1902 
1903         hdl->h_parent = dip;
1904         hdl->h_name = "blkdev";
1905 
1906         /*LINTED: E_BAD_PTR_CAST_ALIGN*/
1907         if (*(uint64_t *)drive.d_eui64 != 0) {
1908                 if (drive.d_lun >= 0) {
1909                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1910                             "w%02X%02X%02X%02X%02X%02X%02X%02X,%X",
1911                             drive.d_eui64[0], drive.d_eui64[1],
1912                             drive.d_eui64[2], drive.d_eui64[3],
1913                             drive.d_eui64[4], drive.d_eui64[5],
1914                             drive.d_eui64[6], drive.d_eui64[7], drive.d_lun);
1915                 } else {
1916                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1917                             "w%02X%02X%02X%02X%02X%02X%02X%02X",
1918                             drive.d_eui64[0], drive.d_eui64[1],
1919                             drive.d_eui64[2], drive.d_eui64[3],
1920                             drive.d_eui64[4], drive.d_eui64[5],
1921                             drive.d_eui64[6], drive.d_eui64[7]);
1922                 }
1923         } else {
1924                 if (drive.d_lun >= 0) {
1925                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1926                             "%X,%X", drive.d_target, drive.d_lun);
1927                 } else {
1928                         (void) snprintf(hdl->h_addr, sizeof (hdl->h_addr),
1929                             "%X", drive.d_target);
1930                 }
1931         }
1932 
1933         if (ndi_devi_alloc(dip, hdl->h_name, (pnode_t)DEVI_SID_NODEID,
1934             &child) != NDI_SUCCESS) {
1935                 cmn_err(CE_WARN, "%s%d: unable to allocate node %s@%s",
1936                     ddi_driver_name(dip), ddi_get_instance(dip),
1937                     "blkdev", hdl->h_addr);
1938                 return (DDI_FAILURE);
1939         }
1940 
1941         ddi_set_parent_data(child, hdl);
1942         hdl->h_child = child;
1943 
1944         if (ndi_devi_online(child, 0) == NDI_FAILURE) {
1945                 cmn_err(CE_WARN, "%s%d: failed bringing node %s@%s online",
1946                     ddi_driver_name(dip), ddi_get_instance(dip),
1947                     hdl->h_name, hdl->h_addr);
1948                 (void) ndi_devi_free(child);
1949                 return (DDI_FAILURE);
1950         }
1951 
1952         return (DDI_SUCCESS);
1953 }
1954 
/*
 * Tear down the "blkdev" child node created by bd_attach_handle().
 * Safe to call on an already-detached handle.  Returns DDI_SUCCESS or
 * DDI_FAILURE.
 */
int
bd_detach_handle(bd_handle_t hdl)
{
	int	circ;
	int	rv;
	char	*devnm;

	/*
	 * It's not an error if bd_detach_handle() is called on a handle that
	 * already is detached. We just ignore the request to detach and return.
	 * This way drivers using blkdev don't have to keep track about blkdev
	 * state, they can just call this function to make sure it detached.
	 */
	if (hdl->h_child == NULL) {
		return (DDI_SUCCESS);
	}
	/* Serialize against other configuration activity on the parent. */
	ndi_devi_enter(hdl->h_parent, &circ);
	if (i_ddi_node_state(hdl->h_child) < DS_INITIALIZED) {
		/* Node never got a unit address; a plain remove suffices. */
		rv = ddi_remove_child(hdl->h_child, 0);
	} else {
		/*
		 * Clean out lingering devfs references by name, then
		 * unconfigure and remove the node.  "+ 1" skips the
		 * leading '/' that ddi_deviname() produces.
		 */
		devnm = kmem_alloc(MAXNAMELEN + 1, KM_SLEEP);
		(void) ddi_deviname(hdl->h_child, devnm);
		(void) devfs_clean(hdl->h_parent, devnm + 1, DV_CLEAN_FORCE);
		rv = ndi_devi_unconfig_one(hdl->h_parent, devnm + 1, NULL,
		    NDI_DEVI_REMOVE | NDI_UNCONFIG);
		kmem_free(devnm, MAXNAMELEN + 1);
	}
	if (rv == 0) {
		hdl->h_child = NULL;
	}

	ndi_devi_exit(hdl->h_parent, circ);
	return (rv == NDI_SUCCESS ? DDI_SUCCESS : DDI_FAILURE);
}
1989 
/*
 * Completion callback invoked by the underlying driver when one window
 * of a transfer finishes.  On error the remainder of the request is
 * failed; on success the buf is either completed or advanced to the
 * next DMA/memory window and resubmitted to the driver.
 */
void
bd_xfer_done(bd_xfer_t *xfer, int err)
{
	bd_xfer_impl_t	*xi = (void *)xfer;
	buf_t		*bp = xi->i_bp;
	int		rv = DDI_SUCCESS;
	bd_t		*bd = xi->i_bd;
	size_t		len;

	/* Device reported a hard error: fail the whole request. */
	if (err != 0) {
		bd_runq_exit(xi, err);
		atomic_inc_32(&bd->d_kerr->bd_harderrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, err);
		biodone(bp);
		return;
	}

	xi->i_cur_win++;
	xi->i_resid -= xi->i_len;

	if (xi->i_resid == 0) {
		/* Job completed successfully! */
		bd_runq_exit(xi, 0);

		bd_xfer_free(xi);
		biodone(bp);
		return;
	}

	xi->i_blkno += xi->i_nblks;

	if (bd->d_use_dma) {
		/* More transfer still pending... advance to next DMA window. */
		rv = ddi_dma_getwin(xi->i_dmah, xi->i_cur_win,
		    &xi->i_offset, &len, &xi->i_dmac, &xi->i_ndmac);
	} else {
		/* Advance memory window. */
		xi->i_kaddr += xi->i_len;
		xi->i_offset += xi->i_len;
		len = min(bp->b_bcount - xi->i_offset, bd->d_maxxfer);
	}


	/*
	 * Fail with EFAULT if the next window could not be established, or
	 * if its length is not a whole number of device blocks.
	 */
	if ((rv != DDI_SUCCESS) ||
	    (P2PHASE(len, (1U << xi->i_blkshift)) != 0)) {
		bd_runq_exit(xi, EFAULT);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, EFAULT);
		biodone(bp);
		return;
	}
	xi->i_len = len;
	xi->i_nblks = len >> xi->i_blkshift;

	/* Submit next window to hardware. */
	rv = xi->i_func(bd->d_private, &xi->i_public);
	if (rv != 0) {
		bd_runq_exit(xi, rv);

		atomic_inc_32(&bd->d_kerr->bd_transerrs.value.ui32);

		bp->b_resid += xi->i_resid;
		bd_xfer_free(xi);
		bioerror(bp, rv);
		biodone(bp);
	}
}
2062 
2063 void
2064 bd_error(bd_xfer_t *xfer, int error)
2065 {
2066         bd_xfer_impl_t  *xi = (void *)xfer;
2067         bd_t            *bd = xi->i_bd;
2068 
2069         switch (error) {
2070         case BD_ERR_MEDIA:
2071                 atomic_inc_32(&bd->d_kerr->bd_rq_media_err.value.ui32);
2072                 break;
2073         case BD_ERR_NTRDY:
2074                 atomic_inc_32(&bd->d_kerr->bd_rq_ntrdy_err.value.ui32);
2075                 break;
2076         case BD_ERR_NODEV:
2077                 atomic_inc_32(&bd->d_kerr->bd_rq_nodev_err.value.ui32);
2078                 break;
2079         case BD_ERR_RECOV:
2080                 atomic_inc_32(&bd->d_kerr->bd_rq_recov_err.value.ui32);
2081                 break;
2082         case BD_ERR_ILLRQ:
2083                 atomic_inc_32(&bd->d_kerr->bd_rq_illrq_err.value.ui32);
2084                 break;
2085         case BD_ERR_PFA:
2086                 atomic_inc_32(&bd->d_kerr->bd_rq_pfa_err.value.ui32);
2087                 break;
2088         default:
2089                 cmn_err(CE_PANIC, "bd_error: unknown error type %d", error);
2090                 break;
2091         }
2092 }
2093 
2094 void
2095 bd_state_change(bd_handle_t hdl)
2096 {
2097         bd_t            *bd;
2098 
2099         if ((bd = hdl->h_bd) != NULL) {
2100                 bd_update_state(bd);
2101         }
2102 }
2103 
/*
 * Install the blkdev nexus bus_ops vector into a client driver's
 * dev_ops, typically called from the driver's _init() before
 * mod_install().  Only bus_ctl is blkdev-specific; DMA operations pass
 * straight through to the generic DDI implementations.
 */
void
bd_mod_init(struct dev_ops *devops)
{
	static struct bus_ops bd_bus_ops = {
		BUSO_REV,		/* busops_rev */
		nullbusmap,		/* bus_map */
		NULL,			/* bus_get_intrspec (OBSOLETE) */
		NULL,			/* bus_add_intrspec (OBSOLETE) */
		NULL,			/* bus_remove_intrspec (OBSOLETE) */
		i_ddi_map_fault,	/* bus_map_fault */
		NULL,			/* bus_dma_map (OBSOLETE) */
		ddi_dma_allochdl,	/* bus_dma_allochdl */
		ddi_dma_freehdl,	/* bus_dma_freehdl */
		ddi_dma_bindhdl,	/* bus_dma_bindhdl */
		ddi_dma_unbindhdl,	/* bus_dma_unbindhdl */
		ddi_dma_flush,		/* bus_dma_flush */
		ddi_dma_win,		/* bus_dma_win */
		ddi_dma_mctl,		/* bus_dma_ctl */
		bd_bus_ctl,		/* bus_ctl */
		ddi_bus_prop_op,	/* bus_prop_op */
		NULL,			/* bus_get_eventcookie */
		NULL,			/* bus_add_eventcall */
		NULL,			/* bus_remove_eventcall */
		NULL,			/* bus_post_event */
		NULL,			/* bus_intr_ctl (OBSOLETE) */
		NULL,			/* bus_config */
		NULL,			/* bus_unconfig */
		NULL,			/* bus_fm_init */
		NULL,			/* bus_fm_fini */
		NULL,			/* bus_fm_access_enter */
		NULL,			/* bus_fm_access_exit */
		NULL,			/* bus_power */
		NULL,			/* bus_intr_op */
	};

	devops->devo_bus_ops = &bd_bus_ops;

	/*
	 * NB: The device driver is free to supply its own
	 * character entry device support.
	 */
}
2146 
/*
 * Undo bd_mod_init(): detach the blkdev bus_ops vector from the client
 * driver's dev_ops.
 */
void
bd_mod_fini(struct dev_ops *devops)
{
	devops->devo_bus_ops = NULL;
}