/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */

/*
 * Copyright (c) 2015, Nexenta Systems, Inc. All rights reserved.
 * Copyright (c) 2012, Alexey Zaytsev <alexey.zaytsev@gmail.com>
 * Copyright 2017, Joyent Inc.
 */

/*
 * VirtIO block device driver
 */

#include <sys/modctl.h>
#include <sys/blkdev.h>
#include <sys/types.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/stropts.h>
#include <sys/stream.h>
#include <sys/strsubr.h>
#include <sys/kmem.h>
#include <sys/conf.h>
#include <sys/devops.h>
#include <sys/ksynch.h>
#include <sys/stat.h>
#include <sys/debug.h>
#include <sys/pci.h>
#include <sys/sysmacros.h>
#include "virtiovar.h"
#include "virtioreg.h"

/* Feature bits */
#define VIRTIO_BLK_F_BARRIER    (1<<0)
#define VIRTIO_BLK_F_SIZE_MAX   (1<<1)
#define VIRTIO_BLK_F_SEG_MAX    (1<<2)
#define VIRTIO_BLK_F_GEOMETRY   (1<<4)
#define VIRTIO_BLK_F_RO         (1<<5)
#define VIRTIO_BLK_F_BLK_SIZE   (1<<6)
#define VIRTIO_BLK_F_SCSI       (1<<7)
#define VIRTIO_BLK_F_FLUSH      (1<<9)
#define VIRTIO_BLK_F_TOPOLOGY   (1<<10)

/* Configuration registers */
#define VIRTIO_BLK_CONFIG_CAPACITY      0 /* 64bit */
#define VIRTIO_BLK_CONFIG_SIZE_MAX      8 /* 32bit */
#define VIRTIO_BLK_CONFIG_SEG_MAX       12 /* 32bit */
#define VIRTIO_BLK_CONFIG_GEOMETRY_C    16 /* 16bit */
#define VIRTIO_BLK_CONFIG_GEOMETRY_H    18 /* 8bit */
#define VIRTIO_BLK_CONFIG_GEOMETRY_S    19 /* 8bit */
#define VIRTIO_BLK_CONFIG_BLK_SIZE      20 /* 32bit */
#define VIRTIO_BLK_CONFIG_TOPO_PBEXP    24 /* 8bit */
#define VIRTIO_BLK_CONFIG_TOPO_ALIGN    25 /* 8bit */
#define VIRTIO_BLK_CONFIG_TOPO_MIN_SZ   26 /* 16bit */
#define VIRTIO_BLK_CONFIG_TOPO_OPT_SZ   28 /* 32bit */

/* Command */
#define VIRTIO_BLK_T_IN                 0
#define VIRTIO_BLK_T_OUT                1
#define VIRTIO_BLK_T_SCSI_CMD           2
#define VIRTIO_BLK_T_SCSI_CMD_OUT       3
#define VIRTIO_BLK_T_FLUSH              4
#define VIRTIO_BLK_T_FLUSH_OUT          5
#define VIRTIO_BLK_T_GET_ID             8
#define VIRTIO_BLK_T_BARRIER            0x80000000

#define VIRTIO_BLK_ID_BYTES     20 /* devid */

/* Statuses */
#define VIRTIO_BLK_S_OK         0
#define VIRTIO_BLK_S_IOERR      1
#define VIRTIO_BLK_S_UNSUPP     2

#define DEF_MAXINDIRECT         (128)
#define DEF_MAXSECTOR           (4096)

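/*
 * The poison value is written into req->xfer once a request completes, so
 * the interrupt handler can detect a descriptor that the device hands back
 * twice before it corrupts an already-completed bd_xfer_t.
 */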
#define VIOBLK_POISON           0xdead0001dead0001

/*
 * Static Variables.
 */
static char vioblk_ident[] = "VirtIO block driver";

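/*
 * Layout of a legacy virtio-blk request as this driver submits it: the
 * device reads a 16-byte header (type, ioprio, sector), then reads or
 * writes the data payload, and finally writes back a single status byte
 * (VIRTIO_BLK_S_*).  struct vioblk_req_hdr below mirrors the device-visible
 * header, and the status byte in struct vioblk_req sits directly after it,
 * so one DMA binding in vioblk_alloc_reqs() can cover both.
 */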
/* Request header structure */
struct vioblk_req_hdr {
        uint32_t                type;   /* VIRTIO_BLK_T_* */
        uint32_t                ioprio;
        uint64_t                sector;
};

struct vioblk_req {
        struct vioblk_req_hdr   hdr;
        uint8_t                 status;
        uint8_t                 unused[3];
        unsigned int            ndmac;
        ddi_dma_handle_t        dmah;
        ddi_dma_handle_t        bd_dmah;
        ddi_dma_cookie_t        dmac;
        bd_xfer_t               *xfer;
};

struct vioblk_stats {
        struct kstat_named      sts_rw_outofmemory;
        struct kstat_named      sts_rw_badoffset;
        struct kstat_named      sts_rw_queuemax;
        struct kstat_named      sts_rw_cookiesmax;
        struct kstat_named      sts_rw_cacheflush;
        struct kstat_named      sts_intr_queuemax;
        struct kstat_named      sts_intr_total;
        struct kstat_named      sts_io_errors;
        struct kstat_named      sts_unsupp_errors;
        struct kstat_named      sts_nxio_errors;
};

struct vioblk_lstats {
        uint64_t                rw_cacheflush;
        uint64_t                intr_total;
        unsigned int            rw_cookiesmax;
        unsigned int            intr_queuemax;
        unsigned int            io_errors;
        unsigned int            unsupp_errors;
        unsigned int            nxio_errors;
};

struct vioblk_softc {
        dev_info_t              *sc_dev; /* mirrors virtio_softc->sc_dev */
        struct virtio_softc     sc_virtio;
        struct virtqueue        *sc_vq;
        bd_handle_t             bd_h;
        struct vioblk_req       *sc_reqs;
        struct vioblk_stats     *ks_data;
        kstat_t                 *sc_intrstat;
        uint64_t                sc_capacity;
        uint64_t                sc_nblks;
        struct vioblk_lstats    sc_stats;
        short                   sc_blkflags;
        boolean_t               sc_in_poll_mode;
        boolean_t               sc_readonly;
        int                     sc_blk_size;
        int                     sc_pblk_size;
        int                     sc_seg_max;
        int                     sc_seg_size_max;
        kmutex_t                lock_devid;
        kcondvar_t              cv_devid;
        char                    devid[VIRTIO_BLK_ID_BYTES + 1];
};

static int vioblk_get_id(struct vioblk_softc *sc);

static int vioblk_read(void *arg, bd_xfer_t *xfer);
static int vioblk_write(void *arg, bd_xfer_t *xfer);
static int vioblk_flush(void *arg, bd_xfer_t *xfer);
static void vioblk_driveinfo(void *arg, bd_drive_t *drive);
static int vioblk_mediainfo(void *arg, bd_media_t *media);
static int vioblk_devid_init(void *, dev_info_t *, ddi_devid_t *);
uint_t vioblk_int_handler(caddr_t arg1, caddr_t arg2);

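/*
 * The slot order follows bd_ops_t for BD_OPS_VERSION_0: o_drive_info,
 * o_media_info, o_devid_init, o_sync_cache, o_read, o_write.  Note that
 * o_sync_cache (vioblk_flush) is cleared at attach time if the host does
 * not offer VIRTIO_BLK_F_FLUSH.
 */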
static bd_ops_t vioblk_ops = {
        BD_OPS_VERSION_0,
        vioblk_driveinfo,
        vioblk_mediainfo,
        vioblk_devid_init,
        vioblk_flush,
        vioblk_read,
        vioblk_write,
};

static int vioblk_quiesce(dev_info_t *);
static int vioblk_attach(dev_info_t *, ddi_attach_cmd_t);
static int vioblk_detach(dev_info_t *, ddi_detach_cmd_t);

static struct dev_ops vioblk_dev_ops = {
        DEVO_REV,
        0,
        ddi_no_info,
        nulldev,        /* identify */
        nulldev,        /* probe */
        vioblk_attach,  /* attach */
        vioblk_detach,  /* detach */
        nodev,          /* reset */
        NULL,           /* cb_ops */
        NULL,           /* bus_ops */
        NULL,           /* power */
        vioblk_quiesce  /* quiesce */
};

/* Standard module linkage initialization for a device driver */
extern struct mod_ops mod_driverops;

static struct modldrv modldrv = {
        &mod_driverops,         /* Type of module.  This one is a driver */
        vioblk_ident,           /* short description */
        &vioblk_dev_ops         /* driver specific ops */
};

static struct modlinkage modlinkage = {
        MODREV_1,
        {
                (void *)&modldrv,
                NULL,
        },
};

ddi_device_acc_attr_t vioblk_attr = {
        DDI_DEVICE_ATTR_V0,
        DDI_NEVERSWAP_ACC,      /* virtio is always native byte order */
        DDI_STORECACHING_OK_ACC,
        DDI_DEFAULT_ACC
};

/* DMA attr for the header/status blocks. */
static ddi_dma_attr_t vioblk_req_dma_attr = {
        DMA_ATTR_V0,                    /* dma_attr version     */
        0,                              /* dma_attr_addr_lo     */
        0xFFFFFFFFFFFFFFFFull,          /* dma_attr_addr_hi     */
        0x00000000FFFFFFFFull,          /* dma_attr_count_max   */
        1,                              /* dma_attr_align       */
        1,                              /* dma_attr_burstsizes  */
        1,                              /* dma_attr_minxfer     */
        0xFFFFFFFFull,                  /* dma_attr_maxxfer     */
        0xFFFFFFFFFFFFFFFFull,          /* dma_attr_seg         */
        1,                              /* dma_attr_sgllen      */
        1,                              /* dma_attr_granular    */
        0,                              /* dma_attr_flags       */
};

/* DMA attr for the data blocks. */
static ddi_dma_attr_t vioblk_bd_dma_attr = {
        DMA_ATTR_V0,                    /* dma_attr version     */
        0,                              /* dma_attr_addr_lo     */
        0xFFFFFFFFFFFFFFFFull,          /* dma_attr_addr_hi     */
        0x00000000FFFFFFFFull,          /* dma_attr_count_max   */
        1,                              /* dma_attr_align       */
        1,                              /* dma_attr_burstsizes  */
        1,                              /* dma_attr_minxfer     */
        0,                              /* dma_attr_maxxfer, set in attach */
        0xFFFFFFFFFFFFFFFFull,          /* dma_attr_seg         */
        0,                              /* dma_attr_sgllen, set in attach */
        1,                              /* dma_attr_granular    */
        0,                              /* dma_attr_flags       */
};

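/*
 * Build and push one request.  Every request becomes a three-part
 * descriptor chain on the queue:
 *
 *	header  (struct vioblk_req_hdr, device-readable)
 *	payload (the blkdev data cookies; absent for flush requests)
 *	status  (one byte, device-writable)
 *
 * total_cookies starts at 2 to account for the header and status
 * descriptors that bracket every payload.
 */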
static int
vioblk_rw(struct vioblk_softc *sc, bd_xfer_t *xfer, int type,
    uint32_t len)
{
        struct vioblk_req *req;
        struct vq_entry *ve_hdr;
        int total_cookies, write;

        write = (type == VIRTIO_BLK_T_OUT ||
            type == VIRTIO_BLK_T_FLUSH_OUT) ? 1 : 0;
        total_cookies = 2;

        if ((xfer->x_blkno + xfer->x_nblks) > sc->sc_nblks) {
                sc->ks_data->sts_rw_badoffset.value.ui64++;
                return (EINVAL);
        }

        /* allocate top entry */
        ve_hdr = vq_alloc_entry(sc->sc_vq);
        if (!ve_hdr) {
                sc->ks_data->sts_rw_outofmemory.value.ui64++;
                return (ENOMEM);
        }

        /* getting request */
        req = &sc->sc_reqs[ve_hdr->qe_index];
        req->hdr.type = type;
        req->hdr.ioprio = 0;
        req->hdr.sector = xfer->x_blkno;
        req->xfer = xfer;

        /* Header */
        virtio_ve_add_indirect_buf(ve_hdr, req->dmac.dmac_laddress,
            sizeof (struct vioblk_req_hdr), B_TRUE);

        /* Payload */
        if (len > 0) {
                virtio_ve_add_cookie(ve_hdr, xfer->x_dmah, xfer->x_dmac,
                    xfer->x_ndmac, write ? B_TRUE : B_FALSE);
                total_cookies += xfer->x_ndmac;
        }

        /* Status */
        virtio_ve_add_indirect_buf(ve_hdr,
            req->dmac.dmac_laddress + sizeof (struct vioblk_req_hdr),
            sizeof (uint8_t), B_FALSE);

        /* sending the whole chain to the device */
        virtio_push_chain(ve_hdr, B_TRUE);

        if (sc->sc_stats.rw_cookiesmax < total_cookies)
                sc->sc_stats.rw_cookiesmax = total_cookies;

        return (DDI_SUCCESS);
}

/*
 * Now in polling mode. Interrupts are off, so we
 * 1) poll for the already queued requests to complete.
 * 2) push our request.
 * 3) wait for our request to complete.
 */
static int
vioblk_rw_poll(struct vioblk_softc *sc, bd_xfer_t *xfer,
    int type, uint32_t len)
{
        clock_t tmout;
        int ret;

        ASSERT(xfer->x_flags & BD_XFER_POLL);

        /* Prevent a hard hang. */
        tmout = drv_usectohz(30000000);

        /* Poll for an empty queue */
        while (vq_num_used(sc->sc_vq)) {
                /* Check if any pending requests completed. */
                ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
                if (ret != DDI_INTR_CLAIMED) {
                        drv_usecwait(10);
                        tmout -= 10;

                        /* Give up if the queue never drains. */
                        if (tmout < 0)
                                return (ETIMEDOUT);
                }
        }

        ret = vioblk_rw(sc, xfer, type, len);
        if (ret)
                return (ret);

        tmout = drv_usectohz(30000000);
        /* Poll for an empty queue again. */
        while (vq_num_used(sc->sc_vq)) {
                /* Check if any pending requests completed. */
                ret = vioblk_int_handler((caddr_t)&sc->sc_virtio, NULL);
                if (ret != DDI_INTR_CLAIMED) {
                        drv_usecwait(10);
                        tmout -= 10;

                        /* Give up if the queue never drains. */
                        if (tmout < 0)
                                return (ETIMEDOUT);
                }
        }

        return (DDI_SUCCESS);
}

static int
vioblk_read(void *arg, bd_xfer_t *xfer)
{
        int ret;
        struct vioblk_softc *sc = (void *)arg;

        if (xfer->x_flags & BD_XFER_POLL) {
                if (!sc->sc_in_poll_mode) {
                        virtio_stop_vq_intr(sc->sc_vq);
                        sc->sc_in_poll_mode = B_TRUE;
                }

                ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_IN,
                    xfer->x_nblks * DEV_BSIZE);
        } else {
                if (sc->sc_in_poll_mode) {
                        virtio_start_vq_intr(sc->sc_vq);
                        sc->sc_in_poll_mode = B_FALSE;
                }

                ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_IN,
                    xfer->x_nblks * DEV_BSIZE);
        }

        return (ret);
}

static int
vioblk_write(void *arg, bd_xfer_t *xfer)
{
        int ret;
        struct vioblk_softc *sc = (void *)arg;

        if (xfer->x_flags & BD_XFER_POLL) {
                if (!sc->sc_in_poll_mode) {
                        virtio_stop_vq_intr(sc->sc_vq);
                        sc->sc_in_poll_mode = B_TRUE;
                }

                ret = vioblk_rw_poll(sc, xfer, VIRTIO_BLK_T_OUT,
                    xfer->x_nblks * DEV_BSIZE);
        } else {
                if (sc->sc_in_poll_mode) {
                        virtio_start_vq_intr(sc->sc_vq);
                        sc->sc_in_poll_mode = B_FALSE;
                }

                ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_OUT,
                    xfer->x_nblks * DEV_BSIZE);
        }

        return (ret);
}

static int
vioblk_flush(void *arg, bd_xfer_t *xfer)
{
        int ret;
        struct vioblk_softc *sc = (void *)arg;

        ASSERT((xfer->x_flags & BD_XFER_POLL) == 0);

        ret = vioblk_rw(sc, xfer, VIRTIO_BLK_T_FLUSH_OUT,
            xfer->x_nblks * DEV_BSIZE);

        if (!ret)
                sc->sc_stats.rw_cacheflush++;

        return (ret);
}

static void
vioblk_driveinfo(void *arg, bd_drive_t *drive)
{
        struct vioblk_softc *sc = (void *)arg;

        drive->d_qsize = sc->sc_vq->vq_num;
        drive->d_removable = B_FALSE;
        drive->d_hotpluggable = B_TRUE;
        drive->d_target = 0;
        drive->d_lun = 0;

        drive->d_vendor = "Virtio";
        drive->d_vendor_len = strlen(drive->d_vendor);

        drive->d_product = "Block Device";
        drive->d_product_len = strlen(drive->d_product);

        (void) vioblk_get_id(sc);
        drive->d_serial = sc->devid;
        drive->d_serial_len = strlen(drive->d_serial);

        drive->d_revision = "0000";
        drive->d_revision_len = strlen(drive->d_revision);
}

static int
vioblk_mediainfo(void *arg, bd_media_t *media)
{
        struct vioblk_softc *sc = (void *)arg;

        media->m_nblks = sc->sc_nblks;
        media->m_blksize = sc->sc_blk_size;
        media->m_readonly = sc->sc_readonly;
        media->m_pblksize = sc->sc_pblk_size;
        return (0);
}

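/*
 * Fetch the device serial number with a VIRTIO_BLK_T_GET_ID request.  This
 * is synchronous: the request goes out through vioblk_rw(), and the
 * interrupt handler signals cv_devid instead of calling bd_xfer_done(),
 * because the bd_xfer_t used here lives on our stack and never came from
 * blkdev.
 */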
static int
vioblk_get_id(struct vioblk_softc *sc)
{
        clock_t deadline;
        int ret;
        bd_xfer_t xfer;

        deadline = ddi_get_lbolt() + (clock_t)drv_usectohz(3 * 1000000);
        (void) memset(&xfer, 0, sizeof (bd_xfer_t));
        xfer.x_nblks = 1;

        ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_bd_dma_attr,
            DDI_DMA_SLEEP, NULL, &xfer.x_dmah);
        if (ret != DDI_SUCCESS)
                goto out_alloc;

        ret = ddi_dma_addr_bind_handle(xfer.x_dmah, NULL, (caddr_t)&sc->devid,
            VIRTIO_BLK_ID_BYTES, DDI_DMA_READ | DDI_DMA_CONSISTENT,
            DDI_DMA_SLEEP, NULL, &xfer.x_dmac, &xfer.x_ndmac);
        if (ret != DDI_DMA_MAPPED) {
                ret = DDI_FAILURE;
                goto out_map;
        }

        mutex_enter(&sc->lock_devid);

        ret = vioblk_rw(sc, &xfer, VIRTIO_BLK_T_GET_ID,
            VIRTIO_BLK_ID_BYTES);
        if (ret) {
                mutex_exit(&sc->lock_devid);
                goto out_rw;
        }

        /* wait for reply */
        ret = cv_timedwait(&sc->cv_devid, &sc->lock_devid, deadline);
        mutex_exit(&sc->lock_devid);

        (void) ddi_dma_unbind_handle(xfer.x_dmah);
        ddi_dma_free_handle(&xfer.x_dmah);

        /* timeout */
        if (ret < 0) {
                dev_err(sc->sc_dev, CE_WARN,
                    "Cannot get devid from the device");
                return (DDI_FAILURE);
        }

        return (0);

out_rw:
        (void) ddi_dma_unbind_handle(xfer.x_dmah);
out_map:
        ddi_dma_free_handle(&xfer.x_dmah);
out_alloc:
        return (ret);
}

static int
vioblk_devid_init(void *arg, dev_info_t *devinfo, ddi_devid_t *devid)
{
        struct vioblk_softc *sc = (void *)arg;
        int ret;

        ret = vioblk_get_id(sc);
        if (ret != DDI_SUCCESS)
                return (ret);

        ret = ddi_devid_init(devinfo, DEVID_ATA_SERIAL,
            VIRTIO_BLK_ID_BYTES, sc->devid, devid);
        if (ret != DDI_SUCCESS) {
                dev_err(devinfo, CE_WARN, "Cannot build devid from the device");
                return (ret);
        }

        dev_debug(sc->sc_dev, CE_NOTE,
            "devid %x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x%x",
            sc->devid[0], sc->devid[1], sc->devid[2], sc->devid[3],
            sc->devid[4], sc->devid[5], sc->devid[6], sc->devid[7],
            sc->devid[8], sc->devid[9], sc->devid[10], sc->devid[11],
            sc->devid[12], sc->devid[13], sc->devid[14], sc->devid[15],
            sc->devid[16], sc->devid[17], sc->devid[18], sc->devid[19]);

        return (0);
}

static void
vioblk_show_features(struct vioblk_softc *sc, const char *prefix,
    uint32_t features)
{
        char buf[512];
        char *bufp = buf;
        char *bufend = buf + sizeof (buf);

        /* LINTED E_PTRDIFF_OVERFLOW */
        bufp += snprintf(bufp, bufend - bufp, prefix);

        /* LINTED E_PTRDIFF_OVERFLOW */
        bufp += virtio_show_features(features, bufp, bufend - bufp);

        /* LINTED E_PTRDIFF_OVERFLOW */
        bufp += snprintf(bufp, bufend - bufp, "Vioblk ( ");

        if (features & VIRTIO_BLK_F_BARRIER)
                /* LINTED E_PTRDIFF_OVERFLOW */
                bufp += snprintf(bufp, bufend - bufp, "BARRIER ");
        if (features & VIRTIO_BLK_F_SIZE_MAX)
                /* LINTED E_PTRDIFF_OVERFLOW */
                bufp += snprintf(bufp, bufend - bufp, "SIZE_MAX ");
        if (features & VIRTIO_BLK_F_SEG_MAX)
                /* LINTED E_PTRDIFF_OVERFLOW */
                bufp += snprintf(bufp, bufend - bufp, "SEG_MAX ");
        if (features & VIRTIO_BLK_F_GEOMETRY)
                /* LINTED E_PTRDIFF_OVERFLOW */
                bufp += snprintf(bufp, bufend - bufp, "GEOMETRY ");
        if (features & VIRTIO_BLK_F_RO)
                /* LINTED E_PTRDIFF_OVERFLOW */
                bufp += snprintf(bufp, bufend - bufp, "RO ");
        if (features & VIRTIO_BLK_F_BLK_SIZE)
                /* LINTED E_PTRDIFF_OVERFLOW */
                bufp += snprintf(bufp, bufend - bufp, "BLK_SIZE ");
        if (features & VIRTIO_BLK_F_SCSI)
                /* LINTED E_PTRDIFF_OVERFLOW */
                bufp += snprintf(bufp, bufend - bufp, "SCSI ");
        if (features & VIRTIO_BLK_F_FLUSH)
                /* LINTED E_PTRDIFF_OVERFLOW */
                bufp += snprintf(bufp, bufend - bufp, "FLUSH ");
        if (features & VIRTIO_BLK_F_TOPOLOGY)
                /* LINTED E_PTRDIFF_OVERFLOW */
                bufp += snprintf(bufp, bufend - bufp, "TOPOLOGY ");

        /* LINTED E_PTRDIFF_OVERFLOW */
        bufp += snprintf(bufp, bufend - bufp, ")");
        *bufp = '\0';

        dev_debug(sc->sc_dev, CE_NOTE, "%s", buf);
}

static int
vioblk_dev_features(struct vioblk_softc *sc)
{
        uint32_t host_features;

        host_features = virtio_negotiate_features(&sc->sc_virtio,
            VIRTIO_BLK_F_RO |
            VIRTIO_BLK_F_GEOMETRY |
            VIRTIO_BLK_F_BLK_SIZE |
            VIRTIO_BLK_F_FLUSH |
            VIRTIO_BLK_F_TOPOLOGY |
            VIRTIO_BLK_F_SEG_MAX |
            VIRTIO_BLK_F_SIZE_MAX |
            VIRTIO_F_RING_INDIRECT_DESC);

        vioblk_show_features(sc, "Host features: ", host_features);
        vioblk_show_features(sc, "Negotiated features: ",
            sc->sc_virtio.sc_features);

        if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
                dev_err(sc->sc_dev, CE_NOTE,
                    "Host does not support RING_INDIRECT_DESC, bye.");
                return (DDI_FAILURE);
        }

        return (DDI_SUCCESS);
}

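/*
 * I/O queue interrupt handler.  Beware that this is also called directly
 * from vioblk_rw_poll(), with queue interrupts disabled, to drain the ring
 * in polling mode.
 */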
/* ARGSUSED */
uint_t
vioblk_int_handler(caddr_t arg1, caddr_t arg2)
{
        struct virtio_softc *vsc = (void *)arg1;
        struct vioblk_softc *sc = container_of(vsc,
            struct vioblk_softc, sc_virtio);
        struct vq_entry *ve;
        uint32_t len;
        int i = 0, error;

        while ((ve = virtio_pull_chain(sc->sc_vq, &len))) {
                struct vioblk_req *req = &sc->sc_reqs[ve->qe_index];
                bd_xfer_t *xfer = req->xfer;
                uint8_t status = req->status;
                uint32_t type = req->hdr.type;

                if (req->xfer == (void *)VIOBLK_POISON) {
                        dev_err(sc->sc_dev, CE_WARN, "Poisoned descriptor!");
                        virtio_free_chain(ve);
                        return (DDI_INTR_CLAIMED);
                }

                req->xfer = (void *)VIOBLK_POISON;

                /* Note: blkdev tears down the payload mapping for us. */
                virtio_free_chain(ve);

                /* returning payload back to blkdev */
                switch (status) {
                case VIRTIO_BLK_S_OK:
                        error = 0;
                        break;
                case VIRTIO_BLK_S_IOERR:
                        error = EIO;
                        sc->sc_stats.io_errors++;
                        break;
                case VIRTIO_BLK_S_UNSUPP:
                        sc->sc_stats.unsupp_errors++;
                        error = ENOTTY;
                        break;
                default:
                        sc->sc_stats.nxio_errors++;
                        error = ENXIO;
                        break;
                }

                if (type == VIRTIO_BLK_T_GET_ID) {
                        /* notify devid_init */
                        mutex_enter(&sc->lock_devid);
                        cv_broadcast(&sc->cv_devid);
                        mutex_exit(&sc->lock_devid);
                } else {
                        bd_xfer_done(xfer, error);
                }

                i++;
        }

        /* update stats */
        if (sc->sc_stats.intr_queuemax < i)
                sc->sc_stats.intr_queuemax = i;
        sc->sc_stats.intr_total++;

        return (DDI_INTR_CLAIMED);
}

/* ARGSUSED */
uint_t
vioblk_config_handler(caddr_t arg1, caddr_t arg2)
{
        return (DDI_INTR_CLAIMED);
}

static int
vioblk_register_ints(struct vioblk_softc *sc)
{
        int ret;

        struct virtio_int_handler vioblk_conf_h = {
                vioblk_config_handler
        };

        struct virtio_int_handler vioblk_vq_h[] = {
                { vioblk_int_handler },
                { NULL },
        };

        ret = virtio_register_ints(&sc->sc_virtio,
            &vioblk_conf_h, vioblk_vq_h);

        return (ret);
}

static void
vioblk_free_reqs(struct vioblk_softc *sc)
{
        int i, qsize;

        qsize = sc->sc_vq->vq_num;

        for (i = 0; i < qsize; i++) {
                struct vioblk_req *req = &sc->sc_reqs[i];

                if (req->ndmac)
                        (void) ddi_dma_unbind_handle(req->dmah);

                if (req->dmah)
                        ddi_dma_free_handle(&req->dmah);
        }

        kmem_free(sc->sc_reqs, sizeof (struct vioblk_req) * qsize);
}

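/*
 * Allocate one vioblk_req per virtqueue entry and DMA-bind each
 * header/status pair up front.  The array is indexed by descriptor index,
 * which is how vioblk_rw() and the interrupt handler find the request
 * state for a chain (sc_reqs[qe_index]) without any lookup table.
 */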
static int
vioblk_alloc_reqs(struct vioblk_softc *sc)
{
        int i, qsize;
        int ret;

        qsize = sc->sc_vq->vq_num;

        sc->sc_reqs = kmem_zalloc(sizeof (struct vioblk_req) * qsize, KM_SLEEP);

        for (i = 0; i < qsize; i++) {
                struct vioblk_req *req = &sc->sc_reqs[i];

                ret = ddi_dma_alloc_handle(sc->sc_dev, &vioblk_req_dma_attr,
                    DDI_DMA_SLEEP, NULL, &req->dmah);
                if (ret != DDI_SUCCESS) {
                        dev_err(sc->sc_dev, CE_WARN,
                            "Can't allocate dma handle for req "
                            "buffer %d", i);
                        goto exit;
                }

                ret = ddi_dma_addr_bind_handle(req->dmah, NULL,
                    (caddr_t)&req->hdr,
                    sizeof (struct vioblk_req_hdr) + sizeof (uint8_t),
                    DDI_DMA_RDWR | DDI_DMA_CONSISTENT, DDI_DMA_SLEEP,
                    NULL, &req->dmac, &req->ndmac);
                if (ret != DDI_DMA_MAPPED) {
                        dev_err(sc->sc_dev, CE_WARN,
                            "Can't bind req buffer %d", i);
                        goto exit;
                }
        }

        return (0);

exit:
        vioblk_free_reqs(sc);
        return (ENOMEM);
}

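/*
 * kstat update callback.  The hot path only bumps the plain counters in
 * sc_stats; they are copied into the named kstat entries here, when the
 * kstat is actually read.
 */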
static int
vioblk_ksupdate(kstat_t *ksp, int rw)
{
        struct vioblk_softc *sc = ksp->ks_private;

        if (rw == KSTAT_WRITE)
                return (EACCES);

        sc->ks_data->sts_rw_cookiesmax.value.ui32 = sc->sc_stats.rw_cookiesmax;
        sc->ks_data->sts_intr_queuemax.value.ui32 = sc->sc_stats.intr_queuemax;
        sc->ks_data->sts_unsupp_errors.value.ui32 = sc->sc_stats.unsupp_errors;
        sc->ks_data->sts_nxio_errors.value.ui32 = sc->sc_stats.nxio_errors;
        sc->ks_data->sts_io_errors.value.ui32 = sc->sc_stats.io_errors;
        sc->ks_data->sts_rw_cacheflush.value.ui64 = sc->sc_stats.rw_cacheflush;
        sc->ks_data->sts_intr_total.value.ui64 = sc->sc_stats.intr_total;

        return (0);
}

static int
vioblk_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
{
        int ret = DDI_SUCCESS;
        int instance;
        struct vioblk_softc *sc;
        struct virtio_softc *vsc;
        struct vioblk_stats *ks_data;

        instance = ddi_get_instance(devinfo);

        switch (cmd) {
        case DDI_ATTACH:
                break;

        case DDI_RESUME:
        case DDI_PM_RESUME:
                dev_err(devinfo, CE_WARN, "resume not supported yet");
                return (DDI_FAILURE);

        default:
                dev_err(devinfo, CE_WARN, "cmd 0x%x not recognized", cmd);
                return (DDI_FAILURE);
        }

        sc = kmem_zalloc(sizeof (struct vioblk_softc), KM_SLEEP);
        ddi_set_driver_private(devinfo, sc);

        vsc = &sc->sc_virtio;

        /* Duplicate for faster access / less typing */
        sc->sc_dev = devinfo;
        vsc->sc_dev = devinfo;

        cv_init(&sc->cv_devid, NULL, CV_DRIVER, NULL);
        mutex_init(&sc->lock_devid, NULL, MUTEX_DRIVER, NULL);

        /*
         * Initialize the interrupt kstat.  We check for failure once, here,
         * and fail the attach, so that the hot path can update the stats
         * without testing the pointer at run time.
         */
        sc->sc_intrstat = kstat_create("vioblk", instance,
            "intrs", "controller", KSTAT_TYPE_NAMED,
            sizeof (struct vioblk_stats) / sizeof (kstat_named_t),
            KSTAT_FLAG_PERSISTENT);
        if (sc->sc_intrstat == NULL) {
                dev_err(devinfo, CE_WARN, "kstat_create failed");
                goto exit_intrstat;
        }
        ks_data = (struct vioblk_stats *)sc->sc_intrstat->ks_data;
        kstat_named_init(&ks_data->sts_rw_outofmemory,
            "total_rw_outofmemory", KSTAT_DATA_UINT64);
        kstat_named_init(&ks_data->sts_rw_badoffset,
            "total_rw_badoffset", KSTAT_DATA_UINT64);
        kstat_named_init(&ks_data->sts_intr_total,
            "total_intr", KSTAT_DATA_UINT64);
        kstat_named_init(&ks_data->sts_io_errors,
            "total_io_errors", KSTAT_DATA_UINT32);
        kstat_named_init(&ks_data->sts_unsupp_errors,
            "total_unsupp_errors", KSTAT_DATA_UINT32);
        kstat_named_init(&ks_data->sts_nxio_errors,
            "total_nxio_errors", KSTAT_DATA_UINT32);
        kstat_named_init(&ks_data->sts_rw_cacheflush,
            "total_rw_cacheflush", KSTAT_DATA_UINT64);
        kstat_named_init(&ks_data->sts_rw_cookiesmax,
            "max_rw_cookies", KSTAT_DATA_UINT32);
        kstat_named_init(&ks_data->sts_intr_queuemax,
            "max_intr_queue", KSTAT_DATA_UINT32);
        sc->ks_data = ks_data;
        sc->sc_intrstat->ks_private = sc;
        sc->sc_intrstat->ks_update = vioblk_ksupdate;
        kstat_install(sc->sc_intrstat);

        /* map BAR0 */
        ret = ddi_regs_map_setup(devinfo, 1,
            (caddr_t *)&sc->sc_virtio.sc_io_addr,
            0, 0, &vioblk_attr, &sc->sc_virtio.sc_ioh);
        if (ret != DDI_SUCCESS) {
                dev_err(devinfo, CE_WARN, "unable to map bar0: [%d]", ret);
                goto exit_map;
        }

        virtio_device_reset(&sc->sc_virtio);
        virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
        virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);

        if (vioblk_register_ints(sc)) {
                dev_err(devinfo, CE_WARN, "Unable to add interrupt");
                goto exit_int;
        }

        ret = vioblk_dev_features(sc);
        if (ret)
                goto exit_features;

        if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_RO)
                sc->sc_readonly = B_TRUE;
        else
                sc->sc_readonly = B_FALSE;

        sc->sc_capacity = virtio_read_device_config_8(&sc->sc_virtio,
            VIRTIO_BLK_CONFIG_CAPACITY);
        sc->sc_nblks = sc->sc_capacity;

        sc->sc_blk_size = DEV_BSIZE;
        if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_BLK_SIZE) {
                sc->sc_blk_size = virtio_read_device_config_4(&sc->sc_virtio,
                    VIRTIO_BLK_CONFIG_BLK_SIZE);
        }

        sc->sc_pblk_size = sc->sc_blk_size;
        if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_TOPOLOGY) {
                sc->sc_pblk_size <<= virtio_read_device_config_1(&sc->sc_virtio,
                    VIRTIO_BLK_CONFIG_TOPO_PBEXP);
        }

        /* If the host does not support cache flush, disable the op. */
        if (!(sc->sc_virtio.sc_features & VIRTIO_BLK_F_FLUSH)) {
                vioblk_ops.o_sync_cache = NULL;
        }
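        /*
         * With o_sync_cache cleared, blkdev should fail cache-flush
         * requests up front instead of calling us.  Note that vioblk_ops
         * is a driver-global structure, so this clears the op for every
         * instance attached to a FLUSH-less host.
         */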

        sc->sc_seg_max = DEF_MAXINDIRECT;
        /* The max number of segments (cookies) in a request */
        if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SEG_MAX) {
                sc->sc_seg_max = virtio_read_device_config_4(&sc->sc_virtio,
                    VIRTIO_BLK_CONFIG_SEG_MAX);

                /* That's what Linux does. */
                if (!sc->sc_seg_max)
                        sc->sc_seg_max = 1;

                /*
                 * SEG_MAX counts only the _data_ cookies in a request,
                 * while sc_seg_max counts ring descriptors; add two for
                 * the header and status descriptors.
                 */
                sc->sc_seg_max += 2;
        }
        /* 2 descriptors taken for header/status */
        vioblk_bd_dma_attr.dma_attr_sgllen = sc->sc_seg_max - 2;
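        /*
         * Worked example: a host advertising SEG_MAX = 126 gives
         * sc_seg_max = 128 descriptors per request and dma_attr_sgllen =
         * 126 data cookies; with the default 4096-byte segment size below,
         * dma_attr_maxxfer then becomes 126 * 4096 = 516096 bytes.
         */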

        /* The maximum size for a cookie in a request. */
        sc->sc_seg_size_max = DEF_MAXSECTOR;
        if (sc->sc_virtio.sc_features & VIRTIO_BLK_F_SIZE_MAX) {
                sc->sc_seg_size_max = virtio_read_device_config_4(
                    &sc->sc_virtio, VIRTIO_BLK_CONFIG_SIZE_MAX);
        }

        /* The maximum request size */
        vioblk_bd_dma_attr.dma_attr_maxxfer =
            vioblk_bd_dma_attr.dma_attr_sgllen * sc->sc_seg_size_max;

        dev_debug(devinfo, CE_NOTE,
            "nblks=%" PRIu64 " blksize=%d (%d) num_seg=%d, "
            "seg_size=%d, maxxfer=%" PRIu64,
            sc->sc_nblks, sc->sc_blk_size, sc->sc_pblk_size,
            vioblk_bd_dma_attr.dma_attr_sgllen,
            sc->sc_seg_size_max,
            vioblk_bd_dma_attr.dma_attr_maxxfer);

        sc->sc_vq = virtio_alloc_vq(&sc->sc_virtio, 0, 0,
            sc->sc_seg_max, "I/O request");
        if (sc->sc_vq == NULL) {
                goto exit_alloc1;
        }

        ret = vioblk_alloc_reqs(sc);
        if (ret) {
                goto exit_alloc2;
        }

        sc->bd_h = bd_alloc_handle(sc, &vioblk_ops, &vioblk_bd_dma_attr,
            KM_SLEEP);

        virtio_set_status(&sc->sc_virtio,
            VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
        virtio_start_vq_intr(sc->sc_vq);

        ret = virtio_enable_ints(&sc->sc_virtio);
        if (ret)
                goto exit_enable_ints;

        ret = bd_attach_handle(devinfo, sc->bd_h);
        if (ret != DDI_SUCCESS) {
                dev_err(devinfo, CE_WARN, "Failed to attach blkdev");
                goto exit_attach_bd;
        }

        return (DDI_SUCCESS);

exit_attach_bd:
        /*
         * There is no virtio_disable_ints(), it's done in virtio_release_ints.
         * If they ever get split, don't forget to add a call here.
         */
exit_enable_ints:
        virtio_stop_vq_intr(sc->sc_vq);
        bd_free_handle(sc->bd_h);
        vioblk_free_reqs(sc);
exit_alloc2:
        virtio_free_vq(sc->sc_vq);
exit_alloc1:
exit_features:
        virtio_release_ints(&sc->sc_virtio);
exit_int:
        virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
        ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
exit_map:
        kstat_delete(sc->sc_intrstat);
exit_intrstat:
        mutex_destroy(&sc->lock_devid);
        cv_destroy(&sc->cv_devid);
        kmem_free(sc, sizeof (struct vioblk_softc));
        return (DDI_FAILURE);
}

static int
vioblk_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
{
        struct vioblk_softc *sc = ddi_get_driver_private(devinfo);

        switch (cmd) {
        case DDI_DETACH:
                break;

        case DDI_PM_SUSPEND:
                cmn_err(CE_WARN, "suspend not supported yet");
                return (DDI_FAILURE);

        default:
                cmn_err(CE_WARN, "cmd 0x%x unrecognized", cmd);
                return (DDI_FAILURE);
        }

        (void) bd_detach_handle(sc->bd_h);
        virtio_stop_vq_intr(sc->sc_vq);
        virtio_release_ints(&sc->sc_virtio);
        vioblk_free_reqs(sc);
        virtio_free_vq(sc->sc_vq);
        virtio_device_reset(&sc->sc_virtio);
        ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
        kstat_delete(sc->sc_intrstat);
        kmem_free(sc, sizeof (struct vioblk_softc));

        return (DDI_SUCCESS);
}

static int
vioblk_quiesce(dev_info_t *devinfo)
{
        struct vioblk_softc *sc = ddi_get_driver_private(devinfo);

        virtio_stop_vq_intr(sc->sc_vq);
        virtio_device_reset(&sc->sc_virtio);

        return (DDI_SUCCESS);
}

int
_init(void)
{
        int rv;

        bd_mod_init(&vioblk_dev_ops);

        if ((rv = mod_install(&modlinkage)) != 0) {
                bd_mod_fini(&vioblk_dev_ops);
        }

        return (rv);
}

int
_fini(void)
{
        int rv;

        if ((rv = mod_remove(&modlinkage)) == 0) {
                bd_mod_fini(&vioblk_dev_ops);
        }

        return (rv);
}

int
_info(struct modinfo *modinfop)
{
        return (mod_info(&modlinkage, modinfop));
}