1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2013 Nexenta Inc.  All rights reserved.
  14  * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
  15  */
  16 
  17 /* Based on the NetBSD virtio driver by Minoura Makoto. */
  18 /*
  19  * Copyright (c) 2010 Minoura Makoto.
  20  * All rights reserved.
  21  *
  22  * Redistribution and use in source and binary forms, with or without
  23  * modification, are permitted provided that the following conditions
  24  * are met:
  25  * 1. Redistributions of source code must retain the above copyright
  26  *    notice, this list of conditions and the following disclaimer.
  27  * 2. Redistributions in binary form must reproduce the above copyright
  28  *    notice, this list of conditions and the following disclaimer in the
  29  *    documentation and/or other materials provided with the distribution.
  30  *
  31  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  32  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  34  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
  35  * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
  36  * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
  37  * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
  38  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
  39  * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
  40  * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
  41  */
  42 
  43 #include <sys/types.h>
  44 #include <sys/errno.h>
  45 #include <sys/param.h>
  46 #include <sys/stropts.h>
  47 #include <sys/stream.h>
  48 #include <sys/strsubr.h>
  49 #include <sys/kmem.h>
  50 #include <sys/conf.h>
  51 #include <sys/devops.h>
  52 #include <sys/ksynch.h>
  53 #include <sys/stat.h>
  54 #include <sys/modctl.h>
  55 #include <sys/debug.h>
  56 #include <sys/pci.h>
  57 #include <sys/ethernet.h>
  58 
  59 #define VLAN_TAGSZ 4
  60 
  61 #include <sys/dlpi.h>
  62 #include <sys/taskq.h>
  63 #include <sys/cyclic.h>
  64 
  65 #include <sys/pattr.h>
  66 #include <sys/strsun.h>
  67 
  68 #include <sys/random.h>
  69 #include <sys/sysmacros.h>
  70 #include <sys/stream.h>
  71 
  72 #include <sys/mac.h>
  73 #include <sys/mac_provider.h>
  74 #include <sys/mac_ether.h>
  75 
  76 #include "virtiovar.h"
  77 #include "virtioreg.h"
  78 
  79 #if !defined(__packed)
  80 #define __packed __attribute__((packed))
  81 #endif /* __packed */
  82 
  83 /* Configuration registers */
  84 #define VIRTIO_NET_CONFIG_MAC           0 /* 8bit x 6byte */
  85 #define VIRTIO_NET_CONFIG_STATUS        6 /* 16bit */
  86 
  87 /* Feature bits */
  88 #define VIRTIO_NET_F_CSUM       (1 << 0) /* Host handles pkts w/ partial csum */
  89 #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* Guest handles pkts w/ part csum */
  90 #define VIRTIO_NET_F_MAC        (1 << 5) /* Host has given MAC address. */
  91 #define VIRTIO_NET_F_GSO        (1 << 6) /* Host handles pkts w/ any GSO type */
  92 #define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* Guest can handle TSOv4 in. */
  93 #define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* Guest can handle TSOv6 in. */
  94 #define VIRTIO_NET_F_GUEST_ECN  (1 << 9) /* Guest can handle TSO[6] w/ ECN in */
  95 #define VIRTIO_NET_F_GUEST_UFO  (1 << 10) /* Guest can handle UFO in. */
  96 #define VIRTIO_NET_F_HOST_TSO4  (1 << 11) /* Host can handle TSOv4 in. */
  97 #define VIRTIO_NET_F_HOST_TSO6  (1 << 12) /* Host can handle TSOv6 in. */
  98 #define VIRTIO_NET_F_HOST_ECN   (1 << 13) /* Host can handle TSO[6] w/ ECN in */
  99 #define VIRTIO_NET_F_HOST_UFO   (1 << 14) /* Host can handle UFO in. */
 100 #define VIRTIO_NET_F_MRG_RXBUF  (1 << 15) /* Host can merge receive buffers. */
 101 #define VIRTIO_NET_F_STATUS     (1 << 16) /* Config.status available */
 102 #define VIRTIO_NET_F_CTRL_VQ    (1 << 17) /* Control channel available */
 103 #define VIRTIO_NET_F_CTRL_RX    (1 << 18) /* Control channel RX mode support */
 104 #define VIRTIO_NET_F_CTRL_VLAN  (1 << 19) /* Control channel VLAN filtering */
 105 #define VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */
 106 
 107 /* Status */
 108 #define VIRTIO_NET_S_LINK_UP    1
 109 
 110 /* Packet header structure */
 111 struct virtio_net_hdr {
 112         uint8_t         flags;
 113         uint8_t         gso_type;
 114         uint16_t        hdr_len;
 115         uint16_t        gso_size;
 116         uint16_t        csum_start;
 117         uint16_t        csum_offset;
 118 };
 119 
 120 #define VIRTIO_NET_HDR_F_NEEDS_CSUM     1 /* flags */
 121 #define VIRTIO_NET_HDR_GSO_NONE         0 /* gso_type */
 122 #define VIRTIO_NET_HDR_GSO_TCPV4        1 /* gso_type */
 123 #define VIRTIO_NET_HDR_GSO_UDP          3 /* gso_type */
 124 #define VIRTIO_NET_HDR_GSO_TCPV6        4 /* gso_type */
 125 #define VIRTIO_NET_HDR_GSO_ECN          0x80 /* gso_type, |'ed */
 126 
 127 
 128 /* Control virtqueue */
 129 struct virtio_net_ctrl_cmd {
 130         uint8_t class;
 131         uint8_t command;
 132 } __packed;
 133 
 134 #define VIRTIO_NET_CTRL_RX              0
 135 #define VIRTIO_NET_CTRL_RX_PROMISC      0
 136 #define VIRTIO_NET_CTRL_RX_ALLMULTI     1
 137 
 138 #define VIRTIO_NET_CTRL_MAC             1
 139 #define VIRTIO_NET_CTRL_MAC_TABLE_SET   0
 140 
 141 #define VIRTIO_NET_CTRL_VLAN            2
 142 #define VIRTIO_NET_CTRL_VLAN_ADD        0
 143 #define VIRTIO_NET_CTRL_VLAN_DEL        1
 144 
 145 struct virtio_net_ctrl_status {
 146         uint8_t ack;
 147 } __packed;
 148 
 149 struct virtio_net_ctrl_rx {
 150         uint8_t onoff;
 151 } __packed;
 152 
 153 struct virtio_net_ctrl_mac_tbl {
 154         uint32_t nentries;
 155         uint8_t macs[][ETHERADDRL];
 156 } __packed;
 157 
 158 struct virtio_net_ctrl_vlan {
 159         uint16_t id;
 160 } __packed;
 161 
 162 static int vioif_quiesce(dev_info_t *);
 163 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
 164 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
 165 
 166 DDI_DEFINE_STREAM_OPS(vioif_ops,
 167         nulldev,                /* identify */
 168         nulldev,                /* probe */
 169         vioif_attach,           /* attach */
 170         vioif_detach,           /* detach */
 171         nodev,                  /* reset */
 172         NULL,                   /* cb_ops */
 173         D_MP,                   /* bus_ops */
 174         NULL,                   /* power */
 175         vioif_quiesce           /* quiesce */
 176 );
 177 
 178 static char vioif_ident[] = "VirtIO ethernet driver";
 179 
 180 /* Standard module linkage initialization. */
 181 extern struct mod_ops mod_driverops;
 182 
 183 static struct modldrv modldrv = {
 184         &mod_driverops,             /* Type of module.  This one is a driver */
 185         vioif_ident,            /* short description */
 186         &vioif_ops          /* driver specific ops */
 187 };
 188 
 189 static struct modlinkage modlinkage = {
 190         MODREV_1,
 191         {
 192                 (void *)&modldrv,
 193                 NULL,
 194         },
 195 };
 196 
 197 ddi_device_acc_attr_t vioif_attr = {
 198         DDI_DEVICE_ATTR_V0,
 199         DDI_NEVERSWAP_ACC,      /* virtio is always native byte order */
 200         DDI_STORECACHING_OK_ACC,
 201         DDI_DEFAULT_ACC
 202 };
 203 
 204 /*
 205  * A mapping represents a binding for a single buffer that is contiguous in the
 206  * virtual address space.
 207  */
 208 struct vioif_buf_mapping {
 209         caddr_t                 vbm_buf;
 210         ddi_dma_handle_t        vbm_dmah;
 211         ddi_acc_handle_t        vbm_acch;
 212         ddi_dma_cookie_t        vbm_dmac;
 213         unsigned int            vbm_ncookies;
 214 };
 215 
 216 /*
 217  * Rx buffers can be loaned upstream, so the code has
 218  * to allocate them dynamically.
 219  */
 220 struct vioif_rx_buf {
 221         struct vioif_softc      *rb_sc;
 222         frtn_t                  rb_frtn;
 223 
 224         struct vioif_buf_mapping rb_mapping;
 225 };
 226 
 227 /*
 228  * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
 229  * used to hold the virtio_net_hdr. Small packets also get copied there, as
 230  * that is faster than mapping them. Bigger packets get mapped using the
 231  * "external" mapping array. An array is used because a packet may consist of
 232  * multiple fragments, so each fragment gets bound to an entry. In practice
 233  * the number of fragments rarely exceeds two, but to be safe, an array of up
 234  * to VIOIF_INDIRECT_MAX - 1 entries is allocated. To save resources, the DMA
 235  * handles are allocated lazily in the tx path.
 236  */
 237 struct vioif_tx_buf {
 238         mblk_t                  *tb_mp;
 239 
 240         /* inline buffer */
 241         struct vioif_buf_mapping tb_inline_mapping;
 242 
 243         /* External buffers */
 244         struct vioif_buf_mapping *tb_external_mapping;
 245         unsigned int            tb_external_num;
 246 };
 247 
 248 struct vioif_softc {
 249         dev_info_t              *sc_dev; /* mirrors virtio_softc->sc_dev */
 250         struct virtio_softc     sc_virtio;
 251 
 252         mac_handle_t sc_mac_handle;
 253         mac_register_t *sc_macp;
 254 
 255         struct virtqueue        *sc_rx_vq;
 256         struct virtqueue        *sc_tx_vq;
 257         struct virtqueue        *sc_ctrl_vq;
 258 
 259         unsigned int            sc_tx_stopped:1;
 260 
 261         /* Feature bits. */
 262         unsigned int            sc_rx_csum:1;
 263         unsigned int            sc_tx_csum:1;
 264         unsigned int            sc_tx_tso4:1;
 265 
 266         int                     sc_mtu;
 267         uint8_t                 sc_mac[ETHERADDRL];
 268         /*
 269          * For rx buffers, we keep a pointer array, because the buffers
 270          * can be loaned upstream, and we have to repopulate the array with
 271          * new members.
 272          */
 273         struct vioif_rx_buf     **sc_rxbufs;
 274 
 275         /*
 276          * For tx, we just allocate an array of buffers. The packet can
 277          * either be copied into the inline buffer, or the external mapping
 278          * can be used to map the packet.
 279          */
 280         struct vioif_tx_buf     *sc_txbufs;
 281 
 282         kstat_t                 *sc_intrstat;
 283         /*
 284          * We "loan" rx buffers upstream and reuse them after they are
 285          * freed. This lets us avoid allocations in the hot path.
 286          */
 287         kmem_cache_t            *sc_rxbuf_cache;
 288         ulong_t                 sc_rxloan;
 289 
 290         /* Copying small packets turns out to be faster than mapping them. */
 291         unsigned long           sc_rxcopy_thresh;
 292         unsigned long           sc_txcopy_thresh;
 293         /* Statistics. */
 294         uint64_t                sc_ipackets;
 295         uint64_t                sc_opackets;
 296         uint64_t                sc_rbytes;
 297         uint64_t                sc_obytes;
 298         uint64_t                sc_brdcstxmt;
 299         uint64_t                sc_brdcstrcv;
 300         uint64_t                sc_multixmt;
 301         uint64_t                sc_multircv;
 302         uint64_t                sc_norecvbuf;
 303         uint64_t                sc_notxbuf;
 304         uint64_t                sc_ierrors;
 305         uint64_t                sc_oerrors;
 306 };
 307 
 308 #define ETHER_HEADER_LEN                sizeof (struct ether_header)
 309 
 310 /* The maximum payload is the MTU plus the ethernet header. */
 311 #define MAX_PAYLOAD     65535
 312 #define MAX_MTU         (MAX_PAYLOAD - ETHER_HEADER_LEN)
 313 #define DEFAULT_MTU     ETHERMTU
 314 
 315 /*
 316  * Yes, we spend 8M per device. It turns out there is no point in
 317  * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
 318  * because vhost does not support them, and we expect to be used with
 319  * vhost in production environments.
 320  */
 321 /* The buffer keeps both the packet data and the virtio_net_header. */
 322 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
 323 
 324 /*
 325  * We win a bit on header alignment, but the host wins a lot
 326  * more on moving aligned buffers. Might need more thought.
 327  */
 328 #define VIOIF_IP_ALIGN 0
 329 
 330 /* Maximum number of indirect descriptors, somewhat arbitrary. */
 331 #define VIOIF_INDIRECT_MAX 128
 332 
 333 /*
 334  * We pre-allocate a reasonably large buffer into which small packets
 335  * are copied. Bigger packets are mapped directly; packets with multiple
 336  * cookies are mapped as indirect buffers.
 337  */
 338 #define VIOIF_TX_INLINE_SIZE 2048
 339 
 340 /* Native queue size for all queues */
 341 #define VIOIF_RX_QLEN 0
 342 #define VIOIF_TX_QLEN 0
 343 #define VIOIF_CTRL_QLEN 0
 344 
 345 static uchar_t vioif_broadcast[ETHERADDRL] = {
 346         0xff, 0xff, 0xff, 0xff, 0xff, 0xff
 347 };
 348 
 349 #define VIOIF_TX_THRESH_MAX     640
 350 #define VIOIF_RX_THRESH_MAX     640
 351 
 352 #define CACHE_NAME_SIZE 32
 353 
 354 static char vioif_txcopy_thresh[] =
 355         "vioif_txcopy_thresh";
 356 static char vioif_rxcopy_thresh[] =
 357         "vioif_rxcopy_thresh";
 358 
 359 static char *vioif_priv_props[] = {
 360         vioif_txcopy_thresh,
 361         vioif_rxcopy_thresh,
 362         NULL
 363 };
 364 
 365 /* Candidates for promotion into the DDI? */
 366 static ddi_dma_cookie_t *
 367 vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
 368 {
 369         ddi_dma_impl_t *dmah_impl = (void *) dmah;
 370         ASSERT(dmah_impl->dmai_cookie);
 371         return (dmah_impl->dmai_cookie);
 372 }
 373 
 374 static void
 375 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
 376 {
 377         ddi_dma_impl_t *dmah_impl = (void *) dmah;
 378         dmah_impl->dmai_cookie = dmac;
 379 }
 380 
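     /*
      * Report the current link state. If the host advertises the
      * VIRTIO_NET_F_STATUS feature, read it from the device config;
      * otherwise assume the link is up.
      */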
 381 static link_state_t
 382 vioif_link_state(struct vioif_softc *sc)
 383 {
 384         if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
 385                 if (virtio_read_device_config_2(&sc->sc_virtio,
 386                     VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
 387                         return (LINK_STATE_UP);
 388                 } else {
 389                         return (LINK_STATE_DOWN);
 390                 }
 391         }
 392 
 393         return (LINK_STATE_UP);
 394 }
 395 
 396 static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
 397         DMA_ATTR_V0,            /* Version number */
 398         0,                      /* low address */
 399         0xFFFFFFFFFFFFFFFF,     /* high address */
 400         0xFFFFFFFF,             /* counter register max */
 401         1,                      /* page alignment */
 402         1,                      /* burst sizes: 1 - 32 */
 403         1,                      /* minimum transfer size */
 404         0xFFFFFFFF,             /* max transfer size */
 405         0xFFFFFFFFFFFFFFF,      /* address register max */
 406         1,                      /* scatter-gather capacity */
 407         1,                      /* device operates on bytes */
 408         0,                      /* attr flag: set to 0 */
 409 };
 410 
 411 static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
 412         DMA_ATTR_V0,            /* Version number */
 413         0,                      /* low address */
 414         0xFFFFFFFFFFFFFFFF,     /* high address */
 415         0xFFFFFFFF,             /* counter register max */
 416         1,                      /* page alignment */
 417         1,                      /* burst sizes: 1 - 32 */
 418         1,                      /* minimum transfer size */
 419         0xFFFFFFFF,             /* max transfer size */
 420         0xFFFFFFFFFFFFFFF,      /* address register max */
 421 
 422         /* One entry is used for the virtio_net_hdr on the tx path */
 423         VIOIF_INDIRECT_MAX - 1, /* scatter-gather capacity */
 424         1,                      /* device operates on bytes */
 425         0,                      /* attr flag: set to 0 */
 426 };
 427 
 428 static ddi_device_acc_attr_t vioif_bufattr = {
 429         DDI_DEVICE_ATTR_V0,
 430         DDI_NEVERSWAP_ACC,
 431         DDI_STORECACHING_OK_ACC,
 432         DDI_DEFAULT_ACC
 433 };
 434 
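     /*
      * Free routine passed to desballoc(): called when a loaned rx buffer
      * is released upstream, so we can return it to the cache.
      */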
 435 static void
 436 vioif_rx_free(caddr_t free_arg)
 437 {
 438         struct vioif_rx_buf *buf = (void *) free_arg;
 439         struct vioif_softc *sc = buf->rb_sc;
 440 
 441         kmem_cache_free(sc->sc_rxbuf_cache, buf);
 442         atomic_dec_ulong(&sc->sc_rxloan);
 443 }
 444 
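     /*
      * kmem cache constructor for rx buffers: allocate, map and DMA-bind
      * the backing memory once, so buffers taken from the cache are ready
      * for use.
      */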
 445 static int
 446 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
 447 {
 448         _NOTE(ARGUNUSED(kmflags));
 449         struct vioif_softc *sc = user_arg;
 450         struct vioif_rx_buf *buf = buffer;
 451         size_t len;
 452 
 453         if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
 454             DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
 455                 dev_err(sc->sc_dev, CE_WARN,
 456                     "Can't allocate dma handle for rx buffer");
 457                 goto exit_handle;
 458         }
 459 
 460         if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
 461             VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
 462             &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
 463             NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
 464                 dev_err(sc->sc_dev, CE_WARN,
 465                     "Can't allocate rx buffer");
 466                 goto exit_alloc;
 467         }
 468         ASSERT(len >= VIOIF_RX_SIZE);
 469 
 470         if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
 471             buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
 472             DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
 473             &buf->rb_mapping.vbm_ncookies)) {
 474                 dev_err(sc->sc_dev, CE_WARN, "Can't bind rx buffer");
 475 
 476                 goto exit_bind;
 477         }
 478 
 479         ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
 480 
 481         buf->rb_sc = sc;
 482         buf->rb_frtn.free_arg = (void *) buf;
 483         buf->rb_frtn.free_func = vioif_rx_free;
 484 
 485         return (0);
 486 exit_bind:
 487         ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
 488 exit_alloc:
 489         ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
 490 exit_handle:
 491 
 492         return (ENOMEM);
 493 }
 494 
 495 static void
 496 vioif_rx_destruct(void *buffer, void *user_arg)
 497 {
 498         _NOTE(ARGUNUSED(user_arg));
 499         struct vioif_rx_buf *buf = buffer;
 500 
 501         ASSERT(buf->rb_mapping.vbm_acch);
 502         ASSERT(buf->rb_mapping.vbm_dmah);
 503 
 504         (void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
 505         ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
 506         ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
 507 }
 508 
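     /*
      * Undo vioif_alloc_mems(): release the tx buffer mappings and arrays
      * and return any remaining rx buffers to the cache.
      */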
 509 static void
 510 vioif_free_mems(struct vioif_softc *sc)
 511 {
 512         int i;
 513 
 514         for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
 515                 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
 516                 int j;
 517 
 518                 /* Tear down the inline mapping. */
 519 
 520                 ASSERT(buf->tb_inline_mapping.vbm_acch);
 521                 ASSERT(buf->tb_inline_mapping.vbm_dmah);
 522 
 523                 (void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
 524                 ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
 525                 ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
 526 
 527                 /* We should not see any in-flight buffers at this point. */
 528                 ASSERT(!buf->tb_mp);
 529 
 530                 /* Free all the DMA handles we allocated lazily. */
 531                 for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
 532                         ddi_dma_free_handle(
 533                             &buf->tb_external_mapping[j].vbm_dmah);
 534                 /* Free the external mapping array. */
 535                 kmem_free(buf->tb_external_mapping,
 536                     sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1);
 537         }
 538 
 539         kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
 540             sc->sc_tx_vq->vq_num);
 541 
 542         for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
 543                 struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
 544 
 545                 if (buf)
 546                         kmem_cache_free(sc->sc_rxbuf_cache, buf);
 547         }
 548         kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
 549             sc->sc_rx_vq->vq_num);
 550 }
 551 
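     /*
      * Allocate the tx buffer array with its bound inline mappings and the
      * (initially empty) rx buffer pointer array.
      */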
 552 static int
 553 vioif_alloc_mems(struct vioif_softc *sc)
 554 {
 555         int i, txqsize, rxqsize;
 556         size_t len;
 557         unsigned int nsegments;
 558 
 559         txqsize = sc->sc_tx_vq->vq_num;
 560         rxqsize = sc->sc_rx_vq->vq_num;
 561 
 562         sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
 563             KM_SLEEP);
 564         if (sc->sc_txbufs == NULL) {
 565                 dev_err(sc->sc_dev, CE_WARN,
 566                     "Failed to allocate the tx buffers array");
 567                 goto exit_txalloc;
 568         }
 569 
 570         /*
 571          * We don't allocate the rx vioif_bufs, just the pointers, as
 572          * rx vioif_bufs can be loaned upstream, and we don't know the
 573          * total number we need.
 574          */
 575         sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
 576             KM_SLEEP);
 577         if (sc->sc_rxbufs == NULL) {
 578                 dev_err(sc->sc_dev, CE_WARN,
 579                     "Failed to allocate the rx buffers pointer array");
 580                 goto exit_rxalloc;
 581         }
 582 
 583         for (i = 0; i < txqsize; i++) {
 584                 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
 585 
 586                 /* Allocate and bind an inline mapping. */
 587 
 588                 if (ddi_dma_alloc_handle(sc->sc_dev,
 589                     &vioif_inline_buf_dma_attr,
 590                     DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
 591 
 592                         dev_err(sc->sc_dev, CE_WARN,
 593                             "Can't allocate dma handle for tx buffer %d", i);
 594                         goto exit_tx;
 595                 }
 596 
 597                 if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
 598                     VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
 599                     DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
 600                     &len, &buf->tb_inline_mapping.vbm_acch)) {
 601 
 602                         dev_err(sc->sc_dev, CE_WARN,
 603                             "Can't allocate tx buffer %d", i);
 604                         goto exit_tx;
 605                 }
 606                 ASSERT(len >= VIOIF_TX_INLINE_SIZE);
 607 
 608                 if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
 609                     NULL, buf->tb_inline_mapping.vbm_buf, len,
 610                     DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
 611                     &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
 612 
 613                         dev_err(sc->sc_dev, CE_WARN,
 614                             "Can't bind tx buffer %d", i);
 615                         goto exit_tx;
 616                 }
 617 
 618                 /* We asked for a single segment */
 619                 ASSERT(nsegments == 1);
 620 
 621                 /*
 622                  * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
 623                  * In reality, I don't expect more than 2-3 to be used, but
 624                  * who knows.
 625                  */
 626                 buf->tb_external_mapping = kmem_zalloc(
 627                     sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1,
 628                     KM_SLEEP);
 629 
 630                 /*
 631                  * The external mappings' DMA handles are allocated lazily,
 632                  * as we don't expect most of them to be used.
 633                  */
 634         }
 635 
 636         return (0);
 637 
 638 exit_tx:
 639         for (i = 0; i < txqsize; i++) {
 640                 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
 641 
 642                 if (buf->tb_inline_mapping.vbm_dmah)
 643                         (void) ddi_dma_unbind_handle(
 644                             buf->tb_inline_mapping.vbm_dmah);
 645 
 646                 if (buf->tb_inline_mapping.vbm_acch)
 647                         ddi_dma_mem_free(
 648                             &buf->tb_inline_mapping.vbm_acch);
 649 
 650                 if (buf->tb_inline_mapping.vbm_dmah)
 651                         ddi_dma_free_handle(
 652                             &buf->tb_inline_mapping.vbm_dmah);
 653 
 654                 if (buf->tb_external_mapping)
 655                         kmem_free(buf->tb_external_mapping,
 656                             sizeof (struct vioif_tx_buf) *
 657                             VIOIF_INDIRECT_MAX - 1);
 658         }
 659 
 660         kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) * rxqsize);
 661 
 662 exit_rxalloc:
 663         kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
 664 exit_txalloc:
 665         return (ENOMEM);
 666 }
 667 
 668 /* ARGSUSED */
 669 int
 670 vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
 671 {
 672         return (DDI_SUCCESS);
 673 }
 674 
 675 /* ARGSUSED */
 676 int
 677 vioif_promisc(void *arg, boolean_t on)
 678 {
 679         return (DDI_SUCCESS);
 680 }
 681 
 682 /* ARGSUSED */
 683 int
 684 vioif_unicst(void *arg, const uint8_t *macaddr)
 685 {
 686         return (DDI_FAILURE);
 687 }
 688 
 689 
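     /*
      * Post a single rx buffer to the rx virtqueue, allocating it from the
      * cache if the slot is empty: the virtio_net_hdr and the packet area
      * are added as an indirect chain and pushed onto the ring.
      */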
 690 static int
 691 vioif_add_rx(struct vioif_softc *sc, int kmflag)
 692 {
 693         struct vq_entry *ve;
 694         struct vioif_rx_buf *buf;
 695 
 696         ve = vq_alloc_entry(sc->sc_rx_vq);
 697         if (!ve) {
 698                 /*
 699                  * Out of free descriptors - ring already full.
 700                  * It would be better to update sc_norxdescavail
 701                  * but MAC does not ask for this info, hence we
 702                  * update sc_norecvbuf.
 703                  */
 704                 sc->sc_norecvbuf++;
 705                 goto exit_vq;
 706         }
 707         buf = sc->sc_rxbufs[ve->qe_index];
 708 
 709         if (!buf) {
 710                 /* First run, allocate the buffer. */
 711                 buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
 712                 sc->sc_rxbufs[ve->qe_index] = buf;
 713         }
 714 
 715         /* Still nothing? Bye. */
 716         if (!buf) {
 717                 dev_err(sc->sc_dev, CE_WARN, "Can't allocate rx buffer");
 718                 sc->sc_norecvbuf++;
 719                 goto exit_buf;
 720         }
 721 
 722         ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
 723 
 724         /*
 725          * For an unknown reason, the virtio_net_hdr must be placed
 726          * as a separate virtio queue entry.
 727          */
 728         virtio_ve_add_indirect_buf(ve, buf->rb_mapping.vbm_dmac.dmac_laddress,
 729             sizeof (struct virtio_net_hdr), B_FALSE);
 730 
 731         /* Add the rest of the first cookie. */
 732         virtio_ve_add_indirect_buf(ve,
 733             buf->rb_mapping.vbm_dmac.dmac_laddress +
 734             sizeof (struct virtio_net_hdr),
 735             buf->rb_mapping.vbm_dmac.dmac_size -
 736             sizeof (struct virtio_net_hdr), B_FALSE);
 737 
 738         /*
 739          * If the buffer consists of a single cookie (unlikely for a
 740          * 64-k buffer), we are done. Otherwise, add the rest of the cookies
 741          * using indirect entries.
 742          */
 743         if (buf->rb_mapping.vbm_ncookies > 1) {
 744                 ddi_dma_cookie_t *first_extra_dmac;
 745                 ddi_dma_cookie_t dmac;
 746                 first_extra_dmac =
 747                     vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
 748 
 749                 ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
 750                 virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
 751                     dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
 752                 vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
 753                     first_extra_dmac);
 754         }
 755 
 756         virtio_push_chain(ve, B_FALSE);
 757 
 758         return (DDI_SUCCESS);
 759 
 760 exit_buf:
 761         vq_free_entry(sc->sc_rx_vq, ve);
 762 exit_vq:
 763         return (DDI_FAILURE);
 764 }
 765 
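     /*
      * Fill the rx ring with as many buffers as we can post, then notify
      * the host. Returns the number of buffers added.
      */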
 766 static int
 767 vioif_populate_rx(struct vioif_softc *sc, int kmflag)
 768 {
 769         int i = 0;
 770         int ret;
 771 
 772         for (;;) {
 773                 ret = vioif_add_rx(sc, kmflag);
 774                 if (ret)
 775                         /*
 776                          * We could not allocate some memory. Try to work with
 777                          * what we've got.
 778                          */
 779                         break;
 780                 i++;
 781         }
 782 
 783         if (i)
 784                 virtio_sync_vq(sc->sc_rx_vq);
 785 
 786         return (i);
 787 }
 788 
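     /*
      * Pull completed chains from the rx virtqueue and pass the packets up
      * to MAC. Packets below the copy threshold are copied into a fresh
      * mblk; larger ones are loaned upstream via desballoc(). Returns the
      * number of packets processed.
      */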
 789 static int
 790 vioif_process_rx(struct vioif_softc *sc)
 791 {
 792         struct vq_entry *ve;
 793         struct vioif_rx_buf *buf;
 794         mblk_t *mp;
 795         uint32_t len;
 796         int i = 0;
 797 
 798         while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
 799 
 800                 buf = sc->sc_rxbufs[ve->qe_index];
 801                 ASSERT(buf);
 802 
 803                 if (len < sizeof (struct virtio_net_hdr)) {
 804                         dev_err(sc->sc_dev, CE_WARN, "RX: Chain too small: %u",
 805                             len);
 806                         sc->sc_ierrors++;
 807                         virtio_free_chain(ve);
 808                         continue;
 809                 }
 810 
 811                 len -= sizeof (struct virtio_net_hdr);
 812                 /*
 813                  * We copy small packets that happened to fit into a single
 814                  * cookie and reuse the buffers. For bigger ones, we loan
 815                  * the buffers upstream.
 816                  */
 817                 if (len < sc->sc_rxcopy_thresh) {
 818                         mp = allocb(len, 0);
 819                         if (!mp) {
 820                                 sc->sc_norecvbuf++;
 821                                 sc->sc_ierrors++;
 822 
 823                                 virtio_free_chain(ve);
 824                                 break;
 825                         }
 826 
 827                         bcopy((char *)buf->rb_mapping.vbm_buf +
 828                             sizeof (struct virtio_net_hdr), mp->b_rptr, len);
 829                         mp->b_wptr = mp->b_rptr + len;
 830 
 831                 } else {
 832                         mp = desballoc((unsigned char *)
 833                             buf->rb_mapping.vbm_buf +
 834                             sizeof (struct virtio_net_hdr) +
 835                             VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
 836                         if (!mp) {
 837                                 sc->sc_norecvbuf++;
 838                                 sc->sc_ierrors++;
 839 
 840                                 virtio_free_chain(ve);
 841                                 break;
 842                         }
 843                         mp->b_wptr = mp->b_rptr + len;
 844 
 845                         atomic_inc_ulong(&sc->sc_rxloan);
 846                         /*
 847                          * Buffer loaned, we will have to allocate a new one
 848                          * for this slot.
 849                          */
 850                         sc->sc_rxbufs[ve->qe_index] = NULL;
 851                 }
 852 
 853                 /*
 854                  * virtio-net does not tell us if this packet is multicast
 855                  * or broadcast, so we have to check it.
 856                  */
 857                 if (mp->b_rptr[0] & 0x1) {
 858                         if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
 859                                 sc->sc_multircv++;
 860                         else
 861                                 sc->sc_brdcstrcv++;
 862                 }
 863 
 864                 sc->sc_rbytes += len;
 865                 sc->sc_ipackets++;
 866 
 867                 virtio_free_chain(ve);
 868                 mac_rx(sc->sc_mac_handle, NULL, mp);
 869                 i++;
 870         }
 871 
 872         return (i);
 873 }
 874 
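     /*
      * Release the buffers of every tx chain the host has completed, and
      * restart MAC transmission if it was stopped because the ring was full.
      */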
 875 static void
 876 vioif_reclaim_used_tx(struct vioif_softc *sc)
 877 {
 878         struct vq_entry *ve;
 879         struct vioif_tx_buf *buf;
 880         uint32_t len;
 881         mblk_t *mp;
 882         int i = 0, j;
 883 
 884         while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
 885                 /* We don't chain descriptors for tx, so don't expect any. */
 886                 ASSERT(!ve->qe_next);
 887 
 888                 buf = &sc->sc_txbufs[ve->qe_index];
 889                 mp = buf->tb_mp;
 890                 buf->tb_mp = NULL;
 891 
 892                 if (mp) {
 893                         for (j = 0; j < buf->tb_external_num; j++)
 894                                 (void) ddi_dma_unbind_handle(
 895                                     buf->tb_external_mapping[j].vbm_dmah);
 896                 }
 897 
 898                 virtio_free_chain(ve);
 899 
 900                 /* External mapping used, mp was not freed in vioif_send() */
 901                 if (mp)
 902                         freemsg(mp);
 903                 i++;
 904         }
 905 
 906         if (sc->sc_tx_stopped && i) {
 907                 sc->sc_tx_stopped = 0;
 908                 mac_tx_update(sc->sc_mac_handle);
 909         }
 910 }
 911 
 912 /* sc will be used to update stat counters. */
 913 /* ARGSUSED */
 914 static inline void
 915 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
 916     size_t msg_size)
 917 {
 918         struct vioif_tx_buf *buf;
 919         buf = &sc->sc_txbufs[ve->qe_index];
 920 
 921         ASSERT(buf);
 922 
 923         /* Frees mp */
 924         mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
 925             sizeof (struct virtio_net_hdr));
 926 
 927         virtio_ve_add_indirect_buf(ve,
 928             buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
 929             sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
 930 }
 931 
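     /* Allocate the DMA handle for an external tx mapping on first use. */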
 932 static inline int
 933 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
 934     int i)
 935 {
 936         int ret = DDI_SUCCESS;
 937 
 938         if (!buf->tb_external_mapping[i].vbm_dmah) {
 939                 ret = ddi_dma_alloc_handle(sc->sc_dev,
 940                     &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
 941                     &buf->tb_external_mapping[i].vbm_dmah);
 942                 if (ret != DDI_SUCCESS) {
 943                         dev_err(sc->sc_dev, CE_WARN,
 944                             "Can't allocate dma handle for external tx buffer");
 945                 }
 946         }
 947 
 948         return (ret);
 949 }
 950 
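     /*
      * Transmit a packet by DMA-binding each of its fragments and adding
      * the resulting cookies to the descriptor chain. The mblk is kept in
      * tb_mp and freed once the host is done with it (see
      * vioif_reclaim_used_tx()).
      */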
 951 static inline int
 952 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
 953     size_t msg_size)
 954 {
 955         _NOTE(ARGUNUSED(msg_size));
 956 
 957         struct vioif_tx_buf *buf;
 958         mblk_t *nmp;
 959         int i, j;
 960         int ret = DDI_SUCCESS;
 961 
 962         buf = &sc->sc_txbufs[ve->qe_index];
 963 
 964         ASSERT(buf);
 965 
 966         buf->tb_external_num = 0;
 967         i = 0;
 968         nmp = mp;
 969 
 970         while (nmp) {
 971                 size_t len;
 972                 ddi_dma_cookie_t dmac;
 973                 unsigned int ncookies;
 974 
 975                 len = MBLKL(nmp);
 976                 /*
 977                  * For some reason, the network stack can
 978                  * actually send us zero-length fragments.
 979                  */
 980                 if (len == 0) {
 981                         nmp = nmp->b_cont;
 982                         continue;
 983                 }
 984 
 985                 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
 986                 if (ret != DDI_SUCCESS) {
 987                         sc->sc_notxbuf++;
 988                         sc->sc_oerrors++;
 989                         goto exit_lazy_alloc;
 990                 }
 991                 ret = ddi_dma_addr_bind_handle(
 992                     buf->tb_external_mapping[i].vbm_dmah, NULL,
 993                     (caddr_t)nmp->b_rptr, len,
 994                     DDI_DMA_WRITE | DDI_DMA_STREAMING,
 995                     DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
 996 
 997                 if (ret != DDI_SUCCESS) {
 998                         sc->sc_oerrors++;
 999                         dev_err(sc->sc_dev, CE_NOTE,
1000                             "TX: Failed to bind external handle");
1001                         goto exit_bind;
1002                 }
1003 
1004                 /* Check if we still fit into the indirect table. */
1005                 if (virtio_ve_indirect_available(ve) < ncookies) {
1006                         dev_err(sc->sc_dev, CE_NOTE,
1007                             "TX: Indirect descriptor table limit reached."
1008                             " It took %d fragments.", i);
1009                         sc->sc_notxbuf++;
1010                         sc->sc_oerrors++;
1011 
1012                         ret = DDI_FAILURE;
1013                         goto exit_limit;
1014                 }
1015 
1016                 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1017                     dmac, ncookies, B_TRUE);
1018 
1019                 nmp = nmp->b_cont;
1020                 i++;
1021         }
1022 
1023         buf->tb_external_num = i;
1024         /* Save the mp to free it when the packet is sent. */
1025         buf->tb_mp = mp;
1026 
1027         return (DDI_SUCCESS);
1028 
1029 exit_limit:
1030 exit_bind:
1031 exit_lazy_alloc:
1032 
1033         for (j = 0; j < i; j++) {
1034                 (void) ddi_dma_unbind_handle(
1035                     buf->tb_external_mapping[j].vbm_dmah);
1036         }
1037 
1038         return (ret);
1039 }
1040 
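     /*
      * Send a single packet: fill in the virtio_net_hdr (checksum offload
      * and LSO fields), then either copy the packet into the inline buffer
      * or bind it with external mappings. Returns B_FALSE if the tx ring
      * is full, in which case the caller must retry the packet later.
      */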
1041 static boolean_t
1042 vioif_send(struct vioif_softc *sc, mblk_t *mp)
1043 {
1044         struct vq_entry *ve;
1045         struct vioif_tx_buf *buf;
1046         struct virtio_net_hdr *net_header = NULL;
1047         size_t msg_size = 0;
1048         uint32_t csum_start;
1049         uint32_t csum_stuff;
1050         uint32_t csum_flags;
1051         uint32_t lso_flags;
1052         uint32_t lso_mss;
1053         mblk_t *nmp;
1054         int ret;
1055         boolean_t lso_required = B_FALSE;
1056 
1057         for (nmp = mp; nmp; nmp = nmp->b_cont)
1058                 msg_size += MBLKL(nmp);
1059 
1060         if (sc->sc_tx_tso4) {
1061                 mac_lso_get(mp, &lso_mss, &lso_flags);
1062                 lso_required = (lso_flags & HW_LSO);
1063         }
1064 
1065         ve = vq_alloc_entry(sc->sc_tx_vq);
1066 
1067         if (!ve) {
1068                 sc->sc_notxbuf++;
1069                 /* Out of free descriptors - try later. */
1070                 return (B_FALSE);
1071         }
1072         buf = &sc->sc_txbufs[ve->qe_index];
1073 
1074         /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1075         (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1076             sizeof (struct virtio_net_hdr));
1077 
1078         /* LINTED E_BAD_PTR_CAST_ALIGN */
1079         net_header = (struct virtio_net_hdr *)
1080             buf->tb_inline_mapping.vbm_buf;
1081 
1082         mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1083             NULL, &csum_flags);
1084 
1085         /* They want us to do the TCP/UDP csum calculation. */
1086         if (csum_flags & HCK_PARTIALCKSUM) {
1087                 struct ether_header *eth_header;
1088                 int eth_hsize;
1089 
1090                 /* Did we ask for it? */
1091                 ASSERT(sc->sc_tx_csum);
1092 
1093                 /* We only asked for partial csum packets. */
1094                 ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1095                 ASSERT(!(csum_flags & HCK_FULLCKSUM));
1096 
1097                 eth_header = (void *) mp->b_rptr;
1098                 if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
1099                         eth_hsize = sizeof (struct ether_vlan_header);
1100                 } else {
1101                         eth_hsize = sizeof (struct ether_header);
1102                 }
1103                 net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1104                 net_header->csum_start = eth_hsize + csum_start;
1105                 net_header->csum_offset = csum_stuff - csum_start;
1106         }
1107 
1108         /* setup LSO fields if required */
1109         if (lso_required) {
1110                 net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1111                 net_header->gso_size = (uint16_t)lso_mss;
1112         }
1113 
1114         virtio_ve_add_indirect_buf(ve,
1115             buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
1116             sizeof (struct virtio_net_hdr), B_TRUE);
1117 
1118         /* Meanwhile, update the statistics. */
1119         if (mp->b_rptr[0] & 0x1) {
1120                 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
1121                         sc->sc_multixmt++;
1122                 else
1123                         sc->sc_brdcstxmt++;
1124         }
1125 
1126         /*
1127          * We copy small packets into the inline buffer. The bigger ones
1128          * get mapped using the mapped buffer.
1129          */
1130         if (msg_size < sc->sc_txcopy_thresh) {
1131                 vioif_tx_inline(sc, ve, mp, msg_size);
1132         } else {
1133                 /* Statistics are updated by vioif_tx_external() on failure. */
1134                 ret = vioif_tx_external(sc, ve, mp, msg_size);
1135                 if (ret != DDI_SUCCESS)
1136                         goto exit_tx_external;
1137         }
1138 
1139         virtio_push_chain(ve, B_TRUE);
1140 
1141         sc->sc_opackets++;
1142         sc->sc_obytes += msg_size;
1143 
1144         return (B_TRUE);
1145 
1146 exit_tx_external:
1147 
1148         vq_free_entry(sc->sc_tx_vq, ve);
1149         /*
1150          * vioif_tx_external can fail when the buffer does not fit into the
1151          * indirect descriptor table. Free the mp. I don't expect this ever
1152          * to happen.
1153          */
1154         freemsg(mp);
1155 
1156         return (B_TRUE);
1157 }
1158 
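     /*
      * mc_tx(9E) entry point: send as many packets from the chain as the
      * tx ring allows; the unsent remainder is returned so MAC can retry
      * it later.
      */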
1159 mblk_t *
1160 vioif_tx(void *arg, mblk_t *mp)
1161 {
1162         struct vioif_softc *sc = arg;
1163         mblk_t  *nmp;
1164 
1165         while (mp != NULL) {
1166                 nmp = mp->b_next;
1167                 mp->b_next = NULL;
1168 
1169                 if (!vioif_send(sc, mp)) {
1170                         sc->sc_tx_stopped = 1;
1171                         mp->b_next = nmp;
1172                         break;
1173                 }
1174                 mp = nmp;
1175         }
1176 
1177         return (mp);
1178 }
1179 
1180 int
1181 vioif_start(void *arg)
1182 {
1183         struct vioif_softc *sc = arg;
1184 
1185         mac_link_update(sc->sc_mac_handle,
1186             vioif_link_state(sc));
1187 
1188         virtio_start_vq_intr(sc->sc_rx_vq);
1189 
1190         return (DDI_SUCCESS);
1191 }
1192 
1193 void
1194 vioif_stop(void *arg)
1195 {
1196         struct vioif_softc *sc = arg;
1197 
1198         virtio_stop_vq_intr(sc->sc_rx_vq);
1199 }
1200 
1201 /* ARGSUSED */
1202 static int
1203 vioif_stat(void *arg, uint_t stat, uint64_t *val)
1204 {
1205         struct vioif_softc *sc = arg;
1206 
1207         switch (stat) {
1208         case MAC_STAT_IERRORS:
1209                 *val = sc->sc_ierrors;
1210                 break;
1211         case MAC_STAT_OERRORS:
1212                 *val = sc->sc_oerrors;
1213                 break;
1214         case MAC_STAT_MULTIRCV:
1215                 *val = sc->sc_multircv;
1216                 break;
1217         case MAC_STAT_BRDCSTRCV:
1218                 *val = sc->sc_brdcstrcv;
1219                 break;
1220         case MAC_STAT_MULTIXMT:
1221                 *val = sc->sc_multixmt;
1222                 break;
1223         case MAC_STAT_BRDCSTXMT:
1224                 *val = sc->sc_brdcstxmt;
1225                 break;
1226         case MAC_STAT_IPACKETS:
1227                 *val = sc->sc_ipackets;
1228                 break;
1229         case MAC_STAT_RBYTES:
1230                 *val = sc->sc_rbytes;
1231                 break;
1232         case MAC_STAT_OPACKETS:
1233                 *val = sc->sc_opackets;
1234                 break;
1235         case MAC_STAT_OBYTES:
1236                 *val = sc->sc_obytes;
1237                 break;
1238         case MAC_STAT_NORCVBUF:
1239                 *val = sc->sc_norecvbuf;
1240                 break;
1241         case MAC_STAT_NOXMTBUF:
1242                 *val = sc->sc_notxbuf;
1243                 break;
1244         case MAC_STAT_IFSPEED:
1245                 /* always 1 Gbit */
1246                 *val = 1000000000ULL;
1247                 break;
1248         case ETHER_STAT_LINK_DUPLEX:
1249                 /* virtual device, always full-duplex */
1250                 *val = LINK_DUPLEX_FULL;
1251                 break;
1252 
1253         default:
1254                 return (ENOTSUP);
1255         }
1256 
1257         return (DDI_SUCCESS);
1258 }
1259 
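     /* Set the driver-private properties (tx/rx copy thresholds). */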
1260 static int
1261 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
1262     uint_t pr_valsize, const void *pr_val)
1263 {
1264         _NOTE(ARGUNUSED(pr_valsize));
1265 
1266         long result;
1267 
1268         if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1269 
1270                 if (pr_val == NULL)
1271                         return (EINVAL);
1272 
1273                 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1274 
1275                 if (result < 0 || result > VIOIF_TX_THRESH_MAX)
1276                         return (EINVAL);
1277                 sc->sc_txcopy_thresh = result;
1278         }
1279         if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1280 
1281                 if (pr_val == NULL)
1282                         return (EINVAL);
1283 
1284                 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1285 
1286                 if (result < 0 || result > VIOIF_RX_THRESH_MAX)
1287                         return (EINVAL);
1288                 sc->sc_rxcopy_thresh = result;
1289         }
1290         return (0);
1291 }
1292 
1293 static int
1294 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1295     uint_t pr_valsize, const void *pr_val)
1296 {
1297         struct vioif_softc *sc = arg;
1298         const uint32_t *new_mtu;
1299         int err;
1300 
1301         switch (pr_num) {
1302         case MAC_PROP_MTU:
1303                 new_mtu = pr_val;
1304 
1305                 if (*new_mtu > MAX_MTU) {
1306                         return (EINVAL);
1307                 }
1308 
1309                 err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
1310                 if (err) {
1311                         return (err);
1312                 }
1313                 break;
1314         case MAC_PROP_PRIVATE:
1315                 err = vioif_set_prop_private(sc, pr_name,
1316                     pr_valsize, pr_val);
1317                 if (err)
1318                         return (err);
1319                 break;
1320         default:
1321                 return (ENOTSUP);
1322         }
1323 
1324         return (0);
1325 }
1326 
1327 static int
1328 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
1329     uint_t pr_valsize, void *pr_val)
1330 {
1331         int err = ENOTSUP;
1332         int value;
1333 
1334         if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1335 
1336                 value = sc->sc_txcopy_thresh;
1337                 err = 0;
1338                 goto done;
1339         }
1340         if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1341 
1342                 value = sc->sc_rxcopy_thresh;
1343                 err = 0;
1344                 goto done;
1345         }
1346 done:
1347         if (err == 0) {
1348                 (void) snprintf(pr_val, pr_valsize, "%d", value);
1349         }
1350         return (err);
1351 }
1352 
1353 static int
1354 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1355     uint_t pr_valsize, void *pr_val)
1356 {
1357         struct vioif_softc *sc = arg;
1358         int err = ENOTSUP;
1359 
1360         switch (pr_num) {
1361         case MAC_PROP_PRIVATE:
1362                 err = vioif_get_prop_private(sc, pr_name,
1363                     pr_valsize, pr_val);
1364                 break;
1365         default:
1366                 break;
1367         }
1368         return (err);
1369 }
1370 
1371 static void
1372 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1373     mac_prop_info_handle_t prh)
1374 {
1375         struct vioif_softc *sc = arg;
1376         char valstr[64];
1377         int value;
1378 
1379         switch (pr_num) {
1380         case MAC_PROP_MTU:
1381                 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1382                 break;
1383 
1384         case MAC_PROP_PRIVATE:
1385                 bzero(valstr, sizeof (valstr));
1386                 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1387 
1388                         value = sc->sc_txcopy_thresh;
1389                 } else if (strcmp(pr_name,
1390                     vioif_rxcopy_thresh) == 0) {
1391                         value = sc->sc_rxcopy_thresh;
1392                 } else {
1393                         return;
1394                 }
1395                 (void) snprintf(valstr, sizeof (valstr), "%d", value);
                     mac_prop_info_set_default_str(prh, valstr);
1396                 break;
1397 
1398         default:
1399                 break;
1400         }
1401 }
1402 
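     /*
      * mc_getcapab(9E) entry point: advertise the checksum and LSO offload
      * capabilities implied by the features negotiated with the host.
      */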
1403 static boolean_t
1404 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1405 {
1406         struct vioif_softc *sc = arg;
1407 
1408         switch (cap) {
1409         case MAC_CAPAB_HCKSUM:
1410                 if (sc->sc_tx_csum) {
1411                         uint32_t *txflags = cap_data;
1412 
1413                         *txflags = HCKSUM_INET_PARTIAL;
1414                         return (B_TRUE);
1415                 }
1416                 return (B_FALSE);
1417         case MAC_CAPAB_LSO:
1418                 if (sc->sc_tx_tso4) {
1419                         mac_capab_lso_t *cap_lso = cap_data;
1420 
1421                         cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1422                         cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
1423                         return (B_TRUE);
1424                 }
1425                 return (B_FALSE);
1426         default:
1427                 break;
1428         }
1429         return (B_FALSE);
1430 }
1431 
1432 static mac_callbacks_t vioif_m_callbacks = {
1433         .mc_callbacks   = (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
1434         .mc_getstat     = vioif_stat,
1435         .mc_start       = vioif_start,
1436         .mc_stop        = vioif_stop,
1437         .mc_setpromisc  = vioif_promisc,
1438         .mc_multicst    = vioif_multicst,
1439         .mc_unicst      = vioif_unicst,
1440         .mc_tx          = vioif_tx,
1441         /* Optional callbacks */
1442         .mc_reserved    = NULL,
1443         .mc_ioctl       = NULL,
1444         .mc_getcapab    = vioif_getcapab,
1445         .mc_open        = NULL,
1446         .mc_close       = NULL,
1447         .mc_setprop     = vioif_setprop,
1448         .mc_getprop     = vioif_getprop,
1449         .mc_propinfo    = vioif_propinfo,
1450 };
1451 
1452 static void
1453 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1454     uint32_t features)
1455 {
1456         char buf[512];
1457         char *bufp = buf;
1458         char *bufend = buf + sizeof (buf);
1459 
1460         /* LINTED E_PTRDIFF_OVERFLOW */
1461         bufp += snprintf(bufp, bufend - bufp, "%s", prefix);
1462 
1463         /* LINTED E_PTRDIFF_OVERFLOW */
1464         bufp += virtio_show_features(features, bufp, bufend - bufp);
1465 
1466         /* LINTED E_PTRDIFF_OVERFLOW */
1467         bufp += snprintf(bufp, bufend - bufp, "Vioif ( ");
1468 
1469         if (features & VIRTIO_NET_F_CSUM)
1470                 /* LINTED E_PTRDIFF_OVERFLOW */
1471                 bufp += snprintf(bufp, bufend - bufp, "CSUM ");
1472         if (features & VIRTIO_NET_F_GUEST_CSUM)
1473                 /* LINTED E_PTRDIFF_OVERFLOW */
1474                 bufp += snprintf(bufp, bufend - bufp, "GUEST_CSUM ");
1475         if (features & VIRTIO_NET_F_MAC)
1476                 /* LINTED E_PTRDIFF_OVERFLOW */
1477                 bufp += snprintf(bufp, bufend - bufp, "MAC ");
1478         if (features & VIRTIO_NET_F_GSO)
1479                 /* LINTED E_PTRDIFF_OVERFLOW */
1480                 bufp += snprintf(bufp, bufend - bufp, "GSO ");
1481         if (features & VIRTIO_NET_F_GUEST_TSO4)
1482                 /* LINTED E_PTRDIFF_OVERFLOW */
1483                 bufp += snprintf(bufp, bufend - bufp, "GUEST_TSO4 ");
1484         if (features & VIRTIO_NET_F_GUEST_TSO6)
1485                 /* LINTED E_PTRDIFF_OVERFLOW */
1486                 bufp += snprintf(bufp, bufend - bufp, "GUEST_TSO6 ");
1487         if (features & VIRTIO_NET_F_GUEST_ECN)
1488                 /* LINTED E_PTRDIFF_OVERFLOW */
1489                 bufp += snprintf(bufp, bufend - bufp, "GUEST_ECN ");
1490         if (features & VIRTIO_NET_F_GUEST_UFO)
1491                 /* LINTED E_PTRDIFF_OVERFLOW */
1492                 bufp += snprintf(bufp, bufend - bufp, "GUEST_UFO ");
1493         if (features & VIRTIO_NET_F_HOST_TSO4)
1494                 /* LINTED E_PTRDIFF_OVERFLOW */
1495                 bufp += snprintf(bufp, bufend - bufp, "HOST_TSO4 ");
1496         if (features & VIRTIO_NET_F_HOST_TSO6)
1497                 /* LINTED E_PTRDIFF_OVERFLOW */
1498                 bufp += snprintf(bufp, bufend - bufp, "HOST_TSO6 ");
1499         if (features & VIRTIO_NET_F_HOST_ECN)
1500                 /* LINTED E_PTRDIFF_OVERFLOW */
1501                 bufp += snprintf(bufp, bufend - bufp, "HOST_ECN ");
1502         if (features & VIRTIO_NET_F_HOST_UFO)
1503                 /* LINTED E_PTRDIFF_OVERFLOW */
1504                 bufp += snprintf(bufp, bufend - bufp, "HOST_UFO ");
1505         if (features & VIRTIO_NET_F_MRG_RXBUF)
1506                 /* LINTED E_PTRDIFF_OVERFLOW */
1507                 bufp += snprintf(bufp, bufend - bufp, "MRG_RXBUF ");
1508         if (features & VIRTIO_NET_F_STATUS)
1509                 /* LINTED E_PTRDIFF_OVERFLOW */
1510                 bufp += snprintf(bufp, bufend - bufp, "STATUS ");
1511         if (features & VIRTIO_NET_F_CTRL_VQ)
1512                 /* LINTED E_PTRDIFF_OVERFLOW */
1513                 bufp += snprintf(bufp, bufend - bufp, "CTRL_VQ ");
1514         if (features & VIRTIO_NET_F_CTRL_RX)
1515                 /* LINTED E_PTRDIFF_OVERFLOW */
1516                 bufp += snprintf(bufp, bufend - bufp, "CTRL_RX ");
1517         if (features & VIRTIO_NET_F_CTRL_VLAN)
1518                 /* LINTED E_PTRDIFF_OVERFLOW */
1519                 bufp += snprintf(bufp, bufend - bufp, "CTRL_VLAN ");
1520         if (features & VIRTIO_NET_F_CTRL_RX_EXTRA)
1521                 /* LINTED E_PTRDIFF_OVERFLOW */
1522                 bufp += snprintf(bufp, bufend - bufp, "CTRL_RX_EXTRA ");
1523 
1524         /* LINTED E_PTRDIFF_OVERFLOW */
1525         bufp += snprintf(bufp, bufend - bufp, ")");
1526         *bufp = '\0';
1527 
1528         dev_err(sc->sc_dev, CE_NOTE, "%s", buf);
1529 }
1530 
1531 /*
1532  * Find out which features are supported by the device and
1533  * choose which ones we wish to use.
1534  */
1535 static int
1536 vioif_dev_features(struct vioif_softc *sc)
1537 {
1538         uint32_t host_features;
1539 
1540         host_features = virtio_negotiate_features(&sc->sc_virtio,
1541             VIRTIO_NET_F_CSUM |
1542             VIRTIO_NET_F_HOST_TSO4 |
1543             VIRTIO_NET_F_HOST_ECN |
1544             VIRTIO_NET_F_MAC |
1545             VIRTIO_NET_F_STATUS |
1546             VIRTIO_F_RING_INDIRECT_DESC |
1547             VIRTIO_F_NOTIFY_ON_EMPTY);
1548 
1549         vioif_show_features(sc, "Host features: ", host_features);
1550         vioif_show_features(sc, "Negotiated features: ",
1551             sc->sc_virtio.sc_features);
1552 
1553         if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1554                 dev_err(sc->sc_dev, CE_NOTE,
1555                     "Host does not support RING_INDIRECT_DESC, bye.");
1556                 return (DDI_FAILURE);
1557         }
1558 
1559         return (DDI_SUCCESS);
1560 }
1561 
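     /*
      * Convenience wrapper: check a feature bit against the set negotiated
      * by the common virtio code.
      */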
1562 static int
1563 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1564 {
1565         return (virtio_has_feature(&sc->sc_virtio, feature));
1566 }
1567 
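     /*
      * Write sc_mac into the device configuration space, one byte at a time,
      * starting at VIRTIO_NET_CONFIG_MAC.  Only used when the driver had to
      * generate its own MAC address, so the host learns the address in use.
      */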
1568 static void
1569 vioif_set_mac(struct vioif_softc *sc)
1570 {
1571         int i;
1572 
1573         for (i = 0; i < ETHERADDRL; i++) {
1574                 virtio_write_device_config_1(&sc->sc_virtio,
1575                     VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1576         }
1577 }
1578 
1579 /* Get the mac address out of the hardware, or make up one. */
1580 static void
1581 vioif_get_mac(struct vioif_softc *sc)
1582 {
1583         int i;
1584         if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1585                 for (i = 0; i < ETHERADDRL; i++) {
1586                         sc->sc_mac[i] = virtio_read_device_config_1(
1587                             &sc->sc_virtio,
1588                             VIRTIO_NET_CONFIG_MAC + i);
1589                 }
1590                 dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
1591                     ether_sprintf((struct ether_addr *)sc->sc_mac));
1592         } else {
1593                 /* Get a few random bytes */
1594                 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1595                 /* Make sure it's a unicast MAC */
1596                 sc->sc_mac[0] &= ~1;
1597                 /* Set the "locally administered" bit */
1598                 sc->sc_mac[0] |= 2;
1599 
1600                 vioif_set_mac(sc);
1601 
1602                 dev_err(sc->sc_dev, CE_NOTE,
1603                     "Generated a random MAC address: %s",
1604                     ether_sprintf((struct ether_addr *)sc->sc_mac));
1605         }
1606 }
1607 
1608 /*
1609  * Virtqueue interrupt handlers
1610  */
1611 /* ARGSUSED */
1612 uint_t
1613 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1614 {
1615         struct virtio_softc *vsc = (void *) arg1;
1616         struct vioif_softc *sc = container_of(vsc,
1617             struct vioif_softc, sc_virtio);
1618 
1619         (void) vioif_process_rx(sc);
1620 
1621         (void) vioif_populate_rx(sc, KM_NOSLEEP);
1622 
1623         return (DDI_INTR_CLAIMED);
1624 }
1625 
1626 /* ARGSUSED */
1627 uint_t
1628 vioif_tx_handler(caddr_t arg1, caddr_t arg2)
1629 {
1630         struct virtio_softc *vsc = (void *)arg1;
1631         struct vioif_softc *sc = container_of(vsc,
1632             struct vioif_softc, sc_virtio);
1633 
1634         vioif_reclaim_used_tx(sc);
1635         return (DDI_INTR_CLAIMED);
1636 }
1637 
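     /*
      * Register the per-virtqueue interrupt handlers with the common virtio
      * code.  The handlers are listed in queue index order (rx, then tx);
      * no configuration-change handler is installed (NULL).
      */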
1638 static int
1639 vioif_register_ints(struct vioif_softc *sc)
1640 {
1641         int ret;
1642 
1643         struct virtio_int_handler vioif_vq_h[] = {
1644                 { vioif_rx_handler },
1645                 { vioif_tx_handler },
1646                 { NULL }
1647         };
1648 
1649         ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1650 
1651         return (ret);
1652 }
1653 
1654 
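     /*
      * Look at the negotiated feature bits and record which checksum and
      * LSO offloads this instance may advertise to the MAC layer.
      */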
1655 static void
1656 vioif_check_features(struct vioif_softc *sc)
1657 {
1658         if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1659                 /* The GSO/GRO features depend on CSUM; check them here. */
1660                 sc->sc_tx_csum = 1;
1661                 sc->sc_rx_csum = 1;
1662 
1663                 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1664                         sc->sc_rx_csum = 0;
1665                 }
1666                 dev_err(sc->sc_dev, CE_NOTE, "Csum enabled.");
1667 
1668                 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1669 
1670                         sc->sc_tx_tso4 = 1;
1671                         /*
1672                          * We don't seem to have a way to ask the system
1673                          * not to send us LSO packets with Explicit
1674                          * Congestion Notification bit set, so we require
1675                          * the device to support it in order to do
1676                          * LSO.
1677                          */
1678                         if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1679                                 dev_err(sc->sc_dev, CE_NOTE,
1680                                     "TSO4 supported, but not ECN. "
1681                                     "Not using LSO.");
1682                                 sc->sc_tx_tso4 = 0;
1683                         } else {
1684                                 dev_err(sc->sc_dev, CE_NOTE, "LSO enabled");
1685                         }
1686                 }
1687         }
1688 }
1689 
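     /*
      * attach(9E) entry point.  Maps the device registers, negotiates
      * features, sets up the rx/tx (and optional ctrl) virtqueues, registers
      * with GLDv3 and enables interrupts.  The error labels at the bottom
      * unwind whatever part of the setup had already completed.
      */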
1690 static int
1691 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1692 {
1693         int ret, instance;
1694         struct vioif_softc *sc;
1695         struct virtio_softc *vsc;
1696         mac_register_t *macp;
1697         char cache_name[CACHE_NAME_SIZE];
1698 
1699         instance = ddi_get_instance(devinfo);
1700 
1701         switch (cmd) {
1702         case DDI_ATTACH:
1703                 break;
1704 
1705         case DDI_RESUME:
1706         case DDI_PM_RESUME:
1707                 /* not supported yet */
1708                 goto exit;
1709 
1710         default:
1711                 /* unrecognized command */
1712                 goto exit;
1713         }
1714 
1715         sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
1716         ddi_set_driver_private(devinfo, sc);
1717 
1718         vsc = &sc->sc_virtio;
1719 
1720         /* Duplicate for less typing */
1721         sc->sc_dev = devinfo;
1722         vsc->sc_dev = devinfo;
1723 
1724         /*
1725          * Initialize interrupt kstat.
1726          */
1727         sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
1728             KSTAT_TYPE_INTR, 1, 0);
1729         if (sc->sc_intrstat == NULL) {
1730                 dev_err(devinfo, CE_WARN, "kstat_create failed");
1731                 goto exit_intrstat;
1732         }
1733         kstat_install(sc->sc_intrstat);
1734 
1735         /* map BAR 0 (register set 1; register set 0 is PCI config space) */
1736         ret = ddi_regs_map_setup(devinfo, 1,
1737             (caddr_t *)&sc->sc_virtio.sc_io_addr,
1738             0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
1739         if (ret != DDI_SUCCESS) {
1740                 dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
1741                 goto exit_map;
1742         }
1743 
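             /*
              * Reset the device and do the initial handshake: acknowledge the
              * device, then announce that a driver is present.  DRIVER_OK is
              * set later, once features and queues have been set up.
              */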
1744         virtio_device_reset(&sc->sc_virtio);
1745         virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
1746         virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
1747 
1748         ret = vioif_dev_features(sc);
1749         if (ret)
1750                 goto exit_features;
1751 
1752         vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
1753 
1754         (void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
1755         sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
1756             sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
1757             vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
1758         if (sc->sc_rxbuf_cache == NULL) {
1759                 dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
1760                 goto exit_cache;
1761         }
1762 
1763         ret = vioif_register_ints(sc);
1764         if (ret) {
1765                 dev_err(sc->sc_dev, CE_WARN,
1766                     "Failed to allocate interrupt(s)!");
1767                 goto exit_ints;
1768         }
1769 
1770         /*
1771          * The interrupt setup fixes the register layout (MSI-X shifts the
1772          * device-specific config), so the MAC address can be read only now.
1773          */
1774         vioif_get_mac(sc);
1775 
1776         sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
1777             VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
1778         if (!sc->sc_rx_vq)
1779                 goto exit_alloc1;
1780         virtio_stop_vq_intr(sc->sc_rx_vq);
1781 
1782         sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
1783             VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
1784         if (!sc->sc_tx_vq)
1785                 goto exit_alloc2;
1786         virtio_stop_vq_intr(sc->sc_tx_vq);
1787 
1788         if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
1789                 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1790                     VIOIF_CTRL_QLEN, 0, "ctrl");
1791                 if (!sc->sc_ctrl_vq) {
1792                         goto exit_alloc3;
1793                 }
1794                 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1795         }
1796 
1797         virtio_set_status(&sc->sc_virtio,
1798             VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1799 
1800         sc->sc_rxloan = 0;
1801 
1802         /* set some reasonably small default values */
1803         sc->sc_rxcopy_thresh = 300;
1804         sc->sc_txcopy_thresh = 300;
1805         sc->sc_mtu = ETHERMTU;
1806 
1807         vioif_check_features(sc);
1808 
1809         if (vioif_alloc_mems(sc))
1810                 goto exit_alloc_mems;
1811 
1812         if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1813                 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1814                 goto exit_macalloc;
1815         }
1816 
1817         macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1818         macp->m_driver = sc;
1819         macp->m_dip = devinfo;
1820         macp->m_src_addr = sc->sc_mac;
1821         macp->m_callbacks = &vioif_m_callbacks;
1822         macp->m_min_sdu = 0;
1823         macp->m_max_sdu = sc->sc_mtu;
1824         macp->m_margin = VLAN_TAGSZ;
1825         macp->m_priv_props = vioif_priv_props;
1826 
1827         sc->sc_macp = macp;
1828 
1829         /* Pre-fill the rx ring. */
1830         (void) vioif_populate_rx(sc, KM_SLEEP);
1831 
1832         ret = mac_register(macp, &sc->sc_mac_handle);
1833         if (ret != 0) {
1834                 dev_err(devinfo, CE_WARN, "vioif_attach: "
1835                     "mac_register() failed, ret=%d", ret);
1836                 goto exit_register;
1837         }
1838 
1839         ret = virtio_enable_ints(&sc->sc_virtio);
1840         if (ret) {
1841                 dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
1842                 goto exit_enable_ints;
1843         }
1844 
1845         mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
1846         return (DDI_SUCCESS);
1847 
1848 exit_enable_ints:
1849         (void) mac_unregister(sc->sc_mac_handle);
1850 exit_register:
1851         mac_free(macp);
1852 exit_macalloc:
1853         vioif_free_mems(sc);
1854 exit_alloc_mems:
1855         if (sc->sc_ctrl_vq)
1856                 virtio_free_vq(sc->sc_ctrl_vq);
1857 exit_alloc3:
1858         virtio_free_vq(sc->sc_tx_vq);
1859 exit_alloc2:
1860         virtio_free_vq(sc->sc_rx_vq);
1861 exit_alloc1:
1862         virtio_release_ints(&sc->sc_virtio);
1863 exit_ints:
1864         kmem_cache_destroy(sc->sc_rxbuf_cache);
1865 exit_cache:
1866 exit_features:
1867         virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1868         ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1869 exit_map:
1870         kstat_delete(sc->sc_intrstat);
1871 exit_intrstat:
1872         kmem_free(sc, sizeof (struct vioif_softc));
1873 exit:
1874         return (DDI_FAILURE);
1875 }
1876 
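     /*
      * detach(9E) entry point.  Refuses to detach while rx buffers are still
      * loaned upstream, then tears down the resources acquired in
      * vioif_attach() in roughly the reverse order.
      */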
1877 static int
1878 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1879 {
1880         struct vioif_softc *sc;
1881 
1882         if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1883                 return (DDI_FAILURE);
1884 
1885         switch (cmd) {
1886         case DDI_DETACH:
1887                 break;
1888 
1889         case DDI_PM_SUSPEND:
1890                 /* not supported yet */
1891                 return (DDI_FAILURE);
1892 
1893         default:
1894                 /* unrecognized command */
1895                 return (DDI_FAILURE);
1896         }
1897 
1898         if (sc->sc_rxloan) {
1899                 dev_err(sc->sc_dev, CE_NOTE, "Some rx buffers are still "
1900                     "upstream; not detaching.");
1901                 return (DDI_FAILURE);
1902         }
1903 
1904         virtio_stop_vq_intr(sc->sc_rx_vq);
1905         virtio_stop_vq_intr(sc->sc_tx_vq);
1906 
1907         virtio_release_ints(&sc->sc_virtio);
1908 
1909         if (mac_unregister(sc->sc_mac_handle)) {
1910                 return (DDI_FAILURE);
1911         }
1912 
1913         mac_free(sc->sc_macp);
1914 
1915         vioif_free_mems(sc);
1916         virtio_free_vq(sc->sc_rx_vq);
1917         virtio_free_vq(sc->sc_tx_vq);
             if (sc->sc_ctrl_vq)
                     virtio_free_vq(sc->sc_ctrl_vq);
1918 
1919         virtio_device_reset(&sc->sc_virtio);
1920 
1921         ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1922 
1923         kmem_cache_destroy(sc->sc_rxbuf_cache);
1924         kstat_delete(sc->sc_intrstat);
1925         kmem_free(sc, sizeof (struct vioif_softc));
1926 
1927         return (DDI_SUCCESS);
1928 }
1929 
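     /*
      * quiesce(9E) entry point, used by fast reboot: silence the virtqueue
      * interrupts and reset the device so it stops DMA before the new kernel
      * takes over.
      */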
1930 static int
1931 vioif_quiesce(dev_info_t *devinfo)
1932 {
1933         struct vioif_softc *sc;
1934 
1935         if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1936                 return (DDI_FAILURE);
1937 
1938         virtio_stop_vq_intr(sc->sc_rx_vq);
1939         virtio_stop_vq_intr(sc->sc_tx_vq);
1940         virtio_device_reset(&sc->sc_virtio);
1941 
1942         return (DDI_SUCCESS);
1943 }
1944 
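     /*
      * Loadable module entry points.  mac_init_ops()/mac_fini_ops() bracket
      * mod_install()/mod_remove() so the driver stays registered with the
      * GLDv3 framework exactly as long as the module is loaded.
      */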
1945 int
1946 _init(void)
1947 {
1948         int ret = 0;
1949 
1950         mac_init_ops(&vioif_ops, "vioif");
1951 
1952         ret = mod_install(&modlinkage);
1953         if (ret != DDI_SUCCESS) {
1954                 mac_fini_ops(&vioif_ops);
1955                 return (ret);
1956         }
1957 
1958         return (0);
1959 }
1960 
1961 int
1962 _fini(void)
1963 {
1964         int ret;
1965 
1966         ret = mod_remove(&modlinkage);
1967         if (ret == DDI_SUCCESS) {
1968                 mac_fini_ops(&vioif_ops);
1969         }
1970 
1971         return (ret);
1972 }
1973 
1974 int
1975 _info(struct modinfo *pModinfo)
1976 {
1977         return (mod_info(&modlinkage, pModinfo));
1978 }