1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
14 * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
15 * Copyright 2015 Joyent, Inc.
16 */
17
18 /* Based on the NetBSD virtio driver by Minoura Makoto. */
19 /*
20 * Copyright (c) 2010 Minoura Makoto.
21 * All rights reserved.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the above copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
33 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
35 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
36 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
37 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
38 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
39 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
40 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
41 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
42 */
43
44 #include <sys/types.h>
45 #include <sys/errno.h>
46 #include <sys/param.h>
47 #include <sys/stropts.h>
48 #include <sys/stream.h>
49 #include <sys/strsubr.h>
50 #include <sys/kmem.h>
51 #include <sys/conf.h>
52 #include <sys/devops.h>
53 #include <sys/ksynch.h>
54 #include <sys/stat.h>
55 #include <sys/modctl.h>
56 #include <sys/debug.h>
57 #include <sys/pci.h>
58 #include <sys/ethernet.h>
59 #include <sys/vlan.h>
60
61 #include <sys/dlpi.h>
62 #include <sys/taskq.h>
63 #include <sys/cyclic.h>
64
65 #include <sys/pattr.h>
66 #include <sys/strsun.h>
67
68 #include <sys/random.h>
69 #include <sys/sysmacros.h>
70 #include <sys/stream.h>
71
72 #include <sys/mac.h>
73 #include <sys/mac_provider.h>
74 #include <sys/mac_ether.h>
75
76 #include "virtiovar.h"
77 #include "virtioreg.h"
78
79 /* Configuration registers */
80 #define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */
81 #define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */
82
83 /* Feature bits */
84 #define VIRTIO_NET_F_CSUM (1 << 0) /* Host handles pkts w/ partial csum */
85 #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* Guest handles pkts w/ part csum */
86 #define VIRTIO_NET_F_MAC (1 << 5) /* Host has given MAC address. */
87 #define VIRTIO_NET_F_GSO (1 << 6) /* Host handles pkts w/ any GSO type */
88 #define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* Guest can handle TSOv4 in. */
89 #define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* Guest can handle TSOv6 in. */
90 #define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* Guest can handle TSO[6] w/ ECN in */
91 #define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* Guest can handle UFO in. */
92 #define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* Host can handle TSOv4 in. */
93 #define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* Host can handle TSOv6 in. */
94 #define VIRTIO_NET_F_HOST_ECN (1 << 13) /* Host can handle TSO[6] w/ ECN in */
95 #define VIRTIO_NET_F_HOST_UFO (1 << 14) /* Host can handle UFO in. */
96 #define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* Host can merge receive buffers. */
97 #define VIRTIO_NET_F_STATUS (1 << 16) /* Config.status available */
98 #define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* Control channel available */
99 #define VIRTIO_NET_F_CTRL_RX (1 << 18) /* Control channel RX mode support */
100 #define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* Control channel VLAN filtering */
101 #define VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */
102
103 #define VIRTIO_NET_FEATURE_BITS \
104 "\020" \
105 "\1CSUM" \
106 "\2GUEST_CSUM" \
107 "\6MAC" \
108 "\7GSO" \
109 "\10GUEST_TSO4" \
110 "\11GUEST_TSO6" \
111 "\12GUEST_ECN" \
112 "\13GUEST_UFO" \
113 "\14HOST_TSO4" \
114 "\15HOST_TSO6" \
115 "\16HOST_ECN" \
116 "\17HOST_UFO" \
117 "\20MRG_RXBUF" \
118 "\21STATUS" \
119 "\22CTRL_VQ" \
120 "\23CTRL_RX" \
121 "\24CTRL_VLAN" \
122 "\25CTRL_RX_EXTRA"
123
124 /* Status */
125 #define VIRTIO_NET_S_LINK_UP 1
126
127 #pragma pack(1)
128 /* Packet header structure */
129 struct virtio_net_hdr {
130 uint8_t flags;
131 uint8_t gso_type;
132 uint16_t hdr_len;
133 uint16_t gso_size;
134 uint16_t csum_start;
135 uint16_t csum_offset;
136 };
137 #pragma pack()
138
139 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */
140 #define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */
141 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */
142 #define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */
143 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */
144 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */
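
/*
 * Checksum offload note (per the virtio specification): when
 * VIRTIO_NET_HDR_F_NEEDS_CSUM is set, csum_start is the offset from the
 * start of the packet at which checksumming begins, and csum_offset is
 * where, relative to csum_start, the computed checksum is stored.  For
 * example, a TCP/IPv4 packet with an untagged Ethernet header and no IP
 * options would use csum_start = 14 + 20 = 34 and csum_offset = 16 (the
 * offset of the checksum field within the TCP header).
 */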
145
146
147 /* Control virtqueue */
148 #pragma pack(1)
149 struct virtio_net_ctrl_cmd {
150 uint8_t class;
151 uint8_t command;
152 };
153 #pragma pack()
154
155 #define VIRTIO_NET_CTRL_RX 0
156 #define VIRTIO_NET_CTRL_RX_PROMISC 0
157 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1
158
159 #define VIRTIO_NET_CTRL_MAC 1
160 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
161
162 #define VIRTIO_NET_CTRL_VLAN 2
163 #define VIRTIO_NET_CTRL_VLAN_ADD 0
164 #define VIRTIO_NET_CTRL_VLAN_DEL 1
165
166 #pragma pack(1)
167 struct virtio_net_ctrl_status {
168 uint8_t ack;
169 };
170
171 struct virtio_net_ctrl_rx {
172 uint8_t onoff;
173 };
174
175 struct virtio_net_ctrl_mac_tbl {
176 uint32_t nentries;
177 uint8_t macs[][ETHERADDRL];
178 };
179
180 struct virtio_net_ctrl_vlan {
181 uint16_t id;
182 };
183 #pragma pack()
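
/*
 * A control-queue request is laid out as a virtio_net_ctrl_cmd header,
 * followed by command-specific data (e.g. a virtio_net_ctrl_rx or a MAC
 * table), followed by a one-byte virtio_net_ctrl_status that the host fills
 * in with the result.  Note that while the structures and command codes are
 * defined here, this driver does not currently issue any control commands:
 * vioif_promisc() and vioif_multicst() below are no-ops.
 */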
184
185 static int vioif_quiesce(dev_info_t *);
186 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
187 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
188
189 DDI_DEFINE_STREAM_OPS(vioif_ops,
190 nulldev, /* identify */
191 nulldev, /* probe */
192 vioif_attach, /* attach */
193 vioif_detach, /* detach */
194 nodev, /* reset */
	NULL,		/* getinfo */
	D_MP,		/* cb_flag */
	NULL,		/* streamtab */
198 vioif_quiesce /* quiesce */);
199
200 static char vioif_ident[] = "VirtIO ethernet driver";
201
/* Standard module linkage initialization for a MAC (GLDv3) driver */
203 extern struct mod_ops mod_driverops;
204
205 static struct modldrv modldrv = {
206 &mod_driverops, /* Type of module. This one is a driver */
207 vioif_ident, /* short description */
208 &vioif_ops /* driver specific ops */
209 };
210
211 static struct modlinkage modlinkage = {
212 MODREV_1,
213 {
214 (void *)&modldrv,
215 NULL,
216 },
217 };
218
219 ddi_device_acc_attr_t vioif_attr = {
220 DDI_DEVICE_ATTR_V0,
221 DDI_NEVERSWAP_ACC, /* virtio is always native byte order */
222 DDI_STORECACHING_OK_ACC,
223 DDI_DEFAULT_ACC
224 };
225
226 /*
227 * A mapping represents a binding for a single buffer that is contiguous in the
228 * virtual address space.
229 */
230 struct vioif_buf_mapping {
231 caddr_t vbm_buf;
232 ddi_dma_handle_t vbm_dmah;
233 ddi_acc_handle_t vbm_acch;
234 ddi_dma_cookie_t vbm_dmac;
235 unsigned int vbm_ncookies;
236 };
237
238 /*
239 * Rx buffers can be loaned upstream, so the code has
240 * to allocate them dynamically.
241 */
242 struct vioif_rx_buf {
243 struct vioif_softc *rb_sc;
244 frtn_t rb_frtn;
245
246 struct vioif_buf_mapping rb_mapping;
247 };
248
/*
 * Tx buffers have two mapping types.  One, "inline", is pre-allocated and is
 * used to hold the virtio_net_header.  Small packets also get copied there, as
 * it's faster than mapping them.  Bigger packets get mapped using the
 * "external" mapping array.  An array is used because a packet may consist of
 * multiple fragments, so each fragment gets bound to an entry.  In practice
 * the number of fragments rarely exceeds 2, but just in case, a larger array
 * of up to VIOIF_INDIRECT_MAX - 1 entries is allocated.  To save resources,
 * the DMA handles are allocated lazily in the tx path.
 */
259 struct vioif_tx_buf {
260 mblk_t *tb_mp;
261
262 /* inline buffer */
263 struct vioif_buf_mapping tb_inline_mapping;
264
265 /* External buffers */
266 struct vioif_buf_mapping *tb_external_mapping;
267 unsigned int tb_external_num;
268 };
269
270 struct vioif_softc {
271 dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */
272 struct virtio_softc sc_virtio;
273
274 mac_handle_t sc_mac_handle;
275 mac_register_t *sc_macp;
276
277 struct virtqueue *sc_rx_vq;
278 struct virtqueue *sc_tx_vq;
279 struct virtqueue *sc_ctrl_vq;
280
281 unsigned int sc_tx_stopped:1;
282
283 /* Feature bits. */
284 unsigned int sc_rx_csum:1;
285 unsigned int sc_tx_csum:1;
286 unsigned int sc_tx_tso4:1;
287
288 /*
289 * For debugging, it is useful to know whether the MAC address we
290 * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
291 * was otherwise generated or set from within the guest.
292 */
293 unsigned int sc_mac_from_host:1;
294
295 int sc_mtu;
296 uint8_t sc_mac[ETHERADDRL];
297 /*
298 * For rx buffers, we keep a pointer array, because the buffers
299 * can be loaned upstream, and we have to repopulate the array with
300 * new members.
301 */
302 struct vioif_rx_buf **sc_rxbufs;
303
304 /*
305 * For tx, we just allocate an array of buffers. The packet can
306 * either be copied into the inline buffer, or the external mapping
307 * could be used to map the packet
308 */
309 struct vioif_tx_buf *sc_txbufs;
310
311 kstat_t *sc_intrstat;
312 /*
313 * We "loan" rx buffers upstream and reuse them after they are
314 * freed. This lets us avoid allocations in the hot path.
315 */
316 kmem_cache_t *sc_rxbuf_cache;
317 ulong_t sc_rxloan;
318
	/* Copying small packets turns out to be faster than mapping them. */
320 unsigned long sc_rxcopy_thresh;
321 unsigned long sc_txcopy_thresh;
322
323 /*
324 * Statistics visible through mac:
325 */
326 uint64_t sc_ipackets;
327 uint64_t sc_opackets;
328 uint64_t sc_rbytes;
329 uint64_t sc_obytes;
330 uint64_t sc_brdcstxmt;
331 uint64_t sc_brdcstrcv;
332 uint64_t sc_multixmt;
333 uint64_t sc_multircv;
334 uint64_t sc_norecvbuf;
335 uint64_t sc_notxbuf;
336 uint64_t sc_ierrors;
337 uint64_t sc_oerrors;
338
339 /*
340 * Internal debugging statistics:
341 */
342 uint64_t sc_rxfail_dma_handle;
343 uint64_t sc_rxfail_dma_buffer;
344 uint64_t sc_rxfail_dma_bind;
345 uint64_t sc_rxfail_chain_undersize;
346 uint64_t sc_rxfail_no_descriptors;
347 uint64_t sc_txfail_dma_handle;
348 uint64_t sc_txfail_dma_bind;
349 uint64_t sc_txfail_indirect_limit;
350 };
351
352 #define ETHER_HEADER_LEN sizeof (struct ether_header)
353
354 /* MTU + the ethernet header. */
355 #define MAX_PAYLOAD 65535
356 #define MAX_MTU (MAX_PAYLOAD - ETHER_HEADER_LEN)
357 #define DEFAULT_MTU ETHERMTU
358
/*
 * Yeah, we spend 8M per device.  Turns out, there is no point being smart
 * and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF), because vhost does
 * not support them, and we expect to be used with vhost in production
 * environments.
 */
365 /* The buffer keeps both the packet data and the virtio_net_header. */
366 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
367
368 /*
369 * We win a bit on header alignment, but the host wins a lot
370 * more on moving aligned buffers. Might need more thought.
371 */
372 #define VIOIF_IP_ALIGN 0
373
374 /* Maximum number of indirect descriptors, somewhat arbitrary. */
375 #define VIOIF_INDIRECT_MAX 128
376
/*
 * We pre-allocate a reasonably large buffer into which small packets are
 * copied.  Bigger packets are mapped instead, and packets that span multiple
 * cookies are mapped as indirect buffers.
 */
382 #define VIOIF_TX_INLINE_SIZE 2048
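
/*
 * Note: 2048 bytes comfortably fits the 10-byte virtio_net_hdr plus any
 * packet below VIOIF_TX_THRESH_MAX (640), the largest copy threshold that
 * can be configured via the vioif_txcopy_thresh property.
 */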
383
/* Ring sizes; 0 means use the native queue size advertised by the device. */
385 #define VIOIF_RX_QLEN 0
386 #define VIOIF_TX_QLEN 0
387 #define VIOIF_CTRL_QLEN 0
388
389 static uchar_t vioif_broadcast[ETHERADDRL] = {
390 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
391 };
392
393 #define VIOIF_TX_THRESH_MAX 640
394 #define VIOIF_RX_THRESH_MAX 640
395
396 #define CACHE_NAME_SIZE 32
397
398 static char vioif_txcopy_thresh[] =
399 "vioif_txcopy_thresh";
400 static char vioif_rxcopy_thresh[] =
401 "vioif_rxcopy_thresh";
402
403 static char *vioif_priv_props[] = {
404 vioif_txcopy_thresh,
405 vioif_rxcopy_thresh,
406 NULL
407 };
408
/* Candidates for moving up into the DDI? */
410 static ddi_dma_cookie_t *
411 vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
412 {
413 ddi_dma_impl_t *dmah_impl = (void *) dmah;
414 ASSERT(dmah_impl->dmai_cookie);
415 return (dmah_impl->dmai_cookie);
416 }
417
418 static void
419 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
420 {
421 ddi_dma_impl_t *dmah_impl = (void *) dmah;
422 dmah_impl->dmai_cookie = dmac;
423 }
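
/*
 * These two helpers let vioif_add_rx() walk a handle's cookie list with
 * ddi_dma_nextcookie() and then rewind it, so that the same long-lived rx
 * mapping can be walked again the next time the buffer is posted to the
 * ring.  They reach into ddi_dma_impl_t presumably because the DDI does not
 * export a supported way to save and restore the cookie iterator.
 */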
424
425 static link_state_t
426 vioif_link_state(struct vioif_softc *sc)
427 {
428 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
429 if (virtio_read_device_config_2(&sc->sc_virtio,
430 VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
431 return (LINK_STATE_UP);
432 } else {
433 return (LINK_STATE_DOWN);
434 }
435 }
436
437 return (LINK_STATE_UP);
438 }
439
440 static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
441 DMA_ATTR_V0, /* Version number */
442 0, /* low address */
443 0xFFFFFFFFFFFFFFFF, /* high address */
444 0xFFFFFFFF, /* counter register max */
445 1, /* page alignment */
446 1, /* burst sizes: 1 - 32 */
447 1, /* minimum transfer size */
448 0xFFFFFFFF, /* max transfer size */
449 0xFFFFFFFFFFFFFFF, /* address register max */
450 1, /* scatter-gather capacity */
451 1, /* device operates on bytes */
452 0, /* attr flag: set to 0 */
453 };
454
455 static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
456 DMA_ATTR_V0, /* Version number */
457 0, /* low address */
458 0xFFFFFFFFFFFFFFFF, /* high address */
459 0xFFFFFFFF, /* counter register max */
460 1, /* page alignment */
461 1, /* burst sizes: 1 - 32 */
462 1, /* minimum transfer size */
463 0xFFFFFFFF, /* max transfer size */
464 0xFFFFFFFFFFFFFFF, /* address register max */
465
466 /* One entry is used for the virtio_net_hdr on the tx path */
467 VIOIF_INDIRECT_MAX - 1, /* scatter-gather capacity */
468 1, /* device operates on bytes */
469 0, /* attr flag: set to 0 */
470 };
471
472 static ddi_device_acc_attr_t vioif_bufattr = {
473 DDI_DEVICE_ATTR_V0,
474 DDI_NEVERSWAP_ACC,
475 DDI_STORECACHING_OK_ACC,
476 DDI_DEFAULT_ACC
477 };
478
479 static void
480 vioif_rx_free(caddr_t free_arg)
481 {
482 struct vioif_rx_buf *buf = (void *) free_arg;
483 struct vioif_softc *sc = buf->rb_sc;
484
485 kmem_cache_free(sc->sc_rxbuf_cache, buf);
486 atomic_dec_ulong(&sc->sc_rxloan);
487 }
488
489 static int
490 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
491 {
492 _NOTE(ARGUNUSED(kmflags));
493 struct vioif_softc *sc = user_arg;
494 struct vioif_rx_buf *buf = buffer;
495 size_t len;
496
497 if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
498 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
499 sc->sc_rxfail_dma_handle++;
500 goto exit_handle;
501 }
502
503 if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
504 VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
505 &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
506 NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
507 sc->sc_rxfail_dma_buffer++;
508 goto exit_alloc;
509 }
510 ASSERT(len >= VIOIF_RX_SIZE);
511
512 if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
513 buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
514 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
515 &buf->rb_mapping.vbm_ncookies)) {
516 sc->sc_rxfail_dma_bind++;
517 goto exit_bind;
518 }
519
520 ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
521
522 buf->rb_sc = sc;
523 buf->rb_frtn.free_arg = (void *) buf;
524 buf->rb_frtn.free_func = vioif_rx_free;
525
526 return (0);
527 exit_bind:
528 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
529 exit_alloc:
530 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
531 exit_handle:
532
533 return (ENOMEM);
534 }
535
536 static void
537 vioif_rx_destruct(void *buffer, void *user_arg)
538 {
539 _NOTE(ARGUNUSED(user_arg));
540 struct vioif_rx_buf *buf = buffer;
541
	ASSERT(buf->rb_mapping.vbm_acch);
	ASSERT(buf->rb_mapping.vbm_dmah);
544
545 (void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
546 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
547 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
548 }
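
/*
 * Because the DMA handle, memory and binding are set up in the kmem cache
 * constructor and torn down in the destructor, a buffer returned from
 * upstream via vioif_rx_free() can be handed straight back to
 * kmem_cache_free() and later reused without redoing any DMA work in the
 * hot path.
 */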
549
550 static void
551 vioif_free_mems(struct vioif_softc *sc)
552 {
553 int i;
554
555 for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
556 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
557 int j;
558
		/* Tear down the inline mapping. */
560
561 ASSERT(buf->tb_inline_mapping.vbm_acch);
562 ASSERT(buf->tb_inline_mapping.vbm_dmah);
563
564 (void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
565 ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
566 ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
567
568 /* We should not see any in-flight buffers at this point. */
569 ASSERT(!buf->tb_mp);
570
		/* Free all the DMA handles we allocated lazily. */
		for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
			ddi_dma_free_handle(
			    &buf->tb_external_mapping[j].vbm_dmah);
		/* Free the external mapping array. */
		kmem_free(buf->tb_external_mapping,
		    sizeof (struct vioif_buf_mapping) *
		    (VIOIF_INDIRECT_MAX - 1));
578 }
579
580 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
581 sc->sc_tx_vq->vq_num);
582
583 for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
584 struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
585
586 if (buf)
587 kmem_cache_free(sc->sc_rxbuf_cache, buf);
588 }
589 kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
590 sc->sc_rx_vq->vq_num);
591 }
592
593 static int
594 vioif_alloc_mems(struct vioif_softc *sc)
595 {
596 int i, txqsize, rxqsize;
597 size_t len;
598 unsigned int nsegments;
599
600 txqsize = sc->sc_tx_vq->vq_num;
601 rxqsize = sc->sc_rx_vq->vq_num;
602
603 sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
604 KM_SLEEP);
605 if (sc->sc_txbufs == NULL) {
606 dev_err(sc->sc_dev, CE_WARN,
607 "Failed to allocate the tx buffers array");
608 goto exit_txalloc;
609 }
610
611 /*
612 * We don't allocate the rx vioif_bufs, just the pointers, as
613 * rx vioif_bufs can be loaned upstream, and we don't know the
614 * total number we need.
615 */
616 sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
617 KM_SLEEP);
618 if (sc->sc_rxbufs == NULL) {
619 dev_err(sc->sc_dev, CE_WARN,
620 "Failed to allocate the rx buffers pointer array");
621 goto exit_rxalloc;
622 }
623
624 for (i = 0; i < txqsize; i++) {
625 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
626
627 /* Allocate and bind an inline mapping. */
628
629 if (ddi_dma_alloc_handle(sc->sc_dev,
630 &vioif_inline_buf_dma_attr,
631 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
632
633 dev_err(sc->sc_dev, CE_WARN,
634 "Can't allocate dma handle for tx buffer %d", i);
635 goto exit_tx;
636 }
637
638 if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
639 VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
640 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
641 &len, &buf->tb_inline_mapping.vbm_acch)) {
642
643 dev_err(sc->sc_dev, CE_WARN,
644 "Can't allocate tx buffer %d", i);
645 goto exit_tx;
646 }
647 ASSERT(len >= VIOIF_TX_INLINE_SIZE);
648
649 if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
650 NULL, buf->tb_inline_mapping.vbm_buf, len,
651 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
652 &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
653
654 dev_err(sc->sc_dev, CE_WARN,
655 "Can't bind tx buffer %d", i);
656 goto exit_tx;
657 }
658
659 /* We asked for a single segment */
660 ASSERT(nsegments == 1);
661
		/*
		 * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
		 * In reality, we don't expect more than 2-3 to be used, but
		 * who knows.
		 */
		buf->tb_external_mapping = kmem_zalloc(
		    sizeof (struct vioif_buf_mapping) *
		    (VIOIF_INDIRECT_MAX - 1), KM_SLEEP);
670
		/*
		 * The external mappings' DMA handles are allocated lazily,
		 * as we don't expect most of them to be used.
		 */
675 }
676
677 return (0);
678
679 exit_tx:
680 for (i = 0; i < txqsize; i++) {
681 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
682
683 if (buf->tb_inline_mapping.vbm_dmah)
684 (void) ddi_dma_unbind_handle(
685 buf->tb_inline_mapping.vbm_dmah);
686
687 if (buf->tb_inline_mapping.vbm_acch)
688 ddi_dma_mem_free(
689 &buf->tb_inline_mapping.vbm_acch);
690
691 if (buf->tb_inline_mapping.vbm_dmah)
692 ddi_dma_free_handle(
693 &buf->tb_inline_mapping.vbm_dmah);
694
		if (buf->tb_external_mapping)
			kmem_free(buf->tb_external_mapping,
			    sizeof (struct vioif_buf_mapping) *
			    (VIOIF_INDIRECT_MAX - 1));
699 }
700
	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) * rxqsize);
702
703 exit_rxalloc:
704 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
705 exit_txalloc:
706 return (ENOMEM);
707 }
708
709 /* ARGSUSED */
710 int
711 vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
712 {
713 return (DDI_SUCCESS);
714 }
715
716 /* ARGSUSED */
717 int
718 vioif_promisc(void *arg, boolean_t on)
719 {
720 return (DDI_SUCCESS);
721 }
722
723 /* ARGSUSED */
724 int
725 vioif_unicst(void *arg, const uint8_t *macaddr)
726 {
727 return (DDI_FAILURE);
728 }
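
/*
 * The three callbacks above are effectively stubs: multicast filtering and
 * promiscuous mode changes are accepted without programming the device (we
 * simply receive whatever the host delivers), and changing the unicast
 * address at runtime is not supported.
 */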
729
730
731 static uint_t
732 vioif_add_rx(struct vioif_softc *sc, int kmflag)
733 {
734 uint_t num_added = 0;
735 struct vq_entry *ve;
736
737 while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
738 struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
739
740 if (buf == NULL) {
741 /* First run, allocate the buffer. */
742 buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
743 sc->sc_rxbufs[ve->qe_index] = buf;
744 }
745
746 /* Still nothing? Bye. */
747 if (buf == NULL) {
748 sc->sc_norecvbuf++;
749 vq_free_entry(sc->sc_rx_vq, ve);
750 break;
751 }
752
753 ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
754
		/*
		 * The virtio_net_hdr is placed in its own virtio queue entry:
		 * legacy hosts that have not negotiated VIRTIO_F_ANY_LAYOUT
		 * generally expect the header to live in a separate,
		 * header-sized descriptor.
		 */
759 virtio_ve_add_indirect_buf(ve,
760 buf->rb_mapping.vbm_dmac.dmac_laddress,
761 sizeof (struct virtio_net_hdr), B_FALSE);
762
763 /* Add the rest of the first cookie. */
764 virtio_ve_add_indirect_buf(ve,
765 buf->rb_mapping.vbm_dmac.dmac_laddress +
766 sizeof (struct virtio_net_hdr),
767 buf->rb_mapping.vbm_dmac.dmac_size -
768 sizeof (struct virtio_net_hdr), B_FALSE);
769
		/*
		 * If the buffer consists of a single cookie (unlikely for a
		 * 64KB buffer), we are done.  Otherwise, add the rest of the
		 * cookies using indirect entries.
		 */
775 if (buf->rb_mapping.vbm_ncookies > 1) {
776 ddi_dma_cookie_t *first_extra_dmac;
777 ddi_dma_cookie_t dmac;
778 first_extra_dmac =
779 vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
780
781 ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
782 virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
783 dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
784 vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
785 first_extra_dmac);
786 }
787
788 virtio_push_chain(ve, B_FALSE);
789 num_added++;
790 }
791
792 return (num_added);
793 }
794
795 static uint_t
796 vioif_populate_rx(struct vioif_softc *sc, int kmflag)
797 {
798 uint_t num_added = vioif_add_rx(sc, kmflag);
799
800 if (num_added > 0)
801 virtio_sync_vq(sc->sc_rx_vq);
802
803 return (num_added);
804 }
805
806 static uint_t
807 vioif_process_rx(struct vioif_softc *sc)
808 {
809 struct vq_entry *ve;
810 struct vioif_rx_buf *buf;
811 mblk_t *mphead = NULL, *lastmp = NULL, *mp;
812 uint32_t len;
813 uint_t num_processed = 0;
814
815 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
816
817 buf = sc->sc_rxbufs[ve->qe_index];
818 ASSERT(buf);
819
820 if (len < sizeof (struct virtio_net_hdr)) {
821 sc->sc_rxfail_chain_undersize++;
822 sc->sc_ierrors++;
823 virtio_free_chain(ve);
824 continue;
825 }
826
827 len -= sizeof (struct virtio_net_hdr);
828 /*
829 * We copy small packets that happen to fit into a single
830 * cookie and reuse the buffers. For bigger ones, we loan
831 * the buffers upstream.
832 */
833 if (len < sc->sc_rxcopy_thresh) {
834 mp = allocb(len, 0);
835 if (mp == NULL) {
836 sc->sc_norecvbuf++;
837 sc->sc_ierrors++;
838
839 virtio_free_chain(ve);
840 break;
841 }
842
843 bcopy((char *)buf->rb_mapping.vbm_buf +
844 sizeof (struct virtio_net_hdr), mp->b_rptr, len);
845 mp->b_wptr = mp->b_rptr + len;
846
847 } else {
848 mp = desballoc((unsigned char *)
849 buf->rb_mapping.vbm_buf +
850 sizeof (struct virtio_net_hdr) +
851 VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
852 if (mp == NULL) {
853 sc->sc_norecvbuf++;
854 sc->sc_ierrors++;
855
856 virtio_free_chain(ve);
857 break;
858 }
859 mp->b_wptr = mp->b_rptr + len;
860
861 atomic_inc_ulong(&sc->sc_rxloan);
862 /*
863 * Buffer loaned, we will have to allocate a new one
864 * for this slot.
865 */
866 sc->sc_rxbufs[ve->qe_index] = NULL;
867 }
868
869 /*
870 * virtio-net does not tell us if this packet is multicast
871 * or broadcast, so we have to check it.
872 */
873 if (mp->b_rptr[0] & 0x1) {
874 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
875 sc->sc_multircv++;
876 else
877 sc->sc_brdcstrcv++;
878 }
879
880 sc->sc_rbytes += len;
881 sc->sc_ipackets++;
882
883 virtio_free_chain(ve);
884
885 if (lastmp == NULL) {
886 mphead = mp;
887 } else {
888 lastmp->b_next = mp;
889 }
890 lastmp = mp;
891 num_processed++;
892 }
893
894 if (mphead != NULL) {
895 mac_rx(sc->sc_mac_handle, NULL, mphead);
896 }
897
898 return (num_processed);
899 }
900
901 static uint_t
902 vioif_reclaim_used_tx(struct vioif_softc *sc)
903 {
904 struct vq_entry *ve;
905 struct vioif_tx_buf *buf;
906 uint32_t len;
907 mblk_t *mp;
908 uint_t num_reclaimed = 0;
909
910 while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
911 /* We don't chain descriptors for tx, so don't expect any. */
912 ASSERT(!ve->qe_next);
913
914 buf = &sc->sc_txbufs[ve->qe_index];
915 mp = buf->tb_mp;
916 buf->tb_mp = NULL;
917
918 if (mp != NULL) {
919 for (int i = 0; i < buf->tb_external_num; i++)
920 (void) ddi_dma_unbind_handle(
921 buf->tb_external_mapping[i].vbm_dmah);
922 }
923
924 virtio_free_chain(ve);
925
926 /* External mapping used, mp was not freed in vioif_send() */
927 if (mp != NULL)
928 freemsg(mp);
929 num_reclaimed++;
930 }
931
932 if (sc->sc_tx_stopped && num_reclaimed > 0) {
933 sc->sc_tx_stopped = 0;
934 mac_tx_update(sc->sc_mac_handle);
935 }
936
937 return (num_reclaimed);
938 }
939
940 /* sc will be used to update stat counters. */
941 /* ARGSUSED */
942 static inline void
943 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
944 size_t msg_size)
945 {
946 struct vioif_tx_buf *buf;
947 buf = &sc->sc_txbufs[ve->qe_index];
948
949 ASSERT(buf);
950
951 /* Frees mp */
952 mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
953 sizeof (struct virtio_net_hdr));
954
955 virtio_ve_add_indirect_buf(ve,
956 buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
957 sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
958 }
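
/*
 * Note that mcopymsg() consumes and frees the message, so on the inline path
 * tb_mp stays NULL and vioif_reclaim_used_tx() has nothing extra to free;
 * only the external path below saves the mblk in tb_mp.
 */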
959
960 static inline int
961 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
962 int i)
963 {
964 int ret = DDI_SUCCESS;
965
966 if (!buf->tb_external_mapping[i].vbm_dmah) {
967 ret = ddi_dma_alloc_handle(sc->sc_dev,
968 &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
969 &buf->tb_external_mapping[i].vbm_dmah);
970 if (ret != DDI_SUCCESS) {
971 sc->sc_txfail_dma_handle++;
972 }
973 }
974
975 return (ret);
976 }
977
978 static inline int
979 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
980 size_t msg_size)
981 {
982 _NOTE(ARGUNUSED(msg_size));
983
984 struct vioif_tx_buf *buf;
985 mblk_t *nmp;
986 int i, j;
987 int ret = DDI_SUCCESS;
988
989 buf = &sc->sc_txbufs[ve->qe_index];
990
991 ASSERT(buf);
992
993 buf->tb_external_num = 0;
994 i = 0;
995 nmp = mp;
996
997 while (nmp) {
998 size_t len;
999 ddi_dma_cookie_t dmac;
1000 unsigned int ncookies;
1001
1002 len = MBLKL(nmp);
1003 /*
1004 * For some reason, the network stack can
1005 * actually send us zero-length fragments.
1006 */
1007 if (len == 0) {
1008 nmp = nmp->b_cont;
1009 continue;
1010 }
1011
1012 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
1013 if (ret != DDI_SUCCESS) {
1014 sc->sc_notxbuf++;
1015 sc->sc_oerrors++;
1016 goto exit_lazy_alloc;
1017 }
1018 ret = ddi_dma_addr_bind_handle(
1019 buf->tb_external_mapping[i].vbm_dmah, NULL,
1020 (caddr_t)nmp->b_rptr, len,
1021 DDI_DMA_WRITE | DDI_DMA_STREAMING,
1022 DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1023
1024 if (ret != DDI_SUCCESS) {
1025 sc->sc_txfail_dma_bind++;
1026 sc->sc_oerrors++;
1027 goto exit_bind;
1028 }
1029
1030 /* Check if we still fit into the indirect table. */
1031 if (virtio_ve_indirect_available(ve) < ncookies) {
1032 sc->sc_txfail_indirect_limit++;
1033 sc->sc_notxbuf++;
1034 sc->sc_oerrors++;
1035
1036 ret = DDI_FAILURE;
1037 goto exit_limit;
1038 }
1039
1040 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1041 dmac, ncookies, B_TRUE);
1042
1043 nmp = nmp->b_cont;
1044 i++;
1045 }
1046
1047 buf->tb_external_num = i;
1048 /* Save the mp to free it when the packet is sent. */
1049 buf->tb_mp = mp;
1050
1051 return (DDI_SUCCESS);
1052
1053 exit_limit:
1054 exit_bind:
1055 exit_lazy_alloc:
1056
1057 for (j = 0; j < i; j++) {
1058 (void) ddi_dma_unbind_handle(
1059 buf->tb_external_mapping[j].vbm_dmah);
1060 }
1061
1062 return (ret);
1063 }
1064
1065 static boolean_t
1066 vioif_send(struct vioif_softc *sc, mblk_t *mp)
1067 {
1068 struct vq_entry *ve;
1069 struct vioif_tx_buf *buf;
1070 struct virtio_net_hdr *net_header = NULL;
1071 size_t msg_size = 0;
1072 uint32_t csum_start;
1073 uint32_t csum_stuff;
1074 uint32_t csum_flags;
1075 uint32_t lso_flags;
1076 uint32_t lso_mss;
1077 mblk_t *nmp;
1078 int ret;
1079 boolean_t lso_required = B_FALSE;
1080
1081 for (nmp = mp; nmp; nmp = nmp->b_cont)
1082 msg_size += MBLKL(nmp);
1083
1084 if (sc->sc_tx_tso4) {
1085 mac_lso_get(mp, &lso_mss, &lso_flags);
1086 lso_required = (lso_flags & HW_LSO);
1087 }
1088
1089 ve = vq_alloc_entry(sc->sc_tx_vq);
1090
1091 if (ve == NULL) {
1092 sc->sc_notxbuf++;
1093 /* Out of free descriptors - try later. */
1094 return (B_FALSE);
1095 }
1096 buf = &sc->sc_txbufs[ve->qe_index];
1097
1098 /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1099 (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1100 sizeof (struct virtio_net_hdr));
1101
1102 net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1103
1104 mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1105 NULL, &csum_flags);
1106
1107 /* They want us to do the TCP/UDP csum calculation. */
1108 if (csum_flags & HCK_PARTIALCKSUM) {
1109 struct ether_header *eth_header;
1110 int eth_hsize;
1111
1112 /* Did we ask for it? */
1113 ASSERT(sc->sc_tx_csum);
1114
1115 /* We only asked for partial csum packets. */
1116 ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1117 ASSERT(!(csum_flags & HCK_FULLCKSUM));
1118
1119 eth_header = (void *) mp->b_rptr;
1120 if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
1121 eth_hsize = sizeof (struct ether_vlan_header);
1122 } else {
1123 eth_hsize = sizeof (struct ether_header);
1124 }
1125 net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1126 net_header->csum_start = eth_hsize + csum_start;
1127 net_header->csum_offset = csum_stuff - csum_start;
1128 }
1129
1130 /* setup LSO fields if required */
1131 if (lso_required) {
1132 net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1133 net_header->gso_size = (uint16_t)lso_mss;
1134 }
1135
1136 virtio_ve_add_indirect_buf(ve,
1137 buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
1138 sizeof (struct virtio_net_hdr), B_TRUE);
1139
	/* Meanwhile, update the statistics. */
1141 if (mp->b_rptr[0] & 0x1) {
1142 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
1143 sc->sc_multixmt++;
1144 else
1145 sc->sc_brdcstxmt++;
1146 }
1147
	/*
	 * We copy small packets into the inline buffer.  Bigger ones are
	 * mapped using the external mapping array.
	 */
1152 if (msg_size < sc->sc_txcopy_thresh) {
1153 vioif_tx_inline(sc, ve, mp, msg_size);
1154 } else {
		/* Statistics are updated by vioif_tx_external() on failure. */
1156 ret = vioif_tx_external(sc, ve, mp, msg_size);
1157 if (ret != DDI_SUCCESS)
1158 goto exit_tx_external;
1159 }
1160
1161 virtio_push_chain(ve, B_TRUE);
1162
1163 sc->sc_opackets++;
1164 sc->sc_obytes += msg_size;
1165
1166 return (B_TRUE);
1167
1168 exit_tx_external:
1169
1170 vq_free_entry(sc->sc_tx_vq, ve);
1171 /*
1172 * vioif_tx_external can fail when the buffer does not fit into the
1173 * indirect descriptor table. Free the mp. I don't expect this ever
1174 * to happen.
1175 */
1176 freemsg(mp);
1177
1178 return (B_TRUE);
1179 }
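
/*
 * Note that vioif_send() returns B_FALSE only when no descriptor was
 * available, i.e. when the packet should be retried later.  All other
 * failures consume (free) the mblk and report B_TRUE so that the MAC layer
 * does not retransmit a message we no longer own.
 */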
1180
1181 mblk_t *
1182 vioif_tx(void *arg, mblk_t *mp)
1183 {
1184 struct vioif_softc *sc = arg;
1185 mblk_t *nmp;
1186
1187 while (mp != NULL) {
1188 nmp = mp->b_next;
1189 mp->b_next = NULL;
1190
1191 if (!vioif_send(sc, mp)) {
1192 sc->sc_tx_stopped = 1;
1193 mp->b_next = nmp;
1194 break;
1195 }
1196 mp = nmp;
1197 }
1198
1199 return (mp);
1200 }
1201
1202 int
1203 vioif_start(void *arg)
1204 {
1205 struct vioif_softc *sc = arg;
1206 struct vq_entry *ve;
1207 uint32_t len;
1208
1209 mac_link_update(sc->sc_mac_handle, vioif_link_state(sc));
1210
1211 virtio_start_vq_intr(sc->sc_rx_vq);
1212
1213 /*
1214 * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1215 * so the device will send a transmit interrupt when the queue is empty
1216 * and we can reclaim it in one sweep.
1217 */
1218
1219 /*
1220 * Clear any data that arrived early on the receive queue and populate
1221 * it with free buffers that the device can use moving forward.
1222 */
1223 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1224 virtio_free_chain(ve);
1225 }
1226 (void) vioif_populate_rx(sc, KM_SLEEP);
1227
1228 return (DDI_SUCCESS);
1229 }
1230
1231 void
1232 vioif_stop(void *arg)
1233 {
1234 struct vioif_softc *sc = arg;
1235
1236 virtio_stop_vq_intr(sc->sc_rx_vq);
1237 }
1238
1239 /* ARGSUSED */
1240 static int
1241 vioif_stat(void *arg, uint_t stat, uint64_t *val)
1242 {
1243 struct vioif_softc *sc = arg;
1244
1245 switch (stat) {
1246 case MAC_STAT_IERRORS:
1247 *val = sc->sc_ierrors;
1248 break;
1249 case MAC_STAT_OERRORS:
1250 *val = sc->sc_oerrors;
1251 break;
1252 case MAC_STAT_MULTIRCV:
1253 *val = sc->sc_multircv;
1254 break;
1255 case MAC_STAT_BRDCSTRCV:
1256 *val = sc->sc_brdcstrcv;
1257 break;
1258 case MAC_STAT_MULTIXMT:
1259 *val = sc->sc_multixmt;
1260 break;
1261 case MAC_STAT_BRDCSTXMT:
1262 *val = sc->sc_brdcstxmt;
1263 break;
1264 case MAC_STAT_IPACKETS:
1265 *val = sc->sc_ipackets;
1266 break;
1267 case MAC_STAT_RBYTES:
1268 *val = sc->sc_rbytes;
1269 break;
1270 case MAC_STAT_OPACKETS:
1271 *val = sc->sc_opackets;
1272 break;
1273 case MAC_STAT_OBYTES:
1274 *val = sc->sc_obytes;
1275 break;
1276 case MAC_STAT_NORCVBUF:
1277 *val = sc->sc_norecvbuf;
1278 break;
1279 case MAC_STAT_NOXMTBUF:
1280 *val = sc->sc_notxbuf;
1281 break;
1282 case MAC_STAT_IFSPEED:
1283 /* always 1 Gbit */
1284 *val = 1000000000ULL;
1285 break;
1286 case ETHER_STAT_LINK_DUPLEX:
1287 /* virtual device, always full-duplex */
1288 *val = LINK_DUPLEX_FULL;
1289 break;
1290
1291 default:
1292 return (ENOTSUP);
1293 }
1294
1295 return (DDI_SUCCESS);
1296 }
1297
1298 static int
1299 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
1300 uint_t pr_valsize, const void *pr_val)
1301 {
1302 _NOTE(ARGUNUSED(pr_valsize));
1303
1304 long result;
1305
1306 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1307
1308 if (pr_val == NULL)
1309 return (EINVAL);
1310
1311 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1312
1313 if (result < 0 || result > VIOIF_TX_THRESH_MAX)
1314 return (EINVAL);
1315 sc->sc_txcopy_thresh = result;
1316 }
1317 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1318
1319 if (pr_val == NULL)
1320 return (EINVAL);
1321
1322 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1323
1324 if (result < 0 || result > VIOIF_RX_THRESH_MAX)
1325 return (EINVAL);
1326 sc->sc_rxcopy_thresh = result;
1327 }
1328 return (0);
1329 }
1330
1331 static int
1332 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1333 uint_t pr_valsize, const void *pr_val)
1334 {
1335 struct vioif_softc *sc = arg;
1336 const uint32_t *new_mtu;
1337 int err;
1338
1339 switch (pr_num) {
1340 case MAC_PROP_MTU:
1341 new_mtu = pr_val;
1342
1343 if (*new_mtu > MAX_MTU) {
1344 return (EINVAL);
1345 }
1346
1347 err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
1348 if (err) {
1349 return (err);
1350 }
1351 break;
1352 case MAC_PROP_PRIVATE:
1353 err = vioif_set_prop_private(sc, pr_name,
1354 pr_valsize, pr_val);
1355 if (err)
1356 return (err);
1357 break;
1358 default:
1359 return (ENOTSUP);
1360 }
1361
1362 return (0);
1363 }
1364
1365 static int
1366 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
1367 uint_t pr_valsize, void *pr_val)
1368 {
1369 int err = ENOTSUP;
1370 int value;
1371
1372 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1373
1374 value = sc->sc_txcopy_thresh;
1375 err = 0;
1376 goto done;
1377 }
1378 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1379
1380 value = sc->sc_rxcopy_thresh;
1381 err = 0;
1382 goto done;
1383 }
1384 done:
1385 if (err == 0) {
1386 (void) snprintf(pr_val, pr_valsize, "%d", value);
1387 }
1388 return (err);
1389 }
1390
1391 static int
1392 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1393 uint_t pr_valsize, void *pr_val)
1394 {
1395 struct vioif_softc *sc = arg;
1396 int err = ENOTSUP;
1397
1398 switch (pr_num) {
1399 case MAC_PROP_PRIVATE:
1400 err = vioif_get_prop_private(sc, pr_name,
1401 pr_valsize, pr_val);
1402 break;
1403 default:
1404 break;
1405 }
1406 return (err);
1407 }
1408
1409 static void
1410 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1411 mac_prop_info_handle_t prh)
1412 {
1413 struct vioif_softc *sc = arg;
1414 char valstr[64];
1415 int value;
1416
1417 switch (pr_num) {
1418 case MAC_PROP_MTU:
1419 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1420 break;
1421
1422 case MAC_PROP_PRIVATE:
1423 bzero(valstr, sizeof (valstr));
1424 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1425 value = sc->sc_txcopy_thresh;
1426 } else if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1427 value = sc->sc_rxcopy_thresh;
1428 } else {
1429 return;
1430 }
		(void) snprintf(valstr, sizeof (valstr), "%d", value);
		mac_prop_info_set_default_str(prh, valstr);
1432 break;
1433
1434 default:
1435 break;
1436 }
1437 }
1438
1439 static boolean_t
1440 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1441 {
1442 struct vioif_softc *sc = arg;
1443
1444 switch (cap) {
1445 case MAC_CAPAB_HCKSUM:
1446 if (sc->sc_tx_csum) {
1447 uint32_t *txflags = cap_data;
1448
1449 *txflags = HCKSUM_INET_PARTIAL;
1450 return (B_TRUE);
1451 }
1452 return (B_FALSE);
1453 case MAC_CAPAB_LSO:
1454 if (sc->sc_tx_tso4) {
1455 mac_capab_lso_t *cap_lso = cap_data;
1456
1457 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1458 cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
1459 return (B_TRUE);
1460 }
1461 return (B_FALSE);
1462 default:
1463 break;
1464 }
1465 return (B_FALSE);
1466 }
1467
1468 static mac_callbacks_t vioif_m_callbacks = {
1469 .mc_callbacks = (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
1470 .mc_getstat = vioif_stat,
1471 .mc_start = vioif_start,
1472 .mc_stop = vioif_stop,
1473 .mc_setpromisc = vioif_promisc,
1474 .mc_multicst = vioif_multicst,
1475 .mc_unicst = vioif_unicst,
1476 .mc_tx = vioif_tx,
1477 /* Optional callbacks */
1478 .mc_reserved = NULL, /* reserved */
1479 .mc_ioctl = NULL, /* mc_ioctl */
1480 .mc_getcapab = vioif_getcapab, /* mc_getcapab */
1481 .mc_open = NULL, /* mc_open */
1482 .mc_close = NULL, /* mc_close */
1483 .mc_setprop = vioif_setprop,
1484 .mc_getprop = vioif_getprop,
1485 .mc_propinfo = vioif_propinfo,
1486 };
1487
1488 static void
1489 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1490 uint32_t features)
1491 {
1492 char buf[512];
1493 char *bufp = buf;
1494 char *bufend = buf + sizeof (buf);
1495
1496 /* LINTED E_PTRDIFF_OVERFLOW */
1497 bufp += snprintf(bufp, bufend - bufp, prefix);
1498 /* LINTED E_PTRDIFF_OVERFLOW */
1499 bufp += virtio_show_features(features, bufp, bufend - bufp);
1500 *bufp = '\0';
1501
1502 /* Using '!' to only CE_NOTE this to the system log. */
1503 dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1504 VIRTIO_NET_FEATURE_BITS);
1505 }
1506
1507 /*
1508 * Find out which features are supported by the device and
1509 * choose which ones we wish to use.
1510 */
1511 static int
1512 vioif_dev_features(struct vioif_softc *sc)
1513 {
1514 uint32_t host_features;
1515
1516 host_features = virtio_negotiate_features(&sc->sc_virtio,
1517 VIRTIO_NET_F_CSUM |
1518 VIRTIO_NET_F_HOST_TSO4 |
1519 VIRTIO_NET_F_HOST_ECN |
1520 VIRTIO_NET_F_MAC |
1521 VIRTIO_NET_F_STATUS |
1522 VIRTIO_F_RING_INDIRECT_DESC |
1523 VIRTIO_F_NOTIFY_ON_EMPTY);
1524
1525 vioif_show_features(sc, "Host features: ", host_features);
1526 vioif_show_features(sc, "Negotiated features: ",
1527 sc->sc_virtio.sc_features);
1528
1529 if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1530 dev_err(sc->sc_dev, CE_WARN,
1531 "Host does not support RING_INDIRECT_DESC. Cannot attach.");
1532 return (DDI_FAILURE);
1533 }
1534
1535 return (DDI_SUCCESS);
1536 }
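
/*
 * Note that VIRTIO_NET_F_CTRL_VQ is not included in the mask above.
 * Assuming virtio_negotiate_features() only accepts feature bits we request,
 * vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) can never be true, so the
 * control queue set up in vioif_attach() is currently dead code.
 */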
1537
1538 static int
1539 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1540 {
1541 return (virtio_has_feature(&sc->sc_virtio, feature));
1542 }
1543
1544 static void
1545 vioif_set_mac(struct vioif_softc *sc)
1546 {
1547 int i;
1548
1549 for (i = 0; i < ETHERADDRL; i++) {
1550 virtio_write_device_config_1(&sc->sc_virtio,
1551 VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1552 }
1553 sc->sc_mac_from_host = 0;
1554 }
1555
1556 /* Get the mac address out of the hardware, or make up one. */
1557 static void
1558 vioif_get_mac(struct vioif_softc *sc)
1559 {
1560 int i;
1561 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1562 for (i = 0; i < ETHERADDRL; i++) {
1563 sc->sc_mac[i] = virtio_read_device_config_1(
1564 &sc->sc_virtio,
1565 VIRTIO_NET_CONFIG_MAC + i);
1566 }
1567 sc->sc_mac_from_host = 1;
1568 } else {
1569 /* Get a few random bytes */
1570 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1571 /* Make sure it's a unicast MAC */
1572 sc->sc_mac[0] &= ~1;
1573 /* Set the "locally administered" bit */
1574 sc->sc_mac[1] |= 2;
1575
1576 vioif_set_mac(sc);
1577
1578 dev_err(sc->sc_dev, CE_NOTE,
1579 "!Generated a random MAC address: %s",
1580 ether_sprintf((struct ether_addr *)sc->sc_mac));
1581 }
1582 }
1583
1584 /*
1585 * Virtqueue interrupt handlers
1586 */
1587 /* ARGSUSED */
1588 uint_t
1589 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1590 {
1591 struct virtio_softc *vsc = (void *) arg1;
1592 struct vioif_softc *sc = container_of(vsc,
1593 struct vioif_softc, sc_virtio);
1594
1595 /*
1596 * The return values of these functions are not needed but they make
1597 * debugging interrupts simpler because you can use them to detect when
1598 * stuff was processed and repopulated in this handler.
1599 */
1600 (void) vioif_process_rx(sc);
1601 (void) vioif_populate_rx(sc, KM_NOSLEEP);
1602
1603 return (DDI_INTR_CLAIMED);
1604 }
1605
1606 /* ARGSUSED */
1607 uint_t
1608 vioif_tx_handler(caddr_t arg1, caddr_t arg2)
1609 {
1610 struct virtio_softc *vsc = (void *)arg1;
1611 struct vioif_softc *sc = container_of(vsc,
1612 struct vioif_softc, sc_virtio);
1613
1614 /*
1615 * The return value of this function is not needed but makes debugging
1616 * interrupts simpler because you can use it to detect if anything was
1617 * reclaimed in this handler.
1618 */
1619 (void) vioif_reclaim_used_tx(sc);
1620
1621 return (DDI_INTR_CLAIMED);
1622 }
1623
1624 static int
1625 vioif_register_ints(struct vioif_softc *sc)
1626 {
1627 int ret;
1628
1629 struct virtio_int_handler vioif_vq_h[] = {
1630 { vioif_rx_handler },
1631 { vioif_tx_handler },
1632 { NULL }
1633 };
1634
1635 ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1636
1637 return (ret);
1638 }
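
/*
 * The handlers in vioif_vq_h[] appear to be associated with virtqueues by
 * position: entry 0 serves the rx queue and entry 1 the tx queue.  No
 * handler is registered for the control queue, which is consistent with it
 * not being used (see the note after vioif_dev_features()).
 */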
1639
1640
1641 static void
1642 vioif_check_features(struct vioif_softc *sc)
1643 {
1644 if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
		/* The GSO/GRO features depend on CSUM; check them here. */
1646 sc->sc_tx_csum = 1;
1647 sc->sc_rx_csum = 1;
1648
1649 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1650 sc->sc_rx_csum = 0;
1651 }
1652 dev_err(sc->sc_dev, CE_NOTE, "!Csum enabled.");
1653
1654 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1655
1656 sc->sc_tx_tso4 = 1;
1657 /*
1658 * We don't seem to have a way to ask the system
1659 * not to send us LSO packets with Explicit
1660 * Congestion Notification bit set, so we require
1661 * the device to support it in order to do
1662 * LSO.
1663 */
1664 if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1665 dev_err(sc->sc_dev, CE_NOTE,
1666 "!TSO4 supported, but not ECN. "
1667 "Not using LSO.");
1668 sc->sc_tx_tso4 = 0;
1669 } else {
1670 dev_err(sc->sc_dev, CE_NOTE, "!LSO enabled");
1671 }
1672 }
1673 }
1674 }
1675
1676 static int
1677 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1678 {
1679 int ret, instance;
1680 struct vioif_softc *sc;
1681 struct virtio_softc *vsc;
1682 mac_register_t *macp;
1683 char cache_name[CACHE_NAME_SIZE];
1684
1685 instance = ddi_get_instance(devinfo);
1686
1687 switch (cmd) {
1688 case DDI_ATTACH:
1689 break;
1690
1691 case DDI_RESUME:
1692 case DDI_PM_RESUME:
1693 /* We do not support suspend/resume for vioif. */
1694 goto exit;
1695
1696 default:
1697 goto exit;
1698 }
1699
1700 sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
1701 ddi_set_driver_private(devinfo, sc);
1702
1703 vsc = &sc->sc_virtio;
1704
1705 /* Duplicate for less typing */
1706 sc->sc_dev = devinfo;
1707 vsc->sc_dev = devinfo;
1708
1709 /*
1710 * Initialize interrupt kstat.
1711 */
1712 sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
1713 KSTAT_TYPE_INTR, 1, 0);
1714 if (sc->sc_intrstat == NULL) {
1715 dev_err(devinfo, CE_WARN, "kstat_create failed");
1716 goto exit_intrstat;
1717 }
1718 kstat_install(sc->sc_intrstat);
1719
1720 /* map BAR 0 */
1721 ret = ddi_regs_map_setup(devinfo, 1,
1722 (caddr_t *)&sc->sc_virtio.sc_io_addr,
1723 0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
1724 if (ret != DDI_SUCCESS) {
1725 dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
1726 goto exit_map;
1727 }
1728
1729 virtio_device_reset(&sc->sc_virtio);
1730 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
1731 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
1732
1733 ret = vioif_dev_features(sc);
1734 if (ret)
1735 goto exit_features;
1736
1737 vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
1738
1739 (void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
1740 sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
1741 sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
1742 vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
1743 if (sc->sc_rxbuf_cache == NULL) {
1744 dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
1745 goto exit_cache;
1746 }
1747
1748 ret = vioif_register_ints(sc);
1749 if (ret) {
1750 dev_err(sc->sc_dev, CE_WARN,
1751 "Failed to allocate interrupt(s)!");
1752 goto exit_ints;
1753 }
1754
1755 /*
1756 * Register layout determined, can now access the
1757 * device-specific bits
1758 */
1759 vioif_get_mac(sc);
1760
1761 sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
1762 VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
1763 if (!sc->sc_rx_vq)
1764 goto exit_alloc1;
1765 virtio_stop_vq_intr(sc->sc_rx_vq);
1766
1767 sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
1768 VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
1769 if (!sc->sc_tx_vq)
1770 goto exit_alloc2;
1771 virtio_stop_vq_intr(sc->sc_tx_vq);
1772
1773 if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
1774 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1775 VIOIF_CTRL_QLEN, 0, "ctrl");
1776 if (!sc->sc_ctrl_vq) {
1777 goto exit_alloc3;
1778 }
1779 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1780 }
1781
1782 virtio_set_status(&sc->sc_virtio,
1783 VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1784
1785 sc->sc_rxloan = 0;
1786
1787 /* set some reasonable-small default values */
1788 sc->sc_rxcopy_thresh = 300;
1789 sc->sc_txcopy_thresh = 300;
1790 sc->sc_mtu = ETHERMTU;
1791
1792 vioif_check_features(sc);
1793
1794 if (vioif_alloc_mems(sc) != 0)
1795 goto exit_alloc_mems;
1796
1797 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1798 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1799 goto exit_macalloc;
1800 }
1801
1802 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1803 macp->m_driver = sc;
1804 macp->m_dip = devinfo;
1805 macp->m_src_addr = sc->sc_mac;
1806 macp->m_callbacks = &vioif_m_callbacks;
1807 macp->m_min_sdu = 0;
1808 macp->m_max_sdu = sc->sc_mtu;
1809 macp->m_margin = VLAN_TAGSZ;
1810 macp->m_priv_props = vioif_priv_props;
1811
1812 sc->sc_macp = macp;
1813
1814 /* Pre-fill the rx ring. */
1815 (void) vioif_populate_rx(sc, KM_SLEEP);
1816
1817 ret = mac_register(macp, &sc->sc_mac_handle);
1818 if (ret != 0) {
1819 dev_err(devinfo, CE_WARN, "vioif_attach: "
1820 "mac_register() failed, ret=%d", ret);
1821 goto exit_register;
1822 }
1823
1824 ret = virtio_enable_ints(&sc->sc_virtio);
1825 if (ret) {
1826 dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
1827 goto exit_enable_ints;
1828 }
1829
1830 mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
1831 return (DDI_SUCCESS);
1832
1833 exit_enable_ints:
1834 (void) mac_unregister(sc->sc_mac_handle);
1835 exit_register:
1836 mac_free(macp);
1837 exit_macalloc:
1838 vioif_free_mems(sc);
1839 exit_alloc_mems:
1840 virtio_release_ints(&sc->sc_virtio);
1841 if (sc->sc_ctrl_vq)
1842 virtio_free_vq(sc->sc_ctrl_vq);
1843 exit_alloc3:
1844 virtio_free_vq(sc->sc_tx_vq);
1845 exit_alloc2:
1846 virtio_free_vq(sc->sc_rx_vq);
1847 exit_alloc1:
1848 exit_ints:
1849 kmem_cache_destroy(sc->sc_rxbuf_cache);
1850 exit_cache:
1851 exit_features:
1852 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1853 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1854 exit_intrstat:
1855 exit_map:
1856 kstat_delete(sc->sc_intrstat);
1857 kmem_free(sc, sizeof (struct vioif_softc));
1858 exit:
1859 return (DDI_FAILURE);
1860 }
1861
1862 static int
1863 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1864 {
1865 struct vioif_softc *sc;
1866
1867 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1868 return (DDI_FAILURE);
1869
1870 switch (cmd) {
1871 case DDI_DETACH:
1872 break;
1873
1874 case DDI_PM_SUSPEND:
1875 /* We do not support suspend/resume for vioif. */
1876 return (DDI_FAILURE);
1877
1878 default:
1879 return (DDI_FAILURE);
1880 }
1881
1882 if (sc->sc_rxloan > 0) {
1883 dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1884 " not detaching.");
1885 return (DDI_FAILURE);
1886 }
1887
1888 virtio_stop_vq_intr(sc->sc_rx_vq);
1889 virtio_stop_vq_intr(sc->sc_tx_vq);
1890
1891 virtio_release_ints(&sc->sc_virtio);
1892
1893 if (mac_unregister(sc->sc_mac_handle)) {
1894 return (DDI_FAILURE);
1895 }
1896
1897 mac_free(sc->sc_macp);
1898
1899 vioif_free_mems(sc);
1900 virtio_free_vq(sc->sc_rx_vq);
1901 virtio_free_vq(sc->sc_tx_vq);
1902
1903 virtio_device_reset(&sc->sc_virtio);
1904
1905 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1906
1907 kmem_cache_destroy(sc->sc_rxbuf_cache);
1908 kstat_delete(sc->sc_intrstat);
1909 kmem_free(sc, sizeof (struct vioif_softc));
1910
1911 return (DDI_SUCCESS);
1912 }
1913
1914 static int
1915 vioif_quiesce(dev_info_t *devinfo)
1916 {
1917 struct vioif_softc *sc;
1918
1919 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1920 return (DDI_FAILURE);
1921
1922 virtio_stop_vq_intr(sc->sc_rx_vq);
1923 virtio_stop_vq_intr(sc->sc_tx_vq);
1924 virtio_device_reset(&sc->sc_virtio);
1925
1926 return (DDI_SUCCESS);
1927 }
1928
1929 int
1930 _init(void)
1931 {
1932 int ret = 0;
1933
1934 mac_init_ops(&vioif_ops, "vioif");
1935
1936 ret = mod_install(&modlinkage);
1937 if (ret != DDI_SUCCESS) {
1938 mac_fini_ops(&vioif_ops);
1939 return (ret);
1940 }
1941
1942 return (0);
1943 }
1944
1945 int
1946 _fini(void)
1947 {
1948 int ret;
1949
1950 ret = mod_remove(&modlinkage);
1951 if (ret == DDI_SUCCESS) {
1952 mac_fini_ops(&vioif_ops);
1953 }
1954
1955 return (ret);
1956 }
1957
1958 int
1959 _info(struct modinfo *pModinfo)
1960 {
1961 return (mod_info(&modlinkage, pModinfo));
1962 }