1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
14 * Copyright (c) 2014, 2015 by Delphix. All rights reserved.
15 */
16
17 /* Based on the NetBSD virtio driver by Minoura Makoto. */
18 /*
19 * Copyright (c) 2010 Minoura Makoto.
20 * All rights reserved.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
32 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
34 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
35 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
40 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42
43 #include <sys/types.h>
44 #include <sys/errno.h>
45 #include <sys/param.h>
46 #include <sys/stropts.h>
47 #include <sys/stream.h>
48 #include <sys/strsubr.h>
49 #include <sys/kmem.h>
50 #include <sys/conf.h>
51 #include <sys/devops.h>
52 #include <sys/ksynch.h>
53 #include <sys/stat.h>
54 #include <sys/modctl.h>
55 #include <sys/debug.h>
56 #include <sys/pci.h>
57 #include <sys/ethernet.h>
58
59 #define VLAN_TAGSZ 4
60
61 #include <sys/dlpi.h>
62 #include <sys/taskq.h>
63 #include <sys/cyclic.h>
64
65 #include <sys/pattr.h>
66 #include <sys/strsun.h>
67
68 #include <sys/random.h>
69 #include <sys/sysmacros.h>
70 #include <sys/stream.h>
71
72 #include <sys/mac.h>
73 #include <sys/mac_provider.h>
74 #include <sys/mac_ether.h>
75
76 #include "virtiovar.h"
77 #include "virtioreg.h"
78
79 #if !defined(__packed)
80 #define __packed __attribute__((packed))
81 #endif /* __packed */
82
83 /* Configuration registers */
84 #define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */
85 #define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */
86
87 /* Feature bits */
88 #define VIRTIO_NET_F_CSUM (1 << 0) /* Host handles pkts w/ partial csum */
89 #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* Guest handles pkts w/ part csum */
90 #define VIRTIO_NET_F_MAC (1 << 5) /* Host has given MAC address. */
91 #define VIRTIO_NET_F_GSO (1 << 6) /* Host handles pkts w/ any GSO type */
92 #define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* Guest can handle TSOv4 in. */
93 #define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* Guest can handle TSOv6 in. */
94 #define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* Guest can handle TSO[6] w/ ECN in */
95 #define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* Guest can handle UFO in. */
96 #define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* Host can handle TSOv4 in. */
97 #define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* Host can handle TSOv6 in. */
98 #define VIRTIO_NET_F_HOST_ECN (1 << 13) /* Host can handle TSO[6] w/ ECN in */
99 #define VIRTIO_NET_F_HOST_UFO (1 << 14) /* Host can handle UFO in. */
100 #define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* Host can merge receive buffers. */
101 #define VIRTIO_NET_F_STATUS (1 << 16) /* Config.status available */
102 #define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* Control channel available */
103 #define VIRTIO_NET_F_CTRL_RX (1 << 18) /* Control channel RX mode support */
104 #define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* Control channel VLAN filtering */
105 #define VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */
106
107 /* Status */
108 #define VIRTIO_NET_S_LINK_UP 1
109
110 /* Packet header structure */
111 struct virtio_net_hdr {
112 uint8_t flags;
113 uint8_t gso_type;
114 uint16_t hdr_len;
115 uint16_t gso_size;
116 uint16_t csum_start;
117 uint16_t csum_offset;
118 };
119
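/*
 * When VIRTIO_NET_HDR_F_NEEDS_CSUM is set, csum_start is the offset at
 * which checksumming starts and csum_offset is where, relative to
 * csum_start, the computed checksum must be stored. gso_size is the
 * maximum segment size used when gso_type requests segmentation.
 */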
120 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */
121 #define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */
122 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */
123 #define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */
124 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */
125 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */
126
127
128 /* Control virtqueue */
129 struct virtio_net_ctrl_cmd {
130 uint8_t class;
131 uint8_t command;
132 } __packed;
133
134 #define VIRTIO_NET_CTRL_RX 0
135 #define VIRTIO_NET_CTRL_RX_PROMISC 0
136 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1
137
138 #define VIRTIO_NET_CTRL_MAC 1
139 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
140
141 #define VIRTIO_NET_CTRL_VLAN 2
142 #define VIRTIO_NET_CTRL_VLAN_ADD 0
143 #define VIRTIO_NET_CTRL_VLAN_DEL 1
144
145 struct virtio_net_ctrl_status {
146 uint8_t ack;
147 } __packed;
148
149 struct virtio_net_ctrl_rx {
150 uint8_t onoff;
151 } __packed;
152
153 struct virtio_net_ctrl_mac_tbl {
154 uint32_t nentries;
155 uint8_t macs[][ETHERADDRL];
156 } __packed;
157
158 struct virtio_net_ctrl_vlan {
159 uint16_t id;
160 } __packed;
161
162 static int vioif_quiesce(dev_info_t *);
163 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
164 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
165
166 DDI_DEFINE_STREAM_OPS(vioif_ops,
167 nulldev, /* identify */
168 nulldev, /* probe */
169 vioif_attach, /* attach */
170 vioif_detach, /* detach */
171 nodev, /* reset */
172 NULL, /* cb_ops */
173 D_MP, /* bus_ops */
174 NULL, /* power */
175 vioif_quiesce /* quiesce */
176 );
177
178 static char vioif_ident[] = "VirtIO ethernet driver";
179
180 /* Standard Module linkage initialization for a Streams driver */
181 extern struct mod_ops mod_driverops;
182
183 static struct modldrv modldrv = {
184 &mod_driverops, /* Type of module. This one is a driver */
185 vioif_ident, /* short description */
186 &vioif_ops /* driver specific ops */
187 };
188
189 static struct modlinkage modlinkage = {
190 MODREV_1,
191 {
192 (void *)&modldrv,
193 NULL,
194 },
195 };
196
197 ddi_device_acc_attr_t vioif_attr = {
198 DDI_DEVICE_ATTR_V0,
199 DDI_NEVERSWAP_ACC, /* virtio is always native byte order */
200 DDI_STORECACHING_OK_ACC,
201 DDI_DEFAULT_ACC
202 };
203
204 /*
205 * A mapping represents a binding for a single buffer that is contiguous in the
206 * virtual address space.
207 */
208 struct vioif_buf_mapping {
209 caddr_t vbm_buf;
210 ddi_dma_handle_t vbm_dmah;
211 ddi_acc_handle_t vbm_acch;
212 ddi_dma_cookie_t vbm_dmac;
213 unsigned int vbm_ncookies;
214 };
215
216 /*
217 * Rx buffers can be loaned upstream, so the code has
218 * to allocate them dynamically.
219 */
220 struct vioif_rx_buf {
221 struct vioif_softc *rb_sc;
222 frtn_t rb_frtn;
223
224 struct vioif_buf_mapping rb_mapping;
225 };
226
/*
* Tx buffers have two mapping types. One, "inline", is pre-allocated and is
* used to hold the virtio_net_hdr. Small packets also get copied there, as
* it's faster than mapping them. Bigger packets get mapped using the
* "external" mapping array. An array is used because a packet may consist of
* multiple fragments, so each fragment gets bound to an entry. In my
* observations, the number of fragments does not exceed 2, but just in case,
* a bigger array of up to VIOIF_INDIRECT_MAX - 1 entries is allocated. To
* save resources, the dma handles are allocated lazily in the tx path.
*/
237 struct vioif_tx_buf {
238 mblk_t *tb_mp;
239
240 /* inline buffer */
241 struct vioif_buf_mapping tb_inline_mapping;
242
243 /* External buffers */
244 struct vioif_buf_mapping *tb_external_mapping;
245 unsigned int tb_external_num;
246 };
247
248 struct vioif_softc {
249 dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */
250 struct virtio_softc sc_virtio;
251
252 mac_handle_t sc_mac_handle;
253 mac_register_t *sc_macp;
254
255 struct virtqueue *sc_rx_vq;
256 struct virtqueue *sc_tx_vq;
257 struct virtqueue *sc_ctrl_vq;
258
259 unsigned int sc_tx_stopped:1;
260
261 /* Feature bits. */
262 unsigned int sc_rx_csum:1;
263 unsigned int sc_tx_csum:1;
264 unsigned int sc_tx_tso4:1;
265
266 int sc_mtu;
267 uint8_t sc_mac[ETHERADDRL];
268 /*
269 * For rx buffers, we keep a pointer array, because the buffers
270 * can be loaned upstream, and we have to repopulate the array with
271 * new members.
272 */
273 struct vioif_rx_buf **sc_rxbufs;
274
275 /*
276 * For tx, we just allocate an array of buffers. The packet can
277 * either be copied into the inline buffer, or the external mapping
278 * could be used to map the packet
279 */
280 struct vioif_tx_buf *sc_txbufs;
281
282 kstat_t *sc_intrstat;
283 /*
284 * We "loan" rx buffers upstream and reuse them after they are
285 * freed. This lets us avoid allocations in the hot path.
286 */
287 kmem_cache_t *sc_rxbuf_cache;
288 ulong_t sc_rxloan;
289
/* Copying small packets turns out to be faster than mapping them. */
291 unsigned long sc_rxcopy_thresh;
292 unsigned long sc_txcopy_thresh;
/* Driver statistics. */
294 uint64_t sc_ipackets;
295 uint64_t sc_opackets;
296 uint64_t sc_rbytes;
297 uint64_t sc_obytes;
298 uint64_t sc_brdcstxmt;
299 uint64_t sc_brdcstrcv;
300 uint64_t sc_multixmt;
301 uint64_t sc_multircv;
302 uint64_t sc_norecvbuf;
303 uint64_t sc_notxbuf;
304 uint64_t sc_ierrors;
305 uint64_t sc_oerrors;
306 };
307
308 #define ETHER_HEADER_LEN sizeof (struct ether_header)
309
310 /* MTU + the ethernet header. */
311 #define MAX_PAYLOAD 65535
312 #define MAX_MTU (MAX_PAYLOAD - ETHER_HEADER_LEN)
313 #define DEFAULT_MTU ETHERMTU
314
/*
* Yeah, we spend 8M per device. Turns out, there is no point
* being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
* because vhost does not support them, and we expect to be used with
* vhost in production environments.
*/
/* The buffer keeps both the packet data and the virtio_net_hdr. */
322 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
323
324 /*
325 * We win a bit on header alignment, but the host wins a lot
326 * more on moving aligned buffers. Might need more thought.
327 */
328 #define VIOIF_IP_ALIGN 0
329
330 /* Maximum number of indirect descriptors, somewhat arbitrary. */
331 #define VIOIF_INDIRECT_MAX 128
332
333 /*
334 * We pre-allocate a reasonably large buffer to copy small packets
335 * there. Bigger packets are mapped, packets with multiple
336 * cookies are mapped as indirect buffers.
337 */
338 #define VIOIF_TX_INLINE_SIZE 2048
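/*
 * Note that VIOIF_TX_THRESH_MAX below is well under VIOIF_TX_INLINE_SIZE,
 * so a packet that gets copied (rather than mapped) always fits into the
 * inline buffer together with the virtio_net_hdr.
 */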
339
/*
* Queue size for all queues: a value of 0 means "use the native queue
* size advertised by the device".
*/
341 #define VIOIF_RX_QLEN 0
342 #define VIOIF_TX_QLEN 0
343 #define VIOIF_CTRL_QLEN 0
344
345 static uchar_t vioif_broadcast[ETHERADDRL] = {
346 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
347 };
348
349 #define VIOIF_TX_THRESH_MAX 640
350 #define VIOIF_RX_THRESH_MAX 640
351
352 #define CACHE_NAME_SIZE 32
353
354 static char vioif_txcopy_thresh[] =
355 "vioif_txcopy_thresh";
356 static char vioif_rxcopy_thresh[] =
357 "vioif_rxcopy_thresh";
358
359 static char *vioif_priv_props[] = {
360 vioif_txcopy_thresh,
361 vioif_rxcopy_thresh,
362 NULL
363 };
364
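/*
 * Helpers to save and restore a DMA handle's internal cookie pointer.
 * vioif_add_rx() walks the extra cookies of an rx buffer with
 * ddi_dma_nextcookie() and then rewinds the handle, so the same buffer
 * can be re-added to the ring later without rebinding it.
 */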
/* Should these helpers be promoted into the DDI? */
366 static ddi_dma_cookie_t *
367 vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
368 {
369 ddi_dma_impl_t *dmah_impl = (void *) dmah;
370 ASSERT(dmah_impl->dmai_cookie);
371 return (dmah_impl->dmai_cookie);
372 }
373
374 static void
375 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
376 {
377 ddi_dma_impl_t *dmah_impl = (void *) dmah;
378 dmah_impl->dmai_cookie = dmac;
379 }
380
381 static link_state_t
382 vioif_link_state(struct vioif_softc *sc)
383 {
384 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
385 if (virtio_read_device_config_2(&sc->sc_virtio,
386 VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
387
388 return (LINK_STATE_UP);
389 } else {
390 return (LINK_STATE_DOWN);
391 }
392 }
393
394 return (LINK_STATE_UP);
395 }
396
397 static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
398 DMA_ATTR_V0, /* Version number */
399 0, /* low address */
400 0xFFFFFFFFFFFFFFFF, /* high address */
401 0xFFFFFFFF, /* counter register max */
402 1, /* page alignment */
403 1, /* burst sizes: 1 - 32 */
404 1, /* minimum transfer size */
405 0xFFFFFFFF, /* max transfer size */
406 0xFFFFFFFFFFFFFFF, /* address register max */
407 1, /* scatter-gather capacity */
408 1, /* device operates on bytes */
409 0, /* attr flag: set to 0 */
410 };
411
412 static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
413 DMA_ATTR_V0, /* Version number */
414 0, /* low address */
415 0xFFFFFFFFFFFFFFFF, /* high address */
416 0xFFFFFFFF, /* counter register max */
417 1, /* page alignment */
418 1, /* burst sizes: 1 - 32 */
419 1, /* minimum transfer size */
420 0xFFFFFFFF, /* max transfer size */
421 0xFFFFFFFFFFFFFFF, /* address register max */
422
423 /* One entry is used for the virtio_net_hdr on the tx path */
424 VIOIF_INDIRECT_MAX - 1, /* scatter-gather capacity */
425 1, /* device operates on bytes */
426 0, /* attr flag: set to 0 */
427 };
428
429 static ddi_device_acc_attr_t vioif_bufattr = {
430 DDI_DEVICE_ATTR_V0,
431 DDI_NEVERSWAP_ACC,
432 DDI_STORECACHING_OK_ACC,
433 DDI_DEFAULT_ACC
434 };
435
436 static void
437 vioif_rx_free(caddr_t free_arg)
438 {
439 struct vioif_rx_buf *buf = (void *) free_arg;
440 struct vioif_softc *sc = buf->rb_sc;
441
442 kmem_cache_free(sc->sc_rxbuf_cache, buf);
443 atomic_dec_ulong(&sc->sc_rxloan);
444 }
445
446 static int
447 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
448 {
449 _NOTE(ARGUNUSED(kmflags));
450 struct vioif_softc *sc = user_arg;
451 struct vioif_rx_buf *buf = buffer;
452 size_t len;
453
454 if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
455 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
456 dev_err(sc->sc_dev, CE_WARN,
457 "Can't allocate dma handle for rx buffer");
458 goto exit_handle;
459 }
460
461 if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
462 VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
463 &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
464 NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
465 dev_err(sc->sc_dev, CE_WARN,
466 "Can't allocate rx buffer");
467 goto exit_alloc;
468 }
469 ASSERT(len >= VIOIF_RX_SIZE);
470
471 if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
472 buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
473 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
474 &buf->rb_mapping.vbm_ncookies)) {
dev_err(sc->sc_dev, CE_WARN, "Can't bind rx buffer");
476
477 goto exit_bind;
478 }
479
480 ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
481
482 buf->rb_sc = sc;
483 buf->rb_frtn.free_arg = (void *) buf;
484 buf->rb_frtn.free_func = vioif_rx_free;
485
486 return (0);
487 exit_bind:
488 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
489 exit_alloc:
490 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
491 exit_handle:
492
493 return (ENOMEM);
494 }
495
496 static void
497 vioif_rx_destruct(void *buffer, void *user_arg)
498 {
499 _NOTE(ARGUNUSED(user_arg));
500 struct vioif_rx_buf *buf = buffer;
501
ASSERT(buf->rb_mapping.vbm_acch);
ASSERT(buf->rb_mapping.vbm_dmah);
504
505 (void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
506 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
507 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
508 }
509
510 static void
511 vioif_free_mems(struct vioif_softc *sc)
512 {
513 int i;
514
515 for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
516 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
517 int j;
518
519 /* Tear down the internal mapping. */
520
521 ASSERT(buf->tb_inline_mapping.vbm_acch);
522 ASSERT(buf->tb_inline_mapping.vbm_dmah);
523
524 (void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
525 ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
526 ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
527
528 /* We should not see any in-flight buffers at this point. */
529 ASSERT(!buf->tb_mp);
530
/* Free all the dma handles we allocated lazily. */
532 for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
533 ddi_dma_free_handle(
534 &buf->tb_external_mapping[j].vbm_dmah);
535 /* Free the external mapping array. */
kmem_free(buf->tb_external_mapping,
sizeof (struct vioif_buf_mapping) * (VIOIF_INDIRECT_MAX - 1));
538 }
539
540 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
541 sc->sc_tx_vq->vq_num);
542
543 for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
544 struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
545
546 if (buf)
547 kmem_cache_free(sc->sc_rxbuf_cache, buf);
548 }
549 kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
550 sc->sc_rx_vq->vq_num);
551 }
552
553 static int
554 vioif_alloc_mems(struct vioif_softc *sc)
555 {
556 int i, txqsize, rxqsize;
557 size_t len;
558 unsigned int nsegments;
559
560 txqsize = sc->sc_tx_vq->vq_num;
561 rxqsize = sc->sc_rx_vq->vq_num;
562
563 sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
564 KM_SLEEP);
565 if (sc->sc_txbufs == NULL) {
566 dev_err(sc->sc_dev, CE_WARN,
567 "Failed to allocate the tx buffers array");
568 goto exit_txalloc;
569 }
570
571 /*
572 * We don't allocate the rx vioif_bufs, just the pointers, as
573 * rx vioif_bufs can be loaned upstream, and we don't know the
574 * total number we need.
575 */
576 sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
577 KM_SLEEP);
578 if (sc->sc_rxbufs == NULL) {
579 dev_err(sc->sc_dev, CE_WARN,
580 "Failed to allocate the rx buffers pointer array");
581 goto exit_rxalloc;
582 }
583
584 for (i = 0; i < txqsize; i++) {
585 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
586
587 /* Allocate and bind an inline mapping. */
588
589 if (ddi_dma_alloc_handle(sc->sc_dev,
590 &vioif_inline_buf_dma_attr,
591 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
592
593 dev_err(sc->sc_dev, CE_WARN,
594 "Can't allocate dma handle for tx buffer %d", i);
595 goto exit_tx;
596 }
597
598 if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
599 VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
600 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
601 &len, &buf->tb_inline_mapping.vbm_acch)) {
602
603 dev_err(sc->sc_dev, CE_WARN,
604 "Can't allocate tx buffer %d", i);
605 goto exit_tx;
606 }
607 ASSERT(len >= VIOIF_TX_INLINE_SIZE);
608
609 if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
610 NULL, buf->tb_inline_mapping.vbm_buf, len,
611 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
612 &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
613
614 dev_err(sc->sc_dev, CE_WARN,
615 "Can't bind tx buffer %d", i);
616 goto exit_tx;
617 }
618
619 /* We asked for a single segment */
620 ASSERT(nsegments == 1);
621
/*
* We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
* In reality, I don't expect more than 2-3 to be used, but who
* knows.
*/
buf->tb_external_mapping = kmem_zalloc(
sizeof (struct vioif_buf_mapping) * (VIOIF_INDIRECT_MAX - 1),
KM_SLEEP);

/*
* The external mappings' dma handles are allocated lazily,
* as we don't expect most of them to be used.
*/
635 }
636
637 return (0);
638
639 exit_tx:
640 for (i = 0; i < txqsize; i++) {
641 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
642
643 if (buf->tb_inline_mapping.vbm_dmah)
644 (void) ddi_dma_unbind_handle(
645 buf->tb_inline_mapping.vbm_dmah);
646
647 if (buf->tb_inline_mapping.vbm_acch)
648 ddi_dma_mem_free(
649 &buf->tb_inline_mapping.vbm_acch);
650
651 if (buf->tb_inline_mapping.vbm_dmah)
652 ddi_dma_free_handle(
653 &buf->tb_inline_mapping.vbm_dmah);
654
if (buf->tb_external_mapping)
kmem_free(buf->tb_external_mapping,
sizeof (struct vioif_buf_mapping) *
(VIOIF_INDIRECT_MAX - 1));
659 }
660
kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) * rxqsize);
662
663 exit_rxalloc:
664 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
665 exit_txalloc:
666 return (ENOMEM);
667 }
668
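/*
 * Multicast, promiscuous and unicast-address changes would have to be
 * programmed through the control virtqueue, which this driver does not
 * use yet. Multicast and promiscuous requests are therefore accepted
 * as no-ops, while changing the unicast MAC address is refused.
 */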
669 /* ARGSUSED */
670 int
671 vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
672 {
673 return (DDI_SUCCESS);
674 }
675
676 /* ARGSUSED */
677 int
678 vioif_promisc(void *arg, boolean_t on)
679 {
680 return (DDI_SUCCESS);
681 }
682
683 /* ARGSUSED */
684 int
685 vioif_unicst(void *arg, const uint8_t *macaddr)
686 {
687 return (DDI_FAILURE);
688 }
689
690
691 static int
692 vioif_add_rx(struct vioif_softc *sc, int kmflag)
693 {
694 struct vq_entry *ve;
695 struct vioif_rx_buf *buf;
696
697 ve = vq_alloc_entry(sc->sc_rx_vq);
698 if (!ve) {
/*
* Out of free descriptors - the ring is already full. It would
* be better to update a "no rx descriptors available" counter,
* but MAC does not ask for that statistic, so we bump
* sc_norecvbuf instead.
*/
704 sc->sc_norecvbuf++;
705 goto exit_vq;
706 }
707 buf = sc->sc_rxbufs[ve->qe_index];
708
709 if (!buf) {
710 /* First run, allocate the buffer. */
711 buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
712 sc->sc_rxbufs[ve->qe_index] = buf;
713 }
714
715 /* Still nothing? Bye. */
716 if (!buf) {
717 dev_err(sc->sc_dev, CE_WARN, "Can't allocate rx buffer");
718 sc->sc_norecvbuf++;
719 goto exit_buf;
720 }
721
722 ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
723
724 /*
725 * For an unknown reason, the virtio_net_hdr must be placed
726 * as a separate virtio queue entry.
727 */
728 virtio_ve_add_indirect_buf(ve, buf->rb_mapping.vbm_dmac.dmac_laddress,
729 sizeof (struct virtio_net_hdr), B_FALSE);
730
731 /* Add the rest of the first cookie. */
732 virtio_ve_add_indirect_buf(ve,
733 buf->rb_mapping.vbm_dmac.dmac_laddress +
734 sizeof (struct virtio_net_hdr),
735 buf->rb_mapping.vbm_dmac.dmac_size -
736 sizeof (struct virtio_net_hdr), B_FALSE);
737
738 /*
739 * If the buffer consists of a single cookie (unlikely for a
740 * 64-k buffer), we are done. Otherwise, add the rest of the cookies
741 * using indirect entries.
742 */
743 if (buf->rb_mapping.vbm_ncookies > 1) {
744 ddi_dma_cookie_t *first_extra_dmac;
745 ddi_dma_cookie_t dmac;
746 first_extra_dmac =
747 vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
748
749 ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
750 virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
751 dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
752 vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
753 first_extra_dmac);
754 }
755
756 virtio_push_chain(ve, B_FALSE);
757
758 return (DDI_SUCCESS);
759
760 exit_buf:
761 vq_free_entry(sc->sc_rx_vq, ve);
762 exit_vq:
763 return (DDI_FAILURE);
764 }
765
766 static int
767 vioif_populate_rx(struct vioif_softc *sc, int kmflag)
768 {
769 int i = 0;
770 int ret;
771
772 for (;;) {
773 ret = vioif_add_rx(sc, kmflag);
774 if (ret)
775 /*
776 * We could not allocate some memory. Try to work with
777 * what we've got.
778 */
779 break;
780 i++;
781 }
782
783 if (i)
784 virtio_sync_vq(sc->sc_rx_vq);
785
786 return (i);
787 }
788
789 static int
790 vioif_process_rx(struct vioif_softc *sc)
791 {
792 struct vq_entry *ve;
793 struct vioif_rx_buf *buf;
794 mblk_t *mp;
795 uint32_t len;
796 int i = 0;
797
798 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
799
800 buf = sc->sc_rxbufs[ve->qe_index];
801 ASSERT(buf);
802
803 if (len < sizeof (struct virtio_net_hdr)) {
dev_err(sc->sc_dev, CE_WARN, "RX: Chain too small: %u",
805 len - (uint32_t)sizeof (struct virtio_net_hdr));
806 sc->sc_ierrors++;
807 virtio_free_chain(ve);
808 continue;
809 }
810
811 len -= sizeof (struct virtio_net_hdr);
/*
* We copy small packets that happen to fit into a single
* cookie and reuse the buffers. For bigger ones, we loan
* the buffers upstream.
*/
817 if (len < sc->sc_rxcopy_thresh) {
818 mp = allocb(len, 0);
819 if (!mp) {
820 sc->sc_norecvbuf++;
821 sc->sc_ierrors++;
822
823 virtio_free_chain(ve);
824 break;
825 }
826
827 bcopy((char *)buf->rb_mapping.vbm_buf +
828 sizeof (struct virtio_net_hdr), mp->b_rptr, len);
829 mp->b_wptr = mp->b_rptr + len;
830
831 } else {
832 mp = desballoc((unsigned char *)
833 buf->rb_mapping.vbm_buf +
834 sizeof (struct virtio_net_hdr) +
835 VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
836 if (!mp) {
837 sc->sc_norecvbuf++;
838 sc->sc_ierrors++;
839
840 virtio_free_chain(ve);
841 break;
842 }
843 mp->b_wptr = mp->b_rptr + len;
844
845 atomic_inc_ulong(&sc->sc_rxloan);
/*
* Buffer loaned, we will have to allocate a new one
* for this slot.
*/
850 sc->sc_rxbufs[ve->qe_index] = NULL;
851 }
/*
* virtio-net does not tell us whether a packet is multicast
* or broadcast, so we have to check it ourselves.
*/
855 if (mp->b_rptr[0] & 0x1) {
856 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
857 sc->sc_multircv++;
858 else
859 sc->sc_brdcstrcv++;
860 }
861
862 sc->sc_rbytes += len;
863 sc->sc_ipackets++;
864
865 virtio_free_chain(ve);
866 mac_rx(sc->sc_mac_handle, NULL, mp);
867 i++;
868 }
869
870 return (i);
871 }
872
873 static void
874 vioif_reclaim_used_tx(struct vioif_softc *sc)
875 {
876 struct vq_entry *ve;
877 struct vioif_tx_buf *buf;
878 uint32_t len;
879 mblk_t *mp;
int i = 0, j;
881
882 while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
883 /* We don't chain descriptors for tx, so don't expect any. */
884 ASSERT(!ve->qe_next);
885
886 buf = &sc->sc_txbufs[ve->qe_index];
887 mp = buf->tb_mp;
888 buf->tb_mp = NULL;
889
890 if (mp) {
for (j = 0; j < buf->tb_external_num; j++)
(void) ddi_dma_unbind_handle(
buf->tb_external_mapping[j].vbm_dmah);
894 }
895
896 virtio_free_chain(ve);
897
898 /* External mapping used, mp was not freed in vioif_send() */
899 if (mp)
900 freemsg(mp);
901 i++;
902 }
903
904 if (sc->sc_tx_stopped && i) {
905 sc->sc_tx_stopped = 0;
906 mac_tx_update(sc->sc_mac_handle);
907 }
908 }
909
910 /* sc will be used to update stat counters. */
911 /* ARGSUSED */
912 static inline void
913 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
914 size_t msg_size)
915 {
916 struct vioif_tx_buf *buf;
917 buf = &sc->sc_txbufs[ve->qe_index];
918
919 ASSERT(buf);
920
921 /* Frees mp */
922 mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
923 sizeof (struct virtio_net_hdr));
924
925 virtio_ve_add_indirect_buf(ve,
926 buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
927 sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
928 }
929
930 static inline int
931 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
932 int i)
933 {
934 int ret = DDI_SUCCESS;
935
936 if (!buf->tb_external_mapping[i].vbm_dmah) {
937 ret = ddi_dma_alloc_handle(sc->sc_dev,
938 &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
939 &buf->tb_external_mapping[i].vbm_dmah);
940 if (ret != DDI_SUCCESS) {
941 dev_err(sc->sc_dev, CE_WARN,
942 "Can't allocate dma handle for external tx buffer");
943 }
944 }
945
946 return (ret);
947 }
948
949 static inline int
950 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
951 size_t msg_size)
952 {
953 _NOTE(ARGUNUSED(msg_size));
954
955 struct vioif_tx_buf *buf;
956 mblk_t *nmp;
957 int i, j;
958 int ret = DDI_SUCCESS;
959
960 buf = &sc->sc_txbufs[ve->qe_index];
961
962 ASSERT(buf);
963
964 buf->tb_external_num = 0;
965 i = 0;
966 nmp = mp;
967
968 while (nmp) {
969 size_t len;
970 ddi_dma_cookie_t dmac;
971 unsigned int ncookies;
972
973 len = MBLKL(nmp);
974 /*
975 * For some reason, the network stack can
976 * actually send us zero-length fragments.
977 */
978 if (len == 0) {
979 nmp = nmp->b_cont;
980 continue;
981 }
982
983 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
984 if (ret != DDI_SUCCESS) {
985 sc->sc_notxbuf++;
986 sc->sc_oerrors++;
987 goto exit_lazy_alloc;
988 }
989 ret = ddi_dma_addr_bind_handle(
990 buf->tb_external_mapping[i].vbm_dmah, NULL,
991 (caddr_t)nmp->b_rptr, len,
992 DDI_DMA_WRITE | DDI_DMA_STREAMING,
993 DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
994
995 if (ret != DDI_SUCCESS) {
996 sc->sc_oerrors++;
997 dev_err(sc->sc_dev, CE_NOTE,
998 "TX: Failed to bind external handle");
999 goto exit_bind;
1000 }
1001
1002 /* Check if we still fit into the indirect table. */
1003 if (virtio_ve_indirect_available(ve) < ncookies) {
1004 dev_err(sc->sc_dev, CE_NOTE,
1005 "TX: Indirect descriptor table limit reached."
1006 " It took %d fragments.", i);
1007 sc->sc_notxbuf++;
1008 sc->sc_oerrors++;
1009
1010 ret = DDI_FAILURE;
1011 goto exit_limit;
1012 }
1013
1014 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1015 dmac, ncookies, B_TRUE);
1016
1017 nmp = nmp->b_cont;
1018 i++;
1019 }
1020
1021 buf->tb_external_num = i;
1022 /* Save the mp to free it when the packet is sent. */
1023 buf->tb_mp = mp;
1024
1025 return (DDI_SUCCESS);
1026
1027 exit_limit:
1028 exit_bind:
1029 exit_lazy_alloc:
1030
1031 for (j = 0; j < i; j++) {
1032 (void) ddi_dma_unbind_handle(
1033 buf->tb_external_mapping[j].vbm_dmah);
1034 }
1035
1036 return (ret);
1037 }
1038
1039 static boolean_t
1040 vioif_send(struct vioif_softc *sc, mblk_t *mp)
1041 {
1042 struct vq_entry *ve;
1043 struct vioif_tx_buf *buf;
1044 struct virtio_net_hdr *net_header = NULL;
1045 size_t msg_size = 0;
1046 uint32_t csum_start;
1047 uint32_t csum_stuff;
1048 uint32_t csum_flags;
1049 uint32_t lso_flags;
1050 uint32_t lso_mss;
1051 mblk_t *nmp;
1052 int ret;
1053 boolean_t lso_required = B_FALSE;
1054
1055 for (nmp = mp; nmp; nmp = nmp->b_cont)
1056 msg_size += MBLKL(nmp);
1057
1058 if (sc->sc_tx_tso4) {
1059 mac_lso_get(mp, &lso_mss, &lso_flags);
1060 lso_required = (lso_flags & HW_LSO);
1061 }
1062
1063 ve = vq_alloc_entry(sc->sc_tx_vq);
1064
1065 if (!ve) {
1066 sc->sc_notxbuf++;
1067 /* Out of free descriptors - try later. */
1068 return (B_FALSE);
1069 }
1070 buf = &sc->sc_txbufs[ve->qe_index];
1071
1072 /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1073 (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1074 sizeof (struct virtio_net_hdr));
1075
1076 /* LINTED E_BAD_PTR_CAST_ALIGN */
1077 net_header = (struct virtio_net_hdr *)
1078 buf->tb_inline_mapping.vbm_buf;
1079
1080 mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1081 NULL, &csum_flags);
1082
1083 /* They want us to do the TCP/UDP csum calculation. */
1084 if (csum_flags & HCK_PARTIALCKSUM) {
1085 struct ether_header *eth_header;
1086 int eth_hsize;
1087
1088 /* Did we ask for it? */
1089 ASSERT(sc->sc_tx_csum);
1090
1091 /* We only asked for partial csum packets. */
1092 ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1093 ASSERT(!(csum_flags & HCK_FULLCKSUM));
1094
1095 eth_header = (void *) mp->b_rptr;
1096 if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
1097 eth_hsize = sizeof (struct ether_vlan_header);
1098 } else {
1099 eth_hsize = sizeof (struct ether_header);
1100 }
1101 net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1102 net_header->csum_start = eth_hsize + csum_start;
1103 net_header->csum_offset = csum_stuff - csum_start;
1104 }
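/*
 * Worked example (assuming TCP over IPv4 with no IP options): the
 * csum_start and csum_stuff values from mac_hcksum_get() are offsets
 * from the start of the IP header, so csum_start = 20 (the TCP header)
 * and csum_stuff = 36 (the TCP checksum field). With a 14-byte ethernet
 * header the device then sees csum_start = 34 and csum_offset = 16.
 */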
1105
1106 /* setup LSO fields if required */
1107 if (lso_required) {
1108 net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1109 net_header->gso_size = (uint16_t)lso_mss;
1110 }
1111
1112 virtio_ve_add_indirect_buf(ve,
1113 buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
1114 sizeof (struct virtio_net_hdr), B_TRUE);
1115
/* Meanwhile, update the statistics. */
1117 if (mp->b_rptr[0] & 0x1) {
1118 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
1119 sc->sc_multixmt++;
1120 else
1121 sc->sc_brdcstxmt++;
1122 }
1123
1124 /*
1125 * We copy small packets into the inline buffer. The bigger ones
1126 * get mapped using the mapped buffer.
1127 */
1128 if (msg_size < sc->sc_txcopy_thresh) {
1129 vioif_tx_inline(sc, ve, mp, msg_size);
1130 } else {
/* Statistics are updated by vioif_tx_external() on failure. */
1132 ret = vioif_tx_external(sc, ve, mp, msg_size);
1133 if (ret != DDI_SUCCESS)
1134 goto exit_tx_external;
1135 }
1136
1137 virtio_push_chain(ve, B_TRUE);
1138
1139 sc->sc_opackets++;
1140 sc->sc_obytes += msg_size;
1141
1142 return (B_TRUE);
1143
1144 exit_tx_external:
1145
1146 vq_free_entry(sc->sc_tx_vq, ve);
1147 /*
1148 * vioif_tx_external can fail when the buffer does not fit into the
1149 * indirect descriptor table. Free the mp. I don't expect this ever
1150 * to happen.
1151 */
1152 freemsg(mp);
1153
1154 return (B_TRUE);
1155 }
1156
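/*
 * MAC transmit entry point. If we run out of tx descriptors we return
 * the unsent remainder of the chain; MAC holds on to it and calls us
 * again after we report free descriptors via mac_tx_update() in
 * vioif_reclaim_used_tx().
 */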
1157 mblk_t *
1158 vioif_tx(void *arg, mblk_t *mp)
1159 {
1160 struct vioif_softc *sc = arg;
1161 mblk_t *nmp;
1162
1163 while (mp != NULL) {
1164 nmp = mp->b_next;
1165 mp->b_next = NULL;
1166
1167 if (!vioif_send(sc, mp)) {
1168 sc->sc_tx_stopped = 1;
1169 mp->b_next = nmp;
1170 break;
1171 }
1172 mp = nmp;
1173 }
1174
1175 return (mp);
1176 }
1177
1178 int
1179 vioif_start(void *arg)
1180 {
1181 struct vioif_softc *sc = arg;
1182
1183 mac_link_update(sc->sc_mac_handle,
1184 vioif_link_state(sc));
1185
1186 virtio_start_vq_intr(sc->sc_rx_vq);
1187
1188 return (DDI_SUCCESS);
1189 }
1190
1191 void
1192 vioif_stop(void *arg)
1193 {
1194 struct vioif_softc *sc = arg;
1195
1196 virtio_stop_vq_intr(sc->sc_rx_vq);
1197 }
1198
1199 /* ARGSUSED */
1200 static int
1201 vioif_stat(void *arg, uint_t stat, uint64_t *val)
1202 {
1203 struct vioif_softc *sc = arg;
1204
1205 switch (stat) {
1206 case MAC_STAT_IERRORS:
1207 *val = sc->sc_ierrors;
1208 break;
1209 case MAC_STAT_OERRORS:
1210 *val = sc->sc_oerrors;
1211 break;
1212 case MAC_STAT_MULTIRCV:
1213 *val = sc->sc_multircv;
1214 break;
1215 case MAC_STAT_BRDCSTRCV:
1216 *val = sc->sc_brdcstrcv;
1217 break;
1218 case MAC_STAT_MULTIXMT:
1219 *val = sc->sc_multixmt;
1220 break;
1221 case MAC_STAT_BRDCSTXMT:
1222 *val = sc->sc_brdcstxmt;
1223 break;
1224 case MAC_STAT_IPACKETS:
1225 *val = sc->sc_ipackets;
1226 break;
1227 case MAC_STAT_RBYTES:
1228 *val = sc->sc_rbytes;
1229 break;
1230 case MAC_STAT_OPACKETS:
1231 *val = sc->sc_opackets;
1232 break;
1233 case MAC_STAT_OBYTES:
1234 *val = sc->sc_obytes;
1235 break;
1236 case MAC_STAT_NORCVBUF:
1237 *val = sc->sc_norecvbuf;
1238 break;
1239 case MAC_STAT_NOXMTBUF:
1240 *val = sc->sc_notxbuf;
1241 break;
1242 case MAC_STAT_IFSPEED:
1243 /* always 1 Gbit */
1244 *val = 1000000000ULL;
1245 break;
1246 case ETHER_STAT_LINK_DUPLEX:
1247 /* virtual device, always full-duplex */
1248 *val = LINK_DUPLEX_FULL;
1249 break;
1250
1251 default:
1252 return (ENOTSUP);
1253 }
1254
1255 return (DDI_SUCCESS);
1256 }
1257
1258 static int
1259 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
1260 uint_t pr_valsize, const void *pr_val)
1261 {
1262 _NOTE(ARGUNUSED(pr_valsize));
1263
1264 long result;
1265
1266 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1267
1268 if (pr_val == NULL)
1269 return (EINVAL);
1270
1271 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1272
1273 if (result < 0 || result > VIOIF_TX_THRESH_MAX)
1274 return (EINVAL);
1275 sc->sc_txcopy_thresh = result;
1276 }
1277 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1278
1279 if (pr_val == NULL)
1280 return (EINVAL);
1281
1282 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1283
1284 if (result < 0 || result > VIOIF_RX_THRESH_MAX)
1285 return (EINVAL);
1286 sc->sc_rxcopy_thresh = result;
1287 }
1288 return (0);
1289 }
1290
1291 static int
1292 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1293 uint_t pr_valsize, const void *pr_val)
1294 {
1295 struct vioif_softc *sc = arg;
1296 const uint32_t *new_mtu;
1297 int err;
1298
1299 switch (pr_num) {
1300 case MAC_PROP_MTU:
1301 new_mtu = pr_val;
1302
1303 if (*new_mtu > MAX_MTU) {
1304 return (EINVAL);
1305 }
1306
1307 err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
1308 if (err) {
1309 return (err);
1310 }
1311 break;
1312 case MAC_PROP_PRIVATE:
1313 err = vioif_set_prop_private(sc, pr_name,
1314 pr_valsize, pr_val);
1315 if (err)
return (err);
break;
default:
1318 return (ENOTSUP);
1319 }
1320
1321 return (0);
1322 }
1323
1324 static int
1325 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
1326 uint_t pr_valsize, void *pr_val)
1327 {
1328 int err = ENOTSUP;
1329 int value;
1330
1331 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1332
1333 value = sc->sc_txcopy_thresh;
1334 err = 0;
1335 goto done;
1336 }
1337 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1338
1339 value = sc->sc_rxcopy_thresh;
1340 err = 0;
1341 goto done;
1342 }
1343 done:
1344 if (err == 0) {
1345 (void) snprintf(pr_val, pr_valsize, "%d", value);
1346 }
1347 return (err);
1348 }
1349
1350 static int
1351 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1352 uint_t pr_valsize, void *pr_val)
1353 {
1354 struct vioif_softc *sc = arg;
1355 int err = ENOTSUP;
1356
1357 switch (pr_num) {
1358 case MAC_PROP_PRIVATE:
1359 err = vioif_get_prop_private(sc, pr_name,
1360 pr_valsize, pr_val);
1361 break;
1362 default:
1363 break;
1364 }
1365 return (err);
1366 }
1367
1368 static void
1369 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1370 mac_prop_info_handle_t prh)
1371 {
1372 struct vioif_softc *sc = arg;
1373
1374 switch (pr_num) {
1375 case MAC_PROP_MTU:
1376 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1377 break;
1378
1379 case MAC_PROP_PRIVATE: {
1380 char valstr[64];
1381 int value;
1382
1383 bzero(valstr, sizeof (valstr));
1384 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1385
1386 value = sc->sc_txcopy_thresh;
1387 } else if (strcmp(pr_name,
1388 vioif_rxcopy_thresh) == 0) {
1389 value = sc->sc_rxcopy_thresh;
1390 } else {
1391 return;
1392 }
(void) snprintf(valstr, sizeof (valstr), "%d", value);
mac_prop_info_set_default_str(prh, valstr);
break;
}
1395 default:
1396 break;
1397 }
1398 }
1399
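/*
 * MAC capability inquiry. Partial (pseudo-header) checksum offload and
 * basic TCP/IPv4 LSO are only advertised when the corresponding host
 * features were negotiated; see vioif_check_features().
 */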
1400 static boolean_t
1401 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1402 {
1403 struct vioif_softc *sc = arg;
1404
1405 switch (cap) {
1406 case MAC_CAPAB_HCKSUM:
1407 if (sc->sc_tx_csum) {
1408 uint32_t *txflags = cap_data;
1409
1410 *txflags = HCKSUM_INET_PARTIAL;
1411 return (B_TRUE);
1412 }
1413 return (B_FALSE);
1414 case MAC_CAPAB_LSO:
1415 if (sc->sc_tx_tso4) {
1416 mac_capab_lso_t *cap_lso = cap_data;
1417
1418 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1419 cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
1420 return (B_TRUE);
1421 }
1422 return (B_FALSE);
1423 default:
1424 break;
1425 }
1426 return (B_FALSE);
1427 }
1428
1429 static mac_callbacks_t vioif_m_callbacks = {
1430 .mc_callbacks = (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
1431 .mc_getstat = vioif_stat,
1432 .mc_start = vioif_start,
1433 .mc_stop = vioif_stop,
1434 .mc_setpromisc = vioif_promisc,
1435 .mc_multicst = vioif_multicst,
1436 .mc_unicst = vioif_unicst,
1437 .mc_tx = vioif_tx,
1438 /* Optional callbacks */
1439 .mc_reserved = NULL, /* reserved */
1440 .mc_ioctl = NULL, /* mc_ioctl */
1441 .mc_getcapab = vioif_getcapab, /* mc_getcapab */
1442 .mc_open = NULL, /* mc_open */
1443 .mc_close = NULL, /* mc_close */
1444 .mc_setprop = vioif_setprop,
1445 .mc_getprop = vioif_getprop,
1446 .mc_propinfo = vioif_propinfo,
1447 };
1448
1449 static void
1450 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1451 uint32_t features)
1452 {
char buf[512];
char *bufp = buf;
char *bufend = buf + sizeof (buf);

/* virtio_show_features() appends the generic virtio feature names. */
bufp += snprintf(bufp, bufend - bufp, "%s", prefix);
bufp += virtio_show_features(features, bufp, bufend - bufp);
*bufp = '\0';

/*
* The %b format prints "features" in hex, followed by the names of the
* set bits as described by the bit-format string.
*/
dev_err(sc->sc_dev, CE_NOTE, "%s Vioif (%b)", buf, features,
"\020\1CSUM\2GUEST_CSUM\6MAC\7GSO\10GUEST_TSO4\11GUEST_TSO6"
"\12GUEST_ECN\13GUEST_UFO\14HOST_TSO4\15HOST_TSO6\16HOST_ECN"
"\17HOST_UFO\20MRG_RXBUF\21STATUS\22CTRL_VQ\23CTRL_RX"
"\24CTRL_VLAN\25CTRL_RX_EXTRA");
1456 }
1457
1458 /*
1459 * Find out which features are supported by the device and
1460 * choose which ones we wish to use.
1461 */
1462 static int
1463 vioif_dev_features(struct vioif_softc *sc)
1464 {
1465 uint32_t host_features;
1466
1467 host_features = virtio_negotiate_features(&sc->sc_virtio,
1468 VIRTIO_NET_F_CSUM |
1469 VIRTIO_NET_F_HOST_TSO4 |
1470 VIRTIO_NET_F_HOST_ECN |
1471 VIRTIO_NET_F_MAC |
1472 VIRTIO_NET_F_STATUS |
1473 VIRTIO_F_RING_INDIRECT_DESC |
1474 VIRTIO_F_NOTIFY_ON_EMPTY);
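/*
 * Note that we do not ask for VIRTIO_NET_F_MRG_RXBUF (see the comment
 * above VIOIF_RX_SIZE) nor for the guest-side offload features, and we
 * refuse to attach below unless indirect descriptors were negotiated.
 */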
1475
1476 vioif_show_features(sc, "Host features: ", host_features);
vioif_show_features(sc, "Negotiated features: ",
sc->sc_virtio.sc_features);
1479
1480 if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1481 dev_err(sc->sc_dev, CE_NOTE,
1482 "Host does not support RING_INDIRECT_DESC, bye.");
1483 return (DDI_FAILURE);
1484 }
1485
1486 return (DDI_SUCCESS);
1487 }
1488
1489 static int
1490 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1491 {
1492 return (virtio_has_feature(&sc->sc_virtio, feature));
1493 }
1494
1495 static void
1496 vioif_set_mac(struct vioif_softc *sc)
1497 {
1498 int i;
1499
1500 for (i = 0; i < ETHERADDRL; i++) {
1501 virtio_write_device_config_1(&sc->sc_virtio,
1502 VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1503 }
1504 }
1505
1506 /* Get the mac address out of the hardware, or make up one. */
1507 static void
1508 vioif_get_mac(struct vioif_softc *sc)
1509 {
1510 int i;
1511 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1512 for (i = 0; i < ETHERADDRL; i++) {
1513 sc->sc_mac[i] = virtio_read_device_config_1(
1514 &sc->sc_virtio,
1515 VIRTIO_NET_CONFIG_MAC + i);
1516 }
1517 dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
1518 ether_sprintf((struct ether_addr *)sc->sc_mac));
1519 } else {
1520 /* Get a few random bytes */
1521 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1522 /* Make sure it's a unicast MAC */
1523 sc->sc_mac[0] &= ~1;
1524 /* Set the "locally administered" bit */
1525 sc->sc_mac[1] |= 2;
1526
1527 vioif_set_mac(sc);
1528
1529 dev_err(sc->sc_dev, CE_NOTE,
1530 "Generated a random MAC address: %s",
1531 ether_sprintf((struct ether_addr *)sc->sc_mac));
1532 }
1533 }
1534
1535 /*
1536 * Virtqueue interrupt handlers
1537 */
1538 /* ARGSUSED */
1539 uint_t
1540 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1541 {
1542 struct virtio_softc *vsc = (void *) arg1;
1543 struct vioif_softc *sc = container_of(vsc,
1544 struct vioif_softc, sc_virtio);
1545
1546 (void) vioif_process_rx(sc);
1547
1548 (void) vioif_populate_rx(sc, KM_NOSLEEP);
1549
1550 return (DDI_INTR_CLAIMED);
1551 }
1552
1553 /* ARGSUSED */
1554 uint_t
1555 vioif_tx_handler(caddr_t arg1, caddr_t arg2)
1556 {
1557 struct virtio_softc *vsc = (void *)arg1;
1558 struct vioif_softc *sc = container_of(vsc,
1559 struct vioif_softc, sc_virtio);
1560
1561 vioif_reclaim_used_tx(sc);
1562 return (DDI_INTR_CLAIMED);
1563 }
1564
1565 static int
1566 vioif_register_ints(struct vioif_softc *sc)
1567 {
1568 int ret;
1569
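/*
 * Per-virtqueue interrupt handlers, in virtqueue order: entry 0 services
 * the rx queue and entry 1 the tx queue. No handler is registered for
 * the control queue or for configuration changes.
 */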
1570 struct virtio_int_handler vioif_vq_h[] = {
1571 { vioif_rx_handler },
1572 { vioif_tx_handler },
1573 { NULL }
1574 };
1575
1576 ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1577
1578 return (ret);
1579 }
1580
1581
1582 static void
1583 vioif_check_features(struct vioif_softc *sc)
1584 {
1585 if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
/* The GSO/GRO features depend on CSUM; check them here. */
1587 sc->sc_tx_csum = 1;
1588 sc->sc_rx_csum = 1;
1589
1590 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1591 sc->sc_rx_csum = 0;
1592 }
1593 cmn_err(CE_NOTE, "Csum enabled.");
1594
1595 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1596
1597 sc->sc_tx_tso4 = 1;
1598 /*
1599 * We don't seem to have a way to ask the system
1600 * not to send us LSO packets with Explicit
1601 * Congestion Notification bit set, so we require
1602 * the device to support it in order to do
1603 * LSO.
1604 */
1605 if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1606 dev_err(sc->sc_dev, CE_NOTE,
1607 "TSO4 supported, but not ECN. "
1608 "Not using LSO.");
1609 sc->sc_tx_tso4 = 0;
1610 } else {
1611 cmn_err(CE_NOTE, "LSO enabled");
1612 }
1613 }
1614 }
1615 }
1616
1617 static int
1618 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1619 {
1620 int ret, instance;
1621 struct vioif_softc *sc;
1622 struct virtio_softc *vsc;
1623 mac_register_t *macp;
1624 char cache_name[CACHE_NAME_SIZE];
1625
1626 instance = ddi_get_instance(devinfo);
1627
1628 switch (cmd) {
1629 case DDI_ATTACH:
1630 break;
1631
1632 case DDI_RESUME:
1633 case DDI_PM_RESUME:
1634 dev_err(devinfo, CE_WARN, "resume not supported yet");
1635 goto exit;
1636
1637 default:
1638 dev_err(devinfo, CE_WARN, "cmd 0x%x unrecognized", cmd);
1639 goto exit;
1640 }
1641
1642 sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
1643 ddi_set_driver_private(devinfo, sc);
1644
1645 vsc = &sc->sc_virtio;
1646
1647 /* Duplicate for less typing */
1648 sc->sc_dev = devinfo;
1649 vsc->sc_dev = devinfo;
1650
1651 /*
1652 * Initialize interrupt kstat.
1653 */
1654 sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
1655 KSTAT_TYPE_INTR, 1, 0);
1656 if (sc->sc_intrstat == NULL) {
1657 dev_err(devinfo, CE_WARN, "kstat_create failed");
1658 goto exit_intrstat;
1659 }
1660 kstat_install(sc->sc_intrstat);
1661
1662 /* map BAR 0 */
1663 ret = ddi_regs_map_setup(devinfo, 1,
1664 (caddr_t *)&sc->sc_virtio.sc_io_addr,
1665 0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
1666 if (ret != DDI_SUCCESS) {
1667 dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
1668 goto exit_map;
1669 }
1670
1671 virtio_device_reset(&sc->sc_virtio);
1672 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
1673 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
1674
1675 ret = vioif_dev_features(sc);
1676 if (ret)
1677 goto exit_features;
1678
1679 vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
1680
1681 (void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
1682 sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
1683 sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
1684 vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
1685 if (sc->sc_rxbuf_cache == NULL) {
1686 dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
1687 goto exit_cache;
1688 }
1689
1690 ret = vioif_register_ints(sc);
1691 if (ret) {
1692 dev_err(sc->sc_dev, CE_WARN,
1693 "Failed to allocate interrupt(s)!");
1694 goto exit_ints;
1695 }
1696
1697 /*
1698 * Register layout determined, can now access the
1699 * device-specific bits
1700 */
1701 vioif_get_mac(sc);
1702
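/*
 * Virtqueue indices are fixed by the virtio-net device: 0 is the receive
 * queue, 1 the transmit queue, and 2 the optional control queue.
 */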
1703 sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
1704 VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
1705 if (!sc->sc_rx_vq)
1706 goto exit_alloc1;
1707 virtio_stop_vq_intr(sc->sc_rx_vq);
1708
1709 sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
1710 VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
if (!sc->sc_tx_vq)
1712 goto exit_alloc2;
1713 virtio_stop_vq_intr(sc->sc_tx_vq);
1714
1715 if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
1716 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1717 VIOIF_CTRL_QLEN, 0, "ctrl");
1718 if (!sc->sc_ctrl_vq) {
1719 goto exit_alloc3;
1720 }
1721 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1722 }
1723
1724 virtio_set_status(&sc->sc_virtio,
1725 VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1726
1727 sc->sc_rxloan = 0;
1728
1729 /* set some reasonable-small default values */
1730 sc->sc_rxcopy_thresh = 300;
1731 sc->sc_txcopy_thresh = 300;
1732 sc->sc_mtu = ETHERMTU;
1733
1734 vioif_check_features(sc);
1735
1736 if (vioif_alloc_mems(sc))
1737 goto exit_alloc_mems;
1738
1739 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1740 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1741 goto exit_macalloc;
1742 }
1743
1744 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1745 macp->m_driver = sc;
1746 macp->m_dip = devinfo;
1747 macp->m_src_addr = sc->sc_mac;
1748 macp->m_callbacks = &vioif_m_callbacks;
1749 macp->m_min_sdu = 0;
1750 macp->m_max_sdu = sc->sc_mtu;
1751 macp->m_margin = VLAN_TAGSZ;
1752 macp->m_priv_props = vioif_priv_props;
1753
1754 sc->sc_macp = macp;
1755
1756 /* Pre-fill the rx ring. */
1757 (void) vioif_populate_rx(sc, KM_SLEEP);
1758
1759 ret = mac_register(macp, &sc->sc_mac_handle);
1760 if (ret != 0) {
1761 dev_err(devinfo, CE_WARN, "vioif_attach: "
1762 "mac_register() failed, ret=%d", ret);
1763 goto exit_register;
1764 }
1765
1766 ret = virtio_enable_ints(&sc->sc_virtio);
1767 if (ret) {
1768 dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
1769 goto exit_enable_ints;
1770 }
1771
1772 mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
1773 return (DDI_SUCCESS);
1774
1775 exit_enable_ints:
1776 (void) mac_unregister(sc->sc_mac_handle);
1777 exit_register:
1778 mac_free(macp);
1779 exit_macalloc:
1780 vioif_free_mems(sc);
1781 exit_alloc_mems:
1782 virtio_release_ints(&sc->sc_virtio);
1783 if (sc->sc_ctrl_vq)
1784 virtio_free_vq(sc->sc_ctrl_vq);
1785 exit_alloc3:
1786 virtio_free_vq(sc->sc_tx_vq);
1787 exit_alloc2:
1788 virtio_free_vq(sc->sc_rx_vq);
1789 exit_alloc1:
1790 exit_ints:
1791 kmem_cache_destroy(sc->sc_rxbuf_cache);
1792 exit_cache:
1793 exit_features:
1794 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1795 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1796 exit_intrstat:
1797 exit_map:
1798 kstat_delete(sc->sc_intrstat);
1799 kmem_free(sc, sizeof (struct vioif_softc));
1800 exit:
1801 return (DDI_FAILURE);
1802 }
1803
1804 static int
1805 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1806 {
1807 struct vioif_softc *sc;
1808
1809 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1810 return (DDI_FAILURE);
1811
1812 switch (cmd) {
1813 case DDI_DETACH:
1814 break;
1815
1816 case DDI_PM_SUSPEND:
1817 cmn_err(CE_WARN, "suspend not supported yet");
1818 return (DDI_FAILURE);
1819
1820 default:
1821 cmn_err(CE_WARN, "cmd 0x%x unrecognized", cmd);
1822 return (DDI_FAILURE);
1823 }
1824
1825 if (sc->sc_rxloan) {
cmn_err(CE_WARN, "Some rx buffers are still upstream, "
"not detaching");
1828 return (DDI_FAILURE);
1829 }
1830
1831 virtio_stop_vq_intr(sc->sc_rx_vq);
1832 virtio_stop_vq_intr(sc->sc_tx_vq);
1833
1834 virtio_release_ints(&sc->sc_virtio);
1835
1836 if (mac_unregister(sc->sc_mac_handle)) {
1837 return (DDI_FAILURE);
1838 }
1839
1840 mac_free(sc->sc_macp);
1841
1842 vioif_free_mems(sc);
1843 virtio_free_vq(sc->sc_rx_vq);
1844 virtio_free_vq(sc->sc_tx_vq);
1845
1846 virtio_device_reset(&sc->sc_virtio);
1847
1848 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1849
1850 kmem_cache_destroy(sc->sc_rxbuf_cache);
1851 kstat_delete(sc->sc_intrstat);
1852 kmem_free(sc, sizeof (struct vioif_softc));
1853
1854 return (DDI_SUCCESS);
1855 }
1856
1857 static int
1858 vioif_quiesce(dev_info_t *devinfo)
1859 {
1860 struct vioif_softc *sc;
1861
1862 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1863 return (DDI_FAILURE);
1864
1865 virtio_stop_vq_intr(sc->sc_rx_vq);
1866 virtio_stop_vq_intr(sc->sc_tx_vq);
1867 virtio_device_reset(&sc->sc_virtio);
1868
1869 return (DDI_SUCCESS);
1870 }
1871
1872 int
1873 _init(void)
1874 {
1875 int ret = 0;
1876
1877 mac_init_ops(&vioif_ops, "vioif");
1878
1879 ret = mod_install(&modlinkage);
1880 if (ret != DDI_SUCCESS) {
1881 mac_fini_ops(&vioif_ops);
1882 cmn_err(CE_WARN, "Unable to install the driver");
1883 return (ret);
1884 }
1885
1886 return (0);
1887 }
1888
1889 int
1890 _fini(void)
1891 {
1892 int ret;
1893
1894 ret = mod_remove(&modlinkage);
1895 if (ret == DDI_SUCCESS) {
1896 mac_fini_ops(&vioif_ops);
1897 }
1898
1899 return (ret);
1900 }
1901
1902 int
1903 _info(struct modinfo *pModinfo)
1904 {
1905 return (mod_info(&modlinkage, pModinfo));
1906 }