1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2013 Nexenta Inc. All rights reserved.
14 * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
15 */
16
17 /* Based on the NetBSD virtio driver by Minoura Makoto. */
18 /*
19 * Copyright (c) 2010 Minoura Makoto.
20 * All rights reserved.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
32 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
34 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
35 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
36 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
37 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
38 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
39 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
40 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
41 */
42
43 #include <sys/types.h>
44 #include <sys/errno.h>
45 #include <sys/param.h>
46 #include <sys/stropts.h>
47 #include <sys/stream.h>
48 #include <sys/strsubr.h>
49 #include <sys/kmem.h>
50 #include <sys/conf.h>
51 #include <sys/devops.h>
52 #include <sys/ksynch.h>
53 #include <sys/stat.h>
54 #include <sys/modctl.h>
55 #include <sys/debug.h>
56 #include <sys/pci.h>
57 #include <sys/ethernet.h>
58 #include <sys/vlan.h>
59
60 #include <sys/dlpi.h>
61 #include <sys/taskq.h>
62 #include <sys/cyclic.h>
63
64 #include <sys/pattr.h>
65 #include <sys/strsun.h>
66
67 #include <sys/random.h>
68 #include <sys/sysmacros.h>
69 #include <sys/stream.h>
70
71 #include <sys/mac.h>
72 #include <sys/mac_provider.h>
73 #include <sys/mac_ether.h>
74
75 #include "virtiovar.h"
76 #include "virtioreg.h"
77
78 /* Configuration registers */
79 #define VIRTIO_NET_CONFIG_MAC 0 /* 8bit x 6byte */
80 #define VIRTIO_NET_CONFIG_STATUS 6 /* 16bit */
81
82 /* Feature bits */
83 #define VIRTIO_NET_F_CSUM (1 << 0) /* Host handles pkts w/ partial csum */
84 #define VIRTIO_NET_F_GUEST_CSUM (1 << 1) /* Guest handles pkts w/ part csum */
85 #define VIRTIO_NET_F_MAC (1 << 5) /* Host has given MAC address. */
86 #define VIRTIO_NET_F_GSO (1 << 6) /* Host handles pkts w/ any GSO type */
87 #define VIRTIO_NET_F_GUEST_TSO4 (1 << 7) /* Guest can handle TSOv4 in. */
88 #define VIRTIO_NET_F_GUEST_TSO6 (1 << 8) /* Guest can handle TSOv6 in. */
89 #define VIRTIO_NET_F_GUEST_ECN (1 << 9) /* Guest can handle TSO[6] w/ ECN in */
90 #define VIRTIO_NET_F_GUEST_UFO (1 << 10) /* Guest can handle UFO in. */
91 #define VIRTIO_NET_F_HOST_TSO4 (1 << 11) /* Host can handle TSOv4 in. */
92 #define VIRTIO_NET_F_HOST_TSO6 (1 << 12) /* Host can handle TSOv6 in. */
93 #define VIRTIO_NET_F_HOST_ECN (1 << 13) /* Host can handle TSO[6] w/ ECN in */
94 #define VIRTIO_NET_F_HOST_UFO (1 << 14) /* Host can handle UFO in. */
95 #define VIRTIO_NET_F_MRG_RXBUF (1 << 15) /* Host can merge receive buffers. */
96 #define VIRTIO_NET_F_STATUS (1 << 16) /* Config.status available */
97 #define VIRTIO_NET_F_CTRL_VQ (1 << 17) /* Control channel available */
98 #define VIRTIO_NET_F_CTRL_RX (1 << 18) /* Control channel RX mode support */
99 #define VIRTIO_NET_F_CTRL_VLAN (1 << 19) /* Control channel VLAN filtering */
100 #define VIRTIO_NET_F_CTRL_RX_EXTRA (1 << 20) /* Extra RX mode control support */
101
102 #define VIRTIO_NET_FEATURE_BITS \
103 "\020" \
104 "\1CSUM" \
105 "\2GUEST_CSUM" \
106 "\6MAC" \
107 "\7GSO" \
108 "\10GUEST_TSO4" \
109 "\11GUEST_TSO6" \
110 "\12GUEST_ECN" \
111 "\13GUEST_UFO" \
112 "\14HOST_TSO4" \
113 "\15HOST_TSO6" \
114 "\16HOST_ECN" \
115 "\17HOST_UFO" \
116 "\20MRG_RXBUF" \
117 "\21STATUS" \
118 "\22CTRL_VQ" \
119 "\23CTRL_RX" \
120 "\24CTRL_VLAN" \
121 "\25CTRL_RX_EXTRA"
122
123 /* Status */
124 #define VIRTIO_NET_S_LINK_UP 1
125
126 #pragma pack(1)
127 /* Packet header structure */
128 struct virtio_net_hdr {
129 uint8_t flags;
130 uint8_t gso_type;
131 uint16_t hdr_len;
132 uint16_t gso_size;
133 uint16_t csum_start;
134 uint16_t csum_offset;
135 };
136 #pragma pack()
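/*
 * This is the legacy virtio-net header without the num_buffers field; that
 * field only exists when VIRTIO_NET_F_MRG_RXBUF is negotiated, which this
 * driver never requests (see vioif_dev_features()).
 */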
137
138 #define VIRTIO_NET_HDR_F_NEEDS_CSUM 1 /* flags */
139 #define VIRTIO_NET_HDR_GSO_NONE 0 /* gso_type */
140 #define VIRTIO_NET_HDR_GSO_TCPV4 1 /* gso_type */
141 #define VIRTIO_NET_HDR_GSO_UDP 3 /* gso_type */
142 #define VIRTIO_NET_HDR_GSO_TCPV6 4 /* gso_type */
143 #define VIRTIO_NET_HDR_GSO_ECN 0x80 /* gso_type, |'ed */
144
145
146 /* Control virtqueue */
147 #pragma pack(1)
148 struct virtio_net_ctrl_cmd {
149 uint8_t class;
150 uint8_t command;
151 };
152 #pragma pack()
153
154 #define VIRTIO_NET_CTRL_RX 0
155 #define VIRTIO_NET_CTRL_RX_PROMISC 0
156 #define VIRTIO_NET_CTRL_RX_ALLMULTI 1
157
158 #define VIRTIO_NET_CTRL_MAC 1
159 #define VIRTIO_NET_CTRL_MAC_TABLE_SET 0
160
161 #define VIRTIO_NET_CTRL_VLAN 2
162 #define VIRTIO_NET_CTRL_VLAN_ADD 0
163 #define VIRTIO_NET_CTRL_VLAN_DEL 1
164
165 #pragma pack(1)
166 struct virtio_net_ctrl_status {
167 uint8_t ack;
168 };
169
170 struct virtio_net_ctrl_rx {
171 uint8_t onoff;
172 };
173
174 struct virtio_net_ctrl_mac_tbl {
175 uint32_t nentries;
176 uint8_t macs[][ETHERADDRL];
177 };
178
179 struct virtio_net_ctrl_vlan {
180 uint16_t id;
181 };
182 #pragma pack()
183
184 static int vioif_quiesce(dev_info_t *);
185 static int vioif_attach(dev_info_t *, ddi_attach_cmd_t);
186 static int vioif_detach(dev_info_t *, ddi_detach_cmd_t);
187
DDI_DEFINE_STREAM_OPS(vioif_ops,
	nulldev,		/* identify */
	nulldev,		/* probe */
	vioif_attach,		/* attach */
	vioif_detach,		/* detach */
	nodev,			/* reset */
	NULL,			/* getinfo */
	D_MP,			/* cb_flag */
	NULL,			/* streamtab */
	vioif_quiesce		/* quiesce */);
198
199 static char vioif_ident[] = "VirtIO ethernet driver";
200
201 /* Standard Module linkage initialization for a Streams driver */
202 extern struct mod_ops mod_driverops;
203
204 static struct modldrv modldrv = {
205 &mod_driverops, /* Type of module. This one is a driver */
206 vioif_ident, /* short description */
207 &vioif_ops /* driver specific ops */
208 };
209
210 static struct modlinkage modlinkage = {
211 MODREV_1,
212 {
213 (void *)&modldrv,
214 NULL,
215 },
216 };
217
218 ddi_device_acc_attr_t vioif_attr = {
219 DDI_DEVICE_ATTR_V0,
220 DDI_NEVERSWAP_ACC, /* virtio is always native byte order */
221 DDI_STORECACHING_OK_ACC,
222 DDI_DEFAULT_ACC
223 };
224
225 /*
226 * A mapping represents a binding for a single buffer that is contiguous in the
227 * virtual address space.
228 */
229 struct vioif_buf_mapping {
230 caddr_t vbm_buf;
231 ddi_dma_handle_t vbm_dmah;
232 ddi_acc_handle_t vbm_acch;
233 ddi_dma_cookie_t vbm_dmac;
234 unsigned int vbm_ncookies;
235 };
236
237 /*
238 * Rx buffers can be loaned upstream, so the code has
239 * to allocate them dynamically.
240 */
241 struct vioif_rx_buf {
242 struct vioif_softc *rb_sc;
243 frtn_t rb_frtn;
244
245 struct vioif_buf_mapping rb_mapping;
246 };
247
/*
 * Tx buffers have two mapping types. One, "inline", is pre-allocated and is
 * used to hold the virtio_net_hdr. Small packets also get copied there, as
 * it's faster than mapping them. Bigger packets get mapped using the
 * "external" mapping array. An array is used because a packet may consist of
 * multiple fragments, so each fragment gets bound to an entry. In practice,
 * the number of fragments rarely exceeds 2, but just in case, a bigger array
 * of up to VIOIF_INDIRECT_MAX - 1 entries is allocated. To save resources,
 * the dma handles are allocated lazily in the tx path.
 */
258 struct vioif_tx_buf {
259 mblk_t *tb_mp;
260
261 /* inline buffer */
262 struct vioif_buf_mapping tb_inline_mapping;
263
264 /* External buffers */
265 struct vioif_buf_mapping *tb_external_mapping;
266 unsigned int tb_external_num;
267 };
268
269 struct vioif_softc {
270 dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */
271 struct virtio_softc sc_virtio;
272
273 mac_handle_t sc_mac_handle;
274 mac_register_t *sc_macp;
275
276 struct virtqueue *sc_rx_vq;
277 struct virtqueue *sc_tx_vq;
278 struct virtqueue *sc_ctrl_vq;
279
280 unsigned int sc_tx_stopped:1;
281
282 /* Feature bits. */
283 unsigned int sc_rx_csum:1;
284 unsigned int sc_tx_csum:1;
285 unsigned int sc_tx_tso4:1;
286
287 int sc_mtu;
288 uint8_t sc_mac[ETHERADDRL];
289 /*
290 * For rx buffers, we keep a pointer array, because the buffers
291 * can be loaned upstream, and we have to repopulate the array with
292 * new members.
293 */
294 struct vioif_rx_buf **sc_rxbufs;
295
	/*
	 * For tx, we just allocate an array of buffers. A packet is either
	 * copied into the inline buffer or bound using the external
	 * mappings.
	 */
301 struct vioif_tx_buf *sc_txbufs;
302
303 kstat_t *sc_intrstat;
304 /*
305 * We "loan" rx buffers upstream and reuse them after they are
306 * freed. This lets us avoid allocations in the hot path.
307 */
308 kmem_cache_t *sc_rxbuf_cache;
309 ulong_t sc_rxloan;
310
	/* Copying small packets turns out to be faster than mapping them. */
312 unsigned long sc_rxcopy_thresh;
313 unsigned long sc_txcopy_thresh;
	/* Statistics */
315 uint64_t sc_ipackets;
316 uint64_t sc_opackets;
317 uint64_t sc_rbytes;
318 uint64_t sc_obytes;
319 uint64_t sc_brdcstxmt;
320 uint64_t sc_brdcstrcv;
321 uint64_t sc_multixmt;
322 uint64_t sc_multircv;
323 uint64_t sc_norecvbuf;
324 uint64_t sc_notxbuf;
325 uint64_t sc_ierrors;
326 uint64_t sc_oerrors;
327 };
328
329 #define ETHER_HEADER_LEN sizeof (struct ether_header)
330
331 /* MTU + the ethernet header. */
332 #define MAX_PAYLOAD 65535
333 #define MAX_MTU (MAX_PAYLOAD - ETHER_HEADER_LEN)
334 #define DEFAULT_MTU ETHERMTU
335
/*
 * Yeah, we spend 8M per device. It turns out there is no point in being
 * smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF), because vhost
 * does not support them, and we expect to be used with vhost in production
 * environments.
 */
342 /* The buffer keeps both the packet data and the virtio_net_header. */
343 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
344
345 /*
346 * We win a bit on header alignment, but the host wins a lot
347 * more on moving aligned buffers. Might need more thought.
348 */
349 #define VIOIF_IP_ALIGN 0
350
351 /* Maximum number of indirect descriptors, somewhat arbitrary. */
352 #define VIOIF_INDIRECT_MAX 128
353
354 /*
355 * We pre-allocate a reasonably large buffer to copy small packets
356 * there. Bigger packets are mapped, packets with multiple
357 * cookies are mapped as indirect buffers.
358 */
359 #define VIOIF_TX_INLINE_SIZE 2048
360
361 /* Native queue size for all queues */
362 #define VIOIF_RX_QLEN 0
363 #define VIOIF_TX_QLEN 0
364 #define VIOIF_CTRL_QLEN 0
365
366 static uchar_t vioif_broadcast[ETHERADDRL] = {
367 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
368 };
369
370 #define VIOIF_TX_THRESH_MAX 640
371 #define VIOIF_RX_THRESH_MAX 640
372
373 #define CACHE_NAME_SIZE 32
374
375 static char vioif_txcopy_thresh[] =
376 "vioif_txcopy_thresh";
377 static char vioif_rxcopy_thresh[] =
378 "vioif_rxcopy_thresh";
379
380 static char *vioif_priv_props[] = {
381 vioif_txcopy_thresh,
382 vioif_rxcopy_thresh,
383 NULL
384 };
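/*
 * The copy thresholds are exposed as private MAC properties, so they can be
 * tuned at runtime (e.g. with dladm set-linkprop) instead of being
 * compile-time constants.
 */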
385
/* Should these helpers move into the DDI? */
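/*
 * These helpers peek at the ddi_dma_impl_t behind a DMA handle so that the
 * handle's cookie iterator can be saved and restored. vioif_add_rx() walks
 * the extra cookies of a bound rx buffer with ddi_dma_nextcookie() and then
 * rewinds the iterator, so the same buffer can be posted to the ring again
 * later without rebinding it.
 */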
387 static ddi_dma_cookie_t *
388 vioif_dma_curr_cookie(ddi_dma_handle_t dmah)
389 {
390 ddi_dma_impl_t *dmah_impl = (void *) dmah;
391 ASSERT(dmah_impl->dmai_cookie);
392 return (dmah_impl->dmai_cookie);
393 }
394
395 static void
396 vioif_dma_reset_cookie(ddi_dma_handle_t dmah, ddi_dma_cookie_t *dmac)
397 {
398 ddi_dma_impl_t *dmah_impl = (void *) dmah;
399 dmah_impl->dmai_cookie = dmac;
400 }
401
402 static link_state_t
403 vioif_link_state(struct vioif_softc *sc)
404 {
405 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_STATUS) {
406 if (virtio_read_device_config_2(&sc->sc_virtio,
407 VIRTIO_NET_CONFIG_STATUS) & VIRTIO_NET_S_LINK_UP) {
408 return (LINK_STATE_UP);
409 } else {
410 return (LINK_STATE_DOWN);
411 }
412 }
413
414 return (LINK_STATE_UP);
415 }
416
417 static ddi_dma_attr_t vioif_inline_buf_dma_attr = {
418 DMA_ATTR_V0, /* Version number */
419 0, /* low address */
420 0xFFFFFFFFFFFFFFFF, /* high address */
421 0xFFFFFFFF, /* counter register max */
422 1, /* page alignment */
423 1, /* burst sizes: 1 - 32 */
424 1, /* minimum transfer size */
425 0xFFFFFFFF, /* max transfer size */
426 0xFFFFFFFFFFFFFFF, /* address register max */
427 1, /* scatter-gather capacity */
428 1, /* device operates on bytes */
429 0, /* attr flag: set to 0 */
430 };
431
432 static ddi_dma_attr_t vioif_mapped_buf_dma_attr = {
433 DMA_ATTR_V0, /* Version number */
434 0, /* low address */
435 0xFFFFFFFFFFFFFFFF, /* high address */
436 0xFFFFFFFF, /* counter register max */
437 1, /* page alignment */
438 1, /* burst sizes: 1 - 32 */
439 1, /* minimum transfer size */
440 0xFFFFFFFF, /* max transfer size */
441 0xFFFFFFFFFFFFFFF, /* address register max */
442
443 /* One entry is used for the virtio_net_hdr on the tx path */
444 VIOIF_INDIRECT_MAX - 1, /* scatter-gather capacity */
445 1, /* device operates on bytes */
446 0, /* attr flag: set to 0 */
447 };
448
449 static ddi_device_acc_attr_t vioif_bufattr = {
450 DDI_DEVICE_ATTR_V0,
451 DDI_NEVERSWAP_ACC,
452 DDI_STORECACHING_OK_ACC,
453 DDI_DEFAULT_ACC
454 };
455
456 static void
457 vioif_rx_free(caddr_t free_arg)
458 {
459 struct vioif_rx_buf *buf = (void *) free_arg;
460 struct vioif_softc *sc = buf->rb_sc;
461
462 kmem_cache_free(sc->sc_rxbuf_cache, buf);
463 atomic_dec_ulong(&sc->sc_rxloan);
464 }
465
466 static int
467 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
468 {
469 _NOTE(ARGUNUSED(kmflags));
470 struct vioif_softc *sc = user_arg;
471 struct vioif_rx_buf *buf = buffer;
472 size_t len;
473
474 if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
475 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
476 dev_err(sc->sc_dev, CE_WARN,
477 "Can't allocate dma handle for rx buffer");
478 goto exit_handle;
479 }
480
481 if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
482 VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
483 &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
484 NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
485 dev_err(sc->sc_dev, CE_WARN,
486 "Can't allocate rx buffer");
487 goto exit_alloc;
488 }
489 ASSERT(len >= VIOIF_RX_SIZE);
490
491 if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
492 buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
493 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
494 &buf->rb_mapping.vbm_ncookies)) {
		dev_err(sc->sc_dev, CE_WARN, "Can't bind rx buffer");
496
497 goto exit_bind;
498 }
499
500 ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
501
502 buf->rb_sc = sc;
503 buf->rb_frtn.free_arg = (void *) buf;
504 buf->rb_frtn.free_func = vioif_rx_free;
505
506 return (0);
507 exit_bind:
508 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
509 exit_alloc:
510 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
511 exit_handle:
512
513 return (ENOMEM);
514 }
515
516 static void
517 vioif_rx_destruct(void *buffer, void *user_arg)
518 {
519 _NOTE(ARGUNUSED(user_arg));
520 struct vioif_rx_buf *buf = buffer;
521
	ASSERT(buf->rb_mapping.vbm_acch);
	ASSERT(buf->rb_mapping.vbm_dmah);
524
525 (void) ddi_dma_unbind_handle(buf->rb_mapping.vbm_dmah);
526 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
527 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
528 }
529
530 static void
531 vioif_free_mems(struct vioif_softc *sc)
532 {
533 int i;
534
535 for (i = 0; i < sc->sc_tx_vq->vq_num; i++) {
536 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
537 int j;
538
539 /* Tear down the internal mapping. */
540
541 ASSERT(buf->tb_inline_mapping.vbm_acch);
542 ASSERT(buf->tb_inline_mapping.vbm_dmah);
543
544 (void) ddi_dma_unbind_handle(buf->tb_inline_mapping.vbm_dmah);
545 ddi_dma_mem_free(&buf->tb_inline_mapping.vbm_acch);
546 ddi_dma_free_handle(&buf->tb_inline_mapping.vbm_dmah);
547
548 /* We should not see any in-flight buffers at this point. */
549 ASSERT(!buf->tb_mp);
550
		/* Free all the dma handles we allocated lazily. */
552 for (j = 0; buf->tb_external_mapping[j].vbm_dmah; j++)
553 ddi_dma_free_handle(
554 &buf->tb_external_mapping[j].vbm_dmah);
555 /* Free the external mapping array. */
556 kmem_free(buf->tb_external_mapping,
557 sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1);
558 }
559
560 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) *
561 sc->sc_tx_vq->vq_num);
562
563 for (i = 0; i < sc->sc_rx_vq->vq_num; i++) {
564 struct vioif_rx_buf *buf = sc->sc_rxbufs[i];
565
566 if (buf)
567 kmem_cache_free(sc->sc_rxbuf_cache, buf);
568 }
569 kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) *
570 sc->sc_rx_vq->vq_num);
571 }
572
573 static int
574 vioif_alloc_mems(struct vioif_softc *sc)
575 {
576 int i, txqsize, rxqsize;
577 size_t len;
578 unsigned int nsegments;
579
580 txqsize = sc->sc_tx_vq->vq_num;
581 rxqsize = sc->sc_rx_vq->vq_num;
582
583 sc->sc_txbufs = kmem_zalloc(sizeof (struct vioif_tx_buf) * txqsize,
584 KM_SLEEP);
585 if (sc->sc_txbufs == NULL) {
586 dev_err(sc->sc_dev, CE_WARN,
587 "Failed to allocate the tx buffers array");
588 goto exit_txalloc;
589 }
590
591 /*
592 * We don't allocate the rx vioif_bufs, just the pointers, as
593 * rx vioif_bufs can be loaned upstream, and we don't know the
594 * total number we need.
595 */
596 sc->sc_rxbufs = kmem_zalloc(sizeof (struct vioif_rx_buf *) * rxqsize,
597 KM_SLEEP);
598 if (sc->sc_rxbufs == NULL) {
599 dev_err(sc->sc_dev, CE_WARN,
600 "Failed to allocate the rx buffers pointer array");
601 goto exit_rxalloc;
602 }
603
604 for (i = 0; i < txqsize; i++) {
605 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
606
607 /* Allocate and bind an inline mapping. */
608
609 if (ddi_dma_alloc_handle(sc->sc_dev,
610 &vioif_inline_buf_dma_attr,
611 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_dmah)) {
612
613 dev_err(sc->sc_dev, CE_WARN,
614 "Can't allocate dma handle for tx buffer %d", i);
615 goto exit_tx;
616 }
617
618 if (ddi_dma_mem_alloc(buf->tb_inline_mapping.vbm_dmah,
619 VIOIF_TX_INLINE_SIZE, &vioif_bufattr, DDI_DMA_STREAMING,
620 DDI_DMA_SLEEP, NULL, &buf->tb_inline_mapping.vbm_buf,
621 &len, &buf->tb_inline_mapping.vbm_acch)) {
622
623 dev_err(sc->sc_dev, CE_WARN,
624 "Can't allocate tx buffer %d", i);
625 goto exit_tx;
626 }
627 ASSERT(len >= VIOIF_TX_INLINE_SIZE);
628
629 if (ddi_dma_addr_bind_handle(buf->tb_inline_mapping.vbm_dmah,
630 NULL, buf->tb_inline_mapping.vbm_buf, len,
631 DDI_DMA_WRITE | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
632 &buf->tb_inline_mapping.vbm_dmac, &nsegments)) {
633
634 dev_err(sc->sc_dev, CE_WARN,
635 "Can't bind tx buffer %d", i);
636 goto exit_tx;
637 }
638
639 /* We asked for a single segment */
640 ASSERT(nsegments == 1);
641
		/*
		 * We allow up to VIOIF_INDIRECT_MAX - 1 external mappings.
		 * In reality, we don't expect more than 2-3 to be used, but
		 * who knows.
		 */
647 buf->tb_external_mapping = kmem_zalloc(
648 sizeof (struct vioif_tx_buf) * VIOIF_INDIRECT_MAX - 1,
649 KM_SLEEP);
650
		/*
		 * The external mappings' dma handles are allocated lazily,
		 * as we don't expect most of them to be used.
		 */
655 }
656
657 return (0);
658
659 exit_tx:
660 for (i = 0; i < txqsize; i++) {
661 struct vioif_tx_buf *buf = &sc->sc_txbufs[i];
662
663 if (buf->tb_inline_mapping.vbm_dmah)
664 (void) ddi_dma_unbind_handle(
665 buf->tb_inline_mapping.vbm_dmah);
666
667 if (buf->tb_inline_mapping.vbm_acch)
668 ddi_dma_mem_free(
669 &buf->tb_inline_mapping.vbm_acch);
670
671 if (buf->tb_inline_mapping.vbm_dmah)
672 ddi_dma_free_handle(
673 &buf->tb_inline_mapping.vbm_dmah);
674
675 if (buf->tb_external_mapping)
676 kmem_free(buf->tb_external_mapping,
677 sizeof (struct vioif_tx_buf) *
678 VIOIF_INDIRECT_MAX - 1);
679 }
680
	kmem_free(sc->sc_rxbufs, sizeof (struct vioif_rx_buf *) * rxqsize);
682
683 exit_rxalloc:
684 kmem_free(sc->sc_txbufs, sizeof (struct vioif_tx_buf) * txqsize);
685 exit_txalloc:
686 return (ENOMEM);
687 }
688
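/*
 * Multicast filtering, promiscuous mode and unicast address changes are not
 * implemented. The first two simply claim success so that callers are not
 * blocked, while changing the MAC address is refused.
 */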
689 /* ARGSUSED */
690 int
691 vioif_multicst(void *arg, boolean_t add, const uint8_t *macaddr)
692 {
693 return (DDI_SUCCESS);
694 }
695
696 /* ARGSUSED */
697 int
698 vioif_promisc(void *arg, boolean_t on)
699 {
700 return (DDI_SUCCESS);
701 }
702
703 /* ARGSUSED */
704 int
705 vioif_unicst(void *arg, const uint8_t *macaddr)
706 {
707 return (DDI_FAILURE);
708 }
709
710
711 static uint_t
712 vioif_add_rx(struct vioif_softc *sc, int kmflag)
713 {
714 uint_t num_added = 0;
715 struct vq_entry *ve;
716
717 while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
718 struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
719
720 if (!buf) {
721 /* First run, allocate the buffer. */
722 buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
723 sc->sc_rxbufs[ve->qe_index] = buf;
724 }
725
726 /* Still nothing? Bye. */
727 if (!buf) {
728 dev_err(sc->sc_dev, CE_WARN,
729 "Can't allocate rx buffer");
730 sc->sc_norecvbuf++;
731 vq_free_entry(sc->sc_rx_vq, ve);
732 break;
733 }
734
735 ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
736
737 /*
738 * For an unknown reason, the virtio_net_hdr must be placed
739 * as a separate virtio queue entry.
740 */
741 virtio_ve_add_indirect_buf(ve,
742 buf->rb_mapping.vbm_dmac.dmac_laddress,
743 sizeof (struct virtio_net_hdr), B_FALSE);
744
745 /* Add the rest of the first cookie. */
746 virtio_ve_add_indirect_buf(ve,
747 buf->rb_mapping.vbm_dmac.dmac_laddress +
748 sizeof (struct virtio_net_hdr),
749 buf->rb_mapping.vbm_dmac.dmac_size -
750 sizeof (struct virtio_net_hdr), B_FALSE);
751
		/*
		 * If the buffer consists of a single cookie (unlikely for a
		 * 64k buffer), we are done. Otherwise, add the rest of the
		 * cookies using indirect entries.
		 */
757 if (buf->rb_mapping.vbm_ncookies > 1) {
758 ddi_dma_cookie_t *first_extra_dmac;
759 ddi_dma_cookie_t dmac;
760 first_extra_dmac =
761 vioif_dma_curr_cookie(buf->rb_mapping.vbm_dmah);
762
763 ddi_dma_nextcookie(buf->rb_mapping.vbm_dmah, &dmac);
764 virtio_ve_add_cookie(ve, buf->rb_mapping.vbm_dmah,
765 dmac, buf->rb_mapping.vbm_ncookies - 1, B_FALSE);
766 vioif_dma_reset_cookie(buf->rb_mapping.vbm_dmah,
767 first_extra_dmac);
768 }
769
770 virtio_push_chain(ve, B_FALSE);
771 num_added++;
772 }
773
774 return (num_added);
775 }
776
777 static uint_t
778 vioif_populate_rx(struct vioif_softc *sc, int kmflag)
779 {
780 uint_t num_added = vioif_add_rx(sc, kmflag);
781
782 if (num_added > 0)
783 virtio_sync_vq(sc->sc_rx_vq);
784
785 return (num_added);
786 }
787
788 static uint_t
789 vioif_process_rx(struct vioif_softc *sc)
790 {
791 struct vq_entry *ve;
792 struct vioif_rx_buf *buf;
793 mblk_t *mphead = NULL, *lastmp = NULL, *mp;
794 uint32_t len;
795 uint_t num_processed = 0;
796
797 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
798
799 buf = sc->sc_rxbufs[ve->qe_index];
800 ASSERT(buf);
801
802 if (len < sizeof (struct virtio_net_hdr)) {
			dev_err(sc->sc_dev, CE_WARN,
			    "RX: Chain too small: %u", len);
805 sc->sc_ierrors++;
806 virtio_free_chain(ve);
807 continue;
808 }
809
810 len -= sizeof (struct virtio_net_hdr);
811 /*
812 * We copy small packets that happen to fit into a single
813 * cookie and reuse the buffers. For bigger ones, we loan
814 * the buffers upstream.
815 */
816 if (len < sc->sc_rxcopy_thresh) {
817 mp = allocb(len, 0);
818 if (!mp) {
819 sc->sc_norecvbuf++;
820 sc->sc_ierrors++;
821
822 virtio_free_chain(ve);
823 break;
824 }
825
826 bcopy((char *)buf->rb_mapping.vbm_buf +
827 sizeof (struct virtio_net_hdr), mp->b_rptr, len);
828 mp->b_wptr = mp->b_rptr + len;
829
830 } else {
831 mp = desballoc((unsigned char *)
832 buf->rb_mapping.vbm_buf +
833 sizeof (struct virtio_net_hdr) +
834 VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
835 if (!mp) {
836 sc->sc_norecvbuf++;
837 sc->sc_ierrors++;
838
839 virtio_free_chain(ve);
840 break;
841 }
842 mp->b_wptr = mp->b_rptr + len;
843
844 atomic_inc_ulong(&sc->sc_rxloan);
845 /*
846 * Buffer loaned, we will have to allocate a new one
847 * for this slot.
848 */
849 sc->sc_rxbufs[ve->qe_index] = NULL;
850 }
851
852 /*
853 * virtio-net does not tell us if this packet is multicast
854 * or broadcast, so we have to check it.
855 */
856 if (mp->b_rptr[0] & 0x1) {
857 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
858 sc->sc_multircv++;
859 else
860 sc->sc_brdcstrcv++;
861 }
862
863 sc->sc_rbytes += len;
864 sc->sc_ipackets++;
865
866 virtio_free_chain(ve);
867
868 if (lastmp == NULL) {
869 mphead = mp;
870 } else {
871 lastmp->b_next = mp;
872 }
873 lastmp = mp;
874 num_processed++;
875 }
876
877 if (mphead != NULL) {
878 mac_rx(sc->sc_mac_handle, NULL, mphead);
879 }
880
881 return (num_processed);
882 }
883
884 static uint_t
885 vioif_reclaim_used_tx(struct vioif_softc *sc)
886 {
887 struct vq_entry *ve;
888 struct vioif_tx_buf *buf;
889 uint32_t len;
890 mblk_t *mp;
891 uint_t num_reclaimed = 0;
892
893 while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
894 /* We don't chain descriptors for tx, so don't expect any. */
895 ASSERT(!ve->qe_next);
896
897 buf = &sc->sc_txbufs[ve->qe_index];
898 mp = buf->tb_mp;
899 buf->tb_mp = NULL;
900
901 if (mp) {
902 for (int i = 0; i < buf->tb_external_num; i++)
903 (void) ddi_dma_unbind_handle(
904 buf->tb_external_mapping[i].vbm_dmah);
905 }
906
907 virtio_free_chain(ve);
908
909 /* External mapping used, mp was not freed in vioif_send() */
910 if (mp)
911 freemsg(mp);
912 num_reclaimed++;
913 }
914
915 if (sc->sc_tx_stopped && num_reclaimed > 0) {
916 sc->sc_tx_stopped = 0;
917 mac_tx_update(sc->sc_mac_handle);
918 }
919
920 return (num_reclaimed);
921 }
922
923 /* sc will be used to update stat counters. */
924 /* ARGSUSED */
925 static inline void
926 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
927 size_t msg_size)
928 {
929 struct vioif_tx_buf *buf;
930 buf = &sc->sc_txbufs[ve->qe_index];
931
932 ASSERT(buf);
933
934 /* Frees mp */
935 mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
936 sizeof (struct virtio_net_hdr));
937
938 virtio_ve_add_indirect_buf(ve,
939 buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
940 sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
941 }
942
943 static inline int
944 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
945 int i)
946 {
947 int ret = DDI_SUCCESS;
948
949 if (!buf->tb_external_mapping[i].vbm_dmah) {
950 ret = ddi_dma_alloc_handle(sc->sc_dev,
951 &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
952 &buf->tb_external_mapping[i].vbm_dmah);
953 if (ret != DDI_SUCCESS) {
954 dev_err(sc->sc_dev, CE_WARN,
955 "Can't allocate dma handle for external tx buffer");
956 }
957 }
958
959 return (ret);
960 }
961
962 static inline int
963 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
964 size_t msg_size)
965 {
966 _NOTE(ARGUNUSED(msg_size));
967
968 struct vioif_tx_buf *buf;
969 mblk_t *nmp;
970 int i, j;
971 int ret = DDI_SUCCESS;
972
973 buf = &sc->sc_txbufs[ve->qe_index];
974
975 ASSERT(buf);
976
977 buf->tb_external_num = 0;
978 i = 0;
979 nmp = mp;
980
981 while (nmp) {
982 size_t len;
983 ddi_dma_cookie_t dmac;
984 unsigned int ncookies;
985
986 len = MBLKL(nmp);
987 /*
988 * For some reason, the network stack can
989 * actually send us zero-length fragments.
990 */
991 if (len == 0) {
992 nmp = nmp->b_cont;
993 continue;
994 }
995
996 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
997 if (ret != DDI_SUCCESS) {
998 sc->sc_notxbuf++;
999 sc->sc_oerrors++;
1000 goto exit_lazy_alloc;
1001 }
1002 ret = ddi_dma_addr_bind_handle(
1003 buf->tb_external_mapping[i].vbm_dmah, NULL,
1004 (caddr_t)nmp->b_rptr, len,
1005 DDI_DMA_WRITE | DDI_DMA_STREAMING,
1006 DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1007
1008 if (ret != DDI_SUCCESS) {
1009 sc->sc_oerrors++;
1010 dev_err(sc->sc_dev, CE_NOTE,
1011 "TX: Failed to bind external handle");
1012 goto exit_bind;
1013 }
1014
1015 /* Check if we still fit into the indirect table. */
1016 if (virtio_ve_indirect_available(ve) < ncookies) {
1017 dev_err(sc->sc_dev, CE_NOTE,
1018 "TX: Indirect descriptor table limit reached."
1019 " It took %d fragments.", i);
1020 sc->sc_notxbuf++;
1021 sc->sc_oerrors++;
1022
1023 ret = DDI_FAILURE;
1024 goto exit_limit;
1025 }
1026
1027 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1028 dmac, ncookies, B_TRUE);
1029
1030 nmp = nmp->b_cont;
1031 i++;
1032 }
1033
1034 buf->tb_external_num = i;
1035 /* Save the mp to free it when the packet is sent. */
1036 buf->tb_mp = mp;
1037
1038 return (DDI_SUCCESS);
1039
1040 exit_limit:
1041 exit_bind:
1042 exit_lazy_alloc:
1043
1044 for (j = 0; j < i; j++) {
1045 (void) ddi_dma_unbind_handle(
1046 buf->tb_external_mapping[j].vbm_dmah);
1047 }
1048
1049 return (ret);
1050 }
1051
1052 static boolean_t
1053 vioif_send(struct vioif_softc *sc, mblk_t *mp)
1054 {
1055 struct vq_entry *ve;
1056 struct vioif_tx_buf *buf;
1057 struct virtio_net_hdr *net_header = NULL;
1058 size_t msg_size = 0;
1059 uint32_t csum_start;
1060 uint32_t csum_stuff;
1061 uint32_t csum_flags;
1062 uint32_t lso_flags;
1063 uint32_t lso_mss;
1064 mblk_t *nmp;
1065 int ret;
1066 boolean_t lso_required = B_FALSE;
1067
1068 for (nmp = mp; nmp; nmp = nmp->b_cont)
1069 msg_size += MBLKL(nmp);
1070
1071 if (sc->sc_tx_tso4) {
1072 mac_lso_get(mp, &lso_mss, &lso_flags);
1073 lso_required = (lso_flags & HW_LSO);
1074 }
1075
1076 ve = vq_alloc_entry(sc->sc_tx_vq);
1077
1078 if (!ve) {
1079 sc->sc_notxbuf++;
1080 /* Out of free descriptors - try later. */
1081 return (B_FALSE);
1082 }
1083 buf = &sc->sc_txbufs[ve->qe_index];
1084
1085 /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1086 (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1087 sizeof (struct virtio_net_hdr));
1088
1089 net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1090
1091 mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1092 NULL, &csum_flags);
1093
1094 /* They want us to do the TCP/UDP csum calculation. */
1095 if (csum_flags & HCK_PARTIALCKSUM) {
1096 struct ether_header *eth_header;
1097 int eth_hsize;
1098
1099 /* Did we ask for it? */
1100 ASSERT(sc->sc_tx_csum);
1101
1102 /* We only asked for partial csum packets. */
1103 ASSERT(!(csum_flags & HCK_IPV4_HDRCKSUM));
1104 ASSERT(!(csum_flags & HCK_FULLCKSUM));
1105
1106 eth_header = (void *) mp->b_rptr;
1107 if (eth_header->ether_type == htons(ETHERTYPE_VLAN)) {
1108 eth_hsize = sizeof (struct ether_vlan_header);
1109 } else {
1110 eth_hsize = sizeof (struct ether_header);
1111 }
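		/*
		 * csum_start from the MAC layer does not include the
		 * ethernet header, so the header length is added to make
		 * the offset frame-relative, as the virtio header expects.
		 * csum_offset is the distance from csum_start to the
		 * checksum field, which is the same in both representations.
		 */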
1112 net_header->flags = VIRTIO_NET_HDR_F_NEEDS_CSUM;
1113 net_header->csum_start = eth_hsize + csum_start;
1114 net_header->csum_offset = csum_stuff - csum_start;
1115 }
1116
1117 /* setup LSO fields if required */
1118 if (lso_required) {
1119 net_header->gso_type = VIRTIO_NET_HDR_GSO_TCPV4;
1120 net_header->gso_size = (uint16_t)lso_mss;
1121 }
1122
1123 virtio_ve_add_indirect_buf(ve,
1124 buf->tb_inline_mapping.vbm_dmac.dmac_laddress,
1125 sizeof (struct virtio_net_hdr), B_TRUE);
1126
	/* Meanwhile, update the statistics. */
1128 if (mp->b_rptr[0] & 0x1) {
1129 if (bcmp(mp->b_rptr, vioif_broadcast, ETHERADDRL) != 0)
1130 sc->sc_multixmt++;
1131 else
1132 sc->sc_brdcstxmt++;
1133 }
1134
	/*
	 * We copy small packets into the inline buffer. The bigger ones
	 * get bound using the external mappings.
	 */
1139 if (msg_size < sc->sc_txcopy_thresh) {
1140 vioif_tx_inline(sc, ve, mp, msg_size);
1141 } else {
		/* statistics are updated by vioif_tx_external() on failure */
1143 ret = vioif_tx_external(sc, ve, mp, msg_size);
1144 if (ret != DDI_SUCCESS)
1145 goto exit_tx_external;
1146 }
1147
1148 virtio_push_chain(ve, B_TRUE);
1149
1150 sc->sc_opackets++;
1151 sc->sc_obytes += msg_size;
1152
1153 return (B_TRUE);
1154
1155 exit_tx_external:
1156
1157 vq_free_entry(sc->sc_tx_vq, ve);
	/*
	 * vioif_tx_external() can fail if the packet does not fit into the
	 * indirect descriptor table. Free the mp; we don't expect this to
	 * ever happen.
	 */
1163 freemsg(mp);
1164
1165 return (B_TRUE);
1166 }
1167
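/*
 * The mc_tx entry point: transmit as much of the chain as the tx ring
 * allows. Any untransmitted remainder is returned to the MAC layer, which
 * treats that as flow control and retries once mac_tx_update() is called
 * from the reclaim path.
 */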
1168 mblk_t *
1169 vioif_tx(void *arg, mblk_t *mp)
1170 {
1171 struct vioif_softc *sc = arg;
1172 mblk_t *nmp;
1173
1174 while (mp != NULL) {
1175 nmp = mp->b_next;
1176 mp->b_next = NULL;
1177
1178 if (!vioif_send(sc, mp)) {
1179 sc->sc_tx_stopped = 1;
1180 mp->b_next = nmp;
1181 break;
1182 }
1183 mp = nmp;
1184 }
1185
1186 return (mp);
1187 }
1188
1189 int
1190 vioif_start(void *arg)
1191 {
1192 struct vioif_softc *sc = arg;
1193 struct vq_entry *ve;
1194 uint32_t len;
1195
1196 mac_link_update(sc->sc_mac_handle,
1197 vioif_link_state(sc));
1198
1199 virtio_start_vq_intr(sc->sc_rx_vq);
1200
1201 /*
1202 * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1203 * so the device will send a transmit interrupt when the queue is empty
1204 * and we can reclaim it in one sweep.
1205 */
1206
1207 /*
1208 * Clear any data that arrived early on the receive queue and populate
1209 * it with free buffers that the device can use moving forward.
1210 */
1211 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1212 virtio_free_chain(ve);
1213 }
1214 (void) vioif_populate_rx(sc, KM_SLEEP);
1215
1216 return (DDI_SUCCESS);
1217 }
1218
1219 void
1220 vioif_stop(void *arg)
1221 {
1222 struct vioif_softc *sc = arg;
1223
1224 virtio_stop_vq_intr(sc->sc_rx_vq);
1225 }
1226
1227 /* ARGSUSED */
1228 static int
1229 vioif_stat(void *arg, uint_t stat, uint64_t *val)
1230 {
1231 struct vioif_softc *sc = arg;
1232
1233 switch (stat) {
1234 case MAC_STAT_IERRORS:
1235 *val = sc->sc_ierrors;
1236 break;
1237 case MAC_STAT_OERRORS:
1238 *val = sc->sc_oerrors;
1239 break;
1240 case MAC_STAT_MULTIRCV:
1241 *val = sc->sc_multircv;
1242 break;
1243 case MAC_STAT_BRDCSTRCV:
1244 *val = sc->sc_brdcstrcv;
1245 break;
1246 case MAC_STAT_MULTIXMT:
1247 *val = sc->sc_multixmt;
1248 break;
1249 case MAC_STAT_BRDCSTXMT:
1250 *val = sc->sc_brdcstxmt;
1251 break;
1252 case MAC_STAT_IPACKETS:
1253 *val = sc->sc_ipackets;
1254 break;
1255 case MAC_STAT_RBYTES:
1256 *val = sc->sc_rbytes;
1257 break;
1258 case MAC_STAT_OPACKETS:
1259 *val = sc->sc_opackets;
1260 break;
1261 case MAC_STAT_OBYTES:
1262 *val = sc->sc_obytes;
1263 break;
1264 case MAC_STAT_NORCVBUF:
1265 *val = sc->sc_norecvbuf;
1266 break;
1267 case MAC_STAT_NOXMTBUF:
1268 *val = sc->sc_notxbuf;
1269 break;
1270 case MAC_STAT_IFSPEED:
1271 /* always 1 Gbit */
1272 *val = 1000000000ULL;
1273 break;
1274 case ETHER_STAT_LINK_DUPLEX:
1275 /* virtual device, always full-duplex */
1276 *val = LINK_DUPLEX_FULL;
1277 break;
1278
1279 default:
1280 return (ENOTSUP);
1281 }
1282
1283 return (DDI_SUCCESS);
1284 }
1285
1286 static int
1287 vioif_set_prop_private(struct vioif_softc *sc, const char *pr_name,
1288 uint_t pr_valsize, const void *pr_val)
1289 {
1290 _NOTE(ARGUNUSED(pr_valsize));
1291
1292 long result;
1293
1294 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1295
1296 if (pr_val == NULL)
1297 return (EINVAL);
1298
1299 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1300
1301 if (result < 0 || result > VIOIF_TX_THRESH_MAX)
1302 return (EINVAL);
1303 sc->sc_txcopy_thresh = result;
1304 }
1305 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1306
1307 if (pr_val == NULL)
1308 return (EINVAL);
1309
1310 (void) ddi_strtol(pr_val, (char **)NULL, 0, &result);
1311
1312 if (result < 0 || result > VIOIF_RX_THRESH_MAX)
1313 return (EINVAL);
1314 sc->sc_rxcopy_thresh = result;
1315 }
1316 return (0);
1317 }
1318
1319 static int
1320 vioif_setprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1321 uint_t pr_valsize, const void *pr_val)
1322 {
1323 struct vioif_softc *sc = arg;
1324 const uint32_t *new_mtu;
1325 int err;
1326
1327 switch (pr_num) {
1328 case MAC_PROP_MTU:
1329 new_mtu = pr_val;
1330
1331 if (*new_mtu > MAX_MTU) {
1332 return (EINVAL);
1333 }
1334
1335 err = mac_maxsdu_update(sc->sc_mac_handle, *new_mtu);
1336 if (err) {
1337 return (err);
1338 }
1339 break;
1340 case MAC_PROP_PRIVATE:
1341 err = vioif_set_prop_private(sc, pr_name,
1342 pr_valsize, pr_val);
1343 if (err)
1344 return (err);
1345 break;
1346 default:
1347 return (ENOTSUP);
1348 }
1349
1350 return (0);
1351 }
1352
1353 static int
1354 vioif_get_prop_private(struct vioif_softc *sc, const char *pr_name,
1355 uint_t pr_valsize, void *pr_val)
1356 {
1357 int err = ENOTSUP;
1358 int value;
1359
1360 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1361
1362 value = sc->sc_txcopy_thresh;
1363 err = 0;
1364 goto done;
1365 }
1366 if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1367
1368 value = sc->sc_rxcopy_thresh;
1369 err = 0;
1370 goto done;
1371 }
1372 done:
1373 if (err == 0) {
1374 (void) snprintf(pr_val, pr_valsize, "%d", value);
1375 }
1376 return (err);
1377 }
1378
1379 static int
1380 vioif_getprop(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1381 uint_t pr_valsize, void *pr_val)
1382 {
1383 struct vioif_softc *sc = arg;
1384 int err = ENOTSUP;
1385
1386 switch (pr_num) {
1387 case MAC_PROP_PRIVATE:
1388 err = vioif_get_prop_private(sc, pr_name,
1389 pr_valsize, pr_val);
1390 break;
1391 default:
1392 break;
1393 }
1394 return (err);
1395 }
1396
1397 static void
1398 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1399 mac_prop_info_handle_t prh)
1400 {
1401 struct vioif_softc *sc = arg;
1402 char valstr[64];
1403 int value;
1404
1405 switch (pr_num) {
1406 case MAC_PROP_MTU:
1407 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1408 break;
1409
1410 case MAC_PROP_PRIVATE:
1411 bzero(valstr, sizeof (valstr));
1412 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1413
1414 value = sc->sc_txcopy_thresh;
1415 } else if (strcmp(pr_name,
1416 vioif_rxcopy_thresh) == 0) {
1417 value = sc->sc_rxcopy_thresh;
1418 } else {
1419 return;
1420 }
		(void) snprintf(valstr, sizeof (valstr), "%d", value);
		mac_prop_info_set_default_str(prh, valstr);
1422 break;
1423
1424 default:
1425 break;
1426 }
1427 }
1428
1429 static boolean_t
1430 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1431 {
1432 struct vioif_softc *sc = arg;
1433
1434 switch (cap) {
1435 case MAC_CAPAB_HCKSUM:
1436 if (sc->sc_tx_csum) {
1437 uint32_t *txflags = cap_data;
1438
1439 *txflags = HCKSUM_INET_PARTIAL;
1440 return (B_TRUE);
1441 }
1442 return (B_FALSE);
1443 case MAC_CAPAB_LSO:
1444 if (sc->sc_tx_tso4) {
1445 mac_capab_lso_t *cap_lso = cap_data;
1446
1447 cap_lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
1448 cap_lso->lso_basic_tcp_ipv4.lso_max = MAX_MTU;
1449 return (B_TRUE);
1450 }
1451 return (B_FALSE);
1452 default:
1453 break;
1454 }
1455 return (B_FALSE);
1456 }
1457
1458 static mac_callbacks_t vioif_m_callbacks = {
1459 .mc_callbacks = (MC_GETCAPAB | MC_SETPROP | MC_GETPROP | MC_PROPINFO),
1460 .mc_getstat = vioif_stat,
1461 .mc_start = vioif_start,
1462 .mc_stop = vioif_stop,
1463 .mc_setpromisc = vioif_promisc,
1464 .mc_multicst = vioif_multicst,
1465 .mc_unicst = vioif_unicst,
1466 .mc_tx = vioif_tx,
1467 /* Optional callbacks */
1468 .mc_reserved = NULL, /* reserved */
1469 .mc_ioctl = NULL, /* mc_ioctl */
1470 .mc_getcapab = vioif_getcapab, /* mc_getcapab */
1471 .mc_open = NULL, /* mc_open */
1472 .mc_close = NULL, /* mc_close */
1473 .mc_setprop = vioif_setprop,
1474 .mc_getprop = vioif_getprop,
1475 .mc_propinfo = vioif_propinfo,
1476 };
1477
1478 static void
1479 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1480 uint32_t features)
1481 {
1482 char buf[512];
1483 char *bufp = buf;
1484 char *bufend = buf + sizeof (buf);
1485
1486 /* LINTED E_PTRDIFF_OVERFLOW */
1487 bufp += snprintf(bufp, bufend - bufp, prefix);
1488 /* LINTED E_PTRDIFF_OVERFLOW */
1489 bufp += virtio_show_features(features, bufp, bufend - bufp);
1490 *bufp = '\0';
1491
1492
1493 /* Using '!' to only CE_NOTE this to the system log. */
1494 dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1495 VIRTIO_NET_FEATURE_BITS);
1496 }
1497
1498 /*
1499 * Find out which features are supported by the device and
1500 * choose which ones we wish to use.
1501 */
1502 static int
1503 vioif_dev_features(struct vioif_softc *sc)
1504 {
1505 uint32_t host_features;
1506
1507 host_features = virtio_negotiate_features(&sc->sc_virtio,
1508 VIRTIO_NET_F_CSUM |
1509 VIRTIO_NET_F_HOST_TSO4 |
1510 VIRTIO_NET_F_HOST_ECN |
1511 VIRTIO_NET_F_MAC |
1512 VIRTIO_NET_F_STATUS |
1513 VIRTIO_F_RING_INDIRECT_DESC |
1514 VIRTIO_F_NOTIFY_ON_EMPTY);
1515
1516 vioif_show_features(sc, "Host features: ", host_features);
1517 vioif_show_features(sc, "Negotiated features: ",
1518 sc->sc_virtio.sc_features);
1519
1520 if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1521 dev_err(sc->sc_dev, CE_NOTE,
1522 "Host does not support RING_INDIRECT_DESC, bye.");
1523 return (DDI_FAILURE);
1524 }
1525
1526 return (DDI_SUCCESS);
1527 }
1528
1529 static int
1530 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1531 {
1532 return (virtio_has_feature(&sc->sc_virtio, feature));
1533 }
1534
1535 static void
1536 vioif_set_mac(struct vioif_softc *sc)
1537 {
1538 int i;
1539
1540 for (i = 0; i < ETHERADDRL; i++) {
1541 virtio_write_device_config_1(&sc->sc_virtio,
1542 VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1543 }
1544 }
1545
1546 /* Get the mac address out of the hardware, or make up one. */
1547 static void
1548 vioif_get_mac(struct vioif_softc *sc)
1549 {
1550 int i;
1551 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1552 for (i = 0; i < ETHERADDRL; i++) {
1553 sc->sc_mac[i] = virtio_read_device_config_1(
1554 &sc->sc_virtio,
1555 VIRTIO_NET_CONFIG_MAC + i);
1556 }
1557 dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
1558 ether_sprintf((struct ether_addr *)sc->sc_mac));
1559 } else {
1560 /* Get a few random bytes */
1561 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1562 /* Make sure it's a unicast MAC */
1563 sc->sc_mac[0] &= ~1;
1564 /* Set the "locally administered" bit */
1565 sc->sc_mac[1] |= 2;
1566
1567 vioif_set_mac(sc);
1568
1569 dev_err(sc->sc_dev, CE_NOTE,
1570 "Generated a random MAC address: %s",
1571 ether_sprintf((struct ether_addr *)sc->sc_mac));
1572 }
1573 }
1574
1575 /*
1576 * Virtqueue interrupt handlers
1577 */
1578 /* ARGSUSED */
1579 uint_t
1580 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1581 {
1582 struct virtio_softc *vsc = (void *) arg1;
1583 struct vioif_softc *sc = container_of(vsc,
1584 struct vioif_softc, sc_virtio);
1585
1586 /*
1587 * The return values of these functions are not needed but they make
1588 * debugging interrupts simpler because you can use them to detect when
1589 * stuff was processed and repopulated in this handler.
1590 */
1591 (void) vioif_process_rx(sc);
1592 (void) vioif_populate_rx(sc, KM_NOSLEEP);
1593
1594 return (DDI_INTR_CLAIMED);
1595 }
1596
1597 /* ARGSUSED */
1598 uint_t
1599 vioif_tx_handler(caddr_t arg1, caddr_t arg2)
1600 {
1601 struct virtio_softc *vsc = (void *)arg1;
1602 struct vioif_softc *sc = container_of(vsc,
1603 struct vioif_softc, sc_virtio);
1604
1605 /*
1606 * The return value of this function is not needed but makes debugging
1607 * interrupts simpler because you can use it to detect if anything was
1608 * reclaimed in this handler.
1609 */
1610 (void) vioif_reclaim_used_tx(sc);
1611
1612 return (DDI_INTR_CLAIMED);
1613 }
1614
1615 static int
1616 vioif_register_ints(struct vioif_softc *sc)
1617 {
1618 int ret;
1619
1620 struct virtio_int_handler vioif_vq_h[] = {
1621 { vioif_rx_handler },
1622 { vioif_tx_handler },
1623 { NULL }
1624 };
1625
1626 ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1627
1628 return (ret);
1629 }
1630
1631
1632 static void
1633 vioif_check_features(struct vioif_softc *sc)
1634 {
1635 if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
		/* The GSO/GRO features depend on CSUM; check them here. */
1637 sc->sc_tx_csum = 1;
1638 sc->sc_rx_csum = 1;
1639
1640 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1641 sc->sc_rx_csum = 0;
1642 }
1643 cmn_err(CE_NOTE, "Csum enabled.");
1644
1645 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1646
1647 sc->sc_tx_tso4 = 1;
			/*
			 * We don't seem to have a way to ask the system not
			 * to send us LSO packets with the Explicit
			 * Congestion Notification bit set, so we require the
			 * device to support ECN in order to do LSO.
			 */
1655 if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1656 dev_err(sc->sc_dev, CE_NOTE,
1657 "TSO4 supported, but not ECN. "
1658 "Not using LSO.");
1659 sc->sc_tx_tso4 = 0;
1660 } else {
1661 cmn_err(CE_NOTE, "LSO enabled");
1662 }
1663 }
1664 }
1665 }
1666
1667 static int
1668 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1669 {
1670 int ret, instance;
1671 struct vioif_softc *sc;
1672 struct virtio_softc *vsc;
1673 mac_register_t *macp;
1674 char cache_name[CACHE_NAME_SIZE];
1675
1676 instance = ddi_get_instance(devinfo);
1677
1678 switch (cmd) {
1679 case DDI_ATTACH:
1680 break;
1681
1682 case DDI_RESUME:
1683 case DDI_PM_RESUME:
1684 /* We do not support suspend/resume for vioif. */
1685 goto exit;
1686
1687 default:
1688 goto exit;
1689 }
1690
1691 sc = kmem_zalloc(sizeof (struct vioif_softc), KM_SLEEP);
1692 ddi_set_driver_private(devinfo, sc);
1693
1694 vsc = &sc->sc_virtio;
1695
1696 /* Duplicate for less typing */
1697 sc->sc_dev = devinfo;
1698 vsc->sc_dev = devinfo;
1699
1700 /*
1701 * Initialize interrupt kstat.
1702 */
1703 sc->sc_intrstat = kstat_create("vioif", instance, "intr", "controller",
1704 KSTAT_TYPE_INTR, 1, 0);
1705 if (sc->sc_intrstat == NULL) {
1706 dev_err(devinfo, CE_WARN, "kstat_create failed");
1707 goto exit_intrstat;
1708 }
1709 kstat_install(sc->sc_intrstat);
1710
1711 /* map BAR 0 */
1712 ret = ddi_regs_map_setup(devinfo, 1,
1713 (caddr_t *)&sc->sc_virtio.sc_io_addr,
1714 0, 0, &vioif_attr, &sc->sc_virtio.sc_ioh);
1715 if (ret != DDI_SUCCESS) {
1716 dev_err(devinfo, CE_WARN, "unable to map bar 0: %d", ret);
1717 goto exit_map;
1718 }
1719
1720 virtio_device_reset(&sc->sc_virtio);
1721 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_ACK);
1722 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_DRIVER);
1723
1724 ret = vioif_dev_features(sc);
1725 if (ret)
1726 goto exit_features;
1727
1728 vsc->sc_nvqs = vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ) ? 3 : 2;
1729
1730 (void) snprintf(cache_name, CACHE_NAME_SIZE, "vioif%d_rx", instance);
1731 sc->sc_rxbuf_cache = kmem_cache_create(cache_name,
1732 sizeof (struct vioif_rx_buf), 0, vioif_rx_construct,
1733 vioif_rx_destruct, NULL, sc, NULL, KM_SLEEP);
1734 if (sc->sc_rxbuf_cache == NULL) {
1735 dev_err(sc->sc_dev, CE_WARN, "Can't allocate the buffer cache");
1736 goto exit_cache;
1737 }
1738
1739 ret = vioif_register_ints(sc);
1740 if (ret) {
1741 dev_err(sc->sc_dev, CE_WARN,
1742 "Failed to allocate interrupt(s)!");
1743 goto exit_ints;
1744 }
1745
1746 /*
1747 * Register layout determined, can now access the
1748 * device-specific bits
1749 */
1750 vioif_get_mac(sc);
1751
1752 sc->sc_rx_vq = virtio_alloc_vq(&sc->sc_virtio, 0,
1753 VIOIF_RX_QLEN, VIOIF_INDIRECT_MAX, "rx");
1754 if (!sc->sc_rx_vq)
1755 goto exit_alloc1;
1756 virtio_stop_vq_intr(sc->sc_rx_vq);
1757
1758 sc->sc_tx_vq = virtio_alloc_vq(&sc->sc_virtio, 1,
1759 VIOIF_TX_QLEN, VIOIF_INDIRECT_MAX, "tx");
1760 if (!sc->sc_tx_vq)
1761 goto exit_alloc2;
1762 virtio_stop_vq_intr(sc->sc_tx_vq);
1763
1764 if (vioif_has_feature(sc, VIRTIO_NET_F_CTRL_VQ)) {
1765 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1766 VIOIF_CTRL_QLEN, 0, "ctrl");
1767 if (!sc->sc_ctrl_vq) {
1768 goto exit_alloc3;
1769 }
1770 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1771 }
1772
1773 virtio_set_status(&sc->sc_virtio,
1774 VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1775
1776 sc->sc_rxloan = 0;
1777
	/* set some reasonably small default values */
1779 sc->sc_rxcopy_thresh = 300;
1780 sc->sc_txcopy_thresh = 300;
1781 sc->sc_mtu = ETHERMTU;
1782
1783 vioif_check_features(sc);
1784
1785 if (vioif_alloc_mems(sc))
1786 goto exit_alloc_mems;
1787
1788 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1789 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1790 goto exit_macalloc;
1791 }
1792
1793 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1794 macp->m_driver = sc;
1795 macp->m_dip = devinfo;
1796 macp->m_src_addr = sc->sc_mac;
1797 macp->m_callbacks = &vioif_m_callbacks;
1798 macp->m_min_sdu = 0;
1799 macp->m_max_sdu = sc->sc_mtu;
1800 macp->m_margin = VLAN_TAGSZ;
1801 macp->m_priv_props = vioif_priv_props;
1802
1803 sc->sc_macp = macp;
1804
1805 /* Pre-fill the rx ring. */
1806 (void) vioif_populate_rx(sc, KM_SLEEP);
1807
1808 ret = mac_register(macp, &sc->sc_mac_handle);
1809 if (ret != 0) {
1810 dev_err(devinfo, CE_WARN, "vioif_attach: "
1811 "mac_register() failed, ret=%d", ret);
1812 goto exit_register;
1813 }
1814
1815 ret = virtio_enable_ints(&sc->sc_virtio);
1816 if (ret) {
1817 dev_err(devinfo, CE_WARN, "Failed to enable interrupts");
1818 goto exit_enable_ints;
1819 }
1820
1821 mac_link_update(sc->sc_mac_handle, LINK_STATE_UP);
1822 return (DDI_SUCCESS);
1823
1824 exit_enable_ints:
1825 (void) mac_unregister(sc->sc_mac_handle);
1826 exit_register:
1827 mac_free(macp);
1828 exit_macalloc:
1829 vioif_free_mems(sc);
1830 exit_alloc_mems:
1831 virtio_release_ints(&sc->sc_virtio);
1832 if (sc->sc_ctrl_vq)
1833 virtio_free_vq(sc->sc_ctrl_vq);
1834 exit_alloc3:
1835 virtio_free_vq(sc->sc_tx_vq);
1836 exit_alloc2:
1837 virtio_free_vq(sc->sc_rx_vq);
1838 exit_alloc1:
1839 exit_ints:
1840 kmem_cache_destroy(sc->sc_rxbuf_cache);
1841 exit_cache:
1842 exit_features:
1843 virtio_set_status(&sc->sc_virtio, VIRTIO_CONFIG_DEVICE_STATUS_FAILED);
1844 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
exit_map:
	kstat_delete(sc->sc_intrstat);
exit_intrstat:
1848 kmem_free(sc, sizeof (struct vioif_softc));
1849 exit:
1850 return (DDI_FAILURE);
1851 }
1852
1853 static int
1854 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1855 {
1856 struct vioif_softc *sc;
1857
1858 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1859 return (DDI_FAILURE);
1860
1861 switch (cmd) {
1862 case DDI_DETACH:
1863 break;
1864
1865 case DDI_PM_SUSPEND:
1866 /* We do not support suspend/resume for vioif. */
1867 return (DDI_FAILURE);
1868
1869 default:
1870 return (DDI_FAILURE);
1871 }
1872
1873 if (sc->sc_rxloan) {
1874 dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1875 " not detaching.");
1876 return (DDI_FAILURE);
1877 }
1878
1879 virtio_stop_vq_intr(sc->sc_rx_vq);
1880 virtio_stop_vq_intr(sc->sc_tx_vq);
1881
1882 virtio_release_ints(&sc->sc_virtio);
1883
1884 if (mac_unregister(sc->sc_mac_handle)) {
1885 return (DDI_FAILURE);
1886 }
1887
1888 mac_free(sc->sc_macp);
1889
1890 vioif_free_mems(sc);
1891 virtio_free_vq(sc->sc_rx_vq);
1892 virtio_free_vq(sc->sc_tx_vq);
1893
1894 virtio_device_reset(&sc->sc_virtio);
1895
1896 ddi_regs_map_free(&sc->sc_virtio.sc_ioh);
1897
1898 kmem_cache_destroy(sc->sc_rxbuf_cache);
1899 kstat_delete(sc->sc_intrstat);
1900 kmem_free(sc, sizeof (struct vioif_softc));
1901
1902 return (DDI_SUCCESS);
1903 }
1904
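/*
 * The quiesce(9E) entry point, called during fast reboot to stop the device
 * from generating interrupts and doing DMA. It must not block, so we only
 * mask the queue interrupts and reset the device.
 */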
1905 static int
1906 vioif_quiesce(dev_info_t *devinfo)
1907 {
1908 struct vioif_softc *sc;
1909
1910 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1911 return (DDI_FAILURE);
1912
1913 virtio_stop_vq_intr(sc->sc_rx_vq);
1914 virtio_stop_vq_intr(sc->sc_tx_vq);
1915 virtio_device_reset(&sc->sc_virtio);
1916
1917 return (DDI_SUCCESS);
1918 }
1919
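/*
 * Loadable module entry points. mac_init_ops() must wrap vioif_ops before
 * mod_install() so the MAC framework can hook the driver's dev_ops, and
 * mac_fini_ops() undoes that if installation fails or when the module is
 * removed.
 */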
1920 int
1921 _init(void)
1922 {
1923 int ret = 0;
1924
1925 mac_init_ops(&vioif_ops, "vioif");
1926
1927 ret = mod_install(&modlinkage);
1928 if (ret != DDI_SUCCESS) {
1929 mac_fini_ops(&vioif_ops);
1930 return (ret);
1931 }
1932
1933 return (0);
1934 }
1935
1936 int
1937 _fini(void)
1938 {
1939 int ret;
1940
1941 ret = mod_remove(&modlinkage);
1942 if (ret == DDI_SUCCESS) {
1943 mac_fini_ops(&vioif_ops);
1944 }
1945
1946 return (ret);
1947 }
1948
1949 int
1950 _info(struct modinfo *pModinfo)
1951 {
1952 return (mod_info(&modlinkage, pModinfo));
1953 }