1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
14 * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
15 */
16
17 /* Based on the NetBSD virtio driver by Minoura Makoto. */
18 /*
19 * Copyright (c) 2010 Minoura Makoto.
20 * All rights reserved.
21 *
22 * Redistribution and use in source and binary forms, with or without
23 * modification, are permitted provided that the following conditions
24 * are met:
25 * 1. Redistributions of source code must retain the above copyright
26 * notice, this list of conditions and the following disclaimer.
27 * 2. Redistributions in binary form must reproduce the above copyright
28 * notice, this list of conditions and the following disclaimer in the
29 * documentation and/or other materials provided with the distribution.
30 *
31 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
32 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
33 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
34 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
267 };
268
269 struct vioif_softc {
270 dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */
271 struct virtio_softc sc_virtio;
272
273 mac_handle_t sc_mac_handle;
274 mac_register_t *sc_macp;
275
276 struct virtqueue *sc_rx_vq;
277 struct virtqueue *sc_tx_vq;
278 struct virtqueue *sc_ctrl_vq;
279
280 unsigned int sc_tx_stopped:1;
281
282 /* Feature bits. */
283 unsigned int sc_rx_csum:1;
284 unsigned int sc_tx_csum:1;
285 unsigned int sc_tx_tso4:1;
286
287 int sc_mtu;
288 uint8_t sc_mac[ETHERADDRL];
289 /*
290 * For rx buffers, we keep a pointer array, because the buffers
291 * can be loaned upstream, and we have to repopulate the array with
292 * new members.
293 */
294 struct vioif_rx_buf **sc_rxbufs;
295
296 /*
297 * For tx, we just allocate an array of buffers. The packet can
298 * either be copied into the inline buffer, or the external mapping
299 * could be used to map the packet
300 */
301 struct vioif_tx_buf *sc_txbufs;
302
303 kstat_t *sc_intrstat;
304 /*
305 * We "loan" rx buffers upstream and reuse them after they are
306 * freed. This lets us avoid allocations in the hot path.
307 */
308 kmem_cache_t *sc_rxbuf_cache;
309 ulong_t sc_rxloan;
310
311 /* Copying small packets turns out to be faster than mapping them. */
312 unsigned long sc_rxcopy_thresh;
313 unsigned long sc_txcopy_thresh;
314 /* Statistics visible through mac. */
315 uint64_t sc_ipackets;
316 uint64_t sc_opackets;
317 uint64_t sc_rbytes;
318 uint64_t sc_obytes;
319 uint64_t sc_brdcstxmt;
320 uint64_t sc_brdcstrcv;
321 uint64_t sc_multixmt;
322 uint64_t sc_multircv;
323 uint64_t sc_norecvbuf;
324 uint64_t sc_notxbuf;
325 uint64_t sc_ierrors;
326 uint64_t sc_oerrors;
327 };
328
329 #define ETHER_HEADER_LEN sizeof (struct ether_header)
330
331 /* MTU + the ethernet header. */
332 #define MAX_PAYLOAD 65535
333 #define MAX_MTU (MAX_PAYLOAD - ETHER_HEADER_LEN)
334 #define DEFAULT_MTU ETHERMTU
335
336 /*
337 * Yeah, we spend 8M per device. Turns out, there is no point
338 * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
339 * because vhost does not support them, and we expect to be used with
340 * vhost in production environment.
341 */
342 /* The buffer keeps both the packet data and the virtio_net_header. */
343 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
344
345 /*
346 * We win a bit on header alignment, but the host wins a lot
456 static void
457 vioif_rx_free(caddr_t free_arg)
458 {
459 struct vioif_rx_buf *buf = (void *) free_arg;
460 struct vioif_softc *sc = buf->rb_sc;
461
462 kmem_cache_free(sc->sc_rxbuf_cache, buf);
463 atomic_dec_ulong(&sc->sc_rxloan);
464 }
465
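/*
 * kmem cache constructor for rx buffers: allocate a DMA handle and memory
 * large enough for the virtio_net_hdr plus a full-size packet, bind it, and
 * set up the free routine used when a loaned buffer is returned to us.
 */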
466 static int
467 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
468 {
469 _NOTE(ARGUNUSED(kmflags));
470 struct vioif_softc *sc = user_arg;
471 struct vioif_rx_buf *buf = buffer;
472 size_t len;
473
474 if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
475 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
476 dev_err(sc->sc_dev, CE_WARN,
477 "Can't allocate dma handle for rx buffer");
478 goto exit_handle;
479 }
480
481 if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
482 VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
483 &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
484 NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
485 dev_err(sc->sc_dev, CE_WARN,
486 "Can't allocate rx buffer");
487 goto exit_alloc;
488 }
489 ASSERT(len >= VIOIF_RX_SIZE);
490
491 if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
492 buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
493 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
494 &buf->rb_mapping.vbm_ncookies)) {
495 dev_err(sc->sc_dev, CE_WARN, "Can't bind rx buffer");
496
497 goto exit_bind;
498 }
499
500 ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
501
502 buf->rb_sc = sc;
503 buf->rb_frtn.free_arg = (void *) buf;
504 buf->rb_frtn.free_func = vioif_rx_free;
505
506 return (0);
507 exit_bind:
508 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
509 exit_alloc:
510 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
511 exit_handle:
512
513 return (ENOMEM);
514 }
515
516 static void
700 return (DDI_SUCCESS);
701 }
702
703 /* ARGSUSED */
704 int
705 vioif_unicst(void *arg, const uint8_t *macaddr)
706 {
707 return (DDI_FAILURE);
708 }
709
710
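/*
 * Post free rx buffers to the rx virtqueue, allocating a buffer from the
 * kmem cache for any ring slot that does not yet have one, and make them
 * available to the device.
 */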
711 static uint_t
712 vioif_add_rx(struct vioif_softc *sc, int kmflag)
713 {
714 uint_t num_added = 0;
715 struct vq_entry *ve;
716
717 while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
718 struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
719
720 if (!buf) {
721 /* First run, allocate the buffer. */
722 buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
723 sc->sc_rxbufs[ve->qe_index] = buf;
724 }
725
726 /* Still nothing? Bye. */
727 if (!buf) {
728 dev_err(sc->sc_dev, CE_WARN,
729 "Can't allocate rx buffer");
730 sc->sc_norecvbuf++;
731 vq_free_entry(sc->sc_rx_vq, ve);
732 break;
733 }
734
735 ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
736
737 /*
738 * For an unknown reason, the virtio_net_hdr must be placed
739 * as a separate virtio queue entry.
740 */
741 virtio_ve_add_indirect_buf(ve,
742 buf->rb_mapping.vbm_dmac.dmac_laddress,
743 sizeof (struct virtio_net_hdr), B_FALSE);
744
745 /* Add the rest of the first cookie. */
746 virtio_ve_add_indirect_buf(ve,
747 buf->rb_mapping.vbm_dmac.dmac_laddress +
748 sizeof (struct virtio_net_hdr),
749 buf->rb_mapping.vbm_dmac.dmac_size -
783 virtio_sync_vq(sc->sc_rx_vq);
784
785 return (num_added);
786 }
787
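/*
 * Pull received frames off the rx virtqueue. Small frames are copied into
 * freshly allocated mblks so the rx buffer can be reused immediately;
 * larger frames are loaned upstream via desballoc() and the ring slot is
 * repopulated with a new buffer later.
 */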
788 static uint_t
789 vioif_process_rx(struct vioif_softc *sc)
790 {
791 struct vq_entry *ve;
792 struct vioif_rx_buf *buf;
793 mblk_t *mphead = NULL, *lastmp = NULL, *mp;
794 uint32_t len;
795 uint_t num_processed = 0;
796
797 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
798
799 buf = sc->sc_rxbufs[ve->qe_index];
800 ASSERT(buf);
801
802 if (len < sizeof (struct virtio_net_hdr)) {
803 dev_err(sc->sc_dev, CE_WARN, "RX: Chain too small: %u",
804 len - (uint32_t)sizeof (struct virtio_net_hdr));
805 sc->sc_ierrors++;
806 virtio_free_chain(ve);
807 continue;
808 }
809
810 len -= sizeof (struct virtio_net_hdr);
811 /*
812 * We copy small packets that happen to fit into a single
813 * cookie and reuse the buffers. For bigger ones, we loan
814 * the buffers upstream.
815 */
816 if (len < sc->sc_rxcopy_thresh) {
817 mp = allocb(len, 0);
818 if (!mp) {
819 sc->sc_norecvbuf++;
820 sc->sc_ierrors++;
821
822 virtio_free_chain(ve);
823 break;
824 }
825
826 bcopy((char *)buf->rb_mapping.vbm_buf +
827 sizeof (struct virtio_net_hdr), mp->b_rptr, len);
828 mp->b_wptr = mp->b_rptr + len;
829
830 } else {
831 mp = desballoc((unsigned char *)
832 buf->rb_mapping.vbm_buf +
833 sizeof (struct virtio_net_hdr) +
834 VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
835 if (!mp) {
836 sc->sc_norecvbuf++;
837 sc->sc_ierrors++;
838
839 virtio_free_chain(ve);
840 break;
841 }
842 mp->b_wptr = mp->b_rptr + len;
843
844 atomic_inc_ulong(&sc->sc_rxloan);
845 /*
846 * Buffer loaned, we will have to allocate a new one
847 * for this slot.
848 */
849 sc->sc_rxbufs[ve->qe_index] = NULL;
850 }
851
852 /*
853 * virtio-net does not tell us if this packet is multicast
854 * or broadcast, so we have to check it.
855 */
881 return (num_processed);
882 }
883
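/*
 * Reclaim tx descriptors that the device has finished with, unbinding any
 * external DMA mappings and freeing the associated mblks, and restart the
 * MAC layer if transmission had been stopped for lack of descriptors.
 */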
884 static uint_t
885 vioif_reclaim_used_tx(struct vioif_softc *sc)
886 {
887 struct vq_entry *ve;
888 struct vioif_tx_buf *buf;
889 uint32_t len;
890 mblk_t *mp;
891 uint_t num_reclaimed = 0;
892
893 while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
894 /* We don't chain descriptors for tx, so don't expect any. */
895 ASSERT(!ve->qe_next);
896
897 buf = &sc->sc_txbufs[ve->qe_index];
898 mp = buf->tb_mp;
899 buf->tb_mp = NULL;
900
901 if (mp) {
902 for (int i = 0; i < buf->tb_external_num; i++)
903 (void) ddi_dma_unbind_handle(
904 buf->tb_external_mapping[i].vbm_dmah);
905 }
906
907 virtio_free_chain(ve);
908
909 /* External mapping used, mp was not freed in vioif_send() */
910 if (mp)
911 freemsg(mp);
912 num_reclaimed++;
913 }
914
915 if (sc->sc_tx_stopped && num_reclaimed > 0) {
916 sc->sc_tx_stopped = 0;
917 mac_tx_update(sc->sc_mac_handle);
918 }
919
920 return (num_reclaimed);
921 }
922
923 /* sc will be used to update stat counters. */
924 /* ARGSUSED */
925 static inline void
926 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
927 size_t msg_size)
928 {
929 struct vioif_tx_buf *buf;
930 buf = &sc->sc_txbufs[ve->qe_index];
934 /* Frees mp */
935 mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
936 sizeof (struct virtio_net_hdr));
937
938 virtio_ve_add_indirect_buf(ve,
939 buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
940 sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
941 }
942
943 static inline int
944 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
945 int i)
946 {
947 int ret = DDI_SUCCESS;
948
949 if (!buf->tb_external_mapping[i].vbm_dmah) {
950 ret = ddi_dma_alloc_handle(sc->sc_dev,
951 &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
952 &buf->tb_external_mapping[i].vbm_dmah);
953 if (ret != DDI_SUCCESS) {
954 dev_err(sc->sc_dev, CE_WARN,
955 "Can't allocate dma handle for external tx buffer");
956 }
957 }
958
959 return (ret);
960 }
961
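/*
 * Map the fragments of an outgoing packet with DMA and add the resulting
 * cookies to the indirect descriptor table. The mblk chain is kept until
 * the descriptor is reclaimed after transmission.
 */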
962 static inline int
963 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
964 size_t msg_size)
965 {
966 _NOTE(ARGUNUSED(msg_size));
967
968 struct vioif_tx_buf *buf;
969 mblk_t *nmp;
970 int i, j;
971 int ret = DDI_SUCCESS;
972
973 buf = &sc->sc_txbufs[ve->qe_index];
974
975 ASSERT(buf);
989 * actually send us zero-length fragments.
990 */
991 if (len == 0) {
992 nmp = nmp->b_cont;
993 continue;
994 }
995
996 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
997 if (ret != DDI_SUCCESS) {
998 sc->sc_notxbuf++;
999 sc->sc_oerrors++;
1000 goto exit_lazy_alloc;
1001 }
1002 ret = ddi_dma_addr_bind_handle(
1003 buf->tb_external_mapping[i].vbm_dmah, NULL,
1004 (caddr_t)nmp->b_rptr, len,
1005 DDI_DMA_WRITE | DDI_DMA_STREAMING,
1006 DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1007
1008 if (ret != DDI_SUCCESS) {
1009 sc->sc_oerrors++;
1010 dev_err(sc->sc_dev, CE_NOTE,
1011 "TX: Failed to bind external handle");
1012 goto exit_bind;
1013 }
1014
1015 /* Check if we still fit into the indirect table. */
1016 if (virtio_ve_indirect_available(ve) < ncookies) {
1017 dev_err(sc->sc_dev, CE_NOTE,
1018 "TX: Indirect descriptor table limit reached."
1019 " It took %d fragments.", i);
1020 sc->sc_notxbuf++;
1021 sc->sc_oerrors++;
1022
1023 ret = DDI_FAILURE;
1024 goto exit_limit;
1025 }
1026
1027 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1028 dmac, ncookies, B_TRUE);
1029
1030 nmp = nmp->b_cont;
1031 i++;
1032 }
1033
1034 buf->tb_external_num = i;
1035 /* Save the mp to free it when the packet is sent. */
1036 buf->tb_mp = mp;
1037
1038 return (DDI_SUCCESS);
1039
1058 size_t msg_size = 0;
1059 uint32_t csum_start;
1060 uint32_t csum_stuff;
1061 uint32_t csum_flags;
1062 uint32_t lso_flags;
1063 uint32_t lso_mss;
1064 mblk_t *nmp;
1065 int ret;
1066 boolean_t lso_required = B_FALSE;
1067
1068 for (nmp = mp; nmp; nmp = nmp->b_cont)
1069 msg_size += MBLKL(nmp);
1070
1071 if (sc->sc_tx_tso4) {
1072 mac_lso_get(mp, &lso_mss, &lso_flags);
1073 lso_required = (lso_flags & HW_LSO);
1074 }
1075
1076 ve = vq_alloc_entry(sc->sc_tx_vq);
1077
1078 if (!ve) {
1079 sc->sc_notxbuf++;
1080 /* Out of free descriptors - try later. */
1081 return (B_FALSE);
1082 }
1083 buf = &sc->sc_txbufs[ve->qe_index];
1084
1085 /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1086 (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1087 sizeof (struct virtio_net_hdr));
1088
1089 net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1090
1091 mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1092 NULL, &csum_flags);
1093
1094 /* They want us to do the TCP/UDP csum calculation. */
1095 if (csum_flags & HCK_PARTIALCKSUM) {
1096 struct ether_header *eth_header;
1097 int eth_hsize;
1098
1176 mp->b_next = NULL;
1177
1178 if (!vioif_send(sc, mp)) {
1179 sc->sc_tx_stopped = 1;
1180 mp->b_next = nmp;
1181 break;
1182 }
1183 mp = nmp;
1184 }
1185
1186 return (mp);
1187 }
1188
1189 int
1190 vioif_start(void *arg)
1191 {
1192 struct vioif_softc *sc = arg;
1193 struct vq_entry *ve;
1194 uint32_t len;
1195
1196 mac_link_update(sc->sc_mac_handle,
1197 vioif_link_state(sc));
1198
1199 virtio_start_vq_intr(sc->sc_rx_vq);
1200
1201 /*
1202 * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1203 * so the device will send a transmit interrupt when the queue is empty
1204 * and we can reclaim it in one sweep.
1205 */
1206
1207 /*
1208 * Clear any data that arrived early on the receive queue and populate
1209 * it with free buffers that the device can use moving forward.
1210 */
1211 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1212 virtio_free_chain(ve);
1213 }
1214 (void) vioif_populate_rx(sc, KM_SLEEP);
1215
1216 return (DDI_SUCCESS);
1217 }
1393 }
1394 return (err);
1395 }
1396
1397 static void
1398 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1399 mac_prop_info_handle_t prh)
1400 {
1401 struct vioif_softc *sc = arg;
1402 char valstr[64];
1403 int value;
1404
1405 switch (pr_num) {
1406 case MAC_PROP_MTU:
1407 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1408 break;
1409
1410 case MAC_PROP_PRIVATE:
1411 bzero(valstr, sizeof (valstr));
1412 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1413
1414 value = sc->sc_txcopy_thresh;
1415 } else if (strcmp(pr_name,
1416 vioif_rxcopy_thresh) == 0) {
1417 value = sc->sc_rxcopy_thresh;
1418 } else {
1419 return;
1420 }
1421 (void) snprintf(valstr, sizeof (valstr), "%d", value);
1422 break;
1423
1424 default:
1425 break;
1426 }
1427 }
1428
1429 static boolean_t
1430 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1431 {
1432 struct vioif_softc *sc = arg;
1433
1434 switch (cap) {
1435 case MAC_CAPAB_HCKSUM:
1436 if (sc->sc_tx_csum) {
1472 .mc_close = NULL, /* mc_close */
1473 .mc_setprop = vioif_setprop,
1474 .mc_getprop = vioif_getprop,
1475 .mc_propinfo = vioif_propinfo,
1476 };
1477
1478 static void
1479 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1480 uint32_t features)
1481 {
1482 char buf[512];
1483 char *bufp = buf;
1484 char *bufend = buf + sizeof (buf);
1485
1486 /* LINTED E_PTRDIFF_OVERFLOW */
1487 bufp += snprintf(bufp, bufend - bufp, prefix);
1488 /* LINTED E_PTRDIFF_OVERFLOW */
1489 bufp += virtio_show_features(features, bufp, bufend - bufp);
1490 *bufp = '\0';
1491
1492
1493 /* Using '!' to only CE_NOTE this to the system log. */
1494 dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1495 VIRTIO_NET_FEATURE_BITS);
1496 }
1497
1498 /*
1499 * Find out which features are supported by the device and
1500 * choose which ones we wish to use.
1501 */
1502 static int
1503 vioif_dev_features(struct vioif_softc *sc)
1504 {
1505 uint32_t host_features;
1506
1507 host_features = virtio_negotiate_features(&sc->sc_virtio,
1508 VIRTIO_NET_F_CSUM |
1509 VIRTIO_NET_F_HOST_TSO4 |
1510 VIRTIO_NET_F_HOST_ECN |
1511 VIRTIO_NET_F_MAC |
1512 VIRTIO_NET_F_STATUS |
1513 VIRTIO_F_RING_INDIRECT_DESC |
1514 VIRTIO_F_NOTIFY_ON_EMPTY);
1515
1516 vioif_show_features(sc, "Host features: ", host_features);
1517 vioif_show_features(sc, "Negotiated features: ",
1518 sc->sc_virtio.sc_features);
1519
1520 if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1521 dev_err(sc->sc_dev, CE_WARN,
1522 "Host does not support RING_INDIRECT_DESC. Cannot attach.");
1523 return (DDI_FAILURE);
1524 }
1525
1526 return (DDI_SUCCESS);
1527 }
1528
1529 static int
1530 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1531 {
1532 return (virtio_has_feature(&sc->sc_virtio, feature));
1533 }
1534
1535 static void
1536 vioif_set_mac(struct vioif_softc *sc)
1537 {
1538 int i;
1539
1540 for (i = 0; i < ETHERADDRL; i++) {
1541 virtio_write_device_config_1(&sc->sc_virtio,
1542 VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1543 }
1544 }
1545
1546 /* Get the mac address out of the hardware, or make up one. */
1547 static void
1548 vioif_get_mac(struct vioif_softc *sc)
1549 {
1550 int i;
1551 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1552 for (i = 0; i < ETHERADDRL; i++) {
1553 sc->sc_mac[i] = virtio_read_device_config_1(
1554 &sc->sc_virtio,
1555 VIRTIO_NET_CONFIG_MAC + i);
1556 }
1557 dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
1558 ether_sprintf((struct ether_addr *)sc->sc_mac));
1559 } else {
1560 /* Get a few random bytes */
1561 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1562 /* Make sure it's a unicast MAC */
1563 sc->sc_mac[0] &= ~1;
1564 /* Set the "locally administered" bit */
1565 sc->sc_mac[1] |= 2;
1566
1567 vioif_set_mac(sc);
1568
1569 dev_err(sc->sc_dev, CE_NOTE,
1570 "Generated a random MAC address: %s",
1571 ether_sprintf((struct ether_addr *)sc->sc_mac));
1572 }
1573 }
1574
1575 /*
1576 * Virtqueue interrupt handlers
1577 */
1578 /* ARGSUSED */
1579 uint_t
1580 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1581 {
1582 struct virtio_softc *vsc = (void *) arg1;
1583 struct vioif_softc *sc = container_of(vsc,
1584 struct vioif_softc, sc_virtio);
1585
1586 /*
1587 * The return values of these functions are not needed but they make
1588 * debugging interrupts simpler because you can use them to detect when
1589 * stuff was processed and repopulated in this handler.
1590 */
1623 { NULL }
1624 };
1625
1626 ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1627
1628 return (ret);
1629 }
1630
1631
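/*
 * Examine the negotiated virtio feature bits and enable checksum offload
 * and TSO4/LSO support accordingly.
 */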
1632 static void
1633 vioif_check_features(struct vioif_softc *sc)
1634 {
1635 if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1636 /* The GSO/GRO features depend on CSUM, check them here. */
1637 sc->sc_tx_csum = 1;
1638 sc->sc_rx_csum = 1;
1639
1640 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1641 sc->sc_rx_csum = 0;
1642 }
1643 dev_err(sc->sc_dev, CE_NOTE, "!Csum enabled.");
1644
1645 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1646
1647 sc->sc_tx_tso4 = 1;
1648 /*
1649 * We don't seem to have a way to ask the system
1650 * not to send us LSO packets with Explicit
1651 * Congestion Notification bit set, so we require
1652 * the device to support it in order to do
1653 * LSO.
1654 */
1655 if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1656 dev_err(sc->sc_dev, CE_NOTE,
1657 "TSO4 supported, but not ECN. "
1658 "Not using LSO.");
1659 sc->sc_tx_tso4 = 0;
1660 } else {
1661 dev_err(sc->sc_dev, CE_NOTE, "!LSO enabled");
1662 }
1663 }
1664 }
1665 }
1666
1667 static int
1668 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1669 {
1670 int ret, instance;
1671 struct vioif_softc *sc;
1672 struct virtio_softc *vsc;
1673 mac_register_t *macp;
1674 char cache_name[CACHE_NAME_SIZE];
1675
1676 instance = ddi_get_instance(devinfo);
1677
1678 switch (cmd) {
1679 case DDI_ATTACH:
1680 break;
1681
1765 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1766 VIOIF_CTRL_QLEN, 0, "ctrl");
1767 if (!sc->sc_ctrl_vq) {
1768 goto exit_alloc3;
1769 }
1770 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1771 }
1772
1773 virtio_set_status(&sc->sc_virtio,
1774 VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1775
1776 sc->sc_rxloan = 0;
1777
1778 /* set some reasonable-small default values */
1779 sc->sc_rxcopy_thresh = 300;
1780 sc->sc_txcopy_thresh = 300;
1781 sc->sc_mtu = ETHERMTU;
1782
1783 vioif_check_features(sc);
1784
1785 if (vioif_alloc_mems(sc))
1786 goto exit_alloc_mems;
1787
1788 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1789 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1790 goto exit_macalloc;
1791 }
1792
1793 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1794 macp->m_driver = sc;
1795 macp->m_dip = devinfo;
1796 macp->m_src_addr = sc->sc_mac;
1797 macp->m_callbacks = &vioif_m_callbacks;
1798 macp->m_min_sdu = 0;
1799 macp->m_max_sdu = sc->sc_mtu;
1800 macp->m_margin = VLAN_TAGSZ;
1801 macp->m_priv_props = vioif_priv_props;
1802
1803 sc->sc_macp = macp;
1804
1805 /* Pre-fill the rx ring. */
1853 static int
1854 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1855 {
1856 struct vioif_softc *sc;
1857
1858 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1859 return (DDI_FAILURE);
1860
1861 switch (cmd) {
1862 case DDI_DETACH:
1863 break;
1864
1865 case DDI_PM_SUSPEND:
1866 /* We do not support suspend/resume for vioif. */
1867 return (DDI_FAILURE);
1868
1869 default:
1870 return (DDI_FAILURE);
1871 }
1872
1873 if (sc->sc_rxloan) {
1874 dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1875 " not detaching.");
1876 return (DDI_FAILURE);
1877 }
1878
1879 virtio_stop_vq_intr(sc->sc_rx_vq);
1880 virtio_stop_vq_intr(sc->sc_tx_vq);
1881
1882 virtio_release_ints(&sc->sc_virtio);
1883
1884 if (mac_unregister(sc->sc_mac_handle)) {
1885 return (DDI_FAILURE);
1886 }
1887
1888 mac_free(sc->sc_macp);
1889
1890 vioif_free_mems(sc);
1891 virtio_free_vq(sc->sc_rx_vq);
1892 virtio_free_vq(sc->sc_tx_vq);
1893
|
1 /*
2 * This file and its contents are supplied under the terms of the
3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 * You may only use this file in accordance with the terms of version
5 * 1.0 of the CDDL.
6 *
7 * A full copy of the text of the CDDL should have accompanied this
8 * source. A copy of the CDDL is also available via the Internet at
9 * http://www.illumos.org/license/CDDL.
10 */
11
12 /*
13 * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
14 * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
15 * Copyright 2015 Joyent, Inc.
16 */
17
18 /* Based on the NetBSD virtio driver by Minoura Makoto. */
19 /*
20 * Copyright (c) 2010 Minoura Makoto.
21 * All rights reserved.
22 *
23 * Redistribution and use in source and binary forms, with or without
24 * modification, are permitted provided that the following conditions
25 * are met:
26 * 1. Redistributions of source code must retain the above copyright
27 * notice, this list of conditions and the following disclaimer.
28 * 2. Redistributions in binary form must reproduce the above copyright
29 * notice, this list of conditions and the following disclaimer in the
30 * documentation and/or other materials provided with the distribution.
31 *
32 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
33 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
34 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
35 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
268 };
269
270 struct vioif_softc {
271 dev_info_t *sc_dev; /* mirrors virtio_softc->sc_dev */
272 struct virtio_softc sc_virtio;
273
274 mac_handle_t sc_mac_handle;
275 mac_register_t *sc_macp;
276
277 struct virtqueue *sc_rx_vq;
278 struct virtqueue *sc_tx_vq;
279 struct virtqueue *sc_ctrl_vq;
280
281 unsigned int sc_tx_stopped:1;
282
283 /* Feature bits. */
284 unsigned int sc_rx_csum:1;
285 unsigned int sc_tx_csum:1;
286 unsigned int sc_tx_tso4:1;
287
288 /*
289 * For debugging, it is useful to know whether the MAC address we
290 * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
291 * was otherwise generated or set from within the guest.
292 */
293 unsigned int sc_mac_from_host:1;
294
295 int sc_mtu;
296 uint8_t sc_mac[ETHERADDRL];
297 /*
298 * For rx buffers, we keep a pointer array, because the buffers
299 * can be loaned upstream, and we have to repopulate the array with
300 * new members.
301 */
302 struct vioif_rx_buf **sc_rxbufs;
303
304 /*
305 * For tx, we just allocate an array of buffers. The packet can
306 * either be copied into the inline buffer, or the external mapping
307 * could be used to map the packet
308 */
309 struct vioif_tx_buf *sc_txbufs;
310
311 kstat_t *sc_intrstat;
312 /*
313 * We "loan" rx buffers upstream and reuse them after they are
314 * freed. This lets us avoid allocations in the hot path.
315 */
316 kmem_cache_t *sc_rxbuf_cache;
317 ulong_t sc_rxloan;
318
319 /* Copying small packets turns out to be faster than mapping them. */
320 unsigned long sc_rxcopy_thresh;
321 unsigned long sc_txcopy_thresh;
322
323 /*
324 * Statistics visible through mac:
325 */
326 uint64_t sc_ipackets;
327 uint64_t sc_opackets;
328 uint64_t sc_rbytes;
329 uint64_t sc_obytes;
330 uint64_t sc_brdcstxmt;
331 uint64_t sc_brdcstrcv;
332 uint64_t sc_multixmt;
333 uint64_t sc_multircv;
334 uint64_t sc_norecvbuf;
335 uint64_t sc_notxbuf;
336 uint64_t sc_ierrors;
337 uint64_t sc_oerrors;
338
339 /*
340 * Internal debugging statistics:
341 */
342 uint64_t sc_rxfail_dma_handle;
343 uint64_t sc_rxfail_dma_buffer;
344 uint64_t sc_rxfail_dma_bind;
345 uint64_t sc_rxfail_chain_undersize;
346 uint64_t sc_rxfail_no_descriptors;
347 uint64_t sc_txfail_dma_handle;
348 uint64_t sc_txfail_dma_bind;
349 uint64_t sc_txfail_indirect_limit;
350 };
351
352 #define ETHER_HEADER_LEN sizeof (struct ether_header)
353
354 /* MTU + the ethernet header. */
355 #define MAX_PAYLOAD 65535
356 #define MAX_MTU (MAX_PAYLOAD - ETHER_HEADER_LEN)
357 #define DEFAULT_MTU ETHERMTU
358
359 /*
360 * Yeah, we spend 8M per device. Turns out, there is no point
361 * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
362 * because vhost does not support them, and we expect to be used with
363 * vhost in production environment.
364 */
365 /* The buffer keeps both the packet data and the virtio_net_header. */
366 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
367
368 /*
369 * We win a bit on header alignment, but the host wins a lot
479 static void
480 vioif_rx_free(caddr_t free_arg)
481 {
482 struct vioif_rx_buf *buf = (void *) free_arg;
483 struct vioif_softc *sc = buf->rb_sc;
484
485 kmem_cache_free(sc->sc_rxbuf_cache, buf);
486 atomic_dec_ulong(&sc->sc_rxloan);
487 }
488
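/*
 * kmem cache constructor for rx buffers: allocate a DMA handle and memory
 * large enough for the virtio_net_hdr plus a full-size packet, bind it, and
 * set up the free routine used when a loaned buffer is returned to us.
 */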
489 static int
490 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
491 {
492 _NOTE(ARGUNUSED(kmflags));
493 struct vioif_softc *sc = user_arg;
494 struct vioif_rx_buf *buf = buffer;
495 size_t len;
496
497 if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
498 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
499 sc->sc_rxfail_dma_handle++;
500 goto exit_handle;
501 }
502
503 if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
504 VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
505 &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
506 NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
507 sc->sc_rxfail_dma_buffer++;
508 goto exit_alloc;
509 }
510 ASSERT(len >= VIOIF_RX_SIZE);
511
512 if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
513 buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
514 DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
515 &buf->rb_mapping.vbm_ncookies)) {
516 sc->sc_rxfail_dma_bind++;
517 goto exit_bind;
518 }
519
520 ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
521
522 buf->rb_sc = sc;
523 buf->rb_frtn.free_arg = (void *) buf;
524 buf->rb_frtn.free_func = vioif_rx_free;
525
526 return (0);
527 exit_bind:
528 ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
529 exit_alloc:
530 ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
531 exit_handle:
532
533 return (ENOMEM);
534 }
535
536 static void
720 return (DDI_SUCCESS);
721 }
722
723 /* ARGSUSED */
724 int
725 vioif_unicst(void *arg, const uint8_t *macaddr)
726 {
727 return (DDI_FAILURE);
728 }
729
730
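/*
 * Post free rx buffers to the rx virtqueue, allocating a buffer from the
 * kmem cache for any ring slot that does not yet have one, and make them
 * available to the device.
 */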
731 static uint_t
732 vioif_add_rx(struct vioif_softc *sc, int kmflag)
733 {
734 uint_t num_added = 0;
735 struct vq_entry *ve;
736
737 while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
738 struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
739
740 if (buf == NULL) {
741 /* First run, allocate the buffer. */
742 buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
743 sc->sc_rxbufs[ve->qe_index] = buf;
744 }
745
746 /* Still nothing? Bye. */
747 if (buf == NULL) {
748 sc->sc_norecvbuf++;
749 vq_free_entry(sc->sc_rx_vq, ve);
750 break;
751 }
752
753 ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
754
755 /*
756 * For an unknown reason, the virtio_net_hdr must be placed
757 * as a separate virtio queue entry.
758 */
759 virtio_ve_add_indirect_buf(ve,
760 buf->rb_mapping.vbm_dmac.dmac_laddress,
761 sizeof (struct virtio_net_hdr), B_FALSE);
762
763 /* Add the rest of the first cookie. */
764 virtio_ve_add_indirect_buf(ve,
765 buf->rb_mapping.vbm_dmac.dmac_laddress +
766 sizeof (struct virtio_net_hdr),
767 buf->rb_mapping.vbm_dmac.dmac_size -
801 virtio_sync_vq(sc->sc_rx_vq);
802
803 return (num_added);
804 }
805
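/*
 * Pull received frames off the rx virtqueue. Small frames are copied into
 * freshly allocated mblks so the rx buffer can be reused immediately;
 * larger frames are loaned upstream via desballoc() and the ring slot is
 * repopulated with a new buffer later.
 */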
806 static uint_t
807 vioif_process_rx(struct vioif_softc *sc)
808 {
809 struct vq_entry *ve;
810 struct vioif_rx_buf *buf;
811 mblk_t *mphead = NULL, *lastmp = NULL, *mp;
812 uint32_t len;
813 uint_t num_processed = 0;
814
815 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
816
817 buf = sc->sc_rxbufs[ve->qe_index];
818 ASSERT(buf);
819
820 if (len < sizeof (struct virtio_net_hdr)) {
821 sc->sc_rxfail_chain_undersize++;
822 sc->sc_ierrors++;
823 virtio_free_chain(ve);
824 continue;
825 }
826
827 len -= sizeof (struct virtio_net_hdr);
828 /*
829 * We copy small packets that happen to fit into a single
830 * cookie and reuse the buffers. For bigger ones, we loan
831 * the buffers upstream.
832 */
833 if (len < sc->sc_rxcopy_thresh) {
834 mp = allocb(len, 0);
835 if (mp == NULL) {
836 sc->sc_norecvbuf++;
837 sc->sc_ierrors++;
838
839 virtio_free_chain(ve);
840 break;
841 }
842
843 bcopy((char *)buf->rb_mapping.vbm_buf +
844 sizeof (struct virtio_net_hdr), mp->b_rptr, len);
845 mp->b_wptr = mp->b_rptr + len;
846
847 } else {
848 mp = desballoc((unsigned char *)
849 buf->rb_mapping.vbm_buf +
850 sizeof (struct virtio_net_hdr) +
851 VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
852 if (mp == NULL) {
853 sc->sc_norecvbuf++;
854 sc->sc_ierrors++;
855
856 virtio_free_chain(ve);
857 break;
858 }
859 mp->b_wptr = mp->b_rptr + len;
860
861 atomic_inc_ulong(&sc->sc_rxloan);
862 /*
863 * Buffer loaned, we will have to allocate a new one
864 * for this slot.
865 */
866 sc->sc_rxbufs[ve->qe_index] = NULL;
867 }
868
869 /*
870 * virtio-net does not tell us if this packet is multicast
871 * or broadcast, so we have to check it.
872 */
898 return (num_processed);
899 }
900
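/*
 * Reclaim tx descriptors that the device has finished with, unbinding any
 * external DMA mappings and freeing the associated mblks, and restart the
 * MAC layer if transmission had been stopped for lack of descriptors.
 */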
901 static uint_t
902 vioif_reclaim_used_tx(struct vioif_softc *sc)
903 {
904 struct vq_entry *ve;
905 struct vioif_tx_buf *buf;
906 uint32_t len;
907 mblk_t *mp;
908 uint_t num_reclaimed = 0;
909
910 while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
911 /* We don't chain descriptors for tx, so don't expect any. */
912 ASSERT(!ve->qe_next);
913
914 buf = &sc->sc_txbufs[ve->qe_index];
915 mp = buf->tb_mp;
916 buf->tb_mp = NULL;
917
918 if (mp != NULL) {
919 for (int i = 0; i < buf->tb_external_num; i++)
920 (void) ddi_dma_unbind_handle(
921 buf->tb_external_mapping[i].vbm_dmah);
922 }
923
924 virtio_free_chain(ve);
925
926 /* External mapping used, mp was not freed in vioif_send() */
927 if (mp != NULL)
928 freemsg(mp);
929 num_reclaimed++;
930 }
931
932 if (sc->sc_tx_stopped && num_reclaimed > 0) {
933 sc->sc_tx_stopped = 0;
934 mac_tx_update(sc->sc_mac_handle);
935 }
936
937 return (num_reclaimed);
938 }
939
940 /* sc will be used to update stat counters. */
941 /* ARGSUSED */
942 static inline void
943 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
944 size_t msg_size)
945 {
946 struct vioif_tx_buf *buf;
947 buf = &sc->sc_txbufs[ve->qe_index];
951 /* Frees mp */
952 mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
953 sizeof (struct virtio_net_hdr));
954
955 virtio_ve_add_indirect_buf(ve,
956 buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
957 sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
958 }
959
960 static inline int
961 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
962 int i)
963 {
964 int ret = DDI_SUCCESS;
965
966 if (!buf->tb_external_mapping[i].vbm_dmah) {
967 ret = ddi_dma_alloc_handle(sc->sc_dev,
968 &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
969 &buf->tb_external_mapping[i].vbm_dmah);
970 if (ret != DDI_SUCCESS) {
971 sc->sc_txfail_dma_handle++;
972 }
973 }
974
975 return (ret);
976 }
977
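/*
 * Map the fragments of an outgoing packet with DMA and add the resulting
 * cookies to the indirect descriptor table. The mblk chain is kept until
 * the descriptor is reclaimed after transmission.
 */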
978 static inline int
979 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
980 size_t msg_size)
981 {
982 _NOTE(ARGUNUSED(msg_size));
983
984 struct vioif_tx_buf *buf;
985 mblk_t *nmp;
986 int i, j;
987 int ret = DDI_SUCCESS;
988
989 buf = &sc->sc_txbufs[ve->qe_index];
990
991 ASSERT(buf);
1005 * actually send us zero-length fragments.
1006 */
1007 if (len == 0) {
1008 nmp = nmp->b_cont;
1009 continue;
1010 }
1011
1012 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
1013 if (ret != DDI_SUCCESS) {
1014 sc->sc_notxbuf++;
1015 sc->sc_oerrors++;
1016 goto exit_lazy_alloc;
1017 }
1018 ret = ddi_dma_addr_bind_handle(
1019 buf->tb_external_mapping[i].vbm_dmah, NULL,
1020 (caddr_t)nmp->b_rptr, len,
1021 DDI_DMA_WRITE | DDI_DMA_STREAMING,
1022 DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1023
1024 if (ret != DDI_SUCCESS) {
1025 sc->sc_txfail_dma_bind++;
1026 sc->sc_oerrors++;
1027 goto exit_bind;
1028 }
1029
1030 /* Check if we still fit into the indirect table. */
1031 if (virtio_ve_indirect_available(ve) < ncookies) {
1032 sc->sc_txfail_indirect_limit++;
1033 sc->sc_notxbuf++;
1034 sc->sc_oerrors++;
1035
1036 ret = DDI_FAILURE;
1037 goto exit_limit;
1038 }
1039
1040 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1041 dmac, ncookies, B_TRUE);
1042
1043 nmp = nmp->b_cont;
1044 i++;
1045 }
1046
1047 buf->tb_external_num = i;
1048 /* Save the mp to free it when the packet is sent. */
1049 buf->tb_mp = mp;
1050
1051 return (DDI_SUCCESS);
1052
1071 size_t msg_size = 0;
1072 uint32_t csum_start;
1073 uint32_t csum_stuff;
1074 uint32_t csum_flags;
1075 uint32_t lso_flags;
1076 uint32_t lso_mss;
1077 mblk_t *nmp;
1078 int ret;
1079 boolean_t lso_required = B_FALSE;
1080
1081 for (nmp = mp; nmp; nmp = nmp->b_cont)
1082 msg_size += MBLKL(nmp);
1083
1084 if (sc->sc_tx_tso4) {
1085 mac_lso_get(mp, &lso_mss, &lso_flags);
1086 lso_required = (lso_flags & HW_LSO);
1087 }
1088
1089 ve = vq_alloc_entry(sc->sc_tx_vq);
1090
1091 if (ve == NULL) {
1092 sc->sc_notxbuf++;
1093 /* Out of free descriptors - try later. */
1094 return (B_FALSE);
1095 }
1096 buf = &sc->sc_txbufs[ve->qe_index];
1097
1098 /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1099 (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1100 sizeof (struct virtio_net_hdr));
1101
1102 net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1103
1104 mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1105 NULL, &csum_flags);
1106
1107 /* They want us to do the TCP/UDP csum calculation. */
1108 if (csum_flags & HCK_PARTIALCKSUM) {
1109 struct ether_header *eth_header;
1110 int eth_hsize;
1111
1189 mp->b_next = NULL;
1190
1191 if (!vioif_send(sc, mp)) {
1192 sc->sc_tx_stopped = 1;
1193 mp->b_next = nmp;
1194 break;
1195 }
1196 mp = nmp;
1197 }
1198
1199 return (mp);
1200 }
1201
1202 int
1203 vioif_start(void *arg)
1204 {
1205 struct vioif_softc *sc = arg;
1206 struct vq_entry *ve;
1207 uint32_t len;
1208
1209 mac_link_update(sc->sc_mac_handle, vioif_link_state(sc));
1210
1211 virtio_start_vq_intr(sc->sc_rx_vq);
1212
1213 /*
1214 * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1215 * so the device will send a transmit interrupt when the queue is empty
1216 * and we can reclaim it in one sweep.
1217 */
1218
1219 /*
1220 * Clear any data that arrived early on the receive queue and populate
1221 * it with free buffers that the device can use moving forward.
1222 */
1223 while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1224 virtio_free_chain(ve);
1225 }
1226 (void) vioif_populate_rx(sc, KM_SLEEP);
1227
1228 return (DDI_SUCCESS);
1229 }
1405 }
1406 return (err);
1407 }
1408
1409 static void
1410 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1411 mac_prop_info_handle_t prh)
1412 {
1413 struct vioif_softc *sc = arg;
1414 char valstr[64];
1415 int value;
1416
1417 switch (pr_num) {
1418 case MAC_PROP_MTU:
1419 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1420 break;
1421
1422 case MAC_PROP_PRIVATE:
1423 bzero(valstr, sizeof (valstr));
1424 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1425 value = sc->sc_txcopy_thresh;
1426 } else if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1427 value = sc->sc_rxcopy_thresh;
1428 } else {
1429 return;
1430 }
1431 (void) snprintf(valstr, sizeof (valstr), "%d", value);
1432 break;
1433
1434 default:
1435 break;
1436 }
1437 }
1438
1439 static boolean_t
1440 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1441 {
1442 struct vioif_softc *sc = arg;
1443
1444 switch (cap) {
1445 case MAC_CAPAB_HCKSUM:
1446 if (sc->sc_tx_csum) {
1482 .mc_close = NULL, /* mc_close */
1483 .mc_setprop = vioif_setprop,
1484 .mc_getprop = vioif_getprop,
1485 .mc_propinfo = vioif_propinfo,
1486 };
1487
1488 static void
1489 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1490 uint32_t features)
1491 {
1492 char buf[512];
1493 char *bufp = buf;
1494 char *bufend = buf + sizeof (buf);
1495
1496 /* LINTED E_PTRDIFF_OVERFLOW */
1497 bufp += snprintf(bufp, bufend - bufp, prefix);
1498 /* LINTED E_PTRDIFF_OVERFLOW */
1499 bufp += virtio_show_features(features, bufp, bufend - bufp);
1500 *bufp = '\0';
1501
1502 /* Using '!' to only CE_NOTE this to the system log. */
1503 dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1504 VIRTIO_NET_FEATURE_BITS);
1505 }
1506
1507 /*
1508 * Find out which features are supported by the device and
1509 * choose which ones we wish to use.
1510 */
1511 static int
1512 vioif_dev_features(struct vioif_softc *sc)
1513 {
1514 uint32_t host_features;
1515
1516 host_features = virtio_negotiate_features(&sc->sc_virtio,
1517 VIRTIO_NET_F_CSUM |
1518 VIRTIO_NET_F_HOST_TSO4 |
1519 VIRTIO_NET_F_HOST_ECN |
1520 VIRTIO_NET_F_MAC |
1521 VIRTIO_NET_F_STATUS |
1522 VIRTIO_F_RING_INDIRECT_DESC |
1523 VIRTIO_F_NOTIFY_ON_EMPTY);
1524
1525 vioif_show_features(sc, "Host features: ", host_features);
1526 vioif_show_features(sc, "Negotiated features: ",
1527 sc->sc_virtio.sc_features);
1528
1529 if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1530 dev_err(sc->sc_dev, CE_WARN,
1531 "Host does not support RING_INDIRECT_DESC. Cannot attach.");
1532 return (DDI_FAILURE);
1533 }
1534
1535 return (DDI_SUCCESS);
1536 }
1537
1538 static int
1539 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1540 {
1541 return (virtio_has_feature(&sc->sc_virtio, feature));
1542 }
1543
1544 static void
1545 vioif_set_mac(struct vioif_softc *sc)
1546 {
1547 int i;
1548
1549 for (i = 0; i < ETHERADDRL; i++) {
1550 virtio_write_device_config_1(&sc->sc_virtio,
1551 VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1552 }
1553 sc->sc_mac_from_host = 0;
1554 }
1555
1556 /* Get the mac address out of the hardware, or make up one. */
1557 static void
1558 vioif_get_mac(struct vioif_softc *sc)
1559 {
1560 int i;
1561 if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1562 for (i = 0; i < ETHERADDRL; i++) {
1563 sc->sc_mac[i] = virtio_read_device_config_1(
1564 &sc->sc_virtio,
1565 VIRTIO_NET_CONFIG_MAC + i);
1566 }
1567 sc->sc_mac_from_host = 1;
1568 } else {
1569 /* Get a few random bytes */
1570 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1571 /* Make sure it's a unicast MAC */
1572 sc->sc_mac[0] &= ~1;
1573 /* Set the "locally administered" bit */
1574 sc->sc_mac[1] |= 2;
1575
1576 vioif_set_mac(sc);
1577
1578 dev_err(sc->sc_dev, CE_NOTE,
1579 "!Generated a random MAC address: %s",
1580 ether_sprintf((struct ether_addr *)sc->sc_mac));
1581 }
1582 }
1583
1584 /*
1585 * Virtqueue interrupt handlers
1586 */
1587 /* ARGSUSED */
1588 uint_t
1589 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1590 {
1591 struct virtio_softc *vsc = (void *) arg1;
1592 struct vioif_softc *sc = container_of(vsc,
1593 struct vioif_softc, sc_virtio);
1594
1595 /*
1596 * The return values of these functions are not needed but they make
1597 * debugging interrupts simpler because you can use them to detect when
1598 * stuff was processed and repopulated in this handler.
1599 */
1632 { NULL }
1633 };
1634
1635 ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1636
1637 return (ret);
1638 }
1639
1640
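/*
 * Examine the negotiated virtio feature bits and enable checksum offload
 * and TSO4/LSO support accordingly.
 */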
1641 static void
1642 vioif_check_features(struct vioif_softc *sc)
1643 {
1644 if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1645 /* The GSO/GRO features depend on CSUM, check them here. */
1646 sc->sc_tx_csum = 1;
1647 sc->sc_rx_csum = 1;
1648
1649 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1650 sc->sc_rx_csum = 0;
1651 }
1652 dev_err(sc->sc_dev, CE_NOTE, "!Csum enabled.");
1653
1654 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1655
1656 sc->sc_tx_tso4 = 1;
1657 /*
1658 * We don't seem to have a way to ask the system
1659 * not to send us LSO packets with Explicit
1660 * Congestion Notification bit set, so we require
1661 * the device to support it in order to do
1662 * LSO.
1663 */
1664 if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1665 dev_err(sc->sc_dev, CE_NOTE,
1666 "!TSO4 supported, but not ECN. "
1667 "Not using LSO.");
1668 sc->sc_tx_tso4 = 0;
1669 } else {
1670 dev_err(sc->sc_dev, CE_NOTE, "!LSO enabled");
1671 }
1672 }
1673 }
1674 }
1675
1676 static int
1677 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1678 {
1679 int ret, instance;
1680 struct vioif_softc *sc;
1681 struct virtio_softc *vsc;
1682 mac_register_t *macp;
1683 char cache_name[CACHE_NAME_SIZE];
1684
1685 instance = ddi_get_instance(devinfo);
1686
1687 switch (cmd) {
1688 case DDI_ATTACH:
1689 break;
1690
1774 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1775 VIOIF_CTRL_QLEN, 0, "ctrl");
1776 if (!sc->sc_ctrl_vq) {
1777 goto exit_alloc3;
1778 }
1779 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1780 }
1781
1782 virtio_set_status(&sc->sc_virtio,
1783 VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1784
1785 sc->sc_rxloan = 0;
1786
1787 /* set some reasonable-small default values */
1788 sc->sc_rxcopy_thresh = 300;
1789 sc->sc_txcopy_thresh = 300;
1790 sc->sc_mtu = ETHERMTU;
1791
1792 vioif_check_features(sc);
1793
1794 if (vioif_alloc_mems(sc) != 0)
1795 goto exit_alloc_mems;
1796
1797 if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1798 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1799 goto exit_macalloc;
1800 }
1801
1802 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1803 macp->m_driver = sc;
1804 macp->m_dip = devinfo;
1805 macp->m_src_addr = sc->sc_mac;
1806 macp->m_callbacks = &vioif_m_callbacks;
1807 macp->m_min_sdu = 0;
1808 macp->m_max_sdu = sc->sc_mtu;
1809 macp->m_margin = VLAN_TAGSZ;
1810 macp->m_priv_props = vioif_priv_props;
1811
1812 sc->sc_macp = macp;
1813
1814 /* Pre-fill the rx ring. */
1862 static int
1863 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1864 {
1865 struct vioif_softc *sc;
1866
1867 if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1868 return (DDI_FAILURE);
1869
1870 switch (cmd) {
1871 case DDI_DETACH:
1872 break;
1873
1874 case DDI_PM_SUSPEND:
1875 /* We do not support suspend/resume for vioif. */
1876 return (DDI_FAILURE);
1877
1878 default:
1879 return (DDI_FAILURE);
1880 }
1881
1882 if (sc->sc_rxloan > 0) {
1883 dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1884 " not detaching.");
1885 return (DDI_FAILURE);
1886 }
1887
1888 virtio_stop_vq_intr(sc->sc_rx_vq);
1889 virtio_stop_vq_intr(sc->sc_tx_vq);
1890
1891 virtio_release_ints(&sc->sc_virtio);
1892
1893 if (mac_unregister(sc->sc_mac_handle)) {
1894 return (DDI_FAILURE);
1895 }
1896
1897 mac_free(sc->sc_macp);
1898
1899 vioif_free_mems(sc);
1900 virtio_free_vq(sc->sc_rx_vq);
1901 virtio_free_vq(sc->sc_tx_vq);
1902
|