1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2013 Nexenta Inc.  All rights reserved.
  14  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
  15  */
  16 
  17 /* Based on the NetBSD virtio driver by Minoura Makoto. */
  18 /*
  19  * Copyright (c) 2010 Minoura Makoto.
  20  * All rights reserved.
  21  *
  22  * Redistribution and use in source and binary forms, with or without
  23  * modification, are permitted provided that the following conditions
  24  * are met:
  25  * 1. Redistributions of source code must retain the above copyright
  26  *    notice, this list of conditions and the following disclaimer.
  27  * 2. Redistributions in binary form must reproduce the above copyright
  28  *    notice, this list of conditions and the following disclaimer in the
  29  *    documentation and/or other materials provided with the distribution.
  30  *
  31  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  32  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  33  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  34  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 
 267 };
 268 
 269 struct vioif_softc {
 270         dev_info_t              *sc_dev; /* mirrors virtio_softc->sc_dev */
 271         struct virtio_softc     sc_virtio;
 272 
 273         mac_handle_t sc_mac_handle;
 274         mac_register_t *sc_macp;
 275 
 276         struct virtqueue        *sc_rx_vq;
 277         struct virtqueue        *sc_tx_vq;
 278         struct virtqueue        *sc_ctrl_vq;
 279 
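             /*
              * Set when vioif_send() cannot get a free tx descriptor; cleared
              * by vioif_reclaim_used_tx(), which then calls mac_tx_update().
              */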
 280         unsigned int            sc_tx_stopped:1;
 281 
 282         /* Feature bits. */
 283         unsigned int            sc_rx_csum:1;
 284         unsigned int            sc_tx_csum:1;
 285         unsigned int            sc_tx_tso4:1;
 286 
 287         int                     sc_mtu;
 288         uint8_t                 sc_mac[ETHERADDRL];
 289         /*
 290          * For rx buffers, we keep a pointer array, because the buffers
 291          * can be loaned upstream, and we have to repopulate the array with
 292          * new members.
 293          */
 294         struct vioif_rx_buf     **sc_rxbufs;
 295 
 296         /*
 297          * For tx, we just allocate an array of buffers. The packet can
 298          * either be copied into the inline buffer, or the external mapping
 299          * can be used to map the packet.
 300          */
 301         struct vioif_tx_buf     *sc_txbufs;
 302 
 303         kstat_t                 *sc_intrstat;
 304         /*
 305          * We "loan" rx buffers upstream and reuse them after they are
 306          * freed. This lets us avoid allocations in the hot path.
 307          */
 308         kmem_cache_t            *sc_rxbuf_cache;
 309         ulong_t                 sc_rxloan;
 310 
 311         /* Copying small packets turns out to be faster than mapping them. */
 312         unsigned long           sc_rxcopy_thresh;
 313         unsigned long           sc_txcopy_thresh;
 314         /* Some statistics are kept here. */
 315         uint64_t                sc_ipackets;
 316         uint64_t                sc_opackets;
 317         uint64_t                sc_rbytes;
 318         uint64_t                sc_obytes;
 319         uint64_t                sc_brdcstxmt;
 320         uint64_t                sc_brdcstrcv;
 321         uint64_t                sc_multixmt;
 322         uint64_t                sc_multircv;
 323         uint64_t                sc_norecvbuf;
 324         uint64_t                sc_notxbuf;
 325         uint64_t                sc_ierrors;
 326         uint64_t                sc_oerrors;
 327 };
 328 
 329 #define ETHER_HEADER_LEN                sizeof (struct ether_header)
 330 
 331 /* MTU + the ethernet header. */
 332 #define MAX_PAYLOAD     65535
 333 #define MAX_MTU         (MAX_PAYLOAD - ETHER_HEADER_LEN)
 334 #define DEFAULT_MTU     ETHERMTU
 335 
 336 /*
 337  * Yeah, we spend 8M per device. Turns out, there is no point
 338  * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
 339  * because vhost does not support them, and we expect to be used with
 340  * vhost in production environments.
 341  */
 342 /* The buffer keeps both the packet data and the virtio_net_header. */
 343 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
 344 
 345 /*
 346  * We win a bit on header alignment, but the host wins a lot
 
 456 static void
 457 vioif_rx_free(caddr_t free_arg)
 458 {
 459         struct vioif_rx_buf *buf = (void *) free_arg;
 460         struct vioif_softc *sc = buf->rb_sc;
 461 
 462         kmem_cache_free(sc->sc_rxbuf_cache, buf);
 463         atomic_dec_ulong(&sc->sc_rxloan);
 464 }
 465 
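     /*
      * kmem cache constructor for rx buffers: the DMA handle, memory and
      * binding are set up once here, so buffers taken from the cache are
      * ready to be placed on the rx ring.
      */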
 466 static int
 467 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
 468 {
 469         _NOTE(ARGUNUSED(kmflags));
 470         struct vioif_softc *sc = user_arg;
 471         struct vioif_rx_buf *buf = buffer;
 472         size_t len;
 473 
 474         if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
 475             DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
 476                 dev_err(sc->sc_dev, CE_WARN,
 477                     "Can't allocate dma handle for rx buffer");
 478                 goto exit_handle;
 479         }
 480 
 481         if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
 482             VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
 483             &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
 484             NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
 485                 dev_err(sc->sc_dev, CE_WARN,
 486                     "Can't allocate rx buffer");
 487                 goto exit_alloc;
 488         }
 489         ASSERT(len >= VIOIF_RX_SIZE);
 490 
 491         if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
 492             buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
 493             DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
 494             &buf->rb_mapping.vbm_ncookies)) {
 495                 dev_err(sc->sc_dev, CE_WARN, "Can't bind rx buffer");
 496 
 497                 goto exit_bind;
 498         }
 499 
 500         ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
 501 
 502         buf->rb_sc = sc;
 503         buf->rb_frtn.free_arg = (void *) buf;
 504         buf->rb_frtn.free_func = vioif_rx_free;
 505 
 506         return (0);
 507 exit_bind:
 508         ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
 509 exit_alloc:
 510         ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
 511 exit_handle:
 512 
 513         return (ENOMEM);
 514 }
 515 
 516 static void
 
 700         return (DDI_SUCCESS);
 701 }
 702 
 703 /* ARGSUSED */
 704 int
 705 vioif_unicst(void *arg, const uint8_t *macaddr)
 706 {
 707         return (DDI_FAILURE);
 708 }
 709 
 710 
 711 static uint_t
 712 vioif_add_rx(struct vioif_softc *sc, int kmflag)
 713 {
 714         uint_t num_added = 0;
 715         struct vq_entry *ve;
 716 
 717         while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
 718                 struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
 719 
 720                 if (!buf) {
 721                         /* First run, allocate the buffer. */
 722                         buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
 723                         sc->sc_rxbufs[ve->qe_index] = buf;
 724                 }
 725 
 726                 /* Still nothing? Bye. */
 727                 if (!buf) {
 728                         dev_err(sc->sc_dev, CE_WARN,
 729                             "Can't allocate rx buffer");
 730                         sc->sc_norecvbuf++;
 731                         vq_free_entry(sc->sc_rx_vq, ve);
 732                         break;
 733                 }
 734 
 735                 ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
 736 
 737                 /*
 738                  * For an unknown reason, the virtio_net_hdr must be placed
 739                  * as a separate virtio queue entry.
 740                  */
 741                 virtio_ve_add_indirect_buf(ve,
 742                     buf->rb_mapping.vbm_dmac.dmac_laddress,
 743                     sizeof (struct virtio_net_hdr), B_FALSE);
 744 
 745                 /* Add the rest of the first cookie. */
 746                 virtio_ve_add_indirect_buf(ve,
 747                     buf->rb_mapping.vbm_dmac.dmac_laddress +
 748                     sizeof (struct virtio_net_hdr),
 749                     buf->rb_mapping.vbm_dmac.dmac_size -
 
 783                 virtio_sync_vq(sc->sc_rx_vq);
 784 
 785         return (num_added);
 786 }
 787 
 788 static uint_t
 789 vioif_process_rx(struct vioif_softc *sc)
 790 {
 791         struct vq_entry *ve;
 792         struct vioif_rx_buf *buf;
 793         mblk_t *mphead = NULL, *lastmp = NULL, *mp;
 794         uint32_t len;
 795         uint_t num_processed = 0;
 796 
 797         while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
 798 
 799                 buf = sc->sc_rxbufs[ve->qe_index];
 800                 ASSERT(buf);
 801 
 802                 if (len < sizeof (struct virtio_net_hdr)) {
 803                         dev_err(sc->sc_dev, CE_WARN, "RX: Chain too small: %u",
 804                             len - (uint32_t)sizeof (struct virtio_net_hdr));
 805                         sc->sc_ierrors++;
 806                         virtio_free_chain(ve);
 807                         continue;
 808                 }
 809 
 810                 len -= sizeof (struct virtio_net_hdr);
 811                 /*
 812                  * We copy small packets that happen to fit into a single
 813                  * cookie and reuse the buffers. For bigger ones, we loan
 814                  * the buffers upstream.
 815                  */
 816                 if (len < sc->sc_rxcopy_thresh) {
 817                         mp = allocb(len, 0);
 818                         if (!mp) {
 819                                 sc->sc_norecvbuf++;
 820                                 sc->sc_ierrors++;
 821 
 822                                 virtio_free_chain(ve);
 823                                 break;
 824                         }
 825 
 826                         bcopy((char *)buf->rb_mapping.vbm_buf +
 827                             sizeof (struct virtio_net_hdr), mp->b_rptr, len);
 828                         mp->b_wptr = mp->b_rptr + len;
 829 
 830                 } else {
 831                         mp = desballoc((unsigned char *)
 832                             buf->rb_mapping.vbm_buf +
 833                             sizeof (struct virtio_net_hdr) +
 834                             VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
 835                         if (!mp) {
 836                                 sc->sc_norecvbuf++;
 837                                 sc->sc_ierrors++;
 838 
 839                                 virtio_free_chain(ve);
 840                                 break;
 841                         }
 842                         mp->b_wptr = mp->b_rptr + len;
 843 
 844                         atomic_inc_ulong(&sc->sc_rxloan);
 845                         /*
 846                          * Buffer loaned, we will have to allocate a new one
 847                          * for this slot.
 848                          */
 849                         sc->sc_rxbufs[ve->qe_index] = NULL;
 850                 }
 851 
 852                 /*
 853                  * virtio-net does not tell us if this packet is multicast
 854                  * or broadcast, so we have to check it.
 855                  */
 
 881         return (num_processed);
 882 }
 883 
 884 static uint_t
 885 vioif_reclaim_used_tx(struct vioif_softc *sc)
 886 {
 887         struct vq_entry *ve;
 888         struct vioif_tx_buf *buf;
 889         uint32_t len;
 890         mblk_t *mp;
 891         uint_t num_reclaimed = 0;
 892 
 893         while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
 894                 /* We don't chain descriptors for tx, so don't expect any. */
 895                 ASSERT(!ve->qe_next);
 896 
 897                 buf = &sc->sc_txbufs[ve->qe_index];
 898                 mp = buf->tb_mp;
 899                 buf->tb_mp = NULL;
 900 
 901                 if (mp) {
 902                         for (int i = 0; i < buf->tb_external_num; i++)
 903                                 (void) ddi_dma_unbind_handle(
 904                                     buf->tb_external_mapping[i].vbm_dmah);
 905                 }
 906 
 907                 virtio_free_chain(ve);
 908 
 909                 /* External mapping used, mp was not freed in vioif_send() */
 910                 if (mp)
 911                         freemsg(mp);
 912                 num_reclaimed++;
 913         }
 914 
 915         if (sc->sc_tx_stopped && num_reclaimed > 0) {
 916                 sc->sc_tx_stopped = 0;
 917                 mac_tx_update(sc->sc_mac_handle);
 918         }
 919 
 920         return (num_reclaimed);
 921 }
 922 
 923 /* sc will be used to update stat counters. */
 924 /* ARGSUSED */
 925 static inline void
 926 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
 927     size_t msg_size)
 928 {
 929         struct vioif_tx_buf *buf;
 930         buf = &sc->sc_txbufs[ve->qe_index];
 
 934         /* Frees mp */
 935         mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
 936             sizeof (struct virtio_net_hdr));
 937 
 938         virtio_ve_add_indirect_buf(ve,
 939             buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
 940             sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
 941 }
 942 
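     /*
      * DMA handles for the external (mapped) tx path are allocated lazily,
      * the first time a given tx buffer slot needs to map a fragment.
      */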
 943 static inline int
 944 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
 945     int i)
 946 {
 947         int ret = DDI_SUCCESS;
 948 
 949         if (!buf->tb_external_mapping[i].vbm_dmah) {
 950                 ret = ddi_dma_alloc_handle(sc->sc_dev,
 951                     &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
 952                     &buf->tb_external_mapping[i].vbm_dmah);
 953                 if (ret != DDI_SUCCESS) {
 954                         dev_err(sc->sc_dev, CE_WARN,
 955                             "Can't allocate dma handle for external tx buffer");
 956                 }
 957         }
 958 
 959         return (ret);
 960 }
 961 
 962 static inline int
 963 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
 964     size_t msg_size)
 965 {
 966         _NOTE(ARGUNUSED(msg_size));
 967 
 968         struct vioif_tx_buf *buf;
 969         mblk_t *nmp;
 970         int i, j;
 971         int ret = DDI_SUCCESS;
 972 
 973         buf = &sc->sc_txbufs[ve->qe_index];
 974 
 975         ASSERT(buf);
 
 989                  * actually send us zero-length fragments.
 990                  */
 991                 if (len == 0) {
 992                         nmp = nmp->b_cont;
 993                         continue;
 994                 }
 995 
 996                 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
 997                 if (ret != DDI_SUCCESS) {
 998                         sc->sc_notxbuf++;
 999                         sc->sc_oerrors++;
1000                         goto exit_lazy_alloc;
1001                 }
1002                 ret = ddi_dma_addr_bind_handle(
1003                     buf->tb_external_mapping[i].vbm_dmah, NULL,
1004                     (caddr_t)nmp->b_rptr, len,
1005                     DDI_DMA_WRITE | DDI_DMA_STREAMING,
1006                     DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1007 
1008                 if (ret != DDI_SUCCESS) {
1009                         sc->sc_oerrors++;
1010                         dev_err(sc->sc_dev, CE_NOTE,
1011                             "TX: Failed to bind external handle");
1012                         goto exit_bind;
1013                 }
1014 
1015                 /* Check if we still fit into the indirect table. */
1016                 if (virtio_ve_indirect_available(ve) < ncookies) {
1017                         dev_err(sc->sc_dev, CE_NOTE,
1018                             "TX: Indirect descriptor table limit reached."
1019                             " It took %d fragments.", i);
1020                         sc->sc_notxbuf++;
1021                         sc->sc_oerrors++;
1022 
1023                         ret = DDI_FAILURE;
1024                         goto exit_limit;
1025                 }
1026 
1027                 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1028                     dmac, ncookies, B_TRUE);
1029 
1030                 nmp = nmp->b_cont;
1031                 i++;
1032         }
1033 
1034         buf->tb_external_num = i;
1035         /* Save the mp to free it when the packet is sent. */
1036         buf->tb_mp = mp;
1037 
1038         return (DDI_SUCCESS);
1039 
 
1058         size_t msg_size = 0;
1059         uint32_t csum_start;
1060         uint32_t csum_stuff;
1061         uint32_t csum_flags;
1062         uint32_t lso_flags;
1063         uint32_t lso_mss;
1064         mblk_t *nmp;
1065         int ret;
1066         boolean_t lso_required = B_FALSE;
1067 
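             /* Compute the total message size across the mblk chain. */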
1068         for (nmp = mp; nmp; nmp = nmp->b_cont)
1069                 msg_size += MBLKL(nmp);
1070 
1071         if (sc->sc_tx_tso4) {
1072                 mac_lso_get(mp, &lso_mss, &lso_flags);
1073                 lso_required = (lso_flags & HW_LSO);
1074         }
1075 
1076         ve = vq_alloc_entry(sc->sc_tx_vq);
1077 
1078         if (!ve) {
1079                 sc->sc_notxbuf++;
1080                 /* Out of free descriptors - try later. */
1081                 return (B_FALSE);
1082         }
1083         buf = &sc->sc_txbufs[ve->qe_index];
1084 
1085         /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1086         (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1087             sizeof (struct virtio_net_hdr));
1088 
1089         net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1090 
1091         mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1092             NULL, &csum_flags);
1093 
1094         /* They want us to do the TCP/UDP csum calculation. */
1095         if (csum_flags & HCK_PARTIALCKSUM) {
1096                 struct ether_header *eth_header;
1097                 int eth_hsize;
1098 
 
1176                 mp->b_next = NULL;
1177 
1178                 if (!vioif_send(sc, mp)) {
1179                         sc->sc_tx_stopped = 1;
1180                         mp->b_next = nmp;
1181                         break;
1182                 }
1183                 mp = nmp;
1184         }
1185 
1186         return (mp);
1187 }
1188 
1189 int
1190 vioif_start(void *arg)
1191 {
1192         struct vioif_softc *sc = arg;
1193         struct vq_entry *ve;
1194         uint32_t len;
1195 
1196         mac_link_update(sc->sc_mac_handle,
1197             vioif_link_state(sc));
1198 
1199         virtio_start_vq_intr(sc->sc_rx_vq);
1200 
1201         /*
1202          * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1203          * so the device will send a transmit interrupt when the queue is empty
1204          * and we can reclaim it in one sweep.
1205          */
1206 
1207         /*
1208          * Clear any data that arrived early on the receive queue and populate
1209          * it with free buffers that the device can use moving forward.
1210          */
1211         while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1212                 virtio_free_chain(ve);
1213         }
1214         (void) vioif_populate_rx(sc, KM_SLEEP);
1215 
1216         return (DDI_SUCCESS);
1217 }
 
1393         }
1394         return (err);
1395 }
1396 
1397 static void
1398 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1399     mac_prop_info_handle_t prh)
1400 {
1401         struct vioif_softc *sc = arg;
1402         char valstr[64];
1403         int value;
1404 
1405         switch (pr_num) {
1406         case MAC_PROP_MTU:
1407                 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1408                 break;
1409 
1410         case MAC_PROP_PRIVATE:
1411                 bzero(valstr, sizeof (valstr));
1412                 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1413 
1414                         value = sc->sc_txcopy_thresh;
1415                 } else  if (strcmp(pr_name,
1416                     vioif_rxcopy_thresh) == 0) {
1417                         value = sc->sc_rxcopy_thresh;
1418                 } else {
1419                         return;
1420                 }
1421                 (void) snprintf(valstr, sizeof (valstr), "%d", value);
1422                 break;
1423 
1424         default:
1425                 break;
1426         }
1427 }
1428 
1429 static boolean_t
1430 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1431 {
1432         struct vioif_softc *sc = arg;
1433 
1434         switch (cap) {
1435         case MAC_CAPAB_HCKSUM:
1436                 if (sc->sc_tx_csum) {
 
1472         .mc_close       = NULL,         /* mc_close */
1473         .mc_setprop     = vioif_setprop,
1474         .mc_getprop     = vioif_getprop,
1475         .mc_propinfo    = vioif_propinfo,
1476 };
1477 
1478 static void
1479 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1480     uint32_t features)
1481 {
1482         char buf[512];
1483         char *bufp = buf;
1484         char *bufend = buf + sizeof (buf);
1485 
1486         /* LINTED E_PTRDIFF_OVERFLOW */
1487         bufp += snprintf(bufp, bufend - bufp, prefix);
1488         /* LINTED E_PTRDIFF_OVERFLOW */
1489         bufp += virtio_show_features(features, bufp, bufend - bufp);
1490         *bufp = '\0';
1491 
1492 
1493         /* Using '!' to only CE_NOTE this to the system log. */
1494         dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1495             VIRTIO_NET_FEATURE_BITS);
1496 }
1497 
1498 /*
1499  * Find out which features are supported by the device and
1500  * choose which ones we wish to use.
1501  */
1502 static int
1503 vioif_dev_features(struct vioif_softc *sc)
1504 {
1505         uint32_t host_features;
1506 
1507         host_features = virtio_negotiate_features(&sc->sc_virtio,
1508             VIRTIO_NET_F_CSUM |
1509             VIRTIO_NET_F_HOST_TSO4 |
1510             VIRTIO_NET_F_HOST_ECN |
1511             VIRTIO_NET_F_MAC |
1512             VIRTIO_NET_F_STATUS |
1513             VIRTIO_F_RING_INDIRECT_DESC |
1514             VIRTIO_F_NOTIFY_ON_EMPTY);
1515 
1516         vioif_show_features(sc, "Host features: ", host_features);
1517         vioif_show_features(sc, "Negotiated features: ",
1518             sc->sc_virtio.sc_features);
1519 
1520         if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1521                 dev_err(sc->sc_dev, CE_NOTE,
1522                     "Host does not support RING_INDIRECT_DESC, bye.");
1523                 return (DDI_FAILURE);
1524         }
1525 
1526         return (DDI_SUCCESS);
1527 }
1528 
1529 static int
1530 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1531 {
1532         return (virtio_has_feature(&sc->sc_virtio, feature));
1533 }
1534 
1535 static void
1536 vioif_set_mac(struct vioif_softc *sc)
1537 {
1538         int i;
1539 
1540         for (i = 0; i < ETHERADDRL; i++) {
1541                 virtio_write_device_config_1(&sc->sc_virtio,
1542                     VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1543         }
1544 }
1545 
1546 /* Get the mac address out of the hardware, or make up one. */
1547 static void
1548 vioif_get_mac(struct vioif_softc *sc)
1549 {
1550         int i;
1551         if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1552                 for (i = 0; i < ETHERADDRL; i++) {
1553                         sc->sc_mac[i] = virtio_read_device_config_1(
1554                             &sc->sc_virtio,
1555                             VIRTIO_NET_CONFIG_MAC + i);
1556                 }
1557                 dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
1558                     ether_sprintf((struct ether_addr *)sc->sc_mac));
1559         } else {
1560                 /* Get a few random bytes */
1561                 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1562                 /* Make sure it's a unicast MAC */
1563                 sc->sc_mac[0] &= ~1;
1564                 /* Set the "locally administered" bit */
1565                 sc->sc_mac[1] |= 2;
1566 
1567                 vioif_set_mac(sc);
1568 
1569                 dev_err(sc->sc_dev, CE_NOTE,
1570                     "Generated a random MAC address: %s",
1571                     ether_sprintf((struct ether_addr *)sc->sc_mac));
1572         }
1573 }
1574 
1575 /*
1576  * Virtqueue interrupt handlers
1577  */
1578 /* ARGSUSED */
1579 uint_t
1580 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1581 {
1582         struct virtio_softc *vsc = (void *) arg1;
1583         struct vioif_softc *sc = container_of(vsc,
1584             struct vioif_softc, sc_virtio);
1585 
1586         /*
1587          * The return values of these functions are not needed but they make
1588          * debugging interrupts simpler because you can use them to detect when
1589          * stuff was processed and repopulated in this handler.
1590          */
 
1623                 { NULL }
1624         };
1625 
1626         ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1627 
1628         return (ret);
1629 }
1630 
1631 
1632 static void
1633 vioif_check_features(struct vioif_softc *sc)
1634 {
1635         if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1636                 /* The GSO/GRO features depend on CSUM, check them here. */
1637                 sc->sc_tx_csum = 1;
1638                 sc->sc_rx_csum = 1;
1639 
1640                 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1641                         sc->sc_rx_csum = 0;
1642                 }
1643                 cmn_err(CE_NOTE, "Csum enabled.");
1644 
1645                 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1646 
1647                         sc->sc_tx_tso4 = 1;
1648                         /*
1649                          * We don't seem to have a way to ask the system
1650                          * not to send us LSO packets with the Explicit
1651                          * Congestion Notification bit set, so we require
1652                          * the device to support it in order to do
1653                          * LSO.
1654                          */
1655                         if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1656                                 dev_err(sc->sc_dev, CE_NOTE,
1657                                     "TSO4 supported, but not ECN. "
1658                                     "Not using LSO.");
1659                                 sc->sc_tx_tso4 = 0;
1660                         } else {
1661                                 cmn_err(CE_NOTE, "LSO enabled");
1662                         }
1663                 }
1664         }
1665 }
1666 
1667 static int
1668 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1669 {
1670         int ret, instance;
1671         struct vioif_softc *sc;
1672         struct virtio_softc *vsc;
1673         mac_register_t *macp;
1674         char cache_name[CACHE_NAME_SIZE];
1675 
1676         instance = ddi_get_instance(devinfo);
1677 
1678         switch (cmd) {
1679         case DDI_ATTACH:
1680                 break;
1681 
 
1765                 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1766                     VIOIF_CTRL_QLEN, 0, "ctrl");
1767                 if (!sc->sc_ctrl_vq) {
1768                         goto exit_alloc3;
1769                 }
1770                 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1771         }
1772 
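             /*
              * Setting DRIVER_OK tells the device that initialization is
              * complete and the driver is ready to operate it.
              */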
1773         virtio_set_status(&sc->sc_virtio,
1774             VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1775 
1776         sc->sc_rxloan = 0;
1777 
1778         /* set some reasonably small default values */
1779         sc->sc_rxcopy_thresh = 300;
1780         sc->sc_txcopy_thresh = 300;
1781         sc->sc_mtu = ETHERMTU;
1782 
1783         vioif_check_features(sc);
1784 
1785         if (vioif_alloc_mems(sc))
1786                 goto exit_alloc_mems;
1787 
1788         if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1789                 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1790                 goto exit_macalloc;
1791         }
1792 
1793         macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1794         macp->m_driver = sc;
1795         macp->m_dip = devinfo;
1796         macp->m_src_addr = sc->sc_mac;
1797         macp->m_callbacks = &vioif_m_callbacks;
1798         macp->m_min_sdu = 0;
1799         macp->m_max_sdu = sc->sc_mtu;
1800         macp->m_margin = VLAN_TAGSZ;
1801         macp->m_priv_props = vioif_priv_props;
1802 
1803         sc->sc_macp = macp;
1804 
1805         /* Pre-fill the rx ring. */
 
1853 static int
1854 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1855 {
1856         struct vioif_softc *sc;
1857 
1858         if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1859                 return (DDI_FAILURE);
1860 
1861         switch (cmd) {
1862         case DDI_DETACH:
1863                 break;
1864 
1865         case DDI_PM_SUSPEND:
1866                 /* We do not support suspend/resume for vioif. */
1867                 return (DDI_FAILURE);
1868 
1869         default:
1870                 return (DDI_FAILURE);
1871         }
1872 
1873         if (sc->sc_rxloan) {
1874                 dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1875                     " not detaching.");
1876                 return (DDI_FAILURE);
1877         }
1878 
1879         virtio_stop_vq_intr(sc->sc_rx_vq);
1880         virtio_stop_vq_intr(sc->sc_tx_vq);
1881 
1882         virtio_release_ints(&sc->sc_virtio);
1883 
1884         if (mac_unregister(sc->sc_mac_handle)) {
1885                 return (DDI_FAILURE);
1886         }
1887 
1888         mac_free(sc->sc_macp);
1889 
1890         vioif_free_mems(sc);
1891         virtio_free_vq(sc->sc_rx_vq);
1892         virtio_free_vq(sc->sc_tx_vq);
1893 
 
 | 
   1 /*
   2  * This file and its contents are supplied under the terms of the
   3  * Common Development and Distribution License ("CDDL"), version 1.0.
   4  * You may only use this file in accordance with the terms of version
   5  * 1.0 of the CDDL.
   6  *
   7  * A full copy of the text of the CDDL should have accompanied this
   8  * source.  A copy of the CDDL is also available via the Internet at
   9  * http://www.illumos.org/license/CDDL.
  10  */
  11 
  12 /*
  13  * Copyright 2013 Nexenta Systems, Inc.  All rights reserved.
  14  * Copyright (c) 2014, 2016 by Delphix. All rights reserved.
  15  * Copyright 2015 Joyent, Inc.
  16  */
  17 
  18 /* Based on the NetBSD virtio driver by Minoura Makoto. */
  19 /*
  20  * Copyright (c) 2010 Minoura Makoto.
  21  * All rights reserved.
  22  *
  23  * Redistribution and use in source and binary forms, with or without
  24  * modification, are permitted provided that the following conditions
  25  * are met:
  26  * 1. Redistributions of source code must retain the above copyright
  27  *    notice, this list of conditions and the following disclaimer.
  28  * 2. Redistributions in binary form must reproduce the above copyright
  29  *    notice, this list of conditions and the following disclaimer in the
  30  *    documentation and/or other materials provided with the distribution.
  31  *
  32  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
  33  * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
  34  * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
  35  * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
 
 268 };
 269 
 270 struct vioif_softc {
 271         dev_info_t              *sc_dev; /* mirrors virtio_softc->sc_dev */
 272         struct virtio_softc     sc_virtio;
 273 
 274         mac_handle_t sc_mac_handle;
 275         mac_register_t *sc_macp;
 276 
 277         struct virtqueue        *sc_rx_vq;
 278         struct virtqueue        *sc_tx_vq;
 279         struct virtqueue        *sc_ctrl_vq;
 280 
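             /*
              * Set when vioif_send() cannot get a free tx descriptor; cleared
              * by vioif_reclaim_used_tx(), which then calls mac_tx_update().
              */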
 281         unsigned int            sc_tx_stopped:1;
 282 
 283         /* Feature bits. */
 284         unsigned int            sc_rx_csum:1;
 285         unsigned int            sc_tx_csum:1;
 286         unsigned int            sc_tx_tso4:1;
 287 
 288         /*
 289          * For debugging, it is useful to know whether the MAC address we
 290          * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
 291          * was otherwise generated or set from within the guest.
 292          */
 293         unsigned int            sc_mac_from_host:1;
 294 
 295         int                     sc_mtu;
 296         uint8_t                 sc_mac[ETHERADDRL];
 297         /*
 298          * For rx buffers, we keep a pointer array, because the buffers
 299          * can be loaned upstream, and we have to repopulate the array with
 300          * new members.
 301          */
 302         struct vioif_rx_buf     **sc_rxbufs;
 303 
 304         /*
 305          * For tx, we just allocate an array of buffers. The packet can
 306          * either be copied into the inline buffer, or the external mapping
 307          * can be used to map the packet.
 308          */
 309         struct vioif_tx_buf     *sc_txbufs;
 310 
 311         kstat_t                 *sc_intrstat;
 312         /*
 313          * We "loan" rx buffers upstream and reuse them after they are
 314          * freed. This lets us avoid allocations in the hot path.
 315          */
 316         kmem_cache_t            *sc_rxbuf_cache;
 317         ulong_t                 sc_rxloan;
 318 
 319         /* Copying small packets turns out to be faster than mapping them. */
 320         unsigned long           sc_rxcopy_thresh;
 321         unsigned long           sc_txcopy_thresh;
 322 
 323         /*
 324          * Statistics visible through mac:
 325          */
 326         uint64_t                sc_ipackets;
 327         uint64_t                sc_opackets;
 328         uint64_t                sc_rbytes;
 329         uint64_t                sc_obytes;
 330         uint64_t                sc_brdcstxmt;
 331         uint64_t                sc_brdcstrcv;
 332         uint64_t                sc_multixmt;
 333         uint64_t                sc_multircv;
 334         uint64_t                sc_norecvbuf;
 335         uint64_t                sc_notxbuf;
 336         uint64_t                sc_ierrors;
 337         uint64_t                sc_oerrors;
 338 
 339         /*
 340          * Internal debugging statistics:
 341          */
 342         uint64_t                sc_rxfail_dma_handle;
 343         uint64_t                sc_rxfail_dma_buffer;
 344         uint64_t                sc_rxfail_dma_bind;
 345         uint64_t                sc_rxfail_chain_undersize;
 346         uint64_t                sc_rxfail_no_descriptors;
 347         uint64_t                sc_txfail_dma_handle;
 348         uint64_t                sc_txfail_dma_bind;
 349         uint64_t                sc_txfail_indirect_limit;
 350 };
 351 
 352 #define ETHER_HEADER_LEN                sizeof (struct ether_header)
 353 
 354 /* MTU + the ethernet header. */
 355 #define MAX_PAYLOAD     65535
 356 #define MAX_MTU         (MAX_PAYLOAD - ETHER_HEADER_LEN)
 357 #define DEFAULT_MTU     ETHERMTU
 358 
 359 /*
 360  * Yeah, we spend 8M per device. Turns out, there is no point
 361  * being smart and using merged rx buffers (VIRTIO_NET_F_MRG_RXBUF),
 362  * because vhost does not support them, and we expect to be used with
 363  * vhost in production environments.
 364  */
 365 /* The buffer keeps both the packet data and the virtio_net_header. */
 366 #define VIOIF_RX_SIZE (MAX_PAYLOAD + sizeof (struct virtio_net_hdr))
 367 
 368 /*
 369  * We win a bit on header alignment, but the host wins a lot
 
 479 static void
 480 vioif_rx_free(caddr_t free_arg)
 481 {
 482         struct vioif_rx_buf *buf = (void *) free_arg;
 483         struct vioif_softc *sc = buf->rb_sc;
 484 
 485         kmem_cache_free(sc->sc_rxbuf_cache, buf);
 486         atomic_dec_ulong(&sc->sc_rxloan);
 487 }
 488 
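     /*
      * kmem cache constructor for rx buffers: the DMA handle, memory and
      * binding are set up once here, so buffers taken from the cache are
      * ready to be placed on the rx ring.
      */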
 489 static int
 490 vioif_rx_construct(void *buffer, void *user_arg, int kmflags)
 491 {
 492         _NOTE(ARGUNUSED(kmflags));
 493         struct vioif_softc *sc = user_arg;
 494         struct vioif_rx_buf *buf = buffer;
 495         size_t len;
 496 
 497         if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
 498             DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
 499                 sc->sc_rxfail_dma_handle++;
 500                 goto exit_handle;
 501         }
 502 
 503         if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
 504             VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
 505             &vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
 506             NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
 507                 sc->sc_rxfail_dma_buffer++;
 508                 goto exit_alloc;
 509         }
 510         ASSERT(len >= VIOIF_RX_SIZE);
 511 
 512         if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
 513             buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
 514             DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
 515             &buf->rb_mapping.vbm_ncookies)) {
 516                 sc->sc_rxfail_dma_bind++;
 517                 goto exit_bind;
 518         }
 519 
 520         ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
 521 
 522         buf->rb_sc = sc;
 523         buf->rb_frtn.free_arg = (void *) buf;
 524         buf->rb_frtn.free_func = vioif_rx_free;
 525 
 526         return (0);
 527 exit_bind:
 528         ddi_dma_mem_free(&buf->rb_mapping.vbm_acch);
 529 exit_alloc:
 530         ddi_dma_free_handle(&buf->rb_mapping.vbm_dmah);
 531 exit_handle:
 532 
 533         return (ENOMEM);
 534 }
 535 
 536 static void
 
 720         return (DDI_SUCCESS);
 721 }
 722 
 723 /* ARGSUSED */
 724 int
 725 vioif_unicst(void *arg, const uint8_t *macaddr)
 726 {
 727         return (DDI_FAILURE);
 728 }
 729 
 730 
 731 static uint_t
 732 vioif_add_rx(struct vioif_softc *sc, int kmflag)
 733 {
 734         uint_t num_added = 0;
 735         struct vq_entry *ve;
 736 
 737         while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
 738                 struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
 739 
 740                 if (buf == NULL) {
 741                         /* First run, allocate the buffer. */
 742                         buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
 743                         sc->sc_rxbufs[ve->qe_index] = buf;
 744                 }
 745 
 746                 /* Still nothing? Bye. */
 747                 if (buf == NULL) {
 748                         sc->sc_norecvbuf++;
 749                         vq_free_entry(sc->sc_rx_vq, ve);
 750                         break;
 751                 }
 752 
 753                 ASSERT(buf->rb_mapping.vbm_ncookies >= 1);
 754 
 755                 /*
 756                  * For an unknown reason, the virtio_net_hdr must be placed
 757                  * as a separate virtio queue entry.
 758                  */
 759                 virtio_ve_add_indirect_buf(ve,
 760                     buf->rb_mapping.vbm_dmac.dmac_laddress,
 761                     sizeof (struct virtio_net_hdr), B_FALSE);
 762 
 763                 /* Add the rest of the first cookie. */
 764                 virtio_ve_add_indirect_buf(ve,
 765                     buf->rb_mapping.vbm_dmac.dmac_laddress +
 766                     sizeof (struct virtio_net_hdr),
 767                     buf->rb_mapping.vbm_dmac.dmac_size -
 
 801                 virtio_sync_vq(sc->sc_rx_vq);
 802 
 803         return (num_added);
 804 }
 805 
 806 static uint_t
 807 vioif_process_rx(struct vioif_softc *sc)
 808 {
 809         struct vq_entry *ve;
 810         struct vioif_rx_buf *buf;
 811         mblk_t *mphead = NULL, *lastmp = NULL, *mp;
 812         uint32_t len;
 813         uint_t num_processed = 0;
 814 
 815         while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len))) {
 816 
 817                 buf = sc->sc_rxbufs[ve->qe_index];
 818                 ASSERT(buf);
 819 
 820                 if (len < sizeof (struct virtio_net_hdr)) {
 821                         sc->sc_rxfail_chain_undersize++;
 822                         sc->sc_ierrors++;
 823                         virtio_free_chain(ve);
 824                         continue;
 825                 }
 826 
 827                 len -= sizeof (struct virtio_net_hdr);
 828                 /*
 829                  * We copy small packets that happen to fit into a single
 830                  * cookie and reuse the buffers. For bigger ones, we loan
 831                  * the buffers upstream.
 832                  */
 833                 if (len < sc->sc_rxcopy_thresh) {
 834                         mp = allocb(len, 0);
 835                         if (mp == NULL) {
 836                                 sc->sc_norecvbuf++;
 837                                 sc->sc_ierrors++;
 838 
 839                                 virtio_free_chain(ve);
 840                                 break;
 841                         }
 842 
 843                         bcopy((char *)buf->rb_mapping.vbm_buf +
 844                             sizeof (struct virtio_net_hdr), mp->b_rptr, len);
 845                         mp->b_wptr = mp->b_rptr + len;
 846 
 847                 } else {
 848                         mp = desballoc((unsigned char *)
 849                             buf->rb_mapping.vbm_buf +
 850                             sizeof (struct virtio_net_hdr) +
 851                             VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
 852                         if (mp == NULL) {
 853                                 sc->sc_norecvbuf++;
 854                                 sc->sc_ierrors++;
 855 
 856                                 virtio_free_chain(ve);
 857                                 break;
 858                         }
 859                         mp->b_wptr = mp->b_rptr + len;
 860 
 861                         atomic_inc_ulong(&sc->sc_rxloan);
 862                         /*
 863                          * Buffer loaned, we will have to allocate a new one
 864                          * for this slot.
 865                          */
 866                         sc->sc_rxbufs[ve->qe_index] = NULL;
 867                 }
 868 
 869                 /*
 870                  * virtio-net does not tell us if this packet is multicast
 871                  * or broadcast, so we have to check it.
 872                  */
 
 898         return (num_processed);
 899 }
 900 
 901 static uint_t
 902 vioif_reclaim_used_tx(struct vioif_softc *sc)
 903 {
 904         struct vq_entry *ve;
 905         struct vioif_tx_buf *buf;
 906         uint32_t len;
 907         mblk_t *mp;
 908         uint_t num_reclaimed = 0;
 909 
 910         while ((ve = virtio_pull_chain(sc->sc_tx_vq, &len))) {
 911                 /* We don't chain descriptors for tx, so don't expect any. */
 912                 ASSERT(!ve->qe_next);
 913 
 914                 buf = &sc->sc_txbufs[ve->qe_index];
 915                 mp = buf->tb_mp;
 916                 buf->tb_mp = NULL;
 917 
 918                 if (mp != NULL) {
 919                         for (int i = 0; i < buf->tb_external_num; i++)
 920                                 (void) ddi_dma_unbind_handle(
 921                                     buf->tb_external_mapping[i].vbm_dmah);
 922                 }
 923 
 924                 virtio_free_chain(ve);
 925 
 926                 /* External mapping used, mp was not freed in vioif_send() */
 927                 if (mp != NULL)
 928                         freemsg(mp);
 929                 num_reclaimed++;
 930         }
 931 
 932         if (sc->sc_tx_stopped && num_reclaimed > 0) {
 933                 sc->sc_tx_stopped = 0;
 934                 mac_tx_update(sc->sc_mac_handle);
 935         }
 936 
 937         return (num_reclaimed);
 938 }
 939 
 940 /* sc will be used to update stat counters. */
 941 /* ARGSUSED */
 942 static inline void
 943 vioif_tx_inline(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
 944     size_t msg_size)
 945 {
 946         struct vioif_tx_buf *buf;
 947         buf = &sc->sc_txbufs[ve->qe_index];
 
 951         /* Frees mp */
 952         mcopymsg(mp, buf->tb_inline_mapping.vbm_buf +
 953             sizeof (struct virtio_net_hdr));
 954 
 955         virtio_ve_add_indirect_buf(ve,
 956             buf->tb_inline_mapping.vbm_dmac.dmac_laddress +
 957             sizeof (struct virtio_net_hdr), msg_size, B_TRUE);
 958 }
 959 
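     /*
      * DMA handles for the external (mapped) tx path are allocated lazily,
      * the first time a given tx buffer slot needs to map a fragment.
      */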
 960 static inline int
 961 vioif_tx_lazy_handle_alloc(struct vioif_softc *sc, struct vioif_tx_buf *buf,
 962     int i)
 963 {
 964         int ret = DDI_SUCCESS;
 965 
 966         if (!buf->tb_external_mapping[i].vbm_dmah) {
 967                 ret = ddi_dma_alloc_handle(sc->sc_dev,
 968                     &vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
 969                     &buf->tb_external_mapping[i].vbm_dmah);
 970                 if (ret != DDI_SUCCESS) {
 971                         sc->sc_txfail_dma_handle++;
 972                 }
 973         }
 974 
 975         return (ret);
 976 }
 977 
 978 static inline int
 979 vioif_tx_external(struct vioif_softc *sc, struct vq_entry *ve, mblk_t *mp,
 980     size_t msg_size)
 981 {
 982         _NOTE(ARGUNUSED(msg_size));
 983 
 984         struct vioif_tx_buf *buf;
 985         mblk_t *nmp;
 986         int i, j;
 987         int ret = DDI_SUCCESS;
 988 
 989         buf = &sc->sc_txbufs[ve->qe_index];
 990 
 991         ASSERT(buf);
 
1005                  * actually send us zero-length fragments.
1006                  */
1007                 if (len == 0) {
1008                         nmp = nmp->b_cont;
1009                         continue;
1010                 }
1011 
1012                 ret = vioif_tx_lazy_handle_alloc(sc, buf, i);
1013                 if (ret != DDI_SUCCESS) {
1014                         sc->sc_notxbuf++;
1015                         sc->sc_oerrors++;
1016                         goto exit_lazy_alloc;
1017                 }
1018                 ret = ddi_dma_addr_bind_handle(
1019                     buf->tb_external_mapping[i].vbm_dmah, NULL,
1020                     (caddr_t)nmp->b_rptr, len,
1021                     DDI_DMA_WRITE | DDI_DMA_STREAMING,
1022                     DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
1023 
1024                 if (ret != DDI_SUCCESS) {
1025                         sc->sc_txfail_dma_bind++;
1026                         sc->sc_oerrors++;
1027                         goto exit_bind;
1028                 }
1029 
1030                 /* Check if we still fit into the indirect table. */
1031                 if (virtio_ve_indirect_available(ve) < ncookies) {
1032                         sc->sc_txfail_indirect_limit++;
1033                         sc->sc_notxbuf++;
1034                         sc->sc_oerrors++;
1035 
1036                         ret = DDI_FAILURE;
1037                         goto exit_limit;
1038                 }
1039 
1040                 virtio_ve_add_cookie(ve, buf->tb_external_mapping[i].vbm_dmah,
1041                     dmac, ncookies, B_TRUE);
1042 
1043                 nmp = nmp->b_cont;
1044                 i++;
1045         }
1046 
1047         buf->tb_external_num = i;
1048         /* Save the mp to free it when the packet is sent. */
1049         buf->tb_mp = mp;
1050 
1051         return (DDI_SUCCESS);
1052 
 
1071         size_t msg_size = 0;
1072         uint32_t csum_start;
1073         uint32_t csum_stuff;
1074         uint32_t csum_flags;
1075         uint32_t lso_flags;
1076         uint32_t lso_mss;
1077         mblk_t *nmp;
1078         int ret;
1079         boolean_t lso_required = B_FALSE;
1080 
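             /* Compute the total message size across the mblk chain. */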
1081         for (nmp = mp; nmp; nmp = nmp->b_cont)
1082                 msg_size += MBLKL(nmp);
1083 
1084         if (sc->sc_tx_tso4) {
1085                 mac_lso_get(mp, &lso_mss, &lso_flags);
1086                 lso_required = (lso_flags & HW_LSO);
1087         }
1088 
1089         ve = vq_alloc_entry(sc->sc_tx_vq);
1090 
1091         if (ve == NULL) {
1092                 sc->sc_notxbuf++;
1093                 /* Out of free descriptors - try later. */
1094                 return (B_FALSE);
1095         }
1096         buf = &sc->sc_txbufs[ve->qe_index];
1097 
1098         /* Use the inline buffer of the first entry for the virtio_net_hdr. */
1099         (void) memset(buf->tb_inline_mapping.vbm_buf, 0,
1100             sizeof (struct virtio_net_hdr));
1101 
1102         net_header = (struct virtio_net_hdr *)buf->tb_inline_mapping.vbm_buf;
1103 
1104         mac_hcksum_get(mp, &csum_start, &csum_stuff, NULL,
1105             NULL, &csum_flags);
1106 
1107         /* They want us to do the TCP/UDP csum calculation. */
1108         if (csum_flags & HCK_PARTIALCKSUM) {
1109                 struct ether_header *eth_header;
1110                 int eth_hsize;
1111 
 
1189                 mp->b_next = NULL;
1190 
1191                 if (!vioif_send(sc, mp)) {
1192                         sc->sc_tx_stopped = 1;
1193                         mp->b_next = nmp;
1194                         break;
1195                 }
1196                 mp = nmp;
1197         }
1198 
1199         return (mp);
1200 }
1201 
1202 int
1203 vioif_start(void *arg)
1204 {
1205         struct vioif_softc *sc = arg;
1206         struct vq_entry *ve;
1207         uint32_t len;
1208 
1209         mac_link_update(sc->sc_mac_handle, vioif_link_state(sc));
1210 
1211         virtio_start_vq_intr(sc->sc_rx_vq);
1212 
1213         /*
1214          * Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
1215          * so the device will send a transmit interrupt when the queue is empty
1216          * and we can reclaim it in one sweep.
1217          */
1218 
1219         /*
1220          * Clear any data that arrived early on the receive queue and populate
1221          * it with free buffers that the device can use moving forward.
1222          */
1223         while ((ve = virtio_pull_chain(sc->sc_rx_vq, &len)) != NULL) {
1224                 virtio_free_chain(ve);
1225         }
1226         (void) vioif_populate_rx(sc, KM_SLEEP);
1227 
1228         return (DDI_SUCCESS);
1229 }
 
1405         }
1406         return (err);
1407 }
1408 
1409 static void
1410 vioif_propinfo(void *arg, const char *pr_name, mac_prop_id_t pr_num,
1411     mac_prop_info_handle_t prh)
1412 {
1413         struct vioif_softc *sc = arg;
1414         char valstr[64];
1415         int value;
1416 
1417         switch (pr_num) {
1418         case MAC_PROP_MTU:
1419                 mac_prop_info_set_range_uint32(prh, ETHERMIN, MAX_MTU);
1420                 break;
1421 
1422         case MAC_PROP_PRIVATE:
1423                 bzero(valstr, sizeof (valstr));
1424                 if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
1425                         value = sc->sc_txcopy_thresh;
1426                 } else if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
1427                         value = sc->sc_rxcopy_thresh;
1428                 } else {
1429                         return;
1430                 }
1431                 (void) snprintf(valstr, sizeof (valstr), "%d", value);
1432                 break;
1433 
1434         default:
1435                 break;
1436         }
1437 }
1438 
1439 static boolean_t
1440 vioif_getcapab(void *arg, mac_capab_t cap, void *cap_data)
1441 {
1442         struct vioif_softc *sc = arg;
1443 
1444         switch (cap) {
1445         case MAC_CAPAB_HCKSUM:
1446                 if (sc->sc_tx_csum) {
 
1482         .mc_close       = NULL,         /* mc_close */
1483         .mc_setprop     = vioif_setprop,
1484         .mc_getprop     = vioif_getprop,
1485         .mc_propinfo    = vioif_propinfo,
1486 };
1487 
1488 static void
1489 vioif_show_features(struct vioif_softc *sc, const char *prefix,
1490     uint32_t features)
1491 {
1492         char buf[512];
1493         char *bufp = buf;
1494         char *bufend = buf + sizeof (buf);
1495 
1496         /* LINTED E_PTRDIFF_OVERFLOW */
1497         bufp += snprintf(bufp, bufend - bufp, prefix);
1498         /* LINTED E_PTRDIFF_OVERFLOW */
1499         bufp += virtio_show_features(features, bufp, bufend - bufp);
1500         *bufp = '\0';
1501 
1502         /* Using '!' to only CE_NOTE this to the system log. */
1503         dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
1504             VIRTIO_NET_FEATURE_BITS);
1505 }
1506 
1507 /*
1508  * Find out which features are supported by the device and
1509  * choose which ones we wish to use.
1510  */
1511 static int
1512 vioif_dev_features(struct vioif_softc *sc)
1513 {
1514         uint32_t host_features;
1515 
1516         host_features = virtio_negotiate_features(&sc->sc_virtio,
1517             VIRTIO_NET_F_CSUM |
1518             VIRTIO_NET_F_HOST_TSO4 |
1519             VIRTIO_NET_F_HOST_ECN |
1520             VIRTIO_NET_F_MAC |
1521             VIRTIO_NET_F_STATUS |
1522             VIRTIO_F_RING_INDIRECT_DESC |
1523             VIRTIO_F_NOTIFY_ON_EMPTY);
1524 
1525         vioif_show_features(sc, "Host features: ", host_features);
1526         vioif_show_features(sc, "Negotiated features: ",
1527             sc->sc_virtio.sc_features);
1528 
1529         if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
1530                 dev_err(sc->sc_dev, CE_WARN,
1531                     "Host does not support RING_INDIRECT_DESC. Cannot attach.");
1532                 return (DDI_FAILURE);
1533         }
1534 
1535         return (DDI_SUCCESS);
1536 }
1537 
1538 static int
1539 vioif_has_feature(struct vioif_softc *sc, uint32_t feature)
1540 {
1541         return (virtio_has_feature(&sc->sc_virtio, feature));
1542 }
1543 
1544 static void
1545 vioif_set_mac(struct vioif_softc *sc)
1546 {
1547         int i;
1548 
1549         for (i = 0; i < ETHERADDRL; i++) {
1550                 virtio_write_device_config_1(&sc->sc_virtio,
1551                     VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
1552         }
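             /* Record that the MAC address in use did not come from the host. */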
1553         sc->sc_mac_from_host = 0;
1554 }
1555 
1556 /* Get the mac address out of the hardware, or make up one. */
1557 static void
1558 vioif_get_mac(struct vioif_softc *sc)
1559 {
1560         int i;
1561         if (sc->sc_virtio.sc_features & VIRTIO_NET_F_MAC) {
1562                 for (i = 0; i < ETHERADDRL; i++) {
1563                         sc->sc_mac[i] = virtio_read_device_config_1(
1564                             &sc->sc_virtio,
1565                             VIRTIO_NET_CONFIG_MAC + i);
1566                 }
1567                 sc->sc_mac_from_host = 1;
1568         } else {
1569                 /* Get a few random bytes */
1570                 (void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
1571                 /* Make sure it's a unicast MAC */
1572                 sc->sc_mac[0] &= ~1;
1573                 /* Set the "locally administered" bit */
1574                 sc->sc_mac[1] |= 2;
1575 
1576                 vioif_set_mac(sc);
1577 
1578                 dev_err(sc->sc_dev, CE_NOTE,
1579                     "!Generated a random MAC address: %s",
1580                     ether_sprintf((struct ether_addr *)sc->sc_mac));
1581         }
1582 }
1583 
1584 /*
1585  * Virtqueue interrupt handlers
1586  */
1587 /* ARGSUSED */
1588 uint_t
1589 vioif_rx_handler(caddr_t arg1, caddr_t arg2)
1590 {
1591         struct virtio_softc *vsc = (void *) arg1;
1592         struct vioif_softc *sc = container_of(vsc,
1593             struct vioif_softc, sc_virtio);
1594 
1595         /*
1596          * The return values of these functions are not needed but they make
1597          * debugging interrupts simpler because you can use them to detect when
1598          * stuff was processed and repopulated in this handler.
1599          */
 
1632                 { NULL }
1633         };
1634 
1635         ret = virtio_register_ints(&sc->sc_virtio, NULL, vioif_vq_h);
1636 
1637         return (ret);
1638 }
1639 
1640 
1641 static void
1642 vioif_check_features(struct vioif_softc *sc)
1643 {
1644         if (vioif_has_feature(sc, VIRTIO_NET_F_CSUM)) {
1645                 /* The GSO/GRO features depend on CSUM, check them here. */
1646                 sc->sc_tx_csum = 1;
1647                 sc->sc_rx_csum = 1;
1648 
1649                 if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
1650                         sc->sc_rx_csum = 0;
1651                 }
1652                 dev_err(sc->sc_dev, CE_NOTE, "!Csum enabled.");
1653 
1654                 if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
1655 
1656                         sc->sc_tx_tso4 = 1;
1657                         /*
1658                          * We don't seem to have a way to ask the system
1659                          * not to send us LSO packets with the Explicit
1660                          * Congestion Notification bit set, so we require
1661                          * the device to support it in order to do
1662                          * LSO.
1663                          */
1664                         if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
1665                                 dev_err(sc->sc_dev, CE_NOTE,
1666                                     "!TSO4 supported, but not ECN. "
1667                                     "Not using LSO.");
1668                                 sc->sc_tx_tso4 = 0;
1669                         } else {
1670                                 dev_err(sc->sc_dev, CE_NOTE, "!LSO enabled");
1671                         }
1672                 }
1673         }
1674 }
1675 
1676 static int
1677 vioif_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
1678 {
1679         int ret, instance;
1680         struct vioif_softc *sc;
1681         struct virtio_softc *vsc;
1682         mac_register_t *macp;
1683         char cache_name[CACHE_NAME_SIZE];
1684 
1685         instance = ddi_get_instance(devinfo);
1686 
1687         switch (cmd) {
1688         case DDI_ATTACH:
1689                 break;
1690 
 
1774                 sc->sc_ctrl_vq = virtio_alloc_vq(&sc->sc_virtio, 2,
1775                     VIOIF_CTRL_QLEN, 0, "ctrl");
1776                 if (!sc->sc_ctrl_vq) {
1777                         goto exit_alloc3;
1778                 }
1779                 virtio_stop_vq_intr(sc->sc_ctrl_vq);
1780         }
1781 
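             /*
              * Setting DRIVER_OK tells the device that initialization is
              * complete and the driver is ready to operate it.
              */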
1782         virtio_set_status(&sc->sc_virtio,
1783             VIRTIO_CONFIG_DEVICE_STATUS_DRIVER_OK);
1784 
1785         sc->sc_rxloan = 0;
1786 
1787         /* set some reasonably small default values */
1788         sc->sc_rxcopy_thresh = 300;
1789         sc->sc_txcopy_thresh = 300;
1790         sc->sc_mtu = ETHERMTU;
1791 
1792         vioif_check_features(sc);
1793 
1794         if (vioif_alloc_mems(sc) != 0)
1795                 goto exit_alloc_mems;
1796 
1797         if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
1798                 dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
1799                 goto exit_macalloc;
1800         }
1801 
1802         macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1803         macp->m_driver = sc;
1804         macp->m_dip = devinfo;
1805         macp->m_src_addr = sc->sc_mac;
1806         macp->m_callbacks = &vioif_m_callbacks;
1807         macp->m_min_sdu = 0;
1808         macp->m_max_sdu = sc->sc_mtu;
1809         macp->m_margin = VLAN_TAGSZ;
1810         macp->m_priv_props = vioif_priv_props;
1811 
1812         sc->sc_macp = macp;
1813 
1814         /* Pre-fill the rx ring. */
 
1862 static int
1863 vioif_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1864 {
1865         struct vioif_softc *sc;
1866 
1867         if ((sc = ddi_get_driver_private(devinfo)) == NULL)
1868                 return (DDI_FAILURE);
1869 
1870         switch (cmd) {
1871         case DDI_DETACH:
1872                 break;
1873 
1874         case DDI_PM_SUSPEND:
1875                 /* We do not support suspend/resume for vioif. */
1876                 return (DDI_FAILURE);
1877 
1878         default:
1879                 return (DDI_FAILURE);
1880         }
1881 
1882         if (sc->sc_rxloan > 0) {
1883                 dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
1884                     " not detaching.");
1885                 return (DDI_FAILURE);
1886         }
1887 
1888         virtio_stop_vq_intr(sc->sc_rx_vq);
1889         virtio_stop_vq_intr(sc->sc_tx_vq);
1890 
1891         virtio_release_ints(&sc->sc_virtio);
1892 
1893         if (mac_unregister(sc->sc_mac_handle)) {
1894                 return (DDI_FAILURE);
1895         }
1896 
1897         mac_free(sc->sc_macp);
1898 
1899         vioif_free_mems(sc);
1900         virtio_free_vq(sc->sc_rx_vq);
1901         virtio_free_vq(sc->sc_tx_vq);
1902 
 
 |