MFV: illumos-gate@54b146cf23443d91aef04e2d2a59b7434add3030
7096 vioif should not log to the console on boot, or ever
Reviewed by: Alexander Pyhalov <apyhalov@gmail.com>
Reviewed by: Andy Stormont <astormont@racktopsystems.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>
Reviewed by: Toomas Soome <tsoome@me.com>
Approved by: Dan McDonald <danmcd@joyent.com>
Author: Joshua M. Clulow <jmc@joyent.com>
OS-76 vioif kernel heap corruption, NULL pointer dereference and mtu problem
port of illumos-3644
3644 Add virtio-net support into the Illumos
Reviewed by: Alexey Zaytsev <alexey.zaytsev@gmail.com>
Reviewed by: Yuri Pankov <yuri.pankov@nexenta.com>
Reviewed by: David Hoppner <0xffea@gmail.com>
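
The central mechanism of this change is the illumos logging convention: a cmn_err(9F)/dev_err(9F) format string that begins with '!' is written to the system log only and never to the console. Messages worth keeping gain the prefix; the rest are replaced by silent debug counters (see the softc hunks below). A minimal sketch of the convention, using the real DDI interfaces with illustrative message text:

#include <sys/ddi.h>
#include <sys/sunddi.h>
#include <sys/cmn_err.h>

static void
example_log(dev_info_t *dip, uint32_t features)
{
	/* '!' prefix: message goes to the system log only. */
	dev_err(dip, CE_NOTE, "!negotiated features: 0x%x", features);

	/* No prefix: a CE_WARN also reaches the console. */
	dev_err(dip, CE_WARN, "attach failed");
}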
*** 8,19 ****
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
! * Copyright 2013 Nexenta Inc. All rights reserved.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
*/
/* Based on the NetBSD virtio driver by Minoura Makoto. */
/*
* Copyright (c) 2010 Minoura Makoto.
--- 8,20 ----
* source. A copy of the CDDL is also available via the Internet at
* http://www.illumos.org/license/CDDL.
*/
/*
! * Copyright 2013 Nexenta Systems, Inc. All rights reserved.
* Copyright (c) 2014, 2016 by Delphix. All rights reserved.
+ * Copyright 2015 Joyent, Inc.
*/
/* Based on the NetBSD virtio driver by Minoura Makoto. */
/*
* Copyright (c) 2010 Minoura Makoto.
*** 282,291 ****
--- 283,299 ----
/* Feature bits. */
unsigned int sc_rx_csum:1;
unsigned int sc_tx_csum:1;
unsigned int sc_tx_tso4:1;
+ /*
+ * For debugging, it is useful to know whether the MAC address we
+ * are using came from the host (via VIRTIO_NET_CONFIG_MAC) or
+ * was otherwise generated or set from within the guest.
+ */
+ unsigned int sc_mac_from_host:1;
+
int sc_mtu;
uint8_t sc_mac[ETHERADDRL];
/*
* For rx buffers, we keep a pointer array, because the buffers
* can be loaned upstream, and we have to repopulate the array with
*** 309,319 ****
ulong_t sc_rxloan;
/* Copying small packets turns out to be faster than mapping them. */
unsigned long sc_rxcopy_thresh;
unsigned long sc_txcopy_thresh;
! /* Some statistic coming here */
uint64_t sc_ipackets;
uint64_t sc_opackets;
uint64_t sc_rbytes;
uint64_t sc_obytes;
uint64_t sc_brdcstxmt;
--- 317,330 ----
ulong_t sc_rxloan;
/* Copying small packets turns out to be faster than mapping them. */
unsigned long sc_rxcopy_thresh;
unsigned long sc_txcopy_thresh;
!
! /*
! * Statistics visible through mac:
! */
uint64_t sc_ipackets;
uint64_t sc_opackets;
uint64_t sc_rbytes;
uint64_t sc_obytes;
uint64_t sc_brdcstxmt;
*** 322,331 ****
--- 333,354 ----
uint64_t sc_multircv;
uint64_t sc_norecvbuf;
uint64_t sc_notxbuf;
uint64_t sc_ierrors;
uint64_t sc_oerrors;
+
+ /*
+ * Internal debugging statistics:
+ */
+ uint64_t sc_rxfail_dma_handle;
+ uint64_t sc_rxfail_dma_buffer;
+ uint64_t sc_rxfail_dma_bind;
+ uint64_t sc_rxfail_chain_undersize;
+ uint64_t sc_rxfail_no_descriptors;
+ uint64_t sc_txfail_dma_handle;
+ uint64_t sc_txfail_dma_bind;
+ uint64_t sc_txfail_indirect_limit;
};
#define ETHER_HEADER_LEN sizeof (struct ether_header)
/* MTU + the ethernet header. */
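
These failure counters take the place of the console warnings removed in the hunks that follow: each event is still recorded, but silently, in softc fields that can be inspected post hoc with mdb (e.g. ::print on the vioif_softc). If the counters were ever wanted in userland, a named kstat would be the idiomatic vehicle. A purely hypothetical sketch, not part of this commit, with invented module/name strings:

#include <sys/kstat.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

typedef struct vioif_dbgstats {
	kstat_named_t	rxfail_dma_handle;
	kstat_named_t	rxfail_dma_bind;
} vioif_dbgstats_t;

static kstat_t *
vioif_dbgstats_create(dev_info_t *dip)
{
	kstat_t *ksp;
	vioif_dbgstats_t *dp;

	ksp = kstat_create("vioif", ddi_get_instance(dip), "debug", "net",
	    KSTAT_TYPE_NAMED,
	    sizeof (vioif_dbgstats_t) / sizeof (kstat_named_t), 0);
	if (ksp == NULL)
		return (NULL);

	dp = ksp->ks_data;
	kstat_named_init(&dp->rxfail_dma_handle, "rxfail_dma_handle",
	    KSTAT_DATA_UINT64);
	kstat_named_init(&dp->rxfail_dma_bind, "rxfail_dma_bind",
	    KSTAT_DATA_UINT64);
	kstat_install(ksp);
	return (ksp);
}

With such a kstat installed, kstat -m vioif would display the counters from userland.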
*** 471,501 ****
struct vioif_rx_buf *buf = buffer;
size_t len;
if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
! dev_err(sc->sc_dev, CE_WARN,
! "Can't allocate dma handle for rx buffer");
goto exit_handle;
}
if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
&vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
! dev_err(sc->sc_dev, CE_WARN,
! "Can't allocate rx buffer");
goto exit_alloc;
}
ASSERT(len >= VIOIF_RX_SIZE);
if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
&buf->rb_mapping.vbm_ncookies)) {
! dev_err(sc->sc_dev, CE_WARN, "Can't bind tx buffer");
!
goto exit_bind;
}
ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
--- 494,521 ----
struct vioif_rx_buf *buf = buffer;
size_t len;
if (ddi_dma_alloc_handle(sc->sc_dev, &vioif_mapped_buf_dma_attr,
DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmah)) {
! sc->sc_rxfail_dma_handle++;
goto exit_handle;
}
if (ddi_dma_mem_alloc(buf->rb_mapping.vbm_dmah,
VIOIF_RX_SIZE + sizeof (struct virtio_net_hdr),
&vioif_bufattr, DDI_DMA_STREAMING, DDI_DMA_SLEEP,
NULL, &buf->rb_mapping.vbm_buf, &len, &buf->rb_mapping.vbm_acch)) {
! sc->sc_rxfail_dma_buffer++;
goto exit_alloc;
}
ASSERT(len >= VIOIF_RX_SIZE);
if (ddi_dma_addr_bind_handle(buf->rb_mapping.vbm_dmah, NULL,
buf->rb_mapping.vbm_buf, len, DDI_DMA_READ | DDI_DMA_STREAMING,
DDI_DMA_SLEEP, NULL, &buf->rb_mapping.vbm_dmac,
&buf->rb_mapping.vbm_ncookies)) {
! sc->sc_rxfail_dma_bind++;
goto exit_bind;
}
ASSERT(buf->rb_mapping.vbm_ncookies <= VIOIF_INDIRECT_MAX);
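
The rx buffer constructor above follows the standard three-step DDI DMA sequence: allocate a handle, allocate DMA-able memory, then bind the memory to the handle to obtain device-visible cookies, unwinding in reverse order on failure. A condensed sketch of the pattern under generic names (the DDI calls themselves are the real interfaces):

#include <sys/ddi.h>
#include <sys/sunddi.h>

static ddi_device_acc_attr_t acc_attr = {
	DDI_DEVICE_ATTR_V0, DDI_NEVER_SWAP_ACC, DDI_STRICTORDER_ACC
};

static int
dma_buf_setup(dev_info_t *dip, ddi_dma_attr_t *attrp, size_t size,
    ddi_dma_handle_t *dmahp, ddi_acc_handle_t *acchp, caddr_t *bufp)
{
	ddi_dma_cookie_t dmac;
	uint_t ncookies;
	size_t len;

	/* Step 1: a DMA handle describing the device's constraints. */
	if (ddi_dma_alloc_handle(dip, attrp, DDI_DMA_SLEEP, NULL,
	    dmahp) != DDI_SUCCESS)
		return (DDI_FAILURE);

	/* Step 2: memory the device can actually address. */
	if (ddi_dma_mem_alloc(*dmahp, size, &acc_attr, DDI_DMA_STREAMING,
	    DDI_DMA_SLEEP, NULL, bufp, &len, acchp) != DDI_SUCCESS)
		goto fail_handle;

	/* Step 3: bind, yielding the device-visible cookie(s). */
	if (ddi_dma_addr_bind_handle(*dmahp, NULL, *bufp, len,
	    DDI_DMA_READ | DDI_DMA_STREAMING, DDI_DMA_SLEEP, NULL,
	    &dmac, &ncookies) != DDI_DMA_MAPPED)
		goto fail_mem;

	return (DDI_SUCCESS);

fail_mem:
	ddi_dma_mem_free(acchp);
fail_handle:
	ddi_dma_free_handle(dmahp);
	return (DDI_FAILURE);
}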
*** 715,734 ****
struct vq_entry *ve;
while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
! if (!buf) {
/* First run, allocate the buffer. */
buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
sc->sc_rxbufs[ve->qe_index] = buf;
}
/* Still nothing? Bye. */
! if (!buf) {
! dev_err(sc->sc_dev, CE_WARN,
! "Can't allocate rx buffer");
sc->sc_norecvbuf++;
vq_free_entry(sc->sc_rx_vq, ve);
break;
}
--- 735,752 ----
struct vq_entry *ve;
while ((ve = vq_alloc_entry(sc->sc_rx_vq)) != NULL) {
struct vioif_rx_buf *buf = sc->sc_rxbufs[ve->qe_index];
! if (buf == NULL) {
/* First run, allocate the buffer. */
buf = kmem_cache_alloc(sc->sc_rxbuf_cache, kmflag);
sc->sc_rxbufs[ve->qe_index] = buf;
}
/* Still nothing? Bye. */
! if (buf == NULL) {
sc->sc_norecvbuf++;
vq_free_entry(sc->sc_rx_vq, ve);
break;
}
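
Many of the remaining hunks are mechanical style fixes: boolean tests of pointers such as !buf become explicit comparisons against NULL, per illumos C style. A trivial illustration of the preferred form:

static int
first_nonnull(void *const *arr, int n)
{
	for (int i = 0; i < n; i++) {
		if (arr[i] != NULL)	/* explicit, not "if (arr[i])" */
			return (i);
	}
	return (-1);
}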
*** 798,809 ****
buf = sc->sc_rxbufs[ve->qe_index];
ASSERT(buf);
if (len < sizeof (struct virtio_net_hdr)) {
! dev_err(sc->sc_dev, CE_WARN, "RX: Cnain too small: %u",
! len - (uint32_t)sizeof (struct virtio_net_hdr));
sc->sc_ierrors++;
virtio_free_chain(ve);
continue;
}
--- 816,826 ----
buf = sc->sc_rxbufs[ve->qe_index];
ASSERT(buf);
if (len < sizeof (struct virtio_net_hdr)) {
! sc->sc_rxfail_chain_undersize++;
sc->sc_ierrors++;
virtio_free_chain(ve);
continue;
}
*** 813,823 ****
* cookie and reuse the buffers. For bigger ones, we loan
* the buffers upstream.
*/
if (len < sc->sc_rxcopy_thresh) {
mp = allocb(len, 0);
! if (!mp) {
sc->sc_norecvbuf++;
sc->sc_ierrors++;
virtio_free_chain(ve);
break;
--- 830,840 ----
* cookie and reuse the buffers. For bigger ones, we loan
* the buffers upstream.
*/
if (len < sc->sc_rxcopy_thresh) {
mp = allocb(len, 0);
! if (mp == NULL) {
sc->sc_norecvbuf++;
sc->sc_ierrors++;
virtio_free_chain(ve);
break;
*** 830,840 ****
} else {
mp = desballoc((unsigned char *)
buf->rb_mapping.vbm_buf +
sizeof (struct virtio_net_hdr) +
VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
! if (!mp) {
sc->sc_norecvbuf++;
sc->sc_ierrors++;
virtio_free_chain(ve);
break;
--- 847,857 ----
} else {
mp = desballoc((unsigned char *)
buf->rb_mapping.vbm_buf +
sizeof (struct virtio_net_hdr) +
VIOIF_IP_ALIGN, len, 0, &buf->rb_frtn);
! if (mp == NULL) {
sc->sc_norecvbuf++;
sc->sc_ierrors++;
virtio_free_chain(ve);
break;
*** 896,915 ****
buf = &sc->sc_txbufs[ve->qe_index];
mp = buf->tb_mp;
buf->tb_mp = NULL;
! if (mp) {
for (int i = 0; i < buf->tb_external_num; i++)
(void) ddi_dma_unbind_handle(
buf->tb_external_mapping[i].vbm_dmah);
}
virtio_free_chain(ve);
/* External mapping used, mp was not freed in vioif_send() */
! if (mp)
freemsg(mp);
num_reclaimed++;
}
if (sc->sc_tx_stopped && num_reclaimed > 0) {
--- 913,932 ----
buf = &sc->sc_txbufs[ve->qe_index];
mp = buf->tb_mp;
buf->tb_mp = NULL;
! if (mp != NULL) {
for (int i = 0; i < buf->tb_external_num; i++)
(void) ddi_dma_unbind_handle(
buf->tb_external_mapping[i].vbm_dmah);
}
virtio_free_chain(ve);
/* External mapping used, mp was not freed in vioif_send() */
! if (mp != NULL)
freemsg(mp);
num_reclaimed++;
}
if (sc->sc_tx_stopped && num_reclaimed > 0) {
*** 949,960 ****
if (!buf->tb_external_mapping[i].vbm_dmah) {
ret = ddi_dma_alloc_handle(sc->sc_dev,
&vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
&buf->tb_external_mapping[i].vbm_dmah);
if (ret != DDI_SUCCESS) {
! dev_err(sc->sc_dev, CE_WARN,
! "Can't allocate dma handle for external tx buffer");
}
}
return (ret);
}
--- 966,976 ----
if (!buf->tb_external_mapping[i].vbm_dmah) {
ret = ddi_dma_alloc_handle(sc->sc_dev,
&vioif_mapped_buf_dma_attr, DDI_DMA_SLEEP, NULL,
&buf->tb_external_mapping[i].vbm_dmah);
if (ret != DDI_SUCCESS) {
! sc->sc_txfail_dma_handle++;
}
}
return (ret);
}
*** 1004,1024 ****
(caddr_t)nmp->b_rptr, len,
DDI_DMA_WRITE | DDI_DMA_STREAMING,
DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
if (ret != DDI_SUCCESS) {
sc->sc_oerrors++;
- dev_err(sc->sc_dev, CE_NOTE,
- "TX: Failed to bind external handle");
goto exit_bind;
}
/* Check if we still fit into the indirect table. */
if (virtio_ve_indirect_available(ve) < ncookies) {
! dev_err(sc->sc_dev, CE_NOTE,
! "TX: Indirect descriptor table limit reached."
! " It took %d fragments.", i);
sc->sc_notxbuf++;
sc->sc_oerrors++;
ret = DDI_FAILURE;
goto exit_limit;
--- 1020,1037 ----
(caddr_t)nmp->b_rptr, len,
DDI_DMA_WRITE | DDI_DMA_STREAMING,
DDI_DMA_SLEEP, NULL, &dmac, &ncookies);
if (ret != DDI_SUCCESS) {
+ sc->sc_txfail_dma_bind++;
sc->sc_oerrors++;
goto exit_bind;
}
/* Check if we still fit into the indirect table. */
if (virtio_ve_indirect_available(ve) < ncookies) {
! sc->sc_txfail_indirect_limit++;
sc->sc_notxbuf++;
sc->sc_oerrors++;
ret = DDI_FAILURE;
goto exit_limit;
*** 1073,1083 ****
lso_required = (lso_flags & HW_LSO);
}
ve = vq_alloc_entry(sc->sc_tx_vq);
! if (!ve) {
sc->sc_notxbuf++;
/* Out of free descriptors - try later. */
return (B_FALSE);
}
buf = &sc->sc_txbufs[ve->qe_index];
--- 1086,1096 ----
lso_required = (lso_flags & HW_LSO);
}
ve = vq_alloc_entry(sc->sc_tx_vq);
! if (ve == NULL) {
sc->sc_notxbuf++;
/* Out of free descriptors - try later. */
return (B_FALSE);
}
buf = &sc->sc_txbufs[ve->qe_index];
*** 1191,1202 ****
{
struct vioif_softc *sc = arg;
struct vq_entry *ve;
uint32_t len;
! mac_link_update(sc->sc_mac_handle,
! vioif_link_state(sc));
virtio_start_vq_intr(sc->sc_rx_vq);
/*
* Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
--- 1204,1214 ----
{
struct vioif_softc *sc = arg;
struct vq_entry *ve;
uint32_t len;
! mac_link_update(sc->sc_mac_handle, vioif_link_state(sc));
virtio_start_vq_intr(sc->sc_rx_vq);
/*
* Don't start interrupts on sc_tx_vq. We use VIRTIO_F_NOTIFY_ON_EMPTY,
*** 1408,1421 ****
break;
case MAC_PROP_PRIVATE:
bzero(valstr, sizeof (valstr));
if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
-
value = sc->sc_txcopy_thresh;
! } else if (strcmp(pr_name,
! vioif_rxcopy_thresh) == 0) {
value = sc->sc_rxcopy_thresh;
} else {
return;
}
(void) snprintf(valstr, sizeof (valstr), "%d", value);
--- 1420,1431 ----
break;
case MAC_PROP_PRIVATE:
bzero(valstr, sizeof (valstr));
if (strcmp(pr_name, vioif_txcopy_thresh) == 0) {
value = sc->sc_txcopy_thresh;
! } else if (strcmp(pr_name, vioif_rxcopy_thresh) == 0) {
value = sc->sc_rxcopy_thresh;
} else {
return;
}
(void) snprintf(valstr, sizeof (valstr), "%d", value);
*** 1487,1497 ****
bufp += snprintf(bufp, bufend - bufp, prefix);
/* LINTED E_PTRDIFF_OVERFLOW */
bufp += virtio_show_features(features, bufp, bufend - bufp);
*bufp = '\0';
-
/* Using '!' to only CE_NOTE this to the system log. */
dev_err(sc->sc_dev, CE_NOTE, "!%s Vioif (%b)", buf, features,
VIRTIO_NET_FEATURE_BITS);
}
--- 1497,1506 ----
*** 1516,1527 ****
vioif_show_features(sc, "Host features: ", host_features);
vioif_show_features(sc, "Negotiated features: ",
sc->sc_virtio.sc_features);
if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
! dev_err(sc->sc_dev, CE_NOTE,
! "Host does not support RING_INDIRECT_DESC, bye.");
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
--- 1525,1536 ----
vioif_show_features(sc, "Host features: ", host_features);
vioif_show_features(sc, "Negotiated features: ",
sc->sc_virtio.sc_features);
if (!(sc->sc_virtio.sc_features & VIRTIO_F_RING_INDIRECT_DESC)) {
! dev_err(sc->sc_dev, CE_WARN,
! "Host does not support RING_INDIRECT_DESC. Cannot attach.");
return (DDI_FAILURE);
}
return (DDI_SUCCESS);
}
*** 1539,1548 ****
--- 1548,1558 ----
for (i = 0; i < ETHERADDRL; i++) {
virtio_write_device_config_1(&sc->sc_virtio,
VIRTIO_NET_CONFIG_MAC + i, sc->sc_mac[i]);
}
+ sc->sc_mac_from_host = 0;
}
/* Get the mac address out of the hardware, or make up one. */
static void
vioif_get_mac(struct vioif_softc *sc)
*** 1552,1563 ****
for (i = 0; i < ETHERADDRL; i++) {
sc->sc_mac[i] = virtio_read_device_config_1(
&sc->sc_virtio,
VIRTIO_NET_CONFIG_MAC + i);
}
! dev_err(sc->sc_dev, CE_NOTE, "Got MAC address from host: %s",
! ether_sprintf((struct ether_addr *)sc->sc_mac));
} else {
/* Get a few random bytes */
(void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
/* Make sure it's a unicast MAC */
sc->sc_mac[0] &= ~1;
--- 1562,1572 ----
for (i = 0; i < ETHERADDRL; i++) {
sc->sc_mac[i] = virtio_read_device_config_1(
&sc->sc_virtio,
VIRTIO_NET_CONFIG_MAC + i);
}
! sc->sc_mac_from_host = 1;
} else {
/* Get a few random bytes */
(void) random_get_pseudo_bytes(sc->sc_mac, ETHERADDRL);
/* Make sure it's a unicast MAC */
sc->sc_mac[0] &= ~1;
*** 1565,1575 ****
sc->sc_mac[1] |= 2;
vioif_set_mac(sc);
dev_err(sc->sc_dev, CE_NOTE,
! "Generated a random MAC address: %s",
ether_sprintf((struct ether_addr *)sc->sc_mac));
}
}
/*
--- 1574,1584 ----
sc->sc_mac[1] |= 2;
vioif_set_mac(sc);
dev_err(sc->sc_dev, CE_NOTE,
! "!Generated a random MAC address: %s",
ether_sprintf((struct ether_addr *)sc->sc_mac));
}
}
/*
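
When VIRTIO_NET_F_MAC is not offered, the driver fabricates an address from random bytes and then adjusts the flag bits, as in the hunk above. For reference, both flag bits of an IEEE 802 MAC address live in the first octet: bit 0 is the individual/group (multicast) bit and bit 1 is the universal/local bit. A minimal sketch of the conventional fixup (note the driver itself sets its local bit in the second octet):

#include <sys/ethernet.h>
#include <sys/random.h>

static void
random_local_mac(uint8_t mac[ETHERADDRL])
{
	(void) random_get_pseudo_bytes(mac, ETHERADDRL);
	mac[0] &= ~0x01;	/* clear I/G bit: unicast */
	mac[0] |= 0x02;		/* set U/L bit: locally administered */
}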
*** 1638,1648 ****
sc->sc_rx_csum = 1;
if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
sc->sc_rx_csum = 0;
}
! cmn_err(CE_NOTE, "Csum enabled.");
if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
sc->sc_tx_tso4 = 1;
/*
--- 1647,1657 ----
sc->sc_rx_csum = 1;
if (!vioif_has_feature(sc, VIRTIO_NET_F_GUEST_CSUM)) {
sc->sc_rx_csum = 0;
}
! dev_err(sc->sc_dev, CE_NOTE, "!Csum enabled.");
if (vioif_has_feature(sc, VIRTIO_NET_F_HOST_TSO4)) {
sc->sc_tx_tso4 = 1;
/*
*** 1652,1666 ****
* the device to support it in order to do
* LSO.
*/
if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
dev_err(sc->sc_dev, CE_NOTE,
! "TSO4 supported, but not ECN. "
"Not using LSO.");
sc->sc_tx_tso4 = 0;
} else {
! cmn_err(CE_NOTE, "LSO enabled");
}
}
}
}
--- 1661,1675 ----
* the device to support it in order to do
* LSO.
*/
if (!vioif_has_feature(sc, VIRTIO_NET_F_HOST_ECN)) {
dev_err(sc->sc_dev, CE_NOTE,
! "!TSO4 supported, but not ECN. "
"Not using LSO.");
sc->sc_tx_tso4 = 0;
} else {
! dev_err(sc->sc_dev, CE_NOTE, "!LSO enabled");
}
}
}
}
*** 1780,1790 ****
sc->sc_txcopy_thresh = 300;
sc->sc_mtu = ETHERMTU;
vioif_check_features(sc);
! if (vioif_alloc_mems(sc))
goto exit_alloc_mems;
if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
goto exit_macalloc;
--- 1789,1799 ----
sc->sc_txcopy_thresh = 300;
sc->sc_mtu = ETHERMTU;
vioif_check_features(sc);
! if (vioif_alloc_mems(sc) != 0)
goto exit_alloc_mems;
if ((macp = mac_alloc(MAC_VERSION)) == NULL) {
dev_err(devinfo, CE_WARN, "Failed to allocate a mac_register");
goto exit_macalloc;
*** 1868,1878 ****
default:
return (DDI_FAILURE);
}
! if (sc->sc_rxloan) {
dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
" not detaching.");
return (DDI_FAILURE);
}
--- 1877,1887 ----
default:
return (DDI_FAILURE);
}
! if (sc->sc_rxloan > 0) {
dev_err(devinfo, CE_WARN, "!Some rx buffers are still upstream,"
" not detaching.");
return (DDI_FAILURE);
}
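
The sc_rxloan > 0 guard above is what makes detach safe: rx buffers handed upstream via desballoc() are only returned when STREAMS invokes the buffer's free routine, and the loan counter tracks how many are still out. A condensed sketch of the loan-accounting pattern (generic names; frtn_t and desballoc(9F) are the real interfaces):

#include <sys/types.h>
#include <sys/stream.h>
#include <sys/atomic.h>

struct softc {
	ulong_t		sc_rxloan;	/* buffers currently loaned upstream */
};

struct rx_buf {
	frtn_t		rb_frtn;	/* free routine + argument */
	caddr_t		rb_base;
	struct softc	*rb_sc;
};

/* STREAMS calls this when the loaned mblk is finally freed. */
static void
rx_buf_free_cb(caddr_t arg)
{
	struct rx_buf *buf = (struct rx_buf *)arg;

	/* ...return the buffer to its cache here... */
	atomic_dec_ulong(&buf->rb_sc->sc_rxloan);
}

static mblk_t *
rx_buf_loan(struct softc *sc, struct rx_buf *buf, size_t len)
{
	mblk_t *mp;

	buf->rb_frtn.free_func = rx_buf_free_cb;
	buf->rb_frtn.free_arg = (caddr_t)buf;

	mp = desballoc((unsigned char *)buf->rb_base, len, 0, &buf->rb_frtn);
	if (mp != NULL)
		atomic_inc_ulong(&sc->sc_rxloan);
	return (mp);
}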