NEX-14556 xnf: panic on Xen 4.x
Reviewed by: Dan Fields <dan.fields@nexenta.com>
Reviewed by: Roman Strashkin <roman.strashkin@nexenta.com>
re #13140 rb4270 hvm_sd module missing dependencies on scsi and cmlb
re #13166 rb4270 Check for Xen HVM even if CPUID signature returns Microsoft Hv
re #13187 rb4270 Fix Xen HVM related warnings
--- old/usr/src/uts/common/xen/io/xnf.c
+++ new/usr/src/uts/common/xen/io/xnf.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2010 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 25 */
26 26
27 27 /*
28 28 * Copyright (c) 2014, 2017 by Delphix. All rights reserved.
29 29 */
30 30
31 31 /*
32 32 *
33 33 * Copyright (c) 2004 Christian Limpach.
34 34 * All rights reserved.
35 35 *
36 36 * Redistribution and use in source and binary forms, with or without
37 37 * modification, are permitted provided that the following conditions
38 38 * are met:
39 39 * 1. Redistributions of source code must retain the above copyright
40 40 * notice, this list of conditions and the following disclaimer.
41 41 * 2. Redistributions in binary form must reproduce the above copyright
42 42 * notice, this list of conditions and the following disclaimer in the
43 43 * documentation and/or other materials provided with the distribution.
44 44 * 3. This section intentionally left blank.
45 45 * 4. The name of the author may not be used to endorse or promote products
46 46 * derived from this software without specific prior written permission.
47 47 *
48 48 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR
49 49 * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES
50 50 * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED.
51 51 * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT,
52 52 * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT
53 53 * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
54 54 * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
55 55 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
56 56 * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF
57 57 * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
58 58 */
59 59 /*
60 60 * Section 3 of the above license was updated in response to bug 6379571.
61 61 */
62 62
63 63 /*
64 64 * xnf.c - GLDv3 network driver for domU.
65 65 */
66 66
67 67 /*
68 68 * This driver uses four per-instance locks:
69 69 *
70 70 * xnf_gref_lock:
71 71 *
72 72 * Protects access to the grant reference list stored in
73 73 * xnf_gref_head. Grant references should be acquired and released
74 74 * using gref_get() and gref_put() respectively.
75 75 *
76 76 * xnf_schedlock:
77 77 *
78 78 * Protects:
79 79 * xnf_need_sched - used to record that a previous transmit attempt
80 80 * failed (and consequently it will be necessary to call
81 81 * mac_tx_update() when transmit resources are available).
82 82 * xnf_pending_multicast - the number of multicast requests that
83 83 * have been submitted to the backend for which we have not
84 84 * processed responses.
85 85 *
86 86 * xnf_txlock:
87 87 *
88 88 * Protects the transmit ring (xnf_tx_ring) and associated
89 89 * structures (notably xnf_tx_pkt_id and xnf_tx_pkt_id_head).
90 90 *
91 91 * xnf_rxlock:
92 92 *
93 93 * Protects the receive ring (xnf_rx_ring) and associated
94 94 * structures (notably xnf_rx_pkt_info).
95 95 *
96 96 * If driver-global state that affects both the transmit and receive
97 97 * rings is manipulated, both xnf_txlock and xnf_rxlock should be
98 98 * held, in that order.
99 99 *
100 100 * xnf_schedlock is acquired both whilst holding xnf_txlock and
101 101 * without. It should always be acquired after xnf_txlock if both are
102 102 * held.
103 103 *
104 104 * Notes:
105 105 * - atomic_add_64() is used to manipulate counters where we require
106 106 * accuracy. For counters intended only for observation by humans,
107 107 * post increment/decrement are used instead.
108 108 */
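/*
 * Illustrative sketch (not part of this change): when xnf_schedlock is
 * needed while xnf_txlock is already held, it is taken second, as in
 * the multicast accounting in xnf_set_multicast() below:
 *
 *	ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
 *	mutex_enter(&xnfp->xnf_schedlock);
 *	xnfp->xnf_pending_multicast++;
 *	mutex_exit(&xnfp->xnf_schedlock);
 *
 * The same ordering applies when recording xnf_need_sched after a
 * failed transmit attempt.
 */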
109 109
110 110 #include <sys/types.h>
111 111 #include <sys/errno.h>
112 112 #include <sys/param.h>
113 113 #include <sys/sysmacros.h>
114 114 #include <sys/systm.h>
115 115 #include <sys/stream.h>
116 116 #include <sys/strsubr.h>
117 117 #include <sys/strsun.h>
118 118 #include <sys/conf.h>
119 119 #include <sys/ddi.h>
120 120 #include <sys/devops.h>
121 121 #include <sys/sunddi.h>
122 122 #include <sys/sunndi.h>
123 123 #include <sys/dlpi.h>
124 124 #include <sys/ethernet.h>
125 125 #include <sys/strsun.h>
126 126 #include <sys/pattr.h>
127 127 #include <inet/ip.h>
128 128 #include <inet/ip_impl.h>
129 129 #include <inet/tcp.h>
130 130 #include <netinet/udp.h>
131 131 #include <sys/gld.h>
132 132 #include <sys/modctl.h>
133 133 #include <sys/mac_provider.h>
134 134 #include <sys/mac_ether.h>
135 135 #include <sys/bootinfo.h>
136 136 #include <sys/mach_mmu.h>
137 137 #ifdef XPV_HVM_DRIVER
138 138 #include <sys/xpv_support.h>
139 139 #include <sys/hypervisor.h>
140 140 #else
141 141 #include <sys/hypervisor.h>
142 142 #include <sys/evtchn_impl.h>
143 143 #include <sys/balloon_impl.h>
144 144 #endif
145 145 #include <xen/public/io/netif.h>
146 146 #include <sys/gnttab.h>
147 147 #include <xen/sys/xendev.h>
148 148 #include <sys/sdt.h>
149 149 #include <sys/note.h>
150 150 #include <sys/debug.h>
151 151
152 152 #include <io/xnf.h>
153 153
154 154 #if defined(DEBUG) || defined(__lint)
155 155 #define XNF_DEBUG
156 156 #endif
157 157
158 158 #ifdef XNF_DEBUG
159 159 int xnf_debug = 0;
160 160 xnf_t *xnf_debug_instance = NULL;
161 161 #endif
162 162
163 163 /*
164 164 * On a 32 bit PAE system physical and machine addresses are larger
 165  165  * than 32 bits. ddi_btop() on such systems takes an unsigned long
166 166 * argument, and so addresses above 4G are truncated before ddi_btop()
167 167 * gets to see them. To avoid this, code the shift operation here.
168 168 */
169 169 #define xnf_btop(addr) ((addr) >> PAGESHIFT)
170 170
171 171 /*
172 172 * The parameters below should only be changed in /etc/system, never in mdb.
173 173 */
174 174
175 175 /*
176 176 * Should we use the multicast control feature if the backend provides
177 177 * it?
178 178 */
179 179 boolean_t xnf_multicast_control = B_TRUE;
180 180
181 181 /*
182 182 * Should we allow scatter-gather for tx if backend allows it?
183 183 */
184 184 boolean_t xnf_enable_tx_sg = B_TRUE;
185 185
186 186 /*
187 187 * Should we allow scatter-gather for rx if backend allows it?
188 188 */
189 189 boolean_t xnf_enable_rx_sg = B_TRUE;
190 190
191 191 /*
192 192 * Should we allow lso for tx sends if backend allows it?
193 193 * Requires xnf_enable_tx_sg to be also set to TRUE.
194 194 */
195 195 boolean_t xnf_enable_lso = B_TRUE;
196 196
197 197 /*
198 198 * Should we allow lro on rx if backend supports it?
199 199 * Requires xnf_enable_rx_sg to be also set to TRUE.
200 200 *
201 201 * !! WARNING !!
202 202 * LRO is not yet supported in the OS so this should be left as FALSE.
203 203 * !! WARNING !!
204 204 */
205 205 boolean_t xnf_enable_lro = B_FALSE;
206 206
207 207 /*
208 208 * Received packets below this size are copied to a new streams buffer
209 209 * rather than being desballoc'ed.
210 210 *
211 211 * This value is chosen to accommodate traffic where there are a large
212 212 * number of small packets. For data showing a typical distribution,
213 213 * see:
214 214 *
215 215 * Sinha07a:
216 216 * Rishi Sinha, Christos Papadopoulos, and John
217 217 * Heidemann. Internet Packet Size Distributions: Some
218 218 * Observations. Technical Report ISI-TR-2007-643,
 219  219  * USC/Information Sciences Institute, May, 2007. Originally
220 220 * released October 2005 as web page
221 221 * http://netweb.usc.edu/~sinha/pkt-sizes/.
222 222 * <http://www.isi.edu/~johnh/PAPERS/Sinha07a.html>.
223 223 */
224 224 size_t xnf_rx_copy_limit = 64;
225 225
226 226 #define INVALID_GRANT_HANDLE ((grant_handle_t)-1)
227 227 #define INVALID_GRANT_REF ((grant_ref_t)-1)
228 228 #define INVALID_TX_ID ((uint16_t)-1)
229 229
230 230 #define TX_ID_TO_TXID(p, id) (&((p)->xnf_tx_pkt_id[(id)]))
231 231 #define TX_ID_VALID(i) \
232 232 (((i) != INVALID_TX_ID) && ((i) < NET_TX_RING_SIZE))
233 233
234 234 /*
235 235 * calculate how many pages are spanned by an mblk fragment
236 236 */
237 237 #define xnf_mblk_pages(mp) (MBLKL(mp) == 0 ? 0 : \
238 238 xnf_btop((uintptr_t)mp->b_wptr - 1) - xnf_btop((uintptr_t)mp->b_rptr) + 1)
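/*
 * Illustrative worked example (not part of this change): with 4K pages
 * (PAGESHIFT == 12), a fragment with b_rptr at 0x1000fe0 and
 * MBLKL(mp) == 0x40 has b_wptr at 0x1001020, so
 *
 *	xnf_btop(0x100101f) - xnf_btop(0x1000fe0) + 1 == 0x1001 - 0x1000 + 1 == 2
 *
 * i.e. the 64-byte fragment straddles a page boundary and spans two
 * pages. A zero-length fragment counts as zero pages.
 */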
239 239
240 240 /* Required system entry points */
241 241 static int xnf_attach(dev_info_t *, ddi_attach_cmd_t);
242 242 static int xnf_detach(dev_info_t *, ddi_detach_cmd_t);
243 243
244 244 /* Required driver entry points for Nemo */
245 245 static int xnf_start(void *);
246 246 static void xnf_stop(void *);
247 247 static int xnf_set_mac_addr(void *, const uint8_t *);
248 248 static int xnf_set_multicast(void *, boolean_t, const uint8_t *);
249 249 static int xnf_set_promiscuous(void *, boolean_t);
250 250 static mblk_t *xnf_send(void *, mblk_t *);
251 251 static uint_t xnf_intr(caddr_t);
252 252 static int xnf_stat(void *, uint_t, uint64_t *);
253 253 static boolean_t xnf_getcapab(void *, mac_capab_t, void *);
254 254 static int xnf_getprop(void *, const char *, mac_prop_id_t, uint_t, void *);
255 255 static int xnf_setprop(void *, const char *, mac_prop_id_t, uint_t,
256 256 const void *);
257 257 static void xnf_propinfo(void *, const char *, mac_prop_id_t,
258 258 mac_prop_info_handle_t);
259 259
260 260 /* Driver private functions */
261 261 static int xnf_alloc_dma_resources(xnf_t *);
262 262 static void xnf_release_dma_resources(xnf_t *);
263 263 static void xnf_release_mblks(xnf_t *);
264 264
265 265 static int xnf_buf_constructor(void *, void *, int);
266 266 static void xnf_buf_destructor(void *, void *);
267 267 static xnf_buf_t *xnf_buf_get(xnf_t *, int, boolean_t);
268 268 #pragma inline(xnf_buf_get)
269 269 static void xnf_buf_put(xnf_t *, xnf_buf_t *, boolean_t);
270 270 #pragma inline(xnf_buf_put)
271 271 static void xnf_buf_refresh(xnf_buf_t *);
272 272 #pragma inline(xnf_buf_refresh)
273 273 static void xnf_buf_recycle(xnf_buf_t *);
274 274
275 275 static int xnf_tx_buf_constructor(void *, void *, int);
276 276 static void xnf_tx_buf_destructor(void *, void *);
277 277
278 278 static grant_ref_t xnf_gref_get(xnf_t *);
279 279 #pragma inline(xnf_gref_get)
280 280 static void xnf_gref_put(xnf_t *, grant_ref_t);
281 281 #pragma inline(xnf_gref_put)
282 282
283 283 static xnf_txid_t *xnf_txid_get(xnf_t *);
284 284 #pragma inline(xnf_txid_get)
285 285 static void xnf_txid_put(xnf_t *, xnf_txid_t *);
286 286 #pragma inline(xnf_txid_put)
287 287
288 288 static void xnf_rxbuf_hang(xnf_t *, xnf_buf_t *);
289 289 static int xnf_tx_clean_ring(xnf_t *);
290 290 static void oe_state_change(dev_info_t *, ddi_eventcookie_t,
291 291 void *, void *);
292 292 static boolean_t xnf_kstat_init(xnf_t *);
293 293 static void xnf_rx_collect(xnf_t *);
294 294
295 295 #define XNF_CALLBACK_FLAGS (MC_GETCAPAB | MC_PROPERTIES)
296 296
297 297 static mac_callbacks_t xnf_callbacks = {
298 298 .mc_callbacks = XNF_CALLBACK_FLAGS,
299 299 .mc_getstat = xnf_stat,
300 300 .mc_start = xnf_start,
301 301 .mc_stop = xnf_stop,
302 302 .mc_setpromisc = xnf_set_promiscuous,
303 303 .mc_multicst = xnf_set_multicast,
304 304 .mc_unicst = xnf_set_mac_addr,
305 305 .mc_tx = xnf_send,
306 306 .mc_getcapab = xnf_getcapab,
307 307 .mc_setprop = xnf_setprop,
308 308 .mc_getprop = xnf_getprop,
309 309 .mc_propinfo = xnf_propinfo,
310 310 };
311 311
312 312 /* DMA attributes for network ring buffer */
313 313 static ddi_dma_attr_t ringbuf_dma_attr = {
314 314 .dma_attr_version = DMA_ATTR_V0,
315 315 .dma_attr_addr_lo = 0,
316 316 .dma_attr_addr_hi = 0xffffffffffffffffULL,
317 317 .dma_attr_count_max = 0x7fffffff,
318 318 .dma_attr_align = MMU_PAGESIZE,
319 319 .dma_attr_burstsizes = 0x7ff,
320 320 .dma_attr_minxfer = 1,
321 321 .dma_attr_maxxfer = 0xffffffffU,
322 322 .dma_attr_seg = 0xffffffffffffffffULL,
323 323 .dma_attr_sgllen = 1,
324 324 .dma_attr_granular = 1,
325 325 .dma_attr_flags = 0
326 326 };
327 327
328 328 /* DMA attributes for receive data */
329 329 static ddi_dma_attr_t rx_buf_dma_attr = {
330 330 .dma_attr_version = DMA_ATTR_V0,
331 331 .dma_attr_addr_lo = 0,
332 332 .dma_attr_addr_hi = 0xffffffffffffffffULL,
333 333 .dma_attr_count_max = MMU_PAGEOFFSET,
334 334 .dma_attr_align = MMU_PAGESIZE, /* allocation alignment */
335 335 .dma_attr_burstsizes = 0x7ff,
336 336 .dma_attr_minxfer = 1,
337 337 .dma_attr_maxxfer = 0xffffffffU,
338 338 .dma_attr_seg = 0xffffffffffffffffULL,
339 339 .dma_attr_sgllen = 1,
340 340 .dma_attr_granular = 1,
341 341 .dma_attr_flags = 0
342 342 };
343 343
344 344 /* DMA attributes for transmit data */
345 345 static ddi_dma_attr_t tx_buf_dma_attr = {
346 346 .dma_attr_version = DMA_ATTR_V0,
347 347 .dma_attr_addr_lo = 0,
348 348 .dma_attr_addr_hi = 0xffffffffffffffffULL,
349 349 .dma_attr_count_max = MMU_PAGEOFFSET,
350 350 .dma_attr_align = 1,
351 351 .dma_attr_burstsizes = 0x7ff,
352 352 .dma_attr_minxfer = 1,
353 353 .dma_attr_maxxfer = 0xffffffffU,
354 354 .dma_attr_seg = XEN_DATA_BOUNDARY - 1, /* segment boundary */
355 355 .dma_attr_sgllen = XEN_MAX_TX_DATA_PAGES, /* max number of segments */
356 356 .dma_attr_granular = 1,
357 357 .dma_attr_flags = 0
358 358 };
359 359
360 360 /* DMA access attributes for registers and descriptors */
361 361 static ddi_device_acc_attr_t accattr = {
362 362 DDI_DEVICE_ATTR_V0,
363 363 DDI_STRUCTURE_LE_ACC, /* This is a little-endian device */
364 364 DDI_STRICTORDER_ACC
365 365 };
366 366
367 367 /* DMA access attributes for data: NOT to be byte swapped. */
368 368 static ddi_device_acc_attr_t data_accattr = {
369 369 DDI_DEVICE_ATTR_V0,
370 370 DDI_NEVERSWAP_ACC,
371 371 DDI_STRICTORDER_ACC
372 372 };
373 373
374 374 DDI_DEFINE_STREAM_OPS(xnf_dev_ops, nulldev, nulldev, xnf_attach, xnf_detach,
375 375 nodev, NULL, D_MP, NULL, ddi_quiesce_not_supported);
376 376
377 377 static struct modldrv xnf_modldrv = {
378 378 &mod_driverops,
379 379 "Virtual Ethernet driver",
380 380 &xnf_dev_ops
381 381 };
382 382
383 383 static struct modlinkage modlinkage = {
384 384 MODREV_1, &xnf_modldrv, NULL
385 385 };
386 386
387 387 int
388 388 _init(void)
389 389 {
390 390 int r;
391 391
392 392 mac_init_ops(&xnf_dev_ops, "xnf");
393 393 r = mod_install(&modlinkage);
394 394 if (r != DDI_SUCCESS)
395 395 mac_fini_ops(&xnf_dev_ops);
396 396
397 397 return (r);
398 398 }
399 399
400 400 int
401 401 _fini(void)
402 402 {
403 403 return (EBUSY); /* XXPV should be removable */
404 404 }
405 405
406 406 int
407 407 _info(struct modinfo *modinfop)
408 408 {
409 409 return (mod_info(&modlinkage, modinfop));
410 410 }
411 411
412 412 /*
413 413 * Acquire a grant reference.
414 414 */
415 415 static grant_ref_t
416 416 xnf_gref_get(xnf_t *xnfp)
417 417 {
418 418 grant_ref_t gref;
419 419
420 420 mutex_enter(&xnfp->xnf_gref_lock);
421 421
422 422 do {
423 423 gref = gnttab_claim_grant_reference(&xnfp->xnf_gref_head);
424 424
425 425 } while ((gref == INVALID_GRANT_REF) &&
426 426 (gnttab_alloc_grant_references(16, &xnfp->xnf_gref_head) == 0));
427 427
428 428 mutex_exit(&xnfp->xnf_gref_lock);
429 429
430 430 if (gref == INVALID_GRANT_REF) {
431 431 xnfp->xnf_stat_gref_failure++;
432 432 } else {
433 433 atomic_inc_64(&xnfp->xnf_stat_gref_outstanding);
434 434 if (xnfp->xnf_stat_gref_outstanding > xnfp->xnf_stat_gref_peak)
435 435 xnfp->xnf_stat_gref_peak =
436 436 xnfp->xnf_stat_gref_outstanding;
437 437 }
438 438
439 439 return (gref);
440 440 }
441 441
442 442 /*
443 443 * Release a grant reference.
444 444 */
445 445 static void
446 446 xnf_gref_put(xnf_t *xnfp, grant_ref_t gref)
447 447 {
448 448 ASSERT(gref != INVALID_GRANT_REF);
449 449
450 450 mutex_enter(&xnfp->xnf_gref_lock);
451 451 gnttab_release_grant_reference(&xnfp->xnf_gref_head, gref);
452 452 mutex_exit(&xnfp->xnf_gref_lock);
453 453
454 454 atomic_dec_64(&xnfp->xnf_stat_gref_outstanding);
455 455 }
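/*
 * Illustrative sketch (not part of this change) of how the transmit
 * path pairs these calls; the grant of the underlying page to the
 * backend is established elsewhere (see the tx mapping code and
 * xnf_data_txbuf_free()):
 *
 *	gref = xnf_gref_get(xnfp);
 *	if (gref == INVALID_GRANT_REF)
 *		... back off: no grant references available ...
 *	... grant the page, transmit, wait for the backend to finish ...
 *	(void) gnttab_end_foreign_access_ref(gref, 1);
 *	xnf_gref_put(xnfp, gref);
 */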
456 456
457 457 /*
458 458 * Acquire a transmit id.
459 459 */
460 460 static xnf_txid_t *
461 461 xnf_txid_get(xnf_t *xnfp)
462 462 {
463 463 xnf_txid_t *tidp;
464 464
465 465 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
466 466
467 467 if (xnfp->xnf_tx_pkt_id_head == INVALID_TX_ID)
468 468 return (NULL);
469 469
470 470 ASSERT(TX_ID_VALID(xnfp->xnf_tx_pkt_id_head));
471 471
472 472 tidp = TX_ID_TO_TXID(xnfp, xnfp->xnf_tx_pkt_id_head);
473 473 xnfp->xnf_tx_pkt_id_head = tidp->next;
474 474 tidp->next = INVALID_TX_ID;
475 475
476 476 ASSERT(tidp->txbuf == NULL);
477 477
478 478 return (tidp);
479 479 }
480 480
481 481 /*
482 482 * Release a transmit id.
483 483 */
484 484 static void
485 485 xnf_txid_put(xnf_t *xnfp, xnf_txid_t *tidp)
486 486 {
487 487 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
488 488 ASSERT(TX_ID_VALID(tidp->id));
489 489 ASSERT(tidp->next == INVALID_TX_ID);
490 490
491 491 tidp->txbuf = NULL;
492 492 tidp->next = xnfp->xnf_tx_pkt_id_head;
493 493 xnfp->xnf_tx_pkt_id_head = tidp->id;
494 494 }
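/*
 * Illustrative note (not part of this change): free transmit ids form
 * a LIFO list threaded through xnf_tx_pkt_id[] by index. Releasing id
 * 5 while the head is 9 leaves
 *
 *	xnf_tx_pkt_id[5].next == 9 and xnf_tx_pkt_id_head == 5
 *
 * so the next xnf_txid_get() returns id 5 again; an empty list is
 * marked by xnf_tx_pkt_id_head == INVALID_TX_ID.
 */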
495 495
496 496 static void
497 497 xnf_data_txbuf_free(xnf_t *xnfp, xnf_txbuf_t *txp)
498 498 {
499 499 ASSERT3U(txp->tx_type, ==, TX_DATA);
500 500
501 501 /*
502 502 * We are either using a lookaside buffer or we are mapping existing
503 503 * buffers.
504 504 */
505 505 if (txp->tx_bdesc != NULL) {
506 506 ASSERT(!txp->tx_handle_bound);
507 507 xnf_buf_put(xnfp, txp->tx_bdesc, B_TRUE);
508 508 } else {
509 509 if (txp->tx_txreq.gref != INVALID_GRANT_REF) {
510 510 if (gnttab_query_foreign_access(txp->tx_txreq.gref) !=
511 511 0) {
512 512 cmn_err(CE_PANIC, "tx grant %d still in use by "
513 513 "backend domain", txp->tx_txreq.gref);
514 514 }
515 515 (void) gnttab_end_foreign_access_ref(
516 516 txp->tx_txreq.gref, 1);
517 517 xnf_gref_put(xnfp, txp->tx_txreq.gref);
518 518 }
519 519
520 520 if (txp->tx_handle_bound)
521 521 (void) ddi_dma_unbind_handle(txp->tx_dma_handle);
522 522 }
523 523
524 524 if (txp->tx_mp != NULL)
525 525 freemsg(txp->tx_mp);
526 526
527 527 if (txp->tx_prev != NULL) {
528 528 ASSERT3P(txp->tx_prev->tx_next, ==, txp);
529 529 txp->tx_prev->tx_next = NULL;
530 530 }
531 531
532 532 if (txp->tx_txreq.id != INVALID_TX_ID) {
533 533 /*
534 534 * This should be only possible when resuming from a suspend.
535 535 */
536 536 ASSERT(!xnfp->xnf_connected);
537 537 xnf_txid_put(xnfp, TX_ID_TO_TXID(xnfp, txp->tx_txreq.id));
538 538 txp->tx_txreq.id = INVALID_TX_ID;
539 539 }
540 540
541 541 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
542 542 }
543 543
544 544 static void
545 545 xnf_data_txbuf_free_chain(xnf_t *xnfp, xnf_txbuf_t *txp)
546 546 {
547 547 if (txp == NULL)
548 548 return;
549 549
550 550 while (txp->tx_next != NULL)
551 551 txp = txp->tx_next;
552 552
553 553 /*
554 554 * We free the chain in reverse order so that grants can be released
555 555 * for all dma chunks before unbinding the dma handles. The mblk is
556 556 * freed last, after all its fragments' dma handles are unbound.
557 557 */
558 558 xnf_txbuf_t *prev;
559 559 for (; txp != NULL; txp = prev) {
560 560 prev = txp->tx_prev;
561 561 xnf_data_txbuf_free(xnfp, txp);
562 562 }
563 563 }
564 564
565 565 static xnf_txbuf_t *
566 566 xnf_data_txbuf_alloc(xnf_t *xnfp)
567 567 {
568 568 xnf_txbuf_t *txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP);
569 569 txp->tx_type = TX_DATA;
570 570 txp->tx_next = NULL;
571 571 txp->tx_prev = NULL;
572 572 txp->tx_head = txp;
573 573 txp->tx_frags_to_ack = 0;
574 574 txp->tx_mp = NULL;
575 575 txp->tx_bdesc = NULL;
576 576 txp->tx_handle_bound = B_FALSE;
577 577 txp->tx_txreq.gref = INVALID_GRANT_REF;
578 578 txp->tx_txreq.id = INVALID_TX_ID;
579 579
580 580 return (txp);
581 581 }
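/*
 * Illustrative sketch (not part of this change) of the intended
 * lifetime of a data txbuf:
 *
 *	txp = xnf_data_txbuf_alloc(xnfp);
 *	... attach a lookaside buffer or bind DMA, fill txp->tx_txreq ...
 *	... push onto the ring via xnf_tx_push_packet() ...
 *
 * If the packet never reaches the ring, the chain can be torn down
 * with xnf_data_txbuf_free_chain(); once pushed, it is freed from
 * xnf_tx_clean_ring() after responses for all fragments arrive.
 */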
582 582
583 583 /*
584 584 * Get `wanted' slots in the transmit ring, waiting for at least that
585 585 * number if `wait' is B_TRUE. Force the ring to be cleaned by setting
586 586 * `wanted' to zero.
587 587 *
588 588 * Return the number of slots available.
589 589 */
590 590 static int
591 591 xnf_tx_slots_get(xnf_t *xnfp, int wanted, boolean_t wait)
592 592 {
593 593 int slotsfree;
594 594 boolean_t forced_clean = (wanted == 0);
595 595
596 596 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
597 597
598 598 /* LINTED: constant in conditional context */
599 599 while (B_TRUE) {
600 600 slotsfree = RING_FREE_REQUESTS(&xnfp->xnf_tx_ring);
601 601
602 602 if ((slotsfree < wanted) || forced_clean)
603 603 slotsfree = xnf_tx_clean_ring(xnfp);
604 604
605 605 /*
606 606 * If there are more than we need free, tell other
607 607 * people to come looking again. We hold txlock, so we
608 608 * are able to take our slots before anyone else runs.
609 609 */
610 610 if (slotsfree > wanted)
611 611 cv_broadcast(&xnfp->xnf_cv_tx_slots);
612 612
613 613 if (slotsfree >= wanted)
614 614 break;
615 615
616 616 if (!wait)
617 617 break;
618 618
619 619 cv_wait(&xnfp->xnf_cv_tx_slots, &xnfp->xnf_txlock);
620 620 }
621 621
622 622 ASSERT(slotsfree <= RING_SIZE(&(xnfp->xnf_tx_ring)));
623 623
624 624 return (slotsfree);
625 625 }
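/*
 * Illustrative usage (not part of this change), as in the multicast
 * path below, which needs two adjacent slots and is willing to block:
 *
 *	n_slots = xnf_tx_slots_get(xnfp, 2, B_TRUE);
 *	ASSERT(n_slots >= 2);
 *
 * Passing `wanted' == 0 simply forces xnf_tx_clean_ring() to run.
 */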
626 626
627 627 static int
628 628 xnf_setup_rings(xnf_t *xnfp)
629 629 {
630 630 domid_t oeid;
631 631 struct xenbus_device *xsd;
632 632 RING_IDX i;
633 633 int err;
634 634 xnf_txid_t *tidp;
635 635 xnf_buf_t **bdescp;
636 636
637 637 oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
638 638 xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
639 639
640 640 if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF)
641 641 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
642 642
643 643 err = gnttab_grant_foreign_access(oeid,
644 644 xnf_btop(pa_to_ma(xnfp->xnf_tx_ring_phys_addr)), 0);
645 645 if (err <= 0) {
646 646 err = -err;
647 647 xenbus_dev_error(xsd, err, "granting access to tx ring page");
648 648 goto out;
649 649 }
650 650 xnfp->xnf_tx_ring_ref = (grant_ref_t)err;
651 651
652 652 if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF)
653 653 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
654 654
655 655 err = gnttab_grant_foreign_access(oeid,
656 656 xnf_btop(pa_to_ma(xnfp->xnf_rx_ring_phys_addr)), 0);
657 657 if (err <= 0) {
658 658 err = -err;
659 659 xenbus_dev_error(xsd, err, "granting access to rx ring page");
660 660 goto out;
661 661 }
662 662 xnfp->xnf_rx_ring_ref = (grant_ref_t)err;
663 663
664 664 mutex_enter(&xnfp->xnf_txlock);
665 665
666 666 /*
667 667 * We first cleanup the TX ring in case we are doing a resume.
668 668 * Note that this can lose packets, but we expect to stagger on.
669 669 */
 670  670  	xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. empty list. */
671 671 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
672 672 i < NET_TX_RING_SIZE;
673 673 i++, tidp++) {
674 674 xnf_txbuf_t *txp = tidp->txbuf;
675 675 if (txp == NULL)
676 676 continue;
677 677
678 678 switch (txp->tx_type) {
679 679 case TX_DATA:
680 680 /*
681 681 * txid_put() will be called for each txbuf's txid in
682 682 * the chain which will result in clearing tidp->txbuf.
683 683 */
684 684 xnf_data_txbuf_free_chain(xnfp, txp);
685 685
686 686 break;
687 687
688 688 case TX_MCAST_REQ:
689 689 txp->tx_type = TX_MCAST_RSP;
690 690 txp->tx_status = NETIF_RSP_DROPPED;
691 691 cv_broadcast(&xnfp->xnf_cv_multicast);
692 692
693 693 /*
694 694 * The request consumed two slots in the ring,
695 695 * yet only a single xnf_txid_t is used. Step
696 696 * over the empty slot.
697 697 */
698 698 i++;
699 699 ASSERT3U(i, <, NET_TX_RING_SIZE);
700 700 break;
701 701
702 702 case TX_MCAST_RSP:
703 703 break;
704 704 }
705 705 }
706 706
707 707 /*
708 708 * Now purge old list and add each txid to the new free list.
709 709 */
 710  710  	xnfp->xnf_tx_pkt_id_head = INVALID_TX_ID; /* I.e. empty list. */
711 711 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
712 712 i < NET_TX_RING_SIZE;
713 713 i++, tidp++) {
714 714 tidp->id = i;
715 715 ASSERT3P(tidp->txbuf, ==, NULL);
716 716 tidp->next = INVALID_TX_ID; /* Appease txid_put(). */
717 717 xnf_txid_put(xnfp, tidp);
718 718 }
719 719
720 720 /* LINTED: constant in conditional context */
721 721 SHARED_RING_INIT(xnfp->xnf_tx_ring.sring);
722 722 /* LINTED: constant in conditional context */
723 723 FRONT_RING_INIT(&xnfp->xnf_tx_ring,
724 724 xnfp->xnf_tx_ring.sring, PAGESIZE);
725 725
726 726 mutex_exit(&xnfp->xnf_txlock);
727 727
728 728 mutex_enter(&xnfp->xnf_rxlock);
729 729
730 730 /*
731 731 * Clean out any buffers currently posted to the receive ring
732 732 * before we reset it.
733 733 */
734 734 for (i = 0, bdescp = &xnfp->xnf_rx_pkt_info[0];
735 735 i < NET_RX_RING_SIZE;
736 736 i++, bdescp++) {
737 737 if (*bdescp != NULL) {
738 738 xnf_buf_put(xnfp, *bdescp, B_FALSE);
739 739 *bdescp = NULL;
740 740 }
741 741 }
742 742
743 743 /* LINTED: constant in conditional context */
744 744 SHARED_RING_INIT(xnfp->xnf_rx_ring.sring);
745 745 /* LINTED: constant in conditional context */
746 746 FRONT_RING_INIT(&xnfp->xnf_rx_ring,
747 747 xnfp->xnf_rx_ring.sring, PAGESIZE);
748 748
749 749 /*
750 750 * Fill the ring with buffers.
751 751 */
752 752 for (i = 0; i < NET_RX_RING_SIZE; i++) {
753 753 xnf_buf_t *bdesc;
754 754
755 755 bdesc = xnf_buf_get(xnfp, KM_SLEEP, B_FALSE);
756 756 VERIFY(bdesc != NULL);
757 757 xnf_rxbuf_hang(xnfp, bdesc);
758 758 }
759 759
760 760 /* LINTED: constant in conditional context */
761 761 RING_PUSH_REQUESTS(&xnfp->xnf_rx_ring);
762 762
763 763 mutex_exit(&xnfp->xnf_rxlock);
764 764
765 765 return (0);
766 766
767 767 out:
768 768 if (xnfp->xnf_tx_ring_ref != INVALID_GRANT_REF)
769 769 gnttab_end_foreign_access(xnfp->xnf_tx_ring_ref, 0, 0);
770 770 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF;
771 771
772 772 if (xnfp->xnf_rx_ring_ref != INVALID_GRANT_REF)
773 773 gnttab_end_foreign_access(xnfp->xnf_rx_ring_ref, 0, 0);
774 774 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF;
775 775
776 776 return (err);
777 777 }
778 778
779 779 /*
780 780 * Connect driver to back end, called to set up communication with
781 781 * back end driver both initially and on resume after restore/migrate.
782 782 */
783 783 void
784 784 xnf_be_connect(xnf_t *xnfp)
785 785 {
786 786 const char *message;
787 787 xenbus_transaction_t xbt;
788 788 struct xenbus_device *xsd;
789 789 char *xsname;
790 790 int err;
791 791
792 792 ASSERT(!xnfp->xnf_connected);
793 793
794 794 xsd = xvdi_get_xsd(xnfp->xnf_devinfo);
795 795 xsname = xvdi_get_xsname(xnfp->xnf_devinfo);
796 796
797 797 err = xnf_setup_rings(xnfp);
798 798 if (err != 0) {
799 799 cmn_err(CE_WARN, "failed to set up tx/rx rings");
800 800 xenbus_dev_error(xsd, err, "setting up ring");
801 801 return;
802 802 }
803 803
804 804 again:
805 805 err = xenbus_transaction_start(&xbt);
806 806 if (err != 0) {
807 807 xenbus_dev_error(xsd, EIO, "starting transaction");
808 808 return;
809 809 }
810 810
811 811 err = xenbus_printf(xbt, xsname, "tx-ring-ref", "%u",
812 812 xnfp->xnf_tx_ring_ref);
813 813 if (err != 0) {
814 814 message = "writing tx ring-ref";
815 815 goto abort_transaction;
816 816 }
817 817
818 818 err = xenbus_printf(xbt, xsname, "rx-ring-ref", "%u",
819 819 xnfp->xnf_rx_ring_ref);
820 820 if (err != 0) {
821 821 message = "writing rx ring-ref";
822 822 goto abort_transaction;
823 823 }
824 824
825 825 err = xenbus_printf(xbt, xsname, "event-channel", "%u",
826 826 xnfp->xnf_evtchn);
827 827 if (err != 0) {
828 828 message = "writing event-channel";
829 829 goto abort_transaction;
830 830 }
831 831
832 832 err = xenbus_printf(xbt, xsname, "feature-rx-notify", "%d", 1);
833 833 if (err != 0) {
834 834 message = "writing feature-rx-notify";
835 835 goto abort_transaction;
836 836 }
837 837
838 838 err = xenbus_printf(xbt, xsname, "request-rx-copy", "%d", 1);
839 839 if (err != 0) {
840 840 message = "writing request-rx-copy";
841 841 goto abort_transaction;
842 842 }
843 843
844 844 if (xnfp->xnf_be_mcast_control) {
845 845 err = xenbus_printf(xbt, xsname, "request-multicast-control",
846 846 "%d", 1);
847 847 if (err != 0) {
848 848 message = "writing request-multicast-control";
849 849 goto abort_transaction;
850 850 }
851 851 }
852 852
853 853 /*
854 854 * Tell backend if we support scatter-gather lists on the rx side.
855 855 */
856 856 err = xenbus_printf(xbt, xsname, "feature-sg", "%d",
857 857 xnf_enable_rx_sg ? 1 : 0);
858 858 if (err != 0) {
859 859 message = "writing feature-sg";
860 860 goto abort_transaction;
861 861 }
862 862
863 863 /*
864 864 * Tell backend if we support LRO for IPv4. Scatter-gather on rx is
865 865 * a prerequisite.
866 866 */
867 867 err = xenbus_printf(xbt, xsname, "feature-gso-tcpv4", "%d",
868 868 (xnf_enable_rx_sg && xnf_enable_lro) ? 1 : 0);
869 869 if (err != 0) {
870 870 message = "writing feature-gso-tcpv4";
871 871 goto abort_transaction;
872 872 }
873 873
874 874 err = xvdi_switch_state(xnfp->xnf_devinfo, xbt, XenbusStateConnected);
875 875 if (err != 0) {
876 876 message = "switching state to XenbusStateConnected";
877 877 goto abort_transaction;
878 878 }
879 879
880 880 err = xenbus_transaction_end(xbt, 0);
881 881 if (err != 0) {
882 882 if (err == EAGAIN)
883 883 goto again;
884 884 xenbus_dev_error(xsd, err, "completing transaction");
885 885 }
886 886
887 887 return;
888 888
889 889 abort_transaction:
890 890 (void) xenbus_transaction_end(xbt, 1);
891 891 xenbus_dev_error(xsd, err, "%s", message);
892 892 }
893 893
894 894 /*
895 895 * Read configuration information from xenstore.
896 896 */
897 897 void
898 898 xnf_read_config(xnf_t *xnfp)
899 899 {
900 900 int err, be_cap;
901 901 char mac[ETHERADDRL * 3];
902 902 char *oename = xvdi_get_oename(xnfp->xnf_devinfo);
903 903
904 904 err = xenbus_scanf(XBT_NULL, oename, "mac",
905 905 "%s", (char *)&mac[0]);
906 906 if (err != 0) {
907 907 /*
 908  908  		 * bad: we're supposed to be set up with a proper mac
 909  909  		 * address at this point
910 910 */
911 911 cmn_err(CE_WARN, "%s%d: no mac address",
912 912 ddi_driver_name(xnfp->xnf_devinfo),
913 913 ddi_get_instance(xnfp->xnf_devinfo));
914 914 return;
915 915 }
916 916 if (ether_aton(mac, xnfp->xnf_mac_addr) != ETHERADDRL) {
917 917 err = ENOENT;
918 918 xenbus_dev_error(xvdi_get_xsd(xnfp->xnf_devinfo), ENOENT,
919 919 "parsing %s/mac", xvdi_get_xsname(xnfp->xnf_devinfo));
920 920 return;
921 921 }
922 922
923 923 err = xenbus_scanf(XBT_NULL, oename,
924 924 "feature-rx-copy", "%d", &be_cap);
925 925 /*
926 926 * If we fail to read the store we assume that the key is
927 927 * absent, implying an older domain at the far end. Older
928 928 * domains cannot do HV copy.
929 929 */
930 930 if (err != 0)
931 931 be_cap = 0;
932 932 xnfp->xnf_be_rx_copy = (be_cap != 0);
933 933
934 934 err = xenbus_scanf(XBT_NULL, oename,
935 935 "feature-multicast-control", "%d", &be_cap);
936 936 /*
937 937 * If we fail to read the store we assume that the key is
938 938 * absent, implying an older domain at the far end. Older
939 939 * domains do not support multicast control.
940 940 */
941 941 if (err != 0)
942 942 be_cap = 0;
943 943 xnfp->xnf_be_mcast_control = (be_cap != 0) && xnf_multicast_control;
944 944
945 945 /*
946 946 * See if back-end supports scatter-gather for transmits. If not,
947 947 * we will not support LSO and limit the mtu to 1500.
948 948 */
949 949 err = xenbus_scanf(XBT_NULL, oename, "feature-sg", "%d", &be_cap);
950 950 if (err != 0) {
951 951 be_cap = 0;
952 952 dev_err(xnfp->xnf_devinfo, CE_WARN, "error reading "
953 953 "'feature-sg' from backend driver");
954 954 }
955 955 if (be_cap == 0) {
956 956 dev_err(xnfp->xnf_devinfo, CE_WARN, "scatter-gather is not "
957 957 "supported for transmits in the backend driver. LSO is "
958 958 "disabled and MTU is restricted to 1500 bytes.");
959 959 }
960 960 xnfp->xnf_be_tx_sg = (be_cap != 0) && xnf_enable_tx_sg;
961 961
962 962 if (xnfp->xnf_be_tx_sg) {
963 963 /*
964 964 * Check if LSO is supported. Currently we only check for
965 965 * IPv4 as Illumos doesn't support LSO for IPv6.
966 966 */
967 967 err = xenbus_scanf(XBT_NULL, oename, "feature-gso-tcpv4", "%d",
968 968 &be_cap);
969 969 if (err != 0) {
970 970 be_cap = 0;
971 971 dev_err(xnfp->xnf_devinfo, CE_WARN, "error reading "
972 972 "'feature-gso-tcpv4' from backend driver");
973 973 }
974 974 if (be_cap == 0) {
975 975 dev_err(xnfp->xnf_devinfo, CE_WARN, "LSO is not "
976 976 "supported by the backend driver. Performance "
977 977 "will be affected.");
978 978 }
979 979 xnfp->xnf_be_lso = (be_cap != 0) && xnf_enable_lso;
980 980 }
981 981 }
982 982
983 983 /*
984 984 * attach(9E) -- Attach a device to the system
985 985 */
986 986 static int
987 987 xnf_attach(dev_info_t *devinfo, ddi_attach_cmd_t cmd)
988 988 {
989 989 mac_register_t *macp;
990 990 xnf_t *xnfp;
991 991 int err;
992 992 char cachename[32];
993 993
994 994 #ifdef XNF_DEBUG
995 995 if (xnf_debug & XNF_DEBUG_DDI)
996 996 printf("xnf%d: attach(0x%p)\n", ddi_get_instance(devinfo),
997 997 (void *)devinfo);
998 998 #endif
999 999
1000 1000 switch (cmd) {
1001 1001 case DDI_RESUME:
1002 1002 xnfp = ddi_get_driver_private(devinfo);
1003 1003 xnfp->xnf_gen++;
1004 1004
1005 1005 (void) xvdi_resume(devinfo);
1006 1006 (void) xvdi_alloc_evtchn(devinfo);
1007 1007 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
1008 1008 #ifdef XPV_HVM_DRIVER
1009 1009 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr,
1010 1010 xnfp);
1011 1011 #else
1012 1012 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr,
1013 1013 (caddr_t)xnfp);
1014 1014 #endif
1015 1015 return (DDI_SUCCESS);
1016 1016
1017 1017 case DDI_ATTACH:
1018 1018 break;
1019 1019
1020 1020 default:
1021 1021 return (DDI_FAILURE);
1022 1022 }
1023 1023
1024 1024 /*
1025 1025 * Allocate gld_mac_info_t and xnf_instance structures
1026 1026 */
1027 1027 macp = mac_alloc(MAC_VERSION);
1028 1028 if (macp == NULL)
1029 1029 return (DDI_FAILURE);
1030 1030 xnfp = kmem_zalloc(sizeof (*xnfp), KM_SLEEP);
1031 1031
1032 1032 xnfp->xnf_tx_pkt_id =
1033 1033 kmem_zalloc(sizeof (xnf_txid_t) * NET_TX_RING_SIZE, KM_SLEEP);
1034 1034
1035 1035 xnfp->xnf_rx_pkt_info =
1036 1036 kmem_zalloc(sizeof (xnf_buf_t *) * NET_RX_RING_SIZE, KM_SLEEP);
1037 1037
1038 1038 macp->m_dip = devinfo;
1039 1039 macp->m_driver = xnfp;
1040 1040 xnfp->xnf_devinfo = devinfo;
1041 1041
1042 1042 macp->m_type_ident = MAC_PLUGIN_IDENT_ETHER;
1043 1043 macp->m_src_addr = xnfp->xnf_mac_addr;
1044 1044 macp->m_callbacks = &xnf_callbacks;
1045 1045 macp->m_min_sdu = 0;
1046 1046 xnfp->xnf_mtu = ETHERMTU;
1047 1047 macp->m_max_sdu = xnfp->xnf_mtu;
1048 1048
1049 1049 xnfp->xnf_running = B_FALSE;
1050 1050 xnfp->xnf_connected = B_FALSE;
1051 1051 xnfp->xnf_be_rx_copy = B_FALSE;
1052 1052 xnfp->xnf_be_mcast_control = B_FALSE;
1053 1053 xnfp->xnf_need_sched = B_FALSE;
1054 1054
1055 1055 xnfp->xnf_rx_head = NULL;
1056 1056 xnfp->xnf_rx_tail = NULL;
1057 1057 xnfp->xnf_rx_new_buffers_posted = B_FALSE;
1058 1058
1059 1059 #ifdef XPV_HVM_DRIVER
1060 - /*
1061 - * Report our version to dom0.
1062 - */
1063 - if (xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d",
1064 - HVMPV_XNF_VERS))
1065 - cmn_err(CE_WARN, "xnf: couldn't write version\n");
1060 + /* Report our version to dom0 */
1061 + (void) xenbus_printf(XBT_NULL, "guest/xnf", "version", "%d",
1062 + HVMPV_XNF_VERS);
1066 1063 #endif
1067 1064
1068 1065 /*
1069 1066 * Get the iblock cookie with which to initialize the mutexes.
1070 1067 */
1071 1068 if (ddi_get_iblock_cookie(devinfo, 0, &xnfp->xnf_icookie)
1072 1069 != DDI_SUCCESS)
1073 1070 goto failure;
1074 1071
1075 1072 mutex_init(&xnfp->xnf_txlock,
1076 1073 NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
1077 1074 mutex_init(&xnfp->xnf_rxlock,
1078 1075 NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
1079 1076 mutex_init(&xnfp->xnf_schedlock,
1080 1077 NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
1081 1078 mutex_init(&xnfp->xnf_gref_lock,
1082 1079 NULL, MUTEX_DRIVER, xnfp->xnf_icookie);
1083 1080
1084 1081 cv_init(&xnfp->xnf_cv_state, NULL, CV_DEFAULT, NULL);
1085 1082 cv_init(&xnfp->xnf_cv_multicast, NULL, CV_DEFAULT, NULL);
1086 1083 cv_init(&xnfp->xnf_cv_tx_slots, NULL, CV_DEFAULT, NULL);
1087 1084
1088 1085 (void) sprintf(cachename, "xnf_buf_cache_%d",
1089 1086 ddi_get_instance(devinfo));
1090 1087 xnfp->xnf_buf_cache = kmem_cache_create(cachename,
1091 1088 sizeof (xnf_buf_t), 0,
1092 1089 xnf_buf_constructor, xnf_buf_destructor,
1093 1090 NULL, xnfp, NULL, 0);
1094 1091 if (xnfp->xnf_buf_cache == NULL)
1095 1092 goto failure_0;
1096 1093
1097 1094 (void) sprintf(cachename, "xnf_tx_buf_cache_%d",
1098 1095 ddi_get_instance(devinfo));
1099 1096 xnfp->xnf_tx_buf_cache = kmem_cache_create(cachename,
1100 1097 sizeof (xnf_txbuf_t), 0,
1101 1098 xnf_tx_buf_constructor, xnf_tx_buf_destructor,
1102 1099 NULL, xnfp, NULL, 0);
1103 1100 if (xnfp->xnf_tx_buf_cache == NULL)
1104 1101 goto failure_1;
1105 1102
1106 1103 xnfp->xnf_gref_head = INVALID_GRANT_REF;
1107 1104
1108 1105 if (xnf_alloc_dma_resources(xnfp) == DDI_FAILURE) {
1109 1106 cmn_err(CE_WARN, "xnf%d: failed to allocate and initialize "
1110 1107 "driver data structures",
1111 1108 ddi_get_instance(xnfp->xnf_devinfo));
1112 1109 goto failure_2;
1113 1110 }
1114 1111
1115 1112 xnfp->xnf_rx_ring.sring->rsp_event =
1116 1113 xnfp->xnf_tx_ring.sring->rsp_event = 1;
1117 1114
1118 1115 xnfp->xnf_tx_ring_ref = INVALID_GRANT_REF;
1119 1116 xnfp->xnf_rx_ring_ref = INVALID_GRANT_REF;
1120 1117
1121 1118 /* set driver private pointer now */
1122 1119 ddi_set_driver_private(devinfo, xnfp);
1123 1120
1124 1121 if (!xnf_kstat_init(xnfp))
1125 1122 goto failure_3;
1126 1123
1127 1124 /*
1128 1125 * Allocate an event channel, add the interrupt handler and
1129 1126 * bind it to the event channel.
1130 1127 */
1131 1128 (void) xvdi_alloc_evtchn(devinfo);
1132 1129 xnfp->xnf_evtchn = xvdi_get_evtchn(devinfo);
1133 1130 #ifdef XPV_HVM_DRIVER
1134 1131 ec_bind_evtchn_to_handler(xnfp->xnf_evtchn, IPL_VIF, xnf_intr, xnfp);
1135 1132 #else
1136 1133 (void) ddi_add_intr(devinfo, 0, NULL, NULL, xnf_intr, (caddr_t)xnfp);
1137 1134 #endif
1138 1135
1139 1136 err = mac_register(macp, &xnfp->xnf_mh);
1140 1137 mac_free(macp);
1141 1138 macp = NULL;
1142 1139 if (err != 0)
1143 1140 goto failure_4;
1144 1141
1145 1142 if (xvdi_add_event_handler(devinfo, XS_OE_STATE, oe_state_change, NULL)
1146 1143 != DDI_SUCCESS)
1147 1144 goto failure_5;
1148 1145
1149 1146 #ifdef XPV_HVM_DRIVER
1150 1147 /*
1151 1148 * In the HVM case, this driver essentially replaces a driver for
1152 1149 * a 'real' PCI NIC. Without the "model" property set to
1153 1150 * "Ethernet controller", like the PCI code does, netbooting does
1154 1151 * not work correctly, as strplumb_get_netdev_path() will not find
1155 1152 * this interface.
1156 1153 */
1157 1154 (void) ndi_prop_update_string(DDI_DEV_T_NONE, devinfo, "model",
1158 1155 "Ethernet controller");
1159 1156 #endif
1160 1157
1161 1158 #ifdef XNF_DEBUG
1162 1159 if (xnf_debug_instance == NULL)
1163 1160 xnf_debug_instance = xnfp;
1164 1161 #endif
1165 1162
1166 1163 return (DDI_SUCCESS);
1167 1164
1168 1165 failure_5:
1169 1166 (void) mac_unregister(xnfp->xnf_mh);
1170 1167
1171 1168 failure_4:
1172 1169 #ifdef XPV_HVM_DRIVER
1173 1170 ec_unbind_evtchn(xnfp->xnf_evtchn);
1174 1171 xvdi_free_evtchn(devinfo);
1175 1172 #else
1176 1173 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
1177 1174 #endif
1178 1175 xnfp->xnf_evtchn = INVALID_EVTCHN;
1179 1176 kstat_delete(xnfp->xnf_kstat_aux);
1180 1177
1181 1178 failure_3:
1182 1179 xnf_release_dma_resources(xnfp);
1183 1180
1184 1181 failure_2:
1185 1182 kmem_cache_destroy(xnfp->xnf_tx_buf_cache);
1186 1183
1187 1184 failure_1:
1188 1185 kmem_cache_destroy(xnfp->xnf_buf_cache);
1189 1186
1190 1187 failure_0:
1191 1188 cv_destroy(&xnfp->xnf_cv_tx_slots);
1192 1189 cv_destroy(&xnfp->xnf_cv_multicast);
1193 1190 cv_destroy(&xnfp->xnf_cv_state);
1194 1191
1195 1192 mutex_destroy(&xnfp->xnf_gref_lock);
1196 1193 mutex_destroy(&xnfp->xnf_schedlock);
1197 1194 mutex_destroy(&xnfp->xnf_rxlock);
1198 1195 mutex_destroy(&xnfp->xnf_txlock);
1199 1196
1200 1197 failure:
1201 1198 kmem_free(xnfp, sizeof (*xnfp));
1202 1199 if (macp != NULL)
1203 1200 mac_free(macp);
1204 1201
1205 1202 return (DDI_FAILURE);
1206 1203 }
1207 1204
1208 1205 /* detach(9E) -- Detach a device from the system */
1209 1206 static int
1210 1207 xnf_detach(dev_info_t *devinfo, ddi_detach_cmd_t cmd)
1211 1208 {
1212 1209 xnf_t *xnfp; /* Our private device info */
1213 1210
1214 1211 #ifdef XNF_DEBUG
1215 1212 if (xnf_debug & XNF_DEBUG_DDI)
1216 1213 printf("xnf_detach(0x%p)\n", (void *)devinfo);
1217 1214 #endif
1218 1215
1219 1216 xnfp = ddi_get_driver_private(devinfo);
1220 1217
1221 1218 switch (cmd) {
1222 1219 case DDI_SUSPEND:
1223 1220 #ifdef XPV_HVM_DRIVER
1224 1221 ec_unbind_evtchn(xnfp->xnf_evtchn);
1225 1222 xvdi_free_evtchn(devinfo);
1226 1223 #else
1227 1224 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
1228 1225 #endif
1229 1226
1230 1227 xvdi_suspend(devinfo);
1231 1228
1232 1229 mutex_enter(&xnfp->xnf_rxlock);
1233 1230 mutex_enter(&xnfp->xnf_txlock);
1234 1231
1235 1232 xnfp->xnf_evtchn = INVALID_EVTCHN;
1236 1233 xnfp->xnf_connected = B_FALSE;
1237 1234 mutex_exit(&xnfp->xnf_txlock);
1238 1235 mutex_exit(&xnfp->xnf_rxlock);
1239 1236
1240 1237 /* claim link to be down after disconnect */
1241 1238 mac_link_update(xnfp->xnf_mh, LINK_STATE_DOWN);
1242 1239 return (DDI_SUCCESS);
1243 1240
1244 1241 case DDI_DETACH:
1245 1242 break;
1246 1243
1247 1244 default:
1248 1245 return (DDI_FAILURE);
1249 1246 }
1250 1247
1251 1248 if (xnfp->xnf_connected)
1252 1249 return (DDI_FAILURE);
1253 1250
1254 1251 /*
1255 1252 * Cannot detach if we have xnf_buf_t outstanding.
1256 1253 */
1257 1254 if (xnfp->xnf_stat_buf_allocated > 0)
1258 1255 return (DDI_FAILURE);
1259 1256
1260 1257 if (mac_unregister(xnfp->xnf_mh) != 0)
1261 1258 return (DDI_FAILURE);
1262 1259
1263 1260 kstat_delete(xnfp->xnf_kstat_aux);
1264 1261
1265 1262 /* Stop the receiver */
1266 1263 xnf_stop(xnfp);
1267 1264
1268 1265 xvdi_remove_event_handler(devinfo, XS_OE_STATE);
1269 1266
1270 1267 /* Remove the interrupt */
1271 1268 #ifdef XPV_HVM_DRIVER
1272 1269 ec_unbind_evtchn(xnfp->xnf_evtchn);
1273 1270 xvdi_free_evtchn(devinfo);
1274 1271 #else
1275 1272 ddi_remove_intr(devinfo, 0, xnfp->xnf_icookie);
1276 1273 #endif
1277 1274
1278 1275 /* Release any pending xmit mblks */
1279 1276 xnf_release_mblks(xnfp);
1280 1277
1281 1278 /* Release all DMA resources */
1282 1279 xnf_release_dma_resources(xnfp);
1283 1280
1284 1281 cv_destroy(&xnfp->xnf_cv_tx_slots);
1285 1282 cv_destroy(&xnfp->xnf_cv_multicast);
1286 1283 cv_destroy(&xnfp->xnf_cv_state);
1287 1284
1288 1285 kmem_cache_destroy(xnfp->xnf_tx_buf_cache);
1289 1286 kmem_cache_destroy(xnfp->xnf_buf_cache);
1290 1287
1291 1288 mutex_destroy(&xnfp->xnf_gref_lock);
1292 1289 mutex_destroy(&xnfp->xnf_schedlock);
1293 1290 mutex_destroy(&xnfp->xnf_rxlock);
1294 1291 mutex_destroy(&xnfp->xnf_txlock);
1295 1292
1296 1293 kmem_free(xnfp, sizeof (*xnfp));
1297 1294
1298 1295 return (DDI_SUCCESS);
1299 1296 }
1300 1297
1301 1298 /*
1302 1299 * xnf_set_mac_addr() -- set the physical network address on the board.
1303 1300 */
1304 1301 static int
1305 1302 xnf_set_mac_addr(void *arg, const uint8_t *macaddr)
1306 1303 {
1307 1304 _NOTE(ARGUNUSED(arg, macaddr));
1308 1305
1309 1306 /*
1310 1307 * We can't set our macaddr.
1311 1308 */
1312 1309 return (ENOTSUP);
1313 1310 }
1314 1311
1315 1312 /*
1316 1313 * xnf_set_multicast() -- set (enable) or disable a multicast address.
1317 1314 *
1318 1315 * Program the hardware to enable/disable the multicast address
1319 1316 * in "mca". Enable if "add" is true, disable if false.
1320 1317 */
1321 1318 static int
1322 1319 xnf_set_multicast(void *arg, boolean_t add, const uint8_t *mca)
1323 1320 {
1324 1321 xnf_t *xnfp = arg;
1325 1322 xnf_txbuf_t *txp;
1326 1323 int n_slots;
1327 1324 RING_IDX slot;
1328 1325 xnf_txid_t *tidp;
1329 1326 netif_tx_request_t *txrp;
1330 1327 struct netif_extra_info *erp;
1331 1328 boolean_t notify, result;
1332 1329
1333 1330 /*
1334 1331 * If the backend does not support multicast control then we
1335 1332 * must assume that the right packets will just arrive.
1336 1333 */
1337 1334 if (!xnfp->xnf_be_mcast_control)
1338 1335 return (0);
1339 1336
1340 1337 txp = kmem_cache_alloc(xnfp->xnf_tx_buf_cache, KM_SLEEP);
1341 1338
1342 1339 mutex_enter(&xnfp->xnf_txlock);
1343 1340
1344 1341 /*
1345 1342 * If we're not yet connected then claim success. This is
1346 1343 * acceptable because we refresh the entire set of multicast
1347 1344 * addresses when we get connected.
1348 1345 *
1349 1346 * We can't wait around here because the MAC layer expects
1350 1347 * this to be a non-blocking operation - waiting ends up
1351 1348 * causing a deadlock during resume.
1352 1349 */
1353 1350 if (!xnfp->xnf_connected) {
1354 1351 mutex_exit(&xnfp->xnf_txlock);
1355 1352 return (0);
1356 1353 }
1357 1354
1358 1355 /*
1359 1356 * 1. Acquire two slots in the ring.
1360 1357 * 2. Fill in the slots.
1361 1358 * 3. Request notification when the operation is done.
1362 1359 * 4. Kick the peer.
1363 1360 * 5. Wait for the response via xnf_tx_clean_ring().
1364 1361 */
1365 1362
1366 1363 n_slots = xnf_tx_slots_get(xnfp, 2, B_TRUE);
1367 1364 ASSERT(n_slots >= 2);
1368 1365
1369 1366 slot = xnfp->xnf_tx_ring.req_prod_pvt;
1370 1367 tidp = xnf_txid_get(xnfp);
1371 1368 VERIFY(tidp != NULL);
1372 1369
1373 1370 txp->tx_type = TX_MCAST_REQ;
1374 1371 txp->tx_slot = slot;
1375 1372
1376 1373 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
1377 1374 erp = (struct netif_extra_info *)
1378 1375 RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot + 1);
1379 1376
1380 1377 txrp->gref = 0;
1381 1378 txrp->size = 0;
1382 1379 txrp->offset = 0;
1383 1380 /* Set tx_txreq.id to appease xnf_tx_clean_ring(). */
1384 1381 txrp->id = txp->tx_txreq.id = tidp->id;
1385 1382 txrp->flags = NETTXF_extra_info;
1386 1383
1387 1384 erp->type = add ? XEN_NETIF_EXTRA_TYPE_MCAST_ADD :
1388 1385 XEN_NETIF_EXTRA_TYPE_MCAST_DEL;
1389 1386 bcopy((void *)mca, &erp->u.mcast.addr, ETHERADDRL);
1390 1387
1391 1388 tidp->txbuf = txp;
1392 1389
1393 1390 xnfp->xnf_tx_ring.req_prod_pvt = slot + 2;
1394 1391
1395 1392 mutex_enter(&xnfp->xnf_schedlock);
1396 1393 xnfp->xnf_pending_multicast++;
1397 1394 mutex_exit(&xnfp->xnf_schedlock);
1398 1395
1399 1396 /* LINTED: constant in conditional context */
1400 1397 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring,
1401 1398 notify);
1402 1399 if (notify)
1403 1400 ec_notify_via_evtchn(xnfp->xnf_evtchn);
1404 1401
1405 1402 while (txp->tx_type == TX_MCAST_REQ)
1406 1403 cv_wait(&xnfp->xnf_cv_multicast, &xnfp->xnf_txlock);
1407 1404
1408 1405 ASSERT3U(txp->tx_type, ==, TX_MCAST_RSP);
1409 1406
1410 1407 mutex_enter(&xnfp->xnf_schedlock);
1411 1408 xnfp->xnf_pending_multicast--;
1412 1409 mutex_exit(&xnfp->xnf_schedlock);
1413 1410
1414 1411 result = (txp->tx_status == NETIF_RSP_OKAY);
1415 1412
1416 1413 xnf_txid_put(xnfp, tidp);
1417 1414
1418 1415 mutex_exit(&xnfp->xnf_txlock);
1419 1416
1420 1417 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
1421 1418
1422 1419 return (result ? 0 : 1);
1423 1420 }
1424 1421
1425 1422 /*
1426 1423 * xnf_set_promiscuous() -- set or reset promiscuous mode on the board
1427 1424 *
1428 1425 * Program the hardware to enable/disable promiscuous mode.
1429 1426 */
1430 1427 static int
1431 1428 xnf_set_promiscuous(void *arg, boolean_t on)
1432 1429 {
1433 1430 _NOTE(ARGUNUSED(arg, on));
1434 1431
1435 1432 /*
1436 1433 * We can't really do this, but we pretend that we can in
1437 1434 * order that snoop will work.
1438 1435 */
1439 1436 return (0);
1440 1437 }
1441 1438
1442 1439 /*
1443 1440 * Clean buffers that we have responses for from the transmit ring.
1444 1441 */
1445 1442 static int
1446 1443 xnf_tx_clean_ring(xnf_t *xnfp)
1447 1444 {
1448 1445 boolean_t work_to_do;
1449 1446
1450 1447 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
1451 1448
1452 1449 loop:
1453 1450 while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_tx_ring)) {
1454 1451 RING_IDX cons, prod, i;
1455 1452
1456 1453 cons = xnfp->xnf_tx_ring.rsp_cons;
1457 1454 prod = xnfp->xnf_tx_ring.sring->rsp_prod;
1458 1455 membar_consumer();
1459 1456 /*
1460 1457 * Clean tx requests from ring that we have responses
1461 1458 * for.
1462 1459 */
1463 1460 DTRACE_PROBE2(xnf_tx_clean_range, int, cons, int, prod);
1464 1461 for (i = cons; i != prod; i++) {
1465 1462 netif_tx_response_t *trp;
1466 1463 xnf_txid_t *tidp;
1467 1464 xnf_txbuf_t *txp;
1468 1465
1469 1466 trp = RING_GET_RESPONSE(&xnfp->xnf_tx_ring, i);
1470 1467 /*
1471 1468 * if this slot was occupied by netif_extra_info_t,
1472 1469 * then the response will be NETIF_RSP_NULL. In this
1473 1470 * case there are no resources to clean up.
1474 1471 */
1475 1472 if (trp->status == NETIF_RSP_NULL)
1476 1473 continue;
1477 1474
1478 1475 ASSERT(TX_ID_VALID(trp->id));
1479 1476
1480 1477 tidp = TX_ID_TO_TXID(xnfp, trp->id);
1481 1478 ASSERT3U(tidp->id, ==, trp->id);
1482 1479 ASSERT3U(tidp->next, ==, INVALID_TX_ID);
1483 1480
1484 1481 txp = tidp->txbuf;
1485 1482 ASSERT(txp != NULL);
1486 1483 ASSERT3U(txp->tx_txreq.id, ==, trp->id);
1487 1484
1488 1485 switch (txp->tx_type) {
1489 1486 case TX_DATA:
1490 1487 /*
1491 1488 * We must put the txid for each response we
1492 1489 * acknowledge to make sure that we never have
1493 1490 * more free slots than txids. Because of this
1494 1491 * we do it here instead of waiting for it to
1495 1492 * be done in xnf_data_txbuf_free_chain().
1496 1493 */
1497 1494 xnf_txid_put(xnfp, tidp);
1498 1495 txp->tx_txreq.id = INVALID_TX_ID;
1499 1496 ASSERT3S(txp->tx_head->tx_frags_to_ack, >, 0);
1500 1497 txp->tx_head->tx_frags_to_ack--;
1501 1498
1502 1499 /*
1503 1500 * We clean the whole chain once we got a
1504 1501 * response for each fragment.
1505 1502 */
1506 1503 if (txp->tx_head->tx_frags_to_ack == 0)
1507 1504 xnf_data_txbuf_free_chain(xnfp, txp);
1508 1505
1509 1506 break;
1510 1507
1511 1508 case TX_MCAST_REQ:
1512 1509 txp->tx_type = TX_MCAST_RSP;
1513 1510 txp->tx_status = trp->status;
1514 1511 cv_broadcast(&xnfp->xnf_cv_multicast);
1515 1512
1516 1513 break;
1517 1514
1518 1515 default:
1519 1516 cmn_err(CE_PANIC, "xnf_tx_clean_ring: "
1520 1517 "invalid xnf_txbuf_t type: %d",
1521 1518 txp->tx_type);
1522 1519 break;
1523 1520 }
1524 1521 }
1525 1522 /*
1526 1523 * Record the last response we dealt with so that we
1527 1524 * know where to start next time around.
1528 1525 */
1529 1526 xnfp->xnf_tx_ring.rsp_cons = prod;
1530 1527 membar_enter();
1531 1528 }
1532 1529
1533 1530 /* LINTED: constant in conditional context */
1534 1531 RING_FINAL_CHECK_FOR_RESPONSES(&xnfp->xnf_tx_ring, work_to_do);
1535 1532 if (work_to_do)
1536 1533 goto loop;
1537 1534
1538 1535 return (RING_FREE_REQUESTS(&xnfp->xnf_tx_ring));
1539 1536 }
1540 1537
1541 1538 /*
1542 1539 * Allocate and fill in a look-aside buffer for the packet `mp'. Used
1543 1540 * to ensure that the packet is physically contiguous and contained
1544 1541 * within a single page.
1545 1542 */
1546 1543 static xnf_buf_t *
1547 1544 xnf_tx_get_lookaside(xnf_t *xnfp, mblk_t *mp, size_t *plen)
1548 1545 {
1549 1546 xnf_buf_t *bd;
1550 1547 caddr_t bp;
1551 1548
1552 1549 bd = xnf_buf_get(xnfp, KM_SLEEP, B_TRUE);
1553 1550 if (bd == NULL)
1554 1551 return (NULL);
1555 1552
1556 1553 bp = bd->buf;
1557 1554 while (mp != NULL) {
1558 1555 size_t len = MBLKL(mp);
1559 1556
1560 1557 bcopy(mp->b_rptr, bp, len);
1561 1558 bp += len;
1562 1559
1563 1560 mp = mp->b_cont;
1564 1561 }
1565 1562
1566 1563 *plen = bp - bd->buf;
1567 1564 ASSERT3U(*plen, <=, PAGESIZE);
1568 1565
1569 1566 xnfp->xnf_stat_tx_lookaside++;
1570 1567
1571 1568 return (bd);
1572 1569 }
1573 1570
1574 1571 /*
1575 1572 * Insert the pseudo-header checksum into the packet.
1576 1573 * Assumes packet is IPv4, TCP/UDP since we only advertised support for
1577 1574 * HCKSUM_INET_FULL_V4.
1578 1575 */
1579 1576 int
1580 1577 xnf_pseudo_cksum(mblk_t *mp)
1581 1578 {
1582 1579 struct ether_header *ehp;
1583 1580 uint16_t sap, iplen, *stuff;
1584 1581 uint32_t cksum;
1585 1582 size_t len;
1586 1583 ipha_t *ipha;
1587 1584 ipaddr_t src, dst;
1588 1585 uchar_t *ptr;
1589 1586
1590 1587 ptr = mp->b_rptr;
1591 1588 len = MBLKL(mp);
1592 1589
1593 1590 /* Each header must fit completely in an mblk. */
1594 1591 ASSERT3U(len, >=, sizeof (*ehp));
1595 1592
1596 1593 ehp = (struct ether_header *)ptr;
1597 1594
1598 1595 if (ntohs(ehp->ether_type) == VLAN_TPID) {
1599 1596 struct ether_vlan_header *evhp;
1600 1597 ASSERT3U(len, >=, sizeof (*evhp));
1601 1598 evhp = (struct ether_vlan_header *)ptr;
1602 1599 sap = ntohs(evhp->ether_type);
1603 1600 ptr += sizeof (*evhp);
1604 1601 len -= sizeof (*evhp);
1605 1602 } else {
1606 1603 sap = ntohs(ehp->ether_type);
1607 1604 ptr += sizeof (*ehp);
1608 1605 len -= sizeof (*ehp);
1609 1606 }
1610 1607
1611 1608 ASSERT3U(sap, ==, ETHERTYPE_IP);
1612 1609
1613 1610 /*
1614 1611 * Ethernet and IP headers may be in different mblks.
1615 1612 */
1616 1613 ASSERT3P(ptr, <=, mp->b_wptr);
1617 1614 if (ptr == mp->b_wptr) {
1618 1615 mp = mp->b_cont;
1619 1616 ptr = mp->b_rptr;
1620 1617 len = MBLKL(mp);
1621 1618 }
1622 1619
1623 1620 ASSERT3U(len, >=, sizeof (ipha_t));
1624 1621 ipha = (ipha_t *)ptr;
1625 1622
1626 1623 /*
1627 1624 * We assume the IP header has no options. (This is enforced in
1628 1625 * ire_send_wire_v4() -- search for IXAF_NO_HW_CKSUM).
1629 1626 */
1630 1627 ASSERT3U(IPH_HDR_LENGTH(ipha), ==, IP_SIMPLE_HDR_LENGTH);
1631 1628 iplen = ntohs(ipha->ipha_length) - IP_SIMPLE_HDR_LENGTH;
1632 1629
1633 1630 ptr += IP_SIMPLE_HDR_LENGTH;
1634 1631 len -= IP_SIMPLE_HDR_LENGTH;
1635 1632
1636 1633 /*
1637 1634 * IP and L4 headers may be in different mblks.
1638 1635 */
1639 1636 ASSERT3P(ptr, <=, mp->b_wptr);
1640 1637 if (ptr == mp->b_wptr) {
1641 1638 mp = mp->b_cont;
1642 1639 ptr = mp->b_rptr;
1643 1640 len = MBLKL(mp);
1644 1641 }
1645 1642
1646 1643 switch (ipha->ipha_protocol) {
1647 1644 case IPPROTO_TCP:
1648 1645 ASSERT3U(len, >=, sizeof (tcph_t));
1649 1646 stuff = (uint16_t *)(ptr + TCP_CHECKSUM_OFFSET);
1650 1647 cksum = IP_TCP_CSUM_COMP;
1651 1648 break;
1652 1649 case IPPROTO_UDP:
1653 1650 ASSERT3U(len, >=, sizeof (struct udphdr));
1654 1651 stuff = (uint16_t *)(ptr + UDP_CHECKSUM_OFFSET);
1655 1652 cksum = IP_UDP_CSUM_COMP;
1656 1653 break;
1657 1654 default:
1658 1655 cmn_err(CE_WARN, "xnf_pseudo_cksum: unexpected protocol %d",
1659 1656 ipha->ipha_protocol);
1660 1657 return (EINVAL);
1661 1658 }
1662 1659
1663 1660 src = ipha->ipha_src;
1664 1661 dst = ipha->ipha_dst;
1665 1662
1666 1663 cksum += (dst >> 16) + (dst & 0xFFFF);
1667 1664 cksum += (src >> 16) + (src & 0xFFFF);
1668 1665 cksum += htons(iplen);
1669 1666
1670 1667 cksum = (cksum >> 16) + (cksum & 0xFFFF);
1671 1668 cksum = (cksum >> 16) + (cksum & 0xFFFF);
1672 1669
1673 1670 ASSERT(cksum <= 0xFFFF);
1674 1671
1675 1672 *stuff = (uint16_t)(cksum ? cksum : ~cksum);
1676 1673
1677 1674 return (0);
1678 1675 }
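/*
 * Illustrative worked example (not part of this change) of the
 * fold-to-16-bits step above: a partial sum of 0x1FFFF folds once to
 * 0x1 + 0xFFFF == 0x10000, which still carries, and a second time to
 * 0x1 + 0x0000 == 0x0001; this is why the fold is applied twice
 * before the result is stored.
 */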
1679 1676
1680 1677 /*
1681 1678 * Push a packet into the transmit ring.
1682 1679 *
1683 1680 * Note: the format of a tx packet that spans multiple slots is similar to
1684 1681 * what is described in xnf_rx_one_packet().
1685 1682 */
1686 1683 static void
1687 1684 xnf_tx_push_packet(xnf_t *xnfp, xnf_txbuf_t *head)
1688 1685 {
1689 1686 int nslots = 0;
1690 1687 int extras = 0;
1691 1688 RING_IDX slot;
1692 1689 boolean_t notify;
1693 1690
1694 1691 ASSERT(MUTEX_HELD(&xnfp->xnf_txlock));
1695 1692 ASSERT(xnfp->xnf_running);
1696 1693
1697 1694 slot = xnfp->xnf_tx_ring.req_prod_pvt;
1698 1695
1699 1696 /*
1700 1697 * The caller has already checked that we have enough slots to proceed.
1701 1698 */
1702 1699 for (xnf_txbuf_t *txp = head; txp != NULL; txp = txp->tx_next) {
1703 1700 xnf_txid_t *tidp;
1704 1701 netif_tx_request_t *txrp;
1705 1702
1706 1703 tidp = xnf_txid_get(xnfp);
1707 1704 VERIFY(tidp != NULL);
1708 1705 txrp = RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
1709 1706
1710 1707 txp->tx_slot = slot;
1711 1708 txp->tx_txreq.id = tidp->id;
1712 1709 *txrp = txp->tx_txreq;
1713 1710
1714 1711 tidp->txbuf = txp;
1715 1712 slot++;
1716 1713 nslots++;
1717 1714
1718 1715 /*
1719 1716 * When present, LSO info is placed in a slot after the first
1720 1717 * data segment, and doesn't require a txid.
1721 1718 */
1722 1719 if (txp->tx_txreq.flags & NETTXF_extra_info) {
1723 1720 netif_extra_info_t *extra;
1724 1721 ASSERT3U(nslots, ==, 1);
1725 1722
1726 1723 extra = (netif_extra_info_t *)
1727 1724 RING_GET_REQUEST(&xnfp->xnf_tx_ring, slot);
1728 1725 *extra = txp->tx_extra;
1729 1726 slot++;
1730 1727 nslots++;
1731 1728 extras = 1;
1732 1729 }
1733 1730 }
1734 1731
1735 1732 ASSERT3U(nslots, <=, XEN_MAX_SLOTS_PER_TX);
1736 1733
1737 1734 /*
1738 1735 * Store the number of data fragments.
1739 1736 */
1740 1737 head->tx_frags_to_ack = nslots - extras;
1741 1738
1742 1739 xnfp->xnf_tx_ring.req_prod_pvt = slot;
1743 1740
1744 1741 /*
1745 1742 * Tell the peer that we sent something, if it cares.
1746 1743 */
1747 1744 /* LINTED: constant in conditional context */
1748 1745 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_tx_ring, notify);
1749 1746 if (notify)
1750 1747 ec_notify_via_evtchn(xnfp->xnf_evtchn);
1751 1748 }
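/*
 * Note on the push/notify sequence above: req_prod_pvt is the private
 * (frontend-local) producer index. The Xen shared-ring macro
 * RING_PUSH_REQUESTS_AND_CHECK_NOTIFY() publishes it to the shared ring
 * with the appropriate memory barrier and sets 'notify' only when the
 * peer has asked to be woken, so the event channel is signalled just
 * when the backend actually needs a kick.
 */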
1752 1749
1753 1750 static xnf_txbuf_t *
1754 1751 xnf_mblk_copy(xnf_t *xnfp, mblk_t *mp)
1755 1752 {
1756 1753 xnf_txbuf_t *txp = xnf_data_txbuf_alloc(xnfp);
1757 1754 size_t length;
1758 1755
1759 1756 txp->tx_bdesc = xnf_tx_get_lookaside(xnfp, mp, &length);
1760 1757 if (txp->tx_bdesc == NULL) {
1761 1758 xnf_data_txbuf_free(xnfp, txp);
1762 1759 return (NULL);
1763 1760 }
1764 1761 txp->tx_mfn = txp->tx_bdesc->buf_mfn;
1765 1762 txp->tx_txreq.gref = txp->tx_bdesc->grant_ref;
1766 1763 txp->tx_txreq.size = length;
1767 1764 txp->tx_txreq.offset = (uintptr_t)txp->tx_bdesc->buf & PAGEOFFSET;
1768 1765 txp->tx_txreq.flags = 0;
1769 1766
1770 1767 return (txp);
1771 1768 }
1772 1769
1773 1770 static xnf_txbuf_t *
1774 1771 xnf_mblk_map(xnf_t *xnfp, mblk_t *mp, int *countp)
1775 1772 {
1776 1773 xnf_txbuf_t *head = NULL;
1777 1774 xnf_txbuf_t *tail = NULL;
1778 1775 domid_t oeid;
1779 1776 int nsegs = 0;
1780 1777
1781 1778 oeid = xvdi_get_oeid(xnfp->xnf_devinfo);
1782 1779
1783 1780 for (mblk_t *ml = mp; ml != NULL; ml = ml->b_cont) {
1784 1781 ddi_dma_handle_t dma_handle;
1785 1782 ddi_dma_cookie_t dma_cookie;
1786 1783 uint_t ncookies;
1787 1784 xnf_txbuf_t *txp;
1788 1785
1789 1786 if (MBLKL(ml) == 0)
1790 1787 continue;
1791 1788
1792 1789 txp = xnf_data_txbuf_alloc(xnfp);
1793 1790
1794 1791 if (head == NULL) {
1795 1792 head = txp;
1796 1793 } else {
1797 1794 ASSERT(tail != NULL);
1798 1795 TXBUF_SETNEXT(tail, txp);
1799 1796 txp->tx_head = head;
1800 1797 }
1801 1798
1802 1799 /*
1803 1800 * The necessary segmentation rules (e.g. not crossing a page
1804 1801 * boundary) are enforced by the dma attributes of the handle.
1805 1802 */
1806 1803 dma_handle = txp->tx_dma_handle;
1807 1804 int ret = ddi_dma_addr_bind_handle(dma_handle,
1808 1805 NULL, (char *)ml->b_rptr, MBLKL(ml),
1809 1806 DDI_DMA_WRITE | DDI_DMA_STREAMING,
1810 1807 DDI_DMA_DONTWAIT, 0, &dma_cookie,
1811 1808 &ncookies);
1812 1809 if (ret != DDI_DMA_MAPPED) {
1813 1810 if (ret != DDI_DMA_NORESOURCES) {
1814 1811 dev_err(xnfp->xnf_devinfo, CE_WARN,
1815 1812 "ddi_dma_addr_bind_handle() failed "
1816 1813 "[dma_error=%d]", ret);
1817 1814 }
1818 1815 goto error;
1819 1816 }
1820 1817 txp->tx_handle_bound = B_TRUE;
1821 1818
1822 1819 ASSERT(ncookies > 0);
1823 1820 for (int i = 0; i < ncookies; i++) {
1824 1821 if (nsegs == XEN_MAX_TX_DATA_PAGES) {
1825 1822 dev_err(xnfp->xnf_devinfo, CE_WARN,
1826 1823 "xnf_dmamap_alloc() failed: "
1827 1824 "too many segments");
1828 1825 goto error;
1829 1826 }
1830 1827 if (i > 0) {
1831 1828 txp = xnf_data_txbuf_alloc(xnfp);
1832 1829 ASSERT(tail != NULL);
1833 1830 TXBUF_SETNEXT(tail, txp);
1834 1831 txp->tx_head = head;
1835 1832 }
1836 1833
1837 1834 txp->tx_mfn =
1838 1835 xnf_btop(pa_to_ma(dma_cookie.dmac_laddress));
1839 1836 txp->tx_txreq.gref = xnf_gref_get(xnfp);
1840 1837 if (txp->tx_txreq.gref == INVALID_GRANT_REF) {
1841 1838 dev_err(xnfp->xnf_devinfo, CE_WARN,
1842 1839 "xnf_dmamap_alloc() failed: "
1843 1840 "invalid grant ref");
1844 1841 goto error;
1845 1842 }
1846 1843 gnttab_grant_foreign_access_ref(txp->tx_txreq.gref,
1847 1844 oeid, txp->tx_mfn, 1);
1848 1845 txp->tx_txreq.offset =
1849 1846 dma_cookie.dmac_laddress & PAGEOFFSET;
1850 1847 txp->tx_txreq.size = dma_cookie.dmac_size;
1851 1848 txp->tx_txreq.flags = 0;
1852 1849
1853 1850 ddi_dma_nextcookie(dma_handle, &dma_cookie);
1854 1851 nsegs++;
1855 1852
1856 1853 if (tail != NULL)
1857 1854 tail->tx_txreq.flags = NETTXF_more_data;
1858 1855 tail = txp;
1859 1856 }
1860 1857 }
1861 1858
1862 1859 *countp = nsegs;
1863 1860 return (head);
1864 1861
1865 1862 error:
1866 1863 xnf_data_txbuf_free_chain(xnfp, head);
1867 1864 return (NULL);
1868 1865 }
1869 1866
1870 1867 static void
1871 1868 xnf_tx_setup_offload(xnf_t *xnfp, xnf_txbuf_t *head,
1872 1869 uint32_t cksum_flags, uint32_t lso_flags, uint32_t mss)
1873 1870 {
1874 1871 if (lso_flags != 0) {
1875 1872 ASSERT3U(lso_flags, ==, HW_LSO);
1876 1873 ASSERT3P(head->tx_bdesc, ==, NULL);
1877 1874
1878 1875 head->tx_txreq.flags |= NETTXF_extra_info;
1879 1876 netif_extra_info_t *extra = &head->tx_extra;
1880 1877 extra->type = XEN_NETIF_EXTRA_TYPE_GSO;
1881 1878 extra->flags = 0;
1882 1879 extra->u.gso.type = XEN_NETIF_GSO_TYPE_TCPV4;
1883 1880 extra->u.gso.size = mss;
1884 1881 extra->u.gso.features = 0;
1885 1882 extra->u.gso.pad = 0;
1886 1883 } else if (cksum_flags != 0) {
1887 1884 ASSERT3U(cksum_flags, ==, HCK_FULLCKSUM);
1888 1885 /*
1889 1886 * If the local protocol stack requests checksum
1890 1887 * offload we set the 'checksum blank' flag,
1891 1888 * indicating to the peer that we need the checksum
1892 1889 * calculated for us.
1893 1890 *
1894 1891 * We _don't_ set the validated flag, because we haven't
1895 1892 * validated that the data and the checksum match.
1896 1893 *
1897 1894 * Note: we already called xnf_pseudo_cksum() in
1898 1895 * xnf_send(), so we just set the txreq flag here.
1899 1896 */
1900 1897 head->tx_txreq.flags |= NETTXF_csum_blank;
1901 1898 xnfp->xnf_stat_tx_cksum_deferred++;
1902 1899 }
1903 1900 }
1904 1901
1905 1902 /*
1906 1903 * Send packet mp. Called by the MAC framework.
1907 1904 */
1908 1905 static mblk_t *
1909 1906 xnf_send(void *arg, mblk_t *mp)
1910 1907 {
1911 1908 xnf_t *xnfp = arg;
1912 1909 xnf_txbuf_t *head;
1913 1910 mblk_t *ml;
1914 1911 int length;
1915 1912 int pages, chunks, slots, slots_free;
1916 1913 uint32_t cksum_flags, lso_flags, mss;
1917 1914 boolean_t pulledup = B_FALSE;
1918 1915 boolean_t force_copy = B_FALSE;
1919 1916
1920 1917 ASSERT3P(mp->b_next, ==, NULL);
1921 1918
1922 1919 mutex_enter(&xnfp->xnf_txlock);
1923 1920
1924 1921 /*
1925 1922 * Wait until we are connected to the backend.
1926 1923 */
1927 1924 while (!xnfp->xnf_connected)
1928 1925 cv_wait(&xnfp->xnf_cv_state, &xnfp->xnf_txlock);
1929 1926
1930 1927 /*
1931 1928 * To simplify logic and be in sync with the rescheduling mechanism,
1932 1929 	 * we require the maximum number of slots that could be used by a
1933 1930 * transaction to be free before proceeding. The only downside of doing
1934 1931 * this is that it slightly reduces the effective size of the ring.
1935 1932 */
1936 1933 slots_free = xnf_tx_slots_get(xnfp, XEN_MAX_SLOTS_PER_TX, B_FALSE);
1937 1934 if (slots_free < XEN_MAX_SLOTS_PER_TX) {
1938 1935 /*
1939 1936 * We need to ask for a re-schedule later as the ring is full.
1940 1937 */
1941 1938 mutex_enter(&xnfp->xnf_schedlock);
1942 1939 xnfp->xnf_need_sched = B_TRUE;
1943 1940 mutex_exit(&xnfp->xnf_schedlock);
1944 1941
1945 1942 xnfp->xnf_stat_tx_defer++;
1946 1943 mutex_exit(&xnfp->xnf_txlock);
1947 1944 return (mp);
1948 1945 }
1949 1946
1950 1947 /*
1951 1948 * Get hw offload parameters.
1952 1949 * This must be done before pulling up the mp as those parameters
1953 1950 * are not copied over.
1954 1951 */
1955 1952 mac_hcksum_get(mp, NULL, NULL, NULL, NULL, &cksum_flags);
1956 1953 mac_lso_get(mp, &mss, &lso_flags);
1957 1954
1958 1955 /*
1959 1956 * XXX: fix MAC framework so that we can advertise support for
1960 1957 * partial checksum for IPv4 only. This way we won't need to calculate
1961 1958 * the pseudo header checksum ourselves.
1962 1959 */
1963 1960 if (cksum_flags != 0) {
1964 1961 ASSERT3U(cksum_flags, ==, HCK_FULLCKSUM);
1965 1962 (void) xnf_pseudo_cksum(mp);
1966 1963 }
1967 1964
1968 1965 pulledup:
1969 1966 for (ml = mp, pages = 0, chunks = 0, length = 0; ml != NULL;
1970 1967 ml = ml->b_cont, chunks++) {
1971 1968 pages += xnf_mblk_pages(ml);
1972 1969 length += MBLKL(ml);
1973 1970 }
1974 1971 DTRACE_PROBE3(packet, int, length, int, chunks, int, pages);
1975 1972 DTRACE_PROBE3(lso, int, length, uint32_t, lso_flags, uint32_t, mss);
1976 1973
1977 1974 /*
1978 1975 * If the ethernet header crosses a page boundary the packet
1979 1976 * will be dropped by the backend. In practice it seems like
1980 1977 * this happens fairly rarely so we'll do nothing unless the
1981 1978 * packet is small enough to fit in a look-aside buffer.
1982 1979 */
1983 1980 if (((uintptr_t)mp->b_rptr & PAGEOFFSET) +
1984 1981 sizeof (struct ether_header) > PAGESIZE) {
1985 1982 xnfp->xnf_stat_tx_eth_hdr_split++;
1986 1983 if (length <= PAGESIZE)
1987 1984 force_copy = B_TRUE;
1988 1985 }
1989 1986
1990 1987 if (force_copy || (pages > 1 && !xnfp->xnf_be_tx_sg)) {
1991 1988 /*
1992 1989 * If the packet spans several pages and scatter-gather is not
1993 1990 * supported then use a look-aside buffer.
1994 1991 */
1995 1992 ASSERT3U(length, <=, PAGESIZE);
1996 1993 head = xnf_mblk_copy(xnfp, mp);
1997 1994 if (head == NULL) {
1998 1995 dev_err(xnfp->xnf_devinfo, CE_WARN,
1999 1996 "xnf_mblk_copy() failed");
2000 1997 goto drop;
2001 1998 }
2002 1999 } else {
2003 2000 /*
2004 2001 		 * There's a limit on how many pages can be passed to the
2005 2002 		 * backend. If we exceed that limit, the packet will be dropped
2006 2003 * and some backend implementations (e.g. Linux) could even
2007 2004 * offline the interface.
2008 2005 */
2009 2006 if (pages > XEN_MAX_TX_DATA_PAGES) {
2010 2007 if (pulledup) {
2011 2008 dev_err(xnfp->xnf_devinfo, CE_WARN,
2012 2009 "too many pages, even after pullup: %d.",
2013 2010 pages);
2014 2011 goto drop;
2015 2012 }
2016 2013
2017 2014 /*
2018 2015 * Defragment packet if it spans too many pages.
2019 2016 */
2020 2017 mblk_t *newmp = msgpullup(mp, -1);
2021 2018 freemsg(mp);
2022 2019 mp = newmp;
2023 2020 xnfp->xnf_stat_tx_pullup++;
2024 2021 pulledup = B_TRUE;
2025 2022 goto pulledup;
2026 2023 }
2027 2024
2028 2025 head = xnf_mblk_map(xnfp, mp, &slots);
2029 2026 if (head == NULL)
2030 2027 goto drop;
2031 2028
2032 2029 IMPLY(slots > 1, xnfp->xnf_be_tx_sg);
2033 2030 }
2034 2031
2035 2032 /*
2036 2033 * Set tx_mp so that mblk is freed when the txbuf chain is freed.
2037 2034 */
2038 2035 head->tx_mp = mp;
2039 2036
2040 2037 xnf_tx_setup_offload(xnfp, head, cksum_flags, lso_flags, mss);
2041 2038
2042 2039 /*
2043 2040 * The first request must store the total length of the packet.
2044 2041 */
2045 2042 head->tx_txreq.size = length;
2046 2043
2047 2044 /*
2048 2045 * Push the packet we have prepared into the ring.
2049 2046 */
2050 2047 xnf_tx_push_packet(xnfp, head);
2051 2048 xnfp->xnf_stat_opackets++;
2052 2049 xnfp->xnf_stat_obytes += length;
2053 2050
2054 2051 mutex_exit(&xnfp->xnf_txlock);
2055 2052 return (NULL);
2056 2053
2057 2054 drop:
2058 2055 freemsg(mp);
2059 2056 xnfp->xnf_stat_tx_drop++;
2060 2057 mutex_exit(&xnfp->xnf_txlock);
2061 2058 return (NULL);
2062 2059 }
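/*
 * For reference, a sketch of the page-count arithmetic behind the
 * copy-vs-map decision in xnf_send(). This is an illustrative stand-in,
 * not the driver's xnf_mblk_pages() implementation.
 */
static int
pages_spanned(uintptr_t start, size_t len)
{
	uintptr_t first, last;

	if (len == 0)
		return (0);

	first = start & ~(uintptr_t)PAGEOFFSET;
	last = (start + len - 1) & ~(uintptr_t)PAGEOFFSET;

	return ((int)((last - first) >> PAGESHIFT) + 1);
}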
2063 2060
2064 2061 /*
2065 2062 * Notification of RX packets. Currently no TX-complete interrupt is
2066 2063 * used, as we clean the TX ring lazily.
2067 2064 */
2068 2065 static uint_t
2069 2066 xnf_intr(caddr_t arg)
2070 2067 {
2071 2068 xnf_t *xnfp = (xnf_t *)arg;
2072 2069 mblk_t *mp;
2073 2070 boolean_t need_sched, clean_ring;
2074 2071
2075 2072 mutex_enter(&xnfp->xnf_rxlock);
2076 2073
2077 2074 /*
2078 2075 * Interrupts before we are connected are spurious.
2079 2076 */
2080 2077 if (!xnfp->xnf_connected) {
2081 2078 mutex_exit(&xnfp->xnf_rxlock);
2082 2079 xnfp->xnf_stat_unclaimed_interrupts++;
2083 2080 return (DDI_INTR_UNCLAIMED);
2084 2081 }
2085 2082
2086 2083 /*
2087 2084 * Receive side processing.
2088 2085 */
2089 2086 do {
2090 2087 /*
2091 2088 * Collect buffers from the ring.
2092 2089 */
2093 2090 xnf_rx_collect(xnfp);
2094 2091
2095 2092 /*
2096 2093 * Interrupt me when the next receive buffer is consumed.
2097 2094 */
2098 2095 xnfp->xnf_rx_ring.sring->rsp_event =
2099 2096 xnfp->xnf_rx_ring.rsp_cons + 1;
2100 2097 xen_mb();
2101 2098
2102 2099 } while (RING_HAS_UNCONSUMED_RESPONSES(&xnfp->xnf_rx_ring));
2103 2100
2104 2101 if (xnfp->xnf_rx_new_buffers_posted) {
2105 2102 boolean_t notify;
2106 2103
2107 2104 /*
2108 2105 * Indicate to the peer that we have re-filled the
2109 2106 * receive ring, if it cares.
2110 2107 */
2111 2108 /* LINTED: constant in conditional context */
2112 2109 RING_PUSH_REQUESTS_AND_CHECK_NOTIFY(&xnfp->xnf_rx_ring, notify);
2113 2110 if (notify)
2114 2111 ec_notify_via_evtchn(xnfp->xnf_evtchn);
2115 2112 xnfp->xnf_rx_new_buffers_posted = B_FALSE;
2116 2113 }
2117 2114
2118 2115 mp = xnfp->xnf_rx_head;
2119 2116 xnfp->xnf_rx_head = xnfp->xnf_rx_tail = NULL;
2120 2117
2121 2118 xnfp->xnf_stat_interrupts++;
2122 2119 mutex_exit(&xnfp->xnf_rxlock);
2123 2120
2124 2121 if (mp != NULL)
2125 2122 mac_rx(xnfp->xnf_mh, NULL, mp);
2126 2123
2127 2124 /*
2128 2125 * Transmit side processing.
2129 2126 *
2130 2127 * If a previous transmit attempt failed or we have pending
2131 2128 * multicast requests, clean the ring.
2132 2129 *
2133 2130 * If we previously stalled transmission and cleaning produces
2134 2131 * some free slots, tell upstream to attempt sending again.
2135 2132 *
2136 2133 * The odd style is to avoid acquiring xnf_txlock unless we
2137 2134 * will actually look inside the tx machinery.
2138 2135 */
2139 2136 mutex_enter(&xnfp->xnf_schedlock);
2140 2137 need_sched = xnfp->xnf_need_sched;
2141 2138 clean_ring = need_sched || (xnfp->xnf_pending_multicast > 0);
2142 2139 mutex_exit(&xnfp->xnf_schedlock);
2143 2140
2144 2141 if (clean_ring) {
2145 2142 int free_slots;
2146 2143
2147 2144 mutex_enter(&xnfp->xnf_txlock);
2148 2145 free_slots = xnf_tx_slots_get(xnfp, 0, B_FALSE);
2149 2146
2150 2147 if (need_sched && (free_slots >= XEN_MAX_SLOTS_PER_TX)) {
2151 2148 mutex_enter(&xnfp->xnf_schedlock);
2152 2149 xnfp->xnf_need_sched = B_FALSE;
2153 2150 mutex_exit(&xnfp->xnf_schedlock);
2154 2151
2155 2152 mac_tx_update(xnfp->xnf_mh);
2156 2153 }
2157 2154 mutex_exit(&xnfp->xnf_txlock);
2158 2155 }
2159 2156
2160 2157 return (DDI_INTR_CLAIMED);
2161 2158 }
2162 2159
2163 2160 /*
2164 2161 * xnf_start() -- start the board receiving and enable interrupts.
2165 2162 */
2166 2163 static int
2167 2164 xnf_start(void *arg)
2168 2165 {
2169 2166 xnf_t *xnfp = arg;
2170 2167
2171 2168 #ifdef XNF_DEBUG
2172 2169 if (xnf_debug & XNF_DEBUG_TRACE)
2173 2170 printf("xnf%d start(0x%p)\n",
2174 2171 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
2175 2172 #endif
2176 2173
2177 2174 mutex_enter(&xnfp->xnf_rxlock);
2178 2175 mutex_enter(&xnfp->xnf_txlock);
2179 2176
2180 2177 /* Accept packets from above. */
2181 2178 xnfp->xnf_running = B_TRUE;
2182 2179
2183 2180 mutex_exit(&xnfp->xnf_txlock);
2184 2181 mutex_exit(&xnfp->xnf_rxlock);
2185 2182
2186 2183 return (0);
2187 2184 }
2188 2185
2189 2186 /* xnf_stop() - disable hardware */
2190 2187 static void
2191 2188 xnf_stop(void *arg)
2192 2189 {
2193 2190 xnf_t *xnfp = arg;
2194 2191
2195 2192 #ifdef XNF_DEBUG
2196 2193 if (xnf_debug & XNF_DEBUG_TRACE)
2197 2194 printf("xnf%d stop(0x%p)\n",
2198 2195 ddi_get_instance(xnfp->xnf_devinfo), (void *)xnfp);
2199 2196 #endif
2200 2197
2201 2198 mutex_enter(&xnfp->xnf_rxlock);
2202 2199 mutex_enter(&xnfp->xnf_txlock);
2203 2200
2204 2201 xnfp->xnf_running = B_FALSE;
2205 2202
2206 2203 mutex_exit(&xnfp->xnf_txlock);
2207 2204 mutex_exit(&xnfp->xnf_rxlock);
2208 2205 }
2209 2206
2210 2207 /*
2211 2208 * Hang buffer `bdesc' on the RX ring.
2212 2209 */
2213 2210 static void
2214 2211 xnf_rxbuf_hang(xnf_t *xnfp, xnf_buf_t *bdesc)
2215 2212 {
2216 2213 netif_rx_request_t *reqp;
2217 2214 RING_IDX hang_ix;
2218 2215
2219 2216 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock));
2220 2217
2221 2218 reqp = RING_GET_REQUEST(&xnfp->xnf_rx_ring,
2222 2219 xnfp->xnf_rx_ring.req_prod_pvt);
2223 2220 hang_ix = (RING_IDX) (reqp - RING_GET_REQUEST(&xnfp->xnf_rx_ring, 0));
2224 2221 ASSERT(xnfp->xnf_rx_pkt_info[hang_ix] == NULL);
2225 2222
2226 2223 reqp->id = bdesc->id = hang_ix;
2227 2224 reqp->gref = bdesc->grant_ref;
2228 2225
2229 2226 xnfp->xnf_rx_pkt_info[hang_ix] = bdesc;
2230 2227 xnfp->xnf_rx_ring.req_prod_pvt++;
2231 2228
2232 2229 xnfp->xnf_rx_new_buffers_posted = B_TRUE;
2233 2230 }
2234 2231
2235 2232 /*
2236 2233 * Receive an entire packet from the ring, starting from slot *consp.
2237 2234 * prod indicates the slot of the latest response.
2238 2235 * On return, *consp will point to the head of the next packet.
2239 2236 *
2240 2237 * Note: If slot prod was reached before we could gather a full packet, we will
2241 2238 * drop the partial packet; this would most likely indicate a bug in either
2242 2239 * the front-end or the back-end driver.
2243 2240 *
2244 2241 * An rx packet can consist of several fragments and thus span multiple slots.
2245 2242 * Each fragment can contain up to 4k of data.
2246 2243 *
2247 2244 * A typical 9000 MTU packet will look like this:
2248 2245 * +------+---------------------+-------------------+-----------------------+
2249 2246 * | SLOT | TYPE | CONTENTS | FLAGS |
2250 2247 * +------+---------------------+-------------------+-----------------------+
2251 2248 * | 1 | netif_rx_response_t | 1st data fragment | more_data |
2252 2249 * +------+---------------------+-------------------+-----------------------+
2253 2250 * | 2 | netif_rx_response_t | 2nd data fragment | more_data |
2254 2251 * +------+---------------------+-------------------+-----------------------+
2255 2252 * | 3 | netif_rx_response_t | 3rd data fragment | [none] |
2256 2253 * +------+---------------------+-------------------+-----------------------+
2257 2254 *
2258 2255 * Fragments are chained by setting NETRXF_more_data in the previous
2259 2256 * response's flags. If there are additional flags, such as
2260 2257 * NETRXF_data_validated or NETRXF_extra_info, those should be set on the
2261 2258 * first fragment.
2262 2259 *
2263 2260 * Sometimes extra info can be present. If so, it will follow the first
2264 2261 * fragment, and NETRXF_extra_info flag will be set on the first response.
2265 2262 * If LRO is set on a packet, it will be stored in the extra info. According
2266 2263 * to the spec, extra info can also be chained, but must all be present right
2267 2264 * after the first fragment.
2268 2265 *
2269 2266 * Example of a packet with 2 extra infos:
2270 2267 * +------+---------------------+-------------------+-----------------------+
2271 2268 * | SLOT | TYPE | CONTENTS | FLAGS |
2272 2269 * +------+---------------------+-------------------+-----------------------+
2273 2270 * | 1 | netif_rx_response_t | 1st data fragment | extra_info, more_data |
2274 2271 * +------+---------------------+-------------------+-----------------------+
2275 2272 * | 2 | netif_extra_info_t | 1st extra info | EXTRA_FLAG_MORE |
2276 2273 * +------+---------------------+-------------------+-----------------------+
2277 2274 * | 3 | netif_extra_info_t | 2nd extra info | [none] |
2278 2275 * +------+---------------------+-------------------+-----------------------+
2279 2276 * | 4 | netif_rx_response_t | 2nd data fragment | more_data |
2280 2277 * +------+---------------------+-------------------+-----------------------+
2281 2278 * | 5 | netif_rx_response_t | 3rd data fragment | more_data |
2282 2279 * +------+---------------------+-------------------+-----------------------+
2283 2280 * | 6 | netif_rx_response_t | 4th data fragment | [none] |
2284 2281 * +------+---------------------+-------------------+-----------------------+
2285 2282 *
2286 2283 * In practice, the only extra we expect is for LRO, but only if we advertise
2287 2284 * that we support it to the backend (xnf_enable_lro == TRUE).
2288 2285 */
2289 2286 static int
2290 2287 xnf_rx_one_packet(xnf_t *xnfp, RING_IDX prod, RING_IDX *consp, mblk_t **mpp)
2291 2288 {
2292 2289 mblk_t *head = NULL;
2293 2290 mblk_t *tail = NULL;
2294 2291 mblk_t *mp;
2295 2292 int error = 0;
2296 2293 RING_IDX cons = *consp;
2297 2294 netif_extra_info_t lro;
2298 2295 boolean_t is_lro = B_FALSE;
2299 2296 boolean_t is_extra = B_FALSE;
2300 2297
2301 2298 netif_rx_response_t rsp = *RING_GET_RESPONSE(&xnfp->xnf_rx_ring, cons);
2302 2299
2303 2300 boolean_t hwcsum = (rsp.flags & NETRXF_data_validated) != 0;
2304 2301 boolean_t more_data = (rsp.flags & NETRXF_more_data) != 0;
2305 2302 boolean_t more_extra = (rsp.flags & NETRXF_extra_info) != 0;
2306 2303
2307 2304 IMPLY(more_data, xnf_enable_rx_sg);
2308 2305
2309 2306 while (cons != prod) {
2310 2307 xnf_buf_t *bdesc;
2311 2308 int len, off;
2312 2309 int rxidx = cons & (NET_RX_RING_SIZE - 1);
2313 2310
2314 2311 bdesc = xnfp->xnf_rx_pkt_info[rxidx];
2315 2312 xnfp->xnf_rx_pkt_info[rxidx] = NULL;
2316 2313
2317 2314 if (is_extra) {
2318 2315 netif_extra_info_t *extra = (netif_extra_info_t *)&rsp;
2319 2316 /*
2320 2317 * The only extra we expect is for LRO, and it should
2321 2318 * only be present once.
2322 2319 */
2323 2320 if (extra->type == XEN_NETIF_EXTRA_TYPE_GSO &&
2324 2321 !is_lro) {
2325 2322 ASSERT(xnf_enable_lro);
2326 2323 lro = *extra;
2327 2324 is_lro = B_TRUE;
2328 2325 DTRACE_PROBE1(lro, netif_extra_info_t *, &lro);
2329 2326 } else {
2330 2327 dev_err(xnfp->xnf_devinfo, CE_WARN, "rx packet "
2331 2328 "contains unexpected extra info of type %d",
2332 2329 extra->type);
2333 2330 error = EINVAL;
2334 2331 }
2335 2332 more_extra =
2336 2333 (extra->flags & XEN_NETIF_EXTRA_FLAG_MORE) != 0;
2337 2334
2338 2335 goto hang_buf;
2339 2336 }
2340 2337
2341 2338 ASSERT3U(bdesc->id, ==, rsp.id);
2342 2339
2343 2340 /*
2344 2341 * status stores packet length when >= 0, or errors when < 0.
2345 2342 */
2346 2343 len = rsp.status;
2347 2344 off = rsp.offset;
2348 2345 more_data = (rsp.flags & NETRXF_more_data) != 0;
2349 2346
2350 2347 /*
2351 2348 * sanity checks.
2352 2349 */
2353 2350 if (!xnfp->xnf_running) {
2354 2351 error = EBUSY;
2355 2352 } else if (len <= 0) {
2356 2353 xnfp->xnf_stat_errrx++;
2357 2354
2358 2355 switch (len) {
2359 2356 case 0:
2360 2357 xnfp->xnf_stat_runt++;
2361 2358 break;
2362 2359 case NETIF_RSP_ERROR:
2363 2360 xnfp->xnf_stat_mac_rcv_error++;
2364 2361 break;
2365 2362 case NETIF_RSP_DROPPED:
2366 2363 xnfp->xnf_stat_norxbuf++;
2367 2364 break;
2368 2365 }
2369 2366 error = EINVAL;
2370 2367 } else if (bdesc->grant_ref == INVALID_GRANT_REF) {
2371 2368 dev_err(xnfp->xnf_devinfo, CE_WARN,
2372 2369 "Bad rx grant reference, rsp id %d", rsp.id);
2373 2370 error = EINVAL;
2374 2371 } else if ((off + len) > PAGESIZE) {
2375 2372 dev_err(xnfp->xnf_devinfo, CE_WARN, "Rx packet crosses "
2376 2373 "page boundary (offset %d, length %d)", off, len);
2377 2374 error = EINVAL;
2378 2375 }
2379 2376
2380 2377 if (error != 0) {
2381 2378 /*
2382 2379 * If an error has been detected, we do not attempt
2383 2380 * to read the data but we still need to replace
2384 2381 * the rx bufs.
2385 2382 */
2386 2383 goto hang_buf;
2387 2384 }
2388 2385
2389 2386 xnf_buf_t *nbuf = NULL;
2390 2387
2391 2388 /*
2392 2389 * If the packet is below a pre-determined size we will
2393 2390 * copy data out of the buf rather than replace it.
2394 2391 */
2395 2392 if (len > xnf_rx_copy_limit)
2396 2393 nbuf = xnf_buf_get(xnfp, KM_NOSLEEP, B_FALSE);
2397 2394
2398 2395 if (nbuf != NULL) {
2399 2396 mp = desballoc((unsigned char *)bdesc->buf,
2400 2397 bdesc->len, 0, &bdesc->free_rtn);
2401 2398
2402 2399 if (mp == NULL) {
2403 2400 xnfp->xnf_stat_rx_desballoc_fail++;
2404 2401 xnfp->xnf_stat_norxbuf++;
2405 2402 error = ENOMEM;
2406 2403 /*
2407 2404 * we free the buf we just allocated as we
2408 2405 * will re-hang the old buf.
2409 2406 */
2410 2407 xnf_buf_put(xnfp, nbuf, B_FALSE);
2411 2408 goto hang_buf;
2412 2409 }
2413 2410
2414 2411 mp->b_rptr = mp->b_rptr + off;
2415 2412 mp->b_wptr = mp->b_rptr + len;
2416 2413
2417 2414 /*
2418 2415 * Release the grant as the backend doesn't need to
2419 2416 * access this buffer anymore and grants are scarce.
2420 2417 */
2421 2418 (void) gnttab_end_foreign_access_ref(bdesc->grant_ref,
2422 2419 0);
2423 2420 xnf_gref_put(xnfp, bdesc->grant_ref);
2424 2421 bdesc->grant_ref = INVALID_GRANT_REF;
2425 2422
2426 2423 bdesc = nbuf;
2427 2424 } else {
2428 2425 /*
2429 2426 * We failed to allocate a new buf or decided to reuse
2430 2427 * the old one. In either case we copy the data off it
2431 2428 * and put it back into the ring.
2432 2429 */
2433 2430 mp = allocb(len, 0);
2434 2431 if (mp == NULL) {
2435 2432 xnfp->xnf_stat_rx_allocb_fail++;
2436 2433 xnfp->xnf_stat_norxbuf++;
2437 2434 error = ENOMEM;
2438 2435 goto hang_buf;
2439 2436 }
2440 2437 bcopy(bdesc->buf + off, mp->b_wptr, len);
2441 2438 mp->b_wptr += len;
2442 2439 }
2443 2440
2444 2441 if (head == NULL)
2445 2442 head = mp;
2446 2443 else
2447 2444 tail->b_cont = mp;
2448 2445 tail = mp;
2449 2446
2450 2447 hang_buf:
2451 2448 /*
2452 2449 * No matter what happens, for each response we need to hang
2453 2450 * a new buf on the rx ring. Put either the old one, or a new
2454 2451 * one if the old one is borrowed by the kernel via desballoc().
2455 2452 */
2456 2453 xnf_rxbuf_hang(xnfp, bdesc);
2457 2454 cons++;
2458 2455
2459 2456 /* next response is an extra */
2460 2457 is_extra = more_extra;
2461 2458
2462 2459 if (!more_data && !more_extra)
2463 2460 break;
2464 2461
2465 2462 /*
2466 2463 * Note that since requests and responses are union'd on the
2467 2464 * same ring, we copy the response to a local variable instead
2468 2465 * of keeping a pointer. Otherwise xnf_rxbuf_hang() would have
2469 2466 * overwritten contents of rsp.
2470 2467 		 * overwritten the contents of rsp.
2471 2468 rsp = *RING_GET_RESPONSE(&xnfp->xnf_rx_ring, cons);
2472 2469 }
2473 2470
2474 2471 /*
2475 2472 * Check that we do not get stuck in a loop.
2476 2473 */
2477 2474 ASSERT3U(*consp, !=, cons);
2478 2475 *consp = cons;
2479 2476
2480 2477 /*
2481 2478 * We ran out of responses but the flags indicate there is more data.
2482 2479 */
2483 2480 if (more_data) {
2484 2481 dev_err(xnfp->xnf_devinfo, CE_WARN, "rx: need more fragments.");
2485 2482 error = EINVAL;
2486 2483 }
2487 2484 if (more_extra) {
2488 2485 dev_err(xnfp->xnf_devinfo, CE_WARN, "rx: need more fragments "
2489 2486 "(extras).");
2490 2487 error = EINVAL;
2491 2488 }
2492 2489
2493 2490 /*
2494 2491 * An error means the packet must be dropped. If we have already formed
2495 2492 * a partial packet, then discard it.
2496 2493 */
2497 2494 if (error != 0) {
2498 2495 if (head != NULL)
2499 2496 freemsg(head);
2500 2497 xnfp->xnf_stat_rx_drop++;
2501 2498 return (error);
2502 2499 }
2503 2500
2504 2501 ASSERT(head != NULL);
2505 2502
2506 2503 if (hwcsum) {
2507 2504 /*
2508 2505 * If the peer says that the data has been validated then we
2509 2506 * declare that the full checksum has been verified.
2510 2507 *
2511 2508 * We don't look at the "checksum blank" flag, and hence could
2512 2509 * have a packet here that we are asserting is good with
2513 2510 * a blank checksum.
2514 2511 */
2515 2512 mac_hcksum_set(head, 0, 0, 0, 0, HCK_FULLCKSUM_OK);
2516 2513 xnfp->xnf_stat_rx_cksum_no_need++;
2517 2514 }
2518 2515
2519 2516 /* XXX: set lro info for packet once LRO is supported in OS. */
2520 2517
2521 2518 *mpp = head;
2522 2519
2523 2520 return (0);
2524 2521 }
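/*
 * For reference: 'cons' and 'prod' above are free-running counters.
 * Because the ring sizes are powers of two, masking with (size - 1)
 * maps a counter onto a slot, as in the rxidx calculation above.
 * Illustrative helper only, not part of the driver:
 */
static unsigned int
ring_slot(unsigned int idx, unsigned int ring_size)
{
	ASSERT(ISP2(ring_size));
	return (idx & (ring_size - 1));	/* same as idx % ring_size */
}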
2525 2522
2526 2523 /*
2527 2524 * Collect packets from the RX ring, storing them in `xnfp' for later use.
2528 2525 */
2529 2526 static void
2530 2527 xnf_rx_collect(xnf_t *xnfp)
2531 2528 {
2532 2529 RING_IDX prod;
2533 2530
2534 2531 ASSERT(MUTEX_HELD(&xnfp->xnf_rxlock));
2535 2532
2536 2533 prod = xnfp->xnf_rx_ring.sring->rsp_prod;
2537 2534 /*
2538 2535 * Ensure we see queued responses up to 'prod'.
2539 2536 */
2540 2537 membar_consumer();
2541 2538
2542 2539 while (xnfp->xnf_rx_ring.rsp_cons != prod) {
2543 2540 mblk_t *mp;
2544 2541
2545 2542 /*
2546 2543 * Collect a packet.
2547 2544 * rsp_cons is updated inside xnf_rx_one_packet().
2548 2545 */
2549 2546 int error = xnf_rx_one_packet(xnfp, prod,
2550 2547 &xnfp->xnf_rx_ring.rsp_cons, &mp);
2551 2548 if (error == 0) {
2552 2549 xnfp->xnf_stat_ipackets++;
2553 2550 xnfp->xnf_stat_rbytes += xmsgsize(mp);
2554 2551
2555 2552 /*
2556 2553 * Append the mblk to the rx list.
2557 2554 */
2558 2555 if (xnfp->xnf_rx_head == NULL) {
2559 2556 ASSERT3P(xnfp->xnf_rx_tail, ==, NULL);
2560 2557 xnfp->xnf_rx_head = mp;
2561 2558 } else {
2562 2559 ASSERT(xnfp->xnf_rx_tail != NULL);
2563 2560 xnfp->xnf_rx_tail->b_next = mp;
2564 2561 }
2565 2562 xnfp->xnf_rx_tail = mp;
2566 2563 }
2567 2564 }
2568 2565 }
2569 2566
2570 2567 /*
2571 2568 * xnf_alloc_dma_resources() -- initialize the driver's structures
2572 2569 */
2573 2570 static int
2574 2571 xnf_alloc_dma_resources(xnf_t *xnfp)
2575 2572 {
2576 2573 dev_info_t *devinfo = xnfp->xnf_devinfo;
2577 2574 size_t len;
2578 2575 ddi_dma_cookie_t dma_cookie;
2579 2576 uint_t ncookies;
2580 2577 int rc;
2581 2578 caddr_t rptr;
2582 2579
2583 2580 /*
2584 2581 * The code below allocates all the DMA data structures that
2585 2582 * need to be released when the driver is detached.
2586 2583 *
2587 2584 * Allocate page for the transmit descriptor ring.
2588 2585 */
2589 2586 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
2590 2587 DDI_DMA_SLEEP, 0, &xnfp->xnf_tx_ring_dma_handle) != DDI_SUCCESS)
2591 2588 goto alloc_error;
2592 2589
2593 2590 if (ddi_dma_mem_alloc(xnfp->xnf_tx_ring_dma_handle,
2594 2591 PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
2595 2592 DDI_DMA_SLEEP, 0, &rptr, &len,
2596 2593 &xnfp->xnf_tx_ring_dma_acchandle) != DDI_SUCCESS) {
2597 2594 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
2598 2595 xnfp->xnf_tx_ring_dma_handle = NULL;
2599 2596 goto alloc_error;
2600 2597 }
2601 2598
2602 2599 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_tx_ring_dma_handle, NULL,
2603 2600 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
2604 2601 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
2605 2602 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
2606 2603 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
2607 2604 xnfp->xnf_tx_ring_dma_handle = NULL;
2608 2605 xnfp->xnf_tx_ring_dma_acchandle = NULL;
2609 2606 if (rc == DDI_DMA_NORESOURCES)
2610 2607 goto alloc_error;
2611 2608 else
2612 2609 goto error;
2613 2610 }
2614 2611
2615 2612 ASSERT(ncookies == 1);
2616 2613 bzero(rptr, PAGESIZE);
2617 2614 /* LINTED: constant in conditional context */
2618 2615 SHARED_RING_INIT((netif_tx_sring_t *)rptr);
2619 2616 /* LINTED: constant in conditional context */
2620 2617 FRONT_RING_INIT(&xnfp->xnf_tx_ring, (netif_tx_sring_t *)rptr, PAGESIZE);
2621 2618 xnfp->xnf_tx_ring_phys_addr = dma_cookie.dmac_laddress;
2622 2619
2623 2620 /*
2624 2621 * Allocate page for the receive descriptor ring.
2625 2622 */
2626 2623 if (ddi_dma_alloc_handle(devinfo, &ringbuf_dma_attr,
2627 2624 DDI_DMA_SLEEP, 0, &xnfp->xnf_rx_ring_dma_handle) != DDI_SUCCESS)
2628 2625 goto alloc_error;
2629 2626
2630 2627 if (ddi_dma_mem_alloc(xnfp->xnf_rx_ring_dma_handle,
2631 2628 PAGESIZE, &accattr, DDI_DMA_CONSISTENT,
2632 2629 DDI_DMA_SLEEP, 0, &rptr, &len,
2633 2630 &xnfp->xnf_rx_ring_dma_acchandle) != DDI_SUCCESS) {
2634 2631 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
2635 2632 xnfp->xnf_rx_ring_dma_handle = NULL;
2636 2633 goto alloc_error;
2637 2634 }
2638 2635
2639 2636 if ((rc = ddi_dma_addr_bind_handle(xnfp->xnf_rx_ring_dma_handle, NULL,
2640 2637 rptr, PAGESIZE, DDI_DMA_RDWR | DDI_DMA_CONSISTENT,
2641 2638 DDI_DMA_SLEEP, 0, &dma_cookie, &ncookies)) != DDI_DMA_MAPPED) {
2642 2639 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
2643 2640 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
2644 2641 xnfp->xnf_rx_ring_dma_handle = NULL;
2645 2642 xnfp->xnf_rx_ring_dma_acchandle = NULL;
2646 2643 if (rc == DDI_DMA_NORESOURCES)
2647 2644 goto alloc_error;
2648 2645 else
2649 2646 goto error;
2650 2647 }
2651 2648
2652 2649 ASSERT(ncookies == 1);
2653 2650 bzero(rptr, PAGESIZE);
2654 2651 /* LINTED: constant in conditional context */
2655 2652 SHARED_RING_INIT((netif_rx_sring_t *)rptr);
2656 2653 /* LINTED: constant in conditional context */
2657 2654 FRONT_RING_INIT(&xnfp->xnf_rx_ring, (netif_rx_sring_t *)rptr, PAGESIZE);
2658 2655 xnfp->xnf_rx_ring_phys_addr = dma_cookie.dmac_laddress;
2659 2656
2660 2657 return (DDI_SUCCESS);
2661 2658
2662 2659 alloc_error:
2663 2660 cmn_err(CE_WARN, "xnf%d: could not allocate enough DMA memory",
2664 2661 ddi_get_instance(xnfp->xnf_devinfo));
2665 2662 error:
2666 2663 xnf_release_dma_resources(xnfp);
2667 2664 return (DDI_FAILURE);
2668 2665 }
2669 2666
2670 2667 /*
2671 2668 * Release all DMA resources in the opposite order from acquisition
2672 2669 */
2673 2670 static void
2674 2671 xnf_release_dma_resources(xnf_t *xnfp)
2675 2672 {
2676 2673 int i;
2677 2674
2678 2675 /*
2679 2676 * Free receive buffers which are currently associated with
2680 2677 * descriptors.
2681 2678 */
2682 2679 mutex_enter(&xnfp->xnf_rxlock);
2683 2680 for (i = 0; i < NET_RX_RING_SIZE; i++) {
2684 2681 xnf_buf_t *bp;
2685 2682
2686 2683 if ((bp = xnfp->xnf_rx_pkt_info[i]) == NULL)
2687 2684 continue;
2688 2685 xnfp->xnf_rx_pkt_info[i] = NULL;
2689 2686 xnf_buf_put(xnfp, bp, B_FALSE);
2690 2687 }
2691 2688 mutex_exit(&xnfp->xnf_rxlock);
2692 2689
2693 2690 /* Free the receive ring buffer. */
2694 2691 if (xnfp->xnf_rx_ring_dma_acchandle != NULL) {
2695 2692 (void) ddi_dma_unbind_handle(xnfp->xnf_rx_ring_dma_handle);
2696 2693 ddi_dma_mem_free(&xnfp->xnf_rx_ring_dma_acchandle);
2697 2694 ddi_dma_free_handle(&xnfp->xnf_rx_ring_dma_handle);
2698 2695 xnfp->xnf_rx_ring_dma_acchandle = NULL;
2699 2696 }
2700 2697 /* Free the transmit ring buffer. */
2701 2698 if (xnfp->xnf_tx_ring_dma_acchandle != NULL) {
2702 2699 (void) ddi_dma_unbind_handle(xnfp->xnf_tx_ring_dma_handle);
2703 2700 ddi_dma_mem_free(&xnfp->xnf_tx_ring_dma_acchandle);
2704 2701 ddi_dma_free_handle(&xnfp->xnf_tx_ring_dma_handle);
2705 2702 xnfp->xnf_tx_ring_dma_acchandle = NULL;
2706 2703 }
2707 2704
2708 2705 }
2709 2706
2710 2707 /*
2711 2708 * Release any packets and associated structures used by the TX ring.
2712 2709 */
2713 2710 static void
2714 2711 xnf_release_mblks(xnf_t *xnfp)
2715 2712 {
2716 2713 RING_IDX i;
2717 2714 xnf_txid_t *tidp;
2718 2715
2719 2716 for (i = 0, tidp = &xnfp->xnf_tx_pkt_id[0];
2720 2717 i < NET_TX_RING_SIZE;
2721 2718 i++, tidp++) {
2722 2719 xnf_txbuf_t *txp = tidp->txbuf;
2723 2720
2724 2721 if (txp != NULL) {
2725 2722 ASSERT(txp->tx_mp != NULL);
2726 2723 freemsg(txp->tx_mp);
2727 2724
2728 2725 xnf_txid_put(xnfp, tidp);
2729 2726 kmem_cache_free(xnfp->xnf_tx_buf_cache, txp);
2730 2727 }
2731 2728 }
2732 2729 }
2733 2730
2734 2731 static int
2735 2732 xnf_buf_constructor(void *buf, void *arg, int kmflag)
2736 2733 {
2737 2734 int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP;
2738 2735 xnf_buf_t *bdesc = buf;
2739 2736 xnf_t *xnfp = arg;
2740 2737 ddi_dma_cookie_t dma_cookie;
2741 2738 uint_t ncookies;
2742 2739 size_t len;
2743 2740
2744 2741 if (kmflag & KM_NOSLEEP)
2745 2742 ddiflags = DDI_DMA_DONTWAIT;
2746 2743
2747 2744 /* Allocate a DMA access handle for the buffer. */
2748 2745 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &rx_buf_dma_attr,
2749 2746 ddiflags, 0, &bdesc->dma_handle) != DDI_SUCCESS)
2750 2747 goto failure;
2751 2748
2752 2749 /* Allocate DMA-able memory for buffer. */
2753 2750 if (ddi_dma_mem_alloc(bdesc->dma_handle,
2754 2751 PAGESIZE, &data_accattr, DDI_DMA_STREAMING, ddiflags, 0,
2755 2752 &bdesc->buf, &len, &bdesc->acc_handle) != DDI_SUCCESS)
2756 2753 goto failure_1;
2757 2754
2758 2755 /* Bind to virtual address of buffer to get physical address. */
2759 2756 if (ddi_dma_addr_bind_handle(bdesc->dma_handle, NULL,
2760 2757 bdesc->buf, len, DDI_DMA_RDWR | DDI_DMA_STREAMING,
2761 2758 ddiflags, 0, &dma_cookie, &ncookies) != DDI_DMA_MAPPED)
2762 2759 goto failure_2;
2763 2760 ASSERT(ncookies == 1);
2764 2761
2765 2762 bdesc->free_rtn.free_func = xnf_buf_recycle;
2766 2763 bdesc->free_rtn.free_arg = (caddr_t)bdesc;
2767 2764 bdesc->xnfp = xnfp;
2768 2765 bdesc->buf_phys = dma_cookie.dmac_laddress;
2769 2766 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys));
2770 2767 bdesc->len = dma_cookie.dmac_size;
2771 2768 bdesc->grant_ref = INVALID_GRANT_REF;
2772 2769 bdesc->gen = xnfp->xnf_gen;
2773 2770
2774 2771 atomic_inc_64(&xnfp->xnf_stat_buf_allocated);
2775 2772
2776 2773 return (0);
2777 2774
2778 2775 failure_2:
2779 2776 ddi_dma_mem_free(&bdesc->acc_handle);
2780 2777
2781 2778 failure_1:
2782 2779 ddi_dma_free_handle(&bdesc->dma_handle);
2783 2780
2784 2781 failure:
2785 2782
2786 2783 ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. */
2787 2784 return (-1);
2788 2785 }
2789 2786
2790 2787 static void
2791 2788 xnf_buf_destructor(void *buf, void *arg)
2792 2789 {
2793 2790 xnf_buf_t *bdesc = buf;
2794 2791 xnf_t *xnfp = arg;
2795 2792
2796 2793 (void) ddi_dma_unbind_handle(bdesc->dma_handle);
2797 2794 ddi_dma_mem_free(&bdesc->acc_handle);
2798 2795 ddi_dma_free_handle(&bdesc->dma_handle);
2799 2796
2800 2797 atomic_dec_64(&xnfp->xnf_stat_buf_allocated);
2801 2798 }
2802 2799
2803 2800 static xnf_buf_t *
2804 2801 xnf_buf_get(xnf_t *xnfp, int flags, boolean_t readonly)
2805 2802 {
2806 2803 grant_ref_t gref;
2807 2804 xnf_buf_t *bufp;
2808 2805
2809 2806 /*
2810 2807 * Usually grant references are more scarce than memory, so we
2811 2808 * attempt to acquire a grant reference first.
2812 2809 */
2813 2810 gref = xnf_gref_get(xnfp);
2814 2811 if (gref == INVALID_GRANT_REF)
2815 2812 return (NULL);
2816 2813
2817 2814 bufp = kmem_cache_alloc(xnfp->xnf_buf_cache, flags);
2818 2815 if (bufp == NULL) {
2819 2816 xnf_gref_put(xnfp, gref);
2820 2817 return (NULL);
2821 2818 }
2822 2819
2823 2820 ASSERT3U(bufp->grant_ref, ==, INVALID_GRANT_REF);
2824 2821
2825 2822 bufp->grant_ref = gref;
2826 2823
2827 2824 if (bufp->gen != xnfp->xnf_gen)
2828 2825 xnf_buf_refresh(bufp);
2829 2826
2830 2827 gnttab_grant_foreign_access_ref(bufp->grant_ref,
2831 2828 xvdi_get_oeid(bufp->xnfp->xnf_devinfo),
2832 2829 bufp->buf_mfn, readonly ? 1 : 0);
2833 2830
2834 2831 atomic_inc_64(&xnfp->xnf_stat_buf_outstanding);
2835 2832
2836 2833 return (bufp);
2837 2834 }
2838 2835
2839 2836 static void
2840 2837 xnf_buf_put(xnf_t *xnfp, xnf_buf_t *bufp, boolean_t readonly)
2841 2838 {
2842 2839 if (bufp->grant_ref != INVALID_GRANT_REF) {
2843 2840 (void) gnttab_end_foreign_access_ref(
2844 2841 bufp->grant_ref, readonly ? 1 : 0);
2845 2842 xnf_gref_put(xnfp, bufp->grant_ref);
2846 2843 bufp->grant_ref = INVALID_GRANT_REF;
2847 2844 }
2848 2845
2849 2846 kmem_cache_free(xnfp->xnf_buf_cache, bufp);
2850 2847
2851 2848 atomic_dec_64(&xnfp->xnf_stat_buf_outstanding);
2852 2849 }
2853 2850
2854 2851 /*
2855 2852 * Refresh any cached data about a buffer after resume.
2856 2853 */
2857 2854 static void
2858 2855 xnf_buf_refresh(xnf_buf_t *bdesc)
2859 2856 {
2860 2857 bdesc->buf_mfn = pfn_to_mfn(xnf_btop(bdesc->buf_phys));
2861 2858 bdesc->gen = bdesc->xnfp->xnf_gen;
2862 2859 }
2863 2860
2864 2861 /*
2865 2862 * Streams `freeb' routine for `xnf_buf_t' when used as transmit
2866 2863 * look-aside buffers.
2867 2864 */
2868 2865 static void
2869 2866 xnf_buf_recycle(xnf_buf_t *bdesc)
2870 2867 {
2871 2868 xnf_t *xnfp = bdesc->xnfp;
2872 2869
2873 2870 xnf_buf_put(xnfp, bdesc, B_TRUE);
2874 2871 }
2875 2872
2876 2873 static int
2877 2874 xnf_tx_buf_constructor(void *buf, void *arg, int kmflag)
2878 2875 {
2879 2876 int (*ddiflags)(caddr_t) = DDI_DMA_SLEEP;
2880 2877 xnf_txbuf_t *txp = buf;
2881 2878 xnf_t *xnfp = arg;
2882 2879
2883 2880 if (kmflag & KM_NOSLEEP)
2884 2881 ddiflags = DDI_DMA_DONTWAIT;
2885 2882
2886 2883 if (ddi_dma_alloc_handle(xnfp->xnf_devinfo, &tx_buf_dma_attr,
2887 2884 ddiflags, 0, &txp->tx_dma_handle) != DDI_SUCCESS) {
2888 2885 ASSERT(kmflag & KM_NOSLEEP); /* Cannot fail for KM_SLEEP. */
2889 2886 return (-1);
2890 2887 }
2891 2888
2892 2889 return (0);
2893 2890 }
2894 2891
2895 2892 static void
2896 2893 xnf_tx_buf_destructor(void *buf, void *arg)
2897 2894 {
2898 2895 _NOTE(ARGUNUSED(arg));
2899 2896 xnf_txbuf_t *txp = buf;
2900 2897
2901 2898 ddi_dma_free_handle(&txp->tx_dma_handle);
2902 2899 }
2903 2900
2904 2901 /*
2905 2902 * Statistics.
2906 2903 */
2907 2904 static char *xnf_aux_statistics[] = {
2908 2905 "tx_cksum_deferred",
2909 2906 "rx_cksum_no_need",
2910 2907 "interrupts",
2911 2908 "unclaimed_interrupts",
2912 2909 "tx_pullup",
2913 2910 "tx_lookaside",
2914 2911 "tx_drop",
2915 2912 "tx_eth_hdr_split",
2916 2913 "buf_allocated",
2917 2914 "buf_outstanding",
2918 2915 "gref_outstanding",
2919 2916 "gref_failure",
2920 2917 "gref_peak",
2921 2918 "rx_allocb_fail",
2922 2919 "rx_desballoc_fail",
2923 2920 };
2924 2921
2925 2922 static int
2926 2923 xnf_kstat_aux_update(kstat_t *ksp, int flag)
2927 2924 {
2928 2925 xnf_t *xnfp;
2929 2926 kstat_named_t *knp;
2930 2927
2931 2928 if (flag != KSTAT_READ)
2932 2929 return (EACCES);
2933 2930
2934 2931 xnfp = ksp->ks_private;
2935 2932 knp = ksp->ks_data;
2936 2933
2937 2934 /*
2938 2935 * Assignment order must match that of the names in
2939 2936 * xnf_aux_statistics.
2940 2937 */
2941 2938 (knp++)->value.ui64 = xnfp->xnf_stat_tx_cksum_deferred;
2942 2939 (knp++)->value.ui64 = xnfp->xnf_stat_rx_cksum_no_need;
2943 2940
2944 2941 (knp++)->value.ui64 = xnfp->xnf_stat_interrupts;
2945 2942 (knp++)->value.ui64 = xnfp->xnf_stat_unclaimed_interrupts;
2946 2943 (knp++)->value.ui64 = xnfp->xnf_stat_tx_pullup;
2947 2944 (knp++)->value.ui64 = xnfp->xnf_stat_tx_lookaside;
2948 2945 (knp++)->value.ui64 = xnfp->xnf_stat_tx_drop;
2949 2946 (knp++)->value.ui64 = xnfp->xnf_stat_tx_eth_hdr_split;
2950 2947
2951 2948 (knp++)->value.ui64 = xnfp->xnf_stat_buf_allocated;
2952 2949 (knp++)->value.ui64 = xnfp->xnf_stat_buf_outstanding;
2953 2950 (knp++)->value.ui64 = xnfp->xnf_stat_gref_outstanding;
2954 2951 (knp++)->value.ui64 = xnfp->xnf_stat_gref_failure;
2955 2952 (knp++)->value.ui64 = xnfp->xnf_stat_gref_peak;
2956 2953 (knp++)->value.ui64 = xnfp->xnf_stat_rx_allocb_fail;
2957 2954 (knp++)->value.ui64 = xnfp->xnf_stat_rx_desballoc_fail;
2958 2955
2959 2956 return (0);
2960 2957 }
2961 2958
2962 2959 static boolean_t
2963 2960 xnf_kstat_init(xnf_t *xnfp)
2964 2961 {
2965 2962 int nstat = sizeof (xnf_aux_statistics) /
2966 2963 sizeof (xnf_aux_statistics[0]);
2967 2964 char **cp = xnf_aux_statistics;
2968 2965 kstat_named_t *knp;
2969 2966
2970 2967 /*
2971 2968 * Create and initialise kstats.
2972 2969 */
2973 2970 if ((xnfp->xnf_kstat_aux = kstat_create("xnf",
2974 2971 ddi_get_instance(xnfp->xnf_devinfo),
2975 2972 "aux_statistics", "net", KSTAT_TYPE_NAMED,
2976 2973 nstat, 0)) == NULL)
2977 2974 return (B_FALSE);
2978 2975
2979 2976 xnfp->xnf_kstat_aux->ks_private = xnfp;
2980 2977 xnfp->xnf_kstat_aux->ks_update = xnf_kstat_aux_update;
2981 2978
2982 2979 knp = xnfp->xnf_kstat_aux->ks_data;
2983 2980 while (nstat > 0) {
2984 2981 kstat_named_init(knp, *cp, KSTAT_DATA_UINT64);
2985 2982
2986 2983 knp++;
2987 2984 cp++;
2988 2985 nstat--;
2989 2986 }
2990 2987
2991 2988 kstat_install(xnfp->xnf_kstat_aux);
2992 2989
2993 2990 return (B_TRUE);
2994 2991 }
2995 2992
2996 2993 static int
2997 2994 xnf_stat(void *arg, uint_t stat, uint64_t *val)
2998 2995 {
2999 2996 xnf_t *xnfp = arg;
3000 2997
3001 2998 mutex_enter(&xnfp->xnf_rxlock);
3002 2999 mutex_enter(&xnfp->xnf_txlock);
3003 3000
3004 3001 #define mac_stat(q, r) \
3005 3002 case (MAC_STAT_##q): \
3006 3003 *val = xnfp->xnf_stat_##r; \
3007 3004 break
3008 3005
3009 3006 #define ether_stat(q, r) \
3010 3007 case (ETHER_STAT_##q): \
3011 3008 *val = xnfp->xnf_stat_##r; \
3012 3009 break
3013 3010
3014 3011 switch (stat) {
3015 3012
3016 3013 mac_stat(IPACKETS, ipackets);
3017 3014 mac_stat(OPACKETS, opackets);
3018 3015 mac_stat(RBYTES, rbytes);
3019 3016 mac_stat(OBYTES, obytes);
3020 3017 mac_stat(NORCVBUF, norxbuf);
3021 3018 mac_stat(IERRORS, errrx);
3022 3019 mac_stat(NOXMTBUF, tx_defer);
3023 3020
3024 3021 ether_stat(MACRCV_ERRORS, mac_rcv_error);
3025 3022 ether_stat(TOOSHORT_ERRORS, runt);
3026 3023
3027 3024 /* always claim to be in full duplex mode */
3028 3025 case ETHER_STAT_LINK_DUPLEX:
3029 3026 *val = LINK_DUPLEX_FULL;
3030 3027 break;
3031 3028
3032 3029 /* always claim to be at 1Gb/s link speed */
3033 3030 case MAC_STAT_IFSPEED:
3034 3031 *val = 1000000000ull;
3035 3032 break;
3036 3033
3037 3034 default:
3038 3035 mutex_exit(&xnfp->xnf_txlock);
3039 3036 mutex_exit(&xnfp->xnf_rxlock);
3040 3037
3041 3038 return (ENOTSUP);
3042 3039 }
3043 3040
3044 3041 #undef mac_stat
3045 3042 #undef ether_stat
3046 3043
3047 3044 mutex_exit(&xnfp->xnf_txlock);
3048 3045 mutex_exit(&xnfp->xnf_rxlock);
3049 3046
3050 3047 return (0);
3051 3048 }
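/*
 * For clarity, each mac_stat()/ether_stat() entry above expands to a
 * plain case statement; e.g. mac_stat(IPACKETS, ipackets) becomes:
 *
 *	case (MAC_STAT_IPACKETS):
 *		*val = xnfp->xnf_stat_ipackets;
 *		break;
 */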
3052 3049
3053 3050 static int
3054 3051 xnf_change_mtu(xnf_t *xnfp, uint32_t mtu)
3055 3052 {
3056 3053 if (mtu > ETHERMTU) {
3057 3054 if (!xnf_enable_tx_sg) {
3058 3055 dev_err(xnfp->xnf_devinfo, CE_WARN, "MTU limited to %d "
3059 3056 "because scatter-gather is disabled for transmit "
3060 3057 "in driver settings", ETHERMTU);
3061 3058 return (EINVAL);
3062 3059 } else if (!xnf_enable_rx_sg) {
3063 3060 dev_err(xnfp->xnf_devinfo, CE_WARN, "MTU limited to %d "
3064 3061 "because scatter-gather is disabled for receive "
3065 3062 "in driver settings", ETHERMTU);
3066 3063 return (EINVAL);
3067 3064 } else if (!xnfp->xnf_be_tx_sg) {
3068 3065 dev_err(xnfp->xnf_devinfo, CE_WARN, "MTU limited to %d "
3069 3066 "because backend doesn't support scatter-gather",
3070 3067 ETHERMTU);
3071 3068 return (EINVAL);
3072 3069 }
3073 3070 if (mtu > XNF_MAXPKT)
3074 3071 return (EINVAL);
3075 3072 }
3076 3073 int error = mac_maxsdu_update(xnfp->xnf_mh, mtu);
3077 3074 if (error == 0)
3078 3075 xnfp->xnf_mtu = mtu;
3079 3076
3080 3077 return (error);
3081 3078 }
3082 3079
3083 3080 /*ARGSUSED*/
3084 3081 static int
3085 3082 xnf_getprop(void *data, const char *prop_name, mac_prop_id_t prop_id,
3086 3083 uint_t prop_val_size, void *prop_val)
3087 3084 {
3088 3085 xnf_t *xnfp = data;
3089 3086
3090 3087 switch (prop_id) {
3091 3088 case MAC_PROP_MTU:
3092 3089 ASSERT(prop_val_size >= sizeof (uint32_t));
3093 3090 bcopy(&xnfp->xnf_mtu, prop_val, sizeof (uint32_t));
3094 3091 break;
3095 3092 default:
3096 3093 return (ENOTSUP);
3097 3094 }
3098 3095 return (0);
3099 3096 }
3100 3097
3101 3098 /*ARGSUSED*/
3102 3099 static int
3103 3100 xnf_setprop(void *data, const char *prop_name, mac_prop_id_t prop_id,
3104 3101 uint_t prop_val_size, const void *prop_val)
3105 3102 {
3106 3103 xnf_t *xnfp = data;
3107 3104 uint32_t new_mtu;
3108 3105 int error;
3109 3106
3110 3107 switch (prop_id) {
3111 3108 case MAC_PROP_MTU:
3112 3109 ASSERT(prop_val_size >= sizeof (uint32_t));
3113 3110 bcopy(prop_val, &new_mtu, sizeof (new_mtu));
3114 3111 error = xnf_change_mtu(xnfp, new_mtu);
3115 3112 break;
3116 3113 default:
3117 3114 return (ENOTSUP);
3118 3115 }
3119 3116
3120 3117 return (error);
3121 3118 }
3122 3119
3123 3120 /*ARGSUSED*/
3124 3121 static void
3125 3122 xnf_propinfo(void *data, const char *prop_name, mac_prop_id_t prop_id,
3126 3123 mac_prop_info_handle_t prop_handle)
3127 3124 {
3128 3125 switch (prop_id) {
3129 3126 case MAC_PROP_MTU:
3130 3127 mac_prop_info_set_range_uint32(prop_handle, 0, XNF_MAXPKT);
3131 3128 break;
3132 3129 default:
3133 3130 break;
3134 3131 }
3135 3132 }
3136 3133
3137 3134 static boolean_t
3138 3135 xnf_getcapab(void *arg, mac_capab_t cap, void *cap_data)
3139 3136 {
3140 3137 xnf_t *xnfp = arg;
3141 3138
3142 3139 switch (cap) {
3143 3140 case MAC_CAPAB_HCKSUM: {
3144 3141 uint32_t *capab = cap_data;
3145 3142
3146 3143 /*
3147 3144 * Whilst the flag used to communicate with the IO
3148 3145 * domain is called "NETTXF_csum_blank", the checksum
3149 3146 * in the packet must contain the pseudo-header
3150 3147 * checksum and not zero.
3151 3148 *
3152 3149 * To help out the IO domain, we might use
3153 3150 * HCKSUM_INET_PARTIAL. Unfortunately our stack will
3154 3151 * then use checksum offload for IPv6 packets, which
3155 3152 * the IO domain can't handle.
3156 3153 *
3157 3154 		 * As a result, we declare ourselves capable of
3158 3155 * HCKSUM_INET_FULL_V4. This means that we receive
3159 3156 * IPv4 packets from the stack with a blank checksum
3160 3157 * field and must insert the pseudo-header checksum
3161 3158 * before passing the packet to the IO domain.
3162 3159 */
3163 3160 *capab = HCKSUM_INET_FULL_V4;
3164 3161
3165 3162 /*
3166 3163 * TODO: query the "feature-ipv6-csum-offload" capability.
3167 3164 * If enabled, that could allow us to use HCKSUM_INET_PARTIAL.
3168 3165 */
3169 3166
3170 3167 break;
3171 3168 }
3172 3169 case MAC_CAPAB_LSO: {
3173 3170 if (!xnfp->xnf_be_lso)
3174 3171 return (B_FALSE);
3175 3172
3176 3173 mac_capab_lso_t *lso = cap_data;
3177 3174 lso->lso_flags = LSO_TX_BASIC_TCP_IPV4;
3178 3175 lso->lso_basic_tcp_ipv4.lso_max = IP_MAXPACKET;
3179 3176 break;
3180 3177 }
3181 3178 default:
3182 3179 return (B_FALSE);
3183 3180 }
3184 3181
3185 3182 return (B_TRUE);
3186 3183 }
3187 3184
3188 3185 /*
3189 3186 * The state of the peer has changed - react accordingly.
3190 3187 */
3191 3188 static void
3192 3189 oe_state_change(dev_info_t *dip, ddi_eventcookie_t id,
3193 3190 void *arg, void *impl_data)
3194 3191 {
3195 3192 _NOTE(ARGUNUSED(id, arg));
3196 3193 xnf_t *xnfp = ddi_get_driver_private(dip);
3197 3194 XenbusState new_state = *(XenbusState *)impl_data;
3198 3195
3199 3196 ASSERT(xnfp != NULL);
3200 3197
3201 3198 switch (new_state) {
3202 3199 case XenbusStateUnknown:
3203 3200 case XenbusStateInitialising:
3204 3201 case XenbusStateInitialised:
3205 3202 case XenbusStateClosing:
3206 3203 case XenbusStateClosed:
3207 3204 case XenbusStateReconfiguring:
3208 3205 case XenbusStateReconfigured:
3209 3206 break;
3210 3207
3211 3208 case XenbusStateInitWait:
3212 3209 xnf_read_config(xnfp);
3213 3210
3214 3211 if (!xnfp->xnf_be_rx_copy) {
3215 3212 cmn_err(CE_WARN,
3216 3213 "The xnf driver requires a dom0 that "
3217 3214 "supports 'feature-rx-copy'.");
3218 3215 (void) xvdi_switch_state(xnfp->xnf_devinfo,
3219 3216 XBT_NULL, XenbusStateClosed);
3220 3217 break;
3221 3218 }
3222 3219
3223 3220 /*
3224 3221 * Connect to the backend.
3225 3222 */
3226 3223 xnf_be_connect(xnfp);
3227 3224
3228 3225 /*
3229 3226 * Our MAC address as discovered by xnf_read_config().
3230 3227 */
3231 3228 mac_unicst_update(xnfp->xnf_mh, xnfp->xnf_mac_addr);
3232 3229
3233 3230 /*
3234 3231 * We do not know if some features such as LSO are supported
3235 3232 * until we connect to the backend. We request the MAC layer
3236 3233 * to poll our capabilities again.
3237 3234 */
3238 3235 mac_capab_update(xnfp->xnf_mh);
3239 3236
3240 3237 break;
3241 3238
3242 3239 case XenbusStateConnected:
3243 3240 mutex_enter(&xnfp->xnf_rxlock);
3244 3241 mutex_enter(&xnfp->xnf_txlock);
3245 3242
3246 3243 xnfp->xnf_connected = B_TRUE;
3247 3244 /*
3248 3245 * Wake up any threads waiting to send data to
3249 3246 * backend.
3250 3247 */
3251 3248 cv_broadcast(&xnfp->xnf_cv_state);
3252 3249
3253 3250 mutex_exit(&xnfp->xnf_txlock);
3254 3251 mutex_exit(&xnfp->xnf_rxlock);
3255 3252
3256 3253 /*
3257 3254 		 * Kick the peer in case it missed any transmit
3258 3255 		 * requests in the TX ring.
3259 3256 */
3260 3257 ec_notify_via_evtchn(xnfp->xnf_evtchn);
3261 3258
3262 3259 /*
3263 3260 * There may already be completed receive requests in
3264 3261 		 * the ring sent by the backend after it gets connected
3265 3262 * but before we see its state change here, so we call
3266 3263 * xnf_intr() to handle them, if any.
3267 3264 */
3268 3265 (void) xnf_intr((caddr_t)xnfp);
3269 3266
3270 3267 /*
3271 3268 * Mark the link up now that we are connected.
3272 3269 */
3273 3270 mac_link_update(xnfp->xnf_mh, LINK_STATE_UP);
3274 3271
3275 3272 /*
3276 3273 * Tell the backend about the multicast addresses in
3277 3274 * which we are interested.
3278 3275 */
3279 3276 mac_multicast_refresh(xnfp->xnf_mh, NULL, xnfp, B_TRUE);
3280 3277
3281 3278 break;
3282 3279
3283 3280 default:
3284 3281 break;
3285 3282 }
3286 3283 }