8634 epoll fails to wake on certain edge-triggered conditions
8635 epoll should not emit POLLNVAL
8636 recursive epoll should emit EPOLLRDNORM
Reviewed by: Jerry Jelinek <jerry.jelinek@joyent.com>
Reviewed by: Robert Mustacchi <rm@joyent.com>
Reviewed by: Toomas Soome <tsoome@me.com>
Reviewed by: Igor Kozhukhov <igor@dilos.org>
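The substance of the change is in xpvtap_chpoll() below: *reventsp is now cleared unconditionally before the ring is inspected, and the pollhead is handed back not only when no events are pending but also whenever the caller polls edge-triggered (POLLET), so a later pollwakeup() can reach epoll waiters. A minimal sketch of that chpoll(9E) convention, with hypothetical mydrv_* names standing in for a real driver, might look like:

static int
mydrv_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
    struct pollhead **phpp)
{
	mydrv_state_t *state;

	state = mydrv_get_state(getminor(dev));	/* hypothetical lookup */
	if (state == NULL) {
		return (EBADF);
	}

	/* always initialize revents, even on early-return paths */
	*reventsp = 0;
	if (mydrv_work_pending(state)) {	/* hypothetical readiness check */
		*reventsp = POLLIN | POLLRDNORM;
	}

	/*
	 * hand back the pollhead when nothing is ready (so the caller can
	 * block) and also for POLLET callers, which must be re-registered
	 * even when events are being returned.
	 */
	if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
		*phpp = &state->md_pollhead;
	}

	return (0);
}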
--- old/usr/src/uts/common/xen/io/xpvtap.c
+++ new/usr/src/uts/common/xen/io/xpvtap.c
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21
22 22 /*
23 23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 24 * Use is subject to license terms.
25 + * Copyright 2017 Joyent, Inc.
25 26 */
26 27
27 28
28 29 #include <sys/errno.h>
29 30 #include <sys/types.h>
30 31 #include <sys/conf.h>
31 32 #include <sys/kmem.h>
32 33 #include <sys/ddi.h>
33 34 #include <sys/stat.h>
34 35 #include <sys/sunddi.h>
35 36 #include <sys/file.h>
36 37 #include <sys/open.h>
37 38 #include <sys/modctl.h>
38 39 #include <sys/ddi_impldefs.h>
39 40 #include <sys/sysmacros.h>
40 41 #include <sys/ddidevmap.h>
41 42 #include <sys/policy.h>
42 43
43 44 #include <sys/vmsystm.h>
44 45 #include <vm/hat_i86.h>
45 46 #include <vm/hat_pte.h>
46 47 #include <vm/seg_kmem.h>
47 48 #include <vm/seg_mf.h>
48 49
49 50 #include <xen/io/blkif_impl.h>
50 51 #include <xen/io/blk_common.h>
51 52 #include <xen/io/xpvtap.h>
52 53
53 54
54 55 static int xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred);
55 56 static int xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred);
56 57 static int xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode,
57 58 cred_t *cred, int *rval);
58 59 static int xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off,
59 60 size_t len, size_t *maplen, uint_t model);
60 61 static int xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
61 62 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
62 63 cred_t *cred_p);
63 64 static int xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
64 65 struct pollhead **phpp);
65 66
66 67 static struct cb_ops xpvtap_cb_ops = {
67 68 xpvtap_open, /* cb_open */
68 69 xpvtap_close, /* cb_close */
69 70 nodev, /* cb_strategy */
70 71 nodev, /* cb_print */
71 72 nodev, /* cb_dump */
72 73 nodev, /* cb_read */
73 74 nodev, /* cb_write */
74 75 xpvtap_ioctl, /* cb_ioctl */
75 76 xpvtap_devmap, /* cb_devmap */
76 77 nodev, /* cb_mmap */
77 78 xpvtap_segmap, /* cb_segmap */
78 79 xpvtap_chpoll, /* cb_chpoll */
79 80 ddi_prop_op, /* cb_prop_op */
80 81 NULL, /* cb_stream */
81 82 D_NEW | D_MP | D_64BIT | D_DEVMAP, /* cb_flag */
82 83 CB_REV
83 84 };
84 85
85 86 static int xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg,
86 87 void **result);
87 88 static int xpvtap_attach(dev_info_t *devi, ddi_attach_cmd_t cmd);
88 89 static int xpvtap_detach(dev_info_t *devi, ddi_detach_cmd_t cmd);
89 90
90 91 static struct dev_ops xpvtap_dev_ops = {
91 92 DEVO_REV, /* devo_rev */
92 93 0, /* devo_refcnt */
93 94 xpvtap_getinfo, /* devo_getinfo */
94 95 nulldev, /* devo_identify */
95 96 nulldev, /* devo_probe */
96 97 xpvtap_attach, /* devo_attach */
97 98 xpvtap_detach, /* devo_detach */
98 99 nodev, /* devo_reset */
99 100 &xpvtap_cb_ops, /* devo_cb_ops */
100 101 NULL, /* devo_bus_ops */
101 102 NULL /* power */
102 103 };
103 104
104 105
105 106 static struct modldrv xpvtap_modldrv = {
106 107 &mod_driverops, /* Type of module. This one is a driver */
107 108 "xpvtap driver", /* Name of the module. */
108 109 &xpvtap_dev_ops, /* driver ops */
109 110 };
110 111
111 112 static struct modlinkage xpvtap_modlinkage = {
112 113 MODREV_1,
113 114 (void *) &xpvtap_modldrv,
114 115 NULL
115 116 };
116 117
117 118
118 119 void *xpvtap_statep;
119 120
120 121
121 122 static xpvtap_state_t *xpvtap_drv_init(int instance);
122 123 static void xpvtap_drv_fini(xpvtap_state_t *state);
123 124 static uint_t xpvtap_intr(caddr_t arg);
124 125
125 126 typedef void (*xpvtap_rs_cleanup_t)(xpvtap_state_t *state, uint_t rs);
126 127 static void xpvtap_rs_init(uint_t min_val, uint_t max_val,
127 128 xpvtap_rs_hdl_t *handle);
128 129 static void xpvtap_rs_fini(xpvtap_rs_hdl_t *handle);
129 130 static int xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *rs);
130 131 static void xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t rs);
131 132 static void xpvtap_rs_flush(xpvtap_rs_hdl_t handle,
132 133 xpvtap_rs_cleanup_t callback, void *arg);
133 134
134 135 static int xpvtap_segmf_register(xpvtap_state_t *state);
135 136 static void xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event);
136 137
137 138 static int xpvtap_user_init(xpvtap_state_t *state);
138 139 static void xpvtap_user_fini(xpvtap_state_t *state);
139 140 static int xpvtap_user_ring_init(xpvtap_state_t *state);
140 141 static void xpvtap_user_ring_fini(xpvtap_state_t *state);
141 142 static int xpvtap_user_thread_init(xpvtap_state_t *state);
142 143 static void xpvtap_user_thread_fini(xpvtap_state_t *state);
143 144 static void xpvtap_user_thread_start(caddr_t arg);
144 145 static void xpvtap_user_thread_stop(xpvtap_state_t *state);
145 146 static void xpvtap_user_thread(void *arg);
146 147
147 148 static void xpvtap_user_app_stop(caddr_t arg);
148 149
149 150 static int xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
150 151 uint_t *uid);
151 152 static int xpvtap_user_request_push(xpvtap_state_t *state,
152 153 blkif_request_t *req, uint_t uid);
153 154 static int xpvtap_user_response_get(xpvtap_state_t *state,
154 155 blkif_response_t *resp, uint_t *uid);
155 156 static void xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid);
156 157
157 158
158 159 /*
159 160 * _init()
160 161 */
161 162 int
162 163 _init(void)
163 164 {
164 165 int e;
165 166
166 167 e = ddi_soft_state_init(&xpvtap_statep, sizeof (xpvtap_state_t), 1);
167 168 if (e != 0) {
168 169 return (e);
169 170 }
170 171
171 172 e = mod_install(&xpvtap_modlinkage);
172 173 if (e != 0) {
173 174 ddi_soft_state_fini(&xpvtap_statep);
174 175 return (e);
175 176 }
176 177
177 178 return (0);
178 179 }
179 180
180 181
181 182 /*
182 183 * _info()
183 184 */
184 185 int
185 186 _info(struct modinfo *modinfop)
186 187 {
187 188 return (mod_info(&xpvtap_modlinkage, modinfop));
188 189 }
189 190
190 191
191 192 /*
192 193 * _fini()
193 194 */
194 195 int
195 196 _fini(void)
196 197 {
197 198 int e;
198 199
199 200 e = mod_remove(&xpvtap_modlinkage);
200 201 if (e != 0) {
201 202 return (e);
202 203 }
203 204
204 205 ddi_soft_state_fini(&xpvtap_statep);
205 206
206 207 return (0);
207 208 }
208 209
209 210
210 211 /*
211 212 * xpvtap_attach()
212 213 */
213 214 static int
214 215 xpvtap_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
215 216 {
216 217 blk_ringinit_args_t args;
217 218 xpvtap_state_t *state;
218 219 int instance;
219 220 int e;
220 221
221 222
222 223 switch (cmd) {
223 224 case DDI_ATTACH:
224 225 break;
225 226
226 227 case DDI_RESUME:
227 228 return (DDI_SUCCESS);
228 229
229 230 default:
230 231 return (DDI_FAILURE);
231 232 }
232 233
233 234 /* initialize our state info */
234 235 instance = ddi_get_instance(dip);
235 236 state = xpvtap_drv_init(instance);
236 237 if (state == NULL) {
237 238 return (DDI_FAILURE);
238 239 }
239 240 state->bt_dip = dip;
240 241
241 242 /* Initialize the guest ring */
242 243 args.ar_dip = state->bt_dip;
243 244 args.ar_intr = xpvtap_intr;
244 245 args.ar_intr_arg = (caddr_t)state;
245 246 args.ar_ringup = xpvtap_user_thread_start;
246 247 args.ar_ringup_arg = (caddr_t)state;
247 248 args.ar_ringdown = xpvtap_user_app_stop;
248 249 args.ar_ringdown_arg = (caddr_t)state;
249 250 e = blk_ring_init(&args, &state->bt_guest_ring);
250 251 if (e != DDI_SUCCESS) {
251 252 goto attachfail_ringinit;
252 253 }
253 254
254 255 /* create the minor node (for ioctl/mmap) */
255 256 e = ddi_create_minor_node(dip, "xpvtap", S_IFCHR, instance,
256 257 DDI_PSEUDO, 0);
257 258 if (e != DDI_SUCCESS) {
258 259 goto attachfail_minor_node;
259 260 }
260 261
261 262 /* Report that driver was loaded */
262 263 ddi_report_dev(dip);
263 264
264 265 return (DDI_SUCCESS);
265 266
266 267 attachfail_minor_node:
267 268 blk_ring_fini(&state->bt_guest_ring);
268 269 attachfail_ringinit:
269 270 xpvtap_drv_fini(state);
270 271 return (DDI_FAILURE);
271 272 }
272 273
273 274
274 275 /*
275 276 * xpvtap_detach()
276 277 */
277 278 static int
278 279 xpvtap_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
279 280 {
280 281 xpvtap_state_t *state;
281 282 int instance;
282 283
283 284
284 285 instance = ddi_get_instance(dip);
285 286 state = ddi_get_soft_state(xpvtap_statep, instance);
286 287 if (state == NULL) {
287 288 return (DDI_FAILURE);
288 289 }
289 290
290 291 switch (cmd) {
291 292 case DDI_DETACH:
292 293 break;
293 294
294 295 case DDI_SUSPEND:
295 296 default:
296 297 return (DDI_FAILURE);
297 298 }
298 299
299 300 xpvtap_user_thread_stop(state);
300 301 blk_ring_fini(&state->bt_guest_ring);
301 302 xpvtap_drv_fini(state);
302 303 ddi_remove_minor_node(dip, NULL);
303 304
304 305 return (DDI_SUCCESS);
305 306 }
306 307
307 308
308 309 /*
309 310 * xpvtap_getinfo()
310 311 */
311 312 /*ARGSUSED*/
312 313 static int
313 314 xpvtap_getinfo(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
314 315 {
315 316 xpvtap_state_t *state;
316 317 int instance;
317 318 dev_t dev;
318 319 int e;
319 320
320 321
321 322 dev = (dev_t)arg;
322 323 instance = getminor(dev);
323 324
324 325 switch (cmd) {
325 326 case DDI_INFO_DEVT2DEVINFO:
326 327 state = ddi_get_soft_state(xpvtap_statep, instance);
327 328 if (state == NULL) {
328 329 return (DDI_FAILURE);
329 330 }
330 331 *result = (void *)state->bt_dip;
331 332 e = DDI_SUCCESS;
332 333 break;
333 334
334 335 case DDI_INFO_DEVT2INSTANCE:
335 336 *result = (void *)(uintptr_t)instance;
336 337 e = DDI_SUCCESS;
337 338 break;
338 339
339 340 default:
340 341 e = DDI_FAILURE;
341 342 break;
342 343 }
343 344
344 345 return (e);
345 346 }
346 347
347 348
348 349 /*
349 350 * xpvtap_open()
350 351 */
351 352 /*ARGSUSED*/
352 353 static int
353 354 xpvtap_open(dev_t *devp, int flag, int otyp, cred_t *cred)
354 355 {
355 356 xpvtap_state_t *state;
356 357 int instance;
357 358
358 359
359 360 if (secpolicy_xvm_control(cred)) {
360 361 return (EPERM);
361 362 }
362 363
363 364 instance = getminor(*devp);
364 365 state = ddi_get_soft_state(xpvtap_statep, instance);
365 366 if (state == NULL) {
366 367 return (ENXIO);
367 368 }
368 369
369 370 /* we should only be opened once */
370 371 mutex_enter(&state->bt_open.bo_mutex);
371 372 if (state->bt_open.bo_opened) {
372 373 mutex_exit(&state->bt_open.bo_mutex);
373 374 return (EBUSY);
374 375 }
375 376 state->bt_open.bo_opened = B_TRUE;
376 377 mutex_exit(&state->bt_open.bo_mutex);
377 378
378 379 /*
 379  380	 * save the app's address space. We need it for mapping/unmapping
 380  381	 * grefs since we will be doing it in a separate kernel thread.
381 382 */
382 383 state->bt_map.um_as = curproc->p_as;
383 384
384 385 return (0);
385 386 }
386 387
387 388
388 389 /*
389 390 * xpvtap_close()
390 391 */
391 392 /*ARGSUSED*/
392 393 static int
393 394 xpvtap_close(dev_t devp, int flag, int otyp, cred_t *cred)
394 395 {
395 396 xpvtap_state_t *state;
396 397 int instance;
397 398
398 399
399 400 instance = getminor(devp);
400 401 state = ddi_get_soft_state(xpvtap_statep, instance);
401 402 if (state == NULL) {
402 403 return (ENXIO);
403 404 }
404 405
405 406 /*
 406  407	 * wake thread so it can clean up, and wait for it to exit so we can
407 408 * be sure it's not in the middle of processing a request/response.
408 409 */
409 410 mutex_enter(&state->bt_thread.ut_mutex);
410 411 state->bt_thread.ut_wake = B_TRUE;
411 412 state->bt_thread.ut_exit = B_TRUE;
412 413 cv_signal(&state->bt_thread.ut_wake_cv);
413 414 if (!state->bt_thread.ut_exit_done) {
414 415 cv_wait(&state->bt_thread.ut_exit_done_cv,
415 416 &state->bt_thread.ut_mutex);
416 417 }
417 418 ASSERT(state->bt_thread.ut_exit_done);
418 419 mutex_exit(&state->bt_thread.ut_mutex);
419 420
420 421 state->bt_map.um_as = NULL;
421 422 state->bt_map.um_guest_pages = NULL;
422 423
423 424 /*
424 425 * when the ring is brought down, a userland hotplug script is run
425 426 * which tries to bring the userland app down. We'll wait for a bit
426 427 * for the user app to exit. Notify the thread waiting that the app
427 428 * has closed the driver.
428 429 */
429 430 mutex_enter(&state->bt_open.bo_mutex);
430 431 ASSERT(state->bt_open.bo_opened);
431 432 state->bt_open.bo_opened = B_FALSE;
432 433 cv_signal(&state->bt_open.bo_exit_cv);
433 434 mutex_exit(&state->bt_open.bo_mutex);
434 435
435 436 return (0);
436 437 }
437 438
438 439
439 440 /*
440 441 * xpvtap_ioctl()
441 442 */
442 443 /*ARGSUSED*/
443 444 static int
444 445 xpvtap_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *cred,
445 446 int *rval)
446 447 {
447 448 xpvtap_state_t *state;
448 449 int instance;
449 450
450 451
451 452 if (secpolicy_xvm_control(cred)) {
452 453 return (EPERM);
453 454 }
454 455
455 456 instance = getminor(dev);
456 457 if (instance == -1) {
457 458 return (EBADF);
458 459 }
459 460
460 461 state = ddi_get_soft_state(xpvtap_statep, instance);
461 462 if (state == NULL) {
462 463 return (EBADF);
463 464 }
464 465
465 466 switch (cmd) {
466 467 case XPVTAP_IOCTL_RESP_PUSH:
467 468 /*
468 469 * wake thread, thread handles guest requests and user app
469 470 * responses.
470 471 */
471 472 mutex_enter(&state->bt_thread.ut_mutex);
472 473 state->bt_thread.ut_wake = B_TRUE;
473 474 cv_signal(&state->bt_thread.ut_wake_cv);
474 475 mutex_exit(&state->bt_thread.ut_mutex);
475 476 break;
476 477
477 478 default:
478 479 cmn_err(CE_WARN, "ioctl(%d) not supported\n", cmd);
479 480 return (ENXIO);
480 481 }
481 482
482 483 return (0);
483 484 }
484 485
485 486
486 487 /*
487 488 * xpvtap_segmap()
488 489 */
489 490 /*ARGSUSED*/
490 491 static int
491 492 xpvtap_segmap(dev_t dev, off_t off, struct as *asp, caddr_t *addrp,
492 493 off_t len, unsigned int prot, unsigned int maxprot, unsigned int flags,
493 494 cred_t *cred_p)
494 495 {
495 496 struct segmf_crargs a;
496 497 xpvtap_state_t *state;
497 498 int instance;
498 499 int e;
499 500
500 501
501 502 if (secpolicy_xvm_control(cred_p)) {
502 503 return (EPERM);
503 504 }
504 505
505 506 instance = getminor(dev);
506 507 state = ddi_get_soft_state(xpvtap_statep, instance);
507 508 if (state == NULL) {
508 509 return (EBADF);
509 510 }
510 511
511 512 /* the user app should be doing a MAP_SHARED mapping */
512 513 if ((flags & MAP_TYPE) != MAP_SHARED) {
513 514 return (EINVAL);
514 515 }
515 516
516 517 /*
517 518 * if this is the user ring (offset = 0), devmap it (which ends up in
518 519 * xpvtap_devmap). devmap will alloc and map the ring into the
519 520 * app's VA space.
520 521 */
521 522 if (off == 0) {
522 523 e = devmap_setup(dev, (offset_t)off, asp, addrp, (size_t)len,
523 524 prot, maxprot, flags, cred_p);
524 525 return (e);
525 526 }
526 527
527 528 /* this should be the mmap for the gref pages (offset = PAGESIZE) */
528 529 if (off != PAGESIZE) {
529 530 return (EINVAL);
530 531 }
531 532
532 533 /* make sure we get the size we're expecting */
533 534 if (len != XPVTAP_GREF_BUFSIZE) {
534 535 return (EINVAL);
535 536 }
536 537
537 538 /*
538 539 * reserve user app VA space for the gref pages and use segmf to
539 540 * manage the backing store for the physical memory. segmf will
540 541 * map in/out the grefs and fault them in/out.
541 542 */
542 543 ASSERT(asp == state->bt_map.um_as);
543 544 as_rangelock(asp);
544 545 if ((flags & MAP_FIXED) == 0) {
545 546 map_addr(addrp, len, 0, 0, flags);
546 547 if (*addrp == NULL) {
547 548 as_rangeunlock(asp);
548 549 return (ENOMEM);
549 550 }
550 551 } else {
551 552 /* User specified address */
552 553 (void) as_unmap(asp, *addrp, len);
553 554 }
554 555 a.dev = dev;
555 556 a.prot = (uchar_t)prot;
556 557 a.maxprot = (uchar_t)maxprot;
557 558 e = as_map(asp, *addrp, len, segmf_create, &a);
558 559 if (e != 0) {
559 560 as_rangeunlock(asp);
560 561 return (e);
561 562 }
562 563 as_rangeunlock(asp);
563 564
564 565 /*
565 566 * Stash user base address, and compute address where the request
566 567 * array will end up.
567 568 */
568 569 state->bt_map.um_guest_pages = (caddr_t)*addrp;
569 570 state->bt_map.um_guest_size = (size_t)len;
570 571
571 572 /* register an as callback so we can cleanup when the app goes away */
572 573 e = as_add_callback(asp, xpvtap_segmf_unregister, state,
573 574 AS_UNMAP_EVENT, *addrp, len, KM_SLEEP);
574 575 if (e != 0) {
575 576 (void) as_unmap(asp, *addrp, len);
576 577 return (EINVAL);
577 578 }
578 579
579 580 /* wake thread to see if there are requests already queued up */
580 581 mutex_enter(&state->bt_thread.ut_mutex);
581 582 state->bt_thread.ut_wake = B_TRUE;
582 583 cv_signal(&state->bt_thread.ut_wake_cv);
583 584 mutex_exit(&state->bt_thread.ut_mutex);
584 585
585 586 return (0);
586 587 }
587 588
588 589
589 590 /*
590 591 * xpvtap_devmap()
591 592 */
592 593 /*ARGSUSED*/
593 594 static int
594 595 xpvtap_devmap(dev_t dev, devmap_cookie_t dhp, offset_t off, size_t len,
595 596 size_t *maplen, uint_t model)
596 597 {
597 598 xpvtap_user_ring_t *usring;
598 599 xpvtap_state_t *state;
599 600 int instance;
600 601 int e;
601 602
602 603
603 604 instance = getminor(dev);
604 605 state = ddi_get_soft_state(xpvtap_statep, instance);
605 606 if (state == NULL) {
606 607 return (EBADF);
607 608 }
608 609
609 610 /* we should only get here if the offset was == 0 */
610 611 if (off != 0) {
611 612 return (EINVAL);
612 613 }
613 614
614 615 /* we should only be mapping in one page */
615 616 if (len != PAGESIZE) {
616 617 return (EINVAL);
617 618 }
618 619
619 620 /*
620 621 * we already allocated the user ring during driver attach, all we
621 622 * need to do is map it into the user app's VA.
622 623 */
623 624 usring = &state->bt_user_ring;
624 625 e = devmap_umem_setup(dhp, state->bt_dip, NULL, usring->ur_cookie, 0,
625 626 PAGESIZE, PROT_ALL, DEVMAP_DEFAULTS, NULL);
626 627 if (e < 0) {
627 628 return (e);
628 629 }
629 630
 630  631	/* return the size to complete the devmap */
631 632 *maplen = PAGESIZE;
632 633
633 634 return (0);
634 635 }
635 636
636 637
637 638 /*
638 639 * xpvtap_chpoll()
639 640 */
640 641 static int
641 642 xpvtap_chpoll(dev_t dev, short events, int anyyet, short *reventsp,
642 643 struct pollhead **phpp)
643 644 {
644 645 xpvtap_user_ring_t *usring;
645 646 xpvtap_state_t *state;
646 647 int instance;
647 648
648 649
649 650 instance = getminor(dev);
650 651 if (instance == -1) {
651 652 return (EBADF);
652 653 }
653 654 state = ddi_get_soft_state(xpvtap_statep, instance);
654 655 if (state == NULL) {
655 656 return (EBADF);
656 657 }
657 658
658 659 if (((events & (POLLIN | POLLRDNORM)) == 0) && !anyyet) {
659 - *reventsp = 0;
660 660 return (EINVAL);
661 661 }
662 662
663 663 /*
 664  664	 * if we pushed requests on the user ring since the last poll, wake up
665 665 * the user app
666 666 */
667 + *reventsp = 0;
667 668 usring = &state->bt_user_ring;
668 669 if (usring->ur_prod_polled != usring->ur_ring.req_prod_pvt) {
669 670
670 671 /*
671 672 * XXX - is this faster here or xpvtap_user_request_push??
672 673 * prelim data says here. Because less membars or because
673 674 * user thread will spin in poll requests before getting to
674 675 * responses?
675 676 */
676 677 RING_PUSH_REQUESTS(&usring->ur_ring);
677 678
678 679 usring->ur_prod_polled = usring->ur_ring.sring->req_prod;
679 680 *reventsp = POLLIN | POLLRDNORM;
681 + }
680 682
681 - /* no new requests */
682 - } else {
683 - *reventsp = 0;
684 - if (!anyyet) {
685 - *phpp = &state->bt_pollhead;
686 - }
683 + if ((*reventsp == 0 && !anyyet) || (events & POLLET)) {
684 + *phpp = &state->bt_pollhead;
687 685 }
688 686
689 687 return (0);
690 688 }
691 689
692 690
693 691 /*
694 692 * xpvtap_drv_init()
695 693 */
696 694 static xpvtap_state_t *
697 695 xpvtap_drv_init(int instance)
698 696 {
699 697 xpvtap_state_t *state;
700 698 int e;
701 699
702 700
703 701 e = ddi_soft_state_zalloc(xpvtap_statep, instance);
704 702 if (e != DDI_SUCCESS) {
705 703 return (NULL);
706 704 }
707 705 state = ddi_get_soft_state(xpvtap_statep, instance);
708 706 if (state == NULL) {
709 707 goto drvinitfail_get_soft_state;
710 708 }
711 709
712 710 state->bt_instance = instance;
713 711 mutex_init(&state->bt_open.bo_mutex, NULL, MUTEX_DRIVER, NULL);
714 712 cv_init(&state->bt_open.bo_exit_cv, NULL, CV_DRIVER, NULL);
715 713 state->bt_open.bo_opened = B_FALSE;
716 714 state->bt_map.um_registered = B_FALSE;
717 715
718 716 /* initialize user ring, thread, mapping state */
719 717 e = xpvtap_user_init(state);
720 718 if (e != DDI_SUCCESS) {
721 719 goto drvinitfail_userinit;
722 720 }
723 721
724 722 return (state);
725 723
726 724 drvinitfail_userinit:
727 725 cv_destroy(&state->bt_open.bo_exit_cv);
728 726 mutex_destroy(&state->bt_open.bo_mutex);
729 727 drvinitfail_get_soft_state:
730 728 (void) ddi_soft_state_free(xpvtap_statep, instance);
731 729 return (NULL);
732 730 }
733 731
734 732
735 733 /*
736 734 * xpvtap_drv_fini()
737 735 */
738 736 static void
739 737 xpvtap_drv_fini(xpvtap_state_t *state)
740 738 {
741 739 xpvtap_user_fini(state);
742 740 cv_destroy(&state->bt_open.bo_exit_cv);
743 741 mutex_destroy(&state->bt_open.bo_mutex);
744 742 (void) ddi_soft_state_free(xpvtap_statep, state->bt_instance);
745 743 }
746 744
747 745
748 746 /*
749 747 * xpvtap_intr()
750 748 * this routine will be called when we have a request on the guest ring.
751 749 */
752 750 static uint_t
753 751 xpvtap_intr(caddr_t arg)
754 752 {
755 753 xpvtap_state_t *state;
756 754
757 755
758 756 state = (xpvtap_state_t *)arg;
759 757
760 758 /* wake thread, thread handles guest requests and user app responses */
761 759 mutex_enter(&state->bt_thread.ut_mutex);
762 760 state->bt_thread.ut_wake = B_TRUE;
763 761 cv_signal(&state->bt_thread.ut_wake_cv);
764 762 mutex_exit(&state->bt_thread.ut_mutex);
765 763
766 764 return (DDI_INTR_CLAIMED);
767 765 }
768 766
769 767
770 768 /*
771 769 * xpvtap_segmf_register()
772 770 */
773 771 static int
774 772 xpvtap_segmf_register(xpvtap_state_t *state)
775 773 {
776 774 struct seg *seg;
777 775 uint64_t pte_ma;
778 776 struct as *as;
779 777 caddr_t uaddr;
780 778 uint_t pgcnt;
781 779 int i;
782 780
783 781
784 782 as = state->bt_map.um_as;
785 783 pgcnt = btopr(state->bt_map.um_guest_size);
786 784 uaddr = state->bt_map.um_guest_pages;
787 785
788 786 if (pgcnt == 0) {
789 787 return (DDI_FAILURE);
790 788 }
791 789
792 790 AS_LOCK_ENTER(as, RW_READER);
793 791
794 792 seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
795 793 if ((seg == NULL) || ((uaddr + state->bt_map.um_guest_size) >
796 794 (seg->s_base + seg->s_size))) {
797 795 AS_LOCK_EXIT(as);
798 796 return (DDI_FAILURE);
799 797 }
800 798
801 799 /*
802 800 * lock down the htables so the HAT can't steal them. Register the
803 801 * PTE MA's for each gref page with seg_mf so we can do user space
804 802 * gref mappings.
805 803 */
806 804 for (i = 0; i < pgcnt; i++) {
807 805 hat_prepare_mapping(as->a_hat, uaddr, &pte_ma);
808 806 hat_devload(as->a_hat, uaddr, PAGESIZE, (pfn_t)0,
809 807 PROT_READ | PROT_WRITE | PROT_USER | HAT_UNORDERED_OK,
810 808 HAT_LOAD_NOCONSIST | HAT_LOAD_LOCK);
811 809 hat_release_mapping(as->a_hat, uaddr);
812 810 segmf_add_gref_pte(seg, uaddr, pte_ma);
813 811 uaddr += PAGESIZE;
814 812 }
815 813
816 814 state->bt_map.um_registered = B_TRUE;
817 815
818 816 AS_LOCK_EXIT(as);
819 817
820 818 return (DDI_SUCCESS);
821 819 }
822 820
823 821
824 822 /*
825 823 * xpvtap_segmf_unregister()
826 824 * as_callback routine
827 825 */
828 826 /*ARGSUSED*/
829 827 static void
830 828 xpvtap_segmf_unregister(struct as *as, void *arg, uint_t event)
831 829 {
832 830 xpvtap_state_t *state;
833 831 caddr_t uaddr;
834 832 uint_t pgcnt;
835 833 int i;
836 834
837 835
838 836 state = (xpvtap_state_t *)arg;
839 837 if (!state->bt_map.um_registered) {
840 838 /* remove the callback (which is this routine) */
841 839 (void) as_delete_callback(as, arg);
842 840 return;
843 841 }
844 842
845 843 pgcnt = btopr(state->bt_map.um_guest_size);
846 844 uaddr = state->bt_map.um_guest_pages;
847 845
848 846 /* unmap any outstanding req's grefs */
849 847 xpvtap_rs_flush(state->bt_map.um_rs, xpvtap_user_request_unmap, state);
850 848
851 849 /* Unlock the gref pages */
852 850 for (i = 0; i < pgcnt; i++) {
853 851 AS_LOCK_ENTER(as, RW_WRITER);
854 852 hat_prepare_mapping(as->a_hat, uaddr, NULL);
855 853 hat_unload(as->a_hat, uaddr, PAGESIZE, HAT_UNLOAD_UNLOCK);
856 854 hat_release_mapping(as->a_hat, uaddr);
857 855 AS_LOCK_EXIT(as);
858 856 uaddr += PAGESIZE;
859 857 }
860 858
861 859 /* remove the callback (which is this routine) */
862 860 (void) as_delete_callback(as, arg);
863 861
864 862 state->bt_map.um_registered = B_FALSE;
865 863 }
866 864
867 865
868 866 /*
869 867 * xpvtap_user_init()
870 868 */
871 869 static int
872 870 xpvtap_user_init(xpvtap_state_t *state)
873 871 {
874 872 xpvtap_user_map_t *map;
875 873 int e;
876 874
877 875
878 876 map = &state->bt_map;
879 877
880 878 /* Setup the ring between the driver and user app */
881 879 e = xpvtap_user_ring_init(state);
882 880 if (e != DDI_SUCCESS) {
883 881 return (DDI_FAILURE);
884 882 }
885 883
886 884 /*
887 885 * the user ring can handle BLKIF_RING_SIZE outstanding requests. This
888 886 * is the same number of requests as the guest ring. Initialize the
889 887 * state we use to track request IDs to the user app. These IDs will
890 888 * also identify which group of gref pages correspond with the
891 889 * request.
892 890 */
893 891 xpvtap_rs_init(0, (BLKIF_RING_SIZE - 1), &map->um_rs);
894 892
895 893 /*
896 894 * allocate the space to store a copy of each outstanding requests. We
897 895 * will need to reference the ID and the number of segments when we
898 896 * get the response from the user app.
899 897 */
900 898 map->um_outstanding_reqs = kmem_zalloc(
901 899 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE,
902 900 KM_SLEEP);
903 901
904 902 /*
905 903 * initialize the thread we use to process guest requests and user
906 904 * responses.
907 905 */
908 906 e = xpvtap_user_thread_init(state);
909 907 if (e != DDI_SUCCESS) {
910 908 goto userinitfail_user_thread_init;
911 909 }
912 910
913 911 return (DDI_SUCCESS);
914 912
915 913 userinitfail_user_thread_init:
916 914 xpvtap_rs_fini(&map->um_rs);
917 915 kmem_free(map->um_outstanding_reqs,
918 916 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
919 917 xpvtap_user_ring_fini(state);
920 918 return (DDI_FAILURE);
921 919 }
922 920
923 921
924 922 /*
925 923 * xpvtap_user_ring_init()
926 924 */
927 925 static int
928 926 xpvtap_user_ring_init(xpvtap_state_t *state)
929 927 {
930 928 xpvtap_user_ring_t *usring;
931 929
932 930
933 931 usring = &state->bt_user_ring;
934 932
 935  933	/* allocate and initialize the page for the shared user ring */
936 934 usring->ur_sring = (blkif_sring_t *)ddi_umem_alloc(PAGESIZE,
937 935 DDI_UMEM_SLEEP, &usring->ur_cookie);
938 936 SHARED_RING_INIT(usring->ur_sring);
939 937 FRONT_RING_INIT(&usring->ur_ring, usring->ur_sring, PAGESIZE);
940 938 usring->ur_prod_polled = 0;
941 939
942 940 return (DDI_SUCCESS);
943 941 }
944 942
945 943
946 944 /*
947 945 * xpvtap_user_thread_init()
948 946 */
949 947 static int
950 948 xpvtap_user_thread_init(xpvtap_state_t *state)
951 949 {
952 950 xpvtap_user_thread_t *thread;
953 951 char taskqname[32];
954 952
955 953
956 954 thread = &state->bt_thread;
957 955
958 956 mutex_init(&thread->ut_mutex, NULL, MUTEX_DRIVER, NULL);
959 957 cv_init(&thread->ut_wake_cv, NULL, CV_DRIVER, NULL);
960 958 cv_init(&thread->ut_exit_done_cv, NULL, CV_DRIVER, NULL);
961 959 thread->ut_wake = B_FALSE;
962 960 thread->ut_exit = B_FALSE;
963 961 thread->ut_exit_done = B_TRUE;
964 962
965 963 /* create but don't start the user thread */
966 964 (void) sprintf(taskqname, "xvptap_%d", state->bt_instance);
967 965 thread->ut_taskq = ddi_taskq_create(state->bt_dip, taskqname, 1,
968 966 TASKQ_DEFAULTPRI, 0);
969 967 if (thread->ut_taskq == NULL) {
970 968 goto userinitthrfail_taskq_create;
971 969 }
972 970
973 971 return (DDI_SUCCESS);
974 972
975 973 userinitthrfail_taskq_dispatch:
976 974 ddi_taskq_destroy(thread->ut_taskq);
977 975 userinitthrfail_taskq_create:
978 976 cv_destroy(&thread->ut_exit_done_cv);
979 977 cv_destroy(&thread->ut_wake_cv);
980 978 mutex_destroy(&thread->ut_mutex);
981 979
982 980 return (DDI_FAILURE);
983 981 }
984 982
985 983
986 984 /*
987 985 * xpvtap_user_thread_start()
988 986 */
989 987 static void
990 988 xpvtap_user_thread_start(caddr_t arg)
991 989 {
992 990 xpvtap_user_thread_t *thread;
993 991 xpvtap_state_t *state;
994 992 int e;
995 993
996 994
997 995 state = (xpvtap_state_t *)arg;
998 996 thread = &state->bt_thread;
999 997
1000 998 /* start the user thread */
1001 999 thread->ut_exit_done = B_FALSE;
1002 1000 e = ddi_taskq_dispatch(thread->ut_taskq, xpvtap_user_thread, state,
1003 1001 DDI_SLEEP);
1004 1002 if (e != DDI_SUCCESS) {
1005 1003 thread->ut_exit_done = B_TRUE;
1006 1004 cmn_err(CE_WARN, "Unable to start user thread\n");
1007 1005 }
1008 1006 }
1009 1007
1010 1008
1011 1009 /*
1012 1010 * xpvtap_user_thread_stop()
1013 1011 */
1014 1012 static void
1015 1013 xpvtap_user_thread_stop(xpvtap_state_t *state)
1016 1014 {
1017 1015 /* wake thread so it can exit */
1018 1016 mutex_enter(&state->bt_thread.ut_mutex);
1019 1017 state->bt_thread.ut_wake = B_TRUE;
1020 1018 state->bt_thread.ut_exit = B_TRUE;
1021 1019 cv_signal(&state->bt_thread.ut_wake_cv);
1022 1020 if (!state->bt_thread.ut_exit_done) {
1023 1021 cv_wait(&state->bt_thread.ut_exit_done_cv,
1024 1022 &state->bt_thread.ut_mutex);
1025 1023 }
1026 1024 mutex_exit(&state->bt_thread.ut_mutex);
1027 1025 ASSERT(state->bt_thread.ut_exit_done);
1028 1026 }
1029 1027
1030 1028
1031 1029 /*
1032 1030 * xpvtap_user_fini()
1033 1031 */
1034 1032 static void
1035 1033 xpvtap_user_fini(xpvtap_state_t *state)
1036 1034 {
1037 1035 xpvtap_user_map_t *map;
1038 1036
1039 1037
1040 1038 map = &state->bt_map;
1041 1039
1042 1040 xpvtap_user_thread_fini(state);
1043 1041 xpvtap_rs_fini(&map->um_rs);
1044 1042 kmem_free(map->um_outstanding_reqs,
1045 1043 sizeof (*map->um_outstanding_reqs) * BLKIF_RING_SIZE);
1046 1044 xpvtap_user_ring_fini(state);
1047 1045 }
1048 1046
1049 1047
1050 1048 /*
1051 1049 * xpvtap_user_ring_fini()
1052 1050 */
1053 1051 static void
1054 1052 xpvtap_user_ring_fini(xpvtap_state_t *state)
1055 1053 {
1056 1054 ddi_umem_free(state->bt_user_ring.ur_cookie);
1057 1055 }
1058 1056
1059 1057
1060 1058 /*
1061 1059 * xpvtap_user_thread_fini()
1062 1060 */
1063 1061 static void
1064 1062 xpvtap_user_thread_fini(xpvtap_state_t *state)
1065 1063 {
1066 1064 ddi_taskq_destroy(state->bt_thread.ut_taskq);
1067 1065 cv_destroy(&state->bt_thread.ut_exit_done_cv);
1068 1066 cv_destroy(&state->bt_thread.ut_wake_cv);
1069 1067 mutex_destroy(&state->bt_thread.ut_mutex);
1070 1068 }
1071 1069
1072 1070
1073 1071 /*
1074 1072 * xpvtap_user_thread()
1075 1073 */
1076 1074 static void
1077 1075 xpvtap_user_thread(void *arg)
1078 1076 {
1079 1077 xpvtap_user_thread_t *thread;
1080 1078 blkif_response_t resp;
1081 1079 xpvtap_state_t *state;
1082 1080 blkif_request_t req;
1083 1081 boolean_t b;
1084 1082 uint_t uid;
1085 1083 int e;
1086 1084
1087 1085
1088 1086 state = (xpvtap_state_t *)arg;
1089 1087 thread = &state->bt_thread;
1090 1088
1091 1089 xpvtap_thread_start:
1092 1090 /* See if we are supposed to exit */
1093 1091 mutex_enter(&thread->ut_mutex);
1094 1092 if (thread->ut_exit) {
1095 1093 thread->ut_exit_done = B_TRUE;
1096 1094 cv_signal(&state->bt_thread.ut_exit_done_cv);
1097 1095 mutex_exit(&thread->ut_mutex);
1098 1096 return;
1099 1097 }
1100 1098
1101 1099 /*
1102 1100 * if we aren't supposed to be awake, wait until someone wakes us.
1103 1101 * when we wake up, check for a kill or someone telling us to exit.
1104 1102 */
1105 1103 if (!thread->ut_wake) {
1106 1104 e = cv_wait_sig(&thread->ut_wake_cv, &thread->ut_mutex);
1107 1105 if ((e == 0) || (thread->ut_exit)) {
1108 1106 thread->ut_exit = B_TRUE;
1109 1107 mutex_exit(&thread->ut_mutex);
1110 1108 goto xpvtap_thread_start;
1111 1109 }
1112 1110 }
1113 1111
1114 1112 /* if someone didn't wake us, go back to the start of the thread */
1115 1113 if (!thread->ut_wake) {
1116 1114 mutex_exit(&thread->ut_mutex);
1117 1115 goto xpvtap_thread_start;
1118 1116 }
1119 1117
1120 1118 /* we are awake */
1121 1119 thread->ut_wake = B_FALSE;
1122 1120 mutex_exit(&thread->ut_mutex);
1123 1121
1124 1122 /* process requests from the guest */
1125 1123 do {
1126 1124 /*
1127 1125 * check for requests from the guest. if we don't have any,
1128 1126 * break out of the loop.
1129 1127 */
1130 1128 e = blk_ring_request_get(state->bt_guest_ring, &req);
1131 1129 if (e == B_FALSE) {
1132 1130 break;
1133 1131 }
1134 1132
1135 1133 /* we got a request, map the grefs into the user app's VA */
1136 1134 e = xpvtap_user_request_map(state, &req, &uid);
1137 1135 if (e != DDI_SUCCESS) {
1138 1136 /*
1139 1137 * If we couldn't map the request (e.g. user app hasn't
1140 1138 * opened the device yet), requeue it and try again
1141 1139 * later
1142 1140 */
1143 1141 blk_ring_request_requeue(state->bt_guest_ring);
1144 1142 break;
1145 1143 }
1146 1144
1147 1145 /* push the request to the user app */
1148 1146 e = xpvtap_user_request_push(state, &req, uid);
1149 1147 if (e != DDI_SUCCESS) {
1150 1148 resp.id = req.id;
1151 1149 resp.operation = req.operation;
1152 1150 resp.status = BLKIF_RSP_ERROR;
1153 1151 blk_ring_response_put(state->bt_guest_ring, &resp);
1154 1152 }
1155 1153 } while (!thread->ut_exit);
1156 1154
1157 1155	/* process responses from the user app */
1158 1156 do {
1159 1157 /*
1160 1158 * check for responses from the user app. if we don't have any,
1161 1159 * break out of the loop.
1162 1160 */
1163 1161 b = xpvtap_user_response_get(state, &resp, &uid);
1164 1162 if (b != B_TRUE) {
1165 1163 break;
1166 1164 }
1167 1165
1168 1166 /*
1169 1167 * if we got a response, unmap the grefs from the matching
1170 1168 * request.
1171 1169 */
1172 1170 xpvtap_user_request_unmap(state, uid);
1173 1171
1174 1172 /* push the response to the guest */
1175 1173 blk_ring_response_put(state->bt_guest_ring, &resp);
1176 1174 } while (!thread->ut_exit);
1177 1175
1178 1176 goto xpvtap_thread_start;
1179 1177 }
1180 1178
1181 1179
1182 1180 /*
1183 1181 * xpvtap_user_request_map()
1184 1182 */
1185 1183 static int
1186 1184 xpvtap_user_request_map(xpvtap_state_t *state, blkif_request_t *req,
1187 1185 uint_t *uid)
1188 1186 {
1189 1187 grant_ref_t gref[BLKIF_MAX_SEGMENTS_PER_REQUEST];
1190 1188 struct seg *seg;
1191 1189 struct as *as;
1192 1190 domid_t domid;
1193 1191 caddr_t uaddr;
1194 1192 uint_t flags;
1195 1193 int i;
1196 1194 int e;
1197 1195
1198 1196
1199 1197 domid = xvdi_get_oeid(state->bt_dip);
1200 1198
1201 1199 as = state->bt_map.um_as;
1202 1200 if ((as == NULL) || (state->bt_map.um_guest_pages == NULL)) {
1203 1201 return (DDI_FAILURE);
1204 1202 }
1205 1203
1206 1204 /* has to happen after segmap returns */
1207 1205 if (!state->bt_map.um_registered) {
1208 1206 /* register the pte's with segmf */
1209 1207 e = xpvtap_segmf_register(state);
1210 1208 if (e != DDI_SUCCESS) {
1211 1209 return (DDI_FAILURE);
1212 1210 }
1213 1211 }
1214 1212
1215 1213 /* alloc an ID for the user ring */
1216 1214 e = xpvtap_rs_alloc(state->bt_map.um_rs, uid);
1217 1215 if (e != DDI_SUCCESS) {
1218 1216 return (DDI_FAILURE);
1219 1217 }
1220 1218
1221 1219 /* if we don't have any segments to map, we're done */
1222 1220 if ((req->operation == BLKIF_OP_WRITE_BARRIER) ||
1223 1221 (req->operation == BLKIF_OP_FLUSH_DISKCACHE) ||
1224 1222 (req->nr_segments == 0)) {
1225 1223 return (DDI_SUCCESS);
1226 1224 }
1227 1225
1228 1226 /* get the apps gref address */
1229 1227 uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, *uid);
1230 1228
1231 1229 AS_LOCK_ENTER(as, RW_READER);
1232 1230 seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
1233 1231 if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
1234 1232 (seg->s_base + seg->s_size))) {
1235 1233 AS_LOCK_EXIT(as);
1236 1234 return (DDI_FAILURE);
1237 1235 }
1238 1236
1239 1237 /* if we are reading from disk, we are writing into memory */
1240 1238 flags = 0;
1241 1239 if (req->operation == BLKIF_OP_READ) {
1242 1240 flags |= SEGMF_GREF_WR;
1243 1241 }
1244 1242
1245 1243 /* Load the grefs into seg_mf */
1246 1244 for (i = 0; i < req->nr_segments; i++) {
1247 1245 gref[i] = req->seg[i].gref;
1248 1246 }
1249 1247 (void) segmf_add_grefs(seg, uaddr, flags, gref, req->nr_segments,
1250 1248 domid);
1251 1249
1252 1250 AS_LOCK_EXIT(as);
1253 1251
1254 1252 return (DDI_SUCCESS);
1255 1253 }
1256 1254
1257 1255
1258 1256 /*
1259 1257 * xpvtap_user_request_push()
1260 1258 */
1261 1259 static int
1262 1260 xpvtap_user_request_push(xpvtap_state_t *state, blkif_request_t *req,
1263 1261 uint_t uid)
1264 1262 {
1265 1263 blkif_request_t *outstanding_req;
1266 1264 blkif_front_ring_t *uring;
1267 1265 blkif_request_t *target;
1268 1266 xpvtap_user_map_t *map;
1269 1267
1270 1268
1271 1269 uring = &state->bt_user_ring.ur_ring;
1272 1270 map = &state->bt_map;
1273 1271
1274 1272 target = RING_GET_REQUEST(uring, uring->req_prod_pvt);
1275 1273
1276 1274 /*
1277 1275 * Save request from the frontend. used for ID mapping and unmap
1278 1276 * on response/cleanup
1279 1277 */
1280 1278 outstanding_req = &map->um_outstanding_reqs[uid];
1281 1279 bcopy(req, outstanding_req, sizeof (*outstanding_req));
1282 1280
1283 1281 /* put the request on the user ring */
1284 1282 bcopy(req, target, sizeof (*req));
1285 1283 target->id = (uint64_t)uid;
1286 1284 uring->req_prod_pvt++;
1287 1285
1288 1286 pollwakeup(&state->bt_pollhead, POLLIN | POLLRDNORM);
1289 1287
1290 1288 return (DDI_SUCCESS);
1291 1289 }
1292 1290
1293 1291
1294 1292 static void
1295 1293 xpvtap_user_request_unmap(xpvtap_state_t *state, uint_t uid)
1296 1294 {
1297 1295 blkif_request_t *req;
1298 1296 struct seg *seg;
1299 1297 struct as *as;
1300 1298 caddr_t uaddr;
1301 1299 int e;
1302 1300
1303 1301
1304 1302 as = state->bt_map.um_as;
1305 1303 if (as == NULL) {
1306 1304 return;
1307 1305 }
1308 1306
1309 1307 /* get a copy of the original request */
1310 1308 req = &state->bt_map.um_outstanding_reqs[uid];
1311 1309
1312 1310 /* unmap the grefs for this request */
1313 1311 if ((req->operation != BLKIF_OP_WRITE_BARRIER) &&
1314 1312 (req->operation != BLKIF_OP_FLUSH_DISKCACHE) &&
1315 1313 (req->nr_segments != 0)) {
1316 1314 uaddr = XPVTAP_GREF_REQADDR(state->bt_map.um_guest_pages, uid);
1317 1315 AS_LOCK_ENTER(as, RW_READER);
1318 1316 seg = as_findseg(as, state->bt_map.um_guest_pages, 0);
1319 1317 if ((seg == NULL) || ((uaddr + mmu_ptob(req->nr_segments)) >
1320 1318 (seg->s_base + seg->s_size))) {
1321 1319 AS_LOCK_EXIT(as);
1322 1320 xpvtap_rs_free(state->bt_map.um_rs, uid);
1323 1321 return;
1324 1322 }
1325 1323
1326 1324 e = segmf_release_grefs(seg, uaddr, req->nr_segments);
1327 1325 if (e != 0) {
1328 1326 cmn_err(CE_WARN, "unable to release grefs");
1329 1327 }
1330 1328
1331 1329 AS_LOCK_EXIT(as);
1332 1330 }
1333 1331
1334 1332 /* free up the user ring id */
1335 1333 xpvtap_rs_free(state->bt_map.um_rs, uid);
1336 1334 }
1337 1335
1338 1336
1339 1337 static int
1340 1338 xpvtap_user_response_get(xpvtap_state_t *state, blkif_response_t *resp,
1341 1339 uint_t *uid)
1342 1340 {
1343 1341 blkif_front_ring_t *uring;
1344 1342 blkif_response_t *target;
1345 1343
1346 1344
1347 1345 uring = &state->bt_user_ring.ur_ring;
1348 1346
1349 1347 if (!RING_HAS_UNCONSUMED_RESPONSES(uring)) {
1350 1348 return (B_FALSE);
1351 1349 }
1352 1350
1353 1351 target = NULL;
1354 1352 target = RING_GET_RESPONSE(uring, uring->rsp_cons);
1355 1353 if (target == NULL) {
1356 1354 return (B_FALSE);
1357 1355 }
1358 1356
1359 1357 /* copy out the user app response */
1360 1358 bcopy(target, resp, sizeof (*resp));
1361 1359 uring->rsp_cons++;
1362 1360
1363 1361	/* restore the guest's id from the original request */
1364 1362 *uid = (uint_t)resp->id;
1365 1363 resp->id = state->bt_map.um_outstanding_reqs[*uid].id;
1366 1364
1367 1365 return (B_TRUE);
1368 1366 }
1369 1367
1370 1368
1371 1369 /*
1372 1370 * xpvtap_user_app_stop()
1373 1371 */
1374 1372 static void xpvtap_user_app_stop(caddr_t arg)
1375 1373 {
1376 1374 xpvtap_state_t *state;
1377 1375 clock_t rc;
1378 1376
1379 1377 state = (xpvtap_state_t *)arg;
1380 1378
1381 1379 /*
1382 1380 * Give the app 10 secs to exit. If it doesn't exit, it's not a serious
1383 1381	 * problem; we just won't auto-detach the driver.
1384 1382 */
1385 1383 mutex_enter(&state->bt_open.bo_mutex);
1386 1384 if (state->bt_open.bo_opened) {
1387 1385 rc = cv_reltimedwait(&state->bt_open.bo_exit_cv,
1388 1386 &state->bt_open.bo_mutex, drv_usectohz(10000000),
1389 1387 TR_CLOCK_TICK);
1390 1388 if (rc <= 0) {
1391 1389 cmn_err(CE_NOTE, "!user process still has driver open, "
1392 1390 "deferring detach\n");
1393 1391 }
1394 1392 }
1395 1393 mutex_exit(&state->bt_open.bo_mutex);
1396 1394 }
1397 1395
1398 1396
1399 1397 /*
1400 1398 * xpvtap_rs_init()
1401 1399 * Initialize the resource structure. init() returns a handle to be used
1402 1400 * for the rest of the resource functions. This code is written assuming
1403 1401 * that min_val will be close to 0. Therefore, we will allocate the free
1404 1402 * buffer only taking max_val into account.
1405 1403 */
1406 1404 static void
1407 1405 xpvtap_rs_init(uint_t min_val, uint_t max_val, xpvtap_rs_hdl_t *handle)
1408 1406 {
1409 1407 xpvtap_rs_t *rstruct;
1410 1408 uint_t array_size;
1411 1409 uint_t index;
1412 1410
1413 1411
1414 1412 ASSERT(handle != NULL);
1415 1413 ASSERT(min_val < max_val);
1416 1414
1417 1415 /* alloc space for resource structure */
1418 1416 rstruct = kmem_alloc(sizeof (xpvtap_rs_t), KM_SLEEP);
1419 1417
1420 1418 /*
1421 1419	 * Test to see if the max value is 64-bit aligned. If so, we don't need
1422 1420	 * to allocate an extra 64-bit word. Allocate space for the free buffer
1423 1421	 * (8 bytes per uint64_t).
1424 1422 */
1425 1423 if ((max_val & 0x3F) == 0) {
1426 1424 rstruct->rs_free_size = (max_val >> 6) * 8;
1427 1425 } else {
1428 1426 rstruct->rs_free_size = ((max_val >> 6) + 1) * 8;
1429 1427 }
1430 1428 rstruct->rs_free = kmem_alloc(rstruct->rs_free_size, KM_SLEEP);
1431 1429
1432 1430 /* Initialize resource structure */
1433 1431 rstruct->rs_min = min_val;
1434 1432 rstruct->rs_last = min_val;
1435 1433 rstruct->rs_max = max_val;
1436 1434 mutex_init(&rstruct->rs_mutex, NULL, MUTEX_DRIVER, NULL);
1437 1435 rstruct->rs_flushing = B_FALSE;
1438 1436
1439 1437 /* Mark all resources as free */
1440 1438 array_size = rstruct->rs_free_size >> 3;
1441 1439 for (index = 0; index < array_size; index++) {
1442 1440 rstruct->rs_free[index] = (uint64_t)0xFFFFFFFFFFFFFFFF;
1443 1441 }
1444 1442
1445 1443 /* setup handle which is returned from this function */
1446 1444 *handle = rstruct;
1447 1445 }
1448 1446
1449 1447
1450 1448 /*
1451 1449 * xpvtap_rs_fini()
1452 1450 * Frees up the space allocated in init(). Notice that a pointer to the
1453 1451 * handle is used for the parameter. fini() will set the handle to NULL
1454 1452 * before returning.
1455 1453 */
1456 1454 static void
1457 1455 xpvtap_rs_fini(xpvtap_rs_hdl_t *handle)
1458 1456 {
1459 1457 xpvtap_rs_t *rstruct;
1460 1458
1461 1459
1462 1460 ASSERT(handle != NULL);
1463 1461
1464 1462 rstruct = (xpvtap_rs_t *)*handle;
1465 1463
1466 1464 mutex_destroy(&rstruct->rs_mutex);
1467 1465 kmem_free(rstruct->rs_free, rstruct->rs_free_size);
1468 1466 kmem_free(rstruct, sizeof (xpvtap_rs_t));
1469 1467
1470 1468 /* set handle to null. This helps catch bugs. */
1471 1469 *handle = NULL;
1472 1470 }
1473 1471
1474 1472
1475 1473 /*
1476 1474 * xpvtap_rs_alloc()
1477 1475 * alloc a resource. If alloc fails, we are out of resources.
1478 1476 */
1479 1477 static int
1480 1478 xpvtap_rs_alloc(xpvtap_rs_hdl_t handle, uint_t *resource)
1481 1479 {
1482 1480 xpvtap_rs_t *rstruct;
1483 1481 uint_t array_idx;
1484 1482 uint64_t free;
1485 1483 uint_t index;
1486 1484 uint_t last;
1487 1485 uint_t min;
1488 1486 uint_t max;
1489 1487
1490 1488
1491 1489 ASSERT(handle != NULL);
1492 1490 ASSERT(resource != NULL);
1493 1491
1494 1492 rstruct = (xpvtap_rs_t *)handle;
1495 1493
1496 1494 mutex_enter(&rstruct->rs_mutex);
1497 1495 min = rstruct->rs_min;
1498 1496 max = rstruct->rs_max;
1499 1497
1500 1498 /*
1501 1499 * Find a free resource. This will return out of the loop once it finds
1502 1500 * a free resource. There are a total of 'max'-'min'+1 resources.
1503 1501 * Performs a round robin allocation.
1504 1502 */
1505 1503 for (index = min; index <= max; index++) {
1506 1504
1507 1505 array_idx = rstruct->rs_last >> 6;
1508 1506 free = rstruct->rs_free[array_idx];
1509 1507 last = rstruct->rs_last & 0x3F;
1510 1508
1511 1509 /* if the next resource to check is free */
1512 1510 if ((free & ((uint64_t)1 << last)) != 0) {
1513 1511 /* we are using this resource */
1514 1512 *resource = rstruct->rs_last;
1515 1513
1516 1514 /* take it out of the free list */
1517 1515 rstruct->rs_free[array_idx] &= ~((uint64_t)1 << last);
1518 1516
1519 1517 /*
1520 1518 * increment the last count so we start checking the
1521 1519 * next resource on the next alloc(). Note the rollover
1522 1520 * at 'max'+1.
1523 1521 */
1524 1522 rstruct->rs_last++;
1525 1523 if (rstruct->rs_last > max) {
1526 1524 rstruct->rs_last = rstruct->rs_min;
1527 1525 }
1528 1526
1529 1527 /* unlock the resource structure */
1530 1528 mutex_exit(&rstruct->rs_mutex);
1531 1529
1532 1530 return (DDI_SUCCESS);
1533 1531 }
1534 1532
1535 1533 /*
1536 1534 * This resource is not free, lets go to the next one. Note the
1537 1535 * rollover at 'max'.
1538 1536 */
1539 1537 rstruct->rs_last++;
1540 1538 if (rstruct->rs_last > max) {
1541 1539 rstruct->rs_last = rstruct->rs_min;
1542 1540 }
1543 1541 }
1544 1542
1545 1543 mutex_exit(&rstruct->rs_mutex);
1546 1544
1547 1545 return (DDI_FAILURE);
1548 1546 }
1549 1547
1550 1548
1551 1549 /*
1552 1550 * xpvtap_rs_free()
1553 1551 * Free the previously alloc'd resource. Once a resource has been free'd,
1554 1552 * it can be used again when alloc is called.
1555 1553 */
1556 1554 static void
1557 1555 xpvtap_rs_free(xpvtap_rs_hdl_t handle, uint_t resource)
1558 1556 {
1559 1557 xpvtap_rs_t *rstruct;
1560 1558 uint_t array_idx;
1561 1559 uint_t offset;
1562 1560
1563 1561
1564 1562 ASSERT(handle != NULL);
1565 1563
1566 1564 rstruct = (xpvtap_rs_t *)handle;
1567 1565 ASSERT(resource >= rstruct->rs_min);
1568 1566 ASSERT(resource <= rstruct->rs_max);
1569 1567
1570 1568 if (!rstruct->rs_flushing) {
1571 1569 mutex_enter(&rstruct->rs_mutex);
1572 1570 }
1573 1571
1574 1572 /* Put the resource back in the free list */
1575 1573 array_idx = resource >> 6;
1576 1574 offset = resource & 0x3F;
1577 1575 rstruct->rs_free[array_idx] |= ((uint64_t)1 << offset);
1578 1576
1579 1577 if (!rstruct->rs_flushing) {
1580 1578 mutex_exit(&rstruct->rs_mutex);
1581 1579 }
1582 1580 }
1583 1581
1584 1582
1585 1583 /*
1586 1584 * xpvtap_rs_flush()
1587 1585 */
1588 1586 static void
1589 1587 xpvtap_rs_flush(xpvtap_rs_hdl_t handle, xpvtap_rs_cleanup_t callback,
1590 1588 void *arg)
1591 1589 {
1592 1590 xpvtap_rs_t *rstruct;
1593 1591 uint_t array_idx;
1594 1592 uint64_t free;
1595 1593 uint_t index;
1596 1594 uint_t last;
1597 1595 uint_t min;
1598 1596 uint_t max;
1599 1597
1600 1598
1601 1599 ASSERT(handle != NULL);
1602 1600
1603 1601 rstruct = (xpvtap_rs_t *)handle;
1604 1602
1605 1603 mutex_enter(&rstruct->rs_mutex);
1606 1604 min = rstruct->rs_min;
1607 1605 max = rstruct->rs_max;
1608 1606
1609 1607 rstruct->rs_flushing = B_TRUE;
1610 1608
1611 1609 /*
1612 1610 * for all resources not free, call the callback routine to clean it
1613 1611 * up.
1614 1612 */
1615 1613 for (index = min; index <= max; index++) {
1616 1614
1617 1615 array_idx = rstruct->rs_last >> 6;
1618 1616 free = rstruct->rs_free[array_idx];
1619 1617 last = rstruct->rs_last & 0x3F;
1620 1618
1621 1619 /* if the next resource to check is not free */
1622 1620 if ((free & ((uint64_t)1 << last)) == 0) {
1623 1621 /* call the callback to cleanup */
1624 1622 (*callback)(arg, rstruct->rs_last);
1625 1623
1626 1624 /* put it back in the free list */
1627 1625 rstruct->rs_free[array_idx] |= ((uint64_t)1 << last);
1628 1626 }
1629 1627
1630 1628 /* go to the next one. Note the rollover at 'max' */
1631 1629 rstruct->rs_last++;
1632 1630 if (rstruct->rs_last > max) {
1633 1631 rstruct->rs_last = rstruct->rs_min;
1634 1632 }
1635 1633 }
1636 1634
1637 1635 mutex_exit(&rstruct->rs_mutex);
1638 1636 }
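For context on issue 8634, a userland consumer using the Linux-compatible epoll(3C) interface in edge-triggered mode looks roughly like the sketch below; the device path /dev/xpvtap0 and the drain step are assumptions for illustration. Before the chpoll() change above, the driver registered its pollhead only when no requests were pending, so an EPOLLET waiter could miss the wakeup issued by pollwakeup() in xpvtap_user_request_push() and block indefinitely.

#include <sys/epoll.h>
#include <fcntl.h>
#include <stdio.h>
#include <unistd.h>

int
main(void)
{
	struct epoll_event ev, out;
	int fd, epfd;

	fd = open("/dev/xpvtap0", O_RDWR);	/* assumed device path */
	epfd = epoll_create1(0);
	if (fd < 0 || epfd < 0) {
		perror("open/epoll_create1");
		return (1);
	}

	/* edge-triggered registration: wake only when new requests arrive */
	ev.events = EPOLLIN | EPOLLET;
	ev.data.fd = fd;
	if (epoll_ctl(epfd, EPOLL_CTL_ADD, fd, &ev) != 0) {
		perror("epoll_ctl");
		return (1);
	}

	for (;;) {
		if (epoll_wait(epfd, &out, 1, -1) < 0) {
			perror("epoll_wait");
			return (1);
		}
		/* drain all pending requests here before waiting again */
	}
}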