13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>
--- old/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
+++ new/usr/src/uts/i86pc/io/vmm/vmm_sol_dev.c
1 1 /*
2 2 * This file and its contents are supplied under the terms of the
3 3 * Common Development and Distribution License ("CDDL"), version 1.0.
4 4 * You may only use this file in accordance with the terms of version
5 5 * 1.0 of the CDDL.
6 6 *
7 7 * A full copy of the text of the CDDL should have accompanied this
8 8 * source. A copy of the CDDL is also available via the Internet at
9 9 * http://www.illumos.org/license/CDDL.
10 10 */
11 11 /* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */
12 12
13 13 /*
14 14 * Copyright 2015 Pluribus Networks Inc.
15 15 * Copyright 2019 Joyent, Inc.
16 16 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
17 17 * Copyright 2020 Oxide Computer Company
18 18 */
19 19
20 20 #include <sys/types.h>
21 21 #include <sys/conf.h>
22 22 #include <sys/cpuvar.h>
23 23 #include <sys/ioccom.h>
24 24 #include <sys/stat.h>
25 25 #include <sys/vmsystm.h>
26 26 #include <sys/ddi.h>
27 27 #include <sys/mkdev.h>
28 28 #include <sys/sunddi.h>
29 29 #include <sys/fs/dv_node.h>
30 30 #include <sys/cpuset.h>
31 31 #include <sys/id_space.h>
32 32 #include <sys/fs/sdev_plugin.h>
33 33 #include <sys/smt.h>
34 34
35 35 #include <sys/kernel.h>
36 36 #include <sys/hma.h>
37 37 #include <sys/x86_archext.h>
38 38 #include <x86/apicreg.h>
39 39
40 40 #include <sys/vmm.h>
41 41 #include <sys/vmm_kernel.h>
42 42 #include <sys/vmm_instruction_emul.h>
43 43 #include <sys/vmm_dev.h>
44 44 #include <sys/vmm_impl.h>
45 45 #include <sys/vmm_drv.h>
46 46
47 47 #include <vm/vm.h>
48 48 #include <vm/seg_dev.h>
49 49
50 50 #include "io/ppt.h"
51 51 #include "io/vatpic.h"
52 52 #include "io/vioapic.h"
53 53 #include "io/vrtc.h"
54 54 #include "io/vhpet.h"
55 55 #include "io/vpmtmr.h"
56 56 #include "vmm_lapic.h"
57 57 #include "vmm_stat.h"
58 58 #include "vmm_util.h"
59 59 #include "vm/vm_glue.h"
60 60
61 61 /*
62 62 * Locking details:
63 63 *
64 64 * Driver-wide data (vmmdev_*) , including HMA and sdev registration, is
65 65 * protected by vmmdev_mtx. The list of vmm_softc_t instances and related data
66 66 * (vmm_*) are protected by vmm_mtx. Actions requiring both locks must acquire
67 67 * vmmdev_mtx before vmm_mtx. The sdev plugin functions must not attempt to
68 68 * acquire vmmdev_mtx, as they could deadlock with plugin unregistration.
69 69 */
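A minimal sketch of the ordering described above (the helper name is illustrative): a routine that needs both driver-wide and per-instance state takes vmmdev_mtx before vmm_mtx and releases them in the reverse order.

	static void
	example_locked_walk(void)
	{
		mutex_enter(&vmmdev_mtx);	/* driver-wide (vmmdev_*) state first */
		mutex_enter(&vmm_mtx);		/* then the instance list (vmm_*) */

		/* ... examine vmm_list or vmmdev_* state here ... */

		mutex_exit(&vmm_mtx);
		mutex_exit(&vmmdev_mtx);
	}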
70 70
71 71 static kmutex_t vmmdev_mtx;
72 72 static dev_info_t *vmmdev_dip;
73 73 static hma_reg_t *vmmdev_hma_reg;
74 74 static uint_t vmmdev_hma_ref;
75 75 static sdev_plugin_hdl_t vmmdev_sdev_hdl;
76 76
77 77 static kmutex_t vmm_mtx;
78 78 static list_t vmm_list;
79 79 static list_t vmm_destroy_list;
80 80 static id_space_t *vmm_minors;
81 81 static void *vmm_statep;
82 82
83 83 static const char *vmmdev_hvm_name = "bhyve";
84 84
85 85 /* For sdev plugin (/dev) */
86 86 #define VMM_SDEV_ROOT "/dev/vmm"
87 87
88 88 /* From uts/i86pc/io/vmm/intel/vmx.c */
89 89 extern int vmx_x86_supported(const char **);
90 90
91 91 /* Holds and hooks from drivers external to vmm */
92 92 struct vmm_hold {
93 93 list_node_t vmh_node;
94 94 vmm_softc_t *vmh_sc;
95 95 boolean_t vmh_release_req;
96 96 uint_t vmh_ioport_hook_cnt;
97 97 };
98 98
99 99 struct vmm_lease {
100 100 list_node_t vml_node;
101 101 struct vm *vml_vm;
102 102 boolean_t vml_expired;
103 103 boolean_t (*vml_expire_func)(void *);
104 104 void *vml_expire_arg;
105 105 list_node_t vml_expire_node;
106 106 struct vmm_hold *vml_hold;
107 107 };
108 108
109 109 static int vmm_drv_block_hook(vmm_softc_t *, boolean_t);
110 110 static void vmm_lease_break_locked(vmm_softc_t *, vmm_lease_t *);
111 111
112 112 static int
113 113 vmmdev_get_memseg(vmm_softc_t *sc, struct vm_memseg *mseg)
114 114 {
115 115 int error;
116 116 bool sysmem;
117 117
118 118 error = vm_get_memseg(sc->vmm_vm, mseg->segid, &mseg->len, &sysmem,
119 119 NULL);
120 120 if (error || mseg->len == 0)
121 121 return (error);
122 122
123 123 if (!sysmem) {
124 124 vmm_devmem_entry_t *de;
125 125 list_t *dl = &sc->vmm_devmem_list;
126 126
127 127 for (de = list_head(dl); de != NULL; de = list_next(dl, de)) {
128 128 if (de->vde_segid == mseg->segid) {
129 129 break;
130 130 }
131 131 }
132 132 if (de != NULL) {
133 133 (void) strlcpy(mseg->name, de->vde_name,
134 134 sizeof (mseg->name));
135 135 }
136 136 } else {
137 137 bzero(mseg->name, sizeof (mseg->name));
138 138 }
139 139
140 140 return (error);
141 141 }
142 142
143 143 /*
144 144 * The 'devmem' hack:
145 145 *
146 146 * On native FreeBSD, bhyve consumers are allowed to create 'devmem' segments
147 147 * in the vm which appear with their own name related to the vm under /dev.
148 148 * Since this would be a hassle from an sdev perspective and would require a
149 149 * new cdev interface (or complicate the existing one), we choose to implement
150 150 * this in a different manner. When 'devmem' mappings are created, an
151 151 * identifying off_t is communicated back out to userspace. That off_t,
152 152 * residing above the normal guest memory space, can be used to mmap the
153 153 * 'devmem' mapping from the already-open vm device.
154 154 */
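A hedged userspace sketch of the scheme described above: the VM_DEVMEM_GETOFFSET ioctl (handled later in this file) returns the identifying off_t, which is then used to mmap the segment through the already-open vm device. The helper name is illustrative, and 'vmfd'/'seg_len' are assumed to be supplied by the caller.

	#include <sys/mman.h>
	#include <sys/ioctl.h>
	#include <sys/vmm_dev.h>

	void *
	example_map_devmem(int vmfd, int segid, size_t seg_len)
	{
		struct vm_devmem_offset vdo = { .segid = segid };

		/* Ask the kernel where this devmem segment lives in mmap space. */
		if (ioctl(vmfd, VM_DEVMEM_GETOFFSET, &vdo) != 0)
			return (NULL);

		/* Map it through the already-open vm device, above guest memory. */
		return (mmap(NULL, seg_len, PROT_READ | PROT_WRITE, MAP_SHARED,
		    vmfd, vdo.offset));
	}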
155 155
156 156 static int
157 157 vmmdev_devmem_create(vmm_softc_t *sc, struct vm_memseg *mseg, const char *name)
158 158 {
159 159 off_t map_offset;
160 160 vmm_devmem_entry_t *entry;
161 161
162 162 if (list_is_empty(&sc->vmm_devmem_list)) {
163 163 map_offset = VM_DEVMEM_START;
164 164 } else {
165 165 entry = list_tail(&sc->vmm_devmem_list);
166 166 map_offset = entry->vde_off + entry->vde_len;
167 167 if (map_offset < entry->vde_off) {
168 168 /* Do not tolerate overflow */
169 169 return (ERANGE);
170 170 }
171 171 /*
172 172 * XXXJOY: We could choose to search the list for duplicate
173 173 * names and toss an error. Since we're using the offset
174 174 * method for now, it does not make much of a difference.
175 175 */
176 176 }
177 177
178 178 entry = kmem_zalloc(sizeof (*entry), KM_SLEEP);
179 179 entry->vde_segid = mseg->segid;
180 180 entry->vde_len = mseg->len;
181 181 entry->vde_off = map_offset;
182 182 (void) strlcpy(entry->vde_name, name, sizeof (entry->vde_name));
183 183 list_insert_tail(&sc->vmm_devmem_list, entry);
184 184
185 185 return (0);
186 186 }
187 187
188 188 static boolean_t
189 189 vmmdev_devmem_segid(vmm_softc_t *sc, off_t off, off_t len, int *segidp,
190 190 off_t *map_offp)
191 191 {
192 192 list_t *dl = &sc->vmm_devmem_list;
193 193 vmm_devmem_entry_t *de = NULL;
194 194 const off_t map_end = off + len;
195 195
196 196 VERIFY(off >= VM_DEVMEM_START);
197 197
198 198 if (map_end < off) {
199 199 /* No match on overflow */
200 200 return (B_FALSE);
201 201 }
202 202
203 203 for (de = list_head(dl); de != NULL; de = list_next(dl, de)) {
204 204 const off_t item_end = de->vde_off + de->vde_len;
205 205
206 206 if (de->vde_off <= off && item_end >= map_end) {
207 207 *segidp = de->vde_segid;
208 208 *map_offp = off - de->vde_off;
209 209 return (B_TRUE);
210 210 }
211 211 }
212 212 return (B_FALSE);
213 213 }
214 214
215 215 static void
216 216 vmmdev_devmem_purge(vmm_softc_t *sc)
217 217 {
218 218 vmm_devmem_entry_t *entry;
219 219
220 220 while ((entry = list_remove_head(&sc->vmm_devmem_list)) != NULL) {
221 221 kmem_free(entry, sizeof (*entry));
222 222 }
223 223 }
224 224
225 225 static int
226 226 vmmdev_alloc_memseg(vmm_softc_t *sc, struct vm_memseg *mseg)
227 227 {
228 228 int error;
229 229 bool sysmem = true;
230 230
231 231 if (VM_MEMSEG_NAME(mseg)) {
232 232 sysmem = false;
233 233 }
234 234 error = vm_alloc_memseg(sc->vmm_vm, mseg->segid, mseg->len, sysmem);
235 235
236 236 if (error == 0 && VM_MEMSEG_NAME(mseg)) {
237 237 /*
238 238 * Rather than create a whole fresh device from which userspace
239 239 * can mmap this segment, instead make it available at an
240 240 * offset above where the main guest memory resides.
241 241 */
242 242 error = vmmdev_devmem_create(sc, mseg, mseg->name);
243 243 if (error != 0) {
244 244 vm_free_memseg(sc->vmm_vm, mseg->segid);
245 245 }
246 246 }
247 247 return (error);
248 248 }
249 249
250 250 /*
251 251 * Resource Locking and Exclusion
252 252 *
253 253 * Much of bhyve depends on key portions of VM state, such as the guest memory
254 254 * map, to remain unchanged while the guest is running. As ported from
255 255 * FreeBSD, the initial strategy for this resource exclusion hinged on gating
256 256 * access to the instance vCPUs. Threads acting on a single vCPU, like those
257 257 * performing the work of actually running the guest in VMX/SVM, would lock
258 258 * only that vCPU during ioctl() entry. For ioctls which would change VM-wide
259 259 * state, all of the vCPUs would be first locked, ensuring that the
260 260 * operation(s) could complete without any other threads stumbling into
261 261 * intermediate states.
262 262 *
263 263 * This approach is largely effective for bhyve. Common operations, such as
264 264 * running the vCPUs, steer clear of lock contention. The model begins to
265 265 * break down for operations which do not occur in the context of a specific
266 266 * vCPU. LAPIC MSI delivery, for example, may be initiated from a worker
267 267 * thread in the bhyve process. In order to properly protect those vCPU-less
268 268 * operations from encountering invalid states, additional locking is required.
269 269 * This was solved by forcing those operations to lock the VM_MAXCPU-1 vCPU.
270 270 * It does mean that class of operations will be serialized on locking the
271 271 * specific vCPU and that instances sized at VM_MAXCPU will potentially see
272 272 * undue contention on the VM_MAXCPU-1 vCPU.
273 273 *
274 274 * In order to address the shortcomings of this model, the concept of a
275 275 * read/write lock has been added to bhyve. Operations which change
276 276 * fundamental aspects of a VM (such as the memory map) must acquire the write
277 277 * lock, which also implies locking all of the vCPUs and waiting for all read
278 278 * lock holders to release. While it increases the cost and waiting time for
279 279 * those few operations, it allows most hot-path operations on the VM (which
280 280 * depend on its configuration remaining stable) to occur with minimal locking.
281 281 *
282 282 * Consumers of the Driver API (see below) are a special case when it comes to
283 283 * this locking, since they may hold a read lock via the drv_lease mechanism
284 284 * for an extended period of time. Rather than forcing those consumers to
285 285 * continuously poll for a write lock attempt, the lease system forces them to
286 286 * provide a release callback to trigger their clean-up (and potential later
287 287 * reacquisition) of the read lock.
288 288 */
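A sketch of the lease arrangement described above from a consumer's point of view. The 'example_ctx' structure and its fields are hypothetical; vmm_drv_lease_sign() and vmm_drv_lease_break() are the interfaces defined later in this file. Returning B_FALSE from the expire callback defers the actual break to the consumer's own context rather than breaking the lease synchronously.

	struct example_ctx {
		vmm_lease_t	*lease;
		boolean_t	stop_work;
	};

	static boolean_t
	example_lease_expired(void *arg)
	{
		struct example_ctx *ctx = arg;

		/* Flag the consumer to stop touching VM state. */
		ctx->stop_work = B_TRUE;
		return (B_FALSE);
	}

	static void
	example_reacquire(struct example_ctx *ctx, vmm_hold_t *hold)
	{
		/* Break the expired lease and sign a new one once work has stopped. */
		vmm_drv_lease_break(hold, ctx->lease);
		ctx->lease = vmm_drv_lease_sign(hold, example_lease_expired, ctx);
	}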
289 289
290 290 static void
291 291 vcpu_lock_one(vmm_softc_t *sc, int vcpu)
292 292 {
293 293 ASSERT(vcpu >= 0 && vcpu < VM_MAXCPU);
294 294
295 295 /*
296 296 * Since this state transition is utilizing from_idle=true, it should
297 297 * not fail, but rather block until it can be successful.
298 298 */
299 299 VERIFY0(vcpu_set_state(sc->vmm_vm, vcpu, VCPU_FROZEN, true));
300 300 }
301 301
302 302 static void
303 303 vcpu_unlock_one(vmm_softc_t *sc, int vcpu)
304 304 {
305 305 ASSERT(vcpu >= 0 && vcpu < VM_MAXCPU);
306 306
307 307 VERIFY3U(vcpu_get_state(sc->vmm_vm, vcpu, NULL), ==, VCPU_FROZEN);
308 308 vcpu_set_state(sc->vmm_vm, vcpu, VCPU_IDLE, false);
309 309 }
310 310
311 311 static void
312 312 vmm_read_lock(vmm_softc_t *sc)
313 313 {
314 314 rw_enter(&sc->vmm_rwlock, RW_READER);
315 315 }
316 316
317 317 static void
318 318 vmm_read_unlock(vmm_softc_t *sc)
319 319 {
320 320 rw_exit(&sc->vmm_rwlock);
321 321 }
322 322
323 323 static void
324 324 vmm_write_lock(vmm_softc_t *sc)
325 325 {
326 326 int maxcpus;
327 327
328 328 /* First lock all the vCPUs */
329 329 maxcpus = vm_get_maxcpus(sc->vmm_vm);
330 330 for (int vcpu = 0; vcpu < maxcpus; vcpu++) {
331 331 vcpu_lock_one(sc, vcpu);
332 332 }
333 333
334 334 mutex_enter(&sc->vmm_lease_lock);
335 335 VERIFY3U(sc->vmm_lease_blocker, !=, UINT_MAX);
336 336 sc->vmm_lease_blocker++;
337 337 if (sc->vmm_lease_blocker == 1) {
338 338 list_t *list = &sc->vmm_lease_list;
339 339 vmm_lease_t *lease = list_head(list);
340 340
341 341 while (lease != NULL) {
342 342 boolean_t sync_break = B_FALSE;
343 343
344 344 if (!lease->vml_expired) {
345 345 void *arg = lease->vml_expire_arg;
346 346 lease->vml_expired = B_TRUE;
347 347 sync_break = lease->vml_expire_func(arg);
348 348 }
349 349
350 350 if (sync_break) {
351 351 vmm_lease_t *next;
352 352
353 353 /*
354 354 * These leases which are synchronously broken
355 355 * result in vmm_read_unlock() calls from a
356 356 * different thread than the corresponding
357 357 * vmm_read_lock(). This is acceptable, given
358 358 * that the rwlock underpinning the whole
359 359 * mechanism tolerates the behavior. This
360 360 * flexibility is _only_ afforded to VM read
361 361 * lock (RW_READER) holders.
362 362 */
363 363 next = list_next(list, lease);
364 364 vmm_lease_break_locked(sc, lease);
365 365 lease = next;
366 366 } else {
367 367 lease = list_next(list, lease);
368 368 }
369 369 }
370 370 }
371 371 mutex_exit(&sc->vmm_lease_lock);
372 372
373 373 rw_enter(&sc->vmm_rwlock, RW_WRITER);
374 374 /*
375 375 * For now, the 'maxcpus' value for an instance is fixed at the
376 376 * compile-time constant of VM_MAXCPU at creation. If this changes in
377 377 * the future, allowing for dynamic vCPU resource sizing, acquisition
378 378 * of the write lock will need to be wary of such changes.
379 379 */
380 380 VERIFY(maxcpus == vm_get_maxcpus(sc->vmm_vm));
381 381 }
382 382
383 383 static void
384 384 vmm_write_unlock(vmm_softc_t *sc)
385 385 {
386 386 int maxcpus;
387 387
388 388 mutex_enter(&sc->vmm_lease_lock);
389 389 VERIFY3U(sc->vmm_lease_blocker, !=, 0);
390 390 sc->vmm_lease_blocker--;
391 391 if (sc->vmm_lease_blocker == 0) {
392 392 cv_broadcast(&sc->vmm_lease_cv);
393 393 }
394 394 mutex_exit(&sc->vmm_lease_lock);
395 395
396 396 /*
397 397 * The VM write lock _must_ be released from the same thread it was
398 398 * acquired in, unlike the read lock.
399 399 */
400 400 VERIFY(rw_write_held(&sc->vmm_rwlock));
401 401 rw_exit(&sc->vmm_rwlock);
402 402
403 403 /* Unlock all the vCPUs */
404 404 maxcpus = vm_get_maxcpus(sc->vmm_vm);
405 405 for (int vcpu = 0; vcpu < maxcpus; vcpu++) {
406 406 vcpu_unlock_one(sc, vcpu);
407 407 }
408 408 }
409 409
410 410 static int
411 411 vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
412 412 cred_t *credp, int *rvalp)
413 413 {
414 414 int error = 0, vcpu = -1;
415 415 void *datap = (void *)arg;
416 416 enum vm_lock_type {
417 417 LOCK_NONE = 0,
418 418 LOCK_VCPU,
419 419 LOCK_READ_HOLD,
420 420 LOCK_WRITE_HOLD
421 421 } lock_type = LOCK_NONE;
422 422
423 423 /* Acquire any exclusion resources needed for the operation. */
424 424 switch (cmd) {
425 425 case VM_RUN:
426 426 case VM_GET_REGISTER:
427 427 case VM_SET_REGISTER:
428 428 case VM_GET_SEGMENT_DESCRIPTOR:
429 429 case VM_SET_SEGMENT_DESCRIPTOR:
430 430 case VM_GET_REGISTER_SET:
431 431 case VM_SET_REGISTER_SET:
432 432 case VM_INJECT_EXCEPTION:
433 433 case VM_GET_CAPABILITY:
434 434 case VM_SET_CAPABILITY:
435 435 case VM_PPTDEV_MSI:
436 436 case VM_PPTDEV_MSIX:
437 437 case VM_SET_X2APIC_STATE:
438 438 case VM_GLA2GPA:
439 439 case VM_GLA2GPA_NOFAULT:
440 440 case VM_ACTIVATE_CPU:
441 441 case VM_SET_INTINFO:
442 442 case VM_GET_INTINFO:
443 443 case VM_RESTART_INSTRUCTION:
444 444 case VM_SET_KERNEMU_DEV:
445 445 case VM_GET_KERNEMU_DEV:
446 + case VM_RESET_CPU:
447 + case VM_GET_RUN_STATE:
448 + case VM_SET_RUN_STATE:
446 449 /*
447 450 * Copy in the ID of the vCPU chosen for this operation.
448 451 * Since a nefarious caller could update their struct between
449 452 * this locking and when the rest of the ioctl data is copied
450 453 * in, it is _critical_ that this local 'vcpu' variable be used
451 454 * rather than the in-struct one when performing the ioctl.
452 455 */
453 456 if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
454 457 return (EFAULT);
455 458 }
456 459 if (vcpu < 0 || vcpu > vm_get_maxcpus(sc->vmm_vm)) {
457 460 return (EINVAL);
458 461 }
459 462 vcpu_lock_one(sc, vcpu);
460 463 lock_type = LOCK_VCPU;
461 464 break;
462 465
463 466 case VM_REINIT:
464 467 case VM_BIND_PPTDEV:
465 468 case VM_UNBIND_PPTDEV:
466 469 case VM_MAP_PPTDEV_MMIO:
467 470 case VM_ALLOC_MEMSEG:
468 471 case VM_MMAP_MEMSEG:
469 472 case VM_WRLOCK_CYCLE:
470 473 case VM_PMTMR_LOCATE:
471 474 vmm_write_lock(sc);
472 475 lock_type = LOCK_WRITE_HOLD;
473 476 break;
474 477
475 478 case VM_GET_GPA_PMAP:
476 479 case VM_GET_MEMSEG:
477 480 case VM_MMAP_GETNEXT:
478 481 case VM_LAPIC_IRQ:
479 482 case VM_INJECT_NMI:
480 483 case VM_IOAPIC_ASSERT_IRQ:
481 484 case VM_IOAPIC_DEASSERT_IRQ:
482 485 case VM_IOAPIC_PULSE_IRQ:
483 486 case VM_LAPIC_MSI:
484 487 case VM_LAPIC_LOCAL_IRQ:
485 488 case VM_GET_X2APIC_STATE:
486 489 case VM_RTC_READ:
487 490 case VM_RTC_WRITE:
488 491 case VM_RTC_SETTIME:
489 492 case VM_RTC_GETTIME:
490 493 #ifndef __FreeBSD__
491 494 case VM_DEVMEM_GETOFFSET:
492 495 #endif
493 496 vmm_read_lock(sc);
494 497 lock_type = LOCK_READ_HOLD;
495 498 break;
496 499
497 500 case VM_IOAPIC_PINCOUNT:
498 501 default:
499 502 break;
500 503 }
501 504
502 505 /* Execute the primary logic for the ioctl. */
503 506 switch (cmd) {
504 507 case VM_RUN: {
505 508 struct vm_entry entry;
506 509
507 510 if (ddi_copyin(datap, &entry, sizeof (entry), md)) {
508 511 error = EFAULT;
509 512 break;
510 513 }
511 514
512 515 if (!(curthread->t_schedflag & TS_VCPU))
513 516 smt_mark_as_vcpu();
514 517
515 518 error = vm_run(sc->vmm_vm, vcpu, &entry);
516 519
517 520 /*
518 521 * Unexpected states in vm_run() are expressed through positive
519 522 * errno-oriented return values. VM states which expect further
520 523 * processing in userspace (necessary context via exitinfo) are
521 524 * expressed through negative return values. For the time being
522 525 * a return value of 0 is not expected from vm_run().
523 526 */
524 527 ASSERT(error != 0);
525 528 if (error < 0) {
526 529 const struct vm_exit *vme;
527 530 void *outp = entry.exit_data;
528 531
529 532 error = 0;
530 533 vme = vm_exitinfo(sc->vmm_vm, vcpu);
531 534 if (ddi_copyout(vme, outp, sizeof (*vme), md)) {
532 535 error = EFAULT;
533 536 }
534 537 }
535 538 break;
536 539 }
537 540 case VM_SUSPEND: {
538 541 struct vm_suspend vmsuspend;
539 542
540 543 if (ddi_copyin(datap, &vmsuspend, sizeof (vmsuspend), md)) {
541 544 error = EFAULT;
542 545 break;
543 546 }
544 547 error = vm_suspend(sc->vmm_vm, vmsuspend.how);
545 548 break;
546 549 }
547 550 case VM_REINIT:
548 551 if ((error = vmm_drv_block_hook(sc, B_TRUE)) != 0) {
549 552 /*
550 553 * The VM instance should be free of driver-attached
551 554 * hooks during the reinitialization process.
552 555 */
553 556 break;
554 557 }
555 558 error = vm_reinit(sc->vmm_vm);
556 559 (void) vmm_drv_block_hook(sc, B_FALSE);
557 560 break;
558 561 case VM_STAT_DESC: {
559 562 struct vm_stat_desc statdesc;
560 563
561 564 if (ddi_copyin(datap, &statdesc, sizeof (statdesc), md)) {
562 565 error = EFAULT;
563 566 break;
564 567 }
565 568 error = vmm_stat_desc_copy(statdesc.index, statdesc.desc,
566 569 sizeof (statdesc.desc));
567 570 if (error == 0 &&
568 571 ddi_copyout(&statdesc, datap, sizeof (statdesc), md)) {
569 572 error = EFAULT;
570 573 break;
571 574 }
572 575 break;
573 576 }
574 577 case VM_STATS_IOC: {
575 578 struct vm_stats vmstats;
576 579
577 580 CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
578 581 if (ddi_copyin(datap, &vmstats, sizeof (vmstats), md)) {
579 582 error = EFAULT;
580 583 break;
581 584 }
582 585 hrt2tv(gethrtime(), &vmstats.tv);
583 586 error = vmm_stat_copy(sc->vmm_vm, vmstats.cpuid,
584 587 &vmstats.num_entries, vmstats.statbuf);
585 588 if (error == 0 &&
586 589 ddi_copyout(&vmstats, datap, sizeof (vmstats), md)) {
587 590 error = EFAULT;
588 591 break;
589 592 }
590 593 break;
591 594 }
592 595
593 596 case VM_PPTDEV_MSI: {
594 597 struct vm_pptdev_msi pptmsi;
595 598
596 599 if (ddi_copyin(datap, &pptmsi, sizeof (pptmsi), md)) {
597 600 error = EFAULT;
598 601 break;
599 602 }
600 603 error = ppt_setup_msi(sc->vmm_vm, pptmsi.vcpu, pptmsi.pptfd,
601 604 pptmsi.addr, pptmsi.msg, pptmsi.numvec);
602 605 break;
603 606 }
604 607 case VM_PPTDEV_MSIX: {
605 608 struct vm_pptdev_msix pptmsix;
606 609
607 610 if (ddi_copyin(datap, &pptmsix, sizeof (pptmsix), md)) {
608 611 error = EFAULT;
609 612 break;
610 613 }
611 614 error = ppt_setup_msix(sc->vmm_vm, pptmsix.vcpu, pptmsix.pptfd,
612 615 pptmsix.idx, pptmsix.addr, pptmsix.msg,
613 616 pptmsix.vector_control);
614 617 break;
615 618 }
616 619 case VM_MAP_PPTDEV_MMIO: {
617 620 struct vm_pptdev_mmio pptmmio;
618 621
619 622 if (ddi_copyin(datap, &pptmmio, sizeof (pptmmio), md)) {
620 623 error = EFAULT;
621 624 break;
622 625 }
623 626 error = ppt_map_mmio(sc->vmm_vm, pptmmio.pptfd, pptmmio.gpa,
624 627 pptmmio.len, pptmmio.hpa);
625 628 break;
626 629 }
627 630 case VM_BIND_PPTDEV: {
628 631 struct vm_pptdev pptdev;
629 632
630 633 if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) {
631 634 error = EFAULT;
632 635 break;
633 636 }
634 637 error = vm_assign_pptdev(sc->vmm_vm, pptdev.pptfd);
635 638 break;
636 639 }
637 640 case VM_UNBIND_PPTDEV: {
638 641 struct vm_pptdev pptdev;
639 642
640 643 if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) {
641 644 error = EFAULT;
642 645 break;
643 646 }
644 647 error = vm_unassign_pptdev(sc->vmm_vm, pptdev.pptfd);
645 648 break;
646 649 }
647 650 case VM_GET_PPTDEV_LIMITS: {
648 651 struct vm_pptdev_limits pptlimits;
649 652
650 653 if (ddi_copyin(datap, &pptlimits, sizeof (pptlimits), md)) {
651 654 error = EFAULT;
652 655 break;
653 656 }
654 657 error = ppt_get_limits(sc->vmm_vm, pptlimits.pptfd,
655 658 &pptlimits.msi_limit, &pptlimits.msix_limit);
656 659 if (error == 0 &&
657 660 ddi_copyout(&pptlimits, datap, sizeof (pptlimits), md)) {
658 661 error = EFAULT;
659 662 break;
660 663 }
661 664 break;
662 665 }
663 666 case VM_INJECT_EXCEPTION: {
664 667 struct vm_exception vmexc;
665 668 if (ddi_copyin(datap, &vmexc, sizeof (vmexc), md)) {
666 669 error = EFAULT;
667 670 break;
668 671 }
669 672 error = vm_inject_exception(sc->vmm_vm, vcpu, vmexc.vector,
670 673 vmexc.error_code_valid, vmexc.error_code,
671 674 vmexc.restart_instruction);
672 675 break;
673 676 }
674 677 case VM_INJECT_NMI: {
675 678 struct vm_nmi vmnmi;
676 679
677 680 if (ddi_copyin(datap, &vmnmi, sizeof (vmnmi), md)) {
678 681 error = EFAULT;
679 682 break;
680 683 }
681 684 error = vm_inject_nmi(sc->vmm_vm, vmnmi.cpuid);
682 685 break;
683 686 }
684 687 case VM_LAPIC_IRQ: {
685 688 struct vm_lapic_irq vmirq;
686 689
687 690 if (ddi_copyin(datap, &vmirq, sizeof (vmirq), md)) {
688 691 error = EFAULT;
689 692 break;
690 693 }
691 694 error = lapic_intr_edge(sc->vmm_vm, vmirq.cpuid, vmirq.vector);
692 695 break;
693 696 }
694 697 case VM_LAPIC_LOCAL_IRQ: {
695 698 struct vm_lapic_irq vmirq;
696 699
697 700 if (ddi_copyin(datap, &vmirq, sizeof (vmirq), md)) {
698 701 error = EFAULT;
699 702 break;
700 703 }
701 704 error = lapic_set_local_intr(sc->vmm_vm, vmirq.cpuid,
702 705 vmirq.vector);
703 706 break;
704 707 }
705 708 case VM_LAPIC_MSI: {
706 709 struct vm_lapic_msi vmmsi;
707 710
708 711 if (ddi_copyin(datap, &vmmsi, sizeof (vmmsi), md)) {
709 712 error = EFAULT;
710 713 break;
711 714 }
712 715 error = lapic_intr_msi(sc->vmm_vm, vmmsi.addr, vmmsi.msg);
713 716 break;
714 717 }
715 718
716 719 case VM_IOAPIC_ASSERT_IRQ: {
717 720 struct vm_ioapic_irq ioapic_irq;
718 721
719 722 if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) {
720 723 error = EFAULT;
721 724 break;
722 725 }
723 726 error = vioapic_assert_irq(sc->vmm_vm, ioapic_irq.irq);
724 727 break;
725 728 }
726 729 case VM_IOAPIC_DEASSERT_IRQ: {
727 730 struct vm_ioapic_irq ioapic_irq;
728 731
729 732 if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) {
730 733 error = EFAULT;
731 734 break;
732 735 }
733 736 error = vioapic_deassert_irq(sc->vmm_vm, ioapic_irq.irq);
734 737 break;
735 738 }
736 739 case VM_IOAPIC_PULSE_IRQ: {
737 740 struct vm_ioapic_irq ioapic_irq;
738 741
739 742 if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) {
740 743 error = EFAULT;
741 744 break;
742 745 }
743 746 error = vioapic_pulse_irq(sc->vmm_vm, ioapic_irq.irq);
744 747 break;
745 748 }
746 749 case VM_IOAPIC_PINCOUNT: {
747 750 int pincount;
748 751
749 752 pincount = vioapic_pincount(sc->vmm_vm);
750 753 if (ddi_copyout(&pincount, datap, sizeof (int), md)) {
751 754 error = EFAULT;
752 755 break;
753 756 }
754 757 break;
755 758 }
756 759
757 760 case VM_ISA_ASSERT_IRQ: {
758 761 struct vm_isa_irq isa_irq;
759 762
760 763 if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) {
761 764 error = EFAULT;
762 765 break;
763 766 }
764 767 error = vatpic_assert_irq(sc->vmm_vm, isa_irq.atpic_irq);
765 768 if (error == 0 && isa_irq.ioapic_irq != -1) {
766 769 error = vioapic_assert_irq(sc->vmm_vm,
767 770 isa_irq.ioapic_irq);
768 771 }
769 772 break;
770 773 }
771 774 case VM_ISA_DEASSERT_IRQ: {
772 775 struct vm_isa_irq isa_irq;
773 776
774 777 if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) {
775 778 error = EFAULT;
776 779 break;
777 780 }
778 781 error = vatpic_deassert_irq(sc->vmm_vm, isa_irq.atpic_irq);
779 782 if (error == 0 && isa_irq.ioapic_irq != -1) {
780 783 error = vioapic_deassert_irq(sc->vmm_vm,
781 784 isa_irq.ioapic_irq);
782 785 }
783 786 break;
784 787 }
785 788 case VM_ISA_PULSE_IRQ: {
786 789 struct vm_isa_irq isa_irq;
787 790
788 791 if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) {
789 792 error = EFAULT;
790 793 break;
791 794 }
792 795 error = vatpic_pulse_irq(sc->vmm_vm, isa_irq.atpic_irq);
793 796 if (error == 0 && isa_irq.ioapic_irq != -1) {
794 797 error = vioapic_pulse_irq(sc->vmm_vm,
795 798 isa_irq.ioapic_irq);
796 799 }
797 800 break;
798 801 }
799 802 case VM_ISA_SET_IRQ_TRIGGER: {
800 803 struct vm_isa_irq_trigger isa_irq_trigger;
801 804
802 805 if (ddi_copyin(datap, &isa_irq_trigger,
803 806 sizeof (isa_irq_trigger), md)) {
804 807 error = EFAULT;
805 808 break;
806 809 }
807 810 error = vatpic_set_irq_trigger(sc->vmm_vm,
808 811 isa_irq_trigger.atpic_irq, isa_irq_trigger.trigger);
809 812 break;
810 813 }
811 814
812 815 case VM_MMAP_GETNEXT: {
813 816 struct vm_memmap mm;
814 817
815 818 if (ddi_copyin(datap, &mm, sizeof (mm), md)) {
816 819 error = EFAULT;
817 820 break;
818 821 }
819 822 error = vm_mmap_getnext(sc->vmm_vm, &mm.gpa, &mm.segid,
820 823 &mm.segoff, &mm.len, &mm.prot, &mm.flags);
821 824 if (error == 0 && ddi_copyout(&mm, datap, sizeof (mm), md)) {
822 825 error = EFAULT;
823 826 break;
824 827 }
825 828 break;
826 829 }
827 830 case VM_MMAP_MEMSEG: {
828 831 struct vm_memmap mm;
829 832
830 833 if (ddi_copyin(datap, &mm, sizeof (mm), md)) {
831 834 error = EFAULT;
832 835 break;
833 836 }
834 837 error = vm_mmap_memseg(sc->vmm_vm, mm.gpa, mm.segid, mm.segoff,
835 838 mm.len, mm.prot, mm.flags);
836 839 break;
837 840 }
838 841 case VM_ALLOC_MEMSEG: {
839 842 struct vm_memseg vmseg;
840 843
841 844 if (ddi_copyin(datap, &vmseg, sizeof (vmseg), md)) {
842 845 error = EFAULT;
843 846 break;
844 847 }
845 848 error = vmmdev_alloc_memseg(sc, &vmseg);
846 849 break;
847 850 }
848 851 case VM_GET_MEMSEG: {
849 852 struct vm_memseg vmseg;
850 853
851 854 if (ddi_copyin(datap, &vmseg, sizeof (vmseg), md)) {
852 855 error = EFAULT;
853 856 break;
854 857 }
855 858 error = vmmdev_get_memseg(sc, &vmseg);
856 859 if (error == 0 &&
857 860 ddi_copyout(&vmseg, datap, sizeof (vmseg), md)) {
858 861 error = EFAULT;
859 862 break;
860 863 }
861 864 break;
862 865 }
863 866 case VM_GET_REGISTER: {
864 867 struct vm_register vmreg;
865 868
866 869 if (ddi_copyin(datap, &vmreg, sizeof (vmreg), md)) {
867 870 error = EFAULT;
868 871 break;
869 872 }
870 873 error = vm_get_register(sc->vmm_vm, vcpu, vmreg.regnum,
871 874 &vmreg.regval);
872 875 if (error == 0 &&
873 876 ddi_copyout(&vmreg, datap, sizeof (vmreg), md)) {
874 877 error = EFAULT;
875 878 break;
876 879 }
877 880 break;
878 881 }
879 882 case VM_SET_REGISTER: {
880 883 struct vm_register vmreg;
881 884
882 885 if (ddi_copyin(datap, &vmreg, sizeof (vmreg), md)) {
883 886 error = EFAULT;
884 887 break;
885 888 }
886 889 error = vm_set_register(sc->vmm_vm, vcpu, vmreg.regnum,
887 890 vmreg.regval);
888 891 break;
889 892 }
890 893 case VM_SET_SEGMENT_DESCRIPTOR: {
891 894 struct vm_seg_desc vmsegd;
892 895
893 896 if (ddi_copyin(datap, &vmsegd, sizeof (vmsegd), md)) {
894 897 error = EFAULT;
895 898 break;
896 899 }
897 900 error = vm_set_seg_desc(sc->vmm_vm, vcpu, vmsegd.regnum,
898 901 &vmsegd.desc);
899 902 break;
900 903 }
901 904 case VM_GET_SEGMENT_DESCRIPTOR: {
902 905 struct vm_seg_desc vmsegd;
903 906
904 907 if (ddi_copyin(datap, &vmsegd, sizeof (vmsegd), md)) {
905 908 error = EFAULT;
906 909 break;
907 910 }
908 911 error = vm_get_seg_desc(sc->vmm_vm, vcpu, vmsegd.regnum,
909 912 &vmsegd.desc);
910 913 if (error == 0 &&
911 914 ddi_copyout(&vmsegd, datap, sizeof (vmsegd), md)) {
912 915 error = EFAULT;
913 916 break;
914 917 }
915 918 break;
916 919 }
917 920 case VM_GET_REGISTER_SET: {
918 921 struct vm_register_set vrs;
919 922 int regnums[VM_REG_LAST];
920 923 uint64_t regvals[VM_REG_LAST];
921 924
922 925 if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
923 926 error = EFAULT;
924 927 break;
925 928 }
926 929 if (vrs.count > VM_REG_LAST || vrs.count == 0) {
927 930 error = EINVAL;
928 931 break;
929 932 }
930 933 if (ddi_copyin(vrs.regnums, regnums,
931 934 sizeof (int) * vrs.count, md)) {
932 935 error = EFAULT;
933 936 break;
934 937 }
935 938
936 939 error = 0;
937 940 for (uint_t i = 0; i < vrs.count && error == 0; i++) {
938 941 if (regnums[i] < 0) {
939 942 error = EINVAL;
940 943 break;
941 944 }
942 945 error = vm_get_register(sc->vmm_vm, vcpu, regnums[i],
 943 946 &regvals[i]);
944 947 }
945 948 if (error == 0 && ddi_copyout(regvals, vrs.regvals,
946 949 sizeof (uint64_t) * vrs.count, md)) {
947 950 error = EFAULT;
948 951 }
949 952 break;
950 953 }
951 954 case VM_SET_REGISTER_SET: {
952 955 struct vm_register_set vrs;
953 956 int regnums[VM_REG_LAST];
954 957 uint64_t regvals[VM_REG_LAST];
955 958
956 959 if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
957 960 error = EFAULT;
958 961 break;
959 962 }
960 963 if (vrs.count > VM_REG_LAST || vrs.count == 0) {
961 964 error = EINVAL;
962 965 break;
963 966 }
964 967 if (ddi_copyin(vrs.regnums, regnums,
965 968 sizeof (int) * vrs.count, md)) {
966 969 error = EFAULT;
967 970 break;
968 971 }
969 972 if (ddi_copyin(vrs.regvals, regvals,
970 973 sizeof (uint64_t) * vrs.count, md)) {
971 974 error = EFAULT;
972 975 break;
973 976 }
974 977
975 978 error = 0;
976 979 for (uint_t i = 0; i < vrs.count && error == 0; i++) {
977 980 /*
978 981 * Setting registers in a set is not atomic, since a
979 982 * failure in the middle of the set will cause a
980 983 * bail-out and inconsistent register state. Callers
981 984 * should be wary of this.
982 985 */
983 986 if (regnums[i] < 0) {
984 987 error = EINVAL;
985 988 break;
986 989 }
987 990 error = vm_set_register(sc->vmm_vm, vcpu, regnums[i],
988 991 regvals[i]);
989 992 }
990 993 break;
991 994 }
995 + case VM_RESET_CPU: {
996 + struct vm_vcpu_reset vvr;
997 +
998 + if (ddi_copyin(datap, &vvr, sizeof (vvr), md)) {
999 + error = EFAULT;
1000 + break;
1001 + }
1002 + if (vvr.kind != VRK_RESET && vvr.kind != VRK_INIT) {
1003 + error = EINVAL;
1004 + }
1005 +
1006 + error = vcpu_arch_reset(sc->vmm_vm, vcpu, vvr.kind == VRK_INIT);
1007 + break;
1008 + }
1009 + case VM_GET_RUN_STATE: {
1010 + struct vm_run_state vrs;
1011 +
1012 + bzero(&vrs, sizeof (vrs));
1013 + error = vm_get_run_state(sc->vmm_vm, vcpu, &vrs.state,
1014 + &vrs.sipi_vector);
1015 + if (error == 0) {
1016 + if (ddi_copyout(&vrs, datap, sizeof (vrs), md)) {
1017 + error = EFAULT;
1018 + break;
1019 + }
1020 + }
1021 + break;
1022 + }
1023 + case VM_SET_RUN_STATE: {
1024 + struct vm_run_state vrs;
1025 +
1026 + if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
1027 + error = EFAULT;
1028 + break;
1029 + }
1030 + error = vm_set_run_state(sc->vmm_vm, vcpu, vrs.state,
1031 + vrs.sipi_vector);
1032 + break;
1033 + }
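A hedged userspace sketch of how the new ioctls above might be exercised when emulating an INIT for an application processor. The 'vcpuid' field placement is assumed from the generic vCPU copyin at the top of this function, and 'vmfd'/'ap_vcpu' are illustrative names.

	static int
	example_send_init(int vmfd, int ap_vcpu)
	{
		struct vm_vcpu_reset vvr = { .vcpuid = ap_vcpu, .kind = VRK_INIT };
		struct vm_run_state vrs = { .vcpuid = ap_vcpu };

		/* Apply the INIT-style architectural reset to the target vCPU. */
		if (ioctl(vmfd, VM_RESET_CPU, &vvr) != 0)
			return (-1);

		/* Read back its run state (and any latched SIPI vector). */
		return (ioctl(vmfd, VM_GET_RUN_STATE, &vrs));
	}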
992 1034
993 1035 case VM_SET_KERNEMU_DEV:
994 1036 case VM_GET_KERNEMU_DEV: {
995 1037 struct vm_readwrite_kernemu_device kemu;
996 1038 size_t size = 0;
997 1039
998 1040 if (ddi_copyin(datap, &kemu, sizeof (kemu), md)) {
999 1041 error = EFAULT;
1000 1042 break;
1001 1043 }
1002 1044
1003 1045 if (kemu.access_width > 3) {
1004 1046 error = EINVAL;
1005 1047 break;
1006 1048 }
1007 1049 size = (1 << kemu.access_width);
1008 1050 ASSERT(size >= 1 && size <= 8);
1009 1051
1010 1052 if (cmd == VM_SET_KERNEMU_DEV) {
1011 1053 error = vm_service_mmio_write(sc->vmm_vm, vcpu,
1012 1054 kemu.gpa, kemu.value, size);
1013 1055 } else {
1014 1056 error = vm_service_mmio_read(sc->vmm_vm, vcpu,
1015 1057 kemu.gpa, &kemu.value, size);
1016 1058 }
1017 1059
1018 1060 if (error == 0) {
1019 1061 if (ddi_copyout(&kemu, datap, sizeof (kemu), md)) {
1020 1062 error = EFAULT;
1021 1063 break;
1022 1064 }
1023 1065 }
1024 1066 break;
1025 1067 }
1026 1068
1027 1069 case VM_GET_CAPABILITY: {
1028 1070 struct vm_capability vmcap;
1029 1071
1030 1072 if (ddi_copyin(datap, &vmcap, sizeof (vmcap), md)) {
1031 1073 error = EFAULT;
1032 1074 break;
1033 1075 }
1034 1076 error = vm_get_capability(sc->vmm_vm, vcpu, vmcap.captype,
1035 1077 &vmcap.capval);
1036 1078 if (error == 0 &&
1037 1079 ddi_copyout(&vmcap, datap, sizeof (vmcap), md)) {
1038 1080 error = EFAULT;
1039 1081 break;
1040 1082 }
1041 1083 break;
1042 1084 }
1043 1085 case VM_SET_CAPABILITY: {
1044 1086 struct vm_capability vmcap;
1045 1087
1046 1088 if (ddi_copyin(datap, &vmcap, sizeof (vmcap), md)) {
1047 1089 error = EFAULT;
1048 1090 break;
1049 1091 }
1050 1092 error = vm_set_capability(sc->vmm_vm, vcpu, vmcap.captype,
1051 1093 vmcap.capval);
1052 1094 break;
1053 1095 }
1054 1096 case VM_SET_X2APIC_STATE: {
1055 1097 struct vm_x2apic x2apic;
1056 1098
1057 1099 if (ddi_copyin(datap, &x2apic, sizeof (x2apic), md)) {
1058 1100 error = EFAULT;
1059 1101 break;
1060 1102 }
1061 1103 error = vm_set_x2apic_state(sc->vmm_vm, vcpu, x2apic.state);
1062 1104 break;
1063 1105 }
1064 1106 case VM_GET_X2APIC_STATE: {
1065 1107 struct vm_x2apic x2apic;
1066 1108
1067 1109 if (ddi_copyin(datap, &x2apic, sizeof (x2apic), md)) {
1068 1110 error = EFAULT;
1069 1111 break;
1070 1112 }
1071 1113 error = vm_get_x2apic_state(sc->vmm_vm, x2apic.cpuid,
1072 1114 &x2apic.state);
1073 1115 if (error == 0 &&
1074 1116 ddi_copyout(&x2apic, datap, sizeof (x2apic), md)) {
1075 1117 error = EFAULT;
1076 1118 break;
1077 1119 }
1078 1120 break;
1079 1121 }
1080 1122 case VM_GET_GPA_PMAP: {
1081 1123 struct vm_gpa_pte gpapte;
1082 1124
1083 1125 if (ddi_copyin(datap, &gpapte, sizeof (gpapte), md)) {
1084 1126 error = EFAULT;
1085 1127 break;
1086 1128 }
1087 1129 #ifdef __FreeBSD__
1088 1130 /* XXXJOY: add function? */
1089 1131 pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vmm_vm)),
1090 1132 gpapte.gpa, gpapte.pte, &gpapte.ptenum);
1091 1133 #endif
1092 1134 error = 0;
1093 1135 break;
1094 1136 }
1095 1137 case VM_GET_HPET_CAPABILITIES: {
1096 1138 struct vm_hpet_cap hpetcap;
1097 1139
1098 1140 error = vhpet_getcap(&hpetcap);
1099 1141 if (error == 0 &&
1100 1142 ddi_copyout(&hpetcap, datap, sizeof (hpetcap), md)) {
1101 1143 error = EFAULT;
1102 1144 break;
1103 1145 }
1104 1146 break;
1105 1147 }
1106 1148 case VM_GLA2GPA: {
1107 1149 struct vm_gla2gpa gg;
1108 1150
1109 1151 CTASSERT(PROT_READ == VM_PROT_READ);
1110 1152 CTASSERT(PROT_WRITE == VM_PROT_WRITE);
1111 1153 CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
1112 1154
1113 1155 if (ddi_copyin(datap, &gg, sizeof (gg), md)) {
1114 1156 error = EFAULT;
1115 1157 break;
1116 1158 }
1117 1159 gg.vcpuid = vcpu;
1118 1160 error = vm_gla2gpa(sc->vmm_vm, vcpu, &gg.paging, gg.gla,
1119 1161 gg.prot, &gg.gpa, &gg.fault);
1120 1162 if (error == 0 && ddi_copyout(&gg, datap, sizeof (gg), md)) {
1121 1163 error = EFAULT;
1122 1164 break;
1123 1165 }
1124 1166 break;
1125 1167 }
1126 1168 case VM_GLA2GPA_NOFAULT: {
1127 1169 struct vm_gla2gpa gg;
1128 1170
1129 1171 CTASSERT(PROT_READ == VM_PROT_READ);
1130 1172 CTASSERT(PROT_WRITE == VM_PROT_WRITE);
1131 1173 CTASSERT(PROT_EXEC == VM_PROT_EXECUTE);
1132 1174
1133 1175 if (ddi_copyin(datap, &gg, sizeof (gg), md)) {
1134 1176 error = EFAULT;
1135 1177 break;
1136 1178 }
1137 1179 gg.vcpuid = vcpu;
1138 1180 error = vm_gla2gpa_nofault(sc->vmm_vm, vcpu, &gg.paging,
1139 1181 gg.gla, gg.prot, &gg.gpa, &gg.fault);
1140 1182 if (error == 0 && ddi_copyout(&gg, datap, sizeof (gg), md)) {
1141 1183 error = EFAULT;
1142 1184 break;
1143 1185 }
1144 1186 break;
1145 1187 }
1146 1188
1147 1189 case VM_ACTIVATE_CPU:
1148 1190 error = vm_activate_cpu(sc->vmm_vm, vcpu);
1149 1191 break;
1150 1192
1151 1193 case VM_SUSPEND_CPU:
1152 1194 if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
1153 1195 error = EFAULT;
1154 1196 } else {
1155 1197 error = vm_suspend_cpu(sc->vmm_vm, vcpu);
1156 1198 }
1157 1199 break;
1158 1200
1159 1201 case VM_RESUME_CPU:
1160 1202 if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
1161 1203 error = EFAULT;
1162 1204 } else {
1163 1205 error = vm_resume_cpu(sc->vmm_vm, vcpu);
1164 1206 }
1165 1207 break;
1166 1208
1167 1209 case VM_GET_CPUS: {
1168 1210 struct vm_cpuset vm_cpuset;
1169 1211 cpuset_t tempset;
1170 1212 void *srcp = &tempset;
1171 1213 int size;
1172 1214
1173 1215 if (ddi_copyin(datap, &vm_cpuset, sizeof (vm_cpuset), md)) {
1174 1216 error = EFAULT;
1175 1217 break;
1176 1218 }
1177 1219
1178 1220 /* Be more generous about sizing since our cpuset_t is large. */
1179 1221 size = vm_cpuset.cpusetsize;
1180 1222 if (size <= 0 || size > sizeof (cpuset_t)) {
1181 1223 error = ERANGE;
1182 1224 }
1183 1225 /*
1184 1226 * If they want a ulong_t or less, make sure they receive the
1185 1227 * low bits with all the useful information.
1186 1228 */
1187 1229 if (size <= sizeof (tempset.cpub[0])) {
1188 1230 srcp = &tempset.cpub[0];
1189 1231 }
1190 1232
1191 1233 if (vm_cpuset.which == VM_ACTIVE_CPUS) {
1192 1234 tempset = vm_active_cpus(sc->vmm_vm);
1193 1235 } else if (vm_cpuset.which == VM_SUSPENDED_CPUS) {
1194 1236 tempset = vm_suspended_cpus(sc->vmm_vm);
1195 1237 } else if (vm_cpuset.which == VM_DEBUG_CPUS) {
1196 1238 tempset = vm_debug_cpus(sc->vmm_vm);
1197 1239 } else {
1198 1240 error = EINVAL;
1199 1241 }
1200 1242
1201 1243 ASSERT(size > 0 && size <= sizeof (tempset));
1202 1244 if (error == 0 &&
1203 1245 ddi_copyout(srcp, vm_cpuset.cpus, size, md)) {
1204 1246 error = EFAULT;
1205 1247 break;
1206 1248 }
1207 1249 break;
1208 1250 }
1209 1251 case VM_SET_INTINFO: {
1210 1252 struct vm_intinfo vmii;
1211 1253
1212 1254 if (ddi_copyin(datap, &vmii, sizeof (vmii), md)) {
1213 1255 error = EFAULT;
1214 1256 break;
1215 1257 }
1216 1258 error = vm_exit_intinfo(sc->vmm_vm, vcpu, vmii.info1);
1217 1259 break;
1218 1260 }
1219 1261 case VM_GET_INTINFO: {
1220 1262 struct vm_intinfo vmii;
1221 1263
1222 1264 vmii.vcpuid = vcpu;
1223 1265 error = vm_get_intinfo(sc->vmm_vm, vcpu, &vmii.info1,
1224 1266 &vmii.info2);
1225 1267 if (error == 0 &&
1226 1268 ddi_copyout(&vmii, datap, sizeof (vmii), md)) {
1227 1269 error = EFAULT;
1228 1270 break;
1229 1271 }
1230 1272 break;
1231 1273 }
1232 1274 case VM_RTC_WRITE: {
1233 1275 struct vm_rtc_data rtcdata;
1234 1276
1235 1277 if (ddi_copyin(datap, &rtcdata, sizeof (rtcdata), md)) {
1236 1278 error = EFAULT;
1237 1279 break;
1238 1280 }
1239 1281 error = vrtc_nvram_write(sc->vmm_vm, rtcdata.offset,
1240 1282 rtcdata.value);
1241 1283 break;
1242 1284 }
1243 1285 case VM_RTC_READ: {
1244 1286 struct vm_rtc_data rtcdata;
1245 1287
1246 1288 if (ddi_copyin(datap, &rtcdata, sizeof (rtcdata), md)) {
1247 1289 error = EFAULT;
1248 1290 break;
1249 1291 }
1250 1292 error = vrtc_nvram_read(sc->vmm_vm, rtcdata.offset,
1251 1293 &rtcdata.value);
1252 1294 if (error == 0 &&
1253 1295 ddi_copyout(&rtcdata, datap, sizeof (rtcdata), md)) {
1254 1296 error = EFAULT;
1255 1297 break;
1256 1298 }
1257 1299 break;
1258 1300 }
1259 1301 case VM_RTC_SETTIME: {
1260 1302 struct vm_rtc_time rtctime;
1261 1303
1262 1304 if (ddi_copyin(datap, &rtctime, sizeof (rtctime), md)) {
1263 1305 error = EFAULT;
1264 1306 break;
1265 1307 }
1266 1308 error = vrtc_set_time(sc->vmm_vm, rtctime.secs);
1267 1309 break;
1268 1310 }
1269 1311 case VM_RTC_GETTIME: {
1270 1312 struct vm_rtc_time rtctime;
1271 1313
1272 1314 rtctime.secs = vrtc_get_time(sc->vmm_vm);
1273 1315 if (ddi_copyout(&rtctime, datap, sizeof (rtctime), md)) {
1274 1316 error = EFAULT;
1275 1317 break;
1276 1318 }
1277 1319 break;
1278 1320 }
1279 1321
1280 1322 case VM_PMTMR_LOCATE: {
1281 1323 uint16_t port = arg;
1282 1324 error = vpmtmr_set_location(sc->vmm_vm, port);
1283 1325 break;
1284 1326 }
1285 1327
1286 1328 case VM_RESTART_INSTRUCTION:
1287 1329 error = vm_restart_instruction(sc->vmm_vm, vcpu);
1288 1330 break;
1289 1331
1290 1332 case VM_SET_TOPOLOGY: {
1291 1333 struct vm_cpu_topology topo;
1292 1334
1293 1335 if (ddi_copyin(datap, &topo, sizeof (topo), md) != 0) {
1294 1336 error = EFAULT;
1295 1337 break;
1296 1338 }
1297 1339 error = vm_set_topology(sc->vmm_vm, topo.sockets, topo.cores,
1298 1340 topo.threads, topo.maxcpus);
1299 1341 break;
1300 1342 }
1301 1343 case VM_GET_TOPOLOGY: {
1302 1344 struct vm_cpu_topology topo;
1303 1345
1304 1346 vm_get_topology(sc->vmm_vm, &topo.sockets, &topo.cores,
1305 1347 &topo.threads, &topo.maxcpus);
1306 1348 if (ddi_copyout(&topo, datap, sizeof (topo), md) != 0) {
1307 1349 error = EFAULT;
1308 1350 break;
1309 1351 }
1310 1352 break;
1311 1353 }
1312 1354
1313 1355 #ifndef __FreeBSD__
1314 1356 case VM_DEVMEM_GETOFFSET: {
1315 1357 struct vm_devmem_offset vdo;
1316 1358 list_t *dl = &sc->vmm_devmem_list;
1317 1359 vmm_devmem_entry_t *de = NULL;
1318 1360
1319 1361 if (ddi_copyin(datap, &vdo, sizeof (vdo), md) != 0) {
1320 1362 error = EFAULT;
1321 1363 break;
1322 1364 }
1323 1365
1324 1366 for (de = list_head(dl); de != NULL; de = list_next(dl, de)) {
1325 1367 if (de->vde_segid == vdo.segid) {
1326 1368 break;
1327 1369 }
1328 1370 }
1329 1371 if (de != NULL) {
1330 1372 vdo.offset = de->vde_off;
1331 1373 if (ddi_copyout(&vdo, datap, sizeof (vdo), md) != 0) {
1332 1374 error = EFAULT;
1333 1375 }
1334 1376 } else {
1335 1377 error = ENOENT;
1336 1378 }
1337 1379 break;
1338 1380 }
1339 1381 case VM_WRLOCK_CYCLE: {
1340 1382 /*
1341 1383 * Present a test mechanism to acquire/release the write lock
1342 1384 * on the VM without any other effects.
1343 1385 */
1344 1386 break;
1345 1387 }
1346 1388 #endif
1347 1389 default:
1348 1390 error = ENOTTY;
1349 1391 break;
1350 1392 }
1351 1393
1352 1394 /* Release exclusion resources */
1353 1395 switch (lock_type) {
1354 1396 case LOCK_NONE:
1355 1397 break;
1356 1398 case LOCK_VCPU:
1357 1399 vcpu_unlock_one(sc, vcpu);
1358 1400 break;
1359 1401 case LOCK_READ_HOLD:
1360 1402 vmm_read_unlock(sc);
1361 1403 break;
1362 1404 case LOCK_WRITE_HOLD:
1363 1405 vmm_write_unlock(sc);
1364 1406 break;
1365 1407 default:
1366 1408 panic("unexpected lock type");
1367 1409 break;
1368 1410 }
1369 1411
1370 1412 return (error);
1371 1413 }
1372 1414
1373 1415 static vmm_softc_t *
1374 1416 vmm_lookup(const char *name)
1375 1417 {
1376 1418 list_t *vml = &vmm_list;
1377 1419 vmm_softc_t *sc;
1378 1420
1379 1421 ASSERT(MUTEX_HELD(&vmm_mtx));
1380 1422
1381 1423 for (sc = list_head(vml); sc != NULL; sc = list_next(vml, sc)) {
1382 1424 if (strcmp(sc->vmm_name, name) == 0) {
1383 1425 break;
1384 1426 }
1385 1427 }
1386 1428
1387 1429 return (sc);
1388 1430 }
1389 1431
1390 1432 /*
1391 1433 * Acquire an HMA registration if not already held.
1392 1434 */
1393 1435 static boolean_t
1394 1436 vmm_hma_acquire(void)
1395 1437 {
1396 1438 ASSERT(MUTEX_NOT_HELD(&vmm_mtx));
1397 1439
1398 1440 mutex_enter(&vmmdev_mtx);
1399 1441
1400 1442 if (vmmdev_hma_reg == NULL) {
1401 1443 VERIFY3U(vmmdev_hma_ref, ==, 0);
1402 1444 vmmdev_hma_reg = hma_register(vmmdev_hvm_name);
1403 1445 if (vmmdev_hma_reg == NULL) {
1404 1446 cmn_err(CE_WARN, "%s HMA registration failed.",
1405 1447 vmmdev_hvm_name);
1406 1448 mutex_exit(&vmmdev_mtx);
1407 1449 return (B_FALSE);
1408 1450 }
1409 1451 }
1410 1452
1411 1453 vmmdev_hma_ref++;
1412 1454
1413 1455 mutex_exit(&vmmdev_mtx);
1414 1456
1415 1457 return (B_TRUE);
1416 1458 }
1417 1459
1418 1460 /*
1419 1461 * Release the HMA registration if held and there are no remaining VMs.
1420 1462 */
1421 1463 static void
1422 1464 vmm_hma_release(void)
1423 1465 {
1424 1466 ASSERT(MUTEX_NOT_HELD(&vmm_mtx));
1425 1467
1426 1468 mutex_enter(&vmmdev_mtx);
1427 1469
1428 1470 VERIFY3U(vmmdev_hma_ref, !=, 0);
1429 1471
1430 1472 vmmdev_hma_ref--;
1431 1473
1432 1474 if (vmmdev_hma_ref == 0) {
1433 1475 VERIFY(vmmdev_hma_reg != NULL);
1434 1476 hma_unregister(vmmdev_hma_reg);
1435 1477 vmmdev_hma_reg = NULL;
1436 1478 }
1437 1479 mutex_exit(&vmmdev_mtx);
1438 1480 }
1439 1481
1440 1482 static int
1441 1483 vmmdev_do_vm_create(char *name, cred_t *cr)
1442 1484 {
1443 1485 vmm_softc_t *sc = NULL;
1444 1486 minor_t minor;
1445 1487 int error = ENOMEM;
1446 1488
1447 1489 if (strnlen(name, VM_MAX_NAMELEN) >= VM_MAX_NAMELEN) {
1448 1490 return (EINVAL);
1449 1491 }
1450 1492
1451 1493 if (!vmm_hma_acquire())
1452 1494 return (ENXIO);
1453 1495
1454 1496 mutex_enter(&vmm_mtx);
1455 1497
1456 1498 /* Look for duplicate names */
1457 1499 if (vmm_lookup(name) != NULL) {
1458 1500 mutex_exit(&vmm_mtx);
1459 1501 vmm_hma_release();
1460 1502 return (EEXIST);
1461 1503 }
1462 1504
1463 1505 /* Allow only one instance per non-global zone. */
1464 1506 if (!INGLOBALZONE(curproc)) {
1465 1507 for (sc = list_head(&vmm_list); sc != NULL;
1466 1508 sc = list_next(&vmm_list, sc)) {
1467 1509 if (sc->vmm_zone == curzone) {
1468 1510 mutex_exit(&vmm_mtx);
1469 1511 vmm_hma_release();
1470 1512 return (EINVAL);
1471 1513 }
1472 1514 }
1473 1515 }
1474 1516
1475 1517 minor = id_alloc(vmm_minors);
1476 1518 if (ddi_soft_state_zalloc(vmm_statep, minor) != DDI_SUCCESS) {
1477 1519 goto fail;
1478 1520 } else if ((sc = ddi_get_soft_state(vmm_statep, minor)) == NULL) {
1479 1521 ddi_soft_state_free(vmm_statep, minor);
1480 1522 goto fail;
1481 1523 } else if (ddi_create_minor_node(vmmdev_dip, name, S_IFCHR, minor,
1482 1524 DDI_PSEUDO, 0) != DDI_SUCCESS) {
1483 1525 goto fail;
1484 1526 }
1485 1527
1486 1528 error = vm_create(name, &sc->vmm_vm);
1487 1529 if (error == 0) {
1488 1530 /* Complete VM intialization and report success. */
1489 1531 (void) strlcpy(sc->vmm_name, name, sizeof (sc->vmm_name));
1490 1532 sc->vmm_minor = minor;
1491 1533 list_create(&sc->vmm_devmem_list, sizeof (vmm_devmem_entry_t),
1492 1534 offsetof(vmm_devmem_entry_t, vde_node));
1493 1535
1494 1536 list_create(&sc->vmm_holds, sizeof (vmm_hold_t),
1495 1537 offsetof(vmm_hold_t, vmh_node));
1496 1538 cv_init(&sc->vmm_cv, NULL, CV_DEFAULT, NULL);
1497 1539
1498 1540 mutex_init(&sc->vmm_lease_lock, NULL, MUTEX_DEFAULT, NULL);
1499 1541 list_create(&sc->vmm_lease_list, sizeof (vmm_lease_t),
1500 1542 offsetof(vmm_lease_t, vml_node));
1501 1543 cv_init(&sc->vmm_lease_cv, NULL, CV_DEFAULT, NULL);
1502 1544 rw_init(&sc->vmm_rwlock, NULL, RW_DEFAULT, NULL);
1503 1545
1504 1546 sc->vmm_zone = crgetzone(cr);
1505 1547 zone_hold(sc->vmm_zone);
1506 1548 vmm_zsd_add_vm(sc);
1507 1549
1508 1550 list_insert_tail(&vmm_list, sc);
1509 1551 mutex_exit(&vmm_mtx);
1510 1552 return (0);
1511 1553 }
1512 1554
1513 1555 ddi_remove_minor_node(vmmdev_dip, name);
1514 1556 fail:
1515 1557 id_free(vmm_minors, minor);
1516 1558 if (sc != NULL) {
1517 1559 ddi_soft_state_free(vmm_statep, minor);
1518 1560 }
1519 1561 mutex_exit(&vmm_mtx);
1520 1562 vmm_hma_release();
1521 1563
1522 1564 return (error);
1523 1565 }
1524 1566
1525 1567 /*
1526 1568 * Bhyve 'Driver' Interface
1527 1569 *
1528 1570 * While many devices are emulated in the bhyve userspace process, there are
1529 1571 * others with performance constraints which require that they run mostly or
1530 1572 * entirely in-kernel. For those not integrated directly into bhyve, an API is
1531 1573 * needed so they can query/manipulate the portions of VM state needed to
1532 1574 * fulfill their purpose.
1533 1575 *
1534 1576 * This includes:
1535 1577 * - Translating guest-physical addresses to host-virtual pointers
1536 1578 * - Injecting MSIs
1537 1579 * - Hooking IO port addresses
1538 1580 *
1539 1581 * The vmm_drv interface exists to provide that functionality to its consumers.
1540 1582 * (At this time, 'viona' is the only user)
1541 1583 */
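A sketch of the lifecycle a vmm_drv consumer would follow, using the hold/lease interfaces defined below. The 'fp'/'cr' arguments are assumed to come from the consumer's own ioctl context, the expire callback is consumer-defined (see the earlier lease sketch), and the GPA and MSI address values are purely illustrative.

	static int
	example_drv_attach(file_t *fp, cred_t *cr, void *arg)
	{
		vmm_hold_t *hold;
		vmm_lease_t *lease;
		void *kva;

		if (vmm_drv_hold(fp, cr, &hold) != 0)
			return (ENXIO);

		lease = vmm_drv_lease_sign(hold, example_lease_expired, arg);
		if (lease == NULL) {
			vmm_drv_rele(hold);
			return (EBUSY);
		}

		/* Translate a guest-physical address and inject an MSI. */
		kva = vmm_drv_gpa2kva(lease, 0x1000, PAGESIZE);
		(void) vmm_drv_msi(lease, 0xfee00000, 0);

		vmm_drv_lease_break(hold, lease);
		vmm_drv_rele(hold);
		return (kva != NULL ? 0 : EFAULT);
	}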
1542 1584 int
1543 1585 vmm_drv_hold(file_t *fp, cred_t *cr, vmm_hold_t **holdp)
1544 1586 {
1545 1587 vnode_t *vp = fp->f_vnode;
1546 1588 const dev_t dev = vp->v_rdev;
1547 1589 vmm_softc_t *sc;
1548 1590 vmm_hold_t *hold;
1549 1591 int err = 0;
1550 1592
1551 1593 if (vp->v_type != VCHR) {
1552 1594 return (ENXIO);
1553 1595 }
1554 1596 const major_t major = getmajor(dev);
1555 1597 const minor_t minor = getminor(dev);
1556 1598
1557 1599 mutex_enter(&vmmdev_mtx);
1558 1600 if (vmmdev_dip == NULL || major != ddi_driver_major(vmmdev_dip)) {
1559 1601 mutex_exit(&vmmdev_mtx);
1560 1602 return (ENOENT);
1561 1603 }
1562 1604 mutex_enter(&vmm_mtx);
1563 1605 mutex_exit(&vmmdev_mtx);
1564 1606
1565 1607 if ((sc = ddi_get_soft_state(vmm_statep, minor)) == NULL) {
1566 1608 err = ENOENT;
1567 1609 goto out;
1568 1610 }
1569 1611 /* XXXJOY: check cred permissions against instance */
1570 1612
1571 1613 if ((sc->vmm_flags & (VMM_CLEANUP|VMM_PURGED|VMM_DESTROY)) != 0) {
1572 1614 err = EBUSY;
1573 1615 goto out;
1574 1616 }
1575 1617
1576 1618 hold = kmem_zalloc(sizeof (*hold), KM_SLEEP);
1577 1619 hold->vmh_sc = sc;
1578 1620 hold->vmh_release_req = B_FALSE;
1579 1621
1580 1622 list_insert_tail(&sc->vmm_holds, hold);
1581 1623 sc->vmm_flags |= VMM_HELD;
1582 1624 *holdp = hold;
1583 1625
1584 1626 out:
1585 1627 mutex_exit(&vmm_mtx);
1586 1628 return (err);
1587 1629 }
1588 1630
1589 1631 void
1590 1632 vmm_drv_rele(vmm_hold_t *hold)
1591 1633 {
1592 1634 vmm_softc_t *sc;
1593 1635
1594 1636 ASSERT(hold != NULL);
1595 1637 ASSERT(hold->vmh_sc != NULL);
1596 1638 VERIFY(hold->vmh_ioport_hook_cnt == 0);
1597 1639
1598 1640 mutex_enter(&vmm_mtx);
1599 1641 sc = hold->vmh_sc;
1600 1642 list_remove(&sc->vmm_holds, hold);
1601 1643 if (list_is_empty(&sc->vmm_holds)) {
1602 1644 sc->vmm_flags &= ~VMM_HELD;
1603 1645 cv_broadcast(&sc->vmm_cv);
1604 1646 }
1605 1647 mutex_exit(&vmm_mtx);
1606 1648 kmem_free(hold, sizeof (*hold));
1607 1649 }
1608 1650
1609 1651 boolean_t
1610 1652 vmm_drv_release_reqd(vmm_hold_t *hold)
1611 1653 {
1612 1654 ASSERT(hold != NULL);
1613 1655
1614 1656 return (hold->vmh_release_req);
1615 1657 }
1616 1658
1617 1659 vmm_lease_t *
1618 1660 vmm_drv_lease_sign(vmm_hold_t *hold, boolean_t (*expiref)(void *), void *arg)
1619 1661 {
1620 1662 vmm_softc_t *sc = hold->vmh_sc;
1621 1663 vmm_lease_t *lease;
1622 1664
1623 1665 ASSERT3P(expiref, !=, NULL);
1624 1666
1625 1667 if (hold->vmh_release_req) {
1626 1668 return (NULL);
1627 1669 }
1628 1670
1629 1671 lease = kmem_alloc(sizeof (*lease), KM_SLEEP);
1630 1672 list_link_init(&lease->vml_node);
1631 1673 lease->vml_expire_func = expiref;
1632 1674 lease->vml_expire_arg = arg;
1633 1675 lease->vml_expired = B_FALSE;
1634 1676 lease->vml_hold = hold;
1635 1677 /* cache the VM pointer for one less pointer chase */
1636 1678 lease->vml_vm = sc->vmm_vm;
1637 1679
1638 1680 mutex_enter(&sc->vmm_lease_lock);
1639 1681 while (sc->vmm_lease_blocker != 0) {
1640 1682 cv_wait(&sc->vmm_lease_cv, &sc->vmm_lease_lock);
1641 1683 }
1642 1684 list_insert_tail(&sc->vmm_lease_list, lease);
1643 1685 vmm_read_lock(sc);
1644 1686 mutex_exit(&sc->vmm_lease_lock);
1645 1687
1646 1688 return (lease);
1647 1689 }
1648 1690
1649 1691 static void
1650 1692 vmm_lease_break_locked(vmm_softc_t *sc, vmm_lease_t *lease)
1651 1693 {
1652 1694 ASSERT(MUTEX_HELD(&sc->vmm_lease_lock));
1653 1695
1654 1696 list_remove(&sc->vmm_lease_list, lease);
1655 1697 vmm_read_unlock(sc);
1656 1698 kmem_free(lease, sizeof (*lease));
1657 1699 }
1658 1700
1659 1701 void
1660 1702 vmm_drv_lease_break(vmm_hold_t *hold, vmm_lease_t *lease)
1661 1703 {
1662 1704 vmm_softc_t *sc = hold->vmh_sc;
1663 1705
1664 1706 VERIFY3P(hold, ==, lease->vml_hold);
1665 1707
1666 1708 mutex_enter(&sc->vmm_lease_lock);
1667 1709 vmm_lease_break_locked(sc, lease);
1668 1710 mutex_exit(&sc->vmm_lease_lock);
1669 1711 }
1670 1712
1671 1713 boolean_t
1672 1714 vmm_drv_lease_expired(vmm_lease_t *lease)
1673 1715 {
1674 1716 return (lease->vml_expired);
1675 1717 }
1676 1718
1677 1719 void *
1678 1720 vmm_drv_gpa2kva(vmm_lease_t *lease, uintptr_t gpa, size_t sz)
1679 1721 {
1680 1722 ASSERT(lease != NULL);
1681 1723
1682 1724 return (vmspace_find_kva(vm_get_vmspace(lease->vml_vm), gpa, sz));
1683 1725 }
1684 1726
1685 1727 int
1686 1728 vmm_drv_msi(vmm_lease_t *lease, uint64_t addr, uint64_t msg)
1687 1729 {
1688 1730 ASSERT(lease != NULL);
1689 1731
1690 1732 return (lapic_intr_msi(lease->vml_vm, addr, msg));
1691 1733 }
1692 1734
1693 1735 int
1694 1736 vmm_drv_ioport_hook(vmm_hold_t *hold, uint16_t ioport, vmm_drv_iop_cb_t func,
1695 1737 void *arg, void **cookie)
1696 1738 {
1697 1739 vmm_softc_t *sc;
1698 1740 int err;
1699 1741
1700 1742 ASSERT(hold != NULL);
1701 1743 ASSERT(cookie != NULL);
1702 1744
1703 1745 sc = hold->vmh_sc;
1704 1746 mutex_enter(&vmm_mtx);
1705 1747 /* Confirm that hook installation is not blocked */
1706 1748 if ((sc->vmm_flags & VMM_BLOCK_HOOK) != 0) {
1707 1749 mutex_exit(&vmm_mtx);
1708 1750 return (EBUSY);
1709 1751 }
1710 1752 /*
1711 1753 * Optimistically record an installed hook which will prevent a block
1712 1754 * from being asserted while the mutex is dropped.
1713 1755 */
1714 1756 hold->vmh_ioport_hook_cnt++;
1715 1757 mutex_exit(&vmm_mtx);
1716 1758
1717 1759 vmm_write_lock(sc);
1718 1760 err = vm_ioport_hook(sc->vmm_vm, ioport, (ioport_handler_t)func,
1719 1761 arg, cookie);
1720 1762 vmm_write_unlock(sc);
1721 1763
1722 1764 if (err != 0) {
1723 1765 mutex_enter(&vmm_mtx);
1724 1766 /* Walk back optimism about the hook installation */
1725 1767 hold->vmh_ioport_hook_cnt--;
1726 1768 mutex_exit(&vmm_mtx);
1727 1769 }
1728 1770 return (err);
1729 1771 }
1730 1772
1731 1773 void
1732 1774 vmm_drv_ioport_unhook(vmm_hold_t *hold, void **cookie)
1733 1775 {
1734 1776 vmm_softc_t *sc;
1735 1777
1736 1778 ASSERT(hold != NULL);
1737 1779 ASSERT(cookie != NULL);
1738 1780 ASSERT(hold->vmh_ioport_hook_cnt != 0);
1739 1781
1740 1782 sc = hold->vmh_sc;
1741 1783 vmm_write_lock(sc);
1742 1784 vm_ioport_unhook(sc->vmm_vm, cookie);
1743 1785 vmm_write_unlock(sc);
1744 1786
1745 1787 mutex_enter(&vmm_mtx);
1746 1788 hold->vmh_ioport_hook_cnt--;
1747 1789 mutex_exit(&vmm_mtx);
1748 1790 }
1749 1791
1750 1792 static int
1751 1793 vmm_drv_purge(vmm_softc_t *sc)
1752 1794 {
1753 1795 ASSERT(MUTEX_HELD(&vmm_mtx));
1754 1796
1755 1797 if ((sc->vmm_flags & VMM_HELD) != 0) {
1756 1798 vmm_hold_t *hold;
1757 1799
1758 1800 sc->vmm_flags |= VMM_CLEANUP;
1759 1801 for (hold = list_head(&sc->vmm_holds); hold != NULL;
1760 1802 hold = list_next(&sc->vmm_holds, hold)) {
1761 1803 hold->vmh_release_req = B_TRUE;
1762 1804 }
1763 1805 while ((sc->vmm_flags & VMM_HELD) != 0) {
1764 1806 if (cv_wait_sig(&sc->vmm_cv, &vmm_mtx) <= 0) {
1765 1807 return (EINTR);
1766 1808 }
1767 1809 }
1768 1810 sc->vmm_flags &= ~VMM_CLEANUP;
1769 1811 }
1770 1812
1771 1813 VERIFY(list_is_empty(&sc->vmm_holds));
1772 1814 sc->vmm_flags |= VMM_PURGED;
1773 1815 return (0);
1774 1816 }
1775 1817
1776 1818 static int
1777 1819 vmm_drv_block_hook(vmm_softc_t *sc, boolean_t enable_block)
1778 1820 {
1779 1821 int err = 0;
1780 1822
1781 1823 mutex_enter(&vmm_mtx);
1782 1824 if (!enable_block) {
1783 1825 VERIFY((sc->vmm_flags & VMM_BLOCK_HOOK) != 0);
1784 1826
1785 1827 sc->vmm_flags &= ~VMM_BLOCK_HOOK;
1786 1828 goto done;
1787 1829 }
1788 1830
1789 1831 /* If any holds have hooks installed, the block is a failure */
1790 1832 if (!list_is_empty(&sc->vmm_holds)) {
1791 1833 vmm_hold_t *hold;
1792 1834
1793 1835 for (hold = list_head(&sc->vmm_holds); hold != NULL;
1794 1836 hold = list_next(&sc->vmm_holds, hold)) {
1795 1837 if (hold->vmh_ioport_hook_cnt != 0) {
1796 1838 err = EBUSY;
1797 1839 goto done;
1798 1840 }
1799 1841 }
1800 1842 }
1801 1843 sc->vmm_flags |= VMM_BLOCK_HOOK;
1802 1844
1803 1845 done:
1804 1846 mutex_exit(&vmm_mtx);
1805 1847 return (err);
1806 1848 }
1807 1849
1808 1850 static int
1809 1851 vmm_do_vm_destroy_locked(vmm_softc_t *sc, boolean_t clean_zsd,
1810 1852 boolean_t *hma_release)
1811 1853 {
1812 1854 dev_info_t *pdip = ddi_get_parent(vmmdev_dip);
1813 1855 minor_t minor;
1814 1856
1815 1857 ASSERT(MUTEX_HELD(&vmm_mtx));
1816 1858
1817 1859 *hma_release = B_FALSE;
1818 1860
1819 1861 if (clean_zsd) {
1820 1862 vmm_zsd_rem_vm(sc);
1821 1863 }
1822 1864
1823 1865 if (vmm_drv_purge(sc) != 0) {
1824 1866 return (EINTR);
1825 1867 }
1826 1868
1827 1869 /* Clean up devmem entries */
1828 1870 vmmdev_devmem_purge(sc);
1829 1871
1830 1872 list_remove(&vmm_list, sc);
1831 1873 ddi_remove_minor_node(vmmdev_dip, sc->vmm_name);
1832 1874 minor = sc->vmm_minor;
1833 1875 zone_rele(sc->vmm_zone);
1834 1876 if (sc->vmm_is_open) {
1835 1877 list_insert_tail(&vmm_destroy_list, sc);
1836 1878 sc->vmm_flags |= VMM_DESTROY;
1837 1879 } else {
1838 1880 vm_destroy(sc->vmm_vm);
1839 1881 ddi_soft_state_free(vmm_statep, minor);
1840 1882 id_free(vmm_minors, minor);
1841 1883 *hma_release = B_TRUE;
1842 1884 }
1843 1885 (void) devfs_clean(pdip, NULL, DV_CLEAN_FORCE);
1844 1886
1845 1887 return (0);
1846 1888 }
1847 1889
1848 1890 int
1849 1891 vmm_do_vm_destroy(vmm_softc_t *sc, boolean_t clean_zsd)
1850 1892 {
1851 1893 boolean_t hma_release = B_FALSE;
1852 1894 int err;
1853 1895
1854 1896 mutex_enter(&vmm_mtx);
1855 1897 err = vmm_do_vm_destroy_locked(sc, clean_zsd, &hma_release);
1856 1898 mutex_exit(&vmm_mtx);
1857 1899
1858 1900 if (hma_release)
1859 1901 vmm_hma_release();
1860 1902
1861 1903 return (err);
1862 1904 }
1863 1905
1864 1906 /* ARGSUSED */
1865 1907 static int
1866 1908 vmmdev_do_vm_destroy(const char *name, cred_t *cr)
1867 1909 {
1868 1910 boolean_t hma_release = B_FALSE;
1869 1911 vmm_softc_t *sc;
1870 1912 int err;
1871 1913
1872 1914 if (crgetuid(cr) != 0)
1873 1915 return (EPERM);
1874 1916
1875 1917 mutex_enter(&vmm_mtx);
1876 1918
1877 1919 if ((sc = vmm_lookup(name)) == NULL) {
1878 1920 mutex_exit(&vmm_mtx);
1879 1921 return (ENOENT);
1880 1922 }
1881 1923 /*
1882 1924 * We don't check this in vmm_lookup() since that function is also used
1883 1925 * for validation during create and currently vmm names must be unique.
1884 1926 */
1885 1927 if (!INGLOBALZONE(curproc) && sc->vmm_zone != curzone) {
1886 1928 mutex_exit(&vmm_mtx);
1887 1929 return (EPERM);
1888 1930 }
1889 1931 err = vmm_do_vm_destroy_locked(sc, B_TRUE, &hma_release);
1890 1932
1891 1933 mutex_exit(&vmm_mtx);
1892 1934
1893 1935 if (hma_release)
1894 1936 vmm_hma_release();
1895 1937
1896 1938 return (err);
1897 1939 }
1898 1940
1899 1941 static int
1900 1942 vmm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
1901 1943 {
1902 1944 minor_t minor;
1903 1945 vmm_softc_t *sc;
1904 1946
1905 1947 minor = getminor(*devp);
1906 1948 if (minor == VMM_CTL_MINOR) {
1907 1949 /*
1908 1950 * Master control device must be opened exclusively.
1909 1951 */
1910 1952 if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) {
1911 1953 return (EINVAL);
1912 1954 }
1913 1955
1914 1956 return (0);
1915 1957 }
1916 1958
1917 1959 mutex_enter(&vmm_mtx);
1918 1960 sc = ddi_get_soft_state(vmm_statep, minor);
1919 1961 if (sc == NULL) {
1920 1962 mutex_exit(&vmm_mtx);
1921 1963 return (ENXIO);
1922 1964 }
1923 1965
1924 1966 sc->vmm_is_open = B_TRUE;
1925 1967 mutex_exit(&vmm_mtx);
1926 1968
1927 1969 return (0);
1928 1970 }
1929 1971
1930 1972 static int
1931 1973 vmm_close(dev_t dev, int flag, int otyp, cred_t *credp)
1932 1974 {
1933 1975 minor_t minor;
1934 1976 vmm_softc_t *sc;
1935 1977 boolean_t hma_release = B_FALSE;
1936 1978
1937 1979 minor = getminor(dev);
1938 1980 if (minor == VMM_CTL_MINOR)
1939 1981 return (0);
1940 1982
1941 1983 mutex_enter(&vmm_mtx);
1942 1984 sc = ddi_get_soft_state(vmm_statep, minor);
1943 1985 if (sc == NULL) {
1944 1986 mutex_exit(&vmm_mtx);
1945 1987 return (ENXIO);
1946 1988 }
1947 1989
1948 1990 VERIFY(sc->vmm_is_open);
1949 1991 sc->vmm_is_open = B_FALSE;
1950 1992
1951 1993 /*
1952 1994 * If this VM was destroyed while the vmm device was open, then
1953 1995 * clean it up now that it is closed.
1954 1996 */
1955 1997 if (sc->vmm_flags & VMM_DESTROY) {
1956 1998 list_remove(&vmm_destroy_list, sc);
1957 1999 vm_destroy(sc->vmm_vm);
1958 2000 ddi_soft_state_free(vmm_statep, minor);
1959 2001 id_free(vmm_minors, minor);
1960 2002 hma_release = B_TRUE;
1961 2003 }
1962 2004 mutex_exit(&vmm_mtx);
1963 2005
1964 2006 if (hma_release)
1965 2007 vmm_hma_release();
1966 2008
1967 2009 return (0);
1968 2010 }
1969 2011
1970 2012 static int
1971 2013 vmm_is_supported(intptr_t arg)
1972 2014 {
1973 2015 int r;
1974 2016 const char *msg;
1975 2017
1976 2018 if (vmm_is_intel()) {
1977 2019 r = vmx_x86_supported(&msg);
1978 2020 } else if (vmm_is_svm()) {
1979 2021 /*
1980 2022 * HMA already ensured that the features necessary for SVM
1981 2023 * operation were present and online during vmm_attach().
1982 2024 */
1983 2025 r = 0;
1984 2026 } else {
1985 2027 r = ENXIO;
1986 2028 msg = "Unsupported CPU vendor";
1987 2029 }
1988 2030
1989 2031 if (r != 0 && arg != (intptr_t)NULL) {
1990 2032 if (copyoutstr(msg, (char *)arg, strlen(msg), NULL) != 0)
1991 2033 return (EFAULT);
1992 2034 }
1993 2035 return (r);
1994 2036 }
1995 2037
1996 2038 static int
1997 2039 vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
1998 2040 int *rvalp)
1999 2041 {
2000 2042 vmm_softc_t *sc;
2001 2043 minor_t minor;
2002 2044
2003 2045 /* The structs in bhyve ioctls assume a 64-bit datamodel */
2004 2046 if (ddi_model_convert_from(mode & FMODELS) != DDI_MODEL_NONE) {
2005 2047 return (ENOTSUP);
2006 2048 }
2007 2049
2008 2050 minor = getminor(dev);
2009 2051
2010 2052 if (minor == VMM_CTL_MINOR) {
2011 2053 void *argp = (void *)arg;
2012 2054 char name[VM_MAX_NAMELEN] = { 0 };
2013 2055 size_t len = 0;
2014 2056
2015 2057 if ((mode & FKIOCTL) != 0) {
2016 2058 len = strlcpy(name, argp, sizeof (name));
2017 2059 } else {
2018 2060 if (copyinstr(argp, name, sizeof (name), &len) != 0) {
2019 2061 return (EFAULT);
2020 2062 }
2021 2063 }
2022 2064 if (len >= VM_MAX_NAMELEN) {
2023 2065 return (ENAMETOOLONG);
2024 2066 }
2025 2067
2026 2068 switch (cmd) {
2027 2069 case VMM_CREATE_VM:
2028 2070 if ((mode & FWRITE) == 0)
2029 2071 return (EPERM);
2030 2072 return (vmmdev_do_vm_create(name, credp));
2031 2073 case VMM_DESTROY_VM:
2032 2074 if ((mode & FWRITE) == 0)
2033 2075 return (EPERM);
2034 2076 return (vmmdev_do_vm_destroy(name, credp));
2035 2077 case VMM_VM_SUPPORTED:
2036 2078 return (vmm_is_supported(arg));
2037 2079 default:
2038 2080 /* No other actions are legal on ctl device */
2039 2081 return (ENOTTY);
2040 2082 }
2041 2083 }
2042 2084
2043 2085 sc = ddi_get_soft_state(vmm_statep, minor);
2044 2086 ASSERT(sc);
2045 2087
2046 2088 if (sc->vmm_flags & VMM_DESTROY)
2047 2089 return (ENXIO);
2048 2090
2049 2091 return (vmmdev_do_ioctl(sc, cmd, arg, mode, credp, rvalp));
2050 2092 }
2051 2093
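/*
 * A minimal userland sketch of exercising the control-device ioctls
 * handled above.  The /dev/vmmctl path and the error handling are
 * assumptions; the command values come from <sys/vmm_dev.h>, the name
 * string itself is the ioctl argument (it is copied in with copyinstr),
 * the node must be opened exclusively with write access, and sufficient
 * privilege is required (the destroy path returns EPERM otherwise).
 */
#include <sys/types.h>
#include <sys/ioctl.h>
#include <sys/vmm_dev.h>
#include <fcntl.h>
#include <unistd.h>

static int
sample_create_vm(const char *name)
{
	int ctlfd, err;

	/* O_EXCL satisfies the FEXCL check in vmm_open(). */
	if ((ctlfd = open("/dev/vmmctl", O_RDWR | O_EXCL)) < 0)
		return (-1);
	err = ioctl(ctlfd, VMM_CREATE_VM, name);
	(void) close(ctlfd);
	return (err);
}
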
2052 2094 static int
2053 2095 vmm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
2054 2096 unsigned int prot, unsigned int maxprot, unsigned int flags, cred_t *credp)
2055 2097 {
2056 2098 vmm_softc_t *sc;
2057 2099 const minor_t minor = getminor(dev);
2058 2100 struct vm *vm;
2059 2101 int err;
2060 2102 vm_object_t vmo = NULL;
2061 2103 struct vmspace *vms;
2062 2104
2063 2105 if (minor == VMM_CTL_MINOR) {
2064 2106 return (ENODEV);
2065 2107 }
2066 2108 if (off < 0 || (off + len) <= 0) {
2067 2109 return (EINVAL);
2068 2110 }
2069 2111 if ((prot & PROT_USER) == 0) {
2070 2112 return (EACCES);
2071 2113 }
2072 2114
2073 2115 sc = ddi_get_soft_state(vmm_statep, minor);
2074 2116 ASSERT(sc);
2075 2117
2076 2118 if (sc->vmm_flags & VMM_DESTROY)
2077 2119 return (ENXIO);
2078 2120
2079 2121 /* Grab read lock on the VM to prevent any changes to the memory map */
2080 2122 vmm_read_lock(sc);
2081 2123
2082 2124 vm = sc->vmm_vm;
2083 2125 vms = vm_get_vmspace(vm);
2084 2126 if (off >= VM_DEVMEM_START) {
2085 2127 int segid;
2086 2128 off_t map_off = 0;
2087 2129
2088 2130 /* Mapping a devmem "device" */
2089 2131 if (!vmmdev_devmem_segid(sc, off, len, &segid, &map_off)) {
2090 2132 err = ENODEV;
2091 2133 goto out;
2092 2134 }
2093 2135 err = vm_get_memseg(vm, segid, NULL, NULL, &vmo);
2094 2136 if (err != 0) {
2095 2137 goto out;
2096 2138 }
2097 2139 err = vm_segmap_obj(vmo, map_off, len, as, addrp, prot, maxprot,
2098 2140 flags);
2099 2141 } else {
2100 2142 /* Mapping a part of the guest physical space */
2101 2143 err = vm_segmap_space(vms, off, as, addrp, len, prot, maxprot,
2102 2144 flags);
2103 2145 }
2104 2146
2105 2147
2106 2148 out:
2107 2149 vmm_read_unlock(sc);
2108 2150 return (err);
2109 2151 }
2110 2152
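/*
 * A minimal userland sketch of the mapping path above: offsets below
 * VM_DEVMEM_START select guest-physical space, while devmem segments
 * live at their registered offsets above it.  It assumes vmfd is an
 * open descriptor for a /dev/vmm/<name> node and that guest memory has
 * already been configured for the requested range; the PROT_USER check
 * is satisfied implicitly for an ordinary userland mmap(2).
 */
#include <sys/types.h>
#include <sys/mman.h>
#include <stddef.h>

static void *
sample_map_guest_ram(int vmfd, off_t gpa, size_t len)
{
	void *base;

	/* The file offset is interpreted as a guest-physical address. */
	base = mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, vmfd, gpa);
	return (base == MAP_FAILED ? NULL : base);
}
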
2111 2153 static sdev_plugin_validate_t
2112 2154 vmm_sdev_validate(sdev_ctx_t ctx)
2113 2155 {
2114 2156 const char *name = sdev_ctx_name(ctx);
2115 2157 vmm_softc_t *sc;
2116 2158 sdev_plugin_validate_t ret;
2117 2159 minor_t minor;
2118 2160
2119 2161 if (sdev_ctx_vtype(ctx) != VCHR)
2120 2162 return (SDEV_VTOR_INVALID);
2121 2163
2122 2164 VERIFY3S(sdev_ctx_minor(ctx, &minor), ==, 0);
2123 2165
2124 2166 mutex_enter(&vmm_mtx);
2125 2167 if ((sc = vmm_lookup(name)) == NULL)
2126 2168 ret = SDEV_VTOR_INVALID;
2127 2169 else if (sc->vmm_minor != minor)
2128 2170 ret = SDEV_VTOR_STALE;
2129 2171 else
2130 2172 ret = SDEV_VTOR_VALID;
2131 2173 mutex_exit(&vmm_mtx);
2132 2174
2133 2175 return (ret);
2134 2176 }
2135 2177
2136 2178 static int
2137 2179 vmm_sdev_filldir(sdev_ctx_t ctx)
2138 2180 {
2139 2181 vmm_softc_t *sc;
2140 2182 int ret;
2141 2183
2142 2184 if (strcmp(sdev_ctx_path(ctx), VMM_SDEV_ROOT) != 0) {
2143 2185 cmn_err(CE_WARN, "%s: bad path '%s' != '%s'\n", __func__,
2144 2186 sdev_ctx_path(ctx), VMM_SDEV_ROOT);
2145 2187 return (EINVAL);
2146 2188 }
2147 2189
2148 2190 mutex_enter(&vmm_mtx);
2149 2191 ASSERT(vmmdev_dip != NULL);
2150 2192 for (sc = list_head(&vmm_list); sc != NULL;
2151 2193 sc = list_next(&vmm_list, sc)) {
2152 2194 if (INGLOBALZONE(curproc) || sc->vmm_zone == curzone) {
2153 2195 ret = sdev_plugin_mknod(ctx, sc->vmm_name,
2154 2196 S_IFCHR | 0600,
2155 2197 makedevice(ddi_driver_major(vmmdev_dip),
2156 2198 sc->vmm_minor));
2157 2199 } else {
2158 2200 continue;
2159 2201 }
2160 2202 if (ret != 0 && ret != EEXIST)
2161 2203 goto out;
2162 2204 }
2163 2205
2164 2206 ret = 0;
2165 2207
2166 2208 out:
2167 2209 mutex_exit(&vmm_mtx);
2168 2210 return (ret);
2169 2211 }
2170 2212
2171 2213 /* ARGSUSED */
2172 2214 static void
2173 2215 vmm_sdev_inactive(sdev_ctx_t ctx)
2174 2216 {
2175 2217 }
2176 2218
2177 2219 static sdev_plugin_ops_t vmm_sdev_ops = {
2178 2220 .spo_version = SDEV_PLUGIN_VERSION,
2179 2221 .spo_flags = SDEV_PLUGIN_SUBDIR,
2180 2222 .spo_validate = vmm_sdev_validate,
2181 2223 .spo_filldir = vmm_sdev_filldir,
2182 2224 .spo_inactive = vmm_sdev_inactive
2183 2225 };
2184 2226
2185 2227 /* ARGSUSED */
2186 2228 static int
2187 2229 vmm_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
2188 2230 {
2189 2231 int error;
2190 2232
2191 2233 switch (cmd) {
2192 2234 case DDI_INFO_DEVT2DEVINFO:
2193 2235 *result = (void *)vmmdev_dip;
2194 2236 error = DDI_SUCCESS;
2195 2237 break;
2196 2238 case DDI_INFO_DEVT2INSTANCE:
2197 2239 *result = (void *)0;
2198 2240 error = DDI_SUCCESS;
2199 2241 break;
2200 2242 default:
2201 2243 error = DDI_FAILURE;
2202 2244 break;
2203 2245 }
2204 2246 return (error);
2205 2247 }
2206 2248
2207 2249 static int
2208 2250 vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2209 2251 {
2210 2252 sdev_plugin_hdl_t sph;
2211 2253 hma_reg_t *reg = NULL;
2212 2254 boolean_t vmm_loaded = B_FALSE;
2213 2255
2214 2256 if (cmd != DDI_ATTACH) {
2215 2257 return (DDI_FAILURE);
2216 2258 }
2217 2259
2218 2260 mutex_enter(&vmmdev_mtx);
2219 2261 /* Ensure we are not already attached. */
2220 2262 if (vmmdev_dip != NULL) {
2221 2263 mutex_exit(&vmmdev_mtx);
2222 2264 return (DDI_FAILURE);
2223 2265 }
2224 2266
2225 2267 vmm_sol_glue_init();
2226 2268 vmm_arena_init();
2227 2269
2228 2270 /*
2229 2271 * Perform temporary HMA registration to determine if the system
2230 2272 * is capable.
2231 2273 */
2232 2274 if ((reg = hma_register(vmmdev_hvm_name)) == NULL) {
2233 2275 goto fail;
2234 2276 } else if (vmm_mod_load() != 0) {
2235 2277 goto fail;
2236 2278 }
2237 2279 vmm_loaded = B_TRUE;
2238 2280 hma_unregister(reg);
2239 2281 reg = NULL;
2240 2282
2241 2283 /* Create control node. Other nodes will be created on demand. */
2242 2284 if (ddi_create_minor_node(dip, "ctl", S_IFCHR,
2243 2285 VMM_CTL_MINOR, DDI_PSEUDO, 0) != 0) {
2244 2286 goto fail;
2245 2287 }
2246 2288
2247 2289 if ((sph = sdev_plugin_register("vmm", &vmm_sdev_ops, NULL)) ==
2248 2290 (sdev_plugin_hdl_t)NULL) {
2249 2291 ddi_remove_minor_node(dip, NULL);
2250 2292 goto fail;
2251 2293 }
2252 2294
2253 2295 ddi_report_dev(dip);
2254 2296 vmmdev_sdev_hdl = sph;
2255 2297 vmmdev_dip = dip;
2256 2298 mutex_exit(&vmmdev_mtx);
2257 2299 return (DDI_SUCCESS);
2258 2300
2259 2301 fail:
2260 2302 if (vmm_loaded) {
2261 2303 VERIFY0(vmm_mod_unload());
2262 2304 }
2263 2305 if (reg != NULL) {
2264 2306 hma_unregister(reg);
2265 2307 }
2266 2308 vmm_arena_fini();
2267 2309 vmm_sol_glue_cleanup();
2268 2310 mutex_exit(&vmmdev_mtx);
2269 2311 return (DDI_FAILURE);
2270 2312 }
2271 2313
2272 2314 static int
2273 2315 vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2274 2316 {
2275 2317 if (cmd != DDI_DETACH) {
2276 2318 return (DDI_FAILURE);
2277 2319 }
2278 2320
2279 2321 /*
2280 2322 * Ensure that all resources have been cleaned up.
2281 2323 *
2282 2324 * To prevent a deadlock with iommu_cleanup() we'll fail the detach if
2283 2325 * vmmdev_mtx is already held. We can't wait for vmmdev_mtx with our
2284 2326 * devinfo locked as iommu_cleanup() tries to recursively lock each
2285 2327 * devinfo, including our own, while holding vmmdev_mtx.
2286 2328 */
2287 2329 if (mutex_tryenter(&vmmdev_mtx) == 0)
2288 2330 return (DDI_FAILURE);
2289 2331
2290 2332 mutex_enter(&vmm_mtx);
2291 2333 if (!list_is_empty(&vmm_list) || !list_is_empty(&vmm_destroy_list)) {
2292 2334 mutex_exit(&vmm_mtx);
2293 2335 mutex_exit(&vmmdev_mtx);
2294 2336 return (DDI_FAILURE);
2295 2337 }
2296 2338 mutex_exit(&vmm_mtx);
2297 2339
2298 2340 VERIFY(vmmdev_sdev_hdl != (sdev_plugin_hdl_t)NULL);
2299 2341 if (sdev_plugin_unregister(vmmdev_sdev_hdl) != 0) {
2300 2342 mutex_exit(&vmmdev_mtx);
2301 2343 return (DDI_FAILURE);
2302 2344 }
2303 2345 vmmdev_sdev_hdl = (sdev_plugin_hdl_t)NULL;
2304 2346
2305 2347 /* Remove the control node. */
2306 2348 ddi_remove_minor_node(dip, "ctl");
2307 2349 vmmdev_dip = NULL;
2308 2350
2309 2351 VERIFY0(vmm_mod_unload());
2310 2352 VERIFY3U(vmmdev_hma_reg, ==, NULL);
2311 2353 vmm_arena_fini();
2312 2354 vmm_sol_glue_cleanup();
2313 2355
2314 2356 mutex_exit(&vmmdev_mtx);
2315 2357
2316 2358 return (DDI_SUCCESS);
2317 2359 }
2318 2360
2319 2361 static struct cb_ops vmm_cb_ops = {
2320 2362 vmm_open,
2321 2363 vmm_close,
2322 2364 nodev, /* strategy */
2323 2365 nodev, /* print */
2324 2366 nodev, /* dump */
2325 2367 nodev, /* read */
2326 2368 nodev, /* write */
2327 2369 vmm_ioctl,
2328 2370 nodev, /* devmap */
2329 2371 nodev, /* mmap */
2330 2372 vmm_segmap,
2331 2373 nochpoll, /* poll */
2332 2374 ddi_prop_op,
2333 2375 NULL,
2334 2376 D_NEW | D_MP | D_DEVMAP
2335 2377 };
2336 2378
2337 2379 static struct dev_ops vmm_ops = {
2338 2380 DEVO_REV,
2339 2381 0,
2340 2382 vmm_info,
2341 2383 nulldev, /* identify */
2342 2384 nulldev, /* probe */
2343 2385 vmm_attach,
2344 2386 vmm_detach,
2345 2387 nodev, /* reset */
2346 2388 &vmm_cb_ops,
2347 2389 (struct bus_ops *)NULL
2348 2390 };
2349 2391
2350 2392 static struct modldrv modldrv = {
2351 2393 &mod_driverops,
2352 2394 "bhyve vmm",
2353 2395 &vmm_ops
2354 2396 };
2355 2397
2356 2398 static struct modlinkage modlinkage = {
2357 2399 MODREV_1,
2358 2400 &modldrv,
2359 2401 NULL
2360 2402 };
2361 2403
2362 2404 int
2363 2405 _init(void)
2364 2406 {
2365 2407 int error;
2366 2408
2367 2409 sysinit();
2368 2410
2369 2411 mutex_init(&vmmdev_mtx, NULL, MUTEX_DRIVER, NULL);
2370 2412 mutex_init(&vmm_mtx, NULL, MUTEX_DRIVER, NULL);
2371 2413 list_create(&vmm_list, sizeof (vmm_softc_t),
2372 2414 offsetof(vmm_softc_t, vmm_node));
2373 2415 list_create(&vmm_destroy_list, sizeof (vmm_softc_t),
2374 2416 offsetof(vmm_softc_t, vmm_node));
2375 2417 vmm_minors = id_space_create("vmm_minors", VMM_CTL_MINOR + 1, MAXMIN32);
2376 2418
2377 2419 error = ddi_soft_state_init(&vmm_statep, sizeof (vmm_softc_t), 0);
2378 2420 if (error) {
2379 2421 return (error);
2380 2422 }
2381 2423
2382 2424 vmm_zsd_init();
2383 2425
2384 2426 error = mod_install(&modlinkage);
2385 2427 if (error) {
2386 2428 ddi_soft_state_fini(&vmm_statep);
2387 2429 vmm_zsd_fini();
2388 2430 }
2389 2431
2390 2432 return (error);
2391 2433 }
2392 2434
2393 2435 int
2394 2436 _fini(void)
2395 2437 {
2396 2438 int error;
2397 2439
2398 2440 error = mod_remove(&modlinkage);
2399 2441 if (error) {
2400 2442 return (error);
2401 2443 }
2402 2444
2403 2445 vmm_zsd_fini();
2404 2446
2405 2447 ddi_soft_state_fini(&vmm_statep);
2406 2448
2407 2449 return (0);
2408 2450 }
2409 2451
2410 2452 int
2411 2453 _info(struct modinfo *modinfop)
2412 2454 {
2413 2455 return (mod_info(&modlinkage, modinfop));
2414 2456 }