/*
 * This file and its contents are supplied under the terms of the
 * Common Development and Distribution License ("CDDL"), version 1.0.
 * You may only use this file in accordance with the terms of version
 * 1.0 of the CDDL.
 *
 * A full copy of the text of the CDDL should have accompanied this
 * source.  A copy of the CDDL is also available via the Internet at
 * http://www.illumos.org/license/CDDL.
 */
/* This file is dual-licensed; see usr/src/contrib/bhyve/LICENSE */

/*
 * Copyright 2015 Pluribus Networks Inc.
 * Copyright 2019 Joyent, Inc.
 * Copyright 2020 OmniOS Community Edition (OmniOSce) Association.
 * Copyright 2021 Oxide Computer Company
 */

#include <sys/types.h>
#include <sys/conf.h>
#include <sys/cpuvar.h>
#include <sys/ioccom.h>
#include <sys/stat.h>
#include <sys/vmsystm.h>
#include <sys/ddi.h>
#include <sys/mkdev.h>
#include <sys/sunddi.h>
#include <sys/fs/dv_node.h>
#include <sys/cpuset.h>
#include <sys/id_space.h>
#include <sys/fs/sdev_plugin.h>
#include <sys/smt.h>
#include <sys/kstat.h>

#include <sys/kernel.h>
#include <sys/hma.h>
#include <sys/x86_archext.h>
#include <x86/apicreg.h>

#include <sys/vmm.h>
#include <sys/vmm_kernel.h>
#include <sys/vmm_instruction_emul.h>
#include <sys/vmm_dev.h>
#include <sys/vmm_impl.h>
#include <sys/vmm_drv.h>
#include <sys/vmm_vm.h>

#include <vm/seg_dev.h>

#include "io/ppt.h"
#include "io/vatpic.h"
#include "io/vioapic.h"
#include "io/vrtc.h"
#include "io/vhpet.h"
#include "io/vpmtmr.h"
#include "vmm_lapic.h"
#include "vmm_stat.h"
#include "vmm_util.h"

/*
 * Locking details:
 *
 * Driver-wide data (vmmdev_*), including HMA and sdev registration, is
 * protected by vmmdev_mtx.  The list of vmm_softc_t instances and related data
 * (vmm_*) are protected by vmm_mtx.  Actions requiring both locks must acquire
 * vmmdev_mtx before vmm_mtx.  The sdev plugin functions must not attempt to
 * acquire vmmdev_mtx, as they could deadlock with plugin unregistration.
 */
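
/*
 * For illustration only (not an actual consumer in this file), code needing
 * both locks would follow the ordering documented above:
 *
 *	mutex_enter(&vmmdev_mtx);
 *	mutex_enter(&vmm_mtx);
 *	...
 *	mutex_exit(&vmm_mtx);
 *	mutex_exit(&vmmdev_mtx);
 */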

static kmutex_t         vmmdev_mtx;
static dev_info_t       *vmmdev_dip;
static hma_reg_t        *vmmdev_hma_reg;
static uint_t           vmmdev_hma_ref;
static sdev_plugin_hdl_t vmmdev_sdev_hdl;

static kmutex_t         vmm_mtx;
static list_t           vmm_list;
static list_t           vmm_destroy_list;
static id_space_t       *vmm_minors;
static void             *vmm_statep;

static const char *vmmdev_hvm_name = "bhyve";

/* For sdev plugin (/dev) */
#define VMM_SDEV_ROOT "/dev/vmm"

/* From uts/i86pc/io/vmm/intel/vmx.c */
extern int vmx_x86_supported(const char **);

/* Holds and hooks from drivers external to vmm */
struct vmm_hold {
        list_node_t     vmh_node;
        vmm_softc_t     *vmh_sc;
        boolean_t       vmh_release_req;
        uint_t          vmh_ioport_hook_cnt;
};

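/*
 * A lease granted to a consumer of the driver API: it tracks the VM read lock
 * which the lease represents and the callback used to break the lease when a
 * write-lock attempt arrives.
 */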
struct vmm_lease {
        list_node_t             vml_node;
        struct vm               *vml_vm;
        boolean_t               vml_expired;
        boolean_t               (*vml_expire_func)(void *);
        void                    *vml_expire_arg;
        list_node_t             vml_expire_node;
        struct vmm_hold         *vml_hold;
};

static int vmm_drv_block_hook(vmm_softc_t *, boolean_t);
static void vmm_lease_break_locked(vmm_softc_t *, vmm_lease_t *);
static int vmm_kstat_alloc(vmm_softc_t *, minor_t, const cred_t *);
static void vmm_kstat_init(vmm_softc_t *);
static void vmm_kstat_fini(vmm_softc_t *);

static int
vmmdev_get_memseg(vmm_softc_t *sc, struct vm_memseg *mseg)
{
        int error;
        bool sysmem;

        error = vm_get_memseg(sc->vmm_vm, mseg->segid, &mseg->len, &sysmem,
            NULL);
        if (error || mseg->len == 0)
                return (error);

        if (!sysmem) {
                vmm_devmem_entry_t *de;
                list_t *dl = &sc->vmm_devmem_list;

                for (de = list_head(dl); de != NULL; de = list_next(dl, de)) {
                        if (de->vde_segid == mseg->segid) {
                                break;
                        }
                }
                if (de != NULL) {
                        (void) strlcpy(mseg->name, de->vde_name,
                            sizeof (mseg->name));
                }
        } else {
                bzero(mseg->name, sizeof (mseg->name));
        }

        return (error);
}

/*
 * The 'devmem' hack:
 *
 * On native FreeBSD, bhyve consumers are allowed to create 'devmem' segments
 * in the vm which appear with their own name related to the vm under /dev.
 * Since this would be a hassle from an sdev perspective and would require a
 * new cdev interface (or complicate the existing one), we choose to implement
 * this in a different manner.  When 'devmem' mappings are created, an
 * identifying off_t is communicated back out to userspace.  That off_t,
 * residing above the normal guest memory space, can be used to mmap the
 * 'devmem' mapping from the already-open vm device.
 */
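
/*
 * As a rough sketch of that flow (illustrative only; 'vmfd' is assumed to be
 * an open vmm instance device and 'len' the segment length), a userspace
 * consumer would resolve the offset and then mmap through the same fd:
 *
 *	struct vm_devmem_offset vdo = { .segid = segid };
 *
 *	if (ioctl(vmfd, VM_DEVMEM_GETOFFSET, &vdo) == 0) {
 *		void *p = mmap(NULL, len, PROT_READ | PROT_WRITE,
 *		    MAP_SHARED, vmfd, vdo.offset);
 *	}
 */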

static int
vmmdev_devmem_create(vmm_softc_t *sc, struct vm_memseg *mseg, const char *name)
{
        off_t map_offset;
        vmm_devmem_entry_t *entry;

        if (list_is_empty(&sc->vmm_devmem_list)) {
                map_offset = VM_DEVMEM_START;
        } else {
                entry = list_tail(&sc->vmm_devmem_list);
                map_offset = entry->vde_off + entry->vde_len;
                if (map_offset < entry->vde_off) {
                        /* Do not tolerate overflow */
                        return (ERANGE);
                }
                /*
                 * XXXJOY: We could choose to search the list for duplicate
                 * names and toss an error.  Since we're using the offset
                 * method for now, it does not make much of a difference.
                 */
        }

        entry = kmem_zalloc(sizeof (*entry), KM_SLEEP);
        entry->vde_segid = mseg->segid;
        entry->vde_len = mseg->len;
        entry->vde_off = map_offset;
        (void) strlcpy(entry->vde_name, name, sizeof (entry->vde_name));
        list_insert_tail(&sc->vmm_devmem_list, entry);

        return (0);
}

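/*
 * Translate a devmem mmap offset (at or above VM_DEVMEM_START) back into its
 * segment ID and the offset within that segment, rejecting ranges which
 * overflow or are not contained within a single devmem entry.
 */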
static boolean_t
vmmdev_devmem_segid(vmm_softc_t *sc, off_t off, off_t len, int *segidp,
    off_t *map_offp)
{
        list_t *dl = &sc->vmm_devmem_list;
        vmm_devmem_entry_t *de = NULL;
        const off_t map_end = off + len;

        VERIFY(off >= VM_DEVMEM_START);

        if (map_end < off) {
                /* No match on overflow */
                return (B_FALSE);
        }

        for (de = list_head(dl); de != NULL; de = list_next(dl, de)) {
                const off_t item_end = de->vde_off + de->vde_len;

                if (de->vde_off <= off && item_end >= map_end) {
                        *segidp = de->vde_segid;
                        *map_offp = off - de->vde_off;
                        return (B_TRUE);
                }
        }
        return (B_FALSE);
}

static void
vmmdev_devmem_purge(vmm_softc_t *sc)
{
        vmm_devmem_entry_t *entry;

        while ((entry = list_remove_head(&sc->vmm_devmem_list)) != NULL) {
                kmem_free(entry, sizeof (*entry));
        }
}

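/*
 * Allocate a memory segment for the VM: named segments are treated as
 * 'devmem' (and registered for offset-based mmap, per the hack above), while
 * anonymous segments become ordinary sysmem.
 */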
static int
vmmdev_alloc_memseg(vmm_softc_t *sc, struct vm_memseg *mseg)
{
        int error;
        bool sysmem = true;

        if (VM_MEMSEG_NAME(mseg)) {
                sysmem = false;
        }
        error = vm_alloc_memseg(sc->vmm_vm, mseg->segid, mseg->len, sysmem);

        if (error == 0 && VM_MEMSEG_NAME(mseg)) {
                /*
                 * Rather than create a whole fresh device from which userspace
                 * can mmap this segment, instead make it available at an
                 * offset above where the main guest memory resides.
                 */
                error = vmmdev_devmem_create(sc, mseg, mseg->name);
                if (error != 0) {
                        vm_free_memseg(sc->vmm_vm, mseg->segid);
                }
        }
        return (error);
}

/*
 * Resource Locking and Exclusion
 *
 * Much of bhyve depends on key portions of VM state, such as the guest memory
 * map, to remain unchanged while the guest is running.  As ported from
 * FreeBSD, the initial strategy for this resource exclusion hinged on gating
 * access to the instance vCPUs.  Threads acting on a single vCPU, like those
 * performing the work of actually running the guest in VMX/SVM, would lock
 * only that vCPU during ioctl() entry.  For ioctls which would change VM-wide
 * state, all of the vCPUs would be first locked, ensuring that the
 * operation(s) could complete without any other threads stumbling into
 * intermediate states.
 *
 * This approach is largely effective for bhyve.  Common operations, such as
 * running the vCPUs, steer clear of lock contention.  The model begins to
 * break down for operations which do not occur in the context of a specific
 * vCPU.  LAPIC MSI delivery, for example, may be initiated from a worker
 * thread in the bhyve process.  In order to properly protect those vCPU-less
 * operations from encountering invalid states, additional locking is required.
 * This was solved by forcing those operations to lock the VM_MAXCPU-1 vCPU.
 * It does mean that class of operations will be serialized on locking the
 * specific vCPU and that instances sized at VM_MAXCPU will potentially see
 * undue contention on the VM_MAXCPU-1 vCPU.
 *
 * In order to address the shortcomings of this model, the concept of a
 * read/write lock has been added to bhyve.  Operations which change
 * fundamental aspects of a VM (such as the memory map) must acquire the write
 * lock, which also implies locking all of the vCPUs and waiting for all read
 * lock holders to release.  While it increases the cost and waiting time for
 * those few operations, it allows most hot-path operations on the VM (which
 * depend on its configuration remaining stable) to occur with minimal locking.
 *
 * Consumers of the Driver API (see below) are a special case when it comes to
 * this locking, since they may hold a read lock via the drv_lease mechanism
 * for an extended period of time.  Rather than forcing those consumers to
 * continuously poll for a write lock attempt, the lease system forces them to
 * provide a release callback to trigger their clean-up (and potential later
 * reacquisition) of the read lock.
 */
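
/*
 * A sketch of that lease contract from a hypothetical driver-API consumer
 * (vmm_drv_lease_sign() is the signing entry point; the callback name and
 * argument here are invented for illustration).  Returning B_TRUE from the
 * expire callback asks vmm to break the lease synchronously on the consumer's
 * behalf, while B_FALSE means the consumer will release it itself:
 *
 *	static boolean_t
 *	my_lease_expired(void *arg)
 *	{
 *		// Cease use of lease-protected state before returning.
 *		return (B_TRUE);
 *	}
 *
 *	lease = vmm_drv_lease_sign(hold, my_lease_expired, arg);
 */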

static void
vcpu_lock_one(vmm_softc_t *sc, int vcpu)
{
        ASSERT(vcpu >= 0 && vcpu < VM_MAXCPU);

        /*
         * Since this state transition uses from_idle=true, it should not
         * fail, but rather block until it can succeed.
         */
        VERIFY0(vcpu_set_state(sc->vmm_vm, vcpu, VCPU_FROZEN, true));
}

static void
vcpu_unlock_one(vmm_softc_t *sc, int vcpu)
{
        ASSERT(vcpu >= 0 && vcpu < VM_MAXCPU);

        VERIFY3U(vcpu_get_state(sc->vmm_vm, vcpu, NULL), ==, VCPU_FROZEN);
        vcpu_set_state(sc->vmm_vm, vcpu, VCPU_IDLE, false);
}

static void
vmm_read_lock(vmm_softc_t *sc)
{
        rw_enter(&sc->vmm_rwlock, RW_READER);
}

static void
vmm_read_unlock(vmm_softc_t *sc)
{
        rw_exit(&sc->vmm_rwlock);
}

static void
vmm_write_lock(vmm_softc_t *sc)
{
        int maxcpus;

        /* First lock all the vCPUs */
        maxcpus = vm_get_maxcpus(sc->vmm_vm);
        for (int vcpu = 0; vcpu < maxcpus; vcpu++) {
                vcpu_lock_one(sc, vcpu);
        }

        mutex_enter(&sc->vmm_lease_lock);
        VERIFY3U(sc->vmm_lease_blocker, !=, UINT_MAX);
        sc->vmm_lease_blocker++;
        if (sc->vmm_lease_blocker == 1) {
                list_t *list = &sc->vmm_lease_list;
                vmm_lease_t *lease = list_head(list);

                while (lease != NULL) {
                        boolean_t sync_break = B_FALSE;

                        if (!lease->vml_expired) {
                                void *arg = lease->vml_expire_arg;
                                lease->vml_expired = B_TRUE;
                                sync_break = lease->vml_expire_func(arg);
                        }

                        if (sync_break) {
                                vmm_lease_t *next;

                                /*
                                 * These leases which are synchronously broken
                                 * result in vmm_read_unlock() calls from a
                                 * different thread than the corresponding
                                 * vmm_read_lock().  This is acceptable, given
                                 * that the rwlock underpinning the whole
                                 * mechanism tolerates the behavior.  This
                                 * flexibility is _only_ afforded to VM read
                                 * lock (RW_READER) holders.
                                 */
                                next = list_next(list, lease);
                                vmm_lease_break_locked(sc, lease);
                                lease = next;
                        } else {
                                lease = list_next(list, lease);
                        }
                }
        }
        mutex_exit(&sc->vmm_lease_lock);

        rw_enter(&sc->vmm_rwlock, RW_WRITER);
        /*
         * For now, the 'maxcpus' value for an instance is fixed at the
         * compile-time constant of VM_MAXCPU at creation.  If this changes in
         * the future, allowing for dynamic vCPU resource sizing, acquisition
         * of the write lock will need to be wary of such changes.
         */
        VERIFY(maxcpus == vm_get_maxcpus(sc->vmm_vm));
}

static void
vmm_write_unlock(vmm_softc_t *sc)
{
        int maxcpus;

        mutex_enter(&sc->vmm_lease_lock);
        VERIFY3U(sc->vmm_lease_blocker, !=, 0);
        sc->vmm_lease_blocker--;
        if (sc->vmm_lease_blocker == 0) {
                cv_broadcast(&sc->vmm_lease_cv);
        }
        mutex_exit(&sc->vmm_lease_lock);

        /*
         * The VM write lock _must_ be released from the same thread it was
         * acquired in, unlike the read lock.
         */
        VERIFY(rw_write_held(&sc->vmm_rwlock));
        rw_exit(&sc->vmm_rwlock);

        /* Unlock all the vCPUs */
        maxcpus = vm_get_maxcpus(sc->vmm_vm);
        for (int vcpu = 0; vcpu < maxcpus; vcpu++) {
                vcpu_unlock_one(sc, vcpu);
        }
}

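/*
 * Primary ioctl handler for a VM instance: acquire the exclusion resource
 * appropriate to the command (a single vCPU, the VM read lock, or the VM
 * write lock), dispatch the operation, then release the resource on exit.
 */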
static int
vmmdev_do_ioctl(vmm_softc_t *sc, int cmd, intptr_t arg, int md,
    cred_t *credp, int *rvalp)
{
        int error = 0, vcpu = -1;
        void *datap = (void *)arg;
        enum vm_lock_type {
                LOCK_NONE = 0,
                LOCK_VCPU,
                LOCK_READ_HOLD,
                LOCK_WRITE_HOLD
        } lock_type = LOCK_NONE;

        /* Acquire any exclusion resources needed for the operation. */
        switch (cmd) {
        case VM_RUN:
        case VM_GET_REGISTER:
        case VM_SET_REGISTER:
        case VM_GET_SEGMENT_DESCRIPTOR:
        case VM_SET_SEGMENT_DESCRIPTOR:
        case VM_GET_REGISTER_SET:
        case VM_SET_REGISTER_SET:
        case VM_INJECT_EXCEPTION:
        case VM_GET_CAPABILITY:
        case VM_SET_CAPABILITY:
        case VM_PPTDEV_MSI:
        case VM_PPTDEV_MSIX:
        case VM_SET_X2APIC_STATE:
        case VM_GLA2GPA:
        case VM_GLA2GPA_NOFAULT:
        case VM_ACTIVATE_CPU:
        case VM_SET_INTINFO:
        case VM_GET_INTINFO:
        case VM_RESTART_INSTRUCTION:
        case VM_SET_KERNEMU_DEV:
        case VM_GET_KERNEMU_DEV:
        case VM_RESET_CPU:
        case VM_GET_RUN_STATE:
        case VM_SET_RUN_STATE:
                /*
                 * Copy in the ID of the vCPU chosen for this operation.
                 * Since a nefarious caller could update their struct between
                 * this locking and when the rest of the ioctl data is copied
                 * in, it is _critical_ that this local 'vcpu' variable be used
                 * rather than the in-struct one when performing the ioctl.
                 */
                if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
                        return (EFAULT);
                }
                if (vcpu < 0 || vcpu >= vm_get_maxcpus(sc->vmm_vm)) {
                        return (EINVAL);
                }
                vcpu_lock_one(sc, vcpu);
                lock_type = LOCK_VCPU;
                break;

        case VM_REINIT:
        case VM_BIND_PPTDEV:
        case VM_UNBIND_PPTDEV:
        case VM_MAP_PPTDEV_MMIO:
        case VM_UNMAP_PPTDEV_MMIO:
        case VM_ALLOC_MEMSEG:
        case VM_MMAP_MEMSEG:
        case VM_MUNMAP_MEMSEG:
        case VM_WRLOCK_CYCLE:
        case VM_PMTMR_LOCATE:
                vmm_write_lock(sc);
                lock_type = LOCK_WRITE_HOLD;
                break;

        case VM_GET_GPA_PMAP:
        case VM_GET_MEMSEG:
        case VM_MMAP_GETNEXT:
        case VM_LAPIC_IRQ:
        case VM_INJECT_NMI:
        case VM_IOAPIC_ASSERT_IRQ:
        case VM_IOAPIC_DEASSERT_IRQ:
        case VM_IOAPIC_PULSE_IRQ:
        case VM_LAPIC_MSI:
        case VM_LAPIC_LOCAL_IRQ:
        case VM_GET_X2APIC_STATE:
        case VM_RTC_READ:
        case VM_RTC_WRITE:
        case VM_RTC_SETTIME:
        case VM_RTC_GETTIME:
        case VM_PPTDEV_DISABLE_MSIX:
        case VM_DEVMEM_GETOFFSET:
                vmm_read_lock(sc);
                lock_type = LOCK_READ_HOLD;
                break;

        case VM_IOAPIC_PINCOUNT:
        default:
                break;
        }

        /* Execute the primary logic for the ioctl. */
        switch (cmd) {
        case VM_RUN: {
                struct vm_entry entry;

                if (ddi_copyin(datap, &entry, sizeof (entry), md)) {
                        error = EFAULT;
                        break;
                }

                if (!(curthread->t_schedflag & TS_VCPU))
                        smt_mark_as_vcpu();

                error = vm_run(sc->vmm_vm, vcpu, &entry);

                /*
                 * Unexpected states in vm_run() are expressed through positive
                 * errno-oriented return values.  VM states which expect further
                 * processing in userspace (necessary context via exitinfo) are
                 * expressed through negative return values.  For the time being
                 * a return value of 0 is not expected from vm_run().
                 */
                ASSERT(error != 0);
                if (error < 0) {
                        const struct vm_exit *vme;
                        void *outp = entry.exit_data;

                        error = 0;
                        vme = vm_exitinfo(sc->vmm_vm, vcpu);
                        if (ddi_copyout(vme, outp, sizeof (*vme), md)) {
                                error = EFAULT;
                        }
                }
                break;
        }
        case VM_SUSPEND: {
                struct vm_suspend vmsuspend;

                if (ddi_copyin(datap, &vmsuspend, sizeof (vmsuspend), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_suspend(sc->vmm_vm, vmsuspend.how);
                break;
        }
        case VM_REINIT:
                if ((error = vmm_drv_block_hook(sc, B_TRUE)) != 0) {
                        /*
                         * The VM instance should be free of driver-attached
                         * hooks during the reinitialization process.
                         */
                        break;
                }
                error = vm_reinit(sc->vmm_vm);
                (void) vmm_drv_block_hook(sc, B_FALSE);
                break;
        case VM_STAT_DESC: {
                struct vm_stat_desc statdesc;

                if (ddi_copyin(datap, &statdesc, sizeof (statdesc), md)) {
                        error = EFAULT;
                        break;
                }
                error = vmm_stat_desc_copy(statdesc.index, statdesc.desc,
                    sizeof (statdesc.desc));
                if (error == 0 &&
                    ddi_copyout(&statdesc, datap, sizeof (statdesc), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_STATS_IOC: {
                struct vm_stats vmstats;

                CTASSERT(MAX_VM_STATS >= MAX_VMM_STAT_ELEMS);
                if (ddi_copyin(datap, &vmstats, sizeof (vmstats), md)) {
                        error = EFAULT;
                        break;
                }
                hrt2tv(gethrtime(), &vmstats.tv);
                error = vmm_stat_copy(sc->vmm_vm, vmstats.cpuid,
                    &vmstats.num_entries, vmstats.statbuf);
                if (error == 0 &&
                    ddi_copyout(&vmstats, datap, sizeof (vmstats), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }

        case VM_PPTDEV_MSI: {
                struct vm_pptdev_msi pptmsi;

                if (ddi_copyin(datap, &pptmsi, sizeof (pptmsi), md)) {
                        error = EFAULT;
                        break;
                }
                error = ppt_setup_msi(sc->vmm_vm, pptmsi.vcpu, pptmsi.pptfd,
                    pptmsi.addr, pptmsi.msg, pptmsi.numvec);
                break;
        }
        case VM_PPTDEV_MSIX: {
                struct vm_pptdev_msix pptmsix;

                if (ddi_copyin(datap, &pptmsix, sizeof (pptmsix), md)) {
                        error = EFAULT;
                        break;
                }
                error = ppt_setup_msix(sc->vmm_vm, pptmsix.vcpu, pptmsix.pptfd,
                    pptmsix.idx, pptmsix.addr, pptmsix.msg,
                    pptmsix.vector_control);
                break;
        }
        case VM_PPTDEV_DISABLE_MSIX: {
                struct vm_pptdev pptdev;

                if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) {
                        error = EFAULT;
                        break;
                }
                error = ppt_disable_msix(sc->vmm_vm, pptdev.pptfd);
                break;
        }
        case VM_MAP_PPTDEV_MMIO: {
                struct vm_pptdev_mmio pptmmio;

                if (ddi_copyin(datap, &pptmmio, sizeof (pptmmio), md)) {
                        error = EFAULT;
                        break;
                }
                error = ppt_map_mmio(sc->vmm_vm, pptmmio.pptfd, pptmmio.gpa,
                    pptmmio.len, pptmmio.hpa);
                break;
        }
        case VM_UNMAP_PPTDEV_MMIO: {
                struct vm_pptdev_mmio pptmmio;

                if (ddi_copyin(datap, &pptmmio, sizeof (pptmmio), md)) {
                        error = EFAULT;
                        break;
                }
                error = ppt_unmap_mmio(sc->vmm_vm, pptmmio.pptfd, pptmmio.gpa,
                    pptmmio.len);
                break;
        }
        case VM_BIND_PPTDEV: {
                struct vm_pptdev pptdev;

                if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_assign_pptdev(sc->vmm_vm, pptdev.pptfd);
                break;
        }
        case VM_UNBIND_PPTDEV: {
                struct vm_pptdev pptdev;

                if (ddi_copyin(datap, &pptdev, sizeof (pptdev), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_unassign_pptdev(sc->vmm_vm, pptdev.pptfd);
                break;
        }
        case VM_GET_PPTDEV_LIMITS: {
                struct vm_pptdev_limits pptlimits;

                if (ddi_copyin(datap, &pptlimits, sizeof (pptlimits), md)) {
                        error = EFAULT;
                        break;
                }
                error = ppt_get_limits(sc->vmm_vm, pptlimits.pptfd,
                    &pptlimits.msi_limit, &pptlimits.msix_limit);
                if (error == 0 &&
                    ddi_copyout(&pptlimits, datap, sizeof (pptlimits), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_INJECT_EXCEPTION: {
                struct vm_exception vmexc;
                if (ddi_copyin(datap, &vmexc, sizeof (vmexc), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_inject_exception(sc->vmm_vm, vcpu, vmexc.vector,
                    vmexc.error_code_valid, vmexc.error_code,
                    vmexc.restart_instruction);
                break;
        }
        case VM_INJECT_NMI: {
                struct vm_nmi vmnmi;

                if (ddi_copyin(datap, &vmnmi, sizeof (vmnmi), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_inject_nmi(sc->vmm_vm, vmnmi.cpuid);
                break;
        }
        case VM_LAPIC_IRQ: {
                struct vm_lapic_irq vmirq;

                if (ddi_copyin(datap, &vmirq, sizeof (vmirq), md)) {
                        error = EFAULT;
                        break;
                }
                error = lapic_intr_edge(sc->vmm_vm, vmirq.cpuid, vmirq.vector);
                break;
        }
        case VM_LAPIC_LOCAL_IRQ: {
                struct vm_lapic_irq vmirq;

                if (ddi_copyin(datap, &vmirq, sizeof (vmirq), md)) {
                        error = EFAULT;
                        break;
                }
                error = lapic_set_local_intr(sc->vmm_vm, vmirq.cpuid,
                    vmirq.vector);
                break;
        }
        case VM_LAPIC_MSI: {
                struct vm_lapic_msi vmmsi;

                if (ddi_copyin(datap, &vmmsi, sizeof (vmmsi), md)) {
                        error = EFAULT;
                        break;
                }
                error = lapic_intr_msi(sc->vmm_vm, vmmsi.addr, vmmsi.msg);
                break;
        }

        case VM_IOAPIC_ASSERT_IRQ: {
                struct vm_ioapic_irq ioapic_irq;

                if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) {
                        error = EFAULT;
                        break;
                }
                error = vioapic_assert_irq(sc->vmm_vm, ioapic_irq.irq);
                break;
        }
        case VM_IOAPIC_DEASSERT_IRQ: {
                struct vm_ioapic_irq ioapic_irq;

                if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) {
                        error = EFAULT;
                        break;
                }
                error = vioapic_deassert_irq(sc->vmm_vm, ioapic_irq.irq);
                break;
        }
        case VM_IOAPIC_PULSE_IRQ: {
                struct vm_ioapic_irq ioapic_irq;

                if (ddi_copyin(datap, &ioapic_irq, sizeof (ioapic_irq), md)) {
                        error = EFAULT;
                        break;
                }
                error = vioapic_pulse_irq(sc->vmm_vm, ioapic_irq.irq);
                break;
        }
        case VM_IOAPIC_PINCOUNT: {
                int pincount;

                pincount = vioapic_pincount(sc->vmm_vm);
                if (ddi_copyout(&pincount, datap, sizeof (int), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }

        case VM_ISA_ASSERT_IRQ: {
                struct vm_isa_irq isa_irq;

                if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) {
                        error = EFAULT;
                        break;
                }
                error = vatpic_assert_irq(sc->vmm_vm, isa_irq.atpic_irq);
                if (error == 0 && isa_irq.ioapic_irq != -1) {
                        error = vioapic_assert_irq(sc->vmm_vm,
                            isa_irq.ioapic_irq);
                }
                break;
        }
        case VM_ISA_DEASSERT_IRQ: {
                struct vm_isa_irq isa_irq;

                if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) {
                        error = EFAULT;
                        break;
                }
                error = vatpic_deassert_irq(sc->vmm_vm, isa_irq.atpic_irq);
                if (error == 0 && isa_irq.ioapic_irq != -1) {
                        error = vioapic_deassert_irq(sc->vmm_vm,
                            isa_irq.ioapic_irq);
                }
                break;
        }
        case VM_ISA_PULSE_IRQ: {
                struct vm_isa_irq isa_irq;

                if (ddi_copyin(datap, &isa_irq, sizeof (isa_irq), md)) {
                        error = EFAULT;
                        break;
                }
                error = vatpic_pulse_irq(sc->vmm_vm, isa_irq.atpic_irq);
                if (error == 0 && isa_irq.ioapic_irq != -1) {
                        error = vioapic_pulse_irq(sc->vmm_vm,
                            isa_irq.ioapic_irq);
                }
                break;
        }
        case VM_ISA_SET_IRQ_TRIGGER: {
                struct vm_isa_irq_trigger isa_irq_trigger;

                if (ddi_copyin(datap, &isa_irq_trigger,
                    sizeof (isa_irq_trigger), md)) {
                        error = EFAULT;
                        break;
                }
                error = vatpic_set_irq_trigger(sc->vmm_vm,
                    isa_irq_trigger.atpic_irq, isa_irq_trigger.trigger);
                break;
        }

        case VM_MMAP_GETNEXT: {
                struct vm_memmap mm;

                if (ddi_copyin(datap, &mm, sizeof (mm), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_mmap_getnext(sc->vmm_vm, &mm.gpa, &mm.segid,
                    &mm.segoff, &mm.len, &mm.prot, &mm.flags);
                if (error == 0 && ddi_copyout(&mm, datap, sizeof (mm), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_MMAP_MEMSEG: {
                struct vm_memmap mm;

                if (ddi_copyin(datap, &mm, sizeof (mm), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_mmap_memseg(sc->vmm_vm, mm.gpa, mm.segid, mm.segoff,
                    mm.len, mm.prot, mm.flags);
                break;
        }
        case VM_MUNMAP_MEMSEG: {
                struct vm_munmap mu;

                if (ddi_copyin(datap, &mu, sizeof (mu), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_munmap_memseg(sc->vmm_vm, mu.gpa, mu.len);
                break;
        }
        case VM_ALLOC_MEMSEG: {
                struct vm_memseg vmseg;

                if (ddi_copyin(datap, &vmseg, sizeof (vmseg), md)) {
                        error = EFAULT;
                        break;
                }
                error = vmmdev_alloc_memseg(sc, &vmseg);
                break;
        }
        case VM_GET_MEMSEG: {
                struct vm_memseg vmseg;

                if (ddi_copyin(datap, &vmseg, sizeof (vmseg), md)) {
                        error = EFAULT;
                        break;
                }
                error = vmmdev_get_memseg(sc, &vmseg);
                if (error == 0 &&
                    ddi_copyout(&vmseg, datap, sizeof (vmseg), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_GET_REGISTER: {
                struct vm_register vmreg;

                if (ddi_copyin(datap, &vmreg, sizeof (vmreg), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_get_register(sc->vmm_vm, vcpu, vmreg.regnum,
                    &vmreg.regval);
                if (error == 0 &&
                    ddi_copyout(&vmreg, datap, sizeof (vmreg), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_SET_REGISTER: {
                struct vm_register vmreg;

                if (ddi_copyin(datap, &vmreg, sizeof (vmreg), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_set_register(sc->vmm_vm, vcpu, vmreg.regnum,
                    vmreg.regval);
                break;
        }
        case VM_SET_SEGMENT_DESCRIPTOR: {
                struct vm_seg_desc vmsegd;

                if (ddi_copyin(datap, &vmsegd, sizeof (vmsegd), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_set_seg_desc(sc->vmm_vm, vcpu, vmsegd.regnum,
                    &vmsegd.desc);
                break;
        }
        case VM_GET_SEGMENT_DESCRIPTOR: {
                struct vm_seg_desc vmsegd;

                if (ddi_copyin(datap, &vmsegd, sizeof (vmsegd), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_get_seg_desc(sc->vmm_vm, vcpu, vmsegd.regnum,
                    &vmsegd.desc);
                if (error == 0 &&
                    ddi_copyout(&vmsegd, datap, sizeof (vmsegd), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_GET_REGISTER_SET: {
                struct vm_register_set vrs;
                int regnums[VM_REG_LAST];
                uint64_t regvals[VM_REG_LAST];

                if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
                        error = EFAULT;
                        break;
                }
                if (vrs.count > VM_REG_LAST || vrs.count == 0) {
                        error = EINVAL;
                        break;
                }
                if (ddi_copyin(vrs.regnums, regnums,
                    sizeof (int) * vrs.count, md)) {
                        error = EFAULT;
                        break;
                }

                error = 0;
                for (uint_t i = 0; i < vrs.count && error == 0; i++) {
                        if (regnums[i] < 0) {
                                error = EINVAL;
                                break;
                        }
                        error = vm_get_register(sc->vmm_vm, vcpu, regnums[i],
                            &regvals[i]);
                }
                if (error == 0 && ddi_copyout(regvals, vrs.regvals,
                    sizeof (uint64_t) * vrs.count, md)) {
                        error = EFAULT;
                }
                break;
        }
        case VM_SET_REGISTER_SET: {
                struct vm_register_set vrs;
                int regnums[VM_REG_LAST];
                uint64_t regvals[VM_REG_LAST];

                if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
                        error = EFAULT;
                        break;
                }
                if (vrs.count > VM_REG_LAST || vrs.count == 0) {
                        error = EINVAL;
                        break;
                }
                if (ddi_copyin(vrs.regnums, regnums,
                    sizeof (int) * vrs.count, md)) {
                        error = EFAULT;
                        break;
                }
                if (ddi_copyin(vrs.regvals, regvals,
                    sizeof (uint64_t) * vrs.count, md)) {
                        error = EFAULT;
                        break;
                }

                error = 0;
                for (uint_t i = 0; i < vrs.count && error == 0; i++) {
                        /*
                         * Setting registers in a set is not atomic, since a
                         * failure in the middle of the set will cause a
                         * bail-out and inconsistent register state.  Callers
                         * should be wary of this.
                         */
                        if (regnums[i] < 0) {
                                error = EINVAL;
                                break;
                        }
                        error = vm_set_register(sc->vmm_vm, vcpu, regnums[i],
                            regvals[i]);
                }
                break;
        }
        case VM_RESET_CPU: {
                struct vm_vcpu_reset vvr;

                if (ddi_copyin(datap, &vvr, sizeof (vvr), md)) {
                        error = EFAULT;
                        break;
                }
                if (vvr.kind != VRK_RESET && vvr.kind != VRK_INIT) {
                        error = EINVAL;
                        break;
                }

                error = vcpu_arch_reset(sc->vmm_vm, vcpu, vvr.kind == VRK_INIT);
                break;
        }
        case VM_GET_RUN_STATE: {
                struct vm_run_state vrs;

                bzero(&vrs, sizeof (vrs));
                error = vm_get_run_state(sc->vmm_vm, vcpu, &vrs.state,
                    &vrs.sipi_vector);
                if (error == 0) {
                        if (ddi_copyout(&vrs, datap, sizeof (vrs), md)) {
                                error = EFAULT;
                                break;
                        }
                }
                break;
        }
        case VM_SET_RUN_STATE: {
                struct vm_run_state vrs;

                if (ddi_copyin(datap, &vrs, sizeof (vrs), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_set_run_state(sc->vmm_vm, vcpu, vrs.state,
                    vrs.sipi_vector);
                break;
        }

        case VM_SET_KERNEMU_DEV:
        case VM_GET_KERNEMU_DEV: {
                struct vm_readwrite_kernemu_device kemu;
                size_t size = 0;

                if (ddi_copyin(datap, &kemu, sizeof (kemu), md)) {
                        error = EFAULT;
                        break;
                }

                if (kemu.access_width > 3) {
                        error = EINVAL;
                        break;
                }
                size = (1 << kemu.access_width);
                ASSERT(size >= 1 && size <= 8);

                if (cmd == VM_SET_KERNEMU_DEV) {
                        error = vm_service_mmio_write(sc->vmm_vm, vcpu,
                            kemu.gpa, kemu.value, size);
                } else {
                        error = vm_service_mmio_read(sc->vmm_vm, vcpu,
                            kemu.gpa, &kemu.value, size);
                }

                if (error == 0) {
                        if (ddi_copyout(&kemu, datap, sizeof (kemu), md)) {
                                error = EFAULT;
                                break;
                        }
                }
                break;
        }

        case VM_GET_CAPABILITY: {
                struct vm_capability vmcap;

                if (ddi_copyin(datap, &vmcap, sizeof (vmcap), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_get_capability(sc->vmm_vm, vcpu, vmcap.captype,
                    &vmcap.capval);
                if (error == 0 &&
                    ddi_copyout(&vmcap, datap, sizeof (vmcap), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_SET_CAPABILITY: {
                struct vm_capability vmcap;

                if (ddi_copyin(datap, &vmcap, sizeof (vmcap), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_set_capability(sc->vmm_vm, vcpu, vmcap.captype,
                    vmcap.capval);
                break;
        }
        case VM_SET_X2APIC_STATE: {
                struct vm_x2apic x2apic;

                if (ddi_copyin(datap, &x2apic, sizeof (x2apic), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_set_x2apic_state(sc->vmm_vm, vcpu, x2apic.state);
                break;
        }
        case VM_GET_X2APIC_STATE: {
                struct vm_x2apic x2apic;

                if (ddi_copyin(datap, &x2apic, sizeof (x2apic), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_get_x2apic_state(sc->vmm_vm, x2apic.cpuid,
                    &x2apic.state);
                if (error == 0 &&
                    ddi_copyout(&x2apic, datap, sizeof (x2apic), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_GET_GPA_PMAP: {
                struct vm_gpa_pte gpapte;

                if (ddi_copyin(datap, &gpapte, sizeof (gpapte), md)) {
                        error = EFAULT;
                        break;
                }
#ifdef __FreeBSD__
                /* XXXJOY: add function? */
                pmap_get_mapping(vmspace_pmap(vm_get_vmspace(sc->vmm_vm)),
                    gpapte.gpa, gpapte.pte, &gpapte.ptenum);
#endif
                error = 0;
                break;
        }
        case VM_GET_HPET_CAPABILITIES: {
                struct vm_hpet_cap hpetcap;

                error = vhpet_getcap(&hpetcap);
                if (error == 0 &&
                    ddi_copyout(&hpetcap, datap, sizeof (hpetcap), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_GLA2GPA: {
                struct vm_gla2gpa gg;

                if (ddi_copyin(datap, &gg, sizeof (gg), md)) {
                        error = EFAULT;
                        break;
                }
                gg.vcpuid = vcpu;
                error = vm_gla2gpa(sc->vmm_vm, vcpu, &gg.paging, gg.gla,
                    gg.prot, &gg.gpa, &gg.fault);
                if (error == 0 && ddi_copyout(&gg, datap, sizeof (gg), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_GLA2GPA_NOFAULT: {
                struct vm_gla2gpa gg;

                if (ddi_copyin(datap, &gg, sizeof (gg), md)) {
                        error = EFAULT;
                        break;
                }
                gg.vcpuid = vcpu;
                error = vm_gla2gpa_nofault(sc->vmm_vm, vcpu, &gg.paging,
                    gg.gla, gg.prot, &gg.gpa, &gg.fault);
                if (error == 0 && ddi_copyout(&gg, datap, sizeof (gg), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }

        case VM_ACTIVATE_CPU:
                error = vm_activate_cpu(sc->vmm_vm, vcpu);
                break;

        case VM_SUSPEND_CPU:
                if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
                        error = EFAULT;
                } else {
                        error = vm_suspend_cpu(sc->vmm_vm, vcpu);
                }
                break;

        case VM_RESUME_CPU:
                if (ddi_copyin(datap, &vcpu, sizeof (vcpu), md)) {
                        error = EFAULT;
                } else {
                        error = vm_resume_cpu(sc->vmm_vm, vcpu);
                }
                break;

        case VM_GET_CPUS: {
                struct vm_cpuset vm_cpuset;
                cpuset_t tempset;
                void *srcp = &tempset;
                int size;

                if (ddi_copyin(datap, &vm_cpuset, sizeof (vm_cpuset), md)) {
                        error = EFAULT;
                        break;
                }

                /* Be more generous about sizing since our cpuset_t is large. */
                size = vm_cpuset.cpusetsize;
                if (size <= 0 || size > sizeof (cpuset_t)) {
                        error = ERANGE;
                        break;
                }
                /*
                 * If they want a ulong_t or less, make sure they receive the
                 * low bits with all the useful information.
                 */
                if (size <= sizeof (tempset.cpub[0])) {
                        srcp = &tempset.cpub[0];
                }

                if (vm_cpuset.which == VM_ACTIVE_CPUS) {
                        tempset = vm_active_cpus(sc->vmm_vm);
                } else if (vm_cpuset.which == VM_SUSPENDED_CPUS) {
                        tempset = vm_suspended_cpus(sc->vmm_vm);
                } else if (vm_cpuset.which == VM_DEBUG_CPUS) {
                        tempset = vm_debug_cpus(sc->vmm_vm);
                } else {
                        error = EINVAL;
                }

                ASSERT(size > 0 && size <= sizeof (tempset));
                if (error == 0 &&
                    ddi_copyout(srcp, vm_cpuset.cpus, size, md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_SET_INTINFO: {
                struct vm_intinfo vmii;

                if (ddi_copyin(datap, &vmii, sizeof (vmii), md)) {
                        error = EFAULT;
                        break;
                }
                error = vm_exit_intinfo(sc->vmm_vm, vcpu, vmii.info1);
                break;
        }
        case VM_GET_INTINFO: {
                struct vm_intinfo vmii;

                vmii.vcpuid = vcpu;
                error = vm_get_intinfo(sc->vmm_vm, vcpu, &vmii.info1,
                    &vmii.info2);
                if (error == 0 &&
                    ddi_copyout(&vmii, datap, sizeof (vmii), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_RTC_WRITE: {
                struct vm_rtc_data rtcdata;

                if (ddi_copyin(datap, &rtcdata, sizeof (rtcdata), md)) {
                        error = EFAULT;
                        break;
                }
                error = vrtc_nvram_write(sc->vmm_vm, rtcdata.offset,
                    rtcdata.value);
                break;
        }
        case VM_RTC_READ: {
                struct vm_rtc_data rtcdata;

                if (ddi_copyin(datap, &rtcdata, sizeof (rtcdata), md)) {
                        error = EFAULT;
                        break;
                }
                error = vrtc_nvram_read(sc->vmm_vm, rtcdata.offset,
                    &rtcdata.value);
                if (error == 0 &&
                    ddi_copyout(&rtcdata, datap, sizeof (rtcdata), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }
        case VM_RTC_SETTIME: {
                struct vm_rtc_time rtctime;

                if (ddi_copyin(datap, &rtctime, sizeof (rtctime), md)) {
                        error = EFAULT;
                        break;
                }
                error = vrtc_set_time(sc->vmm_vm, rtctime.secs);
                break;
        }
        case VM_RTC_GETTIME: {
                struct vm_rtc_time rtctime;

                rtctime.secs = vrtc_get_time(sc->vmm_vm);
                if (ddi_copyout(&rtctime, datap, sizeof (rtctime), md)) {
                        error = EFAULT;
                        break;
                }
                break;
        }

        case VM_PMTMR_LOCATE: {
                uint16_t port = arg;
                error = vpmtmr_set_location(sc->vmm_vm, port);
                break;
        }

        case VM_RESTART_INSTRUCTION:
                error = vm_restart_instruction(sc->vmm_vm, vcpu);
                break;

        case VM_SET_TOPOLOGY: {
                struct vm_cpu_topology topo;

                if (ddi_copyin(datap, &topo, sizeof (topo), md) != 0) {
                        error = EFAULT;
                        break;
                }
                error = vm_set_topology(sc->vmm_vm, topo.sockets, topo.cores,
                    topo.threads, topo.maxcpus);
                break;
        }
        case VM_GET_TOPOLOGY: {
                struct vm_cpu_topology topo;

                vm_get_topology(sc->vmm_vm, &topo.sockets, &topo.cores,
                    &topo.threads, &topo.maxcpus);
                if (ddi_copyout(&topo, datap, sizeof (topo), md) != 0) {
                        error = EFAULT;
                        break;
                }
                break;
        }

        case VM_DEVMEM_GETOFFSET: {
                struct vm_devmem_offset vdo;
                list_t *dl = &sc->vmm_devmem_list;
                vmm_devmem_entry_t *de = NULL;

                if (ddi_copyin(datap, &vdo, sizeof (vdo), md) != 0) {
                        error = EFAULT;
                        break;
                }

                for (de = list_head(dl); de != NULL; de = list_next(dl, de)) {
                        if (de->vde_segid == vdo.segid) {
                                break;
                        }
                }
                if (de != NULL) {
                        vdo.offset = de->vde_off;
                        if (ddi_copyout(&vdo, datap, sizeof (vdo), md) != 0) {
                                error = EFAULT;
                        }
                } else {
                        error = ENOENT;
                }
                break;
        }
        case VM_WRLOCK_CYCLE: {
                /*
                 * Present a test mechanism to acquire/release the write lock
                 * on the VM without any other effects.
                 */
                break;
        }

        default:
                error = ENOTTY;
                break;
        }

        /* Release exclusion resources */
        switch (lock_type) {
        case LOCK_NONE:
                break;
        case LOCK_VCPU:
                vcpu_unlock_one(sc, vcpu);
                break;
        case LOCK_READ_HOLD:
                vmm_read_unlock(sc);
                break;
        case LOCK_WRITE_HOLD:
                vmm_write_unlock(sc);
                break;
        default:
                panic("unexpected lock type");
                break;
        }

        return (error);
}

1441 static vmm_softc_t *
1442 vmm_lookup(const char *name)
1443 {
1444         list_t *vml = &vmm_list;
1445         vmm_softc_t *sc;
1446 
1447         ASSERT(MUTEX_HELD(&vmm_mtx));
1448 
1449         for (sc = list_head(vml); sc != NULL; sc = list_next(vml, sc)) {
1450                 if (strcmp(sc->vmm_name, name) == 0) {
1451                         break;
1452                 }
1453         }
1454 
1455         return (sc);
1456 }
1457 
1458 /*
1459  * Acquire an HMA registration if not already held.
1460  */
1461 static boolean_t
1462 vmm_hma_acquire(void)
1463 {
1464         ASSERT(MUTEX_NOT_HELD(&vmm_mtx));
1465 
1466         mutex_enter(&vmmdev_mtx);
1467 
1468         if (vmmdev_hma_reg == NULL) {
1469                 VERIFY3U(vmmdev_hma_ref, ==, 0);
1470                 vmmdev_hma_reg = hma_register(vmmdev_hvm_name);
1471                 if (vmmdev_hma_reg == NULL) {
1472                         cmn_err(CE_WARN, "%s HMA registration failed.",
1473                             vmmdev_hvm_name);
1474                         mutex_exit(&vmmdev_mtx);
1475                         return (B_FALSE);
1476                 }
1477         }
1478 
1479         vmmdev_hma_ref++;
1480 
1481         mutex_exit(&vmmdev_mtx);
1482 
1483         return (B_TRUE);
1484 }
1485 
1486 /*
1487  * Release the HMA registration if held and there are no remaining VMs.
1488  */
1489 static void
1490 vmm_hma_release(void)
1491 {
1492         ASSERT(MUTEX_NOT_HELD(&vmm_mtx));
1493 
1494         mutex_enter(&vmmdev_mtx);
1495 
1496         VERIFY3U(vmmdev_hma_ref, !=, 0);
1497 
1498         vmmdev_hma_ref--;
1499 
1500         if (vmmdev_hma_ref == 0) {
1501                 VERIFY(vmmdev_hma_reg != NULL);
1502                 hma_unregister(vmmdev_hma_reg);
1503                 vmmdev_hma_reg = NULL;
1504         }
1505         mutex_exit(&vmmdev_mtx);
1506 }
1507 
1508 static int
1509 vmmdev_do_vm_create(char *name, cred_t *cr)
1510 {
1511         vmm_softc_t     *sc = NULL;
1512         minor_t         minor;
1513         int             error = ENOMEM;
1514 
1515         if (strnlen(name, VM_MAX_NAMELEN) >= VM_MAX_NAMELEN) {
1516                 return (EINVAL);
1517         }
1518 
1519         if (!vmm_hma_acquire())
1520                 return (ENXIO);
1521 
1522         mutex_enter(&vmm_mtx);
1523 
1524         /* Look for duplicate names */
1525         if (vmm_lookup(name) != NULL) {
1526                 mutex_exit(&vmm_mtx);
1527                 vmm_hma_release();
1528                 return (EEXIST);
1529         }
1530 
1531         /* Allow only one instance per non-global zone. */
1532         if (!INGLOBALZONE(curproc)) {
1533                 for (sc = list_head(&vmm_list); sc != NULL;
1534                     sc = list_next(&vmm_list, sc)) {
1535                         if (sc->vmm_zone == curzone) {
1536                                 mutex_exit(&vmm_mtx);
1537                                 vmm_hma_release();
1538                                 return (EINVAL);
1539                         }
1540                 }
1541         }
1542 
1543         minor = id_alloc(vmm_minors);
1544         if (ddi_soft_state_zalloc(vmm_statep, minor) != DDI_SUCCESS) {
1545                 goto fail;
1546         } else if ((sc = ddi_get_soft_state(vmm_statep, minor)) == NULL) {
1547                 ddi_soft_state_free(vmm_statep, minor);
1548                 goto fail;
1549         } else if (ddi_create_minor_node(vmmdev_dip, name, S_IFCHR, minor,
1550             DDI_PSEUDO, 0) != DDI_SUCCESS) {
1551                 goto fail;
1552         }
1553 
1554         if (vmm_kstat_alloc(sc, minor, cr) != 0) {
1555                 goto fail;
1556         }
1557 
1558         error = vm_create(name, &sc->vmm_vm);
1559         if (error == 0) {
1560                 /* Complete VM initialization and report success. */
1561                 (void) strlcpy(sc->vmm_name, name, sizeof (sc->vmm_name));
1562                 sc->vmm_minor = minor;
1563                 list_create(&sc->vmm_devmem_list, sizeof (vmm_devmem_entry_t),
1564                     offsetof(vmm_devmem_entry_t, vde_node));
1565 
1566                 list_create(&sc->vmm_holds, sizeof (vmm_hold_t),
1567                     offsetof(vmm_hold_t, vmh_node));
1568                 cv_init(&sc->vmm_cv, NULL, CV_DEFAULT, NULL);
1569 
1570                 mutex_init(&sc->vmm_lease_lock, NULL, MUTEX_DEFAULT, NULL);
1571                 list_create(&sc->vmm_lease_list, sizeof (vmm_lease_t),
1572                     offsetof(vmm_lease_t, vml_node));
1573                 cv_init(&sc->vmm_lease_cv, NULL, CV_DEFAULT, NULL);
1574                 rw_init(&sc->vmm_rwlock, NULL, RW_DEFAULT, NULL);
1575 
1576                 sc->vmm_zone = crgetzone(cr);
1577                 zone_hold(sc->vmm_zone);
1578                 vmm_zsd_add_vm(sc);
1579                 vmm_kstat_init(sc);
1580 
1581                 list_insert_tail(&vmm_list, sc);
1582                 mutex_exit(&vmm_mtx);
1583                 return (0);
1584         }
1585 
1586         vmm_kstat_fini(sc);
1587         ddi_remove_minor_node(vmmdev_dip, name);
1588 fail:
1589         id_free(vmm_minors, minor);
1590         if (sc != NULL) {
1591                 ddi_soft_state_free(vmm_statep, minor);
1592         }
1593         mutex_exit(&vmm_mtx);
1594         vmm_hma_release();
1595 
1596         return (error);
1597 }
1598 
1599 /*
1600  * Bhyve 'Driver' Interface
1601  *
1602  * While many devices are emulated in the bhyve userspace process, there are
1603  * others with performance constraints which require that they run mostly or
1604  * entirely in-kernel.  For those not integrated directly into bhyve, an API is
1605  * needed so they can query/manipulate the portions of VM state needed to
1606  * fulfill their purpose.
1607  *
1608  * This includes:
1609  * - Translating guest-physical addresses to host-virtual pointers
1610  * - Injecting MSIs
1611  * - Hooking IO port addresses
1612  *
1613  * The vmm_drv interface exists to provide that functionality to its consumers.
1614  * (At this time, 'viona' is the only user)
1615  */
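
/*
 * As an illustrative sketch only (not lifted from viona or any in-gate
 * consumer; my_expire_cb, my_arg, gpa, and sz are hypothetical), a driver
 * holding an open file_t for a VM device might use the interface as
 * follows.  The expiry callback must quiesce its accesses and return
 * B_TRUE once it has done so; a well-behaved consumer also polls
 * vmm_drv_release_reqd() and drops its hold when release is requested.
 *
 *	vmm_hold_t *hold;
 *	vmm_lease_t *lease;
 *
 *	if (vmm_drv_hold(fp, CRED(), &hold) != 0)
 *		return (ENXIO);
 *	lease = vmm_drv_lease_sign(hold, my_expire_cb, my_arg);
 *	if (lease != NULL) {
 *		void *kva = vmm_drv_gpa2kva(lease, gpa, sz);
 *
 *		... access guest memory through kva ...
 *
 *		vmm_drv_lease_break(hold, lease);
 *	}
 *	vmm_drv_rele(hold);
 */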
1616 int
1617 vmm_drv_hold(file_t *fp, cred_t *cr, vmm_hold_t **holdp)
1618 {
1619         vnode_t *vp = fp->f_vnode;
1620         const dev_t dev = vp->v_rdev;
1621         vmm_softc_t *sc;
1622         vmm_hold_t *hold;
1623         int err = 0;
1624 
1625         if (vp->v_type != VCHR) {
1626                 return (ENXIO);
1627         }
1628         const major_t major = getmajor(dev);
1629         const minor_t minor = getminor(dev);
1630 
1631         mutex_enter(&vmmdev_mtx);
1632         if (vmmdev_dip == NULL || major != ddi_driver_major(vmmdev_dip)) {
1633                 mutex_exit(&vmmdev_mtx);
1634                 return (ENOENT);
1635         }
1636         mutex_enter(&vmm_mtx);
1637         mutex_exit(&vmmdev_mtx);
1638 
1639         if ((sc = ddi_get_soft_state(vmm_statep, minor)) == NULL) {
1640                 err = ENOENT;
1641                 goto out;
1642         }
1643         /* XXXJOY: check cred permissions against instance */
1644 
1645         if ((sc->vmm_flags & (VMM_CLEANUP|VMM_PURGED|VMM_DESTROY)) != 0) {
1646                 err = EBUSY;
1647                 goto out;
1648         }
1649 
1650         hold = kmem_zalloc(sizeof (*hold), KM_SLEEP);
1651         hold->vmh_sc = sc;
1652         hold->vmh_release_req = B_FALSE;
1653 
1654         list_insert_tail(&sc->vmm_holds, hold);
1655         sc->vmm_flags |= VMM_HELD;
1656         *holdp = hold;
1657 
1658 out:
1659         mutex_exit(&vmm_mtx);
1660         return (err);
1661 }
1662 
1663 void
1664 vmm_drv_rele(vmm_hold_t *hold)
1665 {
1666         vmm_softc_t *sc;
1667 
1668         ASSERT(hold != NULL);
1669         ASSERT(hold->vmh_sc != NULL);
1670         VERIFY(hold->vmh_ioport_hook_cnt == 0);
1671 
1672         mutex_enter(&vmm_mtx);
1673         sc = hold->vmh_sc;
1674         list_remove(&sc->vmm_holds, hold);
1675         if (list_is_empty(&sc->vmm_holds)) {
1676                 sc->vmm_flags &= ~VMM_HELD;
1677                 cv_broadcast(&sc->vmm_cv);
1678         }
1679         mutex_exit(&vmm_mtx);
1680         kmem_free(hold, sizeof (*hold));
1681 }
1682 
1683 boolean_t
1684 vmm_drv_release_reqd(vmm_hold_t *hold)
1685 {
1686         ASSERT(hold != NULL);
1687 
1688         return (hold->vmh_release_req);
1689 }
1690 
1691 vmm_lease_t *
1692 vmm_drv_lease_sign(vmm_hold_t *hold, boolean_t (*expiref)(void *), void *arg)
1693 {
1694         vmm_softc_t *sc = hold->vmh_sc;
1695         vmm_lease_t *lease;
1696 
1697         ASSERT3P(expiref, !=, NULL);
1698 
1699         if (hold->vmh_release_req) {
1700                 return (NULL);
1701         }
1702 
1703         lease = kmem_alloc(sizeof (*lease), KM_SLEEP);
1704         list_link_init(&lease->vml_node);
1705         lease->vml_expire_func = expiref;
1706         lease->vml_expire_arg = arg;
1707         lease->vml_expired = B_FALSE;
1708         lease->vml_hold = hold;
1709         /* cache the VM pointer for one less pointer chase */
1710         lease->vml_vm = sc->vmm_vm;
1711 
1712         mutex_enter(&sc->vmm_lease_lock);
1713         while (sc->vmm_lease_blocker != 0) {
1714                 cv_wait(&sc->vmm_lease_cv, &sc->vmm_lease_lock);
1715         }
1716         list_insert_tail(&sc->vmm_lease_list, lease);
1717         vmm_read_lock(sc);
1718         mutex_exit(&sc->vmm_lease_lock);
1719 
1720         return (lease);
1721 }
1722 
1723 static void
1724 vmm_lease_break_locked(vmm_softc_t *sc, vmm_lease_t *lease)
1725 {
1726         ASSERT(MUTEX_HELD(&sc->vmm_lease_lock));
1727 
1728         list_remove(&sc->vmm_lease_list, lease);
1729         vmm_read_unlock(sc);
1730         kmem_free(lease, sizeof (*lease));
1731 }
1732 
1733 void
1734 vmm_drv_lease_break(vmm_hold_t *hold, vmm_lease_t *lease)
1735 {
1736         vmm_softc_t *sc = hold->vmh_sc;
1737 
1738         VERIFY3P(hold, ==, lease->vml_hold);
1739 
1740         mutex_enter(&sc->vmm_lease_lock);
1741         vmm_lease_break_locked(sc, lease);
1742         mutex_exit(&sc->vmm_lease_lock);
1743 }
1744 
1745 boolean_t
1746 vmm_drv_lease_expired(vmm_lease_t *lease)
1747 {
1748         return (lease->vml_expired);
1749 }
1750 
1751 void *
1752 vmm_drv_gpa2kva(vmm_lease_t *lease, uintptr_t gpa, size_t sz)
1753 {
1754         ASSERT(lease != NULL);
1755 
1756         return (vmspace_find_kva(vm_get_vmspace(lease->vml_vm), gpa, sz));
1757 }
1758 
1759 int
1760 vmm_drv_msi(vmm_lease_t *lease, uint64_t addr, uint64_t msg)
1761 {
1762         ASSERT(lease != NULL);
1763 
1764         return (lapic_intr_msi(lease->vml_vm, addr, msg));
1765 }
1766 
1767 int
1768 vmm_drv_ioport_hook(vmm_hold_t *hold, uint16_t ioport, vmm_drv_iop_cb_t func,
1769     void *arg, void **cookie)
1770 {
1771         vmm_softc_t *sc;
1772         int err;
1773 
1774         ASSERT(hold != NULL);
1775         ASSERT(cookie != NULL);
1776 
1777         sc = hold->vmh_sc;
1778         mutex_enter(&vmm_mtx);
1779         /* Confirm that hook installation is not blocked */
1780         if ((sc->vmm_flags & VMM_BLOCK_HOOK) != 0) {
1781                 mutex_exit(&vmm_mtx);
1782                 return (EBUSY);
1783         }
1784         /*
1785          * Optimistically record an installed hook which will prevent a block
1786          * from being asserted while the mutex is dropped.
1787          */
1788         hold->vmh_ioport_hook_cnt++;
1789         mutex_exit(&vmm_mtx);
1790 
1791         vmm_write_lock(sc);
1792         err = vm_ioport_hook(sc->vmm_vm, ioport, (ioport_handler_t)func,
1793             arg, cookie);
1794         vmm_write_unlock(sc);
1795 
1796         if (err != 0) {
1797                 mutex_enter(&vmm_mtx);
1798                 /* Walk back optimism about the hook installation */
1799                 hold->vmh_ioport_hook_cnt--;
1800                 mutex_exit(&vmm_mtx);
1801         }
1802         return (err);
1803 }
1804 
1805 void
1806 vmm_drv_ioport_unhook(vmm_hold_t *hold, void **cookie)
1807 {
1808         vmm_softc_t *sc;
1809 
1810         ASSERT(hold != NULL);
1811         ASSERT(cookie != NULL);
1812         ASSERT(hold->vmh_ioport_hook_cnt != 0);
1813 
1814         sc = hold->vmh_sc;
1815         vmm_write_lock(sc);
1816         vm_ioport_unhook(sc->vmm_vm, cookie);
1817         vmm_write_unlock(sc);
1818 
1819         mutex_enter(&vmm_mtx);
1820         hold->vmh_ioport_hook_cnt--;
1821         mutex_exit(&vmm_mtx);
1822 }
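
/*
 * A minimal hook/unhook pairing, again purely illustrative (MY_PORT,
 * my_iop_cb, and my_arg are hypothetical; my_iop_cb must match the
 * vmm_drv_iop_cb_t prototype declared in sys/vmm_drv.h):
 *
 *	void *cookie;
 *
 *	if (vmm_drv_ioport_hook(hold, MY_PORT, my_iop_cb, my_arg,
 *	    &cookie) == 0) {
 *		... my_iop_cb now fields guest accesses to MY_PORT ...
 *		vmm_drv_ioport_unhook(hold, &cookie);
 *	}
 */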
1823 
1824 static int
1825 vmm_drv_purge(vmm_softc_t *sc)
1826 {
1827         ASSERT(MUTEX_HELD(&vmm_mtx));
1828 
1829         if ((sc->vmm_flags & VMM_HELD) != 0) {
1830                 vmm_hold_t *hold;
1831 
1832                 sc->vmm_flags |= VMM_CLEANUP;
1833                 for (hold = list_head(&sc->vmm_holds); hold != NULL;
1834                     hold = list_next(&sc->vmm_holds, hold)) {
1835                         hold->vmh_release_req = B_TRUE;
1836                 }
1837                 while ((sc->vmm_flags & VMM_HELD) != 0) {
1838                         if (cv_wait_sig(&sc->vmm_cv, &vmm_mtx) <= 0) {
1839                                 return (EINTR);
1840                         }
1841                 }
1842                 sc->vmm_flags &= ~VMM_CLEANUP;
1843         }
1844 
1845         VERIFY(list_is_empty(&sc->vmm_holds));
1846         sc->vmm_flags |= VMM_PURGED;
1847         return (0);
1848 }
1849 
1850 static int
1851 vmm_drv_block_hook(vmm_softc_t *sc, boolean_t enable_block)
1852 {
1853         int err = 0;
1854 
1855         mutex_enter(&vmm_mtx);
1856         if (!enable_block) {
1857                 VERIFY((sc->vmm_flags & VMM_BLOCK_HOOK) != 0);
1858 
1859                 sc->vmm_flags &= ~VMM_BLOCK_HOOK;
1860                 goto done;
1861         }
1862 
1863         /* If any holds have hooks installed, the block is a failure */
1864         if (!list_is_empty(&sc->vmm_holds)) {
1865                 vmm_hold_t *hold;
1866 
1867                 for (hold = list_head(&sc->vmm_holds); hold != NULL;
1868                     hold = list_next(&sc->vmm_holds, hold)) {
1869                         if (hold->vmh_ioport_hook_cnt != 0) {
1870                                 err = EBUSY;
1871                                 goto done;
1872                         }
1873                 }
1874         }
1875         sc->vmm_flags |= VMM_BLOCK_HOOK;
1876 
1877 done:
1878         mutex_exit(&vmm_mtx);
1879         return (err);
1880 }
1881 
1882 static int
1883 vmm_do_vm_destroy_locked(vmm_softc_t *sc, boolean_t clean_zsd,
1884     boolean_t *hma_release)
1885 {
1886         dev_info_t      *pdip = ddi_get_parent(vmmdev_dip);
1887         minor_t         minor;
1888 
1889         ASSERT(MUTEX_HELD(&vmm_mtx));
1890 
1891         *hma_release = B_FALSE;
1892 
1893         if (vmm_drv_purge(sc) != 0) {
1894                 return (EINTR);
1895         }
1896 
1897         if (clean_zsd) {
1898                 vmm_zsd_rem_vm(sc);
1899         }
1900 
1901         /* Clean up devmem entries */
1902         vmmdev_devmem_purge(sc);
1903 
1904         list_remove(&vmm_list, sc);
1905         ddi_remove_minor_node(vmmdev_dip, sc->vmm_name);
1906         minor = sc->vmm_minor;
1907         zone_rele(sc->vmm_zone);
1908         if (sc->vmm_is_open) {
1909                 list_insert_tail(&vmm_destroy_list, sc);
1910                 sc->vmm_flags |= VMM_DESTROY;
1911         } else {
1912                 vm_destroy(sc->vmm_vm);
1913                 vmm_kstat_fini(sc);
1914                 ddi_soft_state_free(vmm_statep, minor);
1915                 id_free(vmm_minors, minor);
1916                 *hma_release = B_TRUE;
1917         }
1918         (void) devfs_clean(pdip, NULL, DV_CLEAN_FORCE);
1919 
1920         return (0);
1921 }
1922 
1923 int
1924 vmm_do_vm_destroy(vmm_softc_t *sc, boolean_t clean_zsd)
1925 {
1926         boolean_t       hma_release = B_FALSE;
1927         int             err;
1928 
1929         mutex_enter(&vmm_mtx);
1930         err = vmm_do_vm_destroy_locked(sc, clean_zsd, &hma_release);
1931         mutex_exit(&vmm_mtx);
1932 
1933         if (hma_release)
1934                 vmm_hma_release();
1935 
1936         return (err);
1937 }
1938 
1939 /* ARGSUSED */
1940 static int
1941 vmmdev_do_vm_destroy(const char *name, cred_t *cr)
1942 {
1943         boolean_t       hma_release = B_FALSE;
1944         vmm_softc_t     *sc;
1945         int             err;
1946 
1947         if (crgetuid(cr) != 0)
1948                 return (EPERM);
1949 
1950         mutex_enter(&vmm_mtx);
1951 
1952         if ((sc = vmm_lookup(name)) == NULL) {
1953                 mutex_exit(&vmm_mtx);
1954                 return (ENOENT);
1955         }
1956         /*
1957          * We don't check this in vmm_lookup() since that function is also used
1958          * for validation during create and currently vmm names must be unique.
1959          */
1960         if (!INGLOBALZONE(curproc) && sc->vmm_zone != curzone) {
1961                 mutex_exit(&vmm_mtx);
1962                 return (EPERM);
1963         }
1964         err = vmm_do_vm_destroy_locked(sc, B_TRUE, &hma_release);
1965 
1966         mutex_exit(&vmm_mtx);
1967 
1968         if (hma_release)
1969                 vmm_hma_release();
1970 
1971         return (err);
1972 }
1973 
1974 #define VCPU_NAME_BUFLEN        32
1975 
1976 static int
1977 vmm_kstat_alloc(vmm_softc_t *sc, minor_t minor, const cred_t *cr)
1978 {
1979         zoneid_t zid = crgetzoneid(cr);
1980         int instance = minor;
1981         kstat_t *ksp;
1982 
1983         ASSERT3P(sc->vmm_kstat_vm, ==, NULL);
1984 
1985         ksp = kstat_create_zone(VMM_MODULE_NAME, instance, "vm",
1986             VMM_KSTAT_CLASS, KSTAT_TYPE_NAMED,
1987             sizeof (vmm_kstats_t) / sizeof (kstat_named_t), 0, zid);
1988 
1989         if (ksp == NULL) {
1990                 return (-1);
1991         }
1992         sc->vmm_kstat_vm = ksp;
1993 
1994         for (uint_t i = 0; i < VM_MAXCPU; i++) {
1995                 char namebuf[VCPU_NAME_BUFLEN];
1996 
1997                 ASSERT3P(sc->vmm_kstat_vcpu[i], ==, NULL);
1998 
1999                 (void) snprintf(namebuf, VCPU_NAME_BUFLEN, "vcpu%u", i);
2000                 ksp = kstat_create_zone(VMM_MODULE_NAME, instance, namebuf,
2001                     VMM_KSTAT_CLASS, KSTAT_TYPE_NAMED,
2002                     sizeof (vmm_vcpu_kstats_t) / sizeof (kstat_named_t),
2003                     0, zid);
2004                 if (ksp == NULL) {
2005                         goto fail;
2006                 }
2007 
2008                 sc->vmm_kstat_vcpu[i] = ksp;
2009         }
2010 
2011         /*
2012          * If this instance is associated with a non-global zone, make its
2013          * kstats visible from the GZ.
2014          */
2015         if (zid != GLOBAL_ZONEID) {
2016                 kstat_zone_add(sc->vmm_kstat_vm, GLOBAL_ZONEID);
2017                 for (uint_t i = 0; i < VM_MAXCPU; i++) {
2018                         kstat_zone_add(sc->vmm_kstat_vcpu[i], GLOBAL_ZONEID);
2019                 }
2020         }
2021 
2022         return (0);
2023 
2024 fail:
2025         for (uint_t i = 0; i < VM_MAXCPU; i++) {
2026                 if (sc->vmm_kstat_vcpu[i] != NULL) {
2027                         kstat_delete(sc->vmm_kstat_vcpu[i]);
2028                         sc->vmm_kstat_vcpu[i] = NULL;
2029                 } else {
2030                         break;
2031                 }
2032         }
2033         kstat_delete(sc->vmm_kstat_vm);
2034         sc->vmm_kstat_vm = NULL;
2035         return (-1);
2036 }
2037 
2038 static void
2039 vmm_kstat_init(vmm_softc_t *sc)
2040 {
2041         kstat_t *ksp;
2042 
2043         ASSERT3P(sc->vmm_vm, !=, NULL);
2044         ASSERT3P(sc->vmm_kstat_vm, !=, NULL);
2045 
2046         ksp = sc->vmm_kstat_vm;
2047         vmm_kstats_t *vk = ksp->ks_data;
2048         ksp->ks_private = sc->vmm_vm;
2049         kstat_named_init(&vk->vk_name, "vm_name", KSTAT_DATA_STRING);
2050         kstat_named_setstr(&vk->vk_name, sc->vmm_name);
2051 
2052         for (uint_t i = 0; i < VM_MAXCPU; i++) {
2053                 ASSERT3P(sc->vmm_kstat_vcpu[i], !=, NULL);
2054 
2055                 ksp = sc->vmm_kstat_vcpu[i];
2056                 vmm_vcpu_kstats_t *vvk = ksp->ks_data;
2057 
2058                 kstat_named_init(&vvk->vvk_vcpu, "vcpu", KSTAT_DATA_UINT32);
2059                 vvk->vvk_vcpu.value.ui32 = i;
2060                 kstat_named_init(&vvk->vvk_time_init, "time_init",
2061                     KSTAT_DATA_UINT64);
2062                 kstat_named_init(&vvk->vvk_time_run, "time_run",
2063                     KSTAT_DATA_UINT64);
2064                 kstat_named_init(&vvk->vvk_time_idle, "time_idle",
2065                     KSTAT_DATA_UINT64);
2066                 kstat_named_init(&vvk->vvk_time_emu_kern, "time_emu_kern",
2067                     KSTAT_DATA_UINT64);
2068                 kstat_named_init(&vvk->vvk_time_emu_user, "time_emu_user",
2069                     KSTAT_DATA_UINT64);
2070                 kstat_named_init(&vvk->vvk_time_sched, "time_sched",
2071                     KSTAT_DATA_UINT64);
2072                 ksp->ks_private = sc->vmm_vm;
2073                 ksp->ks_update = vmm_kstat_update_vcpu;
2074         }
2075 
2076         kstat_install(sc->vmm_kstat_vm);
2077         for (uint_t i = 0; i < VM_MAXCPU; i++) {
2078                 kstat_install(sc->vmm_kstat_vcpu[i]);
2079         }
2080 }
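
/*
 * Once installed, these kstats appear under the "vmm" module, one
 * instance per VM minor: a "vm" kstat carrying vm_name, plus per-vcpu
 * "vcpuN" kstats with the time_* counters.  They can be inspected with,
 * e.g., "kstat -m vmm", subject to the zone visibility rules above.
 */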
2081 
2082 static void
2083 vmm_kstat_fini(vmm_softc_t *sc)
2084 {
2085         ASSERT(sc->vmm_kstat_vm != NULL);
2086 
2087         kstat_delete(sc->vmm_kstat_vm);
2088         sc->vmm_kstat_vm = NULL;
2089 
2090         for (uint_t i = 0; i < VM_MAXCPU; i++) {
2091                 ASSERT3P(sc->vmm_kstat_vcpu[i], !=, NULL);
2092 
2093                 kstat_delete(sc->vmm_kstat_vcpu[i]);
2094                 sc->vmm_kstat_vcpu[i] = NULL;
2095         }
2096 }
2097 
2098 static int
2099 vmm_open(dev_t *devp, int flag, int otyp, cred_t *credp)
2100 {
2101         minor_t         minor;
2102         vmm_softc_t     *sc;
2103 
2104         minor = getminor(*devp);
2105         if (minor == VMM_CTL_MINOR) {
2106                 /*
2107                  * Master control device must be opened exclusively.
2108                  */
2109                 if ((flag & FEXCL) != FEXCL || otyp != OTYP_CHR) {
2110                         return (EINVAL);
2111                 }
2112 
2113                 return (0);
2114         }
2115 
2116         mutex_enter(&vmm_mtx);
2117         sc = ddi_get_soft_state(vmm_statep, minor);
2118         if (sc == NULL) {
2119                 mutex_exit(&vmm_mtx);
2120                 return (ENXIO);
2121         }
2122 
2123         sc->vmm_is_open = B_TRUE;
2124         mutex_exit(&vmm_mtx);
2125 
2126         return (0);
2127 }
2128 
2129 static int
2130 vmm_close(dev_t dev, int flag, int otyp, cred_t *credp)
2131 {
2132         minor_t         minor;
2133         vmm_softc_t     *sc;
2134         boolean_t       hma_release = B_FALSE;
2135 
2136         minor = getminor(dev);
2137         if (minor == VMM_CTL_MINOR)
2138                 return (0);
2139 
2140         mutex_enter(&vmm_mtx);
2141         sc = ddi_get_soft_state(vmm_statep, minor);
2142         if (sc == NULL) {
2143                 mutex_exit(&vmm_mtx);
2144                 return (ENXIO);
2145         }
2146 
2147         VERIFY(sc->vmm_is_open);
2148         sc->vmm_is_open = B_FALSE;
2149 
2150         /*
2151          * If this VM was destroyed while the vmm device was open, then
2152          * clean it up now that it is closed.
2153          */
2154         if (sc->vmm_flags & VMM_DESTROY) {
2155                 list_remove(&vmm_destroy_list, sc);
2156                 vm_destroy(sc->vmm_vm);
                     vmm_kstat_fini(sc);
2157                 ddi_soft_state_free(vmm_statep, minor);
2158                 id_free(vmm_minors, minor);
2159                 hma_release = B_TRUE;
2160         }
2161         mutex_exit(&vmm_mtx);
2162 
2163         if (hma_release)
2164                 vmm_hma_release();
2165 
2166         return (0);
2167 }
2168 
2169 static int
2170 vmm_is_supported(intptr_t arg)
2171 {
2172         int r;
2173         const char *msg;
2174 
2175         if (vmm_is_intel()) {
2176                 r = vmx_x86_supported(&msg);
2177         } else if (vmm_is_svm()) {
2178                 /*
2179                  * HMA already ensured that the features necessary for SVM
2180                  * operation were present and online during vmm_attach().
2181                  */
2182                 r = 0;
2183         } else {
2184                 r = ENXIO;
2185                 msg = "Unsupported CPU vendor";
2186         }
2187 
2188         if (r != 0 && arg != (intptr_t)NULL) {
2189                 if (copyoutstr(msg, (char *)arg, strlen(msg) + 1, NULL) != 0)
2190                         return (EFAULT);
2191         }
2192         return (r);
2193 }
2194 
2195 static int
2196 vmm_ioctl(dev_t dev, int cmd, intptr_t arg, int mode, cred_t *credp,
2197     int *rvalp)
2198 {
2199         vmm_softc_t     *sc;
2200         minor_t         minor;
2201 
2202         /* The structs in bhyve ioctls assume a 64-bit datamodel */
2203         if (ddi_model_convert_from(mode & FMODELS) != DDI_MODEL_NONE) {
2204                 return (ENOTSUP);
2205         }
2206 
2207         minor = getminor(dev);
2208 
2209         if (minor == VMM_CTL_MINOR) {
2210                 void *argp = (void *)arg;
2211                 char name[VM_MAX_NAMELEN] = { 0 };
2212                 size_t len = 0;
2213 
2214                 if ((mode & FKIOCTL) != 0) {
2215                         len = strlcpy(name, argp, sizeof (name));
2216                 } else {
2217                         if (copyinstr(argp, name, sizeof (name), &len) != 0) {
2218                                 return (EFAULT);
2219                         }
2220                 }
2221                 if (len >= VM_MAX_NAMELEN) {
2222                         return (ENAMETOOLONG);
2223                 }
2224 
2225                 switch (cmd) {
2226                 case VMM_CREATE_VM:
2227                         if ((mode & FWRITE) == 0)
2228                                 return (EPERM);
2229                         return (vmmdev_do_vm_create(name, credp));
2230                 case VMM_DESTROY_VM:
2231                         if ((mode & FWRITE) == 0)
2232                                 return (EPERM);
2233                         return (vmmdev_do_vm_destroy(name, credp));
2234                 case VMM_VM_SUPPORTED:
2235                         return (vmm_is_supported(arg));
2236                 default:
2237                         /* No other actions are legal on ctl device */
2238                         return (ENOTTY);
2239                 }
2240         }
2241 
2242         sc = ddi_get_soft_state(vmm_statep, minor);
2243         ASSERT(sc);
2244 
2245         if (sc->vmm_flags & VMM_DESTROY)
2246                 return (ENXIO);
2247 
2248         return (vmmdev_do_ioctl(sc, cmd, arg, mode, credp, rvalp));
2249 }
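
/*
 * From userspace, VM creation and destruction are driven through the
 * control node.  A minimal sketch, with error handling elided and the
 * control node path assumed (the minor node is named "ctl"; its /dev
 * path depends on how it is linked on a given system).  Note that the
 * control device must be opened exclusively and writable:
 *
 *	int ctl = open("/dev/vmm/ctl", O_RDWR | O_EXCL);
 *
 *	(void) ioctl(ctl, VMM_CREATE_VM, "myvm");
 *	...
 *	(void) ioctl(ctl, VMM_DESTROY_VM, "myvm");
 *	(void) close(ctl);
 */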
2250 
2251 static int
2252 vmm_segmap(dev_t dev, off_t off, struct as *as, caddr_t *addrp, off_t len,
2253     unsigned int prot, unsigned int maxprot, unsigned int flags, cred_t *credp)
2254 {
2255         vmm_softc_t *sc;
2256         const minor_t minor = getminor(dev);
2257         struct vm *vm;
2258         int err;
2259         vm_object_t vmo = NULL;
2260         struct vmspace *vms;
2261 
2262         if (minor == VMM_CTL_MINOR) {
2263                 return (ENODEV);
2264         }
2265         if (off < 0 || (off + len) <= 0) {
2266                 return (EINVAL);
2267         }
2268         if ((prot & PROT_USER) == 0) {
2269                 return (EACCES);
2270         }
2271 
2272         sc = ddi_get_soft_state(vmm_statep, minor);
2273         ASSERT(sc);
2274 
2275         if (sc->vmm_flags & VMM_DESTROY)
2276                 return (ENXIO);
2277 
2278         /* Grab read lock on the VM to prevent any changes to the memory map */
2279         vmm_read_lock(sc);
2280 
2281         vm = sc->vmm_vm;
2282         vms = vm_get_vmspace(vm);
2283         if (off >= VM_DEVMEM_START) {
2284                 int segid;
2285                 off_t map_off = 0;
2286 
2287                 /* Mapping a devmem "device" */
2288                 if (!vmmdev_devmem_segid(sc, off, len, &segid, &map_off)) {
2289                         err = ENODEV;
2290                         goto out;
2291                 }
2292                 err = vm_get_memseg(vm, segid, NULL, NULL, &vmo);
2293                 if (err != 0) {
2294                         goto out;
2295                 }
2296                 err = vm_segmap_obj(vmo, map_off, len, as, addrp, prot, maxprot,
2297                     flags);
2298         } else {
2299                 /* Mapping a part of the guest physical space */
2300                 err = vm_segmap_space(vms, off, as, addrp, len, prot, maxprot,
2301                     flags);
2302         }
2303 
2305 out:
2306         vmm_read_unlock(sc);
2307         return (err);
2308 }
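
/*
 * The corresponding userspace views of the two cases above, as a sketch
 * (fd is an open VM device descriptor; gpa, len, and devmem_off are
 * hypothetical):
 *
 *	guest-physical memory:
 *		mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd, gpa);
 *
 *	devmem segment (devmem_off >= VM_DEVMEM_START, as returned by the
 *	VM_DEVMEM_GETOFFSET ioctl handled earlier in this file):
 *		mmap(NULL, len, PROT_READ | PROT_WRITE, MAP_SHARED, fd,
 *		    devmem_off);
 */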
2309 
2310 static sdev_plugin_validate_t
2311 vmm_sdev_validate(sdev_ctx_t ctx)
2312 {
2313         const char *name = sdev_ctx_name(ctx);
2314         vmm_softc_t *sc;
2315         sdev_plugin_validate_t ret;
2316         minor_t minor;
2317 
2318         if (sdev_ctx_vtype(ctx) != VCHR)
2319                 return (SDEV_VTOR_INVALID);
2320 
2321         VERIFY3S(sdev_ctx_minor(ctx, &minor), ==, 0);
2322 
2323         mutex_enter(&vmm_mtx);
2324         if ((sc = vmm_lookup(name)) == NULL)
2325                 ret = SDEV_VTOR_INVALID;
2326         else if (sc->vmm_minor != minor)
2327                 ret = SDEV_VTOR_STALE;
2328         else
2329                 ret = SDEV_VTOR_VALID;
2330         mutex_exit(&vmm_mtx);
2331 
2332         return (ret);
2333 }
2334 
2335 static int
2336 vmm_sdev_filldir(sdev_ctx_t ctx)
2337 {
2338         vmm_softc_t *sc;
2339         int ret;
2340 
2341         if (strcmp(sdev_ctx_path(ctx), VMM_SDEV_ROOT) != 0) {
2342                 cmn_err(CE_WARN, "%s: bad path '%s' != '%s'\n", __func__,
2343                     sdev_ctx_path(ctx), VMM_SDEV_ROOT);
2344                 return (EINVAL);
2345         }
2346 
2347         mutex_enter(&vmm_mtx);
2348         ASSERT(vmmdev_dip != NULL);
2349         for (sc = list_head(&vmm_list); sc != NULL;
2350             sc = list_next(&vmm_list, sc)) {
2351                 if (!INGLOBALZONE(curproc) && sc->vmm_zone != curzone)
2352                         continue;
2353                 ret = sdev_plugin_mknod(ctx, sc->vmm_name,
2354                     S_IFCHR | 0600,
2355                     makedevice(ddi_driver_major(vmmdev_dip),
2356                     sc->vmm_minor));
2359                 if (ret != 0 && ret != EEXIST)
2360                         goto out;
2361         }
2362 
2363         ret = 0;
2364 
2365 out:
2366         mutex_exit(&vmm_mtx);
2367         return (ret);
2368 }
2369 
2370 /* ARGSUSED */
2371 static void
2372 vmm_sdev_inactive(sdev_ctx_t ctx)
2373 {
2374 }
2375 
2376 static sdev_plugin_ops_t vmm_sdev_ops = {
2377         .spo_version = SDEV_PLUGIN_VERSION,
2378         .spo_flags = SDEV_PLUGIN_SUBDIR,
2379         .spo_validate = vmm_sdev_validate,
2380         .spo_filldir = vmm_sdev_filldir,
2381         .spo_inactive = vmm_sdev_inactive
2382 };
2383 
2384 /* ARGSUSED */
2385 static int
2386 vmm_info(dev_info_t *dip, ddi_info_cmd_t cmd, void *arg, void **result)
2387 {
2388         int error;
2389 
2390         switch (cmd) {
2391         case DDI_INFO_DEVT2DEVINFO:
2392                 *result = (void *)vmmdev_dip;
2393                 error = DDI_SUCCESS;
2394                 break;
2395         case DDI_INFO_DEVT2INSTANCE:
2396                 *result = (void *)0;
2397                 error = DDI_SUCCESS;
2398                 break;
2399         default:
2400                 error = DDI_FAILURE;
2401                 break;
2402         }
2403         return (error);
2404 }
2405 
2406 static int
2407 vmm_attach(dev_info_t *dip, ddi_attach_cmd_t cmd)
2408 {
2409         sdev_plugin_hdl_t sph;
2410         hma_reg_t *reg = NULL;
2411         boolean_t vmm_loaded = B_FALSE;
2412 
2413         if (cmd != DDI_ATTACH) {
2414                 return (DDI_FAILURE);
2415         }
2416 
2417         mutex_enter(&vmmdev_mtx);
2418         /* Ensure we are not already attached. */
2419         if (vmmdev_dip != NULL) {
2420                 mutex_exit(&vmmdev_mtx);
2421                 return (DDI_FAILURE);
2422         }
2423 
2424         vmm_sol_glue_init();
2425         vmm_arena_init();
2426 
2427         /*
2428          * Perform temporary HMA registration to determine if the system
2429          * is capable.
2430          */
2431         if ((reg = hma_register(vmmdev_hvm_name)) == NULL) {
2432                 goto fail;
2433         } else if (vmm_mod_load() != 0) {
2434                 goto fail;
2435         }
2436         vmm_loaded = B_TRUE;
2437         hma_unregister(reg);
2438         reg = NULL;
2439 
2440         /* Create control node.  Other nodes will be created on demand. */
2441         if (ddi_create_minor_node(dip, "ctl", S_IFCHR,
2442             VMM_CTL_MINOR, DDI_PSEUDO, 0) != 0) {
2443                 goto fail;
2444         }
2445 
2446         sph = sdev_plugin_register(VMM_MODULE_NAME, &vmm_sdev_ops, NULL);
2447         if (sph == (sdev_plugin_hdl_t)NULL) {
2448                 ddi_remove_minor_node(dip, NULL);
2449                 goto fail;
2450         }
2451 
2452         ddi_report_dev(dip);
2453         vmmdev_sdev_hdl = sph;
2454         vmmdev_dip = dip;
2455         mutex_exit(&vmmdev_mtx);
2456         return (DDI_SUCCESS);
2457 
2458 fail:
2459         if (vmm_loaded) {
2460                 VERIFY0(vmm_mod_unload());
2461         }
2462         if (reg != NULL) {
2463                 hma_unregister(reg);
2464         }
2465         vmm_arena_fini();
2466         vmm_sol_glue_cleanup();
2467         mutex_exit(&vmmdev_mtx);
2468         return (DDI_FAILURE);
2469 }
2470 
2471 static int
2472 vmm_detach(dev_info_t *dip, ddi_detach_cmd_t cmd)
2473 {
2474         if (cmd != DDI_DETACH) {
2475                 return (DDI_FAILURE);
2476         }
2477 
2478         /*
2479          * Ensure that all resources have been cleaned up.
2480          *
2481          * To prevent a deadlock with iommu_cleanup() we'll fail the detach if
2482          * vmmdev_mtx is already held. We can't wait for vmmdev_mtx with our
2483          * devinfo locked as iommu_cleanup() tries to recursively lock each
2484          * devinfo, including our own, while holding vmmdev_mtx.
2485          */
2486         if (mutex_tryenter(&vmmdev_mtx) == 0)
2487                 return (DDI_FAILURE);
2488 
2489         mutex_enter(&vmm_mtx);
2490         if (!list_is_empty(&vmm_list) || !list_is_empty(&vmm_destroy_list)) {
2491                 mutex_exit(&vmm_mtx);
2492                 mutex_exit(&vmmdev_mtx);
2493                 return (DDI_FAILURE);
2494         }
2495         mutex_exit(&vmm_mtx);
2496 
2497         VERIFY(vmmdev_sdev_hdl != (sdev_plugin_hdl_t)NULL);
2498         if (sdev_plugin_unregister(vmmdev_sdev_hdl) != 0) {
2499                 mutex_exit(&vmmdev_mtx);
2500                 return (DDI_FAILURE);
2501         }
2502         vmmdev_sdev_hdl = (sdev_plugin_hdl_t)NULL;
2503 
2504         /* Remove the control node. */
2505         ddi_remove_minor_node(dip, "ctl");
2506         vmmdev_dip = NULL;
2507 
2508         VERIFY0(vmm_mod_unload());
2509         VERIFY3P(vmmdev_hma_reg, ==, NULL);
2510         vmm_arena_fini();
2511         vmm_sol_glue_cleanup();
2512 
2513         mutex_exit(&vmmdev_mtx);
2514 
2515         return (DDI_SUCCESS);
2516 }
2517 
2518 static struct cb_ops vmm_cb_ops = {
2519         vmm_open,
2520         vmm_close,
2521         nodev,          /* strategy */
2522         nodev,          /* print */
2523         nodev,          /* dump */
2524         nodev,          /* read */
2525         nodev,          /* write */
2526         vmm_ioctl,
2527         nodev,          /* devmap */
2528         nodev,          /* mmap */
2529         vmm_segmap,
2530         nochpoll,       /* poll */
2531         ddi_prop_op,
2532         NULL,
2533         D_NEW | D_MP | D_DEVMAP
2534 };
2535 
2536 static struct dev_ops vmm_ops = {
2537         DEVO_REV,
2538         0,
2539         vmm_info,
2540         nulldev,        /* identify */
2541         nulldev,        /* probe */
2542         vmm_attach,
2543         vmm_detach,
2544         nodev,          /* reset */
2545         &vmm_cb_ops,
2546         (struct bus_ops *)NULL
2547 };
2548 
2549 static struct modldrv modldrv = {
2550         &mod_driverops,
2551         "bhyve vmm",
2552         &vmm_ops
2553 };
2554 
2555 static struct modlinkage modlinkage = {
2556         MODREV_1,
2557         &modldrv,
2558         NULL
2559 };
2560 
2561 int
2562 _init(void)
2563 {
2564         int     error;
2565 
2566         sysinit();
2567 
2568         mutex_init(&vmmdev_mtx, NULL, MUTEX_DRIVER, NULL);
2569         mutex_init(&vmm_mtx, NULL, MUTEX_DRIVER, NULL);
2570         list_create(&vmm_list, sizeof (vmm_softc_t),
2571             offsetof(vmm_softc_t, vmm_node));
2572         list_create(&vmm_destroy_list, sizeof (vmm_softc_t),
2573             offsetof(vmm_softc_t, vmm_node));
2574         vmm_minors = id_space_create("vmm_minors", VMM_CTL_MINOR + 1, MAXMIN32);
2575 
2576         error = ddi_soft_state_init(&vmm_statep, sizeof (vmm_softc_t), 0);
2577         if (error) {
2578                 return (error);
2579         }
2580 
2581         vmm_zsd_init();
2582 
2583         error = mod_install(&modlinkage);
2584         if (error) {
2585                 ddi_soft_state_fini(&vmm_statep);
2586                 vmm_zsd_fini();
2587         }
2588 
2589         return (error);
2590 }
2591 
2592 int
2593 _fini(void)
2594 {
2595         int     error;
2596 
2597         error = mod_remove(&modlinkage);
2598         if (error) {
2599                 return (error);
2600         }
2601 
2602         vmm_zsd_fini();
2603 
2604         ddi_soft_state_fini(&vmm_statep);
2605 
2606         return (0);
2607 }
2608 
2609 int
2610 _info(struct modinfo *modinfop)
2611 {
2612         return (mod_info(&modlinkage, modinfop));
2613 }