13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>
--- old/usr/src/uts/i86pc/io/vmm/vmm.c
+++ new/usr/src/uts/i86pc/io/vmm/vmm.c
1 1 /*-
2 2 * SPDX-License-Identifier: BSD-2-Clause-FreeBSD
3 3 *
4 4 * Copyright (c) 2011 NetApp, Inc.
5 5 * All rights reserved.
6 6 *
7 7 * Redistribution and use in source and binary forms, with or without
8 8 * modification, are permitted provided that the following conditions
9 9 * are met:
10 10 * 1. Redistributions of source code must retain the above copyright
11 11 * notice, this list of conditions and the following disclaimer.
12 12 * 2. Redistributions in binary form must reproduce the above copyright
13 13 * notice, this list of conditions and the following disclaimer in the
14 14 * documentation and/or other materials provided with the distribution.
15 15 *
16 16 * THIS SOFTWARE IS PROVIDED BY NETAPP, INC ``AS IS'' AND
17 17 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
18 18 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
19 19 * ARE DISCLAIMED. IN NO EVENT SHALL NETAPP, INC OR CONTRIBUTORS BE LIABLE
20 20 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
21 21 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
22 22 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
23 23 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
24 24 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
25 25 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
26 26 * SUCH DAMAGE.
27 27 *
28 28 * $FreeBSD$
29 29 */
30 30 /*
31 31 * This file and its contents are supplied under the terms of the
32 32 * Common Development and Distribution License ("CDDL"), version 1.0.
33 33 * You may only use this file in accordance with the terms of version
34 34 * 1.0 of the CDDL.
35 35 *
36 36 * A full copy of the text of the CDDL should have accompanied this
37 37 * source. A copy of the CDDL is also available via the Internet at
38 38 * http://www.illumos.org/license/CDDL.
39 39 *
40 40 * Copyright 2015 Pluribus Networks Inc.
41 41 * Copyright 2018 Joyent, Inc.
42 42 * Copyright 2020 Oxide Computer Company
43 43 */
44 44
45 45 #include <sys/cdefs.h>
46 46 __FBSDID("$FreeBSD$");
47 47
48 48 #include <sys/param.h>
49 49 #include <sys/systm.h>
50 50 #include <sys/kernel.h>
51 51 #include <sys/module.h>
52 52 #include <sys/sysctl.h>
53 53 #include <sys/malloc.h>
54 54 #include <sys/pcpu.h>
55 55 #include <sys/lock.h>
56 56 #include <sys/mutex.h>
57 57 #include <sys/proc.h>
58 58 #include <sys/rwlock.h>
59 59 #include <sys/sched.h>
60 60 #include <sys/smp.h>
61 61 #include <sys/systm.h>
62 62
63 63 #include <vm/vm.h>
64 64 #include <vm/vm_object.h>
65 65 #include <vm/vm_map.h>
66 66 #include <vm/vm_page.h>
67 67 #include <vm/pmap.h>
68 68 #include <vm/vm_extern.h>
69 69 #include <vm/vm_param.h>
70 70
71 71 #ifdef __FreeBSD__
72 72 #include <machine/cpu.h>
73 73 #endif
74 74 #include <machine/pcb.h>
75 75 #include <machine/smp.h>
76 76 #include <machine/md_var.h>
77 77 #include <x86/psl.h>
78 78 #include <x86/apicreg.h>
79 79
80 80 #include <machine/vmm.h>
81 81 #include <machine/vmm_dev.h>
82 82 #include <sys/vmm_instruction_emul.h>
83 83
84 84 #include "vmm_ioport.h"
85 85 #include "vmm_ktr.h"
86 86 #include "vmm_host.h"
87 87 #include "vmm_mem.h"
88 88 #include "vmm_util.h"
89 89 #include "vatpic.h"
90 90 #include "vatpit.h"
91 91 #include "vhpet.h"
92 92 #include "vioapic.h"
93 93 #include "vlapic.h"
94 94 #include "vpmtmr.h"
95 95 #include "vrtc.h"
96 96 #include "vmm_stat.h"
97 97 #include "vmm_lapic.h"
98 98
99 99 #include "io/ppt.h"
100 100 #include "io/iommu.h"
101 101
( 101 lines elided )
102 102 struct vlapic;
103 103
104 104 /*
105 105 * Initialization:
106 106 * (a) allocated when vcpu is created
107 107 * (i) initialized when vcpu is created and when it is reinitialized
108 108 * (o) initialized the first time the vcpu is created
109 109 * (x) initialized before use
110 110 */
111 111 struct vcpu {
112 - struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */
112 + /* (o) protects state, run_state, hostcpu, sipi_vector */
113 + struct mtx mtx;
114 +
113 115 enum vcpu_state state; /* (o) vcpu state */
114 -#ifndef __FreeBSD__
116 + enum vcpu_run_state run_state; /* (i) vcpu init/sipi/run state */
115 117 kcondvar_t vcpu_cv; /* (o) cpu waiter cv */
116 118 kcondvar_t state_cv; /* (o) IDLE-transition cv */
117 -#endif /* __FreeBSD__ */
118 119 int hostcpu; /* (o) vcpu's current host cpu */
119 -#ifndef __FreeBSD__
120 120 int lastloccpu; /* (o) last host cpu localized to */
121 -#endif
122 - uint_t runblock; /* (i) block vcpu from run state */
123 121 int reqidle; /* (i) request vcpu to idle */
124 122 struct vlapic *vlapic; /* (i) APIC device model */
125 123 enum x2apic_state x2apic_state; /* (i) APIC mode */
126 124 uint64_t exitintinfo; /* (i) events pending at VM exit */
127 125 int nmi_pending; /* (i) NMI pending */
128 126 int extint_pending; /* (i) INTR pending */
129 127 int exception_pending; /* (i) exception pending */
130 128 int exc_vector; /* (x) exception collateral */
131 129 int exc_errcode_valid;
132 130 uint32_t exc_errcode;
131 + uint8_t sipi_vector; /* (i) SIPI vector */
133 132 struct savefpu *guestfpu; /* (a,i) guest fpu state */
134 133 uint64_t guest_xcr0; /* (i) guest %xcr0 register */
135 134 void *stats; /* (a,i) statistics */
136 135 struct vm_exit exitinfo; /* (x) exit reason and collateral */
137 136 uint64_t nextrip; /* (x) next instruction to execute */
138 137 struct vie *vie_ctx; /* (x) instruction emulation context */
139 138 #ifndef __FreeBSD__
140 139 uint64_t tsc_offset; /* (x) offset from host TSC */
141 140 #endif
142 141 };
143 142
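Review note: the new run_state / sipi_vector pair replaces the VM-wide SIPI bookkeeping that is removed from struct vm below, and both fields are covered by the vcpu spin lock per the updated comment on mtx. The VRS_* constants themselves live in the vmm headers rather than in this file. For orientation only, a flag set of the following shape would satisfy the names used later in this diff (VRS_HALT, VRS_INIT, VRS_RUN, VRS_PEND_INIT); the exact bit values and the VRS_PEND_SIPI name are assumptions, not taken from this change:

	/* Hypothetical sketch of the vcpu run-state flags (see vmm headers). */
	#define	VRS_HALT	0		/* powered on, waiting for INIT */
	#define	VRS_INIT	(1 << 0)	/* INIT has been received */
	#define	VRS_RUN		(1 << 1)	/* SIPI received, vcpu runnable */
	#define	VRS_PEND_INIT	(1 << 14)	/* INIT delivery pending */
	#define	VRS_PEND_SIPI	(1 << 15)	/* SIPI delivery pending (assumed) */
	#define	VRS_IS_VALID(v)	\
	    (((v) & ~(VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI)) == 0)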
144 143 #define vcpu_lock_initialized(v) mtx_initialized(&((v)->mtx))
145 144 #define vcpu_lock_init(v) mtx_init(&((v)->mtx), "vcpu lock", 0, MTX_SPIN)
146 145 #define vcpu_lock(v) mtx_lock_spin(&((v)->mtx))
147 146 #define vcpu_unlock(v) mtx_unlock_spin(&((v)->mtx))
148 147 #define vcpu_assert_locked(v) mtx_assert(&((v)->mtx), MA_OWNED)
149 148
150 149 struct mem_seg {
151 150 size_t len;
152 151 bool sysmem;
153 152 struct vm_object *object;
154 153 };
155 154 #ifdef __FreeBSD__
156 155 #define VM_MAX_MEMSEGS 3
157 156 #else
158 157 #define VM_MAX_MEMSEGS 4
159 158 #endif
160 159
161 160 struct mem_map {
162 161 vm_paddr_t gpa;
163 162 size_t len;
164 163 vm_ooffset_t segoff;
165 164 int segid;
166 165 int prot;
167 166 int flags;
168 167 };
169 168 #define VM_MAX_MEMMAPS 8
170 169
171 170 /*
172 171 * Initialization:
173 172 * (o) initialized the first time the VM is created
174 173 * (i) initialized when VM is created and when it is reinitialized
175 174 * (x) initialized before use
176 175 */
177 176 struct vm {
178 177 void *cookie; /* (i) cpu-specific data */
179 178 void *iommu; /* (x) iommu-specific data */
180 179 struct vhpet *vhpet; /* (i) virtual HPET */
181 180 struct vioapic *vioapic; /* (i) virtual ioapic */
182 181 struct vatpic *vatpic; /* (i) virtual atpic */
183 182 struct vatpit *vatpit; /* (i) virtual atpit */
184 183 struct vpmtmr *vpmtmr; /* (i) virtual ACPI PM timer */
185 184 struct vrtc *vrtc; /* (o) virtual RTC */
186 185 volatile cpuset_t active_cpus; /* (i) active vcpus */
187 186 volatile cpuset_t debug_cpus; /* (i) vcpus stopped for dbg */
188 187 int suspend; /* (i) stop VM execution */
189 188 volatile cpuset_t suspended_cpus; /* (i) suspended vcpus */
190 189 volatile cpuset_t halted_cpus; /* (x) cpus in a hard halt */
191 190 struct mem_map mem_maps[VM_MAX_MEMMAPS]; /* (i) guest address space */
192 191 struct mem_seg mem_segs[VM_MAX_MEMSEGS]; /* (o) guest memory regions */
( 50 lines elided )
193 192 struct vmspace *vmspace; /* (o) guest's address space */
194 193 char name[VM_MAX_NAMELEN]; /* (o) virtual machine name */
195 194 struct vcpu vcpu[VM_MAXCPU]; /* (i) guest vcpus */
196 195 /* The following describe the vm cpu topology */
197 196 uint16_t sockets; /* (o) num of sockets */
198 197 uint16_t cores; /* (o) num of cores/socket */
199 198 uint16_t threads; /* (o) num of threads/core */
200 199 uint16_t maxcpus; /* (o) max pluggable cpus */
201 200
202 201 struct ioport_config ioports; /* (o) ioport handling */
203 -
204 - bool sipi_req; /* (i) SIPI requested */
205 - int sipi_req_vcpu; /* (i) SIPI destination */
206 - uint64_t sipi_req_rip; /* (i) SIPI start %rip */
207 -
208 - /* Miscellaneous VM-wide statistics and counters */
209 - struct vm_wide_stats {
210 - uint64_t sipi_supersede;
211 - } stats;
212 202 };
213 203
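Review note: the VM-wide SIPI request fields (sipi_req, sipi_req_vcpu, sipi_req_rip) and the sipi_supersede counter are dropped here; with richer INIT/SIPI support the pending work is recorded on the target vcpu instead. An illustrative fragment of what queuing a SIPI on a vcpu looks like under the new model (not code from this diff; VRS_PEND_SIPI is an assumed flag name, and target/vector stand in for caller-supplied values):

	struct vcpu *vcpu = &vm->vcpu[target];

	vcpu_lock(vcpu);
	vcpu->run_state |= VRS_PEND_SIPI;	/* mark SIPI pending on this vcpu */
	vcpu->sipi_vector = vector;		/* start vector for the AP */
	vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
	vcpu_unlock(vcpu);

This mirrors the body of vm_set_run_state() added further down.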
214 204 static int vmm_initialized;
215 205
216 206
217 207 static void
218 208 nullop_panic(void)
219 209 {
220 210 panic("null vmm operation call");
221 211 }
222 212
223 213 /* Do not allow use of an un-set `ops` to do anything but panic */
224 214 static struct vmm_ops vmm_ops_null = {
225 215 .init = (vmm_init_func_t)nullop_panic,
226 216 .cleanup = (vmm_cleanup_func_t)nullop_panic,
227 217 .resume = (vmm_resume_func_t)nullop_panic,
228 218 .vminit = (vmi_init_func_t)nullop_panic,
229 219 .vmrun = (vmi_run_func_t)nullop_panic,
230 220 .vmcleanup = (vmi_cleanup_func_t)nullop_panic,
231 221 .vmgetreg = (vmi_get_register_t)nullop_panic,
232 222 .vmsetreg = (vmi_set_register_t)nullop_panic,
233 223 .vmgetdesc = (vmi_get_desc_t)nullop_panic,
234 224 .vmsetdesc = (vmi_set_desc_t)nullop_panic,
235 225 .vmgetcap = (vmi_get_cap_t)nullop_panic,
236 226 .vmsetcap = (vmi_set_cap_t)nullop_panic,
237 227 .vmspace_alloc = (vmi_vmspace_alloc)nullop_panic,
238 228 .vmspace_free = (vmi_vmspace_free)nullop_panic,
239 229 .vlapic_init = (vmi_vlapic_init)nullop_panic,
240 230 .vlapic_cleanup = (vmi_vlapic_cleanup)nullop_panic,
241 231 .vmsavectx = (vmi_savectx)nullop_panic,
( 20 lines elided )
242 232 .vmrestorectx = (vmi_restorectx)nullop_panic,
243 233 };
244 234
245 235 static struct vmm_ops *ops = &vmm_ops_null;
246 236
247 237 #define VMM_INIT(num) ((*ops->init)(num))
248 238 #define VMM_CLEANUP() ((*ops->cleanup)())
249 239 #define VMM_RESUME() ((*ops->resume)())
250 240
251 241 #define VMINIT(vm, pmap) ((*ops->vminit)(vm, pmap))
252 -#define VMRUN(vmi, vcpu, rip, pmap, evinfo) \
253 - ((*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo))
242 +#define VMRUN(vmi, vcpu, rip, pmap) \
243 + ((*ops->vmrun)(vmi, vcpu, rip, pmap))
254 244 #define VMCLEANUP(vmi) ((*ops->vmcleanup)(vmi))
255 245 #define VMSPACE_ALLOC(min, max) ((*ops->vmspace_alloc)(min, max))
256 246 #define VMSPACE_FREE(vmspace) ((*ops->vmspace_free)(vmspace))
257 247
258 248 #define VMGETREG(vmi, vcpu, num, rv) ((*ops->vmgetreg)(vmi, vcpu, num, rv))
259 249 #define VMSETREG(vmi, vcpu, num, val) ((*ops->vmsetreg)(vmi, vcpu, num, val))
260 250 #define VMGETDESC(vmi, vcpu, num, dsc) ((*ops->vmgetdesc)(vmi, vcpu, num, dsc))
261 251 #define VMSETDESC(vmi, vcpu, num, dsc) ((*ops->vmsetdesc)(vmi, vcpu, num, dsc))
262 252 #define VMGETCAP(vmi, vcpu, num, rv) ((*ops->vmgetcap)(vmi, vcpu, num, rv))
263 253 #define VMSETCAP(vmi, vcpu, num, val) ((*ops->vmsetcap)(vmi, vcpu, num, val))
264 254 #define VLAPIC_INIT(vmi, vcpu) ((*ops->vlapic_init)(vmi, vcpu))
265 255 #define VLAPIC_CLEANUP(vmi, vlapic) ((*ops->vlapic_cleanup)(vmi, vlapic))
266 256
267 257 #define fpu_start_emulating() load_cr0(rcr0() | CR0_TS)
268 258 #define fpu_stop_emulating() clts()
269 259
270 260 SDT_PROVIDER_DEFINE(vmm);
271 261
272 262 static MALLOC_DEFINE(M_VM, "vm", "vm");
273 263
274 264 /* statistics */
275 265 static VMM_STAT(VCPU_TOTAL_RUNTIME, "vcpu total runtime");
276 266
277 267 SYSCTL_NODE(_hw, OID_AUTO, vmm, CTLFLAG_RW | CTLFLAG_MPSAFE, NULL,
278 268 NULL);
279 269
280 270 /*
281 271 * Halt the guest if all vcpus are executing a HLT instruction with
282 272 * interrupts disabled.
283 273 */
284 274 static int halt_detection_enabled = 1;
( 21 lines elided )
285 275
286 276 /* IPI vector used for vcpu notifications */
287 277 static int vmm_ipinum;
288 278
289 279 /* Trap into hypervisor on all guest exceptions and reflect them back */
290 280 static int trace_guest_exceptions;
291 281
292 282 static void vm_free_memmap(struct vm *vm, int ident);
293 283 static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
294 284 static void vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t);
285 +static bool vcpu_sleep_bailout_checks(struct vm *vm, int vcpuid);
286 +static int vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector);
295 287
296 288 #ifndef __FreeBSD__
297 289 static void vm_clear_memseg(struct vm *, int);
298 290
299 291 /* Flags for vtc_status */
300 292 #define VTCS_FPU_RESTORED 1 /* guest FPU restored, host FPU saved */
301 293 #define VTCS_FPU_CTX_CRITICAL 2 /* in ctx where FPU restore cannot be lazy */
302 294
303 295 typedef struct vm_thread_ctx {
304 296 struct vm *vtc_vm;
305 297 int vtc_vcpuid;
306 298 uint_t vtc_status;
307 299 } vm_thread_ctx_t;
308 300 #endif /* __FreeBSD__ */
309 301
310 302 #ifdef KTR
311 303 static const char *
312 304 vcpu_state2str(enum vcpu_state state)
313 305 {
314 306
315 307 switch (state) {
316 308 case VCPU_IDLE:
317 309 return ("idle");
318 310 case VCPU_FROZEN:
319 311 return ("frozen");
320 312 case VCPU_RUNNING:
321 313 return ("running");
322 314 case VCPU_SLEEPING:
323 315 return ("sleeping");
324 316 default:
325 317 return ("unknown");
326 318 }
327 319 }
328 320 #endif
329 321
330 322 static void
331 323 vcpu_cleanup(struct vm *vm, int i, bool destroy)
332 324 {
333 325 struct vcpu *vcpu = &vm->vcpu[i];
334 326
335 327 VLAPIC_CLEANUP(vm->cookie, vcpu->vlapic);
336 328 if (destroy) {
337 329 vmm_stat_free(vcpu->stats);
338 330 fpu_save_area_free(vcpu->guestfpu);
339 331 vie_free(vcpu->vie_ctx);
340 332 vcpu->vie_ctx = NULL;
341 333 }
342 334 }
343 335
344 336 static void
345 337 vcpu_init(struct vm *vm, int vcpu_id, bool create)
346 338 {
347 339 struct vcpu *vcpu;
348 340
349 341 KASSERT(vcpu_id >= 0 && vcpu_id < vm->maxcpus,
350 342 ("vcpu_init: invalid vcpu %d", vcpu_id));
351 343
352 344 vcpu = &vm->vcpu[vcpu_id];
353 345
354 346 if (create) {
355 347 #ifdef __FreeBSD__
356 348 KASSERT(!vcpu_lock_initialized(vcpu), ("vcpu %d already "
357 349 "initialized", vcpu_id));
358 350 #endif
359 351 vcpu_lock_init(vcpu);
360 352 vcpu->state = VCPU_IDLE;
361 353 vcpu->hostcpu = NOCPU;
362 354 #ifndef __FreeBSD__
( 58 lines elided )
363 355 vcpu->lastloccpu = NOCPU;
364 356 #endif
365 357 vcpu->guestfpu = fpu_save_area_alloc();
366 358 vcpu->stats = vmm_stat_alloc();
367 359 vcpu->vie_ctx = vie_alloc();
368 360 } else {
369 361 vie_reset(vcpu->vie_ctx);
370 362 bzero(&vcpu->exitinfo, sizeof (vcpu->exitinfo));
371 363 }
372 364
365 + vcpu->run_state = VRS_HALT;
373 366 vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
374 367 vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
375 - vcpu->runblock = 0;
376 368 vcpu->reqidle = 0;
377 369 vcpu->exitintinfo = 0;
378 370 vcpu->nmi_pending = 0;
379 371 vcpu->extint_pending = 0;
380 372 vcpu->exception_pending = 0;
381 373 vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
382 374 fpu_save_area_reset(vcpu->guestfpu);
383 375 vmm_stat_init(vcpu->stats);
384 376 }
385 377
386 378 int
387 379 vcpu_trace_exceptions(struct vm *vm, int vcpuid)
388 380 {
389 381
390 382 return (trace_guest_exceptions);
391 383 }
392 384
393 385 struct vm_exit *
394 386 vm_exitinfo(struct vm *vm, int cpuid)
395 387 {
396 388 struct vcpu *vcpu;
397 389
398 390 if (cpuid < 0 || cpuid >= vm->maxcpus)
399 391 panic("vm_exitinfo: invalid cpuid %d", cpuid);
400 392
401 393 vcpu = &vm->vcpu[cpuid];
402 394
403 395 return (&vcpu->exitinfo);
404 396 }
405 397
406 398 struct vie *
407 399 vm_vie_ctx(struct vm *vm, int cpuid)
408 400 {
409 401 if (cpuid < 0 || cpuid >= vm->maxcpus)
410 402 panic("vm_vie_ctx: invalid cpuid %d", cpuid);
411 403
412 404 return (vm->vcpu[cpuid].vie_ctx);
413 405 }
414 406
415 407 static int
416 408 vmm_init(void)
417 409 {
418 410 int error;
419 411
420 412 vmm_host_state_init();
421 413
422 414 #ifdef __FreeBSD__
423 415 vmm_ipinum = lapic_ipi_alloc(pti ? &IDTVEC(justreturn1_pti) :
424 416 &IDTVEC(justreturn));
425 417 if (vmm_ipinum < 0)
426 418 vmm_ipinum = IPI_AST;
427 419 #else
428 420 /* We use cpu_poke() for IPIs */
429 421 vmm_ipinum = 0;
430 422 #endif
431 423
432 424 error = vmm_mem_init();
433 425 if (error)
434 426 return (error);
435 427
436 428 if (vmm_is_intel())
437 429 ops = &vmm_ops_intel;
438 430 else if (vmm_is_svm())
439 431 ops = &vmm_ops_amd;
440 432 else
441 433 return (ENXIO);
442 434
443 435 #ifdef __FreeBSD__
444 436 vmm_resume_p = vmm_resume;
445 437 #endif
446 438
447 439 return (VMM_INIT(vmm_ipinum));
448 440 }
449 441
450 442 int
451 443 vmm_mod_load()
452 444 {
453 445 int error;
454 446
455 447 VERIFY(vmm_initialized == 0);
456 448
457 449 error = vmm_init();
458 450 if (error == 0)
459 451 vmm_initialized = 1;
460 452
461 453 return (error);
462 454 }
463 455
464 456 int
465 457 vmm_mod_unload()
466 458 {
467 459 int error;
468 460
469 461 VERIFY(vmm_initialized == 1);
470 462
471 463 iommu_cleanup();
472 464 error = VMM_CLEANUP();
473 465 if (error)
474 466 return (error);
475 467 vmm_initialized = 0;
476 468
477 469 return (0);
478 470 }
479 471
480 472 static void
481 473 vm_init(struct vm *vm, bool create)
482 474 {
483 475 int i;
484 476 #ifndef __FreeBSD__
485 477 uint64_t tsc_off;
486 478 #endif
487 479
488 480 vm->cookie = VMINIT(vm, vmspace_pmap(vm->vmspace));
489 481 vm->iommu = NULL;
490 482 vm->vioapic = vioapic_init(vm);
491 483 vm->vhpet = vhpet_init(vm);
492 484 vm->vatpic = vatpic_init(vm);
493 485 vm->vatpit = vatpit_init(vm);
494 486 vm->vpmtmr = vpmtmr_init(vm);
495 487 if (create)
496 488 vm->vrtc = vrtc_init(vm);
497 489
498 490 vm_inout_init(vm, &vm->ioports);
499 491
500 492 CPU_ZERO(&vm->active_cpus);
501 493 CPU_ZERO(&vm->debug_cpus);
502 494
503 495 vm->suspend = 0;
504 496 CPU_ZERO(&vm->suspended_cpus);
505 497
506 498 for (i = 0; i < vm->maxcpus; i++)
507 499 vcpu_init(vm, i, create);
508 500
509 501 #ifndef __FreeBSD__
510 502 tsc_off = (uint64_t)(-(int64_t)rdtsc());
511 503 for (i = 0; i < vm->maxcpus; i++) {
512 504 vm->vcpu[i].tsc_offset = tsc_off;
513 505 }
514 506 #endif /* __FreeBSD__ */
515 507 }
516 508
517 509 /*
518 510 * The default CPU topology is a single thread per package.
519 511 */
520 512 uint_t cores_per_package = 1;
521 513 uint_t threads_per_core = 1;
522 514
523 515 int
524 516 vm_create(const char *name, struct vm **retvm)
525 517 {
526 518 struct vm *vm;
527 519 struct vmspace *vmspace;
528 520
529 521 /*
530 522 * If vmm.ko could not be successfully initialized then don't attempt
531 523 * to create the virtual machine.
532 524 */
533 525 if (!vmm_initialized)
534 526 return (ENXIO);
535 527
536 528 if (name == NULL || strlen(name) >= VM_MAX_NAMELEN)
537 529 return (EINVAL);
538 530
539 531 vmspace = VMSPACE_ALLOC(0, VM_MAXUSER_ADDRESS);
540 532 if (vmspace == NULL)
541 533 return (ENOMEM);
542 534
543 535 vm = malloc(sizeof (struct vm), M_VM, M_WAITOK | M_ZERO);
544 536 strcpy(vm->name, name);
545 537 vm->vmspace = vmspace;
546 538
547 539 vm->sockets = 1;
548 540 vm->cores = cores_per_package; /* XXX backwards compatibility */
549 541 vm->threads = threads_per_core; /* XXX backwards compatibility */
550 542 vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */
551 543
552 544 vm_init(vm, true);
553 545
554 546 *retvm = vm;
555 547 return (0);
556 548 }
557 549
558 550 void
559 551 vm_get_topology(struct vm *vm, uint16_t *sockets, uint16_t *cores,
560 552 uint16_t *threads, uint16_t *maxcpus)
561 553 {
562 554 *sockets = vm->sockets;
563 555 *cores = vm->cores;
564 556 *threads = vm->threads;
565 557 *maxcpus = vm->maxcpus;
566 558 }
567 559
568 560 uint16_t
569 561 vm_get_maxcpus(struct vm *vm)
570 562 {
571 563 return (vm->maxcpus);
572 564 }
573 565
574 566 int
575 567 vm_set_topology(struct vm *vm, uint16_t sockets, uint16_t cores,
576 568 uint16_t threads, uint16_t maxcpus)
577 569 {
578 570 if (maxcpus != 0)
579 571 return (EINVAL); /* XXX remove when supported */
580 572 if ((sockets * cores * threads) > vm->maxcpus)
581 573 return (EINVAL);
582 574 /* XXX need to check sockets * cores * threads == vCPU, how? */
583 575 vm->sockets = sockets;
584 576 vm->cores = cores;
585 577 vm->threads = threads;
586 578 vm->maxcpus = VM_MAXCPU; /* XXX temp to keep code working */
587 579 return (0);
588 580 }
589 581
590 582 static void
591 583 vm_cleanup(struct vm *vm, bool destroy)
592 584 {
593 585 struct mem_map *mm;
594 586 int i;
595 587
596 588 ppt_unassign_all(vm);
597 589
598 590 if (vm->iommu != NULL)
599 591 iommu_destroy_domain(vm->iommu);
600 592
601 593 /*
602 594 * Devices which attach their own ioport hooks should be cleaned up
603 595 * first so they can tear down those registrations.
604 596 */
605 597 vpmtmr_cleanup(vm->vpmtmr);
606 598
607 599 vm_inout_cleanup(vm, &vm->ioports);
608 600
609 601 if (destroy)
610 602 vrtc_cleanup(vm->vrtc);
611 603 else
612 604 vrtc_reset(vm->vrtc);
613 605
614 606 vatpit_cleanup(vm->vatpit);
615 607 vhpet_cleanup(vm->vhpet);
616 608 vatpic_cleanup(vm->vatpic);
617 609 vioapic_cleanup(vm->vioapic);
618 610
619 611 for (i = 0; i < vm->maxcpus; i++)
620 612 vcpu_cleanup(vm, i, destroy);
621 613
622 614 VMCLEANUP(vm->cookie);
623 615
624 616 /*
625 617 * System memory is removed from the guest address space only when
626 618 * the VM is destroyed. This is because the mapping remains the same
627 619 * across VM reset.
628 620 *
629 621 * Device memory can be relocated by the guest (e.g. using PCI BARs)
630 622 * so those mappings are removed on a VM reset.
631 623 */
632 624 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
633 625 mm = &vm->mem_maps[i];
634 626 if (destroy || !sysmem_mapping(vm, mm))
635 627 vm_free_memmap(vm, i);
636 628 #ifndef __FreeBSD__
637 629 else {
638 630 /*
639 631 * We need to reset the IOMMU flag so this mapping can
640 632 * be reused when a VM is rebooted. Since the IOMMU
641 633 * domain has already been destroyed we can just reset
642 634 * the flag here.
643 635 */
644 636 mm->flags &= ~VM_MEMMAP_F_IOMMU;
645 637 }
646 638 #endif
647 639 }
648 640
649 641 if (destroy) {
650 642 for (i = 0; i < VM_MAX_MEMSEGS; i++)
651 643 vm_free_memseg(vm, i);
652 644
653 645 VMSPACE_FREE(vm->vmspace);
654 646 vm->vmspace = NULL;
655 647 }
656 648 #ifndef __FreeBSD__
657 649 else {
658 650 /*
659 651 * Clear the first memory segment (low mem), old memory contents
660 652 * could confuse the UEFI firmware.
661 653 */
662 654 vm_clear_memseg(vm, 0);
663 655 }
664 656 #endif
665 657 }
666 658
667 659 void
668 660 vm_destroy(struct vm *vm)
669 661 {
670 662 vm_cleanup(vm, true);
671 663 free(vm, M_VM);
672 664 }
673 665
674 666 int
675 667 vm_reinit(struct vm *vm)
676 668 {
677 669 int error;
678 670
679 671 /*
680 672 * A virtual machine can be reset only if all vcpus are suspended.
681 673 */
682 674 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
683 675 vm_cleanup(vm, false);
684 676 vm_init(vm, false);
685 677 error = 0;
686 678 } else {
687 679 error = EBUSY;
688 680 }
689 681
690 682 return (error);
691 683 }
692 684
693 685 const char *
694 686 vm_name(struct vm *vm)
695 687 {
696 688 return (vm->name);
697 689 }
698 690
699 691 int
700 692 vm_map_mmio(struct vm *vm, vm_paddr_t gpa, size_t len, vm_paddr_t hpa)
701 693 {
702 694 vm_object_t obj;
703 695
704 696 if ((obj = vmm_mmio_alloc(vm->vmspace, gpa, len, hpa)) == NULL)
705 697 return (ENOMEM);
706 698 else
707 699 return (0);
708 700 }
709 701
710 702 int
711 703 vm_unmap_mmio(struct vm *vm, vm_paddr_t gpa, size_t len)
712 704 {
713 705
714 706 vmm_mmio_free(vm->vmspace, gpa, len);
715 707 return (0);
716 708 }
717 709
718 710 /*
719 711 * Return 'true' if 'gpa' is allocated in the guest address space.
720 712 *
721 713 * This function is called in the context of a running vcpu which acts as
722 714 * an implicit lock on 'vm->mem_maps[]'.
723 715 */
724 716 bool
725 717 vm_mem_allocated(struct vm *vm, int vcpuid, vm_paddr_t gpa)
726 718 {
727 719 struct mem_map *mm;
728 720 int i;
729 721
730 722 #ifdef INVARIANTS
731 723 int hostcpu, state;
732 724 state = vcpu_get_state(vm, vcpuid, &hostcpu);
733 725 KASSERT(state == VCPU_RUNNING && hostcpu == curcpu,
734 726 ("%s: invalid vcpu state %d/%d", __func__, state, hostcpu));
735 727 #endif
736 728
737 729 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
738 730 mm = &vm->mem_maps[i];
739 731 if (mm->len != 0 && gpa >= mm->gpa && gpa < mm->gpa + mm->len)
740 732 return (true); /* 'gpa' is sysmem or devmem */
741 733 }
742 734
743 735 if (ppt_is_mmio(vm, gpa))
744 736 return (true); /* 'gpa' is pci passthru mmio */
745 737
746 738 return (false);
747 739 }
748 740
749 741 int
750 742 vm_alloc_memseg(struct vm *vm, int ident, size_t len, bool sysmem)
751 743 {
752 744 struct mem_seg *seg;
753 745 vm_object_t obj;
754 746
755 747 #ifndef __FreeBSD__
756 748 extern pgcnt_t get_max_page_get(void);
757 749 #endif
758 750
759 751 if (ident < 0 || ident >= VM_MAX_MEMSEGS)
760 752 return (EINVAL);
761 753
762 754 if (len == 0 || (len & PAGE_MASK))
763 755 return (EINVAL);
764 756
765 757 #ifndef __FreeBSD__
766 758 if (len > ptob(get_max_page_get()))
767 759 return (EINVAL);
768 760 #endif
769 761
770 762 seg = &vm->mem_segs[ident];
771 763 if (seg->object != NULL) {
772 764 if (seg->len == len && seg->sysmem == sysmem)
773 765 return (EEXIST);
774 766 else
775 767 return (EINVAL);
776 768 }
777 769
778 770 obj = vm_object_allocate(OBJT_DEFAULT, len >> PAGE_SHIFT);
779 771 if (obj == NULL)
780 772 return (ENOMEM);
781 773
782 774 seg->len = len;
783 775 seg->object = obj;
784 776 seg->sysmem = sysmem;
785 777 return (0);
786 778 }
787 779
788 780 int
789 781 vm_get_memseg(struct vm *vm, int ident, size_t *len, bool *sysmem,
790 782 vm_object_t *objptr)
791 783 {
792 784 struct mem_seg *seg;
793 785
794 786 if (ident < 0 || ident >= VM_MAX_MEMSEGS)
795 787 return (EINVAL);
796 788
797 789 seg = &vm->mem_segs[ident];
798 790 if (len)
799 791 *len = seg->len;
800 792 if (sysmem)
801 793 *sysmem = seg->sysmem;
802 794 if (objptr)
803 795 *objptr = seg->object;
804 796 return (0);
805 797 }
806 798
807 799 #ifndef __FreeBSD__
808 800 static void
809 801 vm_clear_memseg(struct vm *vm, int ident)
810 802 {
811 803 struct mem_seg *seg;
812 804
813 805 KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
814 806 ("%s: invalid memseg ident %d", __func__, ident));
815 807
816 808 seg = &vm->mem_segs[ident];
817 809
818 810 if (seg->object != NULL)
819 811 vm_object_clear(seg->object);
820 812 }
821 813 #endif
822 814
823 815 void
824 816 vm_free_memseg(struct vm *vm, int ident)
825 817 {
826 818 struct mem_seg *seg;
827 819
828 820 KASSERT(ident >= 0 && ident < VM_MAX_MEMSEGS,
829 821 ("%s: invalid memseg ident %d", __func__, ident));
830 822
831 823 seg = &vm->mem_segs[ident];
832 824 if (seg->object != NULL) {
833 825 vm_object_deallocate(seg->object);
834 826 bzero(seg, sizeof (struct mem_seg));
835 827 }
836 828 }
837 829
838 830 int
839 831 vm_mmap_memseg(struct vm *vm, vm_paddr_t gpa, int segid, vm_ooffset_t first,
840 832 size_t len, int prot, int flags)
841 833 {
842 834 struct mem_seg *seg;
843 835 struct mem_map *m, *map;
844 836 vm_ooffset_t last;
845 837 int i, error;
846 838
847 839 if (prot == 0 || (prot & ~(VM_PROT_ALL)) != 0)
848 840 return (EINVAL);
849 841
850 842 if (flags & ~VM_MEMMAP_F_WIRED)
851 843 return (EINVAL);
852 844
853 845 if (segid < 0 || segid >= VM_MAX_MEMSEGS)
854 846 return (EINVAL);
855 847
856 848 seg = &vm->mem_segs[segid];
857 849 if (seg->object == NULL)
858 850 return (EINVAL);
859 851
860 852 last = first + len;
861 853 if (first < 0 || first >= last || last > seg->len)
862 854 return (EINVAL);
863 855
864 856 if ((gpa | first | last) & PAGE_MASK)
865 857 return (EINVAL);
866 858
867 859 map = NULL;
868 860 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
869 861 m = &vm->mem_maps[i];
870 862 if (m->len == 0) {
871 863 map = m;
872 864 break;
873 865 }
874 866 }
875 867
876 868 if (map == NULL)
877 869 return (ENOSPC);
878 870
879 871 error = vm_map_find(&vm->vmspace->vm_map, seg->object, first, &gpa,
880 872 len, 0, VMFS_NO_SPACE, prot, prot, 0);
881 873 if (error != KERN_SUCCESS)
882 874 return (EFAULT);
883 875
884 876 vm_object_reference(seg->object);
885 877
886 878 if ((flags & VM_MEMMAP_F_WIRED) != 0) {
887 879 error = vm_map_wire(&vm->vmspace->vm_map, gpa, gpa + len,
888 880 VM_MAP_WIRE_USER | VM_MAP_WIRE_NOHOLES);
889 881 if (error != KERN_SUCCESS) {
890 882 vm_map_remove(&vm->vmspace->vm_map, gpa, gpa + len);
891 883 return (error == KERN_RESOURCE_SHORTAGE ? ENOMEM :
892 884 EFAULT);
893 885 }
894 886 }
895 887
896 888 map->gpa = gpa;
897 889 map->len = len;
898 890 map->segoff = first;
899 891 map->segid = segid;
900 892 map->prot = prot;
901 893 map->flags = flags;
902 894 return (0);
903 895 }
904 896
905 897 int
906 898 vm_mmap_getnext(struct vm *vm, vm_paddr_t *gpa, int *segid,
907 899 vm_ooffset_t *segoff, size_t *len, int *prot, int *flags)
908 900 {
909 901 struct mem_map *mm, *mmnext;
910 902 int i;
911 903
912 904 mmnext = NULL;
913 905 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
914 906 mm = &vm->mem_maps[i];
915 907 if (mm->len == 0 || mm->gpa < *gpa)
916 908 continue;
917 909 if (mmnext == NULL || mm->gpa < mmnext->gpa)
918 910 mmnext = mm;
919 911 }
920 912
921 913 if (mmnext != NULL) {
922 914 *gpa = mmnext->gpa;
923 915 if (segid)
924 916 *segid = mmnext->segid;
925 917 if (segoff)
926 918 *segoff = mmnext->segoff;
927 919 if (len)
928 920 *len = mmnext->len;
929 921 if (prot)
930 922 *prot = mmnext->prot;
931 923 if (flags)
932 924 *flags = mmnext->flags;
933 925 return (0);
934 926 } else {
935 927 return (ENOENT);
936 928 }
937 929 }
938 930
939 931 static void
940 932 vm_free_memmap(struct vm *vm, int ident)
941 933 {
942 934 struct mem_map *mm;
943 935 int error;
944 936
945 937 mm = &vm->mem_maps[ident];
946 938 if (mm->len) {
947 939 error = vm_map_remove(&vm->vmspace->vm_map, mm->gpa,
948 940 mm->gpa + mm->len);
949 941 KASSERT(error == KERN_SUCCESS, ("%s: vm_map_remove error %d",
950 942 __func__, error));
951 943 bzero(mm, sizeof (struct mem_map));
952 944 }
953 945 }
954 946
955 947 static __inline bool
956 948 sysmem_mapping(struct vm *vm, struct mem_map *mm)
957 949 {
958 950
959 951 if (mm->len != 0 && vm->mem_segs[mm->segid].sysmem)
960 952 return (true);
961 953 else
962 954 return (false);
963 955 }
964 956
965 957 vm_paddr_t
966 958 vmm_sysmem_maxaddr(struct vm *vm)
967 959 {
968 960 struct mem_map *mm;
969 961 vm_paddr_t maxaddr;
970 962 int i;
971 963
972 964 maxaddr = 0;
973 965 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
974 966 mm = &vm->mem_maps[i];
975 967 if (sysmem_mapping(vm, mm)) {
976 968 if (maxaddr < mm->gpa + mm->len)
977 969 maxaddr = mm->gpa + mm->len;
978 970 }
979 971 }
980 972 return (maxaddr);
981 973 }
982 974
983 975 static void
984 976 vm_iommu_modify(struct vm *vm, bool map)
985 977 {
986 978 int i, sz;
987 979 vm_paddr_t gpa, hpa;
988 980 struct mem_map *mm;
989 981 #ifdef __FreeBSD__
990 982 void *vp, *cookie, *host_domain;
991 983 #else
992 984 void *vp, *cookie, *host_domain __unused;
993 985 #endif
994 986
995 987 sz = PAGE_SIZE;
996 988 host_domain = iommu_host_domain();
997 989
998 990 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
999 991 mm = &vm->mem_maps[i];
1000 992 if (!sysmem_mapping(vm, mm))
1001 993 continue;
1002 994
1003 995 if (map) {
1004 996 KASSERT((mm->flags & VM_MEMMAP_F_IOMMU) == 0,
1005 997 ("iommu map found invalid memmap %lx/%lx/%x",
1006 998 mm->gpa, mm->len, mm->flags));
1007 999 if ((mm->flags & VM_MEMMAP_F_WIRED) == 0)
1008 1000 continue;
1009 1001 mm->flags |= VM_MEMMAP_F_IOMMU;
1010 1002 } else {
1011 1003 if ((mm->flags & VM_MEMMAP_F_IOMMU) == 0)
1012 1004 continue;
1013 1005 mm->flags &= ~VM_MEMMAP_F_IOMMU;
1014 1006 KASSERT((mm->flags & VM_MEMMAP_F_WIRED) != 0,
1015 1007 ("iommu unmap found invalid memmap %lx/%lx/%x",
1016 1008 mm->gpa, mm->len, mm->flags));
1017 1009 }
1018 1010
1019 1011 gpa = mm->gpa;
1020 1012 while (gpa < mm->gpa + mm->len) {
1021 1013 vp = vm_gpa_hold(vm, -1, gpa, PAGE_SIZE, VM_PROT_WRITE,
1022 1014 &cookie);
1023 1015 KASSERT(vp != NULL, ("vm(%s) could not map gpa %lx",
1024 1016 vm_name(vm), gpa));
1025 1017
1026 1018 vm_gpa_release(cookie);
1027 1019
1028 1020 hpa = DMAP_TO_PHYS((uintptr_t)vp);
1029 1021 if (map) {
1030 1022 iommu_create_mapping(vm->iommu, gpa, hpa, sz);
1031 1023 #ifdef __FreeBSD__
1032 1024 iommu_remove_mapping(host_domain, hpa, sz);
1033 1025 #endif
1034 1026 } else {
1035 1027 iommu_remove_mapping(vm->iommu, gpa, sz);
1036 1028 #ifdef __FreeBSD__
1037 1029 iommu_create_mapping(host_domain, hpa, hpa, sz);
1038 1030 #endif
1039 1031 }
1040 1032
1041 1033 gpa += PAGE_SIZE;
1042 1034 }
1043 1035 }
1044 1036
1045 1037 /*
1046 1038 * Invalidate the cached translations associated with the domain
1047 1039 * from which pages were removed.
1048 1040 */
1049 1041 #ifdef __FreeBSD__
1050 1042 if (map)
1051 1043 iommu_invalidate_tlb(host_domain);
1052 1044 else
1053 1045 iommu_invalidate_tlb(vm->iommu);
1054 1046 #else
1055 1047 iommu_invalidate_tlb(vm->iommu);
1056 1048 #endif
1057 1049 }
1058 1050
1059 1051 #define vm_iommu_unmap(vm) vm_iommu_modify((vm), false)
1060 1052 #define vm_iommu_map(vm) vm_iommu_modify((vm), true)
1061 1053
1062 1054 int
1063 1055 vm_unassign_pptdev(struct vm *vm, int pptfd)
1064 1056 {
1065 1057 int error;
1066 1058
1067 1059 error = ppt_unassign_device(vm, pptfd);
1068 1060 if (error)
1069 1061 return (error);
1070 1062
1071 1063 if (ppt_assigned_devices(vm) == 0)
1072 1064 vm_iommu_unmap(vm);
1073 1065
1074 1066 return (0);
1075 1067 }
1076 1068
1077 1069 int
1078 1070 vm_assign_pptdev(struct vm *vm, int pptfd)
1079 1071 {
1080 1072 int error;
1081 1073 vm_paddr_t maxaddr;
1082 1074
1083 1075 /* Set up the IOMMU to do the 'gpa' to 'hpa' translation */
1084 1076 if (ppt_assigned_devices(vm) == 0) {
1085 1077 KASSERT(vm->iommu == NULL,
1086 1078 ("vm_assign_pptdev: iommu must be NULL"));
1087 1079 maxaddr = vmm_sysmem_maxaddr(vm);
1088 1080 vm->iommu = iommu_create_domain(maxaddr);
1089 1081 if (vm->iommu == NULL)
1090 1082 return (ENXIO);
1091 1083 vm_iommu_map(vm);
1092 1084 }
1093 1085
1094 1086 error = ppt_assign_device(vm, pptfd);
1095 1087 return (error);
1096 1088 }
1097 1089
1098 1090 void *
1099 1091 vm_gpa_hold(struct vm *vm, int vcpuid, vm_paddr_t gpa, size_t len, int reqprot,
1100 1092 void **cookie)
1101 1093 {
1102 1094 int i, count, pageoff;
1103 1095 struct mem_map *mm;
1104 1096 vm_page_t m;
1105 1097 #ifdef INVARIANTS
1106 1098 /*
1107 1099 * All vcpus are frozen by ioctls that modify the memory map
1108 1100 * (e.g. VM_MMAP_MEMSEG). Therefore 'vm->memmap[]' stability is
1109 1101 * guaranteed if at least one vcpu is in the VCPU_FROZEN state.
1110 1102 */
1111 1103 int state;
1112 1104 KASSERT(vcpuid >= -1 && vcpuid < vm->maxcpus, ("%s: invalid vcpuid %d",
1113 1105 __func__, vcpuid));
1114 1106 for (i = 0; i < vm->maxcpus; i++) {
1115 1107 if (vcpuid != -1 && vcpuid != i)
1116 1108 continue;
1117 1109 state = vcpu_get_state(vm, i, NULL);
1118 1110 KASSERT(state == VCPU_FROZEN, ("%s: invalid vcpu state %d",
1119 1111 __func__, state));
1120 1112 }
1121 1113 #endif
1122 1114 pageoff = gpa & PAGE_MASK;
1123 1115 if (len > PAGE_SIZE - pageoff)
1124 1116 panic("vm_gpa_hold: invalid gpa/len: 0x%016lx/%lu", gpa, len);
1125 1117
1126 1118 count = 0;
1127 1119 for (i = 0; i < VM_MAX_MEMMAPS; i++) {
1128 1120 mm = &vm->mem_maps[i];
1129 1121 if (mm->len == 0) {
1130 1122 continue;
1131 1123 }
1132 1124 if (gpa >= mm->gpa && gpa < mm->gpa + mm->len) {
1133 1125 count = vm_fault_quick_hold_pages(&vm->vmspace->vm_map,
1134 1126 trunc_page(gpa), PAGE_SIZE, reqprot, &m, 1);
1135 1127 break;
1136 1128 }
1137 1129 }
1138 1130
1139 1131 if (count == 1) {
1140 1132 *cookie = m;
1141 1133 return ((void *)(PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)) + pageoff));
1142 1134 } else {
1143 1135 *cookie = NULL;
1144 1136 return (NULL);
1145 1137 }
1146 1138 }
1147 1139
1148 1140 void
1149 1141 vm_gpa_release(void *cookie)
1150 1142 {
1151 1143 vm_page_t m = cookie;
1152 1144
1153 1145 vm_page_unwire(m, PQ_ACTIVE);
1154 1146 }
1155 1147
1156 1148 int
1157 1149 vm_get_register(struct vm *vm, int vcpu, int reg, uint64_t *retval)
1158 1150 {
1159 1151
1160 1152 if (vcpu < 0 || vcpu >= vm->maxcpus)
1161 1153 return (EINVAL);
1162 1154
1163 1155 if (reg >= VM_REG_LAST)
1164 1156 return (EINVAL);
1165 1157
1166 1158 return (VMGETREG(vm->cookie, vcpu, reg, retval));
1167 1159 }
1168 1160
1169 1161 int
1170 1162 vm_set_register(struct vm *vm, int vcpuid, int reg, uint64_t val)
1171 1163 {
1172 1164 struct vcpu *vcpu;
1173 1165 int error;
1174 1166
1175 1167 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
1176 1168 return (EINVAL);
1177 1169
1178 1170 if (reg >= VM_REG_LAST)
1179 1171 return (EINVAL);
1180 1172
1181 1173 error = VMSETREG(vm->cookie, vcpuid, reg, val);
1182 1174 if (error || reg != VM_REG_GUEST_RIP)
1183 1175 return (error);
1184 1176
1185 1177 /* Set 'nextrip' to match the value of %rip */
1186 1178 VCPU_CTR1(vm, vcpuid, "Setting nextrip to %lx", val);
1187 1179 vcpu = &vm->vcpu[vcpuid];
1188 1180 vcpu->nextrip = val;
1189 1181 return (0);
1190 1182 }
1191 1183
1192 1184 static bool
1193 1185 is_descriptor_table(int reg)
1194 1186 {
1195 1187 switch (reg) {
1196 1188 case VM_REG_GUEST_IDTR:
1197 1189 case VM_REG_GUEST_GDTR:
1198 1190 return (true);
1199 1191 default:
1200 1192 return (false);
1201 1193 }
1202 1194 }
1203 1195
1204 1196 static bool
1205 1197 is_segment_register(int reg)
1206 1198 {
1207 1199 switch (reg) {
1208 1200 case VM_REG_GUEST_ES:
1209 1201 case VM_REG_GUEST_CS:
1210 1202 case VM_REG_GUEST_SS:
1211 1203 case VM_REG_GUEST_DS:
1212 1204 case VM_REG_GUEST_FS:
1213 1205 case VM_REG_GUEST_GS:
1214 1206 case VM_REG_GUEST_TR:
1215 1207 case VM_REG_GUEST_LDTR:
1216 1208 return (true);
1217 1209 default:
1218 1210 return (false);
1219 1211 }
1220 1212 }
1221 1213
1222 1214 int
1223 1215 vm_get_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc)
1224 1216 {
1225 1217
( 840 lines elided )
1226 1218 if (vcpu < 0 || vcpu >= vm->maxcpus)
1227 1219 return (EINVAL);
1228 1220
1229 1221 if (!is_segment_register(reg) && !is_descriptor_table(reg))
1230 1222 return (EINVAL);
1231 1223
1232 1224 return (VMGETDESC(vm->cookie, vcpu, reg, desc));
1233 1225 }
1234 1226
1235 1227 int
1236 -vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc)
1228 +vm_set_seg_desc(struct vm *vm, int vcpu, int reg, const struct seg_desc *desc)
1237 1229 {
1238 1230 if (vcpu < 0 || vcpu >= vm->maxcpus)
1239 1231 return (EINVAL);
1240 1232
1241 1233 if (!is_segment_register(reg) && !is_descriptor_table(reg))
1242 1234 return (EINVAL);
1243 1235
1244 1236 return (VMSETDESC(vm->cookie, vcpu, reg, desc));
1245 1237 }
1246 1238
1239 +int
1240 +vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state, uint8_t *sipi_vec)
1241 +{
1242 + struct vcpu *vcpu;
1243 +
1244 + if (vcpuid < 0 || vcpuid >= vm->maxcpus) {
1245 + return (EINVAL);
1246 + }
1247 +
1248 + vcpu = &vm->vcpu[vcpuid];
1249 +
1250 + vcpu_lock(vcpu);
1251 + *state = vcpu->run_state;
1252 + *sipi_vec = vcpu->sipi_vector;
1253 + vcpu_unlock(vcpu);
1254 +
1255 + return (0);
1256 +}
1257 +
1258 +int
1259 +vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state, uint8_t sipi_vec)
1260 +{
1261 + struct vcpu *vcpu;
1262 +
1263 + if (vcpuid < 0 || vcpuid >= vm->maxcpus) {
1264 + return (EINVAL);
1265 + }
1266 + if (!VRS_IS_VALID(state)) {
1267 + return (EINVAL);
1268 + }
1269 +
1270 + vcpu = &vm->vcpu[vcpuid];
1271 +
1272 + vcpu_lock(vcpu);
1273 + vcpu->run_state = state;
1274 + vcpu->sipi_vector = sipi_vec;
1275 + vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
1276 + vcpu_unlock(vcpu);
1277 +
1278 + return (0);
1279 +}
1280 +
1281 +
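Review note: vm_get_run_state() and vm_set_run_state() expose the per-vcpu INIT/SIPI state to the vmm device interface, so userspace can inspect an AP's boot state or seed it (for instance when starting an AP directly or restoring saved state). A minimal in-kernel caller might look like the sketch below; the helper name and the use of VRS_PEND_SIPI are hypothetical, not part of this diff:

	/*
	 * Hypothetical caller: park an AP in "INIT received, SIPI pending"
	 * with the given start vector, leaving delivery to the vcpu loop.
	 */
	static int
	example_start_ap(struct vm *vm, int vcpuid, uint8_t vector)
	{
		return (vm_set_run_state(vm, vcpuid,
		    VRS_INIT | VRS_PEND_SIPI, vector));
	}

Note that vm_set_run_state() also notifies the vcpu (VCPU_NOTIFY_EXIT), so a running or sleeping vcpu picks the change up promptly.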
1247 1282 static void
1248 1283 restore_guest_fpustate(struct vcpu *vcpu)
1249 1284 {
1250 1285
1251 1286 /* flush host state to the pcb */
1252 1287 fpuexit(curthread);
1253 1288
1254 1289 /* restore guest FPU state */
1255 1290 fpu_stop_emulating();
1256 1291 fpurestore(vcpu->guestfpu);
1257 1292
1258 1293 /* restore guest XCR0 if XSAVE is enabled in the host */
1259 1294 if (rcr4() & CR4_XSAVE)
1260 1295 load_xcr(0, vcpu->guest_xcr0);
1261 1296
1262 1297 /*
1263 1298 * The FPU is now "dirty" with the guest's state so turn on emulation
1264 1299 * to trap any access to the FPU by the host.
1265 1300 */
1266 1301 fpu_start_emulating();
1267 1302 }
1268 1303
1269 1304 static void
1270 1305 save_guest_fpustate(struct vcpu *vcpu)
1271 1306 {
1272 1307
1273 1308 if ((rcr0() & CR0_TS) == 0)
1274 1309 panic("fpu emulation not enabled in host!");
1275 1310
1276 1311 /* save guest XCR0 and restore host XCR0 */
1277 1312 if (rcr4() & CR4_XSAVE) {
1278 1313 vcpu->guest_xcr0 = rxcr(0);
1279 1314 load_xcr(0, vmm_get_host_xcr0());
1280 1315 }
1281 1316
1282 1317 /* save guest FPU state */
1283 1318 fpu_stop_emulating();
1284 1319 fpusave(vcpu->guestfpu);
1285 1320 #ifdef __FreeBSD__
1286 1321 fpu_start_emulating();
1287 1322 #else
1288 1323 /*
1289 1324 * When the host state has been restored, we should not re-enable
1290 1325 * CR0.TS on illumos for eager FPU.
1291 1326 */
1292 1327 #endif
1293 1328 }
1294 1329
1295 1330 static VMM_STAT(VCPU_IDLE_TICKS, "number of ticks vcpu was idle");
1296 1331
1297 1332 static int
1298 1333 vcpu_set_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate,
1299 1334 bool from_idle)
1300 1335 {
1301 1336 struct vcpu *vcpu;
1302 1337 int error;
1303 1338
1304 1339 vcpu = &vm->vcpu[vcpuid];
1305 1340 vcpu_assert_locked(vcpu);
1306 1341
1307 1342 /*
1308 1343 * State transitions from the vmmdev_ioctl() must always begin from
1309 1344 * the VCPU_IDLE state. This guarantees that there is only a single
1310 1345 * ioctl() operating on a vcpu at any point.
1311 1346 */
1312 1347 if (from_idle) {
1313 1348 while (vcpu->state != VCPU_IDLE) {
1314 1349 vcpu->reqidle = 1;
1315 1350 vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
1316 1351 VCPU_CTR1(vm, vcpuid, "vcpu state change from %s to "
1317 1352 "idle requested", vcpu_state2str(vcpu->state));
1318 1353 #ifdef __FreeBSD__
1319 1354 msleep_spin(&vcpu->state, &vcpu->mtx, "vmstat", hz);
1320 1355 #else
1321 1356 cv_wait(&vcpu->state_cv, &vcpu->mtx.m);
1322 1357 #endif
1323 1358 }
1324 1359 } else {
1325 1360 KASSERT(vcpu->state != VCPU_IDLE, ("invalid transition from "
1326 1361 "vcpu idle state"));
1327 1362 }
1328 1363
1329 1364 if (vcpu->state == VCPU_RUNNING) {
1330 1365 KASSERT(vcpu->hostcpu == curcpu, ("curcpu %d and hostcpu %d "
1331 1366 "mismatch for running vcpu", curcpu, vcpu->hostcpu));
1332 1367 } else {
1333 1368 KASSERT(vcpu->hostcpu == NOCPU, ("Invalid hostcpu %d for a "
1334 1369 "vcpu that is not running", vcpu->hostcpu));
1335 1370 }
1336 1371
1337 1372 /*
1338 1373 * The following state transitions are allowed:
1339 1374 * IDLE -> FROZEN -> IDLE
1340 1375 * FROZEN -> RUNNING -> FROZEN
1341 1376 * FROZEN -> SLEEPING -> FROZEN
1342 1377 */
1343 1378 switch (vcpu->state) {
1344 1379 case VCPU_IDLE:
1345 1380 case VCPU_RUNNING:
1346 1381 case VCPU_SLEEPING:
( 90 lines elided )
1347 1382 error = (newstate != VCPU_FROZEN);
1348 1383 break;
1349 1384 case VCPU_FROZEN:
1350 1385 error = (newstate == VCPU_FROZEN);
1351 1386 break;
1352 1387 default:
1353 1388 error = 1;
1354 1389 break;
1355 1390 }
1356 1391
1357 - if (newstate == VCPU_RUNNING) {
1358 - while (vcpu->runblock != 0) {
1359 -#ifdef __FreeBSD__
1360 - msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
1361 -#else
1362 - cv_wait(&vcpu->state_cv, &vcpu->mtx.m);
1363 -#endif
1364 - }
1365 - }
1366 -
1367 1392 if (error)
1368 1393 return (EBUSY);
1369 1394
1370 1395 VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s",
1371 1396 vcpu_state2str(vcpu->state), vcpu_state2str(newstate));
1372 1397
1373 1398 vcpu->state = newstate;
1374 1399 if (newstate == VCPU_RUNNING)
1375 1400 vcpu->hostcpu = curcpu;
1376 1401 else
1377 1402 vcpu->hostcpu = NOCPU;
1378 1403
1379 - if (newstate == VCPU_IDLE ||
1380 - (newstate == VCPU_FROZEN && vcpu->runblock != 0)) {
1404 + if (newstate == VCPU_IDLE) {
1381 1405 #ifdef __FreeBSD__
1382 1406 wakeup(&vcpu->state);
1383 1407 #else
1384 1408 cv_broadcast(&vcpu->state_cv);
1385 1409 #endif
1386 1410 }
1387 1411
1388 1412 return (0);
1389 1413 }
1390 1414
1391 1415 static void
1392 1416 vcpu_require_state(struct vm *vm, int vcpuid, enum vcpu_state newstate)
1393 1417 {
1394 1418 int error;
1395 1419
1396 1420 if ((error = vcpu_set_state(vm, vcpuid, newstate, false)) != 0)
1397 1421 panic("Error %d setting state to %d\n", error, newstate);
1398 1422 }
1399 1423
1400 1424 static void
1401 1425 vcpu_require_state_locked(struct vm *vm, int vcpuid, enum vcpu_state newstate)
1402 1426 {
1403 1427 int error;
1404 1428
1405 1429 if ((error = vcpu_set_state_locked(vm, vcpuid, newstate, false)) != 0)
( 15 lines elided )
1406 1430 panic("Error %d setting state to %d", error, newstate);
1407 1431 }
1408 1432
1409 1433 /*
1410 1434 * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
1411 1435 */
1412 1436 static int
1413 1437 vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
1414 1438 {
1415 1439 struct vcpu *vcpu;
1416 -#ifdef __FreeBSD__
1417 - const char *wmesg;
1418 -#else
1419 - const char *wmesg __unused;
1420 -#endif
1421 1440 int t, vcpu_halted, vm_halted;
1441 + bool userspace_exit = false;
1422 1442
1423 1443 KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
1424 1444
1425 1445 vcpu = &vm->vcpu[vcpuid];
1426 1446 vcpu_halted = 0;
1427 1447 vm_halted = 0;
1428 1448
1429 1449 vcpu_lock(vcpu);
1430 1450 while (1) {
1431 1451 /*
1432 - * Do a final check for pending NMI or interrupts before
1433 - * really putting this thread to sleep. Also check for
1434 - * software events that would cause this vcpu to wakeup.
1435 - *
1436 - * These interrupts/events could have happened after the
1437 - * vcpu returned from VMRUN() and before it acquired the
1438 - * vcpu lock above.
1452 + * Do a final check for pending interrupts (including NMI and
1453 + * INIT) before putting this thread to sleep.
1439 1454 */
1440 - if (vm->suspend || vcpu->reqidle)
1441 - break;
1442 1455 if (vm_nmi_pending(vm, vcpuid))
1443 1456 break;
1457 + if (vcpu_run_state_pending(vm, vcpuid))
1458 + break;
1444 1459 if (!intr_disabled) {
1445 1460 if (vm_extint_pending(vm, vcpuid) ||
1446 1461 vlapic_pending_intr(vcpu->vlapic, NULL)) {
1447 1462 break;
1448 1463 }
1449 1464 }
1450 1465
1451 - /* Don't go to sleep if the vcpu thread needs to yield */
1452 - if (vcpu_should_yield(vm, vcpuid))
1466 + /*
1467 + * Also check for software events which would cause a wake-up.
1468 + * This will set the appropriate exitcode directly, rather than
1469 + * requiring a trip through VM_RUN().
1470 + */
1471 + if (vcpu_sleep_bailout_checks(vm, vcpuid)) {
1472 + userspace_exit = true;
1453 1473 break;
1474 + }
1454 1475
1455 - if (vcpu_debugged(vm, vcpuid))
1456 - break;
1457 -
1458 1476 /*
1459 1477 * Some Linux guests implement "halt" by having all vcpus
1460 1478 * execute HLT with interrupts disabled. 'halted_cpus' keeps
1461 1479 * track of the vcpus that have entered this state. When all
1462 1480 * vcpus enter the halted state the virtual machine is halted.
1463 1481 */
1464 1482 if (intr_disabled) {
1465 - wmesg = "vmhalt";
1466 - VCPU_CTR0(vm, vcpuid, "Halted");
1467 1483 if (!vcpu_halted && halt_detection_enabled) {
1468 1484 vcpu_halted = 1;
1469 1485 CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
1470 1486 }
1471 1487 if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
1472 1488 vm_halted = 1;
1473 1489 break;
1474 1490 }
1475 - } else {
1476 - wmesg = "vmidle";
1477 1491 }
1478 1492
1479 1493 t = ticks;
1480 1494 vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
1481 -#ifdef __FreeBSD__
1482 - /*
1483 - * XXX msleep_spin() cannot be interrupted by signals so
1484 - * wake up periodically to check pending signals.
1485 - */
1486 - msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
1487 -#else
1488 - /*
1489 - * Fortunately, cv_wait_sig can be interrupted by signals, so
1490 - * there is no need to periodically wake up.
1491 - */
1492 1495 (void) cv_wait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m);
1493 -#endif
1494 1496 vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
1495 1497 vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
1496 1498 }
1497 1499
1498 1500 if (vcpu_halted)
1499 1501 CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);
1500 1502
1501 1503 vcpu_unlock(vcpu);
1502 1504
1503 1505 if (vm_halted)
1504 1506 vm_suspend(vm, VM_SUSPEND_HALT);
1505 1507
1506 - return (0);
1508 + return (userspace_exit ? -1 : 0);
1507 1509 }
1508 1510
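Review note: the HLT handler now defers its userspace-exit decision to vcpu_sleep_bailout_checks() (forward-declared near the top of the file), which, per the new comment above, sets the exitcode itself rather than forcing another trip through VM_RUN; returning -1 here is what propagates that exit. A simplified sketch of the kind of conditions such a helper consolidates, based on the checks removed from the loop above (suspend, reqidle, debug, yield) and not on its actual implementation:

	static bool
	sleep_bailout_sketch(struct vm *vm, int vcpuid)
	{
		struct vcpu *vcpu = &vm->vcpu[vcpuid];

		/* VM suspend or an idle request should push us to userspace. */
		if (vm->suspend || vcpu->reqidle)
			return (true);
		/* A debugger hold or a contending load also ends the sleep. */
		if (vcpu_debugged(vm, vcpuid) || vcpu_should_yield(vm, vcpuid))
			return (true);
		return (false);
	}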
1509 1511 static int
1510 1512 vm_handle_paging(struct vm *vm, int vcpuid)
1511 1513 {
1512 1514 int rv, ftype;
1513 1515 struct vm_map *map;
1514 1516 struct vcpu *vcpu;
1515 1517 struct vm_exit *vme;
1516 1518
1517 1519 vcpu = &vm->vcpu[vcpuid];
1518 1520 vme = &vcpu->exitinfo;
1519 1521
1520 1522 KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
1521 1523 __func__, vme->inst_length));
1522 1524
1523 1525 ftype = vme->u.paging.fault_type;
1524 1526 KASSERT(ftype == VM_PROT_READ ||
1525 1527 ftype == VM_PROT_WRITE || ftype == VM_PROT_EXECUTE,
1526 1528 ("vm_handle_paging: invalid fault_type %d", ftype));
1527 1529
1528 1530 if (ftype == VM_PROT_READ || ftype == VM_PROT_WRITE) {
1529 1531 rv = pmap_emulate_accessed_dirty(vmspace_pmap(vm->vmspace),
1530 1532 vme->u.paging.gpa, ftype);
1531 1533 if (rv == 0) {
1532 1534 VCPU_CTR2(vm, vcpuid, "%s bit emulation for gpa %lx",
1533 1535 ftype == VM_PROT_READ ? "accessed" : "dirty",
1534 1536 vme->u.paging.gpa);
1535 1537 goto done;
1536 1538 }
1537 1539 }
1538 1540
1539 1541 map = &vm->vmspace->vm_map;
1540 1542 rv = vm_fault(map, vme->u.paging.gpa, ftype, VM_FAULT_NORMAL);
1541 1543
1542 1544 VCPU_CTR3(vm, vcpuid, "vm_handle_paging rv = %d, gpa = %lx, "
1543 1545 "ftype = %d", rv, vme->u.paging.gpa, ftype);
1544 1546
1545 1547 if (rv != KERN_SUCCESS)
1546 1548 return (EFAULT);
1547 1549 done:
1548 1550 return (0);
1549 1551 }
1550 1552
1551 1553 int
1552 1554 vm_service_mmio_read(struct vm *vm, int cpuid, uint64_t gpa, uint64_t *rval,
1553 1555 int rsize)
1554 1556 {
1555 1557 int err = ESRCH;
1556 1558
1557 1559 if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
1558 1560 err = lapic_mmio_read(vm, cpuid, gpa, rval, rsize);
1559 1561 } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
1560 1562 err = vioapic_mmio_read(vm, cpuid, gpa, rval, rsize);
1561 1563 } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
1562 1564 err = vhpet_mmio_read(vm, cpuid, gpa, rval, rsize);
1563 1565 }
1564 1566
1565 1567 return (err);
1566 1568 }
1567 1569
1568 1570 int
1569 1571 vm_service_mmio_write(struct vm *vm, int cpuid, uint64_t gpa, uint64_t wval,
1570 1572 int wsize)
1571 1573 {
1572 1574 int err = ESRCH;
1573 1575
1574 1576 if (gpa >= DEFAULT_APIC_BASE && gpa < DEFAULT_APIC_BASE + PAGE_SIZE) {
1575 1577 err = lapic_mmio_write(vm, cpuid, gpa, wval, wsize);
1576 1578 } else if (gpa >= VIOAPIC_BASE && gpa < VIOAPIC_BASE + VIOAPIC_SIZE) {
1577 1579 err = vioapic_mmio_write(vm, cpuid, gpa, wval, wsize);
1578 1580 } else if (gpa >= VHPET_BASE && gpa < VHPET_BASE + VHPET_SIZE) {
1579 1581 err = vhpet_mmio_write(vm, cpuid, gpa, wval, wsize);
1580 1582 }
1581 1583
1582 1584 return (err);
1583 1585 }
1584 1586
1585 1587 static int
1586 1588 vm_handle_mmio_emul(struct vm *vm, int vcpuid)
1587 1589 {
1588 1590 struct vie *vie;
1589 1591 struct vcpu *vcpu;
1590 1592 struct vm_exit *vme;
1591 1593 uint64_t inst_addr;
1592 1594 int error, fault, cs_d;
1593 1595
1594 1596 vcpu = &vm->vcpu[vcpuid];
1595 1597 vme = &vcpu->exitinfo;
1596 1598 vie = vcpu->vie_ctx;
1597 1599
1598 1600 KASSERT(vme->inst_length == 0, ("%s: invalid inst_length %d",
1599 1601 __func__, vme->inst_length));
1600 1602
1601 1603 inst_addr = vme->rip + vme->u.mmio_emul.cs_base;
1602 1604 cs_d = vme->u.mmio_emul.cs_d;
1603 1605
1604 1606 VCPU_CTR1(vm, vcpuid, "inst_emul fault accessing gpa %lx",
1605 1607 vme->u.mmio_emul.gpa);
1606 1608
1607 1609 /* Fetch the faulting instruction */
1608 1610 if (vie_needs_fetch(vie)) {
1609 1611 error = vie_fetch_instruction(vie, vm, vcpuid, inst_addr,
1610 1612 &fault);
1611 1613 if (error != 0) {
1612 1614 return (error);
1613 1615 } else if (fault) {
1614 1616 /*
 1615 1617 			 * If a fault during instruction fetch was encountered, it
1616 1618 * will have asserted that the appropriate exception be
1617 1619 * injected at next entry. No further work is required.
1618 1620 */
1619 1621 return (0);
1620 1622 }
1621 1623 }
1622 1624
1623 1625 if (vie_decode_instruction(vie, vm, vcpuid, cs_d) != 0) {
1624 1626 VCPU_CTR1(vm, vcpuid, "Error decoding instruction at %lx",
1625 1627 inst_addr);
1626 1628 /* Dump (unrecognized) instruction bytes in userspace */
1627 1629 vie_fallback_exitinfo(vie, vme);
1628 1630 return (-1);
1629 1631 }
1630 1632 if (vme->u.mmio_emul.gla != VIE_INVALID_GLA &&
1631 1633 vie_verify_gla(vie, vm, vcpuid, vme->u.mmio_emul.gla) != 0) {
1632 1634 /* Decoded GLA does not match GLA from VM exit state */
1633 1635 vie_fallback_exitinfo(vie, vme);
1634 1636 return (-1);
1635 1637 }
1636 1638
1637 1639 repeat:
1638 1640 error = vie_emulate_mmio(vie, vm, vcpuid);
1639 1641 if (error < 0) {
1640 1642 /*
1641 1643 * MMIO not handled by any of the in-kernel-emulated devices, so
1642 1644 * make a trip out to userspace for it.
1643 1645 */
1644 1646 vie_exitinfo(vie, vme);
1645 1647 } else if (error == EAGAIN) {
1646 1648 /*
1647 1649 * Continue emulating the rep-prefixed instruction, which has
1648 1650 * not completed its iterations.
1649 1651 *
1650 1652 * In case this can be emulated in-kernel and has a high
1651 1653 * repetition count (causing a tight spin), it should be
1652 1654 * deferential to yield conditions.
1653 1655 */
1654 1656 if (!vcpu_should_yield(vm, vcpuid)) {
1655 1657 goto repeat;
1656 1658 } else {
1657 1659 /*
1658 1660 * Defer to the contending load by making a trip to
1659 1661 * userspace with a no-op (BOGUS) exit reason.
1660 1662 */
1661 1663 vie_reset(vie);
1662 1664 vme->exitcode = VM_EXITCODE_BOGUS;
1663 1665 return (-1);
1664 1666 }
1665 1667 } else if (error == 0) {
1666 1668 /* Update %rip now that instruction has been emulated */
1667 1669 vie_advance_pc(vie, &vcpu->nextrip);
1668 1670 }
1669 1671 return (error);
1670 1672 }
1671 1673
1672 1674 static int
1673 1675 vm_handle_inout(struct vm *vm, int vcpuid, struct vm_exit *vme)
1674 1676 {
1675 1677 struct vcpu *vcpu;
1676 1678 struct vie *vie;
1677 1679 int err;
1678 1680
1679 1681 vcpu = &vm->vcpu[vcpuid];
1680 1682 vie = vcpu->vie_ctx;
1681 1683
1682 1684 repeat:
1683 1685 err = vie_emulate_inout(vie, vm, vcpuid);
1684 1686
1685 1687 if (err < 0) {
1686 1688 /*
1687 1689 * In/out not handled by any of the in-kernel-emulated devices,
1688 1690 * so make a trip out to userspace for it.
1689 1691 */
1690 1692 vie_exitinfo(vie, vme);
1691 1693 return (err);
1692 1694 } else if (err == EAGAIN) {
1693 1695 /*
1694 1696 * Continue emulating the rep-prefixed ins/outs, which has not
1695 1697 * completed its iterations.
1696 1698 *
1697 1699 * In case this can be emulated in-kernel and has a high
1698 1700 * repetition count (causing a tight spin), it should be
1699 1701 * deferential to yield conditions.
1700 1702 */
1701 1703 if (!vcpu_should_yield(vm, vcpuid)) {
1702 1704 goto repeat;
1703 1705 } else {
1704 1706 /*
1705 1707 * Defer to the contending load by making a trip to
1706 1708 * userspace with a no-op (BOGUS) exit reason.
1707 1709 */
1708 1710 vie_reset(vie);
1709 1711 vme->exitcode = VM_EXITCODE_BOGUS;
1710 1712 return (-1);
1711 1713 }
1712 1714 } else if (err != 0) {
1713 1715 /* Emulation failure. Bail all the way out to userspace. */
1714 1716 vme->exitcode = VM_EXITCODE_INST_EMUL;
1715 1717 bzero(&vme->u.inst_emul, sizeof (vme->u.inst_emul));
1716 1718 return (-1);
1717 1719 }
1718 1720
1719 1721 vie_advance_pc(vie, &vcpu->nextrip);
1720 1722 return (0);
1721 1723 }
1722 1724
1723 1725 static int
1724 1726 vm_handle_suspend(struct vm *vm, int vcpuid)
1725 1727 {
1726 1728 #ifdef __FreeBSD__
1727 1729 int error, i;
1728 1730 struct vcpu *vcpu;
1729 1731 struct thread *td;
1730 1732
1731 1733 error = 0;
1732 1734 vcpu = &vm->vcpu[vcpuid];
1733 1735 td = curthread;
1734 1736 #else
1735 1737 int i;
1736 1738 struct vcpu *vcpu;
1737 1739
1738 1740 vcpu = &vm->vcpu[vcpuid];
1739 1741 #endif
1740 1742
1741 1743 CPU_SET_ATOMIC(vcpuid, &vm->suspended_cpus);
1742 1744
1743 1745 #ifdef __FreeBSD__
1744 1746 /*
1745 1747 * Wait until all 'active_cpus' have suspended themselves.
1746 1748 *
1747 1749 * Since a VM may be suspended at any time including when one or
1748 1750 * more vcpus are doing a rendezvous we need to call the rendezvous
1749 1751 * handler while we are waiting to prevent a deadlock.
1750 1752 */
1751 1753 vcpu_lock(vcpu);
1752 1754 while (error == 0) {
1753 1755 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
1754 1756 VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
1755 1757 break;
1756 1758 }
1757 1759
1758 1760 if (vm->rendezvous_func == NULL) {
1759 1761 VCPU_CTR0(vm, vcpuid, "Sleeping during suspend");
1760 1762 vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
1761 1763 msleep_spin(vcpu, &vcpu->mtx, "vmsusp", hz);
1762 1764 vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
1763 1765 if ((td->td_flags & TDF_NEEDSUSPCHK) != 0) {
1764 1766 vcpu_unlock(vcpu);
1765 1767 error = thread_check_susp(td, false);
1766 1768 vcpu_lock(vcpu);
1767 1769 }
1768 1770 } else {
1769 1771 VCPU_CTR0(vm, vcpuid, "Rendezvous during suspend");
1770 1772 vcpu_unlock(vcpu);
1771 1773 error = vm_handle_rendezvous(vm, vcpuid);
1772 1774 vcpu_lock(vcpu);
1773 1775 }
1774 1776 }
1775 1777 vcpu_unlock(vcpu);
1776 1778 #else
1777 1779 vcpu_lock(vcpu);
1778 1780 while (1) {
1779 1781 int rc;
1780 1782
1781 1783 if (CPU_CMP(&vm->suspended_cpus, &vm->active_cpus) == 0) {
1782 1784 VCPU_CTR0(vm, vcpuid, "All vcpus suspended");
1783 1785 break;
1784 1786 }
1785 1787
1786 1788 vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
1787 1789 rc = cv_reltimedwait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m, hz,
1788 1790 TR_CLOCK_TICK);
1789 1791 vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
1790 1792
1791 1793 /*
1792 1794 * If the userspace process driving the instance is killed, any
1793 1795 * vCPUs yet to be marked suspended (because they are not
1794 1796 * VM_RUN-ing in the kernel presently) will never reach that
1795 1797 * state.
1796 1798 *
1797 1799 * To avoid vm_handle_suspend() getting stuck in the kernel
1798 1800 * waiting for those vCPUs, offer a bail-out even though it
1799 1801 * means returning without all vCPUs in a suspended state.
1800 1802 */
1801 1803 if (rc <= 0) {
1802 1804 if ((curproc->p_flag & SEXITING) != 0) {
1803 1805 break;
1804 1806 }
1805 1807 }
1806 1808 }
1807 1809 vcpu_unlock(vcpu);
1808 1810
1809 1811 #endif
1810 1812
1811 1813 /*
1812 1814 * Wakeup the other sleeping vcpus and return to userspace.
1813 1815 */
1814 1816 for (i = 0; i < vm->maxcpus; i++) {
1815 1817 if (CPU_ISSET(i, &vm->suspended_cpus)) {
1816 1818 vcpu_notify_event(vm, i);
1817 1819 }
1818 1820 }
1819 1821
1820 1822 return (-1);
1821 1823 }
1822 1824
1823 1825 static int
1824 1826 vm_handle_reqidle(struct vm *vm, int vcpuid)
1825 1827 {
1826 1828 struct vcpu *vcpu = &vm->vcpu[vcpuid];
1827 1829
1828 1830 vcpu_lock(vcpu);
1829 1831 KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle));
1830 1832 vcpu->reqidle = 0;
1831 1833 vcpu_unlock(vcpu);
1832 1834 return (-1);
1833 1835 }
1834 1836
1837 +static int
1838 +vm_handle_run_state(struct vm *vm, int vcpuid)
1839 +{
1840 + struct vcpu *vcpu = &vm->vcpu[vcpuid];
1841 + bool handled = false;
1842 +
1843 + vcpu_lock(vcpu);
1844 + while (1) {
1845 + if ((vcpu->run_state & VRS_PEND_INIT) != 0) {
1846 + vcpu_unlock(vcpu);
1847 + VERIFY0(vcpu_arch_reset(vm, vcpuid, true));
1848 + vcpu_lock(vcpu);
1849 +
1850 + vcpu->run_state &= ~(VRS_RUN | VRS_PEND_INIT);
1851 + vcpu->run_state |= VRS_INIT;
1852 + }
1853 +
1854 + if ((vcpu->run_state & (VRS_INIT | VRS_RUN | VRS_PEND_SIPI)) ==
1855 + (VRS_INIT | VRS_PEND_SIPI)) {
1856 + const uint8_t vector = vcpu->sipi_vector;
1857 +
1858 + vcpu_unlock(vcpu);
1859 + VERIFY0(vcpu_vector_sipi(vm, vcpuid, vector));
1860 + vcpu_lock(vcpu);
1861 +
1862 + vcpu->run_state &= ~VRS_PEND_SIPI;
1863 + vcpu->run_state |= VRS_RUN;
1864 + }
1865 +
1866 + /*
1867 + * If the vCPU is now in the running state, there is no need to
1868 + * wait for anything prior to re-entry.
1869 + */
1870 + if ((vcpu->run_state & VRS_RUN) != 0) {
1871 + handled = true;
1872 + break;
1873 + }
1874 +
1875 + /*
1876 + * Also check for software events which would cause a wake-up.
1877 + * This will set the appropriate exitcode directly, rather than
1878 + * requiring a trip through VM_RUN().
1879 + */
1880 + if (vcpu_sleep_bailout_checks(vm, vcpuid)) {
1881 + break;
1882 + }
1883 +
1884 + vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
1885 + (void) cv_wait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m);
1886 + vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
1887 + }
1888 + vcpu_unlock(vcpu);
1889 +
1890 + return (handled ? 0 : -1);
1891 +}
1892 +
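For illustration, a minimal standalone sketch of the run-state transitions that vm_handle_run_state() performs above: a pending INIT forces an architectural reset and parks the vCPU in wait-for-SIPI, and a SIPI received in that state moves it to running. The VRS_* values below are local stand-ins for this sketch, not the kernel definitions.

#include <stdio.h>

/* Illustrative stand-in flag values; the kernel's VRS_* definitions differ. */
#define VRS_INIT        0x1
#define VRS_RUN         0x2
#define VRS_PEND_INIT   0x4
#define VRS_PEND_SIPI   0x8

static unsigned int
settle_run_state(unsigned int rs)
{
        if ((rs & VRS_PEND_INIT) != 0) {
                /* INIT: architectural reset, then wait-for-SIPI */
                rs &= ~(VRS_RUN | VRS_PEND_INIT);
                rs |= VRS_INIT;
        }
        if ((rs & (VRS_INIT | VRS_RUN | VRS_PEND_SIPI)) ==
            (VRS_INIT | VRS_PEND_SIPI)) {
                /* SIPI received while waiting in INIT: start running */
                rs &= ~VRS_PEND_SIPI;
                rs |= VRS_RUN;
        }
        return (rs);
}

int
main(void)
{
        /* An AP that is sent INIT and then SIPI ends up in VRS_INIT|VRS_RUN. */
        unsigned int rs = settle_run_state(VRS_RUN | VRS_PEND_INIT);

        rs = settle_run_state(rs | VRS_PEND_SIPI);
        printf("run_state = %#x (VRS_INIT|VRS_RUN = %#x)\n",
            rs, VRS_INIT | VRS_RUN);
        return (0);
}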
1835 1893 #ifndef __FreeBSD__
1836 1894 static int
1837 1895 vm_handle_wrmsr(struct vm *vm, int vcpuid, struct vm_exit *vme)
1838 1896 {
1839 1897 struct vcpu *cpu = &vm->vcpu[vcpuid];
1840 1898 const uint32_t code = vme->u.msr.code;
1841 1899 const uint64_t val = vme->u.msr.wval;
1842 1900
1843 1901 switch (code) {
1844 1902 case MSR_TSC:
1845 1903 cpu->tsc_offset = val - rdtsc();
1846 1904 return (0);
1847 1905 }
1848 1906
1849 1907 return (-1);
1850 1908 }
1851 1909 #endif /* __FreeBSD__ */
1852 1910
1853 -void
1854 -vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip)
1855 -{
1856 - if (vm->sipi_req) {
1857 - /* This should never occur if userspace is doing its job. */
1858 - vm->stats.sipi_supersede++;
1859 - }
1860 - vm->sipi_req = true;
1861 - vm->sipi_req_vcpu = req_vcpuid;
1862 - vm->sipi_req_rip = req_rip;
1863 -}
1864 -
1865 1911 int
1866 1912 vm_suspend(struct vm *vm, enum vm_suspend_how how)
1867 1913 {
1868 1914 int i;
1869 1915
1870 1916 if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
1871 1917 return (EINVAL);
1872 1918
1873 1919 if (atomic_cmpset_int((uint_t *)&vm->suspend, 0, how) == 0) {
1874 1920 VM_CTR2(vm, "virtual machine already suspended %d/%d",
1875 1921 vm->suspend, how);
1876 1922 return (EALREADY);
1877 1923 }
1878 1924
1879 1925 VM_CTR1(vm, "virtual machine successfully suspended %d", how);
1880 1926
1881 1927 /*
1882 1928 * Notify all active vcpus that they are now suspended.
1883 1929 */
1884 1930 for (i = 0; i < vm->maxcpus; i++) {
1885 1931 if (CPU_ISSET(i, &vm->active_cpus))
1886 1932 vcpu_notify_event(vm, i);
1887 1933 }
1888 1934
1889 1935 return (0);
1890 1936 }
1891 1937
1892 1938 void
1893 -vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
1939 +vm_exit_run_state(struct vm *vm, int vcpuid, uint64_t rip)
1894 1940 {
1895 1941 struct vm_exit *vmexit;
1896 1942
1897 - KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
1898 - ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
1899 -
1900 1943 vmexit = vm_exitinfo(vm, vcpuid);
1901 1944 vmexit->rip = rip;
1902 1945 vmexit->inst_length = 0;
1903 - vmexit->exitcode = VM_EXITCODE_SUSPENDED;
1904 - vmexit->u.suspended.how = vm->suspend;
1946 + vmexit->exitcode = VM_EXITCODE_RUN_STATE;
1947 + vmm_stat_incr(vm, vcpuid, VMEXIT_RUN_STATE, 1);
1905 1948 }
1906 1949
1907 -void
1908 -vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip)
1909 -{
1910 - struct vm_exit *vmexit;
1911 1950
1912 - vmexit = vm_exitinfo(vm, vcpuid);
1913 - vmexit->rip = rip;
1914 - vmexit->inst_length = 0;
1915 - vmexit->exitcode = VM_EXITCODE_DEBUG;
1916 -}
1917 -
1918 -void
1919 -vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip)
1920 -{
1921 - struct vm_exit *vmexit;
1922 -
1923 - vmexit = vm_exitinfo(vm, vcpuid);
1924 - vmexit->rip = rip;
1925 - vmexit->inst_length = 0;
1926 - vmexit->exitcode = VM_EXITCODE_RUNBLOCK;
1927 - vmm_stat_incr(vm, vcpuid, VMEXIT_RUNBLOCK, 1);
1928 -}
1929 -
1930 -void
1931 -vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip)
1932 -{
1933 - struct vm_exit *vmexit;
1934 -
1935 - vmexit = vm_exitinfo(vm, vcpuid);
1936 - vmexit->rip = rip;
1937 - vmexit->inst_length = 0;
1938 - vmexit->exitcode = VM_EXITCODE_REQIDLE;
1939 - vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
1940 -}
1941 -
1942 -void
1943 -vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
1944 -{
1945 - struct vm_exit *vmexit;
1946 -
1947 - vmexit = vm_exitinfo(vm, vcpuid);
1948 - vmexit->rip = rip;
1949 - vmexit->inst_length = 0;
1950 - vmexit->exitcode = VM_EXITCODE_BOGUS;
1951 - vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
1952 -}
1953 -
1954 1951 #ifndef __FreeBSD__
1955 1952 /*
1956 1953 * Some vmm resources, such as the lapic, may have CPU-specific resources
1957 1954 * allocated to them which would benefit from migration onto the host CPU which
1958 1955 * is processing the vcpu state.
1959 1956 */
1960 1957 static void
1961 1958 vm_localize_resources(struct vm *vm, struct vcpu *vcpu)
1962 1959 {
1963 1960 /*
1964 1961 * Localizing cyclic resources requires acquisition of cpu_lock, and
1965 1962 * doing so with kpreempt disabled is a recipe for deadlock disaster.
1966 1963 */
1967 1964 VERIFY(curthread->t_preempt == 0);
1968 1965
1969 1966 /*
1970 1967 * Do not bother with localization if this vCPU is about to return to
1971 1968 * the host CPU it was last localized to.
1972 1969 */
1973 1970 if (vcpu->lastloccpu == curcpu)
1974 1971 return;
1975 1972
1976 1973 /*
1977 1974 * Localize system-wide resources to the primary boot vCPU. While any
1978 1975 * of the other vCPUs may access them, it keeps the potential interrupt
1979 1976 * footprint constrained to CPUs involved with this instance.
1980 1977 */
1981 1978 if (vcpu == &vm->vcpu[0]) {
1982 1979 vhpet_localize_resources(vm->vhpet);
1983 1980 vrtc_localize_resources(vm->vrtc);
1984 1981 vatpit_localize_resources(vm->vatpit);
1985 1982 }
1986 1983
1987 1984 vlapic_localize_resources(vcpu->vlapic);
1988 1985
1989 1986 vcpu->lastloccpu = curcpu;
1990 1987 }
1991 1988
1992 1989 static void
1993 1990 vmm_savectx(void *arg)
1994 1991 {
1995 1992 vm_thread_ctx_t *vtc = arg;
1996 1993 struct vm *vm = vtc->vtc_vm;
1997 1994 const int vcpuid = vtc->vtc_vcpuid;
1998 1995
1999 1996 if (ops->vmsavectx != NULL) {
2000 1997 ops->vmsavectx(vm->cookie, vcpuid);
2001 1998 }
2002 1999
2003 2000 /*
2004 2001 * If the CPU holds the restored guest FPU state, save it and restore
2005 2002 * the host FPU state before this thread goes off-cpu.
2006 2003 */
2007 2004 if ((vtc->vtc_status & VTCS_FPU_RESTORED) != 0) {
2008 2005 struct vcpu *vcpu = &vm->vcpu[vcpuid];
2009 2006
2010 2007 save_guest_fpustate(vcpu);
2011 2008 vtc->vtc_status &= ~VTCS_FPU_RESTORED;
2012 2009 }
2013 2010 }
2014 2011
2015 2012 static void
2016 2013 vmm_restorectx(void *arg)
2017 2014 {
2018 2015 vm_thread_ctx_t *vtc = arg;
2019 2016 struct vm *vm = vtc->vtc_vm;
2020 2017 const int vcpuid = vtc->vtc_vcpuid;
2021 2018
2022 2019 /*
2023 2020 * When coming back on-cpu, only restore the guest FPU status if the
2024 2021 * thread is in a context marked as requiring it. This should be rare,
2025 2022 * occurring only when a future logic error results in a voluntary
2026 2023 * sleep during the VMRUN critical section.
2027 2024 *
2028 2025 * The common case will result in elision of the guest FPU state
2029 2026 * restoration, deferring that action until it is clearly necessary
2030 2027 * during vm_run.
2031 2028 */
2032 2029 VERIFY((vtc->vtc_status & VTCS_FPU_RESTORED) == 0);
2033 2030 if ((vtc->vtc_status & VTCS_FPU_CTX_CRITICAL) != 0) {
2034 2031 struct vcpu *vcpu = &vm->vcpu[vcpuid];
2035 2032
2036 2033 restore_guest_fpustate(vcpu);
2037 2034 vtc->vtc_status |= VTCS_FPU_RESTORED;
2038 2035 }
2039 2036
2040 2037 if (ops->vmrestorectx != NULL) {
2041 2038 ops->vmrestorectx(vm->cookie, vcpuid);
2042 2039 }
2043 2040
2044 2041 }
2045 2042
2046 2043 /*
2047 2044 * If we're in removectx(), we might still have state to tidy up.
2048 2045 */
2049 2046 static void
2050 2047 vmm_freectx(void *arg, int isexec)
2051 2048 {
2052 2049 vmm_savectx(arg);
2053 2050 }
2054 2051
2055 2052 #endif /* __FreeBSD */
2056 2053
2057 2054 static int
2058 2055 vm_entry_actions(struct vm *vm, int vcpuid, const struct vm_entry *entry,
2059 2056 struct vm_exit *vme)
2060 2057 {
2061 2058 struct vcpu *vcpu;
2062 2059 struct vie *vie;
2063 2060 int err;
2064 2061
2065 2062 vcpu = &vm->vcpu[vcpuid];
2066 2063 vie = vcpu->vie_ctx;
2067 2064 err = 0;
2068 2065
2069 2066 switch (entry->cmd) {
2070 2067 case VEC_DEFAULT:
2071 2068 return (0);
2072 2069 case VEC_DISCARD_INSTR:
2073 2070 vie_reset(vie);
2074 2071 return (0);
2075 - case VEC_COMPLETE_MMIO:
2072 + case VEC_FULFILL_MMIO:
2076 2073 err = vie_fulfill_mmio(vie, &entry->u.mmio);
2077 2074 if (err == 0) {
2078 2075 err = vie_emulate_mmio(vie, vm, vcpuid);
2079 2076 if (err == 0) {
2080 2077 vie_advance_pc(vie, &vcpu->nextrip);
2081 2078 } else if (err < 0) {
2082 2079 vie_exitinfo(vie, vme);
2083 2080 } else if (err == EAGAIN) {
2084 2081 /*
2085 2082 * Clear the instruction emulation state in
2086 2083 * order to re-enter VM context and continue
2087 2084 * this 'rep <instruction>'
2088 2085 */
2089 2086 vie_reset(vie);
2090 2087 err = 0;
2091 2088 }
2092 2089 }
2093 2090 break;
2094 - case VEC_COMPLETE_INOUT:
2091 + case VEC_FULFILL_INOUT:
2095 2092 err = vie_fulfill_inout(vie, &entry->u.inout);
2096 2093 if (err == 0) {
2097 2094 err = vie_emulate_inout(vie, vm, vcpuid);
2098 2095 if (err == 0) {
2099 2096 vie_advance_pc(vie, &vcpu->nextrip);
2100 2097 } else if (err < 0) {
2101 2098 vie_exitinfo(vie, vme);
2102 2099 } else if (err == EAGAIN) {
2103 2100 /*
2104 2101 * Clear the instruction emulation state in
2105 2102 * order to re-enter VM context and continue
2106 2103 * this 'rep ins/outs'
2107 2104 */
2108 2105 vie_reset(vie);
2109 2106 err = 0;
2110 2107 }
2111 2108 }
2112 2109 break;
2113 2110 default:
2114 2111 return (EINVAL);
2115 2112 }
2116 2113 return (err);
2117 2114 }
2118 2115
2119 2116 static int
2120 2117 vm_loop_checks(struct vm *vm, int vcpuid, struct vm_exit *vme)
2121 2118 {
2122 2119 struct vie *vie;
2123 2120
2124 2121 vie = vm->vcpu[vcpuid].vie_ctx;
2125 2122
2126 2123 if (vie_pending(vie)) {
2127 2124 /*
2128 2125 * Userspace has not fulfilled the pending needs of the
2129 2126 * instruction emulation, so bail back out.
2130 2127 */
2131 2128 vie_exitinfo(vie, vme);
2132 2129 return (-1);
2133 2130 }
2134 2131
2135 - if (vcpuid == 0 && vm->sipi_req) {
2136 - /* The boot vCPU has sent a SIPI to one of the other CPUs */
2137 - vme->exitcode = VM_EXITCODE_SPINUP_AP;
2138 - vme->u.spinup_ap.vcpu = vm->sipi_req_vcpu;
2139 - vme->u.spinup_ap.rip = vm->sipi_req_rip;
2140 -
2141 - vm->sipi_req = false;
2142 - vm->sipi_req_vcpu = 0;
2143 - vm->sipi_req_rip = 0;
2144 - return (-1);
2145 - }
2146 -
2147 2132 return (0);
2148 2133 }
2149 2134
2150 2135 int
2151 2136 vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry)
2152 2137 {
2153 - struct vm_eventinfo evinfo;
2154 2138 int error;
2155 2139 struct vcpu *vcpu;
2156 2140 #ifdef __FreeBSD__
2157 2141 struct pcb *pcb;
2158 2142 #endif
2159 2143 uint64_t tscval;
2160 2144 struct vm_exit *vme;
2161 2145 bool intr_disabled;
2162 2146 pmap_t pmap;
2163 2147 #ifndef __FreeBSD__
2164 2148 vm_thread_ctx_t vtc;
2165 2149 int affinity_type = CPU_CURRENT;
2166 2150 #endif
2167 2151
2168 2152 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2169 2153 return (EINVAL);
2170 2154
2171 2155 if (!CPU_ISSET(vcpuid, &vm->active_cpus))
2172 2156 return (EINVAL);
2173 2157
2174 2158 if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
2175 2159 return (EINVAL);
2176 2160
2177 2161 pmap = vmspace_pmap(vm->vmspace);
2178 2162 vcpu = &vm->vcpu[vcpuid];
2179 2163 vme = &vcpu->exitinfo;
2180 - evinfo.rptr = &vcpu->runblock;
2181 - evinfo.sptr = &vm->suspend;
2182 - evinfo.iptr = &vcpu->reqidle;
2183 2164
2184 2165 #ifndef __FreeBSD__
2185 2166 vtc.vtc_vm = vm;
2186 2167 vtc.vtc_vcpuid = vcpuid;
2187 2168 vtc.vtc_status = 0;
2188 2169
2189 2170 installctx(curthread, &vtc, vmm_savectx, vmm_restorectx, NULL, NULL,
2190 2171 NULL, vmm_freectx);
2191 2172 #endif
2192 2173
2193 2174 error = vm_entry_actions(vm, vcpuid, entry, vme);
2194 2175 if (error != 0) {
2195 2176 goto exit;
2196 2177 }
2197 2178
2198 2179 restart:
2199 2180 error = vm_loop_checks(vm, vcpuid, vme);
2200 2181 if (error != 0) {
2201 2182 goto exit;
2202 2183 }
2203 2184
2204 2185 #ifndef __FreeBSD__
2205 2186 thread_affinity_set(curthread, affinity_type);
2206 2187 /*
2207 2188 * Resource localization should happen after the CPU affinity for the
2208 2189 * thread has been set to ensure that access from restricted contexts,
2209 2190 * such as VMX-accelerated APIC operations, can occur without inducing
2210 2191 * cyclic cross-calls.
2211 2192 *
2212 2193 * This must be done prior to disabling kpreempt via critical_enter().
2213 2194 */
2214 2195 vm_localize_resources(vm, vcpu);
2215 2196
2216 2197 affinity_type = CPU_CURRENT;
2217 2198 #endif
2218 2199
2219 2200 critical_enter();
2220 2201
2221 2202 KASSERT(!CPU_ISSET(curcpu, &pmap->pm_active),
2222 2203 ("vm_run: absurd pm_active"));
2223 2204
2224 2205 tscval = rdtsc();
2225 2206
2226 2207 #ifdef __FreeBSD__
2227 2208 pcb = PCPU_GET(curpcb);
2228 2209 set_pcb_flags(pcb, PCB_FULL_IRET);
2229 2210 #else
2230 2211 /* Force a trip through update_sregs to reload %fs/%gs and friends */
2231 2212 PCB_SET_UPDATE_SEGS(&ttolwp(curthread)->lwp_pcb);
2232 2213 #endif
2233 2214
2234 2215 #ifdef __FreeBSD__
2235 2216 restore_guest_fpustate(vcpu);
2236 2217 #else
2237 2218 if ((vtc.vtc_status & VTCS_FPU_RESTORED) == 0) {
2238 2219 restore_guest_fpustate(vcpu);
2239 2220 vtc.vtc_status |= VTCS_FPU_RESTORED;
2240 2221 }
2241 2222 vtc.vtc_status |= VTCS_FPU_CTX_CRITICAL;
2242 2223 #endif
2243 2224
2244 2225 vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
2245 - error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo);
2226 + error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap);
2246 2227 vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
2247 2228
2248 2229 #ifdef __FreeBSD__
2249 2230 save_guest_fpustate(vcpu);
2250 2231 #else
2251 2232 vtc.vtc_status &= ~VTCS_FPU_CTX_CRITICAL;
2252 2233 #endif
2253 2234
2254 2235 #ifndef __FreeBSD__
2255 2236 /*
2256 2237 * Once clear of the delicate contexts comprising the VM_RUN handler,
2257 2238 * thread CPU affinity can be loosened while other processing occurs.
2258 2239 */
2259 2240 thread_affinity_clear(curthread);
2260 2241 #endif
2261 2242
2262 2243 vmm_stat_incr(vm, vcpuid, VCPU_TOTAL_RUNTIME, rdtsc() - tscval);
2263 2244
2264 2245 critical_exit();
2265 2246
2266 2247 if (error != 0) {
2267 2248 /* Communicate out any error from VMRUN() above */
2268 2249 goto exit;
2269 2250 }
2270 2251
2271 2252 vcpu->nextrip = vme->rip + vme->inst_length;
2272 2253 switch (vme->exitcode) {
2273 2254 case VM_EXITCODE_REQIDLE:
2274 2255 error = vm_handle_reqidle(vm, vcpuid);
2275 2256 break;
2257 + case VM_EXITCODE_RUN_STATE:
2258 + error = vm_handle_run_state(vm, vcpuid);
2259 + break;
2276 2260 case VM_EXITCODE_SUSPENDED:
2277 2261 error = vm_handle_suspend(vm, vcpuid);
2278 2262 break;
2279 2263 case VM_EXITCODE_IOAPIC_EOI:
2280 2264 vioapic_process_eoi(vm, vcpuid,
2281 2265 vme->u.ioapic_eoi.vector);
2282 2266 break;
2283 - case VM_EXITCODE_RUNBLOCK:
2284 - break;
2285 2267 case VM_EXITCODE_HLT:
2286 2268 intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
2287 2269 error = vm_handle_hlt(vm, vcpuid, intr_disabled);
2288 2270 break;
2289 2271 case VM_EXITCODE_PAGING:
2290 2272 error = vm_handle_paging(vm, vcpuid);
2291 2273 break;
2292 2274 case VM_EXITCODE_MMIO_EMUL:
2293 2275 error = vm_handle_mmio_emul(vm, vcpuid);
2294 2276 break;
2295 2277 case VM_EXITCODE_INOUT:
2296 2278 error = vm_handle_inout(vm, vcpuid, vme);
2297 2279 break;
2298 2280 case VM_EXITCODE_MONITOR:
2299 2281 case VM_EXITCODE_MWAIT:
2300 2282 case VM_EXITCODE_VMINSN:
2301 2283 vm_inject_ud(vm, vcpuid);
2302 2284 break;
2303 2285 #ifndef __FreeBSD__
2304 2286 case VM_EXITCODE_WRMSR:
2305 2287 if (vm_handle_wrmsr(vm, vcpuid, vme) != 0) {
2306 2288 error = -1;
2307 2289 }
2308 2290 break;
2309 2291
2310 2292 case VM_EXITCODE_HT: {
2311 2293 affinity_type = CPU_BEST;
2312 2294 break;
2313 2295 }
2314 2296 #endif
2315 2297
2316 2298 case VM_EXITCODE_MTRAP:
2317 2299 vm_suspend_cpu(vm, vcpuid);
2318 2300 error = -1;
2319 2301 break;
2320 2302 default:
2321 2303 /* handled in userland */
2322 2304 error = -1;
2323 2305 break;
2324 2306 }
2325 2307
2326 2308 if (error == 0) {
2327 2309 /* VM exit conditions handled in-kernel, continue running */
2328 2310 goto restart;
2329 2311 }
2330 2312
2331 2313 exit:
2332 2314 #ifndef __FreeBSD__
2333 2315 removectx(curthread, &vtc, vmm_savectx, vmm_restorectx, NULL, NULL,
2334 2316 NULL, vmm_freectx);
2335 2317 #endif
2336 2318
2337 2319 VCPU_CTR2(vm, vcpuid, "retu %d/%d", error, vme->exitcode);
2338 2320
2339 2321 return (error);
2340 2322 }
2341 2323
2342 2324 int
2343 2325 vm_restart_instruction(void *arg, int vcpuid)
2344 2326 {
2345 2327 struct vm *vm;
2346 2328 struct vcpu *vcpu;
2347 2329 enum vcpu_state state;
2348 2330 uint64_t rip;
2349 2331 int error;
2350 2332
2351 2333 vm = arg;
2352 2334 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2353 2335 return (EINVAL);
2354 2336
2355 2337 vcpu = &vm->vcpu[vcpuid];
2356 2338 state = vcpu_get_state(vm, vcpuid, NULL);
2357 2339 if (state == VCPU_RUNNING) {
2358 2340 /*
2359 2341 * When a vcpu is "running" the next instruction is determined
2360 2342 * by adding 'rip' and 'inst_length' in the vcpu's 'exitinfo'.
2361 2343 * Thus setting 'inst_length' to zero will cause the current
2362 2344 * instruction to be restarted.
2363 2345 */
2364 2346 vcpu->exitinfo.inst_length = 0;
2365 2347 VCPU_CTR1(vm, vcpuid, "restarting instruction at %lx by "
2366 2348 "setting inst_length to zero", vcpu->exitinfo.rip);
2367 2349 } else if (state == VCPU_FROZEN) {
2368 2350 /*
2369 2351 * When a vcpu is "frozen" it is outside the critical section
2370 2352 * around VMRUN() and 'nextrip' points to the next instruction.
2371 2353 * Thus instruction restart is achieved by setting 'nextrip'
2372 2354 * to the vcpu's %rip.
2373 2355 */
2374 2356 error = vm_get_register(vm, vcpuid, VM_REG_GUEST_RIP, &rip);
2375 2357 KASSERT(!error, ("%s: error %d getting rip", __func__, error));
2376 2358 VCPU_CTR2(vm, vcpuid, "restarting instruction by updating "
2377 2359 "nextrip from %lx to %lx", vcpu->nextrip, rip);
2378 2360 vcpu->nextrip = rip;
2379 2361 } else {
2380 2362 panic("%s: invalid state %d", __func__, state);
2381 2363 }
2382 2364 return (0);
2383 2365 }
2384 2366
2385 2367 int
2386 2368 vm_exit_intinfo(struct vm *vm, int vcpuid, uint64_t info)
2387 2369 {
2388 2370 struct vcpu *vcpu;
2389 2371 int type, vector;
2390 2372
2391 2373 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2392 2374 return (EINVAL);
2393 2375
2394 2376 vcpu = &vm->vcpu[vcpuid];
2395 2377
2396 2378 if (info & VM_INTINFO_VALID) {
2397 2379 type = info & VM_INTINFO_TYPE;
2398 2380 vector = info & 0xff;
2399 2381 if (type == VM_INTINFO_NMI && vector != IDT_NMI)
2400 2382 return (EINVAL);
2401 2383 if (type == VM_INTINFO_HWEXCEPTION && vector >= 32)
2402 2384 return (EINVAL);
2403 2385 if (info & VM_INTINFO_RSVD)
2404 2386 return (EINVAL);
2405 2387 } else {
2406 2388 info = 0;
2407 2389 }
2408 2390 VCPU_CTR2(vm, vcpuid, "%s: info1(%lx)", __func__, info);
2409 2391 vcpu->exitintinfo = info;
2410 2392 return (0);
2411 2393 }
2412 2394
2413 2395 enum exc_class {
2414 2396 EXC_BENIGN,
2415 2397 EXC_CONTRIBUTORY,
2416 2398 EXC_PAGEFAULT
2417 2399 };
2418 2400
2419 2401 #define IDT_VE 20 /* Virtualization Exception (Intel specific) */
2420 2402
2421 2403 static enum exc_class
2422 2404 exception_class(uint64_t info)
2423 2405 {
2424 2406 int type, vector;
2425 2407
2426 2408 KASSERT(info & VM_INTINFO_VALID, ("intinfo must be valid: %lx", info));
2427 2409 type = info & VM_INTINFO_TYPE;
2428 2410 vector = info & 0xff;
2429 2411
2430 2412 /* Table 6-4, "Interrupt and Exception Classes", Intel SDM, Vol 3 */
2431 2413 switch (type) {
2432 2414 case VM_INTINFO_HWINTR:
2433 2415 case VM_INTINFO_SWINTR:
2434 2416 case VM_INTINFO_NMI:
2435 2417 return (EXC_BENIGN);
2436 2418 default:
2437 2419 /*
2438 2420 * Hardware exception.
2439 2421 *
2440 2422 * SVM and VT-x use identical type values to represent NMI,
2441 2423 * hardware interrupt and software interrupt.
2442 2424 *
2443 2425 * SVM uses type '3' for all exceptions. VT-x uses type '3'
2444 2426 * for exceptions except #BP and #OF. #BP and #OF use a type
2445 2427 * value of '5' or '6'. Therefore we don't check for explicit
2446 2428 * values of 'type' to classify 'intinfo' into a hardware
2447 2429 * exception.
2448 2430 */
2449 2431 break;
2450 2432 }
2451 2433
2452 2434 switch (vector) {
2453 2435 case IDT_PF:
2454 2436 case IDT_VE:
2455 2437 return (EXC_PAGEFAULT);
2456 2438 case IDT_DE:
2457 2439 case IDT_TS:
2458 2440 case IDT_NP:
2459 2441 case IDT_SS:
2460 2442 case IDT_GP:
2461 2443 return (EXC_CONTRIBUTORY);
2462 2444 default:
2463 2445 return (EXC_BENIGN);
2464 2446 }
2465 2447 }
2466 2448
2467 2449 static int
2468 2450 nested_fault(struct vm *vm, int vcpuid, uint64_t info1, uint64_t info2,
2469 2451 uint64_t *retinfo)
2470 2452 {
2471 2453 enum exc_class exc1, exc2;
2472 2454 int type1, vector1;
2473 2455
2474 2456 KASSERT(info1 & VM_INTINFO_VALID, ("info1 %lx is not valid", info1));
2475 2457 KASSERT(info2 & VM_INTINFO_VALID, ("info2 %lx is not valid", info2));
2476 2458
2477 2459 /*
2478 2460 * If an exception occurs while attempting to call the double-fault
2479 2461 * handler the processor enters shutdown mode (aka triple fault).
2480 2462 */
2481 2463 type1 = info1 & VM_INTINFO_TYPE;
2482 2464 vector1 = info1 & 0xff;
2483 2465 if (type1 == VM_INTINFO_HWEXCEPTION && vector1 == IDT_DF) {
2484 2466 VCPU_CTR2(vm, vcpuid, "triple fault: info1(%lx), info2(%lx)",
2485 2467 info1, info2);
2486 2468 vm_suspend(vm, VM_SUSPEND_TRIPLEFAULT);
2487 2469 *retinfo = 0;
2488 2470 return (0);
2489 2471 }
2490 2472
2491 2473 /*
2492 2474 * Table 6-5 "Conditions for Generating a Double Fault", Intel SDM, Vol3
2493 2475 */
2494 2476 exc1 = exception_class(info1);
2495 2477 exc2 = exception_class(info2);
2496 2478 if ((exc1 == EXC_CONTRIBUTORY && exc2 == EXC_CONTRIBUTORY) ||
2497 2479 (exc1 == EXC_PAGEFAULT && exc2 != EXC_BENIGN)) {
2498 2480 /* Convert nested fault into a double fault. */
2499 2481 *retinfo = IDT_DF;
2500 2482 *retinfo |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
2501 2483 *retinfo |= VM_INTINFO_DEL_ERRCODE;
2502 2484 } else {
2503 2485 /* Handle exceptions serially */
2504 2486 *retinfo = info2;
2505 2487 }
2506 2488 return (1);
2507 2489 }
2508 2490
2509 2491 static uint64_t
2510 2492 vcpu_exception_intinfo(struct vcpu *vcpu)
2511 2493 {
2512 2494 uint64_t info = 0;
2513 2495
2514 2496 if (vcpu->exception_pending) {
2515 2497 info = vcpu->exc_vector & 0xff;
2516 2498 info |= VM_INTINFO_VALID | VM_INTINFO_HWEXCEPTION;
2517 2499 if (vcpu->exc_errcode_valid) {
2518 2500 info |= VM_INTINFO_DEL_ERRCODE;
2519 2501 info |= (uint64_t)vcpu->exc_errcode << 32;
2520 2502 }
2521 2503 }
2522 2504 return (info);
2523 2505 }
2524 2506
2525 2507 int
2526 2508 vm_entry_intinfo(struct vm *vm, int vcpuid, uint64_t *retinfo)
2527 2509 {
2528 2510 struct vcpu *vcpu;
2529 2511 uint64_t info1, info2;
2530 2512 int valid;
2531 2513
2532 2514 KASSERT(vcpuid >= 0 &&
2533 2515 vcpuid < vm->maxcpus, ("invalid vcpu %d", vcpuid));
2534 2516
2535 2517 vcpu = &vm->vcpu[vcpuid];
2536 2518
2537 2519 info1 = vcpu->exitintinfo;
2538 2520 vcpu->exitintinfo = 0;
2539 2521
2540 2522 info2 = 0;
2541 2523 if (vcpu->exception_pending) {
2542 2524 info2 = vcpu_exception_intinfo(vcpu);
2543 2525 vcpu->exception_pending = 0;
2544 2526 VCPU_CTR2(vm, vcpuid, "Exception %d delivered: %lx",
2545 2527 vcpu->exc_vector, info2);
2546 2528 }
2547 2529
2548 2530 if ((info1 & VM_INTINFO_VALID) && (info2 & VM_INTINFO_VALID)) {
2549 2531 valid = nested_fault(vm, vcpuid, info1, info2, retinfo);
2550 2532 } else if (info1 & VM_INTINFO_VALID) {
2551 2533 *retinfo = info1;
2552 2534 valid = 1;
2553 2535 } else if (info2 & VM_INTINFO_VALID) {
2554 2536 *retinfo = info2;
2555 2537 valid = 1;
2556 2538 } else {
2557 2539 valid = 0;
2558 2540 }
2559 2541
2560 2542 if (valid) {
2561 2543 VCPU_CTR4(vm, vcpuid, "%s: info1(%lx), info2(%lx), "
2562 2544 "retinfo(%lx)", __func__, info1, info2, *retinfo);
2563 2545 }
2564 2546
2565 2547 return (valid);
2566 2548 }
2567 2549
2568 2550 int
2569 2551 vm_get_intinfo(struct vm *vm, int vcpuid, uint64_t *info1, uint64_t *info2)
2570 2552 {
2571 2553 struct vcpu *vcpu;
2572 2554
2573 2555 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2574 2556 return (EINVAL);
2575 2557
2576 2558 vcpu = &vm->vcpu[vcpuid];
2577 2559 *info1 = vcpu->exitintinfo;
2578 2560 *info2 = vcpu_exception_intinfo(vcpu);
2579 2561 return (0);
2580 2562 }
2581 2563
2582 2564 int
2583 2565 vm_inject_exception(struct vm *vm, int vcpuid, int vector, int errcode_valid,
2584 2566 uint32_t errcode, int restart_instruction)
2585 2567 {
2586 2568 struct vcpu *vcpu;
2587 2569 uint64_t regval;
2588 2570 int error;
2589 2571
2590 2572 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2591 2573 return (EINVAL);
2592 2574
2593 2575 if (vector < 0 || vector >= 32)
2594 2576 return (EINVAL);
2595 2577
2596 2578 /*
2597 2579 * NMIs (which bear an exception vector of 2) are to be injected via
2598 2580 * their own specialized path using vm_inject_nmi().
2599 2581 */
2600 2582 if (vector == 2) {
2601 2583 return (EINVAL);
2602 2584 }
2603 2585
2604 2586 /*
2605 2587 * A double fault exception should never be injected directly into
2606 2588 * the guest. It is a derived exception that results from specific
2607 2589 * combinations of nested faults.
2608 2590 */
2609 2591 if (vector == IDT_DF)
2610 2592 return (EINVAL);
2611 2593
2612 2594 vcpu = &vm->vcpu[vcpuid];
2613 2595
2614 2596 if (vcpu->exception_pending) {
2615 2597 VCPU_CTR2(vm, vcpuid, "Unable to inject exception %d due to "
2616 2598 "pending exception %d", vector, vcpu->exc_vector);
2617 2599 return (EBUSY);
2618 2600 }
2619 2601
2620 2602 if (errcode_valid) {
2621 2603 /*
2622 2604 * Exceptions don't deliver an error code in real mode.
2623 2605 */
2624 2606 		error = vm_get_register(vm, vcpuid, VM_REG_GUEST_CR0, &regval);
2625 2607 KASSERT(!error, ("%s: error %d getting CR0", __func__, error));
2626 2608 if (!(regval & CR0_PE))
2627 2609 errcode_valid = 0;
2628 2610 }
2629 2611
2630 2612 /*
2631 2613 * From section 26.6.1 "Interruptibility State" in Intel SDM:
2632 2614 *
2633 2615 * Event blocking by "STI" or "MOV SS" is cleared after guest executes
2634 2616 * one instruction or incurs an exception.
2635 2617 */
2636 2618 error = vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0);
2637 2619 KASSERT(error == 0, ("%s: error %d clearing interrupt shadow",
2638 2620 __func__, error));
2639 2621
2640 2622 if (restart_instruction)
2641 2623 vm_restart_instruction(vm, vcpuid);
2642 2624
2643 2625 vcpu->exception_pending = 1;
2644 2626 vcpu->exc_vector = vector;
2645 2627 vcpu->exc_errcode = errcode;
2646 2628 vcpu->exc_errcode_valid = errcode_valid;
2647 2629 VCPU_CTR1(vm, vcpuid, "Exception %d pending", vector);
2648 2630 return (0);
2649 2631 }
2650 2632
2651 2633 void
2652 2634 vm_inject_fault(struct vm *vm, int vcpuid, int vector, int errcode_valid,
2653 2635 int errcode)
2654 2636 {
2655 2637 int error;
2656 2638
2657 2639 error = vm_inject_exception(vm, vcpuid, vector, errcode_valid,
2658 2640 errcode, 1);
2659 2641 KASSERT(error == 0, ("vm_inject_exception error %d", error));
2660 2642 }
2661 2643
2662 2644 void
2663 2645 vm_inject_ud(struct vm *vm, int vcpuid)
2664 2646 {
2665 2647 vm_inject_fault(vm, vcpuid, IDT_UD, 0, 0);
2666 2648 }
2667 2649
2668 2650 void
2669 2651 vm_inject_gp(struct vm *vm, int vcpuid)
2670 2652 {
2671 2653 vm_inject_fault(vm, vcpuid, IDT_GP, 1, 0);
2672 2654 }
2673 2655
2674 2656 void
2675 2657 vm_inject_ac(struct vm *vm, int vcpuid, int errcode)
2676 2658 {
2677 2659 vm_inject_fault(vm, vcpuid, IDT_AC, 1, errcode);
2678 2660 }
2679 2661
2680 2662 void
2681 2663 vm_inject_ss(struct vm *vm, int vcpuid, int errcode)
2682 2664 {
2683 2665 vm_inject_fault(vm, vcpuid, IDT_SS, 1, errcode);
2684 2666 }
2685 2667
2686 2668 void
2687 2669 vm_inject_pf(struct vm *vm, int vcpuid, int error_code, uint64_t cr2)
2688 2670 {
2689 2671 int error;
2690 2672
2691 2673 VCPU_CTR2(vm, vcpuid, "Injecting page fault: error_code %x, cr2 %lx",
2692 2674 error_code, cr2);
2693 2675
2694 2676 error = vm_set_register(vm, vcpuid, VM_REG_GUEST_CR2, cr2);
2695 2677 KASSERT(error == 0, ("vm_set_register(cr2) error %d", error));
2696 2678
2697 2679 vm_inject_fault(vm, vcpuid, IDT_PF, 1, error_code);
2698 2680 }
2699 2681
2700 2682 static VMM_STAT(VCPU_NMI_COUNT, "number of NMIs delivered to vcpu");
2701 2683
2702 2684 int
2703 2685 vm_inject_nmi(struct vm *vm, int vcpuid)
2704 2686 {
2705 2687 struct vcpu *vcpu;
2706 2688
2707 2689 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2708 2690 return (EINVAL);
2709 2691
2710 2692 vcpu = &vm->vcpu[vcpuid];
2711 2693
2712 2694 vcpu->nmi_pending = 1;
2713 2695 vcpu_notify_event(vm, vcpuid);
2714 2696 return (0);
2715 2697 }
2716 2698
2717 2699 int
2718 2700 vm_nmi_pending(struct vm *vm, int vcpuid)
2719 2701 {
2720 2702 struct vcpu *vcpu;
2721 2703
2722 2704 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2723 2705 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
2724 2706
2725 2707 vcpu = &vm->vcpu[vcpuid];
2726 2708
2727 2709 return (vcpu->nmi_pending);
2728 2710 }
2729 2711
2730 2712 void
2731 2713 vm_nmi_clear(struct vm *vm, int vcpuid)
2732 2714 {
2733 2715 struct vcpu *vcpu;
2734 2716
2735 2717 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2736 2718 panic("vm_nmi_pending: invalid vcpuid %d", vcpuid);
2737 2719
2738 2720 vcpu = &vm->vcpu[vcpuid];
2739 2721
2740 2722 if (vcpu->nmi_pending == 0)
2741 2723 panic("vm_nmi_clear: inconsistent nmi_pending state");
2742 2724
2743 2725 vcpu->nmi_pending = 0;
2744 2726 vmm_stat_incr(vm, vcpuid, VCPU_NMI_COUNT, 1);
2745 2727 }
2746 2728
2747 2729 static VMM_STAT(VCPU_EXTINT_COUNT, "number of ExtINTs delivered to vcpu");
2748 2730
2749 2731 int
2750 2732 vm_inject_extint(struct vm *vm, int vcpuid)
2751 2733 {
2752 2734 struct vcpu *vcpu;
2753 2735
2754 2736 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2755 2737 return (EINVAL);
2756 2738
2757 2739 vcpu = &vm->vcpu[vcpuid];
2758 2740
2759 2741 vcpu->extint_pending = 1;
2760 2742 vcpu_notify_event(vm, vcpuid);
2761 2743 return (0);
2762 2744 }
2763 2745
2764 2746 int
2765 2747 vm_extint_pending(struct vm *vm, int vcpuid)
2766 2748 {
2767 2749 struct vcpu *vcpu;
2768 2750
2769 2751 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2770 2752 panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
2771 2753
2772 2754 vcpu = &vm->vcpu[vcpuid];
2773 2755
2774 2756 return (vcpu->extint_pending);
2775 2757 }
2776 2758
2777 2759 void
2778 2760 vm_extint_clear(struct vm *vm, int vcpuid)
2779 2761 {
2780 2762 struct vcpu *vcpu;
2781 2763
2782 2764 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2783 2765 panic("vm_extint_pending: invalid vcpuid %d", vcpuid);
2784 2766
2785 2767 vcpu = &vm->vcpu[vcpuid];
2786 2768
2787 2769 if (vcpu->extint_pending == 0)
2788 2770 panic("vm_extint_clear: inconsistent extint_pending state");
2789 2771
2790 2772 vcpu->extint_pending = 0;
2791 2773 vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
2792 2774 }
2793 2775
2794 2776 int
2777 +vm_inject_init(struct vm *vm, int vcpuid)
2778 +{
2779 + struct vcpu *vcpu;
2780 +
2781 + if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2782 + return (EINVAL);
2783 +
2784 + vcpu = &vm->vcpu[vcpuid];
2785 + vcpu_lock(vcpu);
2786 + vcpu->run_state |= VRS_PEND_INIT;
2787 + vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
2788 + vcpu_unlock(vcpu);
2789 + return (0);
2790 +}
2791 +
2792 +int
2793 +vm_inject_sipi(struct vm *vm, int vcpuid, uint8_t vector)
2794 +{
2795 + struct vcpu *vcpu;
2796 +
2797 + if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2798 + return (EINVAL);
2799 +
2800 + vcpu = &vm->vcpu[vcpuid];
2801 + vcpu_lock(vcpu);
2802 + vcpu->run_state |= VRS_PEND_SIPI;
2803 + vcpu->sipi_vector = vector;
2804 + /* SIPI is only actionable if the CPU is waiting in INIT state */
2805 + if ((vcpu->run_state & (VRS_INIT | VRS_RUN)) == VRS_INIT) {
2806 + vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
2807 + }
2808 + vcpu_unlock(vcpu);
2809 + return (0);
2810 +}
2811 +
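For context, a rough sketch (not part of this change) of the xAPIC ICR values a guest BSP would write to perform the INIT/SIPI sequence that the vLAPIC ultimately turns into the vm_inject_init() and vm_inject_sipi() calls above. The APIC_* names and the chosen vector are illustrative stand-ins, not taken from the kernel headers.

#include <inttypes.h>
#include <stdio.h>

#define APIC_DELMODE_INIT       (0x5u << 8)     /* delivery mode 101b */
#define APIC_DELMODE_STARTUP    (0x6u << 8)     /* delivery mode 110b */
#define APIC_LEVEL_ASSERT       (1u << 14)

int
main(void)
{
        const uint8_t sipi_vector = 0x10;       /* arbitrary example vector */
        const uint32_t icr_init = APIC_DELMODE_INIT | APIC_LEVEL_ASSERT;
        const uint32_t icr_sipi = APIC_DELMODE_STARTUP | sipi_vector;

        /* The vector byte of the startup IPI is what vm_inject_sipi() latches. */
        printf("ICR(INIT) = 0x%08" PRIx32 "\n", icr_init);
        printf("ICR(SIPI) = 0x%08" PRIx32 ", vector 0x%02x\n",
            icr_sipi, (unsigned)sipi_vector);
        return (0);
}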
2812 +bool
2813 +vcpu_run_state_pending(struct vm *vm, int vcpuid)
2814 +{
2815 + struct vcpu *vcpu;
2816 +
2817 + ASSERT(vcpuid >= 0 && vcpuid < vm->maxcpus);
2818 + vcpu = &vm->vcpu[vcpuid];
2819 +
2820 + /* Of interest: vCPU not in running state or with pending INIT */
2821 + return ((vcpu->run_state & (VRS_RUN | VRS_PEND_INIT)) != VRS_RUN);
2822 +}
2823 +
2824 +int
2825 +vcpu_arch_reset(struct vm *vm, int vcpuid, bool init_only)
2826 +{
2827 + struct seg_desc desc;
2828 + const enum vm_reg_name clear_regs[] = {
2829 + VM_REG_GUEST_CR2,
2830 + VM_REG_GUEST_CR3,
2831 + VM_REG_GUEST_CR4,
2832 + VM_REG_GUEST_RAX,
2833 + VM_REG_GUEST_RBX,
2834 + VM_REG_GUEST_RCX,
2835 + VM_REG_GUEST_RSI,
2836 + VM_REG_GUEST_RDI,
2837 + VM_REG_GUEST_RBP,
2838 + VM_REG_GUEST_RSP,
2839 + VM_REG_GUEST_R8,
2840 + VM_REG_GUEST_R9,
2841 + VM_REG_GUEST_R10,
2842 + VM_REG_GUEST_R11,
2843 + VM_REG_GUEST_R12,
2844 + VM_REG_GUEST_R13,
2845 + VM_REG_GUEST_R14,
2846 + VM_REG_GUEST_R15,
2847 + VM_REG_GUEST_DR0,
2848 + VM_REG_GUEST_DR1,
2849 + VM_REG_GUEST_DR2,
2850 + VM_REG_GUEST_DR3,
2851 + VM_REG_GUEST_EFER,
2852 + };
2853 + const enum vm_reg_name data_segs[] = {
2854 + VM_REG_GUEST_SS,
2855 + VM_REG_GUEST_DS,
2856 + VM_REG_GUEST_ES,
2857 + VM_REG_GUEST_FS,
2858 + VM_REG_GUEST_GS,
2859 + };
2860 + struct vcpu *vcpu = &vm->vcpu[vcpuid];
2861 +
2862 + if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2863 + return (EINVAL);
2864 +
2865 + for (uint_t i = 0; i < nitems(clear_regs); i++) {
2866 + VERIFY0(vm_set_register(vm, vcpuid, clear_regs[i], 0));
2867 + }
2868 +
2869 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 2));
2870 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RIP, 0xfff0));
2871 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CR0, 0x60000010));
2872 +
2873 + /*
2874 + * The prescribed contents of %rdx differ slightly between the Intel and
2875 + * AMD architectural definitions. The former expects the Extended Model
2876 + * in bits 16-19 where the latter expects all the Family, Model, and
2877 + * Stepping be there. Common boot ROMs appear to disregard this
2878 + * anyways, so we stick with a compromise value similar to what is
2879 + * spelled out in the Intel SDM.
2880 + */
2881 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RDX, 0x600));
2882 +
2883 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_DR6, 0xffff0ff0));
2884 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_DR7, 0x400));
2885 +
2886 + /* CS: Present, R/W, Accessed */
2887 + desc.access = 0x0093;
2888 + desc.base = 0xffff0000;
2889 + desc.limit = 0xffff;
2890 + VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &desc));
2891 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CS, 0xf000));
2892 +
2893 + /* SS, DS, ES, FS, GS: Present, R/W, Accessed */
2894 + desc.access = 0x0093;
2895 + desc.base = 0;
2896 + desc.limit = 0xffff;
2897 + for (uint_t i = 0; i < nitems(data_segs); i++) {
2898 + VERIFY0(vm_set_seg_desc(vm, vcpuid, data_segs[i], &desc));
2899 + VERIFY0(vm_set_register(vm, vcpuid, data_segs[i], 0));
2900 + }
2901 +
2902 + /* GDTR, IDTR */
2903 + desc.base = 0;
2904 + desc.limit = 0xffff;
2905 + VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_GDTR, &desc));
2906 + VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_IDTR, &desc));
2907 +
2908 + /* LDTR: Present, LDT */
2909 + desc.access = 0x0082;
2910 + desc.base = 0;
2911 + desc.limit = 0xffff;
2912 + VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_LDTR, &desc));
2913 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_LDTR, 0));
2914 +
2915 + /* TR: Present, 32-bit TSS */
2916 + desc.access = 0x008b;
2917 + desc.base = 0;
2918 + desc.limit = 0xffff;
2919 + VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_TR, &desc));
2920 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_TR, 0));
2921 +
2922 + vlapic_reset(vm_lapic(vm, vcpuid));
2923 +
2924 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0));
2925 +
2926 + vcpu->exitintinfo = 0;
2927 + vcpu->exception_pending = 0;
2928 + vcpu->nmi_pending = 0;
2929 + vcpu->extint_pending = 0;
2930 +
2931 + /*
2932 + * A CPU reset caused by power-on or system reset clears more state than
2933 + * one which is triggered from an INIT IPI.
2934 + */
2935 + if (!init_only) {
2936 + vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
2937 + fpu_save_area_reset(vcpu->guestfpu);
2938 +
2939 + /* XXX: clear MSRs and other pieces */
2940 + }
2941 +
2942 + return (0);
2943 +}
2944 +
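As a quick check of the reset values programmed above, a tiny sketch showing how the CS base of 0xffff0000 and %rip of 0xfff0 combine into the familiar x86 reset vector.

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
        /* Values written by the reset logic above */
        const uint64_t cs_base = 0xffff0000;
        const uint64_t rip = 0xfff0;

        /* In real mode the fetch address is the CS base plus %rip. */
        printf("first fetch after reset: 0x%" PRIx64 "\n", cs_base + rip);
        return (0);
}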
2945 +static int
2946 +vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector)
2947 +{
2948 + struct seg_desc desc;
2949 +
2950 + if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2951 + return (EINVAL);
2952 +
2953 + /* CS: Present, R/W, Accessed */
2954 + desc.access = 0x0093;
2955 + desc.base = (uint64_t)vector << 12;
2956 + desc.limit = 0xffff;
2957 + VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &desc));
2958 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CS,
2959 + (uint64_t)vector << 8));
2960 +
2961 + VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RIP, 0));
2962 +
2963 + return (0);
2964 +}
2965 +
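Similarly, a small sketch of how the SIPI vector programmed by vcpu_vector_sipi() above maps to a real-mode start address: the CS selector is the vector shifted left by 8 and the CS base by 12, with %rip cleared, so the AP begins executing at a 4 KiB-aligned address. The example vector is arbitrary.

#include <inttypes.h>
#include <stdio.h>

int
main(void)
{
        const uint8_t vector = 0x9a;    /* arbitrary example SIPI vector */
        const uint64_t cs_sel = (uint64_t)vector << 8;
        const uint64_t cs_base = (uint64_t)vector << 12;
        const uint64_t rip = 0;

        printf("CS selector 0x%04" PRIx64 ", start address 0x%" PRIx64 "\n",
            cs_sel, cs_base + rip);
        return (0);
}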
2966 +int
2795 2967 vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
2796 2968 {
2797 2969 if (vcpu < 0 || vcpu >= vm->maxcpus)
2798 2970 return (EINVAL);
2799 2971
2800 2972 if (type < 0 || type >= VM_CAP_MAX)
2801 2973 return (EINVAL);
2802 2974
2803 2975 return (VMGETCAP(vm->cookie, vcpu, type, retval));
2804 2976 }
2805 2977
2806 2978 int
2807 2979 vm_set_capability(struct vm *vm, int vcpu, int type, int val)
2808 2980 {
2809 2981 if (vcpu < 0 || vcpu >= vm->maxcpus)
2810 2982 return (EINVAL);
2811 2983
2812 2984 if (type < 0 || type >= VM_CAP_MAX)
2813 2985 return (EINVAL);
2814 2986
2815 2987 return (VMSETCAP(vm->cookie, vcpu, type, val));
2816 2988 }
2817 2989
2818 2990 struct vlapic *
2819 2991 vm_lapic(struct vm *vm, int cpu)
2820 2992 {
2821 2993 return (vm->vcpu[cpu].vlapic);
2822 2994 }
2823 2995
2824 2996 struct vioapic *
2825 2997 vm_ioapic(struct vm *vm)
2826 2998 {
2827 2999
2828 3000 return (vm->vioapic);
2829 3001 }
2830 3002
2831 3003 struct vhpet *
2832 3004 vm_hpet(struct vm *vm)
2833 3005 {
2834 3006
2835 3007 return (vm->vhpet);
2836 3008 }
2837 3009
2838 3010 #ifdef __FreeBSD__
2839 3011 bool
2840 3012 vmm_is_pptdev(int bus, int slot, int func)
2841 3013 {
2842 3014 int b, f, i, n, s;
2843 3015 char *val, *cp, *cp2;
2844 3016 bool found;
2845 3017
2846 3018 /*
2847 3019 * XXX
2848 3020 * The length of an environment variable is limited to 128 bytes which
2849 3021 * puts an upper limit on the number of passthru devices that may be
2850 3022 * specified using a single environment variable.
2851 3023 *
2852 3024 * Work around this by scanning multiple environment variable
2853 3025 * names instead of a single one - yuck!
2854 3026 */
2855 3027 const char *names[] = { "pptdevs", "pptdevs2", "pptdevs3", NULL };
2856 3028
2857 3029 /* set pptdevs="1/2/3 4/5/6 7/8/9 10/11/12" */
2858 3030 found = false;
2859 3031 for (i = 0; names[i] != NULL && !found; i++) {
2860 3032 cp = val = kern_getenv(names[i]);
2861 3033 while (cp != NULL && *cp != '\0') {
2862 3034 if ((cp2 = strchr(cp, ' ')) != NULL)
2863 3035 *cp2 = '\0';
2864 3036
2865 3037 n = sscanf(cp, "%d/%d/%d", &b, &s, &f);
2866 3038 if (n == 3 && bus == b && slot == s && func == f) {
2867 3039 found = true;
2868 3040 break;
2869 3041 }
2870 3042
2871 3043 if (cp2 != NULL)
2872 3044 *cp2++ = ' ';
2873 3045
2874 3046 cp = cp2;
2875 3047 }
2876 3048 freeenv(val);
2877 3049 }
2878 3050 return (found);
2879 3051 }
2880 3052 #endif
2881 3053
2882 3054 void *
2883 3055 vm_iommu_domain(struct vm *vm)
2884 3056 {
2885 3057
2886 3058 return (vm->iommu);
2887 3059 }
2888 3060
2889 3061 int
2890 3062 vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
2891 3063 bool from_idle)
2892 3064 {
2893 3065 int error;
2894 3066 struct vcpu *vcpu;
2895 3067
2896 3068 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2897 - panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
3069 + panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
2898 3070
2899 3071 vcpu = &vm->vcpu[vcpuid];
2900 3072
2901 3073 vcpu_lock(vcpu);
2902 3074 error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
2903 3075 vcpu_unlock(vcpu);
2904 3076
2905 3077 return (error);
2906 3078 }
2907 3079
2908 3080 enum vcpu_state
2909 3081 vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
2910 3082 {
2911 3083 struct vcpu *vcpu;
2912 3084 enum vcpu_state state;
2913 3085
2914 3086 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2915 - panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
3087 + panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
2916 3088
2917 3089 vcpu = &vm->vcpu[vcpuid];
2918 3090
2919 3091 vcpu_lock(vcpu);
2920 3092 state = vcpu->state;
2921 3093 if (hostcpu != NULL)
2922 3094 *hostcpu = vcpu->hostcpu;
2923 3095 vcpu_unlock(vcpu);
2924 3096
2925 3097 return (state);
2926 3098 }
2927 3099
2928 -void
2929 -vcpu_block_run(struct vm *vm, int vcpuid)
2930 -{
2931 - struct vcpu *vcpu;
2932 -
2933 - if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
2934 - panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
2935 -
2936 - vcpu = &vm->vcpu[vcpuid];
2937 -
2938 - vcpu_lock(vcpu);
2939 - vcpu->runblock++;
2940 - if (vcpu->runblock == 1 && vcpu->state == VCPU_RUNNING) {
2941 - vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
2942 - }
2943 - while (vcpu->state == VCPU_RUNNING) {
2944 -#ifdef __FreeBSD__
2945 - msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
2946 -#else
2947 - cv_wait(&vcpu->state_cv, &vcpu->mtx.m);
2948 -#endif
2949 - }
2950 - vcpu_unlock(vcpu);
2951 -}
2952 -
2953 -void
2954 -vcpu_unblock_run(struct vm *vm, int vcpuid)
2955 -{
2956 - struct vcpu *vcpu;
2957 -
2958 - if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
2959 - panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
2960 -
2961 - vcpu = &vm->vcpu[vcpuid];
2962 -
2963 - vcpu_lock(vcpu);
2964 - KASSERT(vcpu->runblock != 0, ("expected non-zero runblock"));
2965 - vcpu->runblock--;
2966 - if (vcpu->runblock == 0) {
2967 -#ifdef __FreeBSD__
2968 - wakeup(&vcpu->state);
2969 -#else
2970 - cv_broadcast(&vcpu->state_cv);
2971 -#endif
2972 - }
2973 - vcpu_unlock(vcpu);
2974 -}
2975 -
2976 3100 #ifndef __FreeBSD__
2977 3101 uint64_t
2978 3102 vcpu_tsc_offset(struct vm *vm, int vcpuid)
2979 3103 {
2980 3104 return (vm->vcpu[vcpuid].tsc_offset);
2981 3105 }
2982 3106 #endif /* __FreeBSD__ */
2983 3107
2984 3108 int
2985 3109 vm_activate_cpu(struct vm *vm, int vcpuid)
2986 3110 {
2987 3111
2988 3112 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2989 3113 return (EINVAL);
2990 3114
2991 3115 if (CPU_ISSET(vcpuid, &vm->active_cpus))
2992 3116 return (EBUSY);
2993 3117
2994 3118 VCPU_CTR0(vm, vcpuid, "activated");
2995 3119 CPU_SET_ATOMIC(vcpuid, &vm->active_cpus);
2996 3120 return (0);
2997 3121 }
2998 3122
2999 3123 int
3000 3124 vm_suspend_cpu(struct vm *vm, int vcpuid)
3001 3125 {
3002 3126 int i;
3003 3127
3004 3128 if (vcpuid < -1 || vcpuid >= vm->maxcpus)
3005 3129 return (EINVAL);
3006 3130
3007 3131 if (vcpuid == -1) {
3008 3132 vm->debug_cpus = vm->active_cpus;
3009 3133 for (i = 0; i < vm->maxcpus; i++) {
3010 3134 if (CPU_ISSET(i, &vm->active_cpus))
3011 3135 vcpu_notify_event(vm, i);
3012 3136 }
3013 3137 } else {
3014 3138 if (!CPU_ISSET(vcpuid, &vm->active_cpus))
3015 3139 return (EINVAL);
3016 3140
3017 3141 CPU_SET_ATOMIC(vcpuid, &vm->debug_cpus);
3018 3142 vcpu_notify_event(vm, vcpuid);
3019 3143 }
3020 3144 return (0);
3021 3145 }
3022 3146
3023 3147 int
3024 3148 vm_resume_cpu(struct vm *vm, int vcpuid)
3025 3149 {
3026 3150
3027 3151 if (vcpuid < -1 || vcpuid >= vm->maxcpus)
3028 3152 return (EINVAL);
3029 3153
3030 3154 if (vcpuid == -1) {
3031 3155 CPU_ZERO(&vm->debug_cpus);
3032 3156 } else {
3033 3157 if (!CPU_ISSET(vcpuid, &vm->debug_cpus))
3034 3158 return (EINVAL);
3035 3159
3036 3160 CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus);
3037 3161 }
3038 3162 return (0);
3039 3163 }
3040 3164
3041 -int
3042 -vcpu_debugged(struct vm *vm, int vcpuid)
3165 +static bool
3166 +vcpu_bailout_checks(struct vm *vm, int vcpuid, bool on_entry,
3167 + uint64_t entry_rip)
3043 3168 {
3169 + struct vcpu *vcpu = &vm->vcpu[vcpuid];
3170 + struct vm_exit *vme = &vcpu->exitinfo;
3171 + bool bail = false;
3044 3172
3045 - return (CPU_ISSET(vcpuid, &vm->debug_cpus));
3173 + ASSERT(vcpuid >= 0 && vcpuid < vm->maxcpus);
3174 +
3175 + if (vm->suspend) {
3176 + if (on_entry) {
3177 + VERIFY(vm->suspend > VM_SUSPEND_NONE &&
3178 + vm->suspend < VM_SUSPEND_LAST);
3179 +
3180 + vme->exitcode = VM_EXITCODE_SUSPENDED;
3181 + vme->u.suspended.how = vm->suspend;
3182 + } else {
3183 + /*
3184 + * Handling VM suspend is complicated, so if that
3185 + * condition is detected outside of VM-entry itself,
3186 + * just emit a BOGUS exitcode so we take a lap to pick
3187 + * up the event during an entry and are directed into
3188 + * the vm_handle_suspend() logic.
3189 + */
3190 + vme->exitcode = VM_EXITCODE_BOGUS;
3191 + }
3192 + bail = true;
3193 + }
3194 + if (vcpu->reqidle) {
3195 + vme->exitcode = VM_EXITCODE_REQIDLE;
3196 + vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
3197 +
3198 + if (!on_entry) {
3199 + /*
3200 + * A reqidle request detected outside of VM-entry can be
3201 + * handled directly by clearing the request (and taking
3202 + * a lap to userspace).
3203 + */
3204 + vcpu_assert_locked(vcpu);
3205 + vcpu->reqidle = 0;
3206 + }
3207 + bail = true;
3208 + }
3209 + if (vcpu_should_yield(vm, vcpuid)) {
3210 + vme->exitcode = VM_EXITCODE_BOGUS;
3211 + vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
3212 + bail = true;
3213 + }
3214 + if (CPU_ISSET(vcpuid, &vm->debug_cpus)) {
3215 + vme->exitcode = VM_EXITCODE_DEBUG;
3216 + bail = true;
3217 + }
3218 +
3219 + if (bail) {
3220 + if (on_entry) {
3221 + /*
3222 + * If bailing out during VM-entry, the current %rip must
3223 + * be recorded in the exitinfo.
3224 + */
3225 + vme->rip = entry_rip;
3226 + }
3227 + vme->inst_length = 0;
3228 + }
3229 + return (bail);
3046 3230 }
3047 3231
3232 +static bool
3233 +vcpu_sleep_bailout_checks(struct vm *vm, int vcpuid)
3234 +{
3235 + /*
3236 + * Bail-out check done prior to sleeping (in vCPU contexts like HLT or
3237 + * wait-for-SIPI) expect that %rip is already populated in the vm_exit
3238 + * structure, and we would only modify the exitcode.
3239 + */
3240 + return (vcpu_bailout_checks(vm, vcpuid, false, 0));
3241 +}
3242 +
3243 +bool
3244 +vcpu_entry_bailout_checks(struct vm *vm, int vcpuid, uint64_t rip)
3245 +{
3246 + /*
3247 + * Bail-out checks done as part of VM entry require an updated %rip to
3248 + * populate the vm_exit struct if any of the conditions of interest are
3249 + * matched in the check.
3250 + */
3251 + return (vcpu_bailout_checks(vm, vcpuid, true, rip));
3252 +}
3253 +
3048 3254 cpuset_t
3049 3255 vm_active_cpus(struct vm *vm)
3050 3256 {
3051 3257
3052 3258 return (vm->active_cpus);
3053 3259 }
3054 3260
3055 3261 cpuset_t
3056 3262 vm_debug_cpus(struct vm *vm)
3057 3263 {
3058 3264
3059 3265 return (vm->debug_cpus);
3060 3266 }
3061 3267
3062 3268 cpuset_t
3063 3269 vm_suspended_cpus(struct vm *vm)
3064 3270 {
3065 3271
3066 3272 return (vm->suspended_cpus);
3067 3273 }
3068 3274
3069 3275 void *
3070 3276 vcpu_stats(struct vm *vm, int vcpuid)
3071 3277 {
3072 3278
3073 3279 return (vm->vcpu[vcpuid].stats);
3074 3280 }
3075 3281
3076 3282 int
3077 3283 vm_get_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state *state)
3078 3284 {
3079 3285 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
3080 3286 return (EINVAL);
3081 3287
3082 3288 *state = vm->vcpu[vcpuid].x2apic_state;
3083 3289
3084 3290 return (0);
3085 3291 }
3086 3292
3087 3293 int
3088 3294 vm_set_x2apic_state(struct vm *vm, int vcpuid, enum x2apic_state state)
3089 3295 {
3090 3296 if (vcpuid < 0 || vcpuid >= vm->maxcpus)
3091 3297 return (EINVAL);
3092 3298
3093 3299 if (state >= X2APIC_STATE_LAST)
3094 3300 return (EINVAL);
3095 3301
3096 3302 vm->vcpu[vcpuid].x2apic_state = state;
3097 3303
3098 3304 vlapic_set_x2apic_state(vm, vcpuid, state);
3099 3305
3100 3306 return (0);
3101 3307 }
3102 3308
3103 3309 /*
3104 3310 * This function is called to ensure that a vcpu "sees" a pending event
3105 3311 * as soon as possible:
3106 3312 * - If the vcpu thread is sleeping then it is woken up.
3107 3313 * - If the vcpu is running on a different host_cpu then an IPI will be directed
3108 3314 * to the host_cpu to cause the vcpu to trap into the hypervisor.
3109 3315 */
3110 3316 static void
3111 3317 vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t ntype)
3112 3318 {
3113 3319 int hostcpu;
3114 3320
3115 3321 	ASSERT(ntype == VCPU_NOTIFY_APIC || ntype == VCPU_NOTIFY_EXIT);
3116 3322
3117 3323 hostcpu = vcpu->hostcpu;
3118 3324 if (vcpu->state == VCPU_RUNNING) {
3119 3325 KASSERT(hostcpu != NOCPU, ("vcpu running on invalid hostcpu"));
3120 3326 if (hostcpu != curcpu) {
3121 3327 if (ntype == VCPU_NOTIFY_APIC) {
3122 3328 vlapic_post_intr(vcpu->vlapic, hostcpu,
3123 3329 vmm_ipinum);
3124 3330 } else {
3125 3331 ipi_cpu(hostcpu, vmm_ipinum);
3126 3332 }
3127 3333 } else {
3128 3334 /*
3129 3335 * If the 'vcpu' is running on 'curcpu' then it must
3130 3336 * be sending a notification to itself (e.g. SELF_IPI).
3131 3337 * The pending event will be picked up when the vcpu
3132 3338 * transitions back to guest context.
3133 3339 */
3134 3340 }
3135 3341 } else {
3136 3342 KASSERT(hostcpu == NOCPU, ("vcpu state %d not consistent "
3137 3343 "with hostcpu %d", vcpu->state, hostcpu));
3138 3344 if (vcpu->state == VCPU_SLEEPING) {
3139 3345 #ifdef __FreeBSD__
3140 3346 wakeup_one(vcpu);
3141 3347 #else
3142 3348 cv_signal(&vcpu->vcpu_cv);
3143 3349 #endif
3144 3350 }
3145 3351 }
3146 3352 }
3147 3353
3148 3354 void
3149 3355 vcpu_notify_event(struct vm *vm, int vcpuid)
3150 3356 {
3151 3357 struct vcpu *vcpu = &vm->vcpu[vcpuid];
3152 3358
3153 3359 vcpu_lock(vcpu);
3154 3360 vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
3155 3361 vcpu_unlock(vcpu);
3156 3362 }
3157 3363
3158 3364 void
3159 3365 vcpu_notify_event_type(struct vm *vm, int vcpuid, vcpu_notify_t ntype)
3160 3366 {
3161 3367 struct vcpu *vcpu = &vm->vcpu[vcpuid];
3162 3368
3163 3369 if (ntype == VCPU_NOTIFY_NONE) {
3164 3370 return;
3165 3371 }
3166 3372
3167 3373 vcpu_lock(vcpu);
3168 3374 vcpu_notify_event_locked(vcpu, ntype);
3169 3375 vcpu_unlock(vcpu);
3170 3376 }
3171 3377
3172 3378 struct vmspace *
3173 3379 vm_get_vmspace(struct vm *vm)
3174 3380 {
3175 3381
3176 3382 return (vm->vmspace);
3177 3383 }
3178 3384
3179 3385 int
3180 3386 vm_apicid2vcpuid(struct vm *vm, int apicid)
3181 3387 {
3182 3388 /*
3183 3389 * XXX apic id is assumed to be numerically identical to vcpu id
3184 3390 */
3185 3391 return (apicid);
3186 3392 }
3187 3393
3188 3394 struct vatpic *
3189 3395 vm_atpic(struct vm *vm)
3190 3396 {
3191 3397 return (vm->vatpic);
3192 3398 }
3193 3399
3194 3400 struct vatpit *
3195 3401 vm_atpit(struct vm *vm)
3196 3402 {
3197 3403 return (vm->vatpit);
3198 3404 }
3199 3405
3200 3406 struct vpmtmr *
3201 3407 vm_pmtmr(struct vm *vm)
3202 3408 {
3203 3409
3204 3410 return (vm->vpmtmr);
3205 3411 }
3206 3412
3207 3413 struct vrtc *
3208 3414 vm_rtc(struct vm *vm)
3209 3415 {
3210 3416
3211 3417 return (vm->vrtc);
3212 3418 }
3213 3419
3214 3420 enum vm_reg_name
3215 3421 vm_segment_name(int seg)
3216 3422 {
3217 3423 static enum vm_reg_name seg_names[] = {
3218 3424 VM_REG_GUEST_ES,
3219 3425 VM_REG_GUEST_CS,
3220 3426 VM_REG_GUEST_SS,
3221 3427 VM_REG_GUEST_DS,
3222 3428 VM_REG_GUEST_FS,
3223 3429 VM_REG_GUEST_GS
3224 3430 };
3225 3431
3226 3432 KASSERT(seg >= 0 && seg < nitems(seg_names),
3227 3433 ("%s: invalid segment encoding %d", __func__, seg));
3228 3434 return (seg_names[seg]);
3229 3435 }
3230 3436
3231 3437 void
3232 3438 vm_copy_teardown(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo,
3233 3439 int num_copyinfo)
3234 3440 {
3235 3441 int idx;
3236 3442
3237 3443 for (idx = 0; idx < num_copyinfo; idx++) {
3238 3444 if (copyinfo[idx].cookie != NULL)
3239 3445 vm_gpa_release(copyinfo[idx].cookie);
3240 3446 }
3241 3447 bzero(copyinfo, num_copyinfo * sizeof (struct vm_copyinfo));
3242 3448 }
3243 3449
3244 3450 int
3245 3451 vm_copy_setup(struct vm *vm, int vcpuid, struct vm_guest_paging *paging,
3246 3452 uint64_t gla, size_t len, int prot, struct vm_copyinfo *copyinfo,
3247 3453 int num_copyinfo, int *fault)
3248 3454 {
3249 3455 int error, idx, nused;
3250 3456 size_t n, off, remaining;
3251 3457 void *hva, *cookie;
3252 3458 uint64_t gpa;
3253 3459
3254 3460 bzero(copyinfo, sizeof (struct vm_copyinfo) * num_copyinfo);
3255 3461
3256 3462 nused = 0;
3257 3463 remaining = len;
3258 3464 while (remaining > 0) {
3259 3465 KASSERT(nused < num_copyinfo, ("insufficient vm_copyinfo"));
3260 3466 error = vm_gla2gpa(vm, vcpuid, paging, gla, prot, &gpa, fault);
3261 3467 if (error || *fault)
3262 3468 return (error);
3263 3469 off = gpa & PAGE_MASK;
3264 3470 n = min(remaining, PAGE_SIZE - off);
3265 3471 copyinfo[nused].gpa = gpa;
3266 3472 copyinfo[nused].len = n;
3267 3473 remaining -= n;
3268 3474 gla += n;
3269 3475 nused++;
3270 3476 }
3271 3477
3272 3478 for (idx = 0; idx < nused; idx++) {
3273 3479 hva = vm_gpa_hold(vm, vcpuid, copyinfo[idx].gpa,
3274 3480 copyinfo[idx].len, prot, &cookie);
3275 3481 if (hva == NULL)
3276 3482 break;
3277 3483 copyinfo[idx].hva = hva;
3278 3484 copyinfo[idx].cookie = cookie;
3279 3485 }
3280 3486
3281 3487 if (idx != nused) {
3282 3488 vm_copy_teardown(vm, vcpuid, copyinfo, num_copyinfo);
3283 3489 return (EFAULT);
3284 3490 } else {
3285 3491 *fault = 0;
3286 3492 return (0);
3287 3493 }
3288 3494 }
3289 3495
3290 3496 void
3291 3497 vm_copyin(struct vm *vm, int vcpuid, struct vm_copyinfo *copyinfo, void *kaddr,
3292 3498 size_t len)
3293 3499 {
3294 3500 char *dst;
3295 3501 int idx;
3296 3502
3297 3503 dst = kaddr;
3298 3504 idx = 0;
3299 3505 while (len > 0) {
3300 3506 bcopy(copyinfo[idx].hva, dst, copyinfo[idx].len);
3301 3507 len -= copyinfo[idx].len;
3302 3508 dst += copyinfo[idx].len;
3303 3509 idx++;
3304 3510 }
3305 3511 }
3306 3512
3307 3513 void
3308 3514 vm_copyout(struct vm *vm, int vcpuid, const void *kaddr,
3309 3515 struct vm_copyinfo *copyinfo, size_t len)
3310 3516 {
3311 3517 const char *src;
3312 3518 int idx;
3313 3519
3314 3520 src = kaddr;
3315 3521 idx = 0;
3316 3522 while (len > 0) {
3317 3523 bcopy(src, copyinfo[idx].hva, copyinfo[idx].len);
3318 3524 len -= copyinfo[idx].len;
3319 3525 src += copyinfo[idx].len;
3320 3526 idx++;
3321 3527 }
3322 3528 }
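
As a usage sketch (not part of this change): emulation code typically pairs these helpers as setup, copy, then teardown. The wrapper below is hypothetical -- the name, the two-entry copyinfo array (sized for at most one page crossing), and the PROT_READ protection are assumptions -- but the calls match the signatures above.

	/*
	 * Hypothetical helper: read 'len' bytes from guest linear address
	 * 'gla' into 'buf'.  Two copyinfo entries cover a buffer that
	 * crosses at most one page boundary.  Returns 0 on success, EFAULT
	 * if the guest pages cannot be held, or leaves *fault set when the
	 * guest should instead see an exception.
	 */
	static int
	example_read_guest(struct vm *vm, int vcpuid,
	    struct vm_guest_paging *paging, uint64_t gla, void *buf,
	    size_t len, int *fault)
	{
		struct vm_copyinfo copyinfo[2];
		int error;

		error = vm_copy_setup(vm, vcpuid, paging, gla, len, PROT_READ,
		    copyinfo, nitems(copyinfo), fault);
		if (error != 0 || *fault != 0)
			return (error);

		vm_copyin(vm, vcpuid, copyinfo, buf, len);
		vm_copy_teardown(vm, vcpuid, copyinfo, nitems(copyinfo));
		return (0);
	}

On failure vm_copy_setup() tears down any pages it already held and returns EFAULT itself, so the caller only needs the explicit teardown after a successful setup.
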
3323 3529
3324 3530 /*
3325 3531 * Return the amount of in-use and wired memory for the VM. Since
3326 3532  * these are global stats, only return the values for vCPU 0.
3327 3533 */
3328 3534 VMM_STAT_DECLARE(VMM_MEM_RESIDENT);
3329 3535 VMM_STAT_DECLARE(VMM_MEM_WIRED);
3330 3536
3331 3537 static void
3332 3538 vm_get_rescnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
3333 3539 {
3334 3540
3335 3541 if (vcpu == 0) {
3336 3542 vmm_stat_set(vm, vcpu, VMM_MEM_RESIDENT,
3337 3543 PAGE_SIZE * vmspace_resident_count(vm->vmspace));
3338 3544 }
3339 3545 }
3340 3546
3341 3547 static void
3342 3548 vm_get_wiredcnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
3343 3549 {
3344 3550
3345 3551 if (vcpu == 0) {
3346 3552 vmm_stat_set(vm, vcpu, VMM_MEM_WIRED,
3347 3553 PAGE_SIZE * pmap_wired_count(vmspace_pmap(vm->vmspace)));
3348 3554 }
3349 3555 }
3350 3556
3351 3557 VMM_STAT_FUNC(VMM_MEM_RESIDENT, "Resident memory", vm_get_rescnt);
3352 3558 VMM_STAT_FUNC(VMM_MEM_WIRED, "Wired memory", vm_get_wiredcnt);
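
For illustration (not part of this change), another VM-global statistic could follow the same declare/fetch/register pattern shown above; VMM_MEM_EXAMPLE and vm_get_example_cnt are hypothetical names, and the 0 passed to vmm_stat_set() is only a placeholder for whatever global quantity would be tracked.

	/*
	 * Hypothetical stat mirroring the pattern above: a VM-global value
	 * is refreshed at read time and attributed to vCPU 0 only.
	 */
	VMM_STAT_DECLARE(VMM_MEM_EXAMPLE);

	static void
	vm_get_example_cnt(struct vm *vm, int vcpu, struct vmm_stat_type *stat)
	{
		if (vcpu == 0) {
			/* Placeholder value standing in for the real count. */
			vmm_stat_set(vm, vcpu, VMM_MEM_EXAMPLE, 0);
		}
	}

	VMM_STAT_FUNC(VMM_MEM_EXAMPLE, "Example memory stat", vm_get_example_cnt);
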
3353 3559
3354 3560 int
3355 3561 vm_ioport_access(struct vm *vm, int vcpuid, bool in, uint16_t port,
3356 3562 uint8_t bytes, uint32_t *val)
3357 3563 {
3358 3564 return (vm_inout_access(&vm->ioports, in, port, bytes, val));
3359 3565 }
3360 3566
3361 3567 /*
3362 3568 * bhyve-internal interfaces to attach or detach IO port handlers.
3363 3569 * Must be called with VM write lock held for safety.
3364 3570 */
3365 3571 int
3366 3572 vm_ioport_attach(struct vm *vm, uint16_t port, ioport_handler_t func, void *arg,
3367 3573 void **cookie)
3368 3574 {
3369 3575 int err;
3370 3576 err = vm_inout_attach(&vm->ioports, port, IOPF_DEFAULT, func, arg);
3371 3577 if (err == 0) {
3372 3578 *cookie = (void *)IOP_GEN_COOKIE(func, arg, port);
3373 3579 }
3374 3580 return (err);
3375 3581 }
3376 3582 int
3377 3583 vm_ioport_detach(struct vm *vm, void **cookie, ioport_handler_t *old_func,
3378 3584 void **old_arg)
3379 3585 {
3380 3586 uint16_t port = IOP_PORT_FROM_COOKIE((uintptr_t)*cookie);
3381 3587 int err;
3382 3588
3383 3589 err = vm_inout_detach(&vm->ioports, port, false, old_func, old_arg);
3384 3590 if (err == 0) {
3385 3591 *cookie = NULL;
3386 3592 }
3387 3593 return (err);
3388 3594 }
3389 3595
3390 3596 /*
3391 3597 * External driver interfaces to attach or detach IO port handlers.
3392 3598 * Must be called with VM write lock held for safety.
3393 3599 */
3394 3600 int
3395 3601 vm_ioport_hook(struct vm *vm, uint16_t port, ioport_handler_t func,
3396 3602 void *arg, void **cookie)
3397 3603 {
3398 3604 int err;
3399 3605
3400 3606 if (port == 0) {
3401 3607 return (EINVAL);
3402 3608 }
3403 3609
3404 3610 err = vm_inout_attach(&vm->ioports, port, IOPF_DRV_HOOK, func, arg);
3405 3611 if (err == 0) {
3406 3612 *cookie = (void *)IOP_GEN_COOKIE(func, arg, port);
3407 3613 }
3408 3614 return (err);
3409 3615 }
3410 3616 void
3411 3617 vm_ioport_unhook(struct vm *vm, void **cookie)
3412 3618 {
3413 3619 uint16_t port = IOP_PORT_FROM_COOKIE((uintptr_t)*cookie);
3414 3620 ioport_handler_t old_func;
3415 3621 void *old_arg;
3416 3622 int err;
3417 3623
3418 3624 err = vm_inout_detach(&vm->ioports, port, true, &old_func, &old_arg);
3419 3625
3420 3626 /* ioport-hook-using drivers are expected to be well-behaved */
3421 3627 VERIFY0(err);
3422 3628 VERIFY(IOP_GEN_COOKIE(old_func, old_arg, port) == (uintptr_t)*cookie);
3423 3629
3424 3630 *cookie = NULL;
3425 3631 }
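
As a usage sketch (not part of this change) for the driver-facing hook interface: the handler prototype below is an assumption inferred from vm_ioport_access() above, and the function names, port number, and cookie variable are all hypothetical; consult the ioport_handler_t typedef in vmm_ioport.h for the authoritative signature.

	/*
	 * Assumed handler shape: (arg, in, port, bytes, val).  Verify this
	 * against the ioport_handler_t typedef in vmm_ioport.h.
	 */
	static int
	example_drv_handler(void *arg, bool in, uint16_t port, uint8_t bytes,
	    uint32_t *val)
	{
		/* Emulate reads (in) and writes (!in) of 'bytes' at 'port'. */
		return (0);
	}

	static void *example_cookie;

	static int
	example_drv_attach(struct vm *vm)
	{
		/* Port 0 is rejected; the VM write lock must be held. */
		return (vm_ioport_hook(vm, 0x510, example_drv_handler, NULL,
		    &example_cookie));
	}

	static void
	example_drv_detach(struct vm *vm)
	{
		/*
		 * vm_ioport_unhook() VERIFYs success, so only call it for a
		 * port that was successfully hooked.
		 */
		vm_ioport_unhook(vm, &example_cookie);
	}
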