13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>
*** 107,127 ****
* (i) initialized when vcpu is created and when it is reinitialized
* (o) initialized the first time the vcpu is created
* (x) initialized before use
*/
struct vcpu {
! struct mtx mtx; /* (o) protects 'state' and 'hostcpu' */
enum vcpu_state state; /* (o) vcpu state */
! #ifndef __FreeBSD__
kcondvar_t vcpu_cv; /* (o) cpu waiter cv */
kcondvar_t state_cv; /* (o) IDLE-transition cv */
- #endif /* __FreeBSD__ */
int hostcpu; /* (o) vcpu's current host cpu */
- #ifndef __FreeBSD__
int lastloccpu; /* (o) last host cpu localized to */
- #endif
- uint_t runblock; /* (i) block vcpu from run state */
int reqidle; /* (i) request vcpu to idle */
struct vlapic *vlapic; /* (i) APIC device model */
enum x2apic_state x2apic_state; /* (i) APIC mode */
uint64_t exitintinfo; /* (i) events pending at VM exit */
int nmi_pending; /* (i) NMI pending */
--- 107,125 ----
* (i) initialized when vcpu is created and when it is reinitialized
* (o) initialized the first time the vcpu is created
* (x) initialized before use
*/
struct vcpu {
! /* (o) protects state, run_state, hostcpu, sipi_vector */
! struct mtx mtx;
!
enum vcpu_state state; /* (o) vcpu state */
! enum vcpu_run_state run_state; /* (i) vcpu init/sipi/run state */
kcondvar_t vcpu_cv; /* (o) cpu waiter cv */
kcondvar_t state_cv; /* (o) IDLE-transition cv */
int hostcpu; /* (o) vcpu's current host cpu */
int lastloccpu; /* (o) last host cpu localized to */
int reqidle; /* (i) request vcpu to idle */
struct vlapic *vlapic; /* (i) APIC device model */
enum x2apic_state x2apic_state; /* (i) APIC mode */
uint64_t exitintinfo; /* (i) events pending at VM exit */
int nmi_pending; /* (i) NMI pending */
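[Editor's note: throughout this change, run_state is treated as a bitmask — VRS_HALT is assigned directly, VRS_INIT/VRS_RUN/VRS_PEND_INIT/VRS_PEND_SIPI are OR-ed and masked, and vm_set_run_state() validates input with VRS_IS_VALID(). The enum itself is defined in a header outside this webrev; a sketch consistent with the masking logic below, where the exact bit positions are an assumption:

    enum vcpu_run_state {
        VRS_HALT        = 0,            /* powered on, waiting for INIT */
        VRS_INIT        = (1 << 0),     /* INIT received, awaiting SIPI */
        VRS_RUN         = (1 << 1),     /* executing guest instructions */

        VRS_PEND_INIT   = (1 << 14),    /* INIT delivery pending (assumed bit) */
        VRS_PEND_SIPI   = (1 << 15),    /* SIPI delivery pending (assumed bit) */
    };
    #define VRS_MASK_VALID(v) \
        ((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))
    #define VRS_IS_VALID(v)   ((v) == VRS_MASK_VALID(v))
]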
*** 128,137 ****
--- 126,136 ----
int extint_pending; /* (i) INTR pending */
int exception_pending; /* (i) exception pending */
int exc_vector; /* (x) exception collateral */
int exc_errcode_valid;
uint32_t exc_errcode;
+ uint8_t sipi_vector; /* (i) SIPI vector */
struct savefpu *guestfpu; /* (a,i) guest fpu state */
uint64_t guest_xcr0; /* (i) guest %xcr0 register */
void *stats; /* (a,i) statistics */
struct vm_exit exitinfo; /* (x) exit reason and collateral */
uint64_t nextrip; /* (x) next instruction to execute */
*** 198,216 ****
uint16_t cores; /* (o) num of cores/socket */
uint16_t threads; /* (o) num of threads/core */
uint16_t maxcpus; /* (o) max pluggable cpus */
struct ioport_config ioports; /* (o) ioport handling */
-
- bool sipi_req; /* (i) SIPI requested */
- int sipi_req_vcpu; /* (i) SIPI destination */
- uint64_t sipi_req_rip; /* (i) SIPI start %rip */
-
- /* Miscellaneous VM-wide statistics and counters */
- struct vm_wide_stats {
- uint64_t sipi_supersede;
- } stats;
};
static int vmm_initialized;
--- 197,206 ----
*** 247,258 ****
#define VMM_INIT(num) ((*ops->init)(num))
#define VMM_CLEANUP() ((*ops->cleanup)())
#define VMM_RESUME() ((*ops->resume)())
#define VMINIT(vm, pmap) ((*ops->vminit)(vm, pmap))
! #define VMRUN(vmi, vcpu, rip, pmap, evinfo) \
! ((*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo))
#define VMCLEANUP(vmi) ((*ops->vmcleanup)(vmi))
#define VMSPACE_ALLOC(min, max) ((*ops->vmspace_alloc)(min, max))
#define VMSPACE_FREE(vmspace) ((*ops->vmspace_free)(vmspace))
#define VMGETREG(vmi, vcpu, num, rv) ((*ops->vmgetreg)(vmi, vcpu, num, rv))
--- 237,248 ----
#define VMM_INIT(num) ((*ops->init)(num))
#define VMM_CLEANUP() ((*ops->cleanup)())
#define VMM_RESUME() ((*ops->resume)())
#define VMINIT(vm, pmap) ((*ops->vminit)(vm, pmap))
! #define VMRUN(vmi, vcpu, rip, pmap) \
! ((*ops->vmrun)(vmi, vcpu, rip, pmap))
#define VMCLEANUP(vmi) ((*ops->vmcleanup)(vmi))
#define VMSPACE_ALLOC(min, max) ((*ops->vmspace_alloc)(min, max))
#define VMSPACE_FREE(vmspace) ((*ops->vmspace_free)(vmspace))
#define VMGETREG(vmi, vcpu, num, rv) ((*ops->vmgetreg)(vmi, vcpu, num, rv))
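[Editor's note: VMRUN also drops its evinfo argument — the CPU-specific run loop no longer polls a struct of raw pointers (populated from &vcpu->runblock, &vm->suspend, and &vcpu->reqidle in the lines removed from vm_run() further down), relying instead on the new vcpu_entry_bailout_checks(). The retired shape, reconstructed approximately from that removed usage:

    /* Approximate reconstruction of the retired struct (illustrative). */
    struct vm_eventinfo {
        uint_t  *rptr;  /* runblock request */
        int     *sptr;  /* suspend request */
        int     *iptr;  /* reqidle request */
    };
]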
*** 290,299 ****
--- 280,291 ----
static int trace_guest_exceptions;
static void vm_free_memmap(struct vm *vm, int ident);
static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
static void vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t);
+ static bool vcpu_sleep_bailout_checks(struct vm *vm, int vcpuid);
+ static int vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector);
#ifndef __FreeBSD__
static void vm_clear_memseg(struct vm *, int);
/* Flags for vtc_status */
*** 368,380 ****
} else {
vie_reset(vcpu->vie_ctx);
bzero(&vcpu->exitinfo, sizeof (vcpu->exitinfo));
}
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
- vcpu->runblock = 0;
vcpu->reqidle = 0;
vcpu->exitintinfo = 0;
vcpu->nmi_pending = 0;
vcpu->extint_pending = 0;
vcpu->exception_pending = 0;
--- 360,372 ----
} else {
vie_reset(vcpu->vie_ctx);
bzero(&vcpu->exitinfo, sizeof (vcpu->exitinfo));
}
+ vcpu->run_state = VRS_HALT;
vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
vcpu->reqidle = 0;
vcpu->exitintinfo = 0;
vcpu->nmi_pending = 0;
vcpu->extint_pending = 0;
vcpu->exception_pending = 0;
*** 1231,1241 ****
return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}
int
! vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc)
{
if (vcpu < 0 || vcpu >= vm->maxcpus)
return (EINVAL);
if (!is_segment_register(reg) && !is_descriptor_table(reg))
--- 1223,1233 ----
return (VMGETDESC(vm->cookie, vcpu, reg, desc));
}
int
! vm_set_seg_desc(struct vm *vm, int vcpu, int reg, const struct seg_desc *desc)
{
if (vcpu < 0 || vcpu >= vm->maxcpus)
return (EINVAL);
if (!is_segment_register(reg) && !is_descriptor_table(reg))
*** 1242,1251 ****
--- 1234,1286 ----
return (EINVAL);
return (VMSETDESC(vm->cookie, vcpu, reg, desc));
}
+ int
+ vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state, uint8_t *sipi_vec)
+ {
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus) {
+ return (EINVAL);
+ }
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ *state = vcpu->run_state;
+ *sipi_vec = vcpu->sipi_vector;
+ vcpu_unlock(vcpu);
+
+ return (0);
+ }
+
+ int
+ vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state, uint8_t sipi_vec)
+ {
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus) {
+ return (EINVAL);
+ }
+ if (!VRS_IS_VALID(state)) {
+ return (EINVAL);
+ }
+
+ vcpu = &vm->vcpu[vcpuid];
+
+ vcpu_lock(vcpu);
+ vcpu->run_state = state;
+ vcpu->sipi_vector = sipi_vec;
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
+ vcpu_unlock(vcpu);
+
+ return (0);
+ }
+
static void
restore_guest_fpustate(struct vcpu *vcpu)
{
/* flush host state to the pcb */
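[Editor's note: these accessors expose the per-vCPU init/sipi/run state to userspace; the ioctl plumbing lives in a different file of this change. Purely as an illustrative sketch, a caller parking an AP in wait-for-SIPI and later emulating SIPI delivery might do:

    /* Illustrative only: park the AP as if it had just taken an INIT. */
    VERIFY0(vm_set_run_state(vm, apid, VRS_INIT, 0));

    /* Later: pend a SIPI with a (hypothetical) startup vector of 0x9f. */
    VERIFY0(vm_set_run_state(vm, apid, VRS_INIT | VRS_PEND_SIPI, 0x9f));
]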
*** 1352,1371 ****
default:
error = 1;
break;
}
- if (newstate == VCPU_RUNNING) {
- while (vcpu->runblock != 0) {
- #ifdef __FreeBSD__
- msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
- #else
- cv_wait(&vcpu->state_cv, &vcpu->mtx.m);
- #endif
- }
- }
-
if (error)
return (EBUSY);
VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s",
vcpu_state2str(vcpu->state), vcpu_state2str(newstate));
--- 1387,1396 ----
*** 1374,1385 ****
if (newstate == VCPU_RUNNING)
vcpu->hostcpu = curcpu;
else
vcpu->hostcpu = NOCPU;
! if (newstate == VCPU_IDLE ||
! (newstate == VCPU_FROZEN && vcpu->runblock != 0)) {
#ifdef __FreeBSD__
wakeup(&vcpu->state);
#else
cv_broadcast(&vcpu->state_cv);
#endif
--- 1399,1409 ----
if (newstate == VCPU_RUNNING)
vcpu->hostcpu = curcpu;
else
vcpu->hostcpu = NOCPU;
! if (newstate == VCPU_IDLE) {
#ifdef __FreeBSD__
wakeup(&vcpu->state);
#else
cv_broadcast(&vcpu->state_cv);
#endif
*** 1411,1426 ****
*/
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
{
struct vcpu *vcpu;
- #ifdef __FreeBSD__
- const char *wmesg;
- #else
- const char *wmesg __unused;
- #endif
int t, vcpu_halted, vm_halted;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
vcpu = &vm->vcpu[vcpuid];
vcpu_halted = 0;
--- 1435,1446 ----
*/
static int
vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
{
struct vcpu *vcpu;
int t, vcpu_halted, vm_halted;
+ bool userspace_exit = false;
KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
vcpu = &vm->vcpu[vcpuid];
vcpu_halted = 0;
*** 1427,1498 ****
vm_halted = 0;
vcpu_lock(vcpu);
while (1) {
/*
! * Do a final check for pending NMI or interrupts before
! * really putting this thread to sleep. Also check for
! * software events that would cause this vcpu to wakeup.
! *
! * These interrupts/events could have happened after the
! * vcpu returned from VMRUN() and before it acquired the
! * vcpu lock above.
*/
- if (vm->suspend || vcpu->reqidle)
- break;
if (vm_nmi_pending(vm, vcpuid))
break;
if (!intr_disabled) {
if (vm_extint_pending(vm, vcpuid) ||
vlapic_pending_intr(vcpu->vlapic, NULL)) {
break;
}
}
! /* Don't go to sleep if the vcpu thread needs to yield */
! if (vcpu_should_yield(vm, vcpuid))
break;
- if (vcpu_debugged(vm, vcpuid))
- break;
-
/*
* Some Linux guests implement "halt" by having all vcpus
* execute HLT with interrupts disabled. 'halted_cpus' keeps
* track of the vcpus that have entered this state. When all
* vcpus enter the halted state the virtual machine is halted.
*/
if (intr_disabled) {
- wmesg = "vmhalt";
- VCPU_CTR0(vm, vcpuid, "Halted");
if (!vcpu_halted && halt_detection_enabled) {
vcpu_halted = 1;
CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
}
if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
vm_halted = 1;
break;
}
- } else {
- wmesg = "vmidle";
}
t = ticks;
vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
- #ifdef __FreeBSD__
- /*
- * XXX msleep_spin() cannot be interrupted by signals so
- * wake up periodically to check pending signals.
- */
- msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
- #else
- /*
- * Fortunately, cv_wait_sig can be interrupted by signals, so
- * there is no need to periodically wake up.
- */
(void) cv_wait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m);
- #endif
vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
}
if (vcpu_halted)
--- 1447,1500 ----
vm_halted = 0;
vcpu_lock(vcpu);
while (1) {
/*
! * Do a final check for pending interrupts (including NMI and
! * INIT) before putting this thread to sleep.
*/
if (vm_nmi_pending(vm, vcpuid))
break;
+ if (vcpu_run_state_pending(vm, vcpuid))
+ break;
if (!intr_disabled) {
if (vm_extint_pending(vm, vcpuid) ||
vlapic_pending_intr(vcpu->vlapic, NULL)) {
break;
}
}
! /*
! * Also check for software events which would cause a wake-up.
! * This will set the appropriate exitcode directly, rather than
! * requiring a trip through VM_RUN().
! */
! if (vcpu_sleep_bailout_checks(vm, vcpuid)) {
! userspace_exit = true;
break;
+ }
/*
* Some Linux guests implement "halt" by having all vcpus
* execute HLT with interrupts disabled. 'halted_cpus' keeps
* track of the vcpus that have entered this state. When all
* vcpus enter the halted state the virtual machine is halted.
*/
if (intr_disabled) {
if (!vcpu_halted && halt_detection_enabled) {
vcpu_halted = 1;
CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
}
if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
vm_halted = 1;
break;
}
}
t = ticks;
vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
(void) cv_wait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m);
vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
}
if (vcpu_halted)
*** 1501,1511 ****
vcpu_unlock(vcpu);
if (vm_halted)
vm_suspend(vm, VM_SUSPEND_HALT);
! return (0);
}
static int
vm_handle_paging(struct vm *vm, int vcpuid)
{
--- 1503,1513 ----
vcpu_unlock(vcpu);
if (vm_halted)
vm_suspend(vm, VM_SUSPEND_HALT);
! return (userspace_exit ? -1 : 0);
}
static int
vm_handle_paging(struct vm *vm, int vcpuid)
{
*** 1830,1839 ****
--- 1832,1897 ----
vcpu->reqidle = 0;
vcpu_unlock(vcpu);
return (-1);
}
+ static int
+ vm_handle_run_state(struct vm *vm, int vcpuid)
+ {
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+ bool handled = false;
+
+ vcpu_lock(vcpu);
+ while (1) {
+ if ((vcpu->run_state & VRS_PEND_INIT) != 0) {
+ vcpu_unlock(vcpu);
+ VERIFY0(vcpu_arch_reset(vm, vcpuid, true));
+ vcpu_lock(vcpu);
+
+ vcpu->run_state &= ~(VRS_RUN | VRS_PEND_INIT);
+ vcpu->run_state |= VRS_INIT;
+ }
+
+ if ((vcpu->run_state & (VRS_INIT | VRS_RUN | VRS_PEND_SIPI)) ==
+ (VRS_INIT | VRS_PEND_SIPI)) {
+ const uint8_t vector = vcpu->sipi_vector;
+
+ vcpu_unlock(vcpu);
+ VERIFY0(vcpu_vector_sipi(vm, vcpuid, vector));
+ vcpu_lock(vcpu);
+
+ vcpu->run_state &= ~VRS_PEND_SIPI;
+ vcpu->run_state |= VRS_RUN;
+ }
+
+ /*
+ * If the vCPU is now in the running state, there is no need to
+ * wait for anything prior to re-entry.
+ */
+ if ((vcpu->run_state & VRS_RUN) != 0) {
+ handled = true;
+ break;
+ }
+
+ /*
+ * Also check for software events which would cause a wake-up.
+ * This will set the appropriate exitcode directly, rather than
+ * requiring a trip through VM_RUN().
+ */
+ if (vcpu_sleep_bailout_checks(vm, vcpuid)) {
+ break;
+ }
+
+ vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
+ (void) cv_wait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m);
+ vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
+ }
+ vcpu_unlock(vcpu);
+
+ return (handled ? 0 : -1);
+ }
+
#ifndef __FreeBSD__
static int
vm_handle_wrmsr(struct vm *vm, int vcpuid, struct vm_exit *vme)
{
struct vcpu *cpu = &vm->vcpu[vcpuid];
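[Editor's note: together with vm_inject_init()/vm_inject_sipi() added further down, the vm_handle_run_state() loop above implements the architectural INIT-SIPI startup handshake entirely in-kernel. The run_state transitions for an AP, traced through this code:

    VRS_RUN
      -> VRS_RUN | VRS_PEND_INIT     vm_inject_init() (BSP writes ICR)
      -> VRS_INIT                    vm_handle_run_state(): vcpu_arch_reset()
      -> VRS_INIT | VRS_PEND_SIPI    vm_inject_sipi() (SIPI vector recorded)
      -> VRS_INIT | VRS_RUN          vm_handle_run_state(): vcpu_vector_sipi()
]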
*** 1848,1869 ****
return (-1);
}
#endif /* __FreeBSD__ */
- void
- vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip)
- {
- if (vm->sipi_req) {
- /* This should never occur if userspace is doing its job. */
- vm->stats.sipi_supersede++;
- }
- vm->sipi_req = true;
- vm->sipi_req_vcpu = req_vcpuid;
- vm->sipi_req_rip = req_rip;
- }
-
int
vm_suspend(struct vm *vm, enum vm_suspend_how how)
{
int i;
--- 1906,1915 ----
*** 1888,1958 ****
return (0);
}
void
! vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
{
struct vm_exit *vmexit;
- KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
- ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
-
vmexit = vm_exitinfo(vm, vcpuid);
vmexit->rip = rip;
vmexit->inst_length = 0;
! vmexit->exitcode = VM_EXITCODE_SUSPENDED;
! vmexit->u.suspended.how = vm->suspend;
}
- void
- vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip)
- {
- struct vm_exit *vmexit;
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->rip = rip;
- vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_DEBUG;
- }
-
- void
- vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip)
- {
- struct vm_exit *vmexit;
-
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->rip = rip;
- vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_RUNBLOCK;
- vmm_stat_incr(vm, vcpuid, VMEXIT_RUNBLOCK, 1);
- }
-
- void
- vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip)
- {
- struct vm_exit *vmexit;
-
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->rip = rip;
- vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_REQIDLE;
- vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
- }
-
- void
- vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
- {
- struct vm_exit *vmexit;
-
- vmexit = vm_exitinfo(vm, vcpuid);
- vmexit->rip = rip;
- vmexit->inst_length = 0;
- vmexit->exitcode = VM_EXITCODE_BOGUS;
- vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
- }
-
#ifndef __FreeBSD__
/*
* Some vmm resources, such as the lapic, may have CPU-specific resources
* allocated to them which would benefit from migration onto the host CPU which
* is processing the vcpu state.
--- 1934,1955 ----
return (0);
}
void
! vm_exit_run_state(struct vm *vm, int vcpuid, uint64_t rip)
{
struct vm_exit *vmexit;
vmexit = vm_exitinfo(vm, vcpuid);
vmexit->rip = rip;
vmexit->inst_length = 0;
! vmexit->exitcode = VM_EXITCODE_RUN_STATE;
! vmm_stat_incr(vm, vcpuid, VMEXIT_RUN_STATE, 1);
}
#ifndef __FreeBSD__
/*
* Some vmm resources, such as the lapic, may have CPU-specific resources
* allocated to them which would benefit from migration onto the host CPU which
* is processing the vcpu state.
*** 2070,2080 ****
case VEC_DEFAULT:
return (0);
case VEC_DISCARD_INSTR:
vie_reset(vie);
return (0);
! case VEC_COMPLETE_MMIO:
err = vie_fulfill_mmio(vie, &entry->u.mmio);
if (err == 0) {
err = vie_emulate_mmio(vie, vm, vcpuid);
if (err == 0) {
vie_advance_pc(vie, &vcpu->nextrip);
--- 2067,2077 ----
case VEC_DEFAULT:
return (0);
case VEC_DISCARD_INSTR:
vie_reset(vie);
return (0);
! case VEC_FULFILL_MMIO:
err = vie_fulfill_mmio(vie, &entry->u.mmio);
if (err == 0) {
err = vie_emulate_mmio(vie, vm, vcpuid);
if (err == 0) {
vie_advance_pc(vie, &vcpu->nextrip);
*** 2089,2099 ****
vie_reset(vie);
err = 0;
}
}
break;
! case VEC_COMPLETE_INOUT:
err = vie_fulfill_inout(vie, &entry->u.inout);
if (err == 0) {
err = vie_emulate_inout(vie, vm, vcpuid);
if (err == 0) {
vie_advance_pc(vie, &vcpu->nextrip);
--- 2086,2096 ----
vie_reset(vie);
err = 0;
}
}
break;
! case VEC_FULFILL_INOUT:
err = vie_fulfill_inout(vie, &entry->u.inout);
if (err == 0) {
err = vie_emulate_inout(vie, vm, vcpuid);
if (err == 0) {
vie_advance_pc(vie, &vcpu->nextrip);
*** 2130,2158 ****
*/
vie_exitinfo(vie, vme);
return (-1);
}
- if (vcpuid == 0 && vm->sipi_req) {
- /* The boot vCPU has sent a SIPI to one of the other CPUs */
- vme->exitcode = VM_EXITCODE_SPINUP_AP;
- vme->u.spinup_ap.vcpu = vm->sipi_req_vcpu;
- vme->u.spinup_ap.rip = vm->sipi_req_rip;
-
- vm->sipi_req = false;
- vm->sipi_req_vcpu = 0;
- vm->sipi_req_rip = 0;
- return (-1);
- }
-
return (0);
}
int
vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry)
{
- struct vm_eventinfo evinfo;
int error;
struct vcpu *vcpu;
#ifdef __FreeBSD__
struct pcb *pcb;
#endif
--- 2127,2142 ----
*** 2175,2187 ****
return (EINVAL);
pmap = vmspace_pmap(vm->vmspace);
vcpu = &vm->vcpu[vcpuid];
vme = &vcpu->exitinfo;
- evinfo.rptr = &vcpu->runblock;
- evinfo.sptr = &vm->suspend;
- evinfo.iptr = &vcpu->reqidle;
#ifndef __FreeBSD__
vtc.vtc_vm = vm;
vtc.vtc_vcpuid = vcpuid;
vtc.vtc_status = 0;
--- 2159,2168 ----
*** 2240,2250 ****
}
vtc.vtc_status |= VTCS_FPU_CTX_CRITICAL;
#endif
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
! error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo);
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
#ifdef __FreeBSD__
save_guest_fpustate(vcpu);
#else
--- 2221,2231 ----
}
vtc.vtc_status |= VTCS_FPU_CTX_CRITICAL;
#endif
vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
! error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap);
vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
#ifdef __FreeBSD__
save_guest_fpustate(vcpu);
#else
*** 2271,2289 ****
vcpu->nextrip = vme->rip + vme->inst_length;
switch (vme->exitcode) {
case VM_EXITCODE_REQIDLE:
error = vm_handle_reqidle(vm, vcpuid);
break;
case VM_EXITCODE_SUSPENDED:
error = vm_handle_suspend(vm, vcpuid);
break;
case VM_EXITCODE_IOAPIC_EOI:
vioapic_process_eoi(vm, vcpuid,
vme->u.ioapic_eoi.vector);
break;
- case VM_EXITCODE_RUNBLOCK:
- break;
case VM_EXITCODE_HLT:
intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
error = vm_handle_hlt(vm, vcpuid, intr_disabled);
break;
case VM_EXITCODE_PAGING:
--- 2252,2271 ----
vcpu->nextrip = vme->rip + vme->inst_length;
switch (vme->exitcode) {
case VM_EXITCODE_REQIDLE:
error = vm_handle_reqidle(vm, vcpuid);
break;
+ case VM_EXITCODE_RUN_STATE:
+ error = vm_handle_run_state(vm, vcpuid);
+ break;
case VM_EXITCODE_SUSPENDED:
error = vm_handle_suspend(vm, vcpuid);
break;
case VM_EXITCODE_IOAPIC_EOI:
vioapic_process_eoi(vm, vcpuid,
vme->u.ioapic_eoi.vector);
break;
case VM_EXITCODE_HLT:
intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
error = vm_handle_hlt(vm, vcpuid, intr_disabled);
break;
case VM_EXITCODE_PAGING:
*** 2790,2799 ****
--- 2772,2971 ----
vcpu->extint_pending = 0;
vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
}
int
+ vm_inject_init(struct vm *vm, int vcpuid)
+ {
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+ vcpu_lock(vcpu);
+ vcpu->run_state |= VRS_PEND_INIT;
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
+ vcpu_unlock(vcpu);
+ return (0);
+ }
+
+ int
+ vm_inject_sipi(struct vm *vm, int vcpuid, uint8_t vector)
+ {
+ struct vcpu *vcpu;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ vcpu = &vm->vcpu[vcpuid];
+ vcpu_lock(vcpu);
+ vcpu->run_state |= VRS_PEND_SIPI;
+ vcpu->sipi_vector = vector;
+ /* SIPI is only actionable if the CPU is waiting in INIT state */
+ if ((vcpu->run_state & (VRS_INIT | VRS_RUN)) == VRS_INIT) {
+ vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
+ }
+ vcpu_unlock(vcpu);
+ return (0);
+ }
+
+ bool
+ vcpu_run_state_pending(struct vm *vm, int vcpuid)
+ {
+ struct vcpu *vcpu;
+
+ ASSERT(vcpuid >= 0 && vcpuid < vm->maxcpus);
+ vcpu = &vm->vcpu[vcpuid];
+
+ /* Of interest: vCPU not in running state or with pending INIT */
+ return ((vcpu->run_state & (VRS_RUN | VRS_PEND_INIT)) != VRS_RUN);
+ }
+
+ int
+ vcpu_arch_reset(struct vm *vm, int vcpuid, bool init_only)
+ {
+ struct seg_desc desc;
+ const enum vm_reg_name clear_regs[] = {
+ VM_REG_GUEST_CR2,
+ VM_REG_GUEST_CR3,
+ VM_REG_GUEST_CR4,
+ VM_REG_GUEST_RAX,
+ VM_REG_GUEST_RBX,
+ VM_REG_GUEST_RCX,
+ VM_REG_GUEST_RSI,
+ VM_REG_GUEST_RDI,
+ VM_REG_GUEST_RBP,
+ VM_REG_GUEST_RSP,
+ VM_REG_GUEST_R8,
+ VM_REG_GUEST_R9,
+ VM_REG_GUEST_R10,
+ VM_REG_GUEST_R11,
+ VM_REG_GUEST_R12,
+ VM_REG_GUEST_R13,
+ VM_REG_GUEST_R14,
+ VM_REG_GUEST_R15,
+ VM_REG_GUEST_DR0,
+ VM_REG_GUEST_DR1,
+ VM_REG_GUEST_DR2,
+ VM_REG_GUEST_DR3,
+ VM_REG_GUEST_EFER,
+ };
+ const enum vm_reg_name data_segs[] = {
+ VM_REG_GUEST_SS,
+ VM_REG_GUEST_DS,
+ VM_REG_GUEST_ES,
+ VM_REG_GUEST_FS,
+ VM_REG_GUEST_GS,
+ };
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ for (uint_t i = 0; i < nitems(clear_regs); i++) {
+ VERIFY0(vm_set_register(vm, vcpuid, clear_regs[i], 0));
+ }
+
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 2));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RIP, 0xfff0));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CR0, 0x60000010));
+
+ /*
+ * The prescribed contents of %rdx differ slightly between the Intel and
+ * AMD architectural definitions. The former expects the Extended Model
+ * in bits 16-19, whereas the latter expects the Family, Model, and
+ * Stepping to all be there. Common boot ROMs appear to disregard this
+ * anyway, so we stick with a compromise value similar to what is
+ * spelled out in the Intel SDM.
+ */
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RDX, 0x600));
+
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_DR6, 0xffff0ff0));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_DR7, 0x400));
+
+ /* CS: Present, R/W, Accessed */
+ desc.access = 0x0093;
+ desc.base = 0xffff0000;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CS, 0xf000));
+
+ /* SS, DS, ES, FS, GS: Present, R/W, Accessed */
+ desc.access = 0x0093;
+ desc.base = 0;
+ desc.limit = 0xffff;
+ for (uint_t i = 0; i < nitems(data_segs); i++) {
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, data_segs[i], &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, data_segs[i], 0));
+ }
+
+ /* GDTR, IDTR */
+ desc.base = 0;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_GDTR, &desc));
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_IDTR, &desc));
+
+ /* LDTR: Present, LDT */
+ desc.access = 0x0082;
+ desc.base = 0;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_LDTR, &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_LDTR, 0));
+
+ /* TR: Present, 32-bit TSS */
+ desc.access = 0x008b;
+ desc.base = 0;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_TR, &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_TR, 0));
+
+ vlapic_reset(vm_lapic(vm, vcpuid));
+
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0));
+
+ vcpu->exitintinfo = 0;
+ vcpu->exception_pending = 0;
+ vcpu->nmi_pending = 0;
+ vcpu->extint_pending = 0;
+
+ /*
+ * A CPU reset caused by power-on or system reset clears more state than
+ * one which is triggered by an INIT IPI.
+ */
+ if (!init_only) {
+ vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
+ fpu_save_area_reset(vcpu->guestfpu);
+
+ /* XXX: clear MSRs and other pieces */
+ }
+
+ return (0);
+ }
+
+ static int
+ vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector)
+ {
+ struct seg_desc desc;
+
+ if (vcpuid < 0 || vcpuid >= vm->maxcpus)
+ return (EINVAL);
+
+ /* CS: Present, R/W, Accessed */
+ desc.access = 0x0093;
+ desc.base = (uint64_t)vector << 12;
+ desc.limit = 0xffff;
+ VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &desc));
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CS,
+ (uint64_t)vector << 8));
+
+ VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RIP, 0));
+
+ return (0);
+ }
+
+ int
vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
{
if (vcpu < 0 || vcpu >= vm->maxcpus)
return (EINVAL);
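[Editor's note: vcpu_vector_sipi() encodes the architecturally defined real-mode startup address — the 8-bit SIPI vector selects a 4 KiB-aligned page below 1 MiB. Worked example for vector 0x9f:

    CS selector = 0x9f << 8  = 0x9f00
    CS base     = 0x9f << 12 = 0x9f000
    %rip        = 0
    => first fetch at guest-physical 0x9f000 (i.e. vector * 0x1000)
]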
*** 2892,2902 ****
{
int error;
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= vm->maxcpus)
! panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
--- 3064,3074 ----
{
int error;
struct vcpu *vcpu;
if (vcpuid < 0 || vcpuid >= vm->maxcpus)
! panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
*** 2910,2920 ****
{
struct vcpu *vcpu;
enum vcpu_state state;
if (vcpuid < 0 || vcpuid >= vm->maxcpus)
! panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
state = vcpu->state;
--- 3082,3092 ----
{
struct vcpu *vcpu;
enum vcpu_state state;
if (vcpuid < 0 || vcpuid >= vm->maxcpus)
! panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
vcpu = &vm->vcpu[vcpuid];
vcpu_lock(vcpu);
state = vcpu->state;
*** 2923,2980 ****
vcpu_unlock(vcpu);
return (state);
}
- void
- vcpu_block_run(struct vm *vm, int vcpuid)
- {
- struct vcpu *vcpu;
-
- if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
- panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
-
- vcpu = &vm->vcpu[vcpuid];
-
- vcpu_lock(vcpu);
- vcpu->runblock++;
- if (vcpu->runblock == 1 && vcpu->state == VCPU_RUNNING) {
- vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
- }
- while (vcpu->state == VCPU_RUNNING) {
- #ifdef __FreeBSD__
- msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
- #else
- cv_wait(&vcpu->state_cv, &vcpu->mtx.m);
- #endif
- }
- vcpu_unlock(vcpu);
- }
-
- void
- vcpu_unblock_run(struct vm *vm, int vcpuid)
- {
- struct vcpu *vcpu;
-
- if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
- panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
-
- vcpu = &vm->vcpu[vcpuid];
-
- vcpu_lock(vcpu);
- KASSERT(vcpu->runblock != 0, ("expected non-zero runblock"));
- vcpu->runblock--;
- if (vcpu->runblock == 0) {
- #ifdef __FreeBSD__
- wakeup(&vcpu->state);
- #else
- cv_broadcast(&vcpu->state_cv);
- #endif
- }
- vcpu_unlock(vcpu);
- }
-
#ifndef __FreeBSD__
uint64_t
vcpu_tsc_offset(struct vm *vm, int vcpuid)
{
return (vm->vcpu[vcpuid].tsc_offset);
--- 3095,3104 ----
*** 3036,3052 ****
CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus);
}
return (0);
}
! int
! vcpu_debugged(struct vm *vm, int vcpuid)
{
! return (CPU_ISSET(vcpuid, &vm->debug_cpus));
}
cpuset_t
vm_active_cpus(struct vm *vm)
{
return (vm->active_cpus);
--- 3160,3258 ----
CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus);
}
return (0);
}
! static bool
! vcpu_bailout_checks(struct vm *vm, int vcpuid, bool on_entry,
! uint64_t entry_rip)
{
+ struct vcpu *vcpu = &vm->vcpu[vcpuid];
+ struct vm_exit *vme = &vcpu->exitinfo;
+ bool bail = false;
! ASSERT(vcpuid >= 0 && vcpuid < vm->maxcpus);
!
! if (vm->suspend) {
! if (on_entry) {
! VERIFY(vm->suspend > VM_SUSPEND_NONE &&
! vm->suspend < VM_SUSPEND_LAST);
!
! vme->exitcode = VM_EXITCODE_SUSPENDED;
! vme->u.suspended.how = vm->suspend;
! } else {
! /*
! * Handling VM suspend is complicated, so if that
! * condition is detected outside of VM-entry itself,
! * just emit a BOGUS exitcode so we take a lap to pick
! * up the event during an entry and are directed into
! * the vm_handle_suspend() logic.
! */
! vme->exitcode = VM_EXITCODE_BOGUS;
! }
! bail = true;
! }
! if (vcpu->reqidle) {
! vme->exitcode = VM_EXITCODE_REQIDLE;
! vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
!
! if (!on_entry) {
! /*
! * A reqidle request detected outside of VM-entry can be
! * handled directly by clearing the request (and taking
! * a lap to userspace).
! */
! vcpu_assert_locked(vcpu);
! vcpu->reqidle = 0;
! }
! bail = true;
! }
! if (vcpu_should_yield(vm, vcpuid)) {
! vme->exitcode = VM_EXITCODE_BOGUS;
! vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
! bail = true;
! }
! if (CPU_ISSET(vcpuid, &vm->debug_cpus)) {
! vme->exitcode = VM_EXITCODE_DEBUG;
! bail = true;
! }
!
! if (bail) {
! if (on_entry) {
! /*
! * If bailing out during VM-entry, the current %rip must
! * be recorded in the exitinfo.
! */
! vme->rip = entry_rip;
! }
! vme->inst_length = 0;
! }
! return (bail);
}
+ static bool
+ vcpu_sleep_bailout_checks(struct vm *vm, int vcpuid)
+ {
+ /*
+ * Bail-out checks done prior to sleeping (in vCPU contexts like HLT or
+ * wait-for-SIPI) expect that %rip is already populated in the vm_exit
+ * structure, and we would only modify the exitcode.
+ */
+ return (vcpu_bailout_checks(vm, vcpuid, false, 0));
+ }
+
+ bool
+ vcpu_entry_bailout_checks(struct vm *vm, int vcpuid, uint64_t rip)
+ {
+ /*
+ * Bail-out checks done as part of VM entry require an updated %rip to
+ * populate the vm_exit struct if any of the conditions of interest are
+ * matched in the check.
+ */
+ return (vcpu_bailout_checks(vm, vcpuid, true, rip));
+ }
+
cpuset_t
vm_active_cpus(struct vm *vm)
{
return (vm->active_cpus);
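[Editor's note: vcpu_entry_bailout_checks() is exported for the CPU-specific vmrun loops, which live outside this file. A hypothetical call site just before VM entry (not part of this diff; surrounding names assumed) would look roughly like:

    /* Hypothetical caller sketch, just before entering the guest. */
    if (vcpu_entry_bailout_checks(vm, vcpuid, rip)) {
        /* exitinfo (exitcode and %rip) is already populated */
        break;  /* leave the entry loop and return to userspace */
    }
]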