Print this page
13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>

@@ -516,17 +516,18 @@
 #ifdef __FreeBSD__
 void
 fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
 #else
 void
-fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip,
-    bool suspend)
+fbsdrun_addcpu(struct vmctx *ctx, int newcpu, uint64_t rip, bool suspend)
 #endif
 {
         int error;
 
+#ifdef __FreeBSD__
         assert(fromcpu == BSP);
+#endif
 
         /*
          * The 'newcpu' must be activated in the context of 'fromcpu'. If
          * vm_activate_cpu() is delayed until newcpu's pthread starts running
          * then vmm.ko is out-of-sync with bhyve and this can create a race

@@ -575,11 +576,11 @@
         struct vm_entry *entry = &vmentry[vcpu];
         struct vm_mmio *mmio = &entry->u.mmio;
 
         assert(entry->cmd == VEC_DEFAULT);
 
-        entry->cmd = VEC_COMPLETE_MMIO;
+        entry->cmd = VEC_FULFILL_MMIO;
         mmio->bytes = bytes;
         mmio->read = 1;
         mmio->gpa = gpa;
         mmio->data = data;
 }

@@ -590,11 +591,11 @@
         struct vm_entry *entry = &vmentry[vcpu];
         struct vm_mmio *mmio = &entry->u.mmio;
 
         assert(entry->cmd == VEC_DEFAULT);
 
-        entry->cmd = VEC_COMPLETE_MMIO;
+        entry->cmd = VEC_FULFILL_MMIO;
         mmio->bytes = bytes;
         mmio->read = 0;
         mmio->gpa = gpa;
         mmio->data = 0;
 }

@@ -605,11 +606,11 @@
         struct vm_entry *entry = &vmentry[vcpu];
         struct vm_inout *inout = &entry->u.inout;
 
         assert(entry->cmd == VEC_DEFAULT);
 
-        entry->cmd = VEC_COMPLETE_INOUT;
+        entry->cmd = VEC_FULFILL_INOUT;
         inout->bytes = bytes;
         inout->flags = INOUT_IN;
         inout->port = port;
         inout->eax = data;
 }

@@ -620,11 +621,11 @@
         struct vm_entry *entry = &vmentry[vcpu];
         struct vm_inout *inout = &entry->u.inout;
 
         assert(entry->cmd == VEC_DEFAULT);
 
-        entry->cmd = VEC_COMPLETE_INOUT;
+        entry->cmd = VEC_FULFILL_INOUT;
         inout->bytes = bytes;
         inout->flags = 0;
         inout->port = port;
         inout->eax = 0;
 }

@@ -729,19 +730,32 @@
                 }
         }
         return (VMEXIT_CONTINUE);
 }
 
+#ifdef __FreeBSD__
 /*
  * Exit handler for a spin-up-AP request (FreeBSD build only).
  *
  * Forwards the request to spinup_ap(), passing the vcpu that took the exit
  * (*pvcpu) together with the target vcpu and starting rip carried in the
  * exit payload.  The result of spinup_ap() is deliberately discarded and
  * the handler always returns VMEXIT_CONTINUE.
  */
 static int
 vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 {
 
         (void)spinup_ap(ctx, *pvcpu,
                     vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
 
         return (VMEXIT_CONTINUE);
 }
+#else
+/*
+ * Exit handler for run-state exits (non-FreeBSD build only).
+ *
+ * None of the arguments are examined: the handler reports the exit on
+ * stderr and always returns VMEXIT_ABORT.
+ */
+static int
+vmexit_run_state(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
+{
+        /*
+         * Run-state transitions (INIT, SIPI, etc) are handled in-kernel, so an
+         * exit to userspace with that code is not expected.
+         */
+        /* Newline-terminate so later diagnostics start on their own line. */
+        fprintf(stderr, "unexpected run-state VM exit\n");
+        return (VMEXIT_ABORT);
+}
+#endif /* __FreeBSD__ */
 
 #ifdef __FreeBSD__
 #define DEBUG_EPT_MISCONFIG
 #else
 /* EPT misconfig debugging not possible now that raw VMCS access is gone */

@@ -1015,11 +1029,15 @@
         [VM_EXITCODE_REQIDLE] = vmexit_reqidle,
         [VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
         [VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
         [VM_EXITCODE_MTRAP]  = vmexit_mtrap,
         [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
+#ifdef __FreeBSD__
         [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
+#else
+        [VM_EXITCODE_RUN_STATE] = vmexit_run_state,
+#endif
         [VM_EXITCODE_SUSPENDED] = vmexit_suspend,
         [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
         [VM_EXITCODE_DEBUG] = vmexit_debug,
         [VM_EXITCODE_BPT] = vmexit_breakpoint,
 };

@@ -1545,18 +1563,25 @@
 
         if (caph_enter() == -1)
                 errx(EX_OSERR, "cap_enter() failed");
 #endif
 
+#ifdef __FreeBSD__
         /*
          * Add CPU 0
          */
-#ifdef __FreeBSD__
         fbsdrun_addcpu(ctx, BSP, BSP, rip);
 #else
-        fbsdrun_addcpu(ctx, BSP, BSP, rip, suspend);
+        /* Set BSP to run (unlike the APs which wait for INIT) */
+        error = vm_set_run_state(ctx, BSP, VRS_RUN, 0);
+        assert(error == 0);
+        fbsdrun_addcpu(ctx, BSP, rip, suspend);
 
+        /* Add subsequent CPUs, which will wait until INIT/SIPI-ed */
+        for (uint_t i = 1; i < guest_ncpus; i++) {
+                spinup_halted_ap(ctx, i);
+        }
         mark_provisioned();
 #endif
 
         /*
          * Head off to the main event dispatch loop