Print this page
13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>


2721         vmxctx->guest_dr1 = rdr1();
2722         vmxctx->guest_dr2 = rdr2();
2723         vmxctx->guest_dr3 = rdr3();
2724         vmxctx->guest_dr6 = rdr6();
2725 
2726         /*
2727          * Restore host debug registers.  Restore DR7, DEBUGCTL, and
2728          * PSL_T last.
2729          */
2730         load_dr0(vmxctx->host_dr0);
2731         load_dr1(vmxctx->host_dr1);
2732         load_dr2(vmxctx->host_dr2);
2733         load_dr3(vmxctx->host_dr3);
2734         load_dr6(vmxctx->host_dr6);
2735         wrmsr(MSR_DEBUGCTLMSR, vmxctx->host_debugctl);
2736         load_dr7(vmxctx->host_dr7);
2737         write_rflags(read_rflags() | vmxctx->host_tf);
2738 }
2739 
2740 static int
2741 vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap,
2742     struct vm_eventinfo *evinfo)
2743 {
2744         int rc, handled, launched;
2745         struct vmx *vmx;
2746         struct vm *vm;
2747         struct vmxctx *vmxctx;
2748         uintptr_t vmcs_pa;
2749         struct vm_exit *vmexit;
2750         struct vlapic *vlapic;
2751         uint32_t exit_reason;
2752 #ifdef __FreeBSD__
2753         struct region_descriptor gdtr, idtr;
2754         uint16_t ldt_sel;
2755 #endif
2756         bool tpr_shadow_active;
2757 
2758         vmx = arg;
2759         vm = vmx->vm;
2760         vmcs_pa = vmx->vmcs_pa[vcpu];
2761         vmxctx = &vmx->ctx[vcpu];
2762         vlapic = vm_lapic(vm, vcpu);


2817                  * A posted interrupt after vmx_inject_vlapic() will not be
2818                  * "lost": with interrupts disabled, it is held pending in the
2819                  * host APIC.  The pending interrupt will be recognized as soon
2820                  * as the guest state is loaded.
2821                  *
2822                  * The same reasoning applies to the IPI generated by
2823                  * pmap_invalidate_ept().
2824                  */
2825                 disable_intr();
2826 
2827                 /*
2828                  * If not precluded by existing events, inject any interrupt
2829                  * pending on the vLAPIC.  As a lock-less operation, it is safe
2830                  * (and prudent) to perform with host CPU interrupts disabled.
2831                  */
2832                 if (inject_state == EIS_CAN_INJECT) {
2833                         inject_state = vmx_inject_vlapic(vmx, vcpu, vlapic);
2834                 }
2835 
2836                 /*
2837                  * Check for vcpu suspension after injecting events because
2838                  * vmx_inject_events() can suspend the vcpu due to a
2839                  * triple fault.
2840                  */
2841                 if (vcpu_suspended(evinfo)) {
2842                         enable_intr();
2843                         vm_exit_suspended(vmx->vm, vcpu, rip);
2844                         break;
2845                 }
2846 
2847                 if (vcpu_runblocked(evinfo)) {
2848                         enable_intr();
2849                         vm_exit_runblock(vmx->vm, vcpu, rip);
2850                         break;
2851                 }
2852 
2853                 if (vcpu_reqidle(evinfo)) {
2854                         enable_intr();
2855                         vm_exit_reqidle(vmx->vm, vcpu, rip);
2856                         break;
2857                 }
2858 
2859                 if (vcpu_should_yield(vm, vcpu)) {
2860                         enable_intr();
2861                         vm_exit_astpending(vmx->vm, vcpu, rip);
2862                         vmx_astpending_trace(vmx, vcpu, rip);
2863                         handled = HANDLED;
2864                         break;
2865                 }
2866 
2867                 if (vcpu_debugged(vm, vcpu)) {
2868                         enable_intr();
2869                         vm_exit_debug(vmx->vm, vcpu, rip);
2870                         break;
2871                 }
2872 
2873                 /*
2874                  * If subsequent activity queued events which require injection
2875                  * handling, take another lap to handle them.
2876                  */
2877                 if (vmx_inject_recheck(vmx, vcpu, inject_state)) {
2878                         enable_intr();
2879                         handled = HANDLED;
2880                         continue;
2881                 }
2882 
2883 #ifndef __FreeBSD__
2884                 if ((rc = smt_acquire()) != 1) {
2885                         enable_intr();
2886                         vmexit->rip = rip;
2887                         vmexit->inst_length = 0;
2888                         if (rc == -1) {
2889                                 vmexit->exitcode = VM_EXITCODE_HT;
2890                         } else {
2891                                 vmexit->exitcode = VM_EXITCODE_BOGUS;
2892                                 handled = HANDLED;


2968 
2969                 /* Update 'nextrip' */
2970                 vmx->state[vcpu].nextrip = rip;
2971 
2972                 if (rc == VMX_GUEST_VMEXIT) {
2973                         vmx_exit_handle_nmi(vmx, vcpu, vmexit);
2974                         enable_intr();
2975                         handled = vmx_exit_process(vmx, vcpu, vmexit);
2976                 } else {
2977                         enable_intr();
2978                         vmx_exit_inst_error(vmxctx, rc, vmexit);
2979                 }
2980 #ifdef  __FreeBSD__
2981                 launched = 1;
2982 #endif
2983                 DTRACE_PROBE3(vmm__vexit, int, vcpu, uint64_t, rip,
2984                     uint32_t, exit_reason);
2985                 rip = vmexit->rip;
2986         } while (handled);
2987 
2988         /*
2989          * If a VM exit has been handled then the exitcode must be BOGUS.
2990          * If a VM exit is not handled then the exitcode must not be BOGUS.
2991          */
2992         if ((handled && vmexit->exitcode != VM_EXITCODE_BOGUS) ||
2993             (!handled && vmexit->exitcode == VM_EXITCODE_BOGUS)) {
2994                 panic("Mismatch between handled (%d) and exitcode (%d)",
2995                     handled, vmexit->exitcode);
2996         }
2997 
2998         if (!handled)
2999                 vmm_stat_incr(vm, vcpu, VMEXIT_USERSPACE, 1);
3000 
3001         VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d",
3002             vmexit->exitcode);
3003 
3004         vmcs_clear(vmcs_pa);
3005         vmx_msr_guest_exit(vmx, vcpu);
3006 
3007 #ifndef __FreeBSD__
3008         VERIFY(vmx->vmcs_state != VS_NONE && curthread->t_preempt != 0);
3009         vmx->vmcs_state[vcpu] = VS_NONE;
3010 #endif
3011 
3012         return (0);
3013 }
3014 
3015 static void
3016 vmx_vmcleanup(void *arg)
3017 {
3018         int i;
3019         struct vmx *vmx = arg;
3020         uint16_t maxcpus;


3244         if (!running) {
3245                 vmcs_load(vmx->vmcs_pa[vcpu]);
3246         }
3247 
3248         vmcs_seg_desc_encoding(seg, &base, &limit, &access);
3249         desc->base = vmcs_read(base);
3250         desc->limit = vmcs_read(limit);
3251         if (access != VMCS_INVALID_ENCODING) {
3252                 desc->access = vmcs_read(access);
3253         } else {
3254                 desc->access = 0;
3255         }
3256 
3257         if (!running) {
3258                 vmcs_clear(vmx->vmcs_pa[vcpu]);
3259         }
3260         return (0);
3261 }
3262 
3263 static int
3264 vmx_setdesc(void *arg, int vcpu, int seg, struct seg_desc *desc)
3265 {
3266         int hostcpu, running;
3267         struct vmx *vmx = arg;
3268         uint32_t base, limit, access;
3269 
3270         running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
3271         if (running && hostcpu != curcpu)
3272                 panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu);
3273 
3274         if (!running) {
3275                 vmcs_load(vmx->vmcs_pa[vcpu]);
3276         }
3277 
3278         vmcs_seg_desc_encoding(seg, &base, &limit, &access);
3279         vmcs_write(base, desc->base);
3280         vmcs_write(limit, desc->limit);
3281         if (access != VMCS_INVALID_ENCODING) {
3282                 vmcs_write(access, desc->access);
3283         }
3284 




2721         vmxctx->guest_dr1 = rdr1();
2722         vmxctx->guest_dr2 = rdr2();
2723         vmxctx->guest_dr3 = rdr3();
2724         vmxctx->guest_dr6 = rdr6();
2725 
2726         /*
2727          * Restore host debug registers.  Restore DR7, DEBUGCTL, and
2728          * PSL_T last.
2729          */
2730         load_dr0(vmxctx->host_dr0);
2731         load_dr1(vmxctx->host_dr1);
2732         load_dr2(vmxctx->host_dr2);
2733         load_dr3(vmxctx->host_dr3);
2734         load_dr6(vmxctx->host_dr6);
2735         wrmsr(MSR_DEBUGCTLMSR, vmxctx->host_debugctl);
2736         load_dr7(vmxctx->host_dr7);
2737         write_rflags(read_rflags() | vmxctx->host_tf);
2738 }
2739 
2740 static int
2741 vmx_run(void *arg, int vcpu, uint64_t rip, pmap_t pmap)

2742 {
2743         int rc, handled, launched;
2744         struct vmx *vmx;
2745         struct vm *vm;
2746         struct vmxctx *vmxctx;
2747         uintptr_t vmcs_pa;
2748         struct vm_exit *vmexit;
2749         struct vlapic *vlapic;
2750         uint32_t exit_reason;
2751 #ifdef __FreeBSD__
2752         struct region_descriptor gdtr, idtr;
2753         uint16_t ldt_sel;
2754 #endif
2755         bool tpr_shadow_active;
2756 
2757         vmx = arg;
2758         vm = vmx->vm;
2759         vmcs_pa = vmx->vmcs_pa[vcpu];
2760         vmxctx = &vmx->ctx[vcpu];
2761         vlapic = vm_lapic(vm, vcpu);


2816                  * A posted interrupt after vmx_inject_vlapic() will not be
2817                  * "lost": with interrupts disabled, it is held pending in the
2818                  * host APIC.  The pending interrupt will be recognized as soon
2819                  * as the guest state is loaded.
2820                  *
2821                  * The same reasoning applies to the IPI generated by
2822                  * pmap_invalidate_ept().
2823                  */
2824                 disable_intr();
2825 
2826                 /*
2827                  * If not precluded by existing events, inject any interrupt
2828                  * pending on the vLAPIC.  As a lock-less operation, it is safe
2829                  * (and prudent) to perform with host CPU interrupts disabled.
2830                  */
2831                 if (inject_state == EIS_CAN_INJECT) {
2832                         inject_state = vmx_inject_vlapic(vmx, vcpu, vlapic);
2833                 }
2834 
2835                 /*
2836                  * Check for vCPU bail-out conditions.  This must be done after
2837                  * vmx_inject_events() to detect a triple-fault condition.

2838                  */
2839                 if (vcpu_entry_bailout_checks(vmx->vm, vcpu, rip)) {
2840                         enable_intr();

2841                         break;
2842                 }
2843 
2844                 if (vcpu_run_state_pending(vm, vcpu)) {
2845                         enable_intr();
2846                         vm_exit_run_state(vmx->vm, vcpu, rip);
2847                         break;
2848                 }
2849 




















2850                 /*
2851                  * If subsequent activity queued events which require injection
2852                  * handling, take another lap to handle them.
2853                  */
2854                 if (vmx_inject_recheck(vmx, vcpu, inject_state)) {
2855                         enable_intr();
2856                         handled = HANDLED;
2857                         continue;
2858                 }
2859 
2860 #ifndef __FreeBSD__
2861                 if ((rc = smt_acquire()) != 1) {
2862                         enable_intr();
2863                         vmexit->rip = rip;
2864                         vmexit->inst_length = 0;
2865                         if (rc == -1) {
2866                                 vmexit->exitcode = VM_EXITCODE_HT;
2867                         } else {
2868                                 vmexit->exitcode = VM_EXITCODE_BOGUS;
2869                                 handled = HANDLED;


2945 
2946                 /* Update 'nextrip' */
2947                 vmx->state[vcpu].nextrip = rip;
2948 
2949                 if (rc == VMX_GUEST_VMEXIT) {
2950                         vmx_exit_handle_nmi(vmx, vcpu, vmexit);
2951                         enable_intr();
2952                         handled = vmx_exit_process(vmx, vcpu, vmexit);
2953                 } else {
2954                         enable_intr();
2955                         vmx_exit_inst_error(vmxctx, rc, vmexit);
2956                 }
2957 #ifdef  __FreeBSD__
2958                 launched = 1;
2959 #endif
2960                 DTRACE_PROBE3(vmm__vexit, int, vcpu, uint64_t, rip,
2961                     uint32_t, exit_reason);
2962                 rip = vmexit->rip;
2963         } while (handled);
2964 
2965         /* If a VM exit has been handled then the exitcode must be BOGUS */
2966         if (handled && vmexit->exitcode != VM_EXITCODE_BOGUS) {
2967                 panic("Non-BOGUS exitcode (%d) unexpected for handled VM exit",
2968                     vmexit->exitcode);




2969         }
2970 



2971         VCPU_CTR1(vm, vcpu, "returning from vmx_run: exitcode %d",
2972             vmexit->exitcode);
2973 
2974         vmcs_clear(vmcs_pa);
2975         vmx_msr_guest_exit(vmx, vcpu);
2976 
2977 #ifndef __FreeBSD__
2978         VERIFY(vmx->vmcs_state != VS_NONE && curthread->t_preempt != 0);
2979         vmx->vmcs_state[vcpu] = VS_NONE;
2980 #endif
2981 
2982         return (0);
2983 }
2984 
2985 static void
2986 vmx_vmcleanup(void *arg)
2987 {
2988         int i;
2989         struct vmx *vmx = arg;
2990         uint16_t maxcpus;


3214         if (!running) {
3215                 vmcs_load(vmx->vmcs_pa[vcpu]);
3216         }
3217 
3218         vmcs_seg_desc_encoding(seg, &base, &limit, &access);
3219         desc->base = vmcs_read(base);
3220         desc->limit = vmcs_read(limit);
3221         if (access != VMCS_INVALID_ENCODING) {
3222                 desc->access = vmcs_read(access);
3223         } else {
3224                 desc->access = 0;
3225         }
3226 
3227         if (!running) {
3228                 vmcs_clear(vmx->vmcs_pa[vcpu]);
3229         }
3230         return (0);
3231 }
3232 
3233 static int
3234 vmx_setdesc(void *arg, int vcpu, int seg, const struct seg_desc *desc)
3235 {
3236         int hostcpu, running;
3237         struct vmx *vmx = arg;
3238         uint32_t base, limit, access;
3239 
3240         running = vcpu_is_running(vmx->vm, vcpu, &hostcpu);
3241         if (running && hostcpu != curcpu)
3242                 panic("vmx_setdesc: %s%d is running", vm_name(vmx->vm), vcpu);
3243 
3244         if (!running) {
3245                 vmcs_load(vmx->vmcs_pa[vcpu]);
3246         }
3247 
3248         vmcs_seg_desc_encoding(seg, &base, &limit, &access);
3249         vmcs_write(base, desc->base);
3250         vmcs_write(limit, desc->limit);
3251         if (access != VMCS_INVALID_ENCODING) {
3252                 vmcs_write(access, desc->access);
3253         }
3254