Print this page
13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>


 501         vcpu = mtp->mt_vcpu;
 502 
 503         snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
 504         pthread_set_name_np(mtp->mt_thr, tname);
 505 
 506         if (gdb_port != 0)
 507                 gdb_cpu_add(vcpu);
 508 
 509         vm_loop(mtp->mt_ctx, vcpu, mtp->mt_startrip);
 510 
 511         /* not reached */
 512         exit(1);
 513         return (NULL);
 514 }
 515 
 516 #ifdef __FreeBSD__
 517 void
 518 fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
 519 #else
 520 void
 521 fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip,
 522     bool suspend)
 523 #endif
 524 {
 525         int error;
 526 

 527         assert(fromcpu == BSP);

 528 
 529         /*
 530          * The 'newcpu' must be activated in the context of 'fromcpu'. If
 531          * vm_activate_cpu() is delayed until newcpu's pthread starts running
 532          * then vmm.ko is out-of-sync with bhyve and this can create a race
 533          * with vm_suspend().
 534          */
 535         error = vm_activate_cpu(ctx, newcpu);
 536         if (error != 0)
 537                 err(EX_OSERR, "could not activate CPU %d", newcpu);
 538 
 539         CPU_SET_ATOMIC(newcpu, &cpumask);
 540 
 541 #ifndef __FreeBSD__
 542         if (suspend)
 543                 (void) vm_suspend_cpu(ctx, newcpu);
 544 #endif
 545 
 546         /*
 547          * Set up the vmexit struct to allow execution to start


 560 fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
 561 {
 562 
 563         if (!CPU_ISSET(vcpu, &cpumask)) {
 564                 fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu);
 565                 exit(4);
 566         }
 567 
 568         CPU_CLR_ATOMIC(vcpu, &cpumask);
 569         return (CPU_EMPTY(&cpumask));
 570 }
 571 
 /*
  * Stage the completion of an MMIO read in the vm_entry slot of 'vcpu'.
  *
  * vcpu:  index into the vmentry[] array
  * gpa:   address of the access being completed
  * bytes: width of the access
  * data:  value to hand back for the read
  *
  * The slot must not already hold a pending command (asserted below).
  * NOTE(review): the staged entry is presumably consumed on the next VM
  * entry for this vCPU; that consumer is not visible in this excerpt.
  */
 572 static void
 573 vmentry_mmio_read(int vcpu, uint64_t gpa, uint8_t bytes, uint64_t data)
 574 {
 575         struct vm_entry *entry = &vmentry[vcpu];
 576         struct vm_mmio *mmio = &entry->u.mmio;
 577 
 578         assert(entry->cmd == VEC_DEFAULT);      /* nothing staged yet */
 579 
 580         entry->cmd = VEC_COMPLETE_MMIO;
 581         mmio->bytes = bytes;
 582         mmio->read = 1;                         /* mark as a read */
 583         mmio->gpa = gpa;
 584         mmio->data = data;
 585 }
 586 
 /*
  * Stage the completion of an MMIO write in the vm_entry slot of 'vcpu'.
  *
  * vcpu:  index into the vmentry[] array
  * gpa:   address of the access being completed
  * bytes: width of the access
  *
  * Unlike the read variant there is no value to return, so the data field
  * is cleared. The slot must not already hold a pending command.
  */
 587 static void
 588 vmentry_mmio_write(int vcpu, uint64_t gpa, uint8_t bytes)
 589 {
 590         struct vm_entry *entry = &vmentry[vcpu];
 591         struct vm_mmio *mmio = &entry->u.mmio;
 592 
 593         assert(entry->cmd == VEC_DEFAULT);      /* nothing staged yet */
 594 
 595         entry->cmd = VEC_COMPLETE_MMIO;
 596         mmio->bytes = bytes;
 597         mmio->read = 0;                         /* mark as a write */
 598         mmio->gpa = gpa;
 599         mmio->data = 0;
 600 }
 601 
 /*
  * Stage the completion of a port-I/O read (IN) in the vm_entry slot of
  * 'vcpu'.
  *
  * vcpu:  index into the vmentry[] array
  * port:  I/O port of the access being completed
  * bytes: width of the access
  * data:  value to hand back, stored in the 'eax' field
  *
  * The slot must not already hold a pending command (asserted below).
  */
 602 static void
 603 vmentry_inout_read(int vcpu, uint16_t port, uint8_t bytes, uint32_t data)
 604 {
 605         struct vm_entry *entry = &vmentry[vcpu];
 606         struct vm_inout *inout = &entry->u.inout;
 607 
 608         assert(entry->cmd == VEC_DEFAULT);      /* nothing staged yet */
 609 
 610         entry->cmd = VEC_COMPLETE_INOUT;
 611         inout->bytes = bytes;
 612         inout->flags = INOUT_IN;                /* direction: IN */
 613         inout->port = port;
 614         inout->eax = data;
 615 }
 616 
 /*
  * Stage the completion of a port-I/O write (OUT) in the vm_entry slot of
  * 'vcpu'.
  *
  * vcpu:  index into the vmentry[] array
  * port:  I/O port of the access being completed
  * bytes: width of the access
  *
  * No value is returned for a write, so 'flags' (no INOUT_IN) and 'eax'
  * are both cleared. The slot must not already hold a pending command.
  */
 617 static void
 618 vmentry_inout_write(int vcpu, uint16_t port, uint8_t bytes)
 619 {
 620         struct vm_entry *entry = &vmentry[vcpu];
 621         struct vm_inout *inout = &entry->u.inout;
 622 
 623         assert(entry->cmd == VEC_DEFAULT);      /* nothing staged yet */
 624 
 625         entry->cmd = VEC_COMPLETE_INOUT;
 626         inout->bytes = bytes;
 627         inout->flags = 0;                       /* INOUT_IN not set */
 628         inout->port = port;
 629         inout->eax = 0;
 630 }
 631 
 /*
  * Placeholder hook for guest-driven debugging.
  *
  * As written, none of the arguments are used and the function always
  * returns VMEXIT_CONTINUE; the BHYVE_DEBUG section is an empty stub where
  * debug code can be added.
  */
 632 static int
 633 vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
 634                      uint32_t eax)
 635 {
 636 #if BHYVE_DEBUG
 637         /*
 638          * put guest-driven debug here
 639          */
 640 #endif
 641         return (VMEXIT_CONTINUE);
 642 }
 643 
 644 static int
 645 vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)


 714         return (VMEXIT_CONTINUE);
 715 }
 716 
 /*
  * Exit handler for a guest WRMSR.
  *
  * Passes the MSR number and value from the exit record to emulate_wrmsr().
  * If emulation fails, the access is logged to stderr and, when 'strictmsr'
  * is set, a #GP is injected into the vCPU via vm_inject_gp(). When
  * 'strictmsr' is clear the failed write is otherwise ignored.
  *
  * Always returns VMEXIT_CONTINUE.
  *
  * NOTE(review): "%#lx" assumes u.msr.wval has the width of unsigned long;
  * its declaration is not visible here -- confirm.
  */
 717 static int
 718 vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 719 {
 720         int error;
 721 
 722         error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
 723         if (error != 0) {
 724                 fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
 725                     vme->u.msr.code, vme->u.msr.wval, *pvcpu);
 726                 if (strictmsr) {
 727                         vm_inject_gp(ctx, *pvcpu);
 728                         return (VMEXIT_CONTINUE);
 729                 }
 730         }
 731         return (VMEXIT_CONTINUE);
 732 }
 733 

 /*
  * Exit handler for VM_EXITCODE_SPINUP_AP.
  *
  * Forwards the target vCPU id and starting %rip carried in the exit record
  * to spinup_ap(), on behalf of the exiting vCPU (*pvcpu). The result of
  * spinup_ap() is deliberately discarded; the handler always returns
  * VMEXIT_CONTINUE.
  */
 734 static int
 735 vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 736 {
 737 
 738         (void)spinup_ap(ctx, *pvcpu,
 739                     vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
 740 
 741         return (VMEXIT_CONTINUE);
 742 }












 743 
 /*
  * EPT-misconfiguration debugging support is compiled in only when
  * DEBUG_EPT_MISCONFIG is defined, which happens solely on FreeBSD builds.
  */
 744 #ifdef __FreeBSD__
 745 #define DEBUG_EPT_MISCONFIG
 746 #else
 747 /* EPT misconfig debugging not possible now that raw VMCS access is gone */
 748 #endif
 749 
 750 #ifdef DEBUG_EPT_MISCONFIG
 /*
  * NOTE(review): presumably the VMCS field encoding of the guest-physical
  * address (see the Intel SDM VMCS field encoding tables) -- verify.
  */
 751 #define VMCS_GUEST_PHYSICAL_ADDRESS     0x00002400
 752 
 /*
  * File-scope scratch state for the debug path: one address, up to four
  * PTE values, and a count. NOTE(review): the code that fills and prints
  * these is outside this excerpt.
  */
 753 static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
 754 static int ept_misconfig_ptenum;
 755 #endif
 756 
 757 static const char *
 758 vmexit_vmx_desc(uint32_t exit_reason)
 759 {
 760 
 761         if (exit_reason >= nitems(vmx_exit_reason_desc) ||
 762             vmx_exit_reason_desc[exit_reason] == NULL)


1000 
1001         if (gdb_port == 0) {
1002                 fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
1003                 exit(4);
1004         }
1005         gdb_cpu_breakpoint(*pvcpu, vmexit);
1006         return (VMEXIT_CONTINUE);
1007 }
1008 
/*
 * Table of VM-exit handlers, indexed by VM_EXITCODE_* value.
 *
 * Exit codes without an explicit designated initializer here are left as
 * NULL by the usual zero-initialization of static storage.
 * NOTE(review): the dispatch site (and how it treats NULL slots) is not
 * visible in this excerpt.
 */
1009 static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
1010         [VM_EXITCODE_INOUT]  = vmexit_inout,
1011         [VM_EXITCODE_MMIO]  = vmexit_mmio,
1012         [VM_EXITCODE_VMX]    = vmexit_vmx,
1013         [VM_EXITCODE_SVM]    = vmexit_svm,
1014         [VM_EXITCODE_BOGUS]  = vmexit_bogus,
1015         [VM_EXITCODE_REQIDLE] = vmexit_reqidle,
1016         [VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
1017         [VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
1018         [VM_EXITCODE_MTRAP]  = vmexit_mtrap,
1019         [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,

1020         [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,



1021         [VM_EXITCODE_SUSPENDED] = vmexit_suspend,
1022         [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
1023         [VM_EXITCODE_DEBUG] = vmexit_debug,
1024         [VM_EXITCODE_BPT] = vmexit_breakpoint,
1025 };
1026 
1027 static void
1028 vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
1029 {
1030         int error, rc;
1031         enum vm_exitcode exitcode;
1032         cpuset_t active_cpus;
1033         struct vm_exit *vexit;
1034         struct vm_entry *ventry;
1035 
1036 #ifdef  __FreeBSD__
1037         if (vcpumap[vcpu] != NULL) {
1038                 error = pthread_setaffinity_np(pthread_self(),
1039                     sizeof(cpuset_t), vcpumap[vcpu]);
1040                 assert(error == 0);


1530         }
1531 
1532         if (lpc_bootrom())
1533                 fwctl_init();
1534 
1535         /*
1536          * Change the proc title to include the VM name.
1537          */
1538         setproctitle("%s", vmname);
1539 
1540 #ifndef WITHOUT_CAPSICUM
1541         caph_cache_catpages();
1542 
1543         if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
1544                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1545 
1546         if (caph_enter() == -1)
1547                 errx(EX_OSERR, "cap_enter() failed");
1548 #endif
1549 

1550         /*
1551          * Add CPU 0
1552          */
1553 #ifdef __FreeBSD__
1554         fbsdrun_addcpu(ctx, BSP, BSP, rip);
1555 #else
1556         fbsdrun_addcpu(ctx, BSP, BSP, rip, suspend);



1557 




1558         mark_provisioned();
1559 #endif
1560 
1561         /*
1562          * Head off to the main event dispatch loop
1563          */
1564         mevent_dispatch();
1565 
1566         exit(4);
1567 }


 501         vcpu = mtp->mt_vcpu;
 502 
 503         snprintf(tname, sizeof(tname), "vcpu %d", vcpu);
 504         pthread_set_name_np(mtp->mt_thr, tname);
 505 
 506         if (gdb_port != 0)
 507                 gdb_cpu_add(vcpu);
 508 
 509         vm_loop(mtp->mt_ctx, vcpu, mtp->mt_startrip);
 510 
 511         /* not reached */
 512         exit(1);
 513         return (NULL);
 514 }
 515 
 516 #ifdef __FreeBSD__
 517 void
 518 fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
 519 #else
 520 void
 521 fbsdrun_addcpu(struct vmctx *ctx, int newcpu, uint64_t rip, bool suspend)

 522 #endif
 523 {
 524         int error;
 525 
 526 #ifdef __FreeBSD__
 527         assert(fromcpu == BSP);
 528 #endif
 529 
 530         /*
 531          * The 'newcpu' must be activated in the context of 'fromcpu'. If
 532          * vm_activate_cpu() is delayed until newcpu's pthread starts running
 533          * then vmm.ko is out-of-sync with bhyve and this can create a race
 534          * with vm_suspend().
 535          */
 536         error = vm_activate_cpu(ctx, newcpu);
 537         if (error != 0)
 538                 err(EX_OSERR, "could not activate CPU %d", newcpu);
 539 
 540         CPU_SET_ATOMIC(newcpu, &cpumask);
 541 
 542 #ifndef __FreeBSD__
 543         if (suspend)
 544                 (void) vm_suspend_cpu(ctx, newcpu);
 545 #endif
 546 
 547         /*
 548          * Set up the vmexit struct to allow execution to start


 561 fbsdrun_deletecpu(struct vmctx *ctx, int vcpu)
 562 {
 563 
 564         if (!CPU_ISSET(vcpu, &cpumask)) {
 565                 fprintf(stderr, "Attempting to delete unknown cpu %d\n", vcpu);
 566                 exit(4);
 567         }
 568 
 569         CPU_CLR_ATOMIC(vcpu, &cpumask);
 570         return (CPU_EMPTY(&cpumask));
 571 }
 572 
 /*
  * Stage the fulfillment of an MMIO read in the vm_entry slot of 'vcpu'.
  *
  * vcpu:  index into the vmentry[] array
  * gpa:   address of the access being fulfilled
  * bytes: width of the access
  * data:  value to hand back for the read
  *
  * The slot must not already hold a pending command (asserted below).
  * NOTE(review): the staged entry is presumably consumed on the next VM
  * entry for this vCPU; that consumer is not visible in this excerpt.
  */
 573 static void
 574 vmentry_mmio_read(int vcpu, uint64_t gpa, uint8_t bytes, uint64_t data)
 575 {
 576         struct vm_entry *entry = &vmentry[vcpu];
 577         struct vm_mmio *mmio = &entry->u.mmio;
 578 
 579         assert(entry->cmd == VEC_DEFAULT);      /* nothing staged yet */
 580 
 581         entry->cmd = VEC_FULFILL_MMIO;
 582         mmio->bytes = bytes;
 583         mmio->read = 1;                         /* mark as a read */
 584         mmio->gpa = gpa;
 585         mmio->data = data;
 586 }
 587 
 /*
  * Stage the fulfillment of an MMIO write in the vm_entry slot of 'vcpu'.
  *
  * vcpu:  index into the vmentry[] array
  * gpa:   address of the access being fulfilled
  * bytes: width of the access
  *
  * Unlike the read variant there is no value to return, so the data field
  * is cleared. The slot must not already hold a pending command.
  */
 588 static void
 589 vmentry_mmio_write(int vcpu, uint64_t gpa, uint8_t bytes)
 590 {
 591         struct vm_entry *entry = &vmentry[vcpu];
 592         struct vm_mmio *mmio = &entry->u.mmio;
 593 
 594         assert(entry->cmd == VEC_DEFAULT);      /* nothing staged yet */
 595 
 596         entry->cmd = VEC_FULFILL_MMIO;
 597         mmio->bytes = bytes;
 598         mmio->read = 0;                         /* mark as a write */
 599         mmio->gpa = gpa;
 600         mmio->data = 0;
 601 }
 602 
 /*
  * Stage the fulfillment of a port-I/O read (IN) in the vm_entry slot of
  * 'vcpu'.
  *
  * vcpu:  index into the vmentry[] array
  * port:  I/O port of the access being fulfilled
  * bytes: width of the access
  * data:  value to hand back, stored in the 'eax' field
  *
  * The slot must not already hold a pending command (asserted below).
  */
 603 static void
 604 vmentry_inout_read(int vcpu, uint16_t port, uint8_t bytes, uint32_t data)
 605 {
 606         struct vm_entry *entry = &vmentry[vcpu];
 607         struct vm_inout *inout = &entry->u.inout;
 608 
 609         assert(entry->cmd == VEC_DEFAULT);      /* nothing staged yet */
 610 
 611         entry->cmd = VEC_FULFILL_INOUT;
 612         inout->bytes = bytes;
 613         inout->flags = INOUT_IN;                /* direction: IN */
 614         inout->port = port;
 615         inout->eax = data;
 616 }
 617 
 /*
  * Stage the fulfillment of a port-I/O write (OUT) in the vm_entry slot of
  * 'vcpu'.
  *
  * vcpu:  index into the vmentry[] array
  * port:  I/O port of the access being fulfilled
  * bytes: width of the access
  *
  * No value is returned for a write, so 'flags' (no INOUT_IN) and 'eax'
  * are both cleared. The slot must not already hold a pending command.
  */
 618 static void
 619 vmentry_inout_write(int vcpu, uint16_t port, uint8_t bytes)
 620 {
 621         struct vm_entry *entry = &vmentry[vcpu];
 622         struct vm_inout *inout = &entry->u.inout;
 623 
 624         assert(entry->cmd == VEC_DEFAULT);      /* nothing staged yet */
 625 
 626         entry->cmd = VEC_FULFILL_INOUT;
 627         inout->bytes = bytes;
 628         inout->flags = 0;                       /* INOUT_IN not set */
 629         inout->port = port;
 630         inout->eax = 0;
 631 }
 632 
 /*
  * Placeholder hook for guest-driven debugging.
  *
  * As written, none of the arguments are used and the function always
  * returns VMEXIT_CONTINUE; the BHYVE_DEBUG section is an empty stub where
  * debug code can be added.
  */
 633 static int
 634 vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
 635                      uint32_t eax)
 636 {
 637 #if BHYVE_DEBUG
 638         /*
 639          * put guest-driven debug here
 640          */
 641 #endif
 642         return (VMEXIT_CONTINUE);
 643 }
 644 
 645 static int
 646 vmexit_inout(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)


 715         return (VMEXIT_CONTINUE);
 716 }
 717 
 /*
  * Exit handler for a guest WRMSR.
  *
  * Passes the MSR number and value from the exit record to emulate_wrmsr().
  * If emulation fails, the access is logged to stderr and, when 'strictmsr'
  * is set, a #GP is injected into the vCPU via vm_inject_gp(). When
  * 'strictmsr' is clear the failed write is otherwise ignored.
  *
  * Always returns VMEXIT_CONTINUE.
  *
  * NOTE(review): "%#lx" assumes u.msr.wval has the width of unsigned long;
  * its declaration is not visible here -- confirm.
  */
 718 static int
 719 vmexit_wrmsr(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 720 {
 721         int error;
 722 
 723         error = emulate_wrmsr(ctx, *pvcpu, vme->u.msr.code, vme->u.msr.wval);
 724         if (error != 0) {
 725                 fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
 726                     vme->u.msr.code, vme->u.msr.wval, *pvcpu);
 727                 if (strictmsr) {
 728                         vm_inject_gp(ctx, *pvcpu);
 729                         return (VMEXIT_CONTINUE);
 730                 }
 731         }
 732         return (VMEXIT_CONTINUE);
 733 }
 734 
 735 #ifdef __FreeBSD__
 /*
  * Exit handler for VM_EXITCODE_SPINUP_AP (FreeBSD builds only).
  *
  * Forwards the target vCPU id and starting %rip carried in the exit record
  * to spinup_ap(), on behalf of the exiting vCPU (*pvcpu). The result of
  * spinup_ap() is deliberately discarded; the handler always returns
  * VMEXIT_CONTINUE.
  */
 736 static int
 737 vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 738 {
 739 
 740         (void)spinup_ap(ctx, *pvcpu,
 741                     vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
 742 
 743         return (VMEXIT_CONTINUE);
 744 }
 745 #else
 /*
  * Exit handler for VM_EXITCODE_RUN_STATE (non-FreeBSD builds only).
  *
  * None of the arguments are used: reaching this handler at all is treated
  * as an error. A diagnostic is written to stderr and VMEXIT_ABORT is
  * returned to the caller.
  */
 746 static int
 747 vmexit_run_state(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 748 {
 749         /*
 750          * Run-state transitions (INIT, SIPI, etc) are handled in-kernel, so an
 751          * exit to userspace with that code is not expected.
 752          */
 753         fprintf(stderr, "unexpected run-state VM exit\n");
 754         return (VMEXIT_ABORT);
 755 }
 756 #endif /* __FreeBSD__ */
 757 
 /*
  * EPT-misconfiguration debugging support is compiled in only when
  * DEBUG_EPT_MISCONFIG is defined, which happens solely on FreeBSD builds.
  */
 758 #ifdef __FreeBSD__
 759 #define DEBUG_EPT_MISCONFIG
 760 #else
 761 /* EPT misconfig debugging not possible now that raw VMCS access is gone */
 762 #endif
 763 
 764 #ifdef DEBUG_EPT_MISCONFIG
 /*
  * NOTE(review): presumably the VMCS field encoding of the guest-physical
  * address (see the Intel SDM VMCS field encoding tables) -- verify.
  */
 765 #define VMCS_GUEST_PHYSICAL_ADDRESS     0x00002400
 766 
 /*
  * File-scope scratch state for the debug path: one address, up to four
  * PTE values, and a count. NOTE(review): the code that fills and prints
  * these is outside this excerpt.
  */
 767 static uint64_t ept_misconfig_gpa, ept_misconfig_pte[4];
 768 static int ept_misconfig_ptenum;
 769 #endif
 770 
 771 static const char *
 772 vmexit_vmx_desc(uint32_t exit_reason)
 773 {
 774 
 775         if (exit_reason >= nitems(vmx_exit_reason_desc) ||
 776             vmx_exit_reason_desc[exit_reason] == NULL)


1014 
1015         if (gdb_port == 0) {
1016                 fprintf(stderr, "vm_loop: unexpected VMEXIT_DEBUG\n");
1017                 exit(4);
1018         }
1019         gdb_cpu_breakpoint(*pvcpu, vmexit);
1020         return (VMEXIT_CONTINUE);
1021 }
1022 
/*
 * Table of VM-exit handlers, indexed by VM_EXITCODE_* value.
 *
 * FreeBSD builds register a handler for VM_EXITCODE_SPINUP_AP; all other
 * builds register one for VM_EXITCODE_RUN_STATE instead. Exit codes without
 * an explicit designated initializer are left as NULL by the usual
 * zero-initialization of static storage.
 * NOTE(review): the dispatch site (and how it treats NULL slots) is not
 * visible in this excerpt.
 */
1023 static vmexit_handler_t handler[VM_EXITCODE_MAX] = {
1024         [VM_EXITCODE_INOUT]  = vmexit_inout,
1025         [VM_EXITCODE_MMIO]  = vmexit_mmio,
1026         [VM_EXITCODE_VMX]    = vmexit_vmx,
1027         [VM_EXITCODE_SVM]    = vmexit_svm,
1028         [VM_EXITCODE_BOGUS]  = vmexit_bogus,
1029         [VM_EXITCODE_REQIDLE] = vmexit_reqidle,
1030         [VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
1031         [VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
1032         [VM_EXITCODE_MTRAP]  = vmexit_mtrap,
1033         [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
1034 #ifdef __FreeBSD__
1035         [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
1036 #else
1037         [VM_EXITCODE_RUN_STATE] = vmexit_run_state,
1038 #endif
1039         [VM_EXITCODE_SUSPENDED] = vmexit_suspend,
1040         [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
1041         [VM_EXITCODE_DEBUG] = vmexit_debug,
1042         [VM_EXITCODE_BPT] = vmexit_breakpoint,
1043 };
1044 
1045 static void
1046 vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
1047 {
1048         int error, rc;
1049         enum vm_exitcode exitcode;
1050         cpuset_t active_cpus;
1051         struct vm_exit *vexit;
1052         struct vm_entry *ventry;
1053 
1054 #ifdef  __FreeBSD__
1055         if (vcpumap[vcpu] != NULL) {
1056                 error = pthread_setaffinity_np(pthread_self(),
1057                     sizeof(cpuset_t), vcpumap[vcpu]);
1058                 assert(error == 0);


1548         }
1549 
1550         if (lpc_bootrom())
1551                 fwctl_init();
1552 
1553         /*
1554          * Change the proc title to include the VM name.
1555          */
1556         setproctitle("%s", vmname);
1557 
1558 #ifndef WITHOUT_CAPSICUM
1559         caph_cache_catpages();
1560 
1561         if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
1562                 errx(EX_OSERR, "Unable to apply rights for sandbox");
1563 
1564         if (caph_enter() == -1)
1565                 errx(EX_OSERR, "cap_enter() failed");
1566 #endif
1567 
1568 #ifdef __FreeBSD__
1569         /*
1570          * Add CPU 0
1571          */

1572         fbsdrun_addcpu(ctx, BSP, BSP, rip);
1573 #else
1574         /* Set BSP to run (unlike the APs which wait for INIT) */
1575         error = vm_set_run_state(ctx, BSP, VRS_RUN, 0);
1576         assert(error == 0);
1577         fbsdrun_addcpu(ctx, BSP, rip, suspend);
1578 
1579         /* Add subsequent CPUs, which will wait until INIT/SIPI-ed */
1580         for (uint_t i = 1; i < guest_ncpus; i++) {
1581                 spinup_halted_ap(ctx, i);
1582         }
1583         mark_provisioned();
1584 #endif
1585 
1586         /*
1587          * Head off to the main event dispatch loop
1588          */
1589         mevent_dispatch();
1590 
1591         exit(4);
1592 }