Print this page
13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/cmd/bhyve/bhyverun.c
          +++ new/usr/src/cmd/bhyve/bhyverun.c
↓ open down ↓ 510 lines elided ↑ open up ↑
 511  511          /* not reached */
 512  512          exit(1);
 513  513          return (NULL);
 514  514  }
 515  515  
 516  516  #ifdef __FreeBSD__
 517  517  void
 518  518  fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip)
 519  519  #else
 520  520  void
 521      -fbsdrun_addcpu(struct vmctx *ctx, int fromcpu, int newcpu, uint64_t rip,
 522      -    bool suspend)
      521 +fbsdrun_addcpu(struct vmctx *ctx, int newcpu, uint64_t rip, bool suspend)
 523  522  #endif
 524  523  {
 525  524          int error;
 526  525  
      526 +#ifdef __FreeBSD__
 527  527          assert(fromcpu == BSP);
      528 +#endif
 528  529  
 529  530          /*
 530  531           * The 'newcpu' must be activated in the context of 'fromcpu'. If
 531  532           * vm_activate_cpu() is delayed until newcpu's pthread starts running
 532  533           * then vmm.ko is out-of-sync with bhyve and this can create a race
 533  534           * with vm_suspend().
 534  535           */
 535  536          error = vm_activate_cpu(ctx, newcpu);
 536  537          if (error != 0)
 537  538                  err(EX_OSERR, "could not activate CPU %d", newcpu);
↓ open down ↓ 32 lines elided ↑ open up ↑
 570  571  }
 571  572  
 572  573  static void
 573  574  vmentry_mmio_read(int vcpu, uint64_t gpa, uint8_t bytes, uint64_t data)
 574  575  {
 575  576          struct vm_entry *entry = &vmentry[vcpu];
 576  577          struct vm_mmio *mmio = &entry->u.mmio;
 577  578  
 578  579          assert(entry->cmd == VEC_DEFAULT);
 579  580  
 580      -        entry->cmd = VEC_COMPLETE_MMIO;
      581 +        entry->cmd = VEC_FULFILL_MMIO;
 581  582          mmio->bytes = bytes;
 582  583          mmio->read = 1;
 583  584          mmio->gpa = gpa;
 584  585          mmio->data = data;
 585  586  }
 586  587  
 587  588  static void
 588  589  vmentry_mmio_write(int vcpu, uint64_t gpa, uint8_t bytes)
 589  590  {
 590  591          struct vm_entry *entry = &vmentry[vcpu];
 591  592          struct vm_mmio *mmio = &entry->u.mmio;
 592  593  
 593  594          assert(entry->cmd == VEC_DEFAULT);
 594  595  
 595      -        entry->cmd = VEC_COMPLETE_MMIO;
      596 +        entry->cmd = VEC_FULFILL_MMIO;
 596  597          mmio->bytes = bytes;
 597  598          mmio->read = 0;
 598  599          mmio->gpa = gpa;
 599  600          mmio->data = 0;
 600  601  }
 601  602  
 602  603  static void
 603  604  vmentry_inout_read(int vcpu, uint16_t port, uint8_t bytes, uint32_t data)
 604  605  {
 605  606          struct vm_entry *entry = &vmentry[vcpu];
 606  607          struct vm_inout *inout = &entry->u.inout;
 607  608  
 608  609          assert(entry->cmd == VEC_DEFAULT);
 609  610  
 610      -        entry->cmd = VEC_COMPLETE_INOUT;
      611 +        entry->cmd = VEC_FULFILL_INOUT;
 611  612          inout->bytes = bytes;
 612  613          inout->flags = INOUT_IN;
 613  614          inout->port = port;
 614  615          inout->eax = data;
 615  616  }
 616  617  
 617  618  static void
 618  619  vmentry_inout_write(int vcpu, uint16_t port, uint8_t bytes)
 619  620  {
 620  621          struct vm_entry *entry = &vmentry[vcpu];
 621  622          struct vm_inout *inout = &entry->u.inout;
 622  623  
 623  624          assert(entry->cmd == VEC_DEFAULT);
 624  625  
 625      -        entry->cmd = VEC_COMPLETE_INOUT;
      626 +        entry->cmd = VEC_FULFILL_INOUT;
 626  627          inout->bytes = bytes;
 627  628          inout->flags = 0;
 628  629          inout->port = port;
 629  630          inout->eax = 0;
 630  631  }
 631  632  
 632  633  static int
 633  634  vmexit_handle_notify(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu,
 634  635                       uint32_t eax)
 635  636  {
↓ open down ↓ 88 lines elided ↑ open up ↑
 724  725                  fprintf(stderr, "wrmsr to register %#x(%#lx) on vcpu %d\n",
 725  726                      vme->u.msr.code, vme->u.msr.wval, *pvcpu);
 726  727                  if (strictmsr) {
 727  728                          vm_inject_gp(ctx, *pvcpu);
 728  729                          return (VMEXIT_CONTINUE);
 729  730                  }
 730  731          }
 731  732          return (VMEXIT_CONTINUE);
 732  733  }
 733  734  
      735 +#ifdef __FreeBSD__
 734  736  static int
 735  737  vmexit_spinup_ap(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
 736  738  {
 737  739  
 738  740          (void)spinup_ap(ctx, *pvcpu,
 739  741                      vme->u.spinup_ap.vcpu, vme->u.spinup_ap.rip);
 740  742  
 741  743          return (VMEXIT_CONTINUE);
 742  744  }
      745 +#else
      746 +static int
      747 +vmexit_run_state(struct vmctx *ctx, struct vm_exit *vme, int *pvcpu)
      748 +{
      749 +        /*
      750 +         * Run-state transitions (INIT, SIPI, etc) are handled in-kernel, so an
      751 +         * exit to userspace with that code is not expected.
      752 +         */
      753 +        fprintf(stderr, "unexpected run-state VM exit");
      754 +        return (VMEXIT_ABORT);
      755 +}
      756 +#endif /* __FreeBSD__ */
 743  757  
 744  758  #ifdef __FreeBSD__
 745  759  #define DEBUG_EPT_MISCONFIG
 746  760  #else
 747  761  /* EPT misconfig debugging not possible now that raw VMCS access is gone */
 748  762  #endif
 749  763  
 750  764  #ifdef DEBUG_EPT_MISCONFIG
 751  765  #define VMCS_GUEST_PHYSICAL_ADDRESS     0x00002400
 752  766  
↓ open down ↓ 257 lines elided ↑ open up ↑
1010 1024          [VM_EXITCODE_INOUT]  = vmexit_inout,
1011 1025          [VM_EXITCODE_MMIO]  = vmexit_mmio,
1012 1026          [VM_EXITCODE_VMX]    = vmexit_vmx,
1013 1027          [VM_EXITCODE_SVM]    = vmexit_svm,
1014 1028          [VM_EXITCODE_BOGUS]  = vmexit_bogus,
1015 1029          [VM_EXITCODE_REQIDLE] = vmexit_reqidle,
1016 1030          [VM_EXITCODE_RDMSR]  = vmexit_rdmsr,
1017 1031          [VM_EXITCODE_WRMSR]  = vmexit_wrmsr,
1018 1032          [VM_EXITCODE_MTRAP]  = vmexit_mtrap,
1019 1033          [VM_EXITCODE_INST_EMUL] = vmexit_inst_emul,
     1034 +#ifdef __FreeBSD__
1020 1035          [VM_EXITCODE_SPINUP_AP] = vmexit_spinup_ap,
     1036 +#else
     1037 +        [VM_EXITCODE_RUN_STATE] = vmexit_run_state,
     1038 +#endif
1021 1039          [VM_EXITCODE_SUSPENDED] = vmexit_suspend,
1022 1040          [VM_EXITCODE_TASK_SWITCH] = vmexit_task_switch,
1023 1041          [VM_EXITCODE_DEBUG] = vmexit_debug,
1024 1042          [VM_EXITCODE_BPT] = vmexit_breakpoint,
1025 1043  };
1026 1044  
1027 1045  static void
1028 1046  vm_loop(struct vmctx *ctx, int vcpu, uint64_t startrip)
1029 1047  {
1030 1048          int error, rc;
↓ open down ↓ 509 lines elided ↑ open up ↑
1540 1558  #ifndef WITHOUT_CAPSICUM
1541 1559          caph_cache_catpages();
1542 1560  
1543 1561          if (caph_limit_stdout() == -1 || caph_limit_stderr() == -1)
1544 1562                  errx(EX_OSERR, "Unable to apply rights for sandbox");
1545 1563  
1546 1564          if (caph_enter() == -1)
1547 1565                  errx(EX_OSERR, "cap_enter() failed");
1548 1566  #endif
1549 1567  
     1568 +#ifdef __FreeBSD__
1550 1569          /*
1551 1570           * Add CPU 0
1552 1571           */
1553      -#ifdef __FreeBSD__
1554 1572          fbsdrun_addcpu(ctx, BSP, BSP, rip);
1555 1573  #else
1556      -        fbsdrun_addcpu(ctx, BSP, BSP, rip, suspend);
     1574 +        /* Set BSP to run (unlike the APs which wait for INIT) */
     1575 +        error = vm_set_run_state(ctx, BSP, VRS_RUN, 0);
     1576 +        assert(error == 0);
     1577 +        fbsdrun_addcpu(ctx, BSP, rip, suspend);
1557 1578  
     1579 +        /* Add subsequent CPUs, which will wait until INIT/SIPI-ed */
     1580 +        for (uint_t i = 1; i < guest_ncpus; i++) {
     1581 +                spinup_halted_ap(ctx, i);
     1582 +        }
1558 1583          mark_provisioned();
1559 1584  #endif
1560 1585  
1561 1586          /*
1562 1587           * Head off to the main event dispatch loop
1563 1588           */
1564 1589          mevent_dispatch();
1565 1590  
1566 1591          exit(4);
1567 1592  }
    
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX