13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>

--- old/usr/src/uts/i86pc/io/vmm/vmm.c
+++ new/usr/src/uts/i86pc/io/vmm/vmm.c
[... 101 lines elided ...]
 102  102  struct vlapic;
 103  103  
 104  104  /*
 105  105   * Initialization:
 106  106   * (a) allocated when vcpu is created
 107  107   * (i) initialized when vcpu is created and when it is reinitialized
 108  108   * (o) initialized the first time the vcpu is created
 109  109   * (x) initialized before use
 110  110   */
 111  111  struct vcpu {
 112      -        struct mtx      mtx;            /* (o) protects 'state' and 'hostcpu' */
      112 +        /* (o) protects state, run_state, hostcpu, sipi_vector */
      113 +        struct mtx      mtx;
      114 +
 113  115          enum vcpu_state state;          /* (o) vcpu state */
 114      -#ifndef __FreeBSD__
      116 +        enum vcpu_run_state run_state;  /* (i) vcpu init/sipi/run state */
 115  117          kcondvar_t      vcpu_cv;        /* (o) cpu waiter cv */
 116  118          kcondvar_t      state_cv;       /* (o) IDLE-transition cv */
 117      -#endif /* __FreeBSD__ */
 118  119          int             hostcpu;        /* (o) vcpu's current host cpu */
 119      -#ifndef __FreeBSD__
 120  120          int             lastloccpu;     /* (o) last host cpu localized to */
 121      -#endif
 122      -        uint_t          runblock;       /* (i) block vcpu from run state */
 123  121          int             reqidle;        /* (i) request vcpu to idle */
 124  122          struct vlapic   *vlapic;        /* (i) APIC device model */
 125  123          enum x2apic_state x2apic_state; /* (i) APIC mode */
 126  124          uint64_t        exitintinfo;    /* (i) events pending at VM exit */
 127  125          int             nmi_pending;    /* (i) NMI pending */
 128  126          int             extint_pending; /* (i) INTR pending */
 129  127          int     exception_pending;      /* (i) exception pending */
 130  128          int     exc_vector;             /* (x) exception collateral */
 131  129          int     exc_errcode_valid;
 132  130          uint32_t exc_errcode;
      131 +        uint8_t         sipi_vector;    /* (i) SIPI vector */
 133  132          struct savefpu  *guestfpu;      /* (a,i) guest fpu state */
 134  133          uint64_t        guest_xcr0;     /* (i) guest %xcr0 register */
 135  134          void            *stats;         /* (a,i) statistics */
 136  135          struct vm_exit  exitinfo;       /* (x) exit reason and collateral */
 137  136          uint64_t        nextrip;        /* (x) next instruction to execute */
 138  137          struct vie      *vie_ctx;       /* (x) instruction emulation context */
 139  138  #ifndef __FreeBSD__
 140  139          uint64_t        tsc_offset;     /* (x) offset from host TSC */
 141  140  #endif
 142  141  };
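
The new run_state member replaces the old runblock counter with a small bitfield lattice. Its definitions live in vmm.h rather than in this file; a sketch of the shape they take, with the exact bit positions assumed rather than confirmed by this webrev:

    enum vcpu_run_state {
            VRS_HALT        = 0,            /* powered off / waiting for INIT */
            VRS_INIT        = (1 << 0),     /* INIT taken, waiting for SIPI */
            VRS_RUN         = (1 << 1),     /* running guest code */

            VRS_PEND_INIT   = (1 << 14),    /* INIT IPI latched, unhandled */
            VRS_PEND_SIPI   = (1 << 15),    /* SIPI latched, unhandled */
    };
    #define VRS_MASK_VALID(v) \
            ((v) & (VRS_INIT | VRS_RUN | VRS_PEND_INIT | VRS_PEND_SIPI))
    #define VRS_IS_VALID(v) ((v) == VRS_MASK_VALID(v))
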
[... 50 lines elided ...]
 193  192          struct vmspace  *vmspace;               /* (o) guest's address space */
 194  193          char            name[VM_MAX_NAMELEN];   /* (o) virtual machine name */
 195  194          struct vcpu     vcpu[VM_MAXCPU];        /* (i) guest vcpus */
 196  195          /* The following describe the vm cpu topology */
 197  196          uint16_t        sockets;                /* (o) num of sockets */
 198  197          uint16_t        cores;                  /* (o) num of cores/socket */
 199  198          uint16_t        threads;                /* (o) num of threads/core */
 200  199          uint16_t        maxcpus;                /* (o) max pluggable cpus */
 201  200  
 202  201          struct ioport_config ioports;           /* (o) ioport handling */
 203      -
 204      -        bool            sipi_req;               /* (i) SIPI requested */
 205      -        int             sipi_req_vcpu;          /* (i) SIPI destination */
 206      -        uint64_t        sipi_req_rip;           /* (i) SIPI start %rip */
 207      -
 208      -        /* Miscellaneous VM-wide statistics and counters */
 209      -        struct vm_wide_stats {
 210      -                uint64_t sipi_supersede;
 211      -        } stats;
 212  202  };
 213  203  
 214  204  static int vmm_initialized;
 215  205  
 216  206  
 217  207  static void
 218  208  nullop_panic(void)
 219  209  {
 220  210          panic("null vmm operation call");
 221  211  }
[... 20 lines elided ...]
 242  232          .vmrestorectx   = (vmi_restorectx)nullop_panic,
 243  233  };
 244  234  
 245  235  static struct vmm_ops *ops = &vmm_ops_null;
 246  236  
 247  237  #define VMM_INIT(num)                   ((*ops->init)(num))
 248  238  #define VMM_CLEANUP()                   ((*ops->cleanup)())
 249  239  #define VMM_RESUME()                    ((*ops->resume)())
 250  240  
 251  241  #define VMINIT(vm, pmap)                ((*ops->vminit)(vm, pmap))
 252      -#define VMRUN(vmi, vcpu, rip, pmap, evinfo) \
 253      -        ((*ops->vmrun)(vmi, vcpu, rip, pmap, evinfo))
      242 +#define VMRUN(vmi, vcpu, rip, pmap) \
      243 +        ((*ops->vmrun)(vmi, vcpu, rip, pmap))
 254  244  #define VMCLEANUP(vmi)                  ((*ops->vmcleanup)(vmi))
 255  245  #define VMSPACE_ALLOC(min, max)         ((*ops->vmspace_alloc)(min, max))
 256  246  #define VMSPACE_FREE(vmspace)           ((*ops->vmspace_free)(vmspace))
 257  247  
 258  248  #define VMGETREG(vmi, vcpu, num, rv)    ((*ops->vmgetreg)(vmi, vcpu, num, rv))
 259  249  #define VMSETREG(vmi, vcpu, num, val)   ((*ops->vmsetreg)(vmi, vcpu, num, val))
 260  250  #define VMGETDESC(vmi, vcpu, num, dsc)  ((*ops->vmgetdesc)(vmi, vcpu, num, dsc))
 261  251  #define VMSETDESC(vmi, vcpu, num, dsc)  ((*ops->vmsetdesc)(vmi, vcpu, num, dsc))
 262  252  #define VMGETCAP(vmi, vcpu, num, rv)    ((*ops->vmgetcap)(vmi, vcpu, num, rv))
 263  253  #define VMSETCAP(vmi, vcpu, num, val)   ((*ops->vmsetcap)(vmi, vcpu, num, val))
[... 21 lines elided ...]
 285  275  
 286  276  /* IPI vector used for vcpu notifications */
 287  277  static int vmm_ipinum;
 288  278  
 289  279  /* Trap into hypervisor on all guest exceptions and reflect them back */
 290  280  static int trace_guest_exceptions;
 291  281  
 292  282  static void vm_free_memmap(struct vm *vm, int ident);
 293  283  static bool sysmem_mapping(struct vm *vm, struct mem_map *mm);
 294  284  static void vcpu_notify_event_locked(struct vcpu *vcpu, vcpu_notify_t);
      285 +static bool vcpu_sleep_bailout_checks(struct vm *vm, int vcpuid);
      286 +static int vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector);
 295  287  
 296  288  #ifndef __FreeBSD__
 297  289  static void vm_clear_memseg(struct vm *, int);
 298  290  
 299  291  /* Flags for vtc_status */
 300  292  #define VTCS_FPU_RESTORED       1 /* guest FPU restored, host FPU saved */
 301  293  #define VTCS_FPU_CTX_CRITICAL   2 /* in ctx where FPU restore cannot be lazy */
 302  294  
 303  295  typedef struct vm_thread_ctx {
 304  296          struct vm       *vtc_vm;
[... 58 lines elided ...]
 363  355                  vcpu->lastloccpu = NOCPU;
 364  356  #endif
 365  357                  vcpu->guestfpu = fpu_save_area_alloc();
 366  358                  vcpu->stats = vmm_stat_alloc();
 367  359                  vcpu->vie_ctx = vie_alloc();
 368  360          } else {
 369  361                  vie_reset(vcpu->vie_ctx);
 370  362                  bzero(&vcpu->exitinfo, sizeof (vcpu->exitinfo));
 371  363          }
 372  364  
      365 +        vcpu->run_state = VRS_HALT;
 373  366          vcpu->vlapic = VLAPIC_INIT(vm->cookie, vcpu_id);
 374  367          vm_set_x2apic_state(vm, vcpu_id, X2APIC_DISABLED);
 375      -        vcpu->runblock = 0;
 376  368          vcpu->reqidle = 0;
 377  369          vcpu->exitintinfo = 0;
 378  370          vcpu->nmi_pending = 0;
 379  371          vcpu->extint_pending = 0;
 380  372          vcpu->exception_pending = 0;
 381  373          vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
 382  374          fpu_save_area_reset(vcpu->guestfpu);
 383  375          vmm_stat_init(vcpu->stats);
 384  376  }
 385  377  
[... 840 lines elided ...]
1226 1218          if (vcpu < 0 || vcpu >= vm->maxcpus)
1227 1219                  return (EINVAL);
1228 1220  
1229 1221          if (!is_segment_register(reg) && !is_descriptor_table(reg))
1230 1222                  return (EINVAL);
1231 1223  
1232 1224          return (VMGETDESC(vm->cookie, vcpu, reg, desc));
1233 1225  }
1234 1226  
1235 1227  int
1236      -vm_set_seg_desc(struct vm *vm, int vcpu, int reg, struct seg_desc *desc)
     1228 +vm_set_seg_desc(struct vm *vm, int vcpu, int reg, const struct seg_desc *desc)
1237 1229  {
1238 1230          if (vcpu < 0 || vcpu >= vm->maxcpus)
1239 1231                  return (EINVAL);
1240 1232  
1241 1233          if (!is_segment_register(reg) && !is_descriptor_table(reg))
1242 1234                  return (EINVAL);
1243 1235  
1244 1236          return (VMSETDESC(vm->cookie, vcpu, reg, desc));
1245 1237  }
1246 1238  
     1239 +int
     1240 +vm_get_run_state(struct vm *vm, int vcpuid, uint32_t *state, uint8_t *sipi_vec)
     1241 +{
     1242 +        struct vcpu *vcpu;
     1243 +
     1244 +        if (vcpuid < 0 || vcpuid >= vm->maxcpus) {
     1245 +                return (EINVAL);
     1246 +        }
     1247 +
     1248 +        vcpu = &vm->vcpu[vcpuid];
     1249 +
     1250 +        vcpu_lock(vcpu);
     1251 +        *state = vcpu->run_state;
     1252 +        *sipi_vec = vcpu->sipi_vector;
     1253 +        vcpu_unlock(vcpu);
     1254 +
     1255 +        return (0);
     1256 +}
     1257 +
     1258 +int
     1259 +vm_set_run_state(struct vm *vm, int vcpuid, uint32_t state, uint8_t sipi_vec)
     1260 +{
     1261 +        struct vcpu *vcpu;
     1262 +
     1263 +        if (vcpuid < 0 || vcpuid >= vm->maxcpus) {
     1264 +                return (EINVAL);
     1265 +        }
     1266 +        if (!VRS_IS_VALID(state)) {
     1267 +                return (EINVAL);
     1268 +        }
     1269 +
     1270 +        vcpu = &vm->vcpu[vcpuid];
     1271 +
     1272 +        vcpu_lock(vcpu);
     1273 +        vcpu->run_state = state;
     1274 +        vcpu->sipi_vector = sipi_vec;
     1275 +        vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
     1276 +        vcpu_unlock(vcpu);
     1277 +
     1278 +        return (0);
     1279 +}
     1280 +
     1281 +
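
vm_get_run_state() and vm_set_run_state() are the kernel half of a new ioctl pair; the userspace plumbing is elsewhere. A minimal sketch of a consumer, with the ioctl name and struct layout assumed (only the VRS_* semantics come from this file):

    #include <sys/ioctl.h>
    #include <stdint.h>

    /* Hypothetical ioctl payload mirroring vm_set_run_state()'s arguments. */
    struct vm_run_state {
            int             vcpuid;
            uint32_t        state;          /* VRS_* bits */
            uint8_t         sipi_vector;
            uint8_t         _pad[3];
    };

    /* Park a fresh AP in wait-for-SIPI with its startup vector queued. */
    static int
    ap_wait_for_sipi(int vmfd, int vcpuid, uint8_t vector)
    {
            struct vm_run_state vrs = {
                    .vcpuid = vcpuid,
                    .state = VRS_INIT | VRS_PEND_SIPI,
                    .sipi_vector = vector,
            };

            /* VM_SET_RUN_STATE assumed to marshal into vm_set_run_state() */
            return (ioctl(vmfd, VM_SET_RUN_STATE, &vrs));
    }
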
1247 1282  static void
1248 1283  restore_guest_fpustate(struct vcpu *vcpu)
1249 1284  {
1250 1285  
1251 1286          /* flush host state to the pcb */
1252 1287          fpuexit(curthread);
1253 1288  
1254 1289          /* restore guest FPU state */
1255 1290          fpu_stop_emulating();
1256 1291          fpurestore(vcpu->guestfpu);
[... 90 lines elided ...]
1347 1382                  error = (newstate != VCPU_FROZEN);
1348 1383                  break;
1349 1384          case VCPU_FROZEN:
1350 1385                  error = (newstate == VCPU_FROZEN);
1351 1386                  break;
1352 1387          default:
1353 1388                  error = 1;
1354 1389                  break;
1355 1390          }
1356 1391  
1357      -        if (newstate == VCPU_RUNNING) {
1358      -                while (vcpu->runblock != 0) {
1359      -#ifdef __FreeBSD__
1360      -                        msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
1361      -#else
1362      -                        cv_wait(&vcpu->state_cv, &vcpu->mtx.m);
1363      -#endif
1364      -                }
1365      -        }
1366      -
1367 1392          if (error)
1368 1393                  return (EBUSY);
1369 1394  
1370 1395          VCPU_CTR2(vm, vcpuid, "vcpu state changed from %s to %s",
1371 1396              vcpu_state2str(vcpu->state), vcpu_state2str(newstate));
1372 1397  
1373 1398          vcpu->state = newstate;
1374 1399          if (newstate == VCPU_RUNNING)
1375 1400                  vcpu->hostcpu = curcpu;
1376 1401          else
1377 1402                  vcpu->hostcpu = NOCPU;
1378 1403  
1379      -        if (newstate == VCPU_IDLE ||
1380      -            (newstate == VCPU_FROZEN && vcpu->runblock != 0)) {
     1404 +        if (newstate == VCPU_IDLE) {
1381 1405  #ifdef __FreeBSD__
1382 1406                  wakeup(&vcpu->state);
1383 1407  #else
1384 1408                  cv_broadcast(&vcpu->state_cv);
1385 1409  #endif
1386 1410          }
1387 1411  
1388 1412          return (0);
1389 1413  }
1390 1414  
[... 15 lines elided ...]
1406 1430                  panic("Error %d setting state to %d", error, newstate);
1407 1431  }
1408 1432  
1409 1433  /*
1410 1434   * Emulate a guest 'hlt' by sleeping until the vcpu is ready to run.
1411 1435   */
1412 1436  static int
1413 1437  vm_handle_hlt(struct vm *vm, int vcpuid, bool intr_disabled)
1414 1438  {
1415 1439          struct vcpu *vcpu;
1416      -#ifdef __FreeBSD__
1417      -        const char *wmesg;
1418      -#else
1419      -        const char *wmesg __unused;
1420      -#endif
1421 1440          int t, vcpu_halted, vm_halted;
     1441 +        bool userspace_exit = false;
1422 1442  
1423 1443          KASSERT(!CPU_ISSET(vcpuid, &vm->halted_cpus), ("vcpu already halted"));
1424 1444  
1425 1445          vcpu = &vm->vcpu[vcpuid];
1426 1446          vcpu_halted = 0;
1427 1447          vm_halted = 0;
1428 1448  
1429 1449          vcpu_lock(vcpu);
1430 1450          while (1) {
1431 1451                  /*
1432      -                 * Do a final check for pending NMI or interrupts before
1433      -                 * really putting this thread to sleep. Also check for
1434      -                 * software events that would cause this vcpu to wakeup.
1435      -                 *
1436      -                 * These interrupts/events could have happened after the
1437      -                 * vcpu returned from VMRUN() and before it acquired the
1438      -                 * vcpu lock above.
     1452 +                 * Do a final check for pending interrupts (including NMI and
     1453 +                 * INIT) before putting this thread to sleep.
1439 1454                   */
1440      -                if (vm->suspend || vcpu->reqidle)
1441      -                        break;
1442 1455                  if (vm_nmi_pending(vm, vcpuid))
1443 1456                          break;
     1457 +                if (vcpu_run_state_pending(vm, vcpuid))
     1458 +                        break;
1444 1459                  if (!intr_disabled) {
1445 1460                          if (vm_extint_pending(vm, vcpuid) ||
1446 1461                              vlapic_pending_intr(vcpu->vlapic, NULL)) {
1447 1462                                  break;
1448 1463                          }
1449 1464                  }
1450 1465  
1451      -                /* Don't go to sleep if the vcpu thread needs to yield */
1452      -                if (vcpu_should_yield(vm, vcpuid))
     1466 +                /*
     1467 +                 * Also check for software events which would cause a wake-up.
     1468 +                 * This will set the appropriate exitcode directly, rather than
     1469 +                 * requiring a trip through VM_RUN().
     1470 +                 */
     1471 +                if (vcpu_sleep_bailout_checks(vm, vcpuid)) {
     1472 +                        userspace_exit = true;
1453 1473                          break;
     1474 +                }
1454 1475  
1455      -                if (vcpu_debugged(vm, vcpuid))
1456      -                        break;
1457      -
1458 1476                  /*
1459 1477                   * Some Linux guests implement "halt" by having all vcpus
1460 1478                   * execute HLT with interrupts disabled. 'halted_cpus' keeps
1461 1479                   * track of the vcpus that have entered this state. When all
1462 1480                   * vcpus enter the halted state the virtual machine is halted.
1463 1481                   */
1464 1482                  if (intr_disabled) {
1465      -                        wmesg = "vmhalt";
1466      -                        VCPU_CTR0(vm, vcpuid, "Halted");
1467 1483                          if (!vcpu_halted && halt_detection_enabled) {
1468 1484                                  vcpu_halted = 1;
1469 1485                                  CPU_SET_ATOMIC(vcpuid, &vm->halted_cpus);
1470 1486                          }
1471 1487                          if (CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0) {
1472 1488                                  vm_halted = 1;
1473 1489                                  break;
1474 1490                          }
1475      -                } else {
1476      -                        wmesg = "vmidle";
1477 1491                  }
1478 1492  
1479 1493                  t = ticks;
1480 1494                  vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
1481      -#ifdef __FreeBSD__
1482      -                /*
1483      -                 * XXX msleep_spin() cannot be interrupted by signals so
1484      -                 * wake up periodically to check pending signals.
1485      -                 */
1486      -                msleep_spin(vcpu, &vcpu->mtx, wmesg, hz);
1487      -#else
1488      -                /*
1489      -                 * Fortunately, cv_wait_sig can be interrupted by signals, so
1490      -                 * there is no need to periodically wake up.
1491      -                 */
1492 1495                  (void) cv_wait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m);
1493      -#endif
1494 1496                  vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
1495 1497                  vmm_stat_incr(vm, vcpuid, VCPU_IDLE_TICKS, ticks - t);
1496 1498          }
1497 1499  
1498 1500          if (vcpu_halted)
1499 1501                  CPU_CLR_ATOMIC(vcpuid, &vm->halted_cpus);
1500 1502  
1501 1503          vcpu_unlock(vcpu);
1502 1504  
1503 1505          if (vm_halted)
1504 1506                  vm_suspend(vm, VM_SUSPEND_HALT);
1505 1507  
1506      -        return (0);
     1508 +        return (userspace_exit ? -1 : 0);
1507 1509  }
1508 1510  
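
The halted_cpus bookkeeping above implements the whole-VM halt heuristic: once every active vcpu has executed HLT with interrupts disabled, the machine is considered dead and is suspended with VM_SUSPEND_HALT. A standalone toy model of the detection, with a plain bitmask standing in for cpuset_t:

    #include <stdbool.h>
    #include <stdint.h>
    #include <stdio.h>

    static bool
    vm_should_halt(uint64_t active_cpus, uint64_t halted_cpus)
    {
            /* CPU_CMP(&vm->halted_cpus, &vm->active_cpus) == 0 above */
            return (halted_cpus == active_cpus);
    }

    int
    main(void)
    {
            uint64_t active = 0xf;  /* four active vcpus */
            uint64_t halted = 0x7;  /* three already in HLT with PSL_I clear */

            printf("%d\n", vm_should_halt(active, halted));  /* 0 */
            halted |= (1ULL << 3);  /* the last vcpu halts too */
            printf("%d\n", vm_should_halt(active, halted));  /* 1 */
            return (0);
    }
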
1509 1511  static int
1510 1512  vm_handle_paging(struct vm *vm, int vcpuid)
1511 1513  {
1512 1514          int rv, ftype;
1513 1515          struct vm_map *map;
1514 1516          struct vcpu *vcpu;
1515 1517          struct vm_exit *vme;
1516 1518  
[... 308 lines elided ...]
1825 1827  {
1826 1828          struct vcpu *vcpu = &vm->vcpu[vcpuid];
1827 1829  
1828 1830          vcpu_lock(vcpu);
1829 1831          KASSERT(vcpu->reqidle, ("invalid vcpu reqidle %d", vcpu->reqidle));
1830 1832          vcpu->reqidle = 0;
1831 1833          vcpu_unlock(vcpu);
1832 1834          return (-1);
1833 1835  }
1834 1836  
     1837 +static int
     1838 +vm_handle_run_state(struct vm *vm, int vcpuid)
     1839 +{
     1840 +        struct vcpu *vcpu = &vm->vcpu[vcpuid];
     1841 +        bool handled = false;
     1842 +
     1843 +        vcpu_lock(vcpu);
     1844 +        while (1) {
     1845 +                if ((vcpu->run_state & VRS_PEND_INIT) != 0) {
     1846 +                        vcpu_unlock(vcpu);
     1847 +                        VERIFY0(vcpu_arch_reset(vm, vcpuid, true));
     1848 +                        vcpu_lock(vcpu);
     1849 +
     1850 +                        vcpu->run_state &= ~(VRS_RUN | VRS_PEND_INIT);
     1851 +                        vcpu->run_state |= VRS_INIT;
     1852 +                }
     1853 +
     1854 +                if ((vcpu->run_state & (VRS_INIT | VRS_RUN | VRS_PEND_SIPI)) ==
     1855 +                    (VRS_INIT | VRS_PEND_SIPI)) {
     1856 +                        const uint8_t vector = vcpu->sipi_vector;
     1857 +
     1858 +                        vcpu_unlock(vcpu);
     1859 +                        VERIFY0(vcpu_vector_sipi(vm, vcpuid, vector));
     1860 +                        vcpu_lock(vcpu);
     1861 +
     1862 +                        vcpu->run_state &= ~VRS_PEND_SIPI;
     1863 +                        vcpu->run_state |= VRS_RUN;
     1864 +                }
     1865 +
     1866 +                /*
     1867 +                 * If the vCPU is now in the running state, there is no need to
     1868 +                 * wait for anything prior to re-entry.
     1869 +                 */
     1870 +                if ((vcpu->run_state & VRS_RUN) != 0) {
     1871 +                        handled = true;
     1872 +                        break;
     1873 +                }
     1874 +
     1875 +                /*
     1876 +                 * Also check for software events which would cause a wake-up.
     1877 +                 * This will set the appropriate exitcode directly, rather than
     1878 +                 * requiring a trip through VM_RUN().
     1879 +                 */
     1880 +                if (vcpu_sleep_bailout_checks(vm, vcpuid)) {
     1881 +                        break;
     1882 +                }
     1883 +
     1884 +                vcpu_require_state_locked(vm, vcpuid, VCPU_SLEEPING);
     1885 +                (void) cv_wait_sig(&vcpu->vcpu_cv, &vcpu->mtx.m);
     1886 +                vcpu_require_state_locked(vm, vcpuid, VCPU_FROZEN);
     1887 +        }
     1888 +        vcpu_unlock(vcpu);
     1889 +
     1890 +        return (handled ? 0 : -1);
     1891 +}
     1892 +
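
vm_handle_run_state() loops until the vcpu reaches VRS_RUN, dropping the lock around the heavyweight reset/SIPI work and re-evaluating afterward. The transitions themselves are compact; here is a runnable model of one pass through the loop body with the locking and sleeping stripped out (VRS_* values assumed as in the sketch near struct vcpu):

    #include <stdint.h>
    #include <stdio.h>

    #define VRS_INIT        (1u << 0)
    #define VRS_RUN         (1u << 1)
    #define VRS_PEND_INIT   (1u << 14)
    #define VRS_PEND_SIPI   (1u << 15)

    static uint32_t
    run_state_step(uint32_t rs)
    {
            if ((rs & VRS_PEND_INIT) != 0) {
                    /* vcpu_arch_reset(vm, vcpuid, true) runs here */
                    rs &= ~(VRS_RUN | VRS_PEND_INIT);
                    rs |= VRS_INIT;
            }
            if ((rs & (VRS_INIT | VRS_RUN | VRS_PEND_SIPI)) ==
                (VRS_INIT | VRS_PEND_SIPI)) {
                    /* vcpu_vector_sipi(vm, vcpuid, vector) runs here */
                    rs &= ~VRS_PEND_SIPI;
                    rs |= VRS_RUN;
            }
            return (rs);
    }

    int
    main(void)
    {
            /* INIT and SIPI both pending: one pass ends at VRS_INIT|VRS_RUN */
            printf("%#x\n",
                (unsigned int)run_state_step(VRS_PEND_INIT | VRS_PEND_SIPI));
            return (0);
    }
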
1835 1893  #ifndef __FreeBSD__
1836 1894  static int
1837 1895  vm_handle_wrmsr(struct vm *vm, int vcpuid, struct vm_exit *vme)
1838 1896  {
1839 1897          struct vcpu *cpu = &vm->vcpu[vcpuid];
1840 1898          const uint32_t code = vme->u.msr.code;
1841 1899          const uint64_t val = vme->u.msr.wval;
1842 1900  
1843 1901          switch (code) {
1844 1902          case MSR_TSC:
1845 1903                  cpu->tsc_offset = val - rdtsc();
1846 1904                  return (0);
1847 1905          }
1848 1906  
1849 1907          return (-1);
1850 1908  }
1851 1909  #endif /* __FreeBSD__ */
1852 1910  
1853      -void
1854      -vm_req_spinup_ap(struct vm *vm, int req_vcpuid, uint64_t req_rip)
1855      -{
1856      -        if (vm->sipi_req) {
1857      -                /* This should never occur if userspace is doing its job. */
1858      -                vm->stats.sipi_supersede++;
1859      -        }
1860      -        vm->sipi_req = true;
1861      -        vm->sipi_req_vcpu = req_vcpuid;
1862      -        vm->sipi_req_rip = req_rip;
1863      -}
1864      -
1865 1911  int
1866 1912  vm_suspend(struct vm *vm, enum vm_suspend_how how)
1867 1913  {
1868 1914          int i;
1869 1915  
1870 1916          if (how <= VM_SUSPEND_NONE || how >= VM_SUSPEND_LAST)
1871 1917                  return (EINVAL);
1872 1918  
1873 1919          if (atomic_cmpset_int((uint_t *)&vm->suspend, 0, how) == 0) {
1874 1920                  VM_CTR2(vm, "virtual machine already suspended %d/%d",
[... 8 lines elided ...]
1883 1929           */
1884 1930          for (i = 0; i < vm->maxcpus; i++) {
1885 1931                  if (CPU_ISSET(i, &vm->active_cpus))
1886 1932                          vcpu_notify_event(vm, i);
1887 1933          }
1888 1934  
1889 1935          return (0);
1890 1936  }
1891 1937  
1892 1938  void
1893      -vm_exit_suspended(struct vm *vm, int vcpuid, uint64_t rip)
     1939 +vm_exit_run_state(struct vm *vm, int vcpuid, uint64_t rip)
1894 1940  {
1895 1941          struct vm_exit *vmexit;
1896 1942  
1897      -        KASSERT(vm->suspend > VM_SUSPEND_NONE && vm->suspend < VM_SUSPEND_LAST,
1898      -            ("vm_exit_suspended: invalid suspend type %d", vm->suspend));
1899      -
1900 1943          vmexit = vm_exitinfo(vm, vcpuid);
1901 1944          vmexit->rip = rip;
1902 1945          vmexit->inst_length = 0;
1903      -        vmexit->exitcode = VM_EXITCODE_SUSPENDED;
1904      -        vmexit->u.suspended.how = vm->suspend;
     1946 +        vmexit->exitcode = VM_EXITCODE_RUN_STATE;
     1947 +        vmm_stat_incr(vm, vcpuid, VMEXIT_RUN_STATE, 1);
1905 1948  }
1906 1949  
1907      -void
1908      -vm_exit_debug(struct vm *vm, int vcpuid, uint64_t rip)
1909      -{
1910      -        struct vm_exit *vmexit;
1911 1950  
1912      -        vmexit = vm_exitinfo(vm, vcpuid);
1913      -        vmexit->rip = rip;
1914      -        vmexit->inst_length = 0;
1915      -        vmexit->exitcode = VM_EXITCODE_DEBUG;
1916      -}
1917      -
1918      -void
1919      -vm_exit_runblock(struct vm *vm, int vcpuid, uint64_t rip)
1920      -{
1921      -        struct vm_exit *vmexit;
1922      -
1923      -        vmexit = vm_exitinfo(vm, vcpuid);
1924      -        vmexit->rip = rip;
1925      -        vmexit->inst_length = 0;
1926      -        vmexit->exitcode = VM_EXITCODE_RUNBLOCK;
1927      -        vmm_stat_incr(vm, vcpuid, VMEXIT_RUNBLOCK, 1);
1928      -}
1929      -
1930      -void
1931      -vm_exit_reqidle(struct vm *vm, int vcpuid, uint64_t rip)
1932      -{
1933      -        struct vm_exit *vmexit;
1934      -
1935      -        vmexit = vm_exitinfo(vm, vcpuid);
1936      -        vmexit->rip = rip;
1937      -        vmexit->inst_length = 0;
1938      -        vmexit->exitcode = VM_EXITCODE_REQIDLE;
1939      -        vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
1940      -}
1941      -
1942      -void
1943      -vm_exit_astpending(struct vm *vm, int vcpuid, uint64_t rip)
1944      -{
1945      -        struct vm_exit *vmexit;
1946      -
1947      -        vmexit = vm_exitinfo(vm, vcpuid);
1948      -        vmexit->rip = rip;
1949      -        vmexit->inst_length = 0;
1950      -        vmexit->exitcode = VM_EXITCODE_BOGUS;
1951      -        vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
1952      -}
1953      -
1954 1951  #ifndef __FreeBSD__
1955 1952  /*
1956 1953   * Some vmm resources, such as the lapic, may have CPU-specific resources
1957 1954   * allocated to them which would benefit from migration onto the host CPU which
1958 1955   * is processing the vcpu state.
1959 1956   */
1960 1957  static void
1961 1958  vm_localize_resources(struct vm *vm, struct vcpu *vcpu)
1962 1959  {
1963 1960          /*
[... 101 lines elided ...]
2065 2062          vcpu = &vm->vcpu[vcpuid];
2066 2063          vie = vcpu->vie_ctx;
2067 2064          err = 0;
2068 2065  
2069 2066          switch (entry->cmd) {
2070 2067          case VEC_DEFAULT:
2071 2068                  return (0);
2072 2069          case VEC_DISCARD_INSTR:
2073 2070                  vie_reset(vie);
2074 2071                  return (0);
2075      -        case VEC_COMPLETE_MMIO:
     2072 +        case VEC_FULFILL_MMIO:
2076 2073                  err = vie_fulfill_mmio(vie, &entry->u.mmio);
2077 2074                  if (err == 0) {
2078 2075                          err = vie_emulate_mmio(vie, vm, vcpuid);
2079 2076                          if (err == 0) {
2080 2077                                  vie_advance_pc(vie, &vcpu->nextrip);
2081 2078                          } else if (err < 0) {
2082 2079                                  vie_exitinfo(vie, vme);
2083 2080                          } else if (err == EAGAIN) {
2084 2081                                  /*
2085 2082                                   * Clear the instruction emulation state in
2086 2083                                   * order to re-enter VM context and continue
2087 2084                                   * this 'rep <instruction>'
2088 2085                                   */
2089 2086                                  vie_reset(vie);
2090 2087                                  err = 0;
2091 2088                          }
2092 2089                  }
2093 2090                  break;
2094      -        case VEC_COMPLETE_INOUT:
     2091 +        case VEC_FULFILL_INOUT:
2095 2092                  err = vie_fulfill_inout(vie, &entry->u.inout);
2096 2093                  if (err == 0) {
2097 2094                          err = vie_emulate_inout(vie, vm, vcpuid);
2098 2095                          if (err == 0) {
2099 2096                                  vie_advance_pc(vie, &vcpu->nextrip);
2100 2097                          } else if (err < 0) {
2101 2098                                  vie_exitinfo(vie, vme);
2102 2099                          } else if (err == EAGAIN) {
2103 2100                                  /*
2104 2101                                   * Clear the instruction emulation state in
[... 20 lines elided ...]
2125 2122  
2126 2123          if (vie_pending(vie)) {
2127 2124                  /*
2128 2125                   * Userspace has not fulfilled the pending needs of the
2129 2126                   * instruction emulation, so bail back out.
2130 2127                   */
2131 2128                  vie_exitinfo(vie, vme);
2132 2129                  return (-1);
2133 2130          }
2134 2131  
2135      -        if (vcpuid == 0 && vm->sipi_req) {
2136      -                /* The boot vCPU has sent a SIPI to one of the other CPUs */
2137      -                vme->exitcode = VM_EXITCODE_SPINUP_AP;
2138      -                vme->u.spinup_ap.vcpu = vm->sipi_req_vcpu;
2139      -                vme->u.spinup_ap.rip = vm->sipi_req_rip;
2140      -
2141      -                vm->sipi_req = false;
2142      -                vm->sipi_req_vcpu = 0;
2143      -                vm->sipi_req_rip = 0;
2144      -                return (-1);
2145      -        }
2146      -
2147 2132          return (0);
2148 2133  }
2149 2134  
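
The VEC_COMPLETE_* to VEC_FULFILL_* rename tracks the vie_fulfill_mmio()/vie_fulfill_inout() entry points: userspace fulfills a pending emulation on its next VM_RUN rather than completing it itself. A fragment of the userspace side; the field names come from the hunk above, but the type names are assumed from the vmm headers:

    /* Device model produced the in/out result; hand it back on re-entry. */
    static void
    reenter_after_inout(struct vm_entry *entry, const struct vm_inout *done)
    {
            entry->cmd = VEC_FULFILL_INOUT;
            entry->u.inout = *done; /* consumed by vie_fulfill_inout() */
    }
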
2150 2135  int
2151 2136  vm_run(struct vm *vm, int vcpuid, const struct vm_entry *entry)
2152 2137  {
2153      -        struct vm_eventinfo evinfo;
2154 2138          int error;
2155 2139          struct vcpu *vcpu;
2156 2140  #ifdef  __FreeBSD__
2157 2141          struct pcb *pcb;
2158 2142  #endif
2159 2143          uint64_t tscval;
2160 2144          struct vm_exit *vme;
2161 2145          bool intr_disabled;
2162 2146          pmap_t pmap;
2163 2147  #ifndef __FreeBSD__
[... 6 lines elided ...]
2170 2154  
2171 2155          if (!CPU_ISSET(vcpuid, &vm->active_cpus))
2172 2156                  return (EINVAL);
2173 2157  
2174 2158          if (CPU_ISSET(vcpuid, &vm->suspended_cpus))
2175 2159                  return (EINVAL);
2176 2160  
2177 2161          pmap = vmspace_pmap(vm->vmspace);
2178 2162          vcpu = &vm->vcpu[vcpuid];
2179 2163          vme = &vcpu->exitinfo;
2180      -        evinfo.rptr = &vcpu->runblock;
2181      -        evinfo.sptr = &vm->suspend;
2182      -        evinfo.iptr = &vcpu->reqidle;
2183 2164  
2184 2165  #ifndef __FreeBSD__
2185 2166          vtc.vtc_vm = vm;
2186 2167          vtc.vtc_vcpuid = vcpuid;
2187 2168          vtc.vtc_status = 0;
2188 2169  
2189 2170          installctx(curthread, &vtc, vmm_savectx, vmm_restorectx, NULL, NULL,
2190 2171              NULL, vmm_freectx);
2191 2172  #endif
2192 2173  
[... 42 lines elided ...]
2235 2216          restore_guest_fpustate(vcpu);
2236 2217  #else
2237 2218          if ((vtc.vtc_status & VTCS_FPU_RESTORED) == 0) {
2238 2219                  restore_guest_fpustate(vcpu);
2239 2220                  vtc.vtc_status |= VTCS_FPU_RESTORED;
2240 2221          }
2241 2222          vtc.vtc_status |= VTCS_FPU_CTX_CRITICAL;
2242 2223  #endif
2243 2224  
2244 2225          vcpu_require_state(vm, vcpuid, VCPU_RUNNING);
2245      -        error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap, &evinfo);
     2226 +        error = VMRUN(vm->cookie, vcpuid, vcpu->nextrip, pmap);
2246 2227          vcpu_require_state(vm, vcpuid, VCPU_FROZEN);
2247 2228  
2248 2229  #ifdef  __FreeBSD__
2249 2230          save_guest_fpustate(vcpu);
2250 2231  #else
2251 2232          vtc.vtc_status &= ~VTCS_FPU_CTX_CRITICAL;
2252 2233  #endif
2253 2234  
2254 2235  #ifndef __FreeBSD__
2255 2236          /*
[... 10 lines elided ...]
2266 2247          if (error != 0) {
2267 2248                  /* Communicate out any error from VMRUN() above */
2268 2249                  goto exit;
2269 2250          }
2270 2251  
2271 2252          vcpu->nextrip = vme->rip + vme->inst_length;
2272 2253          switch (vme->exitcode) {
2273 2254          case VM_EXITCODE_REQIDLE:
2274 2255                  error = vm_handle_reqidle(vm, vcpuid);
2275 2256                  break;
     2257 +        case VM_EXITCODE_RUN_STATE:
     2258 +                error = vm_handle_run_state(vm, vcpuid);
     2259 +                break;
2276 2260          case VM_EXITCODE_SUSPENDED:
2277 2261                  error = vm_handle_suspend(vm, vcpuid);
2278 2262                  break;
2279 2263          case VM_EXITCODE_IOAPIC_EOI:
2280 2264                  vioapic_process_eoi(vm, vcpuid,
2281 2265                      vme->u.ioapic_eoi.vector);
2282 2266                  break;
2283      -        case VM_EXITCODE_RUNBLOCK:
2284      -                break;
2285 2267          case VM_EXITCODE_HLT:
2286 2268                  intr_disabled = ((vme->u.hlt.rflags & PSL_I) == 0);
2287 2269                  error = vm_handle_hlt(vm, vcpuid, intr_disabled);
2288 2270                  break;
2289 2271          case VM_EXITCODE_PAGING:
2290 2272                  error = vm_handle_paging(vm, vcpuid);
2291 2273                  break;
2292 2274          case VM_EXITCODE_MMIO_EMUL:
2293 2275                  error = vm_handle_mmio_emul(vm, vcpuid);
2294 2276                  break;
[... 490 lines elided ...]
2785 2767          vcpu = &vm->vcpu[vcpuid];
2786 2768  
2787 2769          if (vcpu->extint_pending == 0)
2788 2770                  panic("vm_extint_clear: inconsistent extint_pending state");
2789 2771  
2790 2772          vcpu->extint_pending = 0;
2791 2773          vmm_stat_incr(vm, vcpuid, VCPU_EXTINT_COUNT, 1);
2792 2774  }
2793 2775  
2794 2776  int
     2777 +vm_inject_init(struct vm *vm, int vcpuid)
     2778 +{
     2779 +        struct vcpu *vcpu;
     2780 +
     2781 +        if (vcpuid < 0 || vcpuid >= vm->maxcpus)
     2782 +                return (EINVAL);
     2783 +
     2784 +        vcpu = &vm->vcpu[vcpuid];
     2785 +        vcpu_lock(vcpu);
     2786 +        vcpu->run_state |= VRS_PEND_INIT;
     2787 +        vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
     2788 +        vcpu_unlock(vcpu);
     2789 +        return (0);
     2790 +}
     2791 +
     2792 +int
     2793 +vm_inject_sipi(struct vm *vm, int vcpuid, uint8_t vector)
     2794 +{
     2795 +        struct vcpu *vcpu;
     2796 +
     2797 +        if (vcpuid < 0 || vcpuid >= vm->maxcpus)
     2798 +                return (EINVAL);
     2799 +
     2800 +        vcpu = &vm->vcpu[vcpuid];
     2801 +        vcpu_lock(vcpu);
     2802 +        vcpu->run_state |= VRS_PEND_SIPI;
     2803 +        vcpu->sipi_vector = vector;
     2804 +        /* SIPI is only actionable if the CPU is waiting in INIT state */
     2805 +        if ((vcpu->run_state & (VRS_INIT | VRS_RUN)) == VRS_INIT) {
     2806 +                vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
     2807 +        }
     2808 +        vcpu_unlock(vcpu);
     2809 +        return (0);
     2810 +}
     2811 +
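
vm_inject_init() and vm_inject_sipi() only latch the pending bits; the expected caller is the local APIC emulation decoding an ICR write, which is not part of this hunk. A standalone model of the dispatch it would perform, using the ICR delivery-mode encoding from apicreg.h:

    #include <stdint.h>
    #include <stdio.h>

    #define APIC_DELMODE_INIT       0x00000500
    #define APIC_DELMODE_STARTUP    0x00000600
    #define APIC_DELMODE_MASK       0x00000700

    /* Hypothetical routing, modeled on what the vlapic ICR path would do. */
    static const char *
    icr_route(uint32_t icrlo)
    {
            switch (icrlo & APIC_DELMODE_MASK) {
            case APIC_DELMODE_INIT:
                    return ("vm_inject_init");
            case APIC_DELMODE_STARTUP:
                    return ("vm_inject_sipi"); /* vector = icrlo & 0xff */
            default:
                    return ("other delivery mode");
            }
    }

    int
    main(void)
    {
            printf("%s\n", icr_route(APIC_DELMODE_STARTUP | 0x9f));
            return (0);
    }
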
     2812 +bool
     2813 +vcpu_run_state_pending(struct vm *vm, int vcpuid)
     2814 +{
     2815 +        struct vcpu *vcpu;
     2816 +
     2817 +        ASSERT(vcpuid >= 0 && vcpuid < vm->maxcpus);
     2818 +        vcpu = &vm->vcpu[vcpuid];
     2819 +
     2820 +        /* Of interest: vCPU not in running state or with pending INIT */
     2821 +        return ((vcpu->run_state & (VRS_RUN | VRS_PEND_INIT)) != VRS_RUN);
     2822 +}
     2823 +
     2824 +int
     2825 +vcpu_arch_reset(struct vm *vm, int vcpuid, bool init_only)
     2826 +{
     2827 +        struct seg_desc desc;
     2828 +        const enum vm_reg_name clear_regs[] = {
     2829 +                VM_REG_GUEST_CR2,
     2830 +                VM_REG_GUEST_CR3,
     2831 +                VM_REG_GUEST_CR4,
     2832 +                VM_REG_GUEST_RAX,
     2833 +                VM_REG_GUEST_RBX,
     2834 +                VM_REG_GUEST_RCX,
     2835 +                VM_REG_GUEST_RSI,
     2836 +                VM_REG_GUEST_RDI,
     2837 +                VM_REG_GUEST_RBP,
     2838 +                VM_REG_GUEST_RSP,
     2839 +                VM_REG_GUEST_R8,
     2840 +                VM_REG_GUEST_R9,
     2841 +                VM_REG_GUEST_R10,
     2842 +                VM_REG_GUEST_R11,
     2843 +                VM_REG_GUEST_R12,
     2844 +                VM_REG_GUEST_R13,
     2845 +                VM_REG_GUEST_R14,
     2846 +                VM_REG_GUEST_R15,
     2847 +                VM_REG_GUEST_DR0,
     2848 +                VM_REG_GUEST_DR1,
     2849 +                VM_REG_GUEST_DR2,
     2850 +                VM_REG_GUEST_DR3,
     2851 +                VM_REG_GUEST_EFER,
     2852 +        };
     2853 +        const enum vm_reg_name data_segs[] = {
     2854 +                VM_REG_GUEST_SS,
     2855 +                VM_REG_GUEST_DS,
     2856 +                VM_REG_GUEST_ES,
     2857 +                VM_REG_GUEST_FS,
     2858 +                VM_REG_GUEST_GS,
     2859 +        };
     2860 +        struct vcpu *vcpu = &vm->vcpu[vcpuid];
     2861 +
     2862 +        if (vcpuid < 0 || vcpuid >= vm->maxcpus)
     2863 +                return (EINVAL);
     2864 +
     2865 +        for (uint_t i = 0; i < nitems(clear_regs); i++) {
     2866 +                VERIFY0(vm_set_register(vm, vcpuid, clear_regs[i], 0));
     2867 +        }
     2868 +
     2869 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RFLAGS, 2));
     2870 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RIP, 0xfff0));
     2871 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CR0, 0x60000010));
     2872 +
     2873 +        /*
     2874 +         * The prescribed contents of %rdx differ slightly between the Intel and
     2875 +         * AMD architectural definitions.  The former expects the Extended Model
      2876 +         * in bits 16-19, where the latter expects the Family, Model, and
      2877 +         * Stepping all to be there.  Common boot ROMs appear to disregard this
      2878 +         * anyway, so we stick with a compromise value similar to what is
     2879 +         * spelled out in the Intel SDM.
     2880 +         */
     2881 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RDX, 0x600));
     2882 +
     2883 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_DR6, 0xffff0ff0));
     2884 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_DR7, 0x400));
     2885 +
     2886 +        /* CS: Present, R/W, Accessed */
     2887 +        desc.access = 0x0093;
     2888 +        desc.base = 0xffff0000;
     2889 +        desc.limit = 0xffff;
     2890 +        VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &desc));
     2891 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CS, 0xf000));
     2892 +
     2893 +        /* SS, DS, ES, FS, GS: Present, R/W, Accessed */
     2894 +        desc.access = 0x0093;
     2895 +        desc.base = 0;
     2896 +        desc.limit = 0xffff;
     2897 +        for (uint_t i = 0; i < nitems(data_segs); i++) {
     2898 +                VERIFY0(vm_set_seg_desc(vm, vcpuid, data_segs[i], &desc));
     2899 +                VERIFY0(vm_set_register(vm, vcpuid, data_segs[i], 0));
     2900 +        }
     2901 +
     2902 +        /* GDTR, IDTR */
     2903 +        desc.base = 0;
     2904 +        desc.limit = 0xffff;
     2905 +        VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_GDTR, &desc));
     2906 +        VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_IDTR, &desc));
     2907 +
     2908 +        /* LDTR: Present, LDT */
     2909 +        desc.access = 0x0082;
     2910 +        desc.base = 0;
     2911 +        desc.limit = 0xffff;
     2912 +        VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_LDTR, &desc));
     2913 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_LDTR, 0));
     2914 +
     2915 +        /* TR: Present, 32-bit TSS */
     2916 +        desc.access = 0x008b;
     2917 +        desc.base = 0;
     2918 +        desc.limit = 0xffff;
     2919 +        VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_TR, &desc));
     2920 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_TR, 0));
     2921 +
     2922 +        vlapic_reset(vm_lapic(vm, vcpuid));
     2923 +
     2924 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_INTR_SHADOW, 0));
     2925 +
     2926 +        vcpu->exitintinfo = 0;
     2927 +        vcpu->exception_pending = 0;
     2928 +        vcpu->nmi_pending = 0;
     2929 +        vcpu->extint_pending = 0;
     2930 +
     2931 +        /*
     2932 +         * A CPU reset caused by power-on or system reset clears more state than
      2933 +         * one which is triggered from an INIT IPI.
     2934 +         */
     2935 +        if (!init_only) {
     2936 +                vcpu->guest_xcr0 = XFEATURE_ENABLED_X87;
     2937 +                fpu_save_area_reset(vcpu->guestfpu);
     2938 +
     2939 +                /* XXX: clear MSRs and other pieces */
     2940 +        }
     2941 +
     2942 +        return (0);
     2943 +}
     2944 +
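
The CS base/limit and %rip values set in vcpu_arch_reset() conspire to land the first instruction fetch at the canonical x86 reset vector, 16 bytes below 4 GiB. A quick check of the arithmetic:

    #include <inttypes.h>
    #include <stdint.h>
    #include <stdio.h>

    int
    main(void)
    {
            uint64_t cs_base = 0xffff0000;  /* from vcpu_arch_reset() */
            uint64_t rip = 0xfff0;

            /* prints 0xfffffff0: the reset vector, 16 bytes below 4 GiB */
            printf("0x%" PRIx64 "\n", cs_base + rip);
            return (0);
    }
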
     2945 +static int
     2946 +vcpu_vector_sipi(struct vm *vm, int vcpuid, uint8_t vector)
     2947 +{
     2948 +        struct seg_desc desc;
     2949 +
     2950 +        if (vcpuid < 0 || vcpuid >= vm->maxcpus)
     2951 +                return (EINVAL);
     2952 +
     2953 +        /* CS: Present, R/W, Accessed */
     2954 +        desc.access = 0x0093;
     2955 +        desc.base = (uint64_t)vector << 12;
     2956 +        desc.limit = 0xffff;
     2957 +        VERIFY0(vm_set_seg_desc(vm, vcpuid, VM_REG_GUEST_CS, &desc));
     2958 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_CS,
     2959 +            (uint64_t)vector << 8));
     2960 +
     2961 +        VERIFY0(vm_set_register(vm, vcpuid, VM_REG_GUEST_RIP, 0));
     2962 +
     2963 +        return (0);
     2964 +}
     2965 +
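
For a SIPI vector V, the AP starts in real mode at V000:0000, i.e. physical address V * 4096; the base (V << 12) and selector (V << 8) chosen above stay consistent with the real-mode rule that base equals selector * 16. An assertion-style check of that arithmetic:

    #include <assert.h>
    #include <stdint.h>

    int
    main(void)
    {
            uint8_t vector = 0x9f;  /* e.g. a startup trampoline at 0x9f000 */
            uint64_t cs_base = (uint64_t)vector << 12;
            uint16_t cs_sel = (uint16_t)vector << 8;

            assert(cs_base == (uint64_t)cs_sel << 4);  /* real-mode rule */
            assert(cs_base + 0x0 == 0x9f000);          /* %rip starts at 0 */
            return (0);
    }
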
     2966 +int
2795 2967  vm_get_capability(struct vm *vm, int vcpu, int type, int *retval)
2796 2968  {
2797 2969          if (vcpu < 0 || vcpu >= vm->maxcpus)
2798 2970                  return (EINVAL);
2799 2971  
2800 2972          if (type < 0 || type >= VM_CAP_MAX)
2801 2973                  return (EINVAL);
2802 2974  
2803 2975          return (VMGETCAP(vm->cookie, vcpu, type, retval));
2804 2976  }
[... 82 lines elided ...]
2887 3059  }
2888 3060  
2889 3061  int
2890 3062  vcpu_set_state(struct vm *vm, int vcpuid, enum vcpu_state newstate,
2891 3063      bool from_idle)
2892 3064  {
2893 3065          int error;
2894 3066          struct vcpu *vcpu;
2895 3067  
2896 3068          if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2897      -                panic("vm_set_run_state: invalid vcpuid %d", vcpuid);
     3069 +                panic("vcpu_set_state: invalid vcpuid %d", vcpuid);
2898 3070  
2899 3071          vcpu = &vm->vcpu[vcpuid];
2900 3072  
2901 3073          vcpu_lock(vcpu);
2902 3074          error = vcpu_set_state_locked(vm, vcpuid, newstate, from_idle);
2903 3075          vcpu_unlock(vcpu);
2904 3076  
2905 3077          return (error);
2906 3078  }
2907 3079  
2908 3080  enum vcpu_state
2909 3081  vcpu_get_state(struct vm *vm, int vcpuid, int *hostcpu)
2910 3082  {
2911 3083          struct vcpu *vcpu;
2912 3084          enum vcpu_state state;
2913 3085  
2914 3086          if (vcpuid < 0 || vcpuid >= vm->maxcpus)
2915      -                panic("vm_get_run_state: invalid vcpuid %d", vcpuid);
     3087 +                panic("vcpu_get_state: invalid vcpuid %d", vcpuid);
2916 3088  
2917 3089          vcpu = &vm->vcpu[vcpuid];
2918 3090  
2919 3091          vcpu_lock(vcpu);
2920 3092          state = vcpu->state;
2921 3093          if (hostcpu != NULL)
2922 3094                  *hostcpu = vcpu->hostcpu;
2923 3095          vcpu_unlock(vcpu);
2924 3096  
2925 3097          return (state);
2926 3098  }
2927 3099  
2928      -void
2929      -vcpu_block_run(struct vm *vm, int vcpuid)
2930      -{
2931      -        struct vcpu *vcpu;
2932      -
2933      -        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
2934      -                panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
2935      -
2936      -        vcpu = &vm->vcpu[vcpuid];
2937      -
2938      -        vcpu_lock(vcpu);
2939      -        vcpu->runblock++;
2940      -        if (vcpu->runblock == 1 && vcpu->state == VCPU_RUNNING) {
2941      -                vcpu_notify_event_locked(vcpu, VCPU_NOTIFY_EXIT);
2942      -        }
2943      -        while (vcpu->state == VCPU_RUNNING) {
2944      -#ifdef __FreeBSD__
2945      -                msleep_spin(&vcpu->state, &vcpu->mtx, "vcpublk", 0);
2946      -#else
2947      -                cv_wait(&vcpu->state_cv, &vcpu->mtx.m);
2948      -#endif
2949      -        }
2950      -        vcpu_unlock(vcpu);
2951      -}
2952      -
2953      -void
2954      -vcpu_unblock_run(struct vm *vm, int vcpuid)
2955      -{
2956      -        struct vcpu *vcpu;
2957      -
2958      -        if (vcpuid < 0 || vcpuid >= VM_MAXCPU)
2959      -                panic("vcpu_block_run: invalid vcpuid %d", vcpuid);
2960      -
2961      -        vcpu = &vm->vcpu[vcpuid];
2962      -
2963      -        vcpu_lock(vcpu);
2964      -        KASSERT(vcpu->runblock != 0, ("expected non-zero runblock"));
2965      -        vcpu->runblock--;
2966      -        if (vcpu->runblock == 0) {
2967      -#ifdef __FreeBSD__
2968      -                wakeup(&vcpu->state);
2969      -#else
2970      -                cv_broadcast(&vcpu->state_cv);
2971      -#endif
2972      -        }
2973      -        vcpu_unlock(vcpu);
2974      -}
2975      -
2976 3100  #ifndef __FreeBSD__
2977 3101  uint64_t
2978 3102  vcpu_tsc_offset(struct vm *vm, int vcpuid)
2979 3103  {
2980 3104          return (vm->vcpu[vcpuid].tsc_offset);
2981 3105  }
2982 3106  #endif /* __FreeBSD__ */
2983 3107  
2984 3108  int
2985 3109  vm_activate_cpu(struct vm *vm, int vcpuid)
[... 45 lines elided ...]
3031 3155                  CPU_ZERO(&vm->debug_cpus);
3032 3156          } else {
3033 3157                  if (!CPU_ISSET(vcpuid, &vm->debug_cpus))
3034 3158                          return (EINVAL);
3035 3159  
3036 3160                  CPU_CLR_ATOMIC(vcpuid, &vm->debug_cpus);
3037 3161          }
3038 3162          return (0);
3039 3163  }
3040 3164  
3041      -int
3042      -vcpu_debugged(struct vm *vm, int vcpuid)
     3165 +static bool
     3166 +vcpu_bailout_checks(struct vm *vm, int vcpuid, bool on_entry,
     3167 +    uint64_t entry_rip)
3043 3168  {
     3169 +        struct vcpu *vcpu = &vm->vcpu[vcpuid];
     3170 +        struct vm_exit *vme = &vcpu->exitinfo;
     3171 +        bool bail = false;
3044 3172  
3045      -        return (CPU_ISSET(vcpuid, &vm->debug_cpus));
     3173 +        ASSERT(vcpuid >= 0 && vcpuid < vm->maxcpus);
     3174 +
     3175 +        if (vm->suspend) {
     3176 +                if (on_entry) {
     3177 +                        VERIFY(vm->suspend > VM_SUSPEND_NONE &&
     3178 +                            vm->suspend < VM_SUSPEND_LAST);
     3179 +
     3180 +                        vme->exitcode = VM_EXITCODE_SUSPENDED;
     3181 +                        vme->u.suspended.how = vm->suspend;
     3182 +                } else {
     3183 +                        /*
     3184 +                         * Handling VM suspend is complicated, so if that
     3185 +                         * condition is detected outside of VM-entry itself,
     3186 +                         * just emit a BOGUS exitcode so we take a lap to pick
     3187 +                         * up the event during an entry and are directed into
     3188 +                         * the vm_handle_suspend() logic.
     3189 +                         */
     3190 +                        vme->exitcode = VM_EXITCODE_BOGUS;
     3191 +                }
     3192 +                bail = true;
     3193 +        }
     3194 +        if (vcpu->reqidle) {
     3195 +                vme->exitcode = VM_EXITCODE_REQIDLE;
     3196 +                vmm_stat_incr(vm, vcpuid, VMEXIT_REQIDLE, 1);
     3197 +
     3198 +                if (!on_entry) {
     3199 +                        /*
     3200 +                         * A reqidle request detected outside of VM-entry can be
     3201 +                         * handled directly by clearing the request (and taking
     3202 +                         * a lap to userspace).
     3203 +                         */
     3204 +                        vcpu_assert_locked(vcpu);
     3205 +                        vcpu->reqidle = 0;
     3206 +                }
     3207 +                bail = true;
     3208 +        }
     3209 +        if (vcpu_should_yield(vm, vcpuid)) {
     3210 +                vme->exitcode = VM_EXITCODE_BOGUS;
     3211 +                vmm_stat_incr(vm, vcpuid, VMEXIT_ASTPENDING, 1);
     3212 +                bail = true;
     3213 +        }
     3214 +        if (CPU_ISSET(vcpuid, &vm->debug_cpus)) {
     3215 +                vme->exitcode = VM_EXITCODE_DEBUG;
     3216 +                bail = true;
     3217 +        }
     3218 +
     3219 +        if (bail) {
     3220 +                if (on_entry) {
     3221 +                        /*
     3222 +                         * If bailing out during VM-entry, the current %rip must
     3223 +                         * be recorded in the exitinfo.
     3224 +                         */
     3225 +                        vme->rip = entry_rip;
     3226 +                }
     3227 +                vme->inst_length = 0;
     3228 +        }
     3229 +        return (bail);
3046 3230  }
3047 3231  
     3232 +static bool
     3233 +vcpu_sleep_bailout_checks(struct vm *vm, int vcpuid)
     3234 +{
     3235 +        /*
      3236 +         * Bail-out checks done prior to sleeping (in vCPU contexts like HLT or
     3237 +         * wait-for-SIPI) expect that %rip is already populated in the vm_exit
     3238 +         * structure, and we would only modify the exitcode.
     3239 +         */
     3240 +        return (vcpu_bailout_checks(vm, vcpuid, false, 0));
     3241 +}
     3242 +
     3243 +bool
     3244 +vcpu_entry_bailout_checks(struct vm *vm, int vcpuid, uint64_t rip)
     3245 +{
     3246 +        /*
     3247 +         * Bail-out checks done as part of VM entry require an updated %rip to
     3248 +         * populate the vm_exit struct if any of the conditions of interest are
     3249 +         * matched in the check.
     3250 +         */
     3251 +        return (vcpu_bailout_checks(vm, vcpuid, true, rip));
     3252 +}
     3253 +
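
vcpu_entry_bailout_checks() is non-static because its consumer is the CPU backend, which lives outside this file. Roughly, the VMX/SVM run loop would call it immediately before dropping into guest context; a sketch of that shape, borrowing only the declaration from above:

    #include <stdbool.h>
    #include <stdint.h>

    struct vm;
    extern bool vcpu_entry_bailout_checks(struct vm *, int, uint64_t);

    /* Hypothetical backend pre-entry hook (the real one is elsewhere). */
    static int
    backend_pre_entry(struct vm *vm, int vcpuid, uint64_t rip)
    {
            if (vcpu_entry_bailout_checks(vm, vcpuid, rip)) {
                    /* exitcode, %rip, and inst_length already populated */
                    return (-1);    /* bail out toward userspace */
            }
            return (0);             /* proceed with VM entry */
    }
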
3048 3254  cpuset_t
3049 3255  vm_active_cpus(struct vm *vm)
3050 3256  {
3051 3257  
3052 3258          return (vm->active_cpus);
3053 3259  }
3054 3260  
3055 3261  cpuset_t
3056 3262  vm_debug_cpus(struct vm *vm)
3057 3263  {
[... 368 lines elided ...]