Print this page
13275 bhyve needs richer INIT/SIPI support
Reviewed by: Robert Mustacchi <rm@fingolfin.org>
Approved by: Gordon Ross <gordon.w.ross@gmail.com>

Split Close
Expand all
Collapse all
          --- old/usr/src/uts/i86pc/io/vmm/io/vlapic.c
          +++ new/usr/src/uts/i86pc/io/vmm/io/vlapic.c
↓ open down ↓ 984 lines elided ↑ open up ↑
 985  985          uint8_t tpr;
 986  986  
 987  987          tpr = vlapic_get_tpr(vlapic);
 988  988          return (tpr >> 4);
 989  989  }
 990  990  
 991  991  int
 992  992  vlapic_icrlo_write_handler(struct vlapic *vlapic)
 993  993  {
 994  994          int i;
 995      -        bool phys;
 996  995          cpuset_t dmask;
 997  996          uint64_t icrval;
 998      -        uint32_t dest, vec, mode;
 999      -        struct vlapic *vlapic2;
      997 +        uint32_t dest, vec, mode, dsh;
1000  998          struct LAPIC *lapic;
1001      -        uint16_t maxcpus;
1002  999  
1003 1000          lapic = vlapic->apic_page;
1004 1001          lapic->icr_lo &= ~APIC_DELSTAT_PEND;
1005 1002          icrval = ((uint64_t)lapic->icr_hi << 32) | lapic->icr_lo;
1006 1003  
1007 1004          if (x2apic(vlapic))
1008 1005                  dest = icrval >> 32;
1009 1006          else
1010 1007                  dest = icrval >> (32 + 24);
1011 1008          vec = icrval & APIC_VECTOR_MASK;
1012 1009          mode = icrval & APIC_DELMODE_MASK;
     1010 +        dsh = icrval & APIC_DEST_MASK;
1013 1011  
1014 1012          if (mode == APIC_DELMODE_FIXED && vec < 16) {
1015 1013                  vlapic_set_error(vlapic, APIC_ESR_SEND_ILLEGAL_VECTOR, false);
1016      -                VLAPIC_CTR1(vlapic, "Ignoring invalid IPI %d", vec);
1017 1014                  return (0);
1018 1015          }
     1016 +        if (mode == APIC_DELMODE_INIT &&
     1017 +            (icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT) {
     1018 +                /* No work required to deassert INIT */
     1019 +                return (0);
     1020 +        }
     1021 +        if ((mode == APIC_DELMODE_STARTUP || mode == APIC_DELMODE_INIT) &&
     1022 +            !(dsh == APIC_DEST_DESTFLD || dsh == APIC_DEST_ALLESELF)) {
     1023 +                /*
     1024 +                 * While Intel makes no mention of restrictions for destination
     1025 +                 * shorthand when sending INIT or SIPI, AMD requires either a
     1026 +                 * specific destination or all-excluding self.  Common use seems
     1027 +                 * to be restricted to those two cases.
     1028 +                 */
     1029 +                return (-1);
     1030 +        }
1019 1031  
1020      -        VLAPIC_CTR2(vlapic, "icrlo 0x%016lx triggered ipi %d", icrval, vec);
     1032 +        switch (dsh) {
     1033 +        case APIC_DEST_DESTFLD:
     1034 +                vlapic_calcdest(vlapic->vm, &dmask, dest,
     1035 +                    (icrval & APIC_DESTMODE_LOG) == 0, false, x2apic(vlapic));
     1036 +                break;
     1037 +        case APIC_DEST_SELF:
     1038 +                CPU_SETOF(vlapic->vcpuid, &dmask);
     1039 +                break;
     1040 +        case APIC_DEST_ALLISELF:
     1041 +                dmask = vm_active_cpus(vlapic->vm);
     1042 +                break;
     1043 +        case APIC_DEST_ALLESELF:
     1044 +                dmask = vm_active_cpus(vlapic->vm);
     1045 +                CPU_CLR(vlapic->vcpuid, &dmask);
     1046 +                break;
     1047 +        default:
     1048 +                /*
     1049 +                 * All possible delivery notations are covered above.
     1050 +                 * We should never end up here.
     1051 +                 */
     1052 +                panic("unknown delivery shorthand: %x", dsh);
     1053 +        }
1021 1054  
1022      -        if (mode == APIC_DELMODE_FIXED || mode == APIC_DELMODE_NMI) {
1023      -                switch (icrval & APIC_DEST_MASK) {
1024      -                case APIC_DEST_DESTFLD:
1025      -                        phys = ((icrval & APIC_DESTMODE_LOG) == 0);
1026      -                        vlapic_calcdest(vlapic->vm, &dmask, dest, phys, false,
1027      -                            x2apic(vlapic));
     1055 +        while ((i = CPU_FFS(&dmask)) != 0) {
     1056 +                i--;
     1057 +                CPU_CLR(i, &dmask);
     1058 +                switch (mode) {
     1059 +                case APIC_DELMODE_FIXED:
     1060 +                        lapic_intr_edge(vlapic->vm, i, vec);
     1061 +                        vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
     1062 +                            VLAPIC_IPI_SEND, 1);
     1063 +                        vmm_stat_incr(vlapic->vm, i,
     1064 +                            VLAPIC_IPI_RECV, 1);
1028 1065                          break;
1029      -                case APIC_DEST_SELF:
1030      -                        CPU_SETOF(vlapic->vcpuid, &dmask);
     1066 +                case APIC_DELMODE_NMI:
     1067 +                        vm_inject_nmi(vlapic->vm, i);
1031 1068                          break;
1032      -                case APIC_DEST_ALLISELF:
1033      -                        dmask = vm_active_cpus(vlapic->vm);
     1069 +                case APIC_DELMODE_INIT:
     1070 +                        (void) vm_inject_init(vlapic->vm, i);
1034 1071                          break;
1035      -                case APIC_DEST_ALLESELF:
1036      -                        dmask = vm_active_cpus(vlapic->vm);
1037      -                        CPU_CLR(vlapic->vcpuid, &dmask);
     1072 +                case APIC_DELMODE_STARTUP:
     1073 +                        (void) vm_inject_sipi(vlapic->vm, i, vec);
1038 1074                          break;
     1075 +                case APIC_DELMODE_LOWPRIO:
     1076 +                case APIC_DELMODE_SMI:
1039 1077                  default:
1040      -                        CPU_ZERO(&dmask);       /* satisfy gcc */
     1078 +                        /* Unhandled IPI modes (for now) */
1041 1079                          break;
1042 1080                  }
1043      -
1044      -                while ((i = CPU_FFS(&dmask)) != 0) {
1045      -                        i--;
1046      -                        CPU_CLR(i, &dmask);
1047      -                        if (mode == APIC_DELMODE_FIXED) {
1048      -                                lapic_intr_edge(vlapic->vm, i, vec);
1049      -                                vmm_stat_incr(vlapic->vm, vlapic->vcpuid,
1050      -                                    VLAPIC_IPI_SEND, 1);
1051      -                                vmm_stat_incr(vlapic->vm, i,
1052      -                                    VLAPIC_IPI_RECV, 1);
1053      -                                VLAPIC_CTR2(vlapic, "vlapic sending ipi %d "
1054      -                                    "to vcpuid %d", vec, i);
1055      -                        } else {
1056      -                                vm_inject_nmi(vlapic->vm, i);
1057      -                                VLAPIC_CTR1(vlapic, "vlapic sending ipi nmi "
1058      -                                    "to vcpuid %d", i);
1059      -                        }
1060      -                }
1061      -
1062      -                return (0);     /* handled completely in the kernel */
1063 1081          }
1064      -
1065      -        maxcpus = vm_get_maxcpus(vlapic->vm);
1066      -        if (mode == APIC_DELMODE_INIT) {
1067      -                if ((icrval & APIC_LEVEL_MASK) == APIC_LEVEL_DEASSERT)
1068      -                        return (0);
1069      -
1070      -                if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
1071      -                        vlapic2 = vm_lapic(vlapic->vm, dest);
1072      -
1073      -                        /* move from INIT to waiting-for-SIPI state */
1074      -                        if (vlapic2->boot_state == BS_INIT) {
1075      -                                vlapic2->boot_state = BS_SIPI;
1076      -                        }
1077      -
1078      -                        return (0);
1079      -                }
1080      -        }
1081      -
1082      -        if (mode == APIC_DELMODE_STARTUP) {
1083      -                if (vlapic->vcpuid == 0 && dest != 0 && dest < maxcpus) {
1084      -                        vlapic2 = vm_lapic(vlapic->vm, dest);
1085      -
1086      -                        /*
1087      -                         * Ignore SIPIs in any state other than wait-for-SIPI
1088      -                         */
1089      -                        if (vlapic2->boot_state != BS_SIPI)
1090      -                                return (0);
1091      -
1092      -                        vlapic2->boot_state = BS_RUNNING;
1093      -                        vm_req_spinup_ap(vlapic->vm, dest, vec << PAGE_SHIFT);
1094      -                        return (0);
1095      -                }
1096      -        }
1097      -
1098      -        /* Return to userland.  */
1099      -        return (-1);
     1082 +        return (0);
1100 1083  }
1101 1084  
1102 1085  void
1103 1086  vlapic_self_ipi_handler(struct vlapic *vlapic, uint64_t val)
1104 1087  {
1105 1088          int vec;
1106 1089  
1107 1090          KASSERT(x2apic(vlapic), ("SELF_IPI does not exist in xAPIC mode"));
1108 1091  
1109 1092          vec = val & 0xff;
↓ open down ↓ 333 lines elided ↑ open up ↑
1443 1426                  case APIC_OFFSET_IRR0 ... APIC_OFFSET_IRR7:
1444 1427                  case APIC_OFFSET_TIMER_CCR:
1445 1428                  default:
1446 1429                          // Read only.
1447 1430                          break;
1448 1431          }
1449 1432  
1450 1433          return (retval);
1451 1434  }
1452 1435  
1453      -static void
     1436 +void
1454 1437  vlapic_reset(struct vlapic *vlapic)
1455 1438  {
1456      -        struct LAPIC *lapic;
     1439 +        struct LAPIC *lapic = vlapic->apic_page;
     1440 +        uint32_t *isrptr, *tmrptr, *irrptr;
1457 1441  
1458      -        lapic = vlapic->apic_page;
1459      -        bzero(lapic, sizeof (struct LAPIC));
     1442 +        /* Reset any timer-related state first */
     1443 +        VLAPIC_TIMER_LOCK(vlapic);
     1444 +        callout_stop(&vlapic->callout);
     1445 +        lapic->icr_timer = 0;
     1446 +        lapic->ccr_timer = 0;
     1447 +        VLAPIC_TIMER_UNLOCK(vlapic);
     1448 +        lapic->dcr_timer = 0;
     1449 +        vlapic_dcr_write_handler(vlapic);
1460 1450  
     1451 +        /*
     1452 +         * Sync any APIC acceleration (APICv/AVIC) state into the APIC page so
     1453 +         * it is not left over after the reset.  This is performed after the
     1454 +         * APIC timer has been stopped, in case it happened to fire just prior
     1455 +         * to being deactivated.
     1456 +         */
     1457 +        if (vlapic->ops.sync_state) {
     1458 +                (*vlapic->ops.sync_state)(vlapic);
     1459 +        }
     1460 +
1461 1461          lapic->id = vlapic_get_id(vlapic);
1462 1462          lapic->version = VLAPIC_VERSION;
1463 1463          lapic->version |= (VLAPIC_MAXLVT_INDEX << MAXLVTSHIFT);
     1464 +
     1465 +        lapic->tpr = 0;
     1466 +        lapic->apr = 0;
     1467 +        lapic->ppr = 0;
     1468 +
     1469 +#ifdef __ISRVEC_DEBUG
     1470 +        /* With the PPR cleared, the isrvec tracking should be reset too */
     1471 +        vlapic->isrvec_stk_top = 0;
     1472 +#endif
     1473 +
     1474 +        lapic->eoi = 0;
     1475 +        lapic->ldr = 0;
1464 1476          lapic->dfr = 0xffffffff;
1465 1477          lapic->svr = APIC_SVR_VECTOR;
1466      -        vlapic_mask_lvts(vlapic);
     1478 +        vlapic->svr_last = lapic->svr;
1467 1479  
1468      -        lapic->dcr_timer = 0;
1469      -        vlapic_dcr_write_handler(vlapic);
     1480 +        isrptr = &lapic->isr0;
     1481 +        tmrptr = &lapic->tmr0;
     1482 +        irrptr = &lapic->irr0;
     1483 +        for (uint_t i = 0; i < 8; i++) {
     1484 +                atomic_store_rel_int(&isrptr[i * 4], 0);
     1485 +                atomic_store_rel_int(&tmrptr[i * 4], 0);
     1486 +                atomic_store_rel_int(&irrptr[i * 4], 0);
     1487 +        }
1470 1488  
1471      -        if (vlapic->vcpuid == 0)
1472      -                vlapic->boot_state = BS_RUNNING;        /* BSP */
1473      -        else
1474      -                vlapic->boot_state = BS_INIT;           /* AP */
     1489 +        lapic->esr = 0;
     1490 +        vlapic->esr_pending = 0;
     1491 +        lapic->icr_lo = 0;
     1492 +        lapic->icr_hi = 0;
1475 1493  
1476      -        vlapic->svr_last = lapic->svr;
     1494 +        lapic->lvt_cmci = 0;
     1495 +        lapic->lvt_timer = 0;
     1496 +        lapic->lvt_thermal = 0;
     1497 +        lapic->lvt_pcint = 0;
     1498 +        lapic->lvt_lint0 = 0;
     1499 +        lapic->lvt_lint1 = 0;
     1500 +        lapic->lvt_error = 0;
     1501 +        vlapic_mask_lvts(vlapic);
1477 1502  }
1478 1503  
1479 1504  void
1480 1505  vlapic_init(struct vlapic *vlapic)
1481 1506  {
1482 1507          KASSERT(vlapic->vm != NULL, ("vlapic_init: vm is not initialized"));
1483 1508          KASSERT(vlapic->vcpuid >= 0 &&
1484 1509              vlapic->vcpuid < vm_get_maxcpus(vlapic->vm),
1485 1510              ("vlapic_init: vcpuid is not initialized"));
1486 1511          KASSERT(vlapic->apic_page != NULL, ("vlapic_init: apic_page is not "
↓ open down ↓ 244 lines elided ↑ open up ↑
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX