1658 "tbm",
1659 "avx512_vnni",
1660 "amd_pcec",
1661 "md_clear",
1662 "mds_no",
1663 "core_thermal",
1664 "pkg_thermal",
1665 "tsx_ctrl",
1666 "taa_no",
1667 "ppin",
1668 "vaes",
1669 "vpclmulqdq",
1670 "lfence_serializing",
1671 "gfni",
1672 "avx512_vp2intersect",
1673 "avx512_bitalg",
1674 "avx512_vbmi2",
1675 "avx512_bf16",
1676 "auto_ibrs",
1677 "rfds_no",
1678 "rfds_clear"
1679 };
1680
1681 boolean_t
1682 is_x86_feature(void *featureset, uint_t feature)
1683 {
1684 ASSERT(feature < NUM_X86_FEATURES);
1685 return (BT_TEST((ulong_t *)featureset, feature));
1686 }
1687
1688 void
1689 add_x86_feature(void *featureset, uint_t feature)
1690 {
1691 ASSERT(feature < NUM_X86_FEATURES);
1692 BT_SET((ulong_t *)featureset, feature);
1693 }
1694
1695 void
1696 remove_x86_feature(void *featureset, uint_t feature)
1697 {
1698 ASSERT(feature < NUM_X86_FEATURES);
2951 if (need_l1d) {
2952 /*
2953 * As of Feb, 2024, no CPU needs L1D *and* RFDS mitigation
2954 * together. If the following VERIFY trips, we need to add
2955 * further fixes here.
2956 */
2957 VERIFY(!need_rfds);
2958 spec_uarch_flush = spec_uarch_flush_msr;
2959 } else if (need_mds || need_rfds) {
2960 spec_uarch_flush = x86_md_clear;
2961 } else {
2962 /*
2963 * We have no hardware mitigations available to us.
2964 */
2965 spec_uarch_flush = spec_uarch_flush_noop;
2966 }
2967 membar_producer();
2968 }
2969
2970 /*
2971 * We default to enabling RSB mitigations.
2972 *
2973 * NOTE: We used to skip RSB mitigations with eIBRS, but developments around
2974 * post-barrier RSB guessing suggests we should enable RSB mitigations always
2975 * unless specifically instructed not to.
2976 *
2977 * AMD indicates that when Automatic IBRS is enabled we do not need to implement
2978 * return stack buffer clearing for VMEXIT as it takes care of it. The manual
 * also states that as long as SMEP is enabled and we maintain at least one
 * page between the kernel and user space (we have much more of a red zone),
 * then we do not need to clear the RSB. We constrain this to only when
 * Automatic IBRS is present.
2983 */
2984 static void
2985 cpuid_patch_rsb(x86_spectrev2_mitigation_t mit)
2986 {
2987 const uint8_t ret = RET_INSTR;
2988 uint8_t *stuff = (uint8_t *)x86_rsb_stuff;
2989
2990 switch (mit) {
2991 case X86_SPECTREV2_AUTO_IBRS:
2992 case X86_SPECTREV2_DISABLED:
2993 *stuff = ret;
2994 break;
2995 default:
2996 break;
2997 }
2998 }
2999
3000 static void
3001 cpuid_patch_retpolines(x86_spectrev2_mitigation_t mit)
3002 {
3003 const char *thunks[] = { "_rax", "_rbx", "_rcx", "_rdx", "_rdi",
3004 "_rsi", "_rbp", "_r8", "_r9", "_r10", "_r11", "_r12", "_r13",
3005 "_r14", "_r15" };
3006 const uint_t nthunks = ARRAY_SIZE(thunks);
3007 const char *type;
3008 uint_t i;
3009
3010 if (mit == x86_spectrev2_mitigation)
3011 return;
3012
3013 switch (mit) {
3014 case X86_SPECTREV2_RETPOLINE:
3015 type = "gen";
3250 if (reg & IA32_ARCH_CAP_MDS_NO) {
3251 add_x86_feature(featureset,
3252 X86FSET_MDS_NO);
3253 }
3254 if (reg & IA32_ARCH_CAP_TSX_CTRL) {
3255 add_x86_feature(featureset,
3256 X86FSET_TSX_CTRL);
3257 }
3258 if (reg & IA32_ARCH_CAP_TAA_NO) {
3259 add_x86_feature(featureset,
3260 X86FSET_TAA_NO);
3261 }
3262 if (reg & IA32_ARCH_CAP_RFDS_NO) {
3263 add_x86_feature(featureset,
3264 X86FSET_RFDS_NO);
3265 }
3266 if (reg & IA32_ARCH_CAP_RFDS_CLEAR) {
3267 add_x86_feature(featureset,
3268 X86FSET_RFDS_CLEAR);
3269 }
3270 }
3271 no_trap();
3272 }
3273 #endif /* !__xpv */
3274
3275 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
3276 add_x86_feature(featureset, X86FSET_SSBD);
3277
3278 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
3279 add_x86_feature(featureset, X86FSET_FLUSH_CMD);
3280 }
3281
3282 /*
3283 * Take care of certain mitigations on the non-boot CPU. The boot CPU
3284 * will have already run this function and determined what we need to
3285 * do. This gives us a hook for per-HW thread mitigations such as
3286 * enhanced IBRS, or disabling TSX.
3287 */
3288 if (cpu->cpu_id != 0) {
3289 switch (x86_spectrev2_mitigation) {
3290 case X86_SPECTREV2_ENHANCED_IBRS:
3310 /*
3311 * By default we've come in with retpolines enabled. Check whether we
3312 * should disable them or enable enhanced or automatic IBRS. RSB
3313 * stuffing is enabled by default. Note, we do not allow the use of AMD
3314 * optimized retpolines as it was disclosed by AMD in March 2022 that
3315 * they were still vulnerable. Prior to that point, we used them.
3316 */
3317 if (x86_disable_spectrev2 != 0) {
3318 v2mit = X86_SPECTREV2_DISABLED;
3319 } else if (is_x86_feature(featureset, X86FSET_AUTO_IBRS)) {
3320 cpuid_enable_auto_ibrs();
3321 v2mit = X86_SPECTREV2_AUTO_IBRS;
3322 } else if (is_x86_feature(featureset, X86FSET_IBRS_ALL)) {
3323 cpuid_enable_enhanced_ibrs();
3324 v2mit = X86_SPECTREV2_ENHANCED_IBRS;
3325 } else {
3326 v2mit = X86_SPECTREV2_RETPOLINE;
3327 }
3328
3329 cpuid_patch_retpolines(v2mit);
3330 cpuid_patch_rsb(v2mit);
3331 x86_spectrev2_mitigation = v2mit;
3332 membar_producer();
3333
3334 /*
3335 * We need to determine what changes are required for mitigating L1TF
3336 * and MDS. If the CPU suffers from either of them, then SMT exclusion
3337 * is required.
3338 *
3339 * If any of these are present, then we need to flush u-arch state at
3340 * various points. For MDS, we need to do so whenever we change to a
3341 * lesser privilege level or we are halting the CPU. For L1TF we need to
3342 * flush the L1D cache at VM entry. When we have microcode that handles
3343 * MDS, the L1D flush also clears the other u-arch state that the
3344 * md_clear does.
3345 */
3346
3347 /*
3348 * Update whether or not we need to be taking explicit action against
3349 * MDS or RFDS.
3350 */
3351 cpuid_update_md_clear(cpu, featureset);
3352
3353 /*
|
1658 "tbm",
1659 "avx512_vnni",
1660 "amd_pcec",
1661 "md_clear",
1662 "mds_no",
1663 "core_thermal",
1664 "pkg_thermal",
1665 "tsx_ctrl",
1666 "taa_no",
1667 "ppin",
1668 "vaes",
1669 "vpclmulqdq",
1670 "lfence_serializing",
1671 "gfni",
1672 "avx512_vp2intersect",
1673 "avx512_bitalg",
1674 "avx512_vbmi2",
1675 "avx512_bf16",
1676 "auto_ibrs",
1677 "rfds_no",
1678 "rfds_clear",
1679 "pbrsb_no"
1680 };
1681
1682 boolean_t
1683 is_x86_feature(void *featureset, uint_t feature)
1684 {
1685 ASSERT(feature < NUM_X86_FEATURES);
1686 return (BT_TEST((ulong_t *)featureset, feature));
1687 }
1688
1689 void
1690 add_x86_feature(void *featureset, uint_t feature)
1691 {
1692 ASSERT(feature < NUM_X86_FEATURES);
1693 BT_SET((ulong_t *)featureset, feature);
1694 }
1695
1696 void
1697 remove_x86_feature(void *featureset, uint_t feature)
1698 {
1699 ASSERT(feature < NUM_X86_FEATURES);
2952 if (need_l1d) {
2953 /*
2954 * As of Feb, 2024, no CPU needs L1D *and* RFDS mitigation
2955 * together. If the following VERIFY trips, we need to add
2956 * further fixes here.
2957 */
2958 VERIFY(!need_rfds);
2959 spec_uarch_flush = spec_uarch_flush_msr;
2960 } else if (need_mds || need_rfds) {
2961 spec_uarch_flush = x86_md_clear;
2962 } else {
2963 /*
2964 * We have no hardware mitigations available to us.
2965 */
2966 spec_uarch_flush = spec_uarch_flush_noop;
2967 }
2968 membar_producer();
2969 }
2970
2971 /*
2972 * We default to enabling Return Stack Buffer (RSB) mitigations.
2973 *
2974 * We used to skip RSB mitigations with Intel eIBRS, but developments around
2975 * post-barrier RSB (PBRSB) guessing suggests we should enable Intel RSB
2976 * mitigations always unless explicitly bypassed, or unless hardware indicates
2977 * the bug has been fixed. Intel also says that machines without eIBRS do not
2978 * have VMEXIT problems with PBRSB. Basically, if we're Intel and have eIBRS,
2979 * we must stuff the RSB in both context switches AND in VMEXIT, unless the
2980 * hardware says the PBRSB bug is fixed. If we're Intel but without eIBRS
2981 * (i.e. using retpolines), we must stuff the RSB in context switches, but we
2982 * do not have to for VMEXIT.
2983 *
2984 * See (pardon broken URL) https://www.intel.com/content/www/us/en/developer \
2985 * /articles/technical/software-security-guidance/advisory-guidance
2986 * /post-barrier-return-stack-buffer-predictions.html
2987 *
2988 * AMD indicates that when Automatic IBRS is enabled we do not need to implement
2989 * return stack buffer clearing for VMEXIT as it takes care of it. The manual
 * also states that as long as SMEP is enabled and we maintain at least one
 * page between the kernel and user space (we have much more of a red zone),
 * then we do not need to clear the RSB. We constrain this to only when
 * Automatic IBRS is present.
2994 */
2995 static void
2996 cpuid_patch_rsb(x86_spectrev2_mitigation_t mit, bool intel_pbrsb_no)
2997 {
2998 const uint8_t ret = RET_INSTR;
2999 uint8_t *stuff = (uint8_t *)x86_rsb_stuff;
3000 uint8_t *vmx_stuff = (uint8_t *)x86_rsb_stuff_vmexit;
3001
3002 switch (mit) {
3003 case X86_SPECTREV2_AUTO_IBRS:
3004 case X86_SPECTREV2_DISABLED:
3005 /* Don't bother with any RSB stuffing! */
3006 *stuff = ret;
3007 *vmx_stuff = ret;
3008 break;
3009 case X86_SPECTREV2_RETPOLINE:
3010 /*
3011 * The Intel document on Post-Barrier RSB says that processors
3012 * without eIBRS do not have PBRSB problems upon VMEXIT.
3013 */
3014 VERIFY(!intel_pbrsb_no);
3015 VERIFY3U(*stuff, !=, ret);
3016 *vmx_stuff = ret;
3017 break;
3018 default:
3019 /*
3020 * eIBRS is all that's left. If CPU claims PBRSB is fixed,
3021 * don't use the RSB mitigation in either case.
3022 */
3023 if (intel_pbrsb_no) {
3024 /* CPU claims PBRSB problems are fixed. */
3025 *stuff = ret;
3026 *vmx_stuff = ret;
3027 }
3028 VERIFY3U(*stuff, ==, *vmx_stuff);
3029 break;
3030 }
3031 }
3032
3033 static void
3034 cpuid_patch_retpolines(x86_spectrev2_mitigation_t mit)
3035 {
3036 const char *thunks[] = { "_rax", "_rbx", "_rcx", "_rdx", "_rdi",
3037 "_rsi", "_rbp", "_r8", "_r9", "_r10", "_r11", "_r12", "_r13",
3038 "_r14", "_r15" };
3039 const uint_t nthunks = ARRAY_SIZE(thunks);
3040 const char *type;
3041 uint_t i;
3042
3043 if (mit == x86_spectrev2_mitigation)
3044 return;
3045
3046 switch (mit) {
3047 case X86_SPECTREV2_RETPOLINE:
3048 type = "gen";
3283 if (reg & IA32_ARCH_CAP_MDS_NO) {
3284 add_x86_feature(featureset,
3285 X86FSET_MDS_NO);
3286 }
3287 if (reg & IA32_ARCH_CAP_TSX_CTRL) {
3288 add_x86_feature(featureset,
3289 X86FSET_TSX_CTRL);
3290 }
3291 if (reg & IA32_ARCH_CAP_TAA_NO) {
3292 add_x86_feature(featureset,
3293 X86FSET_TAA_NO);
3294 }
3295 if (reg & IA32_ARCH_CAP_RFDS_NO) {
3296 add_x86_feature(featureset,
3297 X86FSET_RFDS_NO);
3298 }
3299 if (reg & IA32_ARCH_CAP_RFDS_CLEAR) {
3300 add_x86_feature(featureset,
3301 X86FSET_RFDS_CLEAR);
3302 }
3303 if (reg & IA32_ARCH_CAP_PBRSB_NO) {
3304 add_x86_feature(featureset,
3305 X86FSET_PBRSB_NO);
3306 }
3307 }
3308 no_trap();
3309 }
3310 #endif /* !__xpv */
3311
3312 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
3313 add_x86_feature(featureset, X86FSET_SSBD);
3314
3315 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
3316 add_x86_feature(featureset, X86FSET_FLUSH_CMD);
3317 }
3318
3319 /*
3320 * Take care of certain mitigations on the non-boot CPU. The boot CPU
3321 * will have already run this function and determined what we need to
3322 * do. This gives us a hook for per-HW thread mitigations such as
3323 * enhanced IBRS, or disabling TSX.
3324 */
3325 if (cpu->cpu_id != 0) {
3326 switch (x86_spectrev2_mitigation) {
3327 case X86_SPECTREV2_ENHANCED_IBRS:
3347 /*
3348 * By default we've come in with retpolines enabled. Check whether we
3349 * should disable them or enable enhanced or automatic IBRS. RSB
3350 * stuffing is enabled by default. Note, we do not allow the use of AMD
3351 * optimized retpolines as it was disclosed by AMD in March 2022 that
3352 * they were still vulnerable. Prior to that point, we used them.
3353 */
3354 if (x86_disable_spectrev2 != 0) {
3355 v2mit = X86_SPECTREV2_DISABLED;
3356 } else if (is_x86_feature(featureset, X86FSET_AUTO_IBRS)) {
3357 cpuid_enable_auto_ibrs();
3358 v2mit = X86_SPECTREV2_AUTO_IBRS;
3359 } else if (is_x86_feature(featureset, X86FSET_IBRS_ALL)) {
3360 cpuid_enable_enhanced_ibrs();
3361 v2mit = X86_SPECTREV2_ENHANCED_IBRS;
3362 } else {
3363 v2mit = X86_SPECTREV2_RETPOLINE;
3364 }
3365
3366 cpuid_patch_retpolines(v2mit);
3367 x86_spectrev2_mitigation = v2mit;
3368 membar_producer();
3369
3370 /*
3371 * Return-stack buffer clearing may need a software-sequence. Discover
3372 * and patch as appropriate, after setting the SPECTREv2 global
3373 * mitigation level.
3374 */
3375 cpuid_patch_rsb(v2mit, is_x86_feature(featureset, X86FSET_PBRSB_NO));
3376 membar_producer();
3377
3378 /*
3379 * We need to determine what changes are required for mitigating L1TF
3380 * and MDS. If the CPU suffers from either of them, then SMT exclusion
3381 * is required.
3382 *
3383 * If any of these are present, then we need to flush u-arch state at
3384 * various points. For MDS, we need to do so whenever we change to a
3385 * lesser privilege level or we are halting the CPU. For L1TF we need to
3386 * flush the L1D cache at VM entry. When we have microcode that handles
3387 * MDS, the L1D flush also clears the other u-arch state that the
3388 * md_clear does.
3389 */
3390
3391 /*
3392 * Update whether or not we need to be taking explicit action against
3393 * MDS or RFDS.
3394 */
3395 cpuid_update_md_clear(cpu, featureset);
3396
3397 /*
|