Print this page
16413 Post-barrier Return Stack Buffer (consider no-eIBRS cases)
16413 Post-barrier Return Stack Buffer (PBRSB) fixes can be detected in HW


1658         "tbm",
1659         "avx512_vnni",
1660         "amd_pcec",
1661         "md_clear",
1662         "mds_no",
1663         "core_thermal",
1664         "pkg_thermal",
1665         "tsx_ctrl",
1666         "taa_no",
1667         "ppin",
1668         "vaes",
1669         "vpclmulqdq",
1670         "lfence_serializing",
1671         "gfni",
1672         "avx512_vp2intersect",
1673         "avx512_bitalg",
1674         "avx512_vbmi2",
1675         "avx512_bf16",
1676         "auto_ibrs",
1677         "rfds_no",
1678         "rfds_clear"

1679 };
1680 
1681 boolean_t
1682 is_x86_feature(void *featureset, uint_t feature)
1683 {
1684         ASSERT(feature < NUM_X86_FEATURES);
1685         return (BT_TEST((ulong_t *)featureset, feature));
1686 }
1687 
1688 void
1689 add_x86_feature(void *featureset, uint_t feature)
1690 {
1691         ASSERT(feature < NUM_X86_FEATURES);
1692         BT_SET((ulong_t *)featureset, feature);
1693 }
1694 
1695 void
1696 remove_x86_feature(void *featureset, uint_t feature)
1697 {
1698         ASSERT(feature < NUM_X86_FEATURES);


2951         if (need_l1d) {
2952                 /*
2953                  * As of Feb, 2024, no CPU needs L1D *and* RFDS mitigation
2954                  * together. If the following VERIFY trips, we need to add
2955                  * further fixes here.
2956                  */
2957                 VERIFY(!need_rfds);
2958                 spec_uarch_flush = spec_uarch_flush_msr;
2959         } else if (need_mds || need_rfds) {
2960                 spec_uarch_flush = x86_md_clear;
2961         } else {
2962                 /*
2963                  * We have no hardware mitigations available to us.
2964                  */
2965                 spec_uarch_flush = spec_uarch_flush_noop;
2966         }
2967         membar_producer();
2968 }
2969 
2970 /*
2971  * We default to enabling RSB mitigations.
2972  *
2973  * NOTE: We used to skip RSB mitigations with eIBRS, but developments around
2974  * post-barrier RSB guessing suggest we should enable RSB mitigations always
2975  * unless specifically instructed not to.






2976  *




2977  * AMD indicates that when Automatic IBRS is enabled we do not need to implement
2978  * return stack buffer clearing for VMEXIT as it takes care of it. The manual
2979  * also states that as long as SMEP is enabled and we maintain at least one
2980  * page between the kernel and user space (we have much more of a red zone),
2981  * then we do not need to clear the RSB. We constrain this to only when
2982  * Automatic IBRS is present.
2983  */
2984 static void
2985 cpuid_patch_rsb(x86_spectrev2_mitigation_t mit)
2986 {
2987         const uint8_t ret = RET_INSTR;
2988         uint8_t *stuff = (uint8_t *)x86_rsb_stuff;

2989 
2990         switch (mit) {
2991         case X86_SPECTREV2_AUTO_IBRS:
2992         case X86_SPECTREV2_DISABLED:

2993                 *stuff = ret;

2994                 break;









2995         default:










2996                 break;
2997         }
2998 }
2999 
3000 static void
3001 cpuid_patch_retpolines(x86_spectrev2_mitigation_t mit)
3002 {
3003         const char *thunks[] = { "_rax", "_rbx", "_rcx", "_rdx", "_rdi",
3004             "_rsi", "_rbp", "_r8", "_r9", "_r10", "_r11", "_r12", "_r13",
3005             "_r14", "_r15" };
3006         const uint_t nthunks = ARRAY_SIZE(thunks);
3007         const char *type;
3008         uint_t i;
3009 
3010         if (mit == x86_spectrev2_mitigation)
3011                 return;
3012 
3013         switch (mit) {
3014         case X86_SPECTREV2_RETPOLINE:
3015                 type = "gen";


3250                                 if (reg & IA32_ARCH_CAP_MDS_NO) {
3251                                         add_x86_feature(featureset,
3252                                             X86FSET_MDS_NO);
3253                                 }
3254                                 if (reg & IA32_ARCH_CAP_TSX_CTRL) {
3255                                         add_x86_feature(featureset,
3256                                             X86FSET_TSX_CTRL);
3257                                 }
3258                                 if (reg & IA32_ARCH_CAP_TAA_NO) {
3259                                         add_x86_feature(featureset,
3260                                             X86FSET_TAA_NO);
3261                                 }
3262                                 if (reg & IA32_ARCH_CAP_RFDS_NO) {
3263                                         add_x86_feature(featureset,
3264                                             X86FSET_RFDS_NO);
3265                                 }
3266                                 if (reg & IA32_ARCH_CAP_RFDS_CLEAR) {
3267                                         add_x86_feature(featureset,
3268                                             X86FSET_RFDS_CLEAR);
3269                                 }



3270                         }

3271                         no_trap();
3272                 }
3273 #endif  /* !__xpv */
3274 
3275                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
3276                         add_x86_feature(featureset, X86FSET_SSBD);
3277 
3278                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
3279                         add_x86_feature(featureset, X86FSET_FLUSH_CMD);
3280         }
3281 
3282         /*
3283          * Take care of certain mitigations on the non-boot CPU. The boot CPU
3284          * will have already run this function and determined what we need to
3285          * do. This gives us a hook for per-HW thread mitigations such as
3286          * enhanced IBRS, or disabling TSX.
3287          */
3288         if (cpu->cpu_id != 0) {
3289                 switch (x86_spectrev2_mitigation) {
3290                 case X86_SPECTREV2_ENHANCED_IBRS:


3310         /*
3311          * By default we've come in with retpolines enabled. Check whether we
3312          * should disable them or enable enhanced or automatic IBRS. RSB
3313          * stuffing is enabled by default. Note, we do not allow the use of AMD
3314          * optimized retpolines as it was disclosed by AMD in March 2022 that
3315          * they were still vulnerable. Prior to that point, we used them.
3316          */
3317         if (x86_disable_spectrev2 != 0) {
3318                 v2mit = X86_SPECTREV2_DISABLED;
3319         } else if (is_x86_feature(featureset, X86FSET_AUTO_IBRS)) {
3320                 cpuid_enable_auto_ibrs();
3321                 v2mit = X86_SPECTREV2_AUTO_IBRS;
3322         } else if (is_x86_feature(featureset, X86FSET_IBRS_ALL)) {
3323                 cpuid_enable_enhanced_ibrs();
3324                 v2mit = X86_SPECTREV2_ENHANCED_IBRS;
3325         } else {
3326                 v2mit = X86_SPECTREV2_RETPOLINE;
3327         }
3328 
3329         cpuid_patch_retpolines(v2mit);
3330         cpuid_patch_rsb(v2mit);
3331         x86_spectrev2_mitigation = v2mit;
3332         membar_producer();
3333 








3334         /*
3335          * We need to determine what changes are required for mitigating L1TF
3336          * and MDS. If the CPU suffers from either of them, then SMT exclusion
3337          * is required.
3338          *
3339          * If any of these are present, then we need to flush u-arch state at
3340          * various points. For MDS, we need to do so whenever we change to a
3341          * lesser privilege level or we are halting the CPU. For L1TF we need to
3342          * flush the L1D cache at VM entry. When we have microcode that handles
3343          * MDS, the L1D flush also clears the other u-arch state that the
3344          * md_clear does.
3345          */
3346 
3347         /*
3348          * Update whether or not we need to be taking explicit action against
3349          * MDS or RFDS.
3350          */
3351         cpuid_update_md_clear(cpu, featureset);
3352 
3353         /*




1658         "tbm",
1659         "avx512_vnni",
1660         "amd_pcec",
1661         "md_clear",
1662         "mds_no",
1663         "core_thermal",
1664         "pkg_thermal",
1665         "tsx_ctrl",
1666         "taa_no",
1667         "ppin",
1668         "vaes",
1669         "vpclmulqdq",
1670         "lfence_serializing",
1671         "gfni",
1672         "avx512_vp2intersect",
1673         "avx512_bitalg",
1674         "avx512_vbmi2",
1675         "avx512_bf16",
1676         "auto_ibrs",
1677         "rfds_no",
1678         "rfds_clear",
1679         "pbrsb_no"
1680 };
1681 
1682 boolean_t
1683 is_x86_feature(void *featureset, uint_t feature)
1684 {
1685         ASSERT(feature < NUM_X86_FEATURES);
1686         return (BT_TEST((ulong_t *)featureset, feature));
1687 }
1688 
1689 void
1690 add_x86_feature(void *featureset, uint_t feature)
1691 {
1692         ASSERT(feature < NUM_X86_FEATURES);
1693         BT_SET((ulong_t *)featureset, feature);
1694 }
1695 
1696 void
1697 remove_x86_feature(void *featureset, uint_t feature)
1698 {
1699         ASSERT(feature < NUM_X86_FEATURES);


2952         if (need_l1d) {
2953                 /*
2954                  * As of Feb, 2024, no CPU needs L1D *and* RFDS mitigation
2955                  * together. If the following VERIFY trips, we need to add
2956                  * further fixes here.
2957                  */
2958                 VERIFY(!need_rfds);
2959                 spec_uarch_flush = spec_uarch_flush_msr;
2960         } else if (need_mds || need_rfds) {
2961                 spec_uarch_flush = x86_md_clear;
2962         } else {
2963                 /*
2964                  * We have no hardware mitigations available to us.
2965                  */
2966                 spec_uarch_flush = spec_uarch_flush_noop;
2967         }
2968         membar_producer();
2969 }
2970 
2971 /*
2972  * We default to enabling Return Stack Buffer (RSB) mitigations.
2973  *
2974  * We used to skip RSB mitigations with Intel eIBRS, but developments around
2975  * post-barrier RSB (PBRSB) guessing suggest we should enable Intel RSB
2976  * mitigations always unless explicitly bypassed, or unless hardware indicates
2977  * the bug has been fixed. Intel also says that machines without eIBRS do not
2978  * have VMEXIT problems with PBRSB. Basically, if we're Intel and have eIBRS,
2979  * we must stuff the RSB in both context switches AND in VMEXIT, unless the
2980  * hardware says the PBRSB bug is fixed.  If we're Intel but without eIBRS
2981  * (i.e. using retpolines), we must stuff the RSB in context switches, but we
2982  * do not have to for VMEXIT.
2983  *
2984  * See (pardon broken URL)  https://www.intel.com/content/www/us/en/developer \
2985  * /articles/technical/software-security-guidance/advisory-guidance
2986  * /post-barrier-return-stack-buffer-predictions.html
2987  *
2988  * AMD indicates that when Automatic IBRS is enabled we do not need to implement
2989  * return stack buffer clearing for VMEXIT as it takes care of it. The manual
2990  * also states that as long as SMEP is enabled and we maintain at least one
2991  * page between the kernel and user space (we have much more of a red zone),
2992  * then we do not need to clear the RSB. We constrain this to only when
2993  * Automatic IBRS is present.
2994  */
2995 static void
2996 cpuid_patch_rsb(x86_spectrev2_mitigation_t mit, bool intel_pbrsb_no)
2997 {
2998         const uint8_t ret = RET_INSTR;
2999         uint8_t *stuff = (uint8_t *)x86_rsb_stuff;
3000         uint8_t *vmx_stuff = (uint8_t *)x86_rsb_stuff_vmexit;
3001 
3002         switch (mit) {
3003         case X86_SPECTREV2_AUTO_IBRS:
3004         case X86_SPECTREV2_DISABLED:
3005                 /* Don't bother with any RSB stuffing! */
3006                 *stuff = ret;
3007                 *vmx_stuff = ret;
3008                 break;
3009         case X86_SPECTREV2_RETPOLINE:
3010                 /*
3011                  * The Intel document on Post-Barrier RSB says that processors
3012                  * without eIBRS do not have PBRSB problems upon VMEXIT.
3013                  */
3014                 VERIFY(!intel_pbrsb_no);
3015                 VERIFY3U(*stuff, !=, ret);
3016                 *vmx_stuff = ret;
3017                 break;
3018         default:
3019                 /*
3020                  * eIBRS is all that's left.  If the CPU claims PBRSB is
3021                  * fixed, don't use the RSB mitigation in either case.
3022                  */
3023                 if (intel_pbrsb_no) {
3024                         /* CPU claims PBRSB problems are fixed. */
3025                         *stuff = ret;
3026                         *vmx_stuff = ret;
3027                 }
3028                 VERIFY3U(*stuff, ==, *vmx_stuff);
3029                 break;
3030         }
3031 }
3032 
3033 static void
3034 cpuid_patch_retpolines(x86_spectrev2_mitigation_t mit)
3035 {
3036         const char *thunks[] = { "_rax", "_rbx", "_rcx", "_rdx", "_rdi",
3037             "_rsi", "_rbp", "_r8", "_r9", "_r10", "_r11", "_r12", "_r13",
3038             "_r14", "_r15" };
3039         const uint_t nthunks = ARRAY_SIZE(thunks);
3040         const char *type;
3041         uint_t i;
3042 
3043         if (mit == x86_spectrev2_mitigation)
3044                 return;
3045 
3046         switch (mit) {
3047         case X86_SPECTREV2_RETPOLINE:
3048                 type = "gen";


3283                                 if (reg & IA32_ARCH_CAP_MDS_NO) {
3284                                         add_x86_feature(featureset,
3285                                             X86FSET_MDS_NO);
3286                                 }
3287                                 if (reg & IA32_ARCH_CAP_TSX_CTRL) {
3288                                         add_x86_feature(featureset,
3289                                             X86FSET_TSX_CTRL);
3290                                 }
3291                                 if (reg & IA32_ARCH_CAP_TAA_NO) {
3292                                         add_x86_feature(featureset,
3293                                             X86FSET_TAA_NO);
3294                                 }
3295                                 if (reg & IA32_ARCH_CAP_RFDS_NO) {
3296                                         add_x86_feature(featureset,
3297                                             X86FSET_RFDS_NO);
3298                                 }
3299                                 if (reg & IA32_ARCH_CAP_RFDS_CLEAR) {
3300                                         add_x86_feature(featureset,
3301                                             X86FSET_RFDS_CLEAR);
3302                                 }
3303                                 if (reg & IA32_ARCH_CAP_PBRSB_NO) {
3304                                         add_x86_feature(featureset,
3305                                             X86FSET_PBRSB_NO);
3306                                 }
3307                         }
3308                         no_trap();
3309                 }
3310 #endif  /* !__xpv */
3311 
3312                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_SSBD)
3313                         add_x86_feature(featureset, X86FSET_SSBD);
3314 
3315                 if (ecp->cp_edx & CPUID_INTC_EDX_7_0_FLUSH_CMD)
3316                         add_x86_feature(featureset, X86FSET_FLUSH_CMD);
3317         }
3318 
3319         /*
3320          * Take care of certain mitigations on the non-boot CPU. The boot CPU
3321          * will have already run this function and determined what we need to
3322          * do. This gives us a hook for per-HW thread mitigations such as
3323          * enhanced IBRS, or disabling TSX.
3324          */
3325         if (cpu->cpu_id != 0) {
3326                 switch (x86_spectrev2_mitigation) {
3327                 case X86_SPECTREV2_ENHANCED_IBRS:


3347         /*
3348          * By default we've come in with retpolines enabled. Check whether we
3349          * should disable them or enable enhanced or automatic IBRS. RSB
3350          * stuffing is enabled by default. Note, we do not allow the use of AMD
3351          * optimized retpolines as it was disclosed by AMD in March 2022 that
3352          * they were still vulnerable. Prior to that point, we used them.
3353          */
3354         if (x86_disable_spectrev2 != 0) {
3355                 v2mit = X86_SPECTREV2_DISABLED;
3356         } else if (is_x86_feature(featureset, X86FSET_AUTO_IBRS)) {
3357                 cpuid_enable_auto_ibrs();
3358                 v2mit = X86_SPECTREV2_AUTO_IBRS;
3359         } else if (is_x86_feature(featureset, X86FSET_IBRS_ALL)) {
3360                 cpuid_enable_enhanced_ibrs();
3361                 v2mit = X86_SPECTREV2_ENHANCED_IBRS;
3362         } else {
3363                 v2mit = X86_SPECTREV2_RETPOLINE;
3364         }
3365 
3366         cpuid_patch_retpolines(v2mit);

3367         x86_spectrev2_mitigation = v2mit;
3368         membar_producer();
3369 
3370         /*
3371          * Return-stack buffer clearing may need a software-sequence. Discover
3372          * and patch as appropriate, after setting the SPECTREv2 global
3373          * mitigation level.
3374          */
3375         cpuid_patch_rsb(v2mit, is_x86_feature(featureset, X86FSET_PBRSB_NO));
3376         membar_producer();
3377 
3378         /*
3379          * We need to determine what changes are required for mitigating L1TF
3380          * and MDS. If the CPU suffers from either of them, then SMT exclusion
3381          * is required.
3382          *
3383          * If any of these are present, then we need to flush u-arch state at
3384          * various points. For MDS, we need to do so whenever we change to a
3385          * lesser privilege level or we are halting the CPU. For L1TF we need to
3386          * flush the L1D cache at VM entry. When we have microcode that handles
3387          * MDS, the L1D flush also clears the other u-arch state that the
3388          * md_clear does.
3389          */
3390 
3391         /*
3392          * Update whether or not we need to be taking explicit action against
3393          * MDS or RFDS.
3394          */
3395         cpuid_update_md_clear(cpu, featureset);
3396 
3397         /*