1 /*
   2  * CDDL HEADER START
   3  *
   4  * The contents of this file are subject to the terms of the
   5  * Common Development and Distribution License (the "License").
   6  * You may not use this file except in compliance with the License.
   7  *
   8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
   9  * or http://www.opensolaris.org/os/licensing.
  10  * See the License for the specific language governing permissions
  11  * and limitations under the License.
  12  *
  13  * When distributing Covered Code, include this CDDL HEADER in each
  14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
  15  * If applicable, add the following below this CDDL HEADER, with the
  16  * fields enclosed by brackets "[]" replaced with your own identifying
  17  * information: Portions Copyright [yyyy] [name of copyright owner]
  18  *
  19  * CDDL HEADER END
  20  */
  21 /*
  22  * Copyright 2015 Nexenta Systems, Inc.  All rights reserved.
  23  * Copyright (c) 2018, Joyent, Inc.
  24  * Copyright 2022 Oxide Computer Company
  25  *
  26  * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
  27  */
  28 
  29 /*      Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
  30 /*      Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T       */
  31 /*              All Rights Reserved                             */
  32 
  33 #ifndef _SYS_FP_H
  34 #define _SYS_FP_H
  35 
  36 #ifdef __cplusplus
  37 extern "C" {
  38 #endif
  39 
  40 /*
  41  * 80287/80387 and SSE/SSE2 floating point processor definitions
  42  */
  43 
  44 /*
  45  * values that go into fp_kind
  46  */
  47 #define FP_NO   0       /* no fp chip, no emulator (no fp support)      */
  48 #define FP_SW   1       /* no fp chip, using software emulator          */
  49 #define FP_HW   2       /* chip present bit                             */
  50 #define FP_287  2       /* 80287 chip present                           */
  51 #define FP_387  3       /* 80387 chip present                           */
  52 #define FP_487  6       /* 80487 chip present                           */
  53 #define FP_486  6       /* 80486 chip present                           */
  54 /*
  55  * The following values are bit flags instead of actual values.
  56  * E.g. to know if we are using SSE, test (value & __FP_SSE) instead
  57  * of (value == __FP_SSE).
  58  */
  59 #define __FP_SSE        0x100   /* .. plus SSE-capable CPU              */
  60 #define __FP_AVX        0x200   /* .. plus AVX-capable CPU              */
  61 
  62 /*
  63  * values that go into fp_save_mech
  64  */
  65 #define FP_FNSAVE       1       /* fnsave/frstor instructions           */
  66 #define FP_FXSAVE       2       /* fxsave/fxrstor instructions          */
  67 #define FP_XSAVE        3       /* xsave/xrstor instructions            */
  68 
  69 /*
  70  * masks for 80387 control word
  71  */
  72 #define FPIM    0x00000001      /* invalid operation                    */
  73 #define FPDM    0x00000002      /* denormalized operand                 */
  74 #define FPZM    0x00000004      /* zero divide                          */
  75 #define FPOM    0x00000008      /* overflow                             */
  76 #define FPUM    0x00000010      /* underflow                            */
  77 #define FPPM    0x00000020      /* precision                            */
  78 #define FPPC    0x00000300      /* precision control                    */
  79 #define FPRC    0x00000C00      /* rounding control                     */
  80 #define FPIC    0x00001000      /* infinity control                     */
  81 #define WFPDE   0x00000080      /* data chain exception                 */
  82 
  83 /*
  84  * (Old symbol compatibility)
  85  */
  86 #define FPINV   FPIM
  87 #define FPDNO   FPDM
  88 #define FPZDIV  FPZM
  89 #define FPOVR   FPOM
  90 #define FPUNR   FPUM
  91 #define FPPRE   FPPM
  92 
  93 /*
  94  * precision, rounding, and infinity options in control word
  95  */
  96 #define FPSIG24 0x00000000      /* 24-bit significand precision (short) */
  97 #define FPSIG53 0x00000200      /* 53-bit significand precision (long)  */
  98 #define FPSIG64 0x00000300      /* 64-bit significand precision (temp)  */
  99 #define FPRTN   0x00000000      /* round to nearest or even             */
 100 #define FPRD    0x00000400      /* round down                           */
 101 #define FPRU    0x00000800      /* round up                             */
 102 #define FPCHOP  0x00000C00      /* chop (truncate toward zero)          */
 103 #define FPP     0x00000000      /* projective infinity                  */
 104 #define FPA     0x00001000      /* affine infinity                      */
 105 #define WFPB17  0x00020000      /* bit 17                               */
 106 #define WFPB24  0x00040000      /* bit 24                               */
 107 
 108 /*
 109  * masks for 80387 status word
 110  */
 111 #define FPS_IE  0x00000001      /* invalid operation                    */
 112 #define FPS_DE  0x00000002      /* denormalized operand                 */
 113 #define FPS_ZE  0x00000004      /* zero divide                          */
 114 #define FPS_OE  0x00000008      /* overflow                             */
 115 #define FPS_UE  0x00000010      /* underflow                            */
 116 #define FPS_PE  0x00000020      /* precision                            */
 117 #define FPS_SF  0x00000040      /* stack fault                          */
 118 #define FPS_ES  0x00000080      /* error summary bit                    */
 119 #define FPS_C0  0x00000100      /* C0 bit                               */
 120 #define FPS_C1  0x00000200      /* C1 bit                               */
 121 #define FPS_C2  0x00000400      /* C2 bit                               */
 122 #define FPS_TOP 0x00003800      /* top of stack pointer                 */
 123 #define FPS_C3  0x00004000      /* C3 bit                               */
 124 #define FPS_B   0x00008000      /* busy bit                             */
 125 
 126 /*
 127  * Exception flags manually cleared during x87 exception handling.
 128  */
 129 #define FPS_SW_EFLAGS   \
 130         (FPS_IE|FPS_DE|FPS_ZE|FPS_OE|FPS_UE|FPS_PE|FPS_SF|FPS_ES|FPS_B)
 131 
 132 /*
 133  * Initial value of FPU control word as per 4th ed. ABI document
 134  * - affine infinity
 135  * - round to nearest or even
 136  * - 64-bit double precision
 137  * - all exceptions masked
 138  */
 139 #define FPU_CW_INIT     0x133f
 140 
 141 /*
 142  * masks and flags for SSE/SSE2 MXCSR
 143  */
 144 #define SSE_IE  0x00000001      /* invalid operation                    */
 145 #define SSE_DE  0x00000002      /* denormalized operand                 */
 146 #define SSE_ZE  0x00000004      /* zero divide                          */
 147 #define SSE_OE  0x00000008      /* overflow                             */
 148 #define SSE_UE  0x00000010      /* underflow                            */
 149 #define SSE_PE  0x00000020      /* precision                            */
 150 #define SSE_DAZ 0x00000040      /* denormals are zero                   */
 151 #define SSE_IM  0x00000080      /* invalid op exception mask            */
 152 #define SSE_DM  0x00000100      /* denormalize exception mask           */
 153 #define SSE_ZM  0x00000200      /* zero-divide exception mask           */
 154 #define SSE_OM  0x00000400      /* overflow exception mask              */
 155 #define SSE_UM  0x00000800      /* underflow exception mask             */
 156 #define SSE_PM  0x00001000      /* precision exception mask             */
 157 #define SSE_RC  0x00006000      /* rounding control                     */
 158 #define SSE_RD  0x00002000      /* rounding control: round down         */
 159 #define SSE_RU  0x00004000      /* rounding control: round up           */
 160 #define SSE_FZ  0x00008000      /* flush to zero for masked underflow   */
 161 
 162 #define SSE_MXCSR_EFLAGS        \
 163         (SSE_IE|SSE_DE|SSE_ZE|SSE_OE|SSE_UE|SSE_PE)     /* 0x3f */
 164 
 165 #define SSE_MXCSR_INIT  \
 166         (SSE_IM|SSE_DM|SSE_ZM|SSE_OM|SSE_UM|SSE_PM)     /* 0x1f80 */
 167 
 168 #define SSE_MXCSR_MASK_DEFAULT  \
 169         (0xffff & ~SSE_DAZ)                         /* 0xffbf */
 170 
 171 #define SSE_FMT_MXCSR   \
 172         "\20\20fz\17ru\16rd\15pm\14um\13om\12zm\11dm"   \
 173         "\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie"
 174 
 175 /*
 176  * This structure is written to memory by an 'fnsave' instruction
 177  */
 178 struct fnsave_state {
 179         uint16_t        f_fcw;
 180         uint16_t        __f_ign0;
 181         uint16_t        f_fsw;
 182         uint16_t        __f_ign1;
 183         uint16_t        f_ftw;
 184         uint16_t        __f_ign2;
 185         uint32_t        f_eip;
 186         uint16_t        f_cs;
 187         uint16_t        f_fop;
 188         uint32_t        f_dp;
 189         uint16_t        f_ds;
 190         uint16_t        __f_ign3;
 191         union {
 192                 uint16_t fpr_16[5];     /* 80-bits of x87 state */
 193         } f_st[8];
 194 };      /* 108 bytes */
 195 
 196 /*
 197  * This structure is written to memory by an 'fxsave' instruction
 198  * Note the variant behaviour of this instruction between long mode
 199  * and legacy environments!
 200  */
 201 struct fxsave_state {
 202         uint16_t        fx_fcw;
 203         uint16_t        fx_fsw;
 204         uint16_t        fx_fctw;        /* compressed tag word */
 205         uint16_t        fx_fop;
 206 #if defined(__amd64)
 207         uint64_t        fx_rip;
 208         uint64_t        fx_rdp;
 209 #else
 210         uint32_t        fx_eip;
 211         uint16_t        fx_cs;
 212         uint16_t        __fx_ign0;
 213         uint32_t        fx_dp;
 214         uint16_t        fx_ds;
 215         uint16_t        __fx_ign1;
 216 #endif
 217         uint32_t        fx_mxcsr;
 218         uint32_t        fx_mxcsr_mask;
 219         union {
 220                 uint16_t fpr_16[5];     /* 80-bits of x87 state */
 221                 u_longlong_t fpr_mmx;   /* 64-bit mmx register */
 222                 uint32_t __fpr_pad[4];  /* (pad out to 128-bits) */
 223         } fx_st[8];
 224 #if defined(__amd64)
 225         upad128_t       fx_xmm[16];     /* 128-bit registers */
 226         upad128_t       __fx_ign2[6];
 227 #else
 228         upad128_t       fx_xmm[8];      /* 128-bit registers */
 229         upad128_t       __fx_ign2[14];
 230 #endif
 231 } __aligned(16);        /* 512 bytes */
 232 
 233 /*
 234  * This structure represents the header portion of the data layout used by the
 235  * 'xsave' instruction variants.  It is documented in section 13.4.2 of the
 236  * Intel 64 and IA-32 Architectures Software Developer’s Manual, Volume 1
 237  * (IASDv1).  Although "header" is somewhat of a misnomer, considering the data
 238  * begins at offset 512 of the xsave area, its contents dictate which portions
 239  * of the area are present and how they may be formatted.
 240  */
 241 struct xsave_header {
 242         uint64_t        xsh_xstate_bv;
 243         uint64_t        xsh_xcomp_bv;
 244         uint64_t        xsh_reserved[6];
 245 };
 246 
 247 /*
 248  * This structure is written to memory by one of the 'xsave' instruction
 249  * variants. The first 512 bytes are compatible with the format of the 'fxsave'
 250  * area.  The extended portion is documented in section 13.4.3.
 251  *
 252  * Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted
 253  * statically.  Enabling additional xsave-related CPU features requires an
 254  * increase in the size. We dynamically allocate the per-lwp xsave area at
 255  * runtime, based on the size needed for the CPU-specific features. This
 256  * xsave_state structure simply defines our historical layout for the beginning
 * of the xsave area. The locations and sizes of new, extended components are
 * determined dynamically by querying the CPU. See the xsave_info structure in
 * cpuid.c.
 260  *
 261  * xsave component usage is tracked using bits in the xstate_bv field of the
 262  * header. The components are documented in section 13.1 of IASDv1. For easy
 263  * reference, this is a summary of the currently defined component bit
 264  * definitions:
 265  *      x87                     0x0001
 266  *      SSE                     0x0002
 267  *      AVX                     0x0004
 268  *      bndreg (MPX)            0x0008
 269  *      bndcsr (MPX)            0x0010
 270  *      opmask (AVX512)         0x0020
 271  *      zmm hi256 (AVX512)      0x0040
 272  *      zmm hi16 (AVX512)       0x0080
 273  *      PT                      0x0100
 274  *      PKRU                    0x0200
 275  * When xsaveopt_ctxt is being used to save into the xsave_state area, the
 276  * xstate_bv field is updated by the xsaveopt instruction to indicate which
 277  * elements of the xsave area are active.
 278  *
 279  * The xcomp_bv field should always be 0, since we do not currently use the
 280  * compressed form of xsave (xsavec).
 281  */
 282 struct xsave_state {
 283         struct fxsave_state     xs_fxsave;      /* 0-511 legacy region */
 284         struct xsave_header     xs_header;      /* 512-575 XSAVE header */
 285         upad128_t               xs_ymm[16];     /* 576 AVX component */
 286 } __aligned(64);
 287 
 288 /*
 * While AVX_XSAVE_SIZE is the smallest the kernel will allocate for FPU
 290  * state-saving, other consumers may constrain themselves to the minimum
 291  * possible xsave state structure, which features only the legacy area and the
 292  * bare xsave header.
 293  */
 294 #define MIN_XSAVE_SIZE  (sizeof (struct fxsave_state) + \
 295                             sizeof (struct xsave_header))
 296 
 297 /*
 298  * Kernel's FPU save area
 299  */
 300 typedef struct {
 301         union _kfpu_u {
 302                 void *kfpu_generic;
 303                 struct fxsave_state *kfpu_fx;
 304 #if defined(__i386)
 305                 struct fnsave_state *kfpu_fn;
 306 #endif
 307                 struct xsave_state *kfpu_xs;
 308         } kfpu_u;
 309         uint32_t kfpu_status;           /* saved at #mf exception */
 310         uint32_t kfpu_xstatus;          /* saved at #xm exception */
 311 } kfpu_t;
 312 
 313 extern int fp_kind;             /* kind of fp support                   */
 314 extern int fp_save_mech;        /* fp save/restore mechanism            */
 315 extern int fpu_exists;          /* FPU hw exists                        */
 316 
 317 #ifdef _KERNEL
 318 
 319 extern int fpu_ignored;
 320 extern int fpu_pentium_fdivbug;
 321 
 322 extern uint32_t sse_mxcsr_mask;
 323 
 324 extern void fpu_probe(void);
 325 extern uint_t fpu_initial_probe(void);
 326 
 327 extern void fpu_auxv_info(int *, size_t *);
 328 
 329 extern void fpnsave_ctxt(void *);
 330 extern void fpxsave_ctxt(void *);
 331 extern void xsave_ctxt(void *);
 332 extern void xsaveopt_ctxt(void *);
 333 extern void fpxsave_excp_clr_ctxt(void *);
 334 extern void xsave_excp_clr_ctxt(void *);
 335 extern void xsaveopt_excp_clr_ctxt(void *);
 336 extern void (*fpsave_ctxt)(void *);
 337 extern void (*xsavep)(struct xsave_state *, uint64_t);
 338 
 339 extern void fpxrestore_ctxt(void *);
 340 extern void xrestore_ctxt(void *);
 341 extern void (*fprestore_ctxt)(void *);
 342 
 343 extern void fxsave_insn(struct fxsave_state *);
 344 extern void fpsave(struct fnsave_state *);
 345 extern void fprestore(struct fnsave_state *);
 346 extern void fpxsave(struct fxsave_state *);
 347 extern void fpxrestore(struct fxsave_state *);
 348 extern void xsave(struct xsave_state *, uint64_t);
 349 extern void xsaveopt(struct xsave_state *, uint64_t);
 350 extern void xrestore(struct xsave_state *, uint64_t);
 351 
 352 extern void fpenable(void);
 353 extern void fpdisable(void);
 354 extern void fpinit(void);
 355 
 356 extern uint32_t fperr_reset(void);
 357 extern uint32_t fpxerr_reset(void);
 358 
 359 extern uint32_t fpgetcwsw(void);
 360 extern uint32_t fpgetmxcsr(void);
 361 
 362 struct regs;
 363 extern int fpexterrflt(struct regs *);
 364 extern int fpsimderrflt(struct regs *);
 365 extern void fpsetcw(uint16_t, uint32_t);
 366 extern void fp_seed(void);
 367 extern void fp_exec(void);
 368 struct _klwp;
 369 extern void fp_lwp_init(struct _klwp *);
 370 extern void fp_lwp_cleanup(struct _klwp *);
 371 extern void fp_lwp_dup(struct _klwp *);
 372 
 373 extern const struct fxsave_state sse_initial;
 374 extern const struct xsave_state avx_initial;
 375 
 376 #endif  /* _KERNEL */
 377 
 378 #ifdef __cplusplus
 379 }
 380 #endif
 381 
 382 #endif  /* _SYS_FP_H */