Print this page
| Split |
Close |
| Expand all |
| Collapse all |
--- old/usr/src/uts/intel/sys/fp.h
+++ new/usr/src/uts/intel/sys/fp.h
1 1 /*
2 2 * CDDL HEADER START
3 3 *
4 4 * The contents of this file are subject to the terms of the
5 5 * Common Development and Distribution License (the "License").
6 6 * You may not use this file except in compliance with the License.
7 7 *
8 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 9 * or http://www.opensolaris.org/os/licensing.
10 10 * See the License for the specific language governing permissions
11 11 * and limitations under the License.
12 12 *
13 13 * When distributing Covered Code, include this CDDL HEADER in each
14 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 15 * If applicable, add the following below this CDDL HEADER, with the
16 16 * fields enclosed by brackets "[]" replaced with your own identifying
17 17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 18 *
19 19 * CDDL HEADER END
20 20 */
21 21 /*
22 22 * Copyright 2015 Nexenta Systems, Inc. All rights reserved.
23 23 * Copyright (c) 2018, Joyent, Inc.
24 24 * Copyright 2023 Oxide Computer Company
25 25 *
26 26 * Copyright (c) 1992, 2010, Oracle and/or its affiliates. All rights reserved.
27 27 */
28 28
29 29 /* Copyright (c) 1990, 1991 UNIX System Laboratories, Inc. */
30 30 /* Copyright (c) 1984, 1986, 1987, 1988, 1989, 1990 AT&T */
31 31 /* All Rights Reserved */
32 32
33 33 #ifndef _SYS_FP_H
34 34 #define _SYS_FP_H
35 35
36 36 #ifdef __cplusplus
37 37 extern "C" {
38 38 #endif
39 39
40 40 /*
41 41 * 80287/80387 and SSE/SSE2 floating point processor definitions
42 42 */
43 43
44 44 /*
45 45 * values that go into fp_kind
46 46 */
47 47 #define FP_NO 0 /* no fp chip, no emulator (no fp support) */
48 48 #define FP_SW 1 /* no fp chip, using software emulator */
49 49 #define FP_HW 2 /* chip present bit */
50 50 #define FP_287 2 /* 80287 chip present */
51 51 #define FP_387 3 /* 80387 chip present */
52 52 #define FP_487 6 /* 80487 chip present */
53 53 #define FP_486 6 /* 80486 chip present */
54 54 /*
55 55 * The following values are bit flags instead of actual values.
56 56 * E.g. to know if we are using SSE, test (value & __FP_SSE) instead
57 57 * of (value == __FP_SSE).
58 58 */
59 59 #define __FP_SSE 0x100 /* .. plus SSE-capable CPU */
60 60 #define __FP_AVX 0x200 /* .. plus AVX-capable CPU */
61 61
62 62 /*
63 63 * values that go into fp_save_mech
64 64 */
65 65 #define FP_FNSAVE 1 /* fnsave/frstor instructions */
66 66 #define FP_FXSAVE 2 /* fxsave/fxrstor instructions */
67 67 #define FP_XSAVE 3 /* xsave/xrstor instructions */
68 68
69 69 /*
70 70 * masks for 80387 control word
71 71 */
72 72 #define FPIM 0x00000001 /* invalid operation */
73 73 #define FPDM 0x00000002 /* denormalized operand */
74 74 #define FPZM 0x00000004 /* zero divide */
75 75 #define FPOM 0x00000008 /* overflow */
76 76 #define FPUM 0x00000010 /* underflow */
77 77 #define FPPM 0x00000020 /* precision */
78 78 #define FPPC 0x00000300 /* precision control */
79 79 #define FPRC 0x00000C00 /* rounding control */
80 80 #define FPIC 0x00001000 /* infinity control */
81 81 #define WFPDE 0x00000080 /* data chain exception */
82 82
83 83 /*
84 84 * (Old symbol compatibility)
85 85 */
86 86 #define FPINV FPIM
87 87 #define FPDNO FPDM
88 88 #define FPZDIV FPZM
89 89 #define FPOVR FPOM
90 90 #define FPUNR FPUM
91 91 #define FPPRE FPPM
92 92
93 93 /*
94 94 * precision, rounding, and infinity options in control word
95 95 */
96 96 #define FPSIG24 0x00000000 /* 24-bit significand precision (short) */
97 97 #define FPSIG53 0x00000200 /* 53-bit significand precision (long) */
98 98 #define FPSIG64 0x00000300 /* 64-bit significand precision (temp) */
99 99 #define FPRTN 0x00000000 /* round to nearest or even */
100 100 #define FPRD 0x00000400 /* round down */
101 101 #define FPRU 0x00000800 /* round up */
102 102 #define FPCHOP 0x00000C00 /* chop (truncate toward zero) */
103 103 #define FPP 0x00000000 /* projective infinity */
104 104 #define FPA 0x00001000 /* affine infinity */
105 105 #define WFPB17 0x00020000 /* bit 17 */
106 106 #define WFPB24 0x00040000 /* bit 24 */
107 107
108 108 /*
109 109 * masks for 80387 status word
110 110 */
111 111 #define FPS_IE 0x00000001 /* invalid operation */
112 112 #define FPS_DE 0x00000002 /* denormalized operand */
113 113 #define FPS_ZE 0x00000004 /* zero divide */
114 114 #define FPS_OE 0x00000008 /* overflow */
115 115 #define FPS_UE 0x00000010 /* underflow */
116 116 #define FPS_PE 0x00000020 /* precision */
117 117 #define FPS_SF 0x00000040 /* stack fault */
118 118 #define FPS_ES 0x00000080 /* error summary bit */
119 119 #define FPS_C0 0x00000100 /* C0 bit */
120 120 #define FPS_C1 0x00000200 /* C1 bit */
121 121 #define FPS_C2 0x00000400 /* C2 bit */
122 122 #define FPS_TOP 0x00003800 /* top of stack pointer */
123 123 #define FPS_C3 0x00004000 /* C3 bit */
124 124 #define FPS_B 0x00008000 /* busy bit */
125 125
126 126 /*
127 127 * Exception flags manually cleared during x87 exception handling.
128 128 */
129 129 #define FPS_SW_EFLAGS \
130 130 (FPS_IE|FPS_DE|FPS_ZE|FPS_OE|FPS_UE|FPS_PE|FPS_SF|FPS_ES|FPS_B)
131 131
132 132 /*
133 133 * Initial value of FPU control word as per 4th ed. ABI document
134 134 * - affine infinity
135 135 * - round to nearest or even
135 135 * - 64-bit significand (extended) precision
137 137 * - all exceptions masked
138 138 *
139 139 * The 4th ed. SVR4 ABI didn't discuss the value of reserved bits. The ISA
140 140 * defines bit 6 (0x40) as reserved, but also that it is set (rather than clear,
141 141 * like many other Reserved bits). We preserve that in our value here.
142 142 */
143 143 #define FPU_CW_INIT 0x137f
144 144
145 145 /*
146 146 * This is the Intel mandated form of the default value of the x87 control word.
147 147 * This is different from what we use and should only be used in the context of
148 148 * representing that default state (e.g. in /proc xregs).
149 149 */
150 150 #define FPU_CW_INIT_HW 0x037f
151 151
152 152 /*
153 153 * masks and flags for SSE/SSE2 MXCSR
154 154 */
155 155 #define SSE_IE 0x00000001 /* invalid operation */
156 156 #define SSE_DE 0x00000002 /* denormalized operand */
157 157 #define SSE_ZE 0x00000004 /* zero divide */
158 158 #define SSE_OE 0x00000008 /* overflow */
159 159 #define SSE_UE 0x00000010 /* underflow */
160 160 #define SSE_PE 0x00000020 /* precision */
161 161 #define SSE_DAZ 0x00000040 /* denormals are zero */
162 162 #define SSE_IM 0x00000080 /* invalid op exception mask */
163 163 #define SSE_DM 0x00000100 /* denormalize exception mask */
164 164 #define SSE_ZM 0x00000200 /* zero-divide exception mask */
165 165 #define SSE_OM 0x00000400 /* overflow exception mask */
166 166 #define SSE_UM 0x00000800 /* underflow exception mask */
167 167 #define SSE_PM 0x00001000 /* precision exception mask */
168 168 #define SSE_RC 0x00006000 /* rounding control */
169 169 #define SSE_RD 0x00002000 /* rounding control: round down */
170 170 #define SSE_RU 0x00004000 /* rounding control: round up */
171 171 #define SSE_FZ 0x00008000 /* flush to zero for masked underflow */
172 172
173 173 #define SSE_MXCSR_EFLAGS \
174 174 (SSE_IE|SSE_DE|SSE_ZE|SSE_OE|SSE_UE|SSE_PE) /* 0x3f */
175 175
176 176 #define SSE_MXCSR_INIT \
177 177 (SSE_IM|SSE_DM|SSE_ZM|SSE_OM|SSE_UM|SSE_PM) /* 0x1f80 */
178 178
179 179 #define SSE_MXCSR_MASK_DEFAULT \
180 180 (0xffff & ~SSE_DAZ) /* 0xffbf */
181 181
182 182 #define SSE_FMT_MXCSR \
183 183 "\20\20fz\17ru\16rd\15pm\14um\13om\12zm\11dm" \
184 184 "\10im\7daz\6pe\5ue\4oe\3ze\2de\1ie"
185 185
186 186 /*
187 187 * This structure is written to memory by an 'fnsave' instruction
188 188 */
189 189 struct fnsave_state {
190 190 uint16_t f_fcw; /* x87 control word */
191 191 uint16_t __f_ign0;
192 192 uint16_t f_fsw; /* x87 status word */
193 193 uint16_t __f_ign1;
194 194 uint16_t f_ftw; /* x87 tag word */
195 195 uint16_t __f_ign2;
196 196 uint32_t f_eip; /* instruction pointer offset */
197 197 uint16_t f_cs; /* instruction pointer selector */
198 198 uint16_t f_fop; /* x87 opcode */
199 199 uint32_t f_dp; /* operand (data) pointer offset */
200 200 uint16_t f_ds; /* operand (data) pointer selector */
201 201 uint16_t __f_ign3;
202 202 union {
203 203 uint16_t fpr_16[5]; /* 80-bits of x87 state */
204 204 } f_st[8]; /* 8 registers, 10 bytes each */
205 205 }; /* 108 bytes */
206 206
207 207 /*
208 208 * This structure is written to memory by an 'fxsave' instruction
209 209 * Note the variant behaviour of this instruction between long mode
210 210 * and legacy environments!
211 211 */
212 212 struct fxsave_state {
213 213 uint16_t fx_fcw; /* x87 control word */
214 214 uint16_t fx_fsw; /* x87 status word */
215 215 uint16_t fx_fctw; /* compressed tag word */
216 216 uint16_t fx_fop; /* x87 opcode */
217 217 #if defined(__amd64)
218 218 uint64_t fx_rip; /* 64-bit instruction pointer */
219 219 uint64_t fx_rdp; /* 64-bit operand (data) pointer */
220 220 #else
221 221 uint32_t fx_eip; /* instruction pointer offset */
222 222 uint16_t fx_cs; /* instruction pointer selector */
223 223 uint16_t __fx_ign0;
224 224 uint32_t fx_dp; /* operand (data) pointer offset */
225 225 uint16_t fx_ds; /* operand (data) pointer selector */
226 226 uint16_t __fx_ign1;
227 227 #endif
228 228 uint32_t fx_mxcsr; /* SSE control and status (MXCSR) */
229 229 uint32_t fx_mxcsr_mask; /* mask of usable MXCSR bits */
230 230 union {
231 231 uint16_t fpr_16[5]; /* 80-bits of x87 state */
232 232 u_longlong_t fpr_mmx; /* 64-bit mmx register */
233 233 uint32_t __fpr_pad[4]; /* (pad out to 128-bits) */
234 234 } fx_st[8];
235 235 #if defined(__amd64)
236 236 upad128_t fx_xmm[16]; /* 128-bit registers */
237 237 upad128_t __fx_ign2[6];
238 238 #else
239 239 upad128_t fx_xmm[8]; /* 128-bit registers */
240 240 upad128_t __fx_ign2[14];
241 241 #endif
242 242 } __aligned(16); /* 512 bytes */
243 243
244 244 /*
245 245 * This structure represents the header portion of the data layout used by the
246 246 * 'xsave' instruction variants. It is documented in section 13.4.2 of the
247 247 * Intel 64 and IA-32 Architectures Software Developer’s Manual, Volume 1
248 248 * (IASDv1). Although "header" is somewhat of a misnomer, considering the data
249 249 * begins at offset 512 of the xsave area, its contents dictate which portions
250 250 * of the area are present and how they may be formatted.
251 251 */
252 252 struct xsave_header {
253 253 uint64_t xsh_xstate_bv; /* bitmap of components in use */
254 254 uint64_t xsh_xcomp_bv; /* compaction bitmap (xsavec) */
255 255 uint64_t xsh_reserved[6]; /* pads the header to 64 bytes */
256 256 };
257 257
258 258 /*
259 259 * This structure is written to memory by one of the 'xsave' instruction
260 260 * variants. The first 512 bytes are compatible with the format of the 'fxsave'
261 261 * area. The extended portion is documented in section 13.4.3.
262 262 *
263 263 * Our size is at least AVX_XSAVE_SIZE (832 bytes), which is asserted
264 264 * statically. Enabling additional xsave-related CPU features requires an
265 265 * increase in the size. We dynamically allocate the per-lwp xsave area at
266 266 * runtime, based on the size needed for the CPU-specific features. This
267 267 * xsave_state structure simply defines our historical layout for the beginning
268 268 * of the xsave area. The locations and sizes of new, extended components are
269 269 * determined dynamically by querying the CPU. See the xsave_info structure in
270 270 * cpuid.c.
271 271 *
272 272 * xsave component usage is tracked using bits in the xstate_bv field of the
273 273 * header. The components are documented in section 13.1 of IASDv1. For easy
274 274 * reference, this is a summary of the currently defined component bit
275 275 * definitions:
276 276 * x87 0x0001
277 277 * SSE 0x0002
278 278 * AVX 0x0004
279 279 * bndreg (MPX) 0x0008
280 280 * bndcsr (MPX) 0x0010
281 281 * opmask (AVX512) 0x0020
282 282 * zmm hi256 (AVX512) 0x0040
283 283 * zmm hi16 (AVX512) 0x0080
284 284 * PT 0x0100
285 285 * PKRU 0x0200
286 286 * When xsaveopt_ctxt is being used to save into the xsave_state area, the
287 287 * xstate_bv field is updated by the xsaveopt instruction to indicate which
288 288 * elements of the xsave area are active.
289 289 *
290 290 * The xcomp_bv field should always be 0, since we do not currently use the
291 291 * compressed form of xsave (xsavec).
292 292 */
293 293 struct xsave_state {
294 294 struct fxsave_state xs_fxsave; /* 0-511 legacy region */
295 295 struct xsave_header xs_header; /* 512-575 XSAVE header */
296 296 upad128_t xs_ymm[16]; /* 576-831 AVX component */
297 297 } __aligned(64); /* 832 bytes */
298 298
299 299 /*
300 300 * While AVX_XSAVE_SIZE is the smallest the kernel will allocate for FPU
301 301 * state-saving, other consumers may constrain themselves to the minimum
302 302 * possible xsave state structure, which features only the legacy area and the
303 303 * bare xsave header.
304 304 */
305 305 #define MIN_XSAVE_SIZE (sizeof (struct fxsave_state) + \
306 306 sizeof (struct xsave_header))
307 307
308 308 /*
309 309 * Kernel's FPU save area
310 310 */
311 311 typedef struct {
312 312 union _kfpu_u {
313 313 void *kfpu_generic; /* save area, untyped */
314 314 struct fxsave_state *kfpu_fx; /* save area viewed as fxsave */
315 315 struct xsave_state *kfpu_xs; /* save area viewed as xsave */
316 316 } kfpu_u;
317 317 uint32_t kfpu_status; /* saved at #mf exception */
318 318 uint32_t kfpu_xstatus; /* saved at #xm exception */
319 319 } kfpu_t;
320 320
321 321 extern int fp_kind; /* kind of fp support */
322 322 extern int fp_save_mech; /* fp save/restore mechanism */
323 323 extern int fpu_exists; /* FPU hw exists */
324 324 extern int fp_elf; /* FP elf type */
325 325 extern uint64_t xsave_bv_all; /* Set of enabled xcr0 values */
326 326
327 327 #ifdef _KERNEL
328 328
329 329 extern int fpu_ignored;
330 330 extern int fpu_pentium_fdivbug;
331 331
332 332 extern uint32_t sse_mxcsr_mask;
333 333
334 334 extern void fpu_probe(void);
335 335 extern uint_t fpu_initial_probe(void);
336 336
337 337 extern void fpu_auxv_info(int *, size_t *);
338 338 extern boolean_t fpu_xsave_enabled(void);
339 339
340 340 extern void fpnsave_ctxt(void *);
341 341 extern void fpxsave_ctxt(void *);
342 342 extern void xsave_ctxt(void *);
343 343 extern void xsaveopt_ctxt(void *);
344 344 extern void fpxsave_excp_clr_ctxt(void *);
345 345 extern void xsave_excp_clr_ctxt(void *);
346 346 extern void xsaveopt_excp_clr_ctxt(void *);
347 347 extern void (*fpsave_ctxt)(void *);
348 348 extern void (*xsavep)(struct xsave_state *, uint64_t);
349 349
350 350 extern void fpxrestore_ctxt(void *);
351 351 extern void xrestore_ctxt(void *);
352 352 extern void (*fprestore_ctxt)(void *);
353 353
354 354 extern void fxsave_insn(struct fxsave_state *);
355 355 extern void fpxsave(struct fxsave_state *);
356 356 extern void fpxrestore(struct fxsave_state *);
357 357 extern void xsave(struct xsave_state *, uint64_t);
358 358 extern void xsaveopt(struct xsave_state *, uint64_t);
359 359 extern void xrestore(struct xsave_state *, uint64_t);
360 360
361 361 extern void fpenable(void);
362 362 extern void fpdisable(void);
363 363 extern void fpinit(void);
364 364
365 365 extern uint32_t fperr_reset(void);
366 366 extern uint32_t fpxerr_reset(void);
367 367
368 368 extern uint32_t fpgetcwsw(void);
369 369 extern uint32_t fpgetmxcsr(void);
370 370
371 371 struct regs;
372 372 extern int fpexterrflt(struct regs *);
373 373 extern int fpsimderrflt(struct regs *);
374 374 extern void fpsetcw(uint16_t, uint32_t);
375 375 extern void fp_seed(void);
376 376 extern void fp_exec(void);
377 377 struct _klwp;
378 378 extern void fp_lwp_init(struct _klwp *);
379 379 extern void fp_lwp_cleanup(struct _klwp *);
380 380 extern void fp_lwp_dup(struct _klwp *);
381 381
382 382 extern const struct fxsave_state sse_initial;
383 383 extern const struct xsave_state avx_initial;
384 384
385 385 struct proc;
386 386 struct ucontext;
387 387 extern void fpu_proc_xregs_info(struct proc *, uint32_t *, uint32_t *,
388 388 uint32_t *);
389 389 extern size_t fpu_proc_xregs_max_size(void);
390 390 extern void fpu_proc_xregs_get(struct _klwp *, void *);
391 391 extern int fpu_proc_xregs_set(struct _klwp *, void *);
392 392 extern int fpu_signal_copyin(struct _klwp *, struct ucontext *);
393 393 typedef int (*fpu_copyout_f)(const void *, void *, size_t);
394 394 extern int fpu_signal_copyout(struct _klwp *, uintptr_t, fpu_copyout_f);
395 395 extern void fpu_set_xsave(struct _klwp *, const void *);
396 396 extern size_t fpu_signal_size(struct _klwp *);
397 397
398 398 extern void fpu_get_fpregset(struct _klwp *, fpregset_t *);
399 399 extern void fpu_set_fpregset(struct _klwp *, const fpregset_t *);
400 400
401 401 #endif /* _KERNEL */
402 402
403 403 #ifdef __cplusplus
404 404 }
405 405 #endif
406 406
407 407 #endif /* _SYS_FP_H */
|
↓ open down ↓ |
407 lines elided |
↑ open up ↑ |
XXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXXX